Merge pull request #3498 from rommapp/hasheous-lookup-all-hashes

Send all top-level file hashes to Hasheous lookup
This commit is contained in:
Georges-Antoine Assi
2026-06-09 08:08:36 -04:00
committed by GitHub
2 changed files with 91 additions and 28 deletions

View File

@@ -153,7 +153,7 @@ class HasheousHandler(MetadataHandler):
url: str,
method: str = "POST",
params: dict | None = None,
data: dict | None = None,
data: dict | list | None = None,
) -> dict:
httpx_client = ctx_httpx_client.get()
@@ -172,7 +172,7 @@ class HasheousHandler(MetadataHandler):
)
# Prepare request kwargs
request_kwargs = {
request_kwargs: dict[str, Any] = {
"url": url,
"params": params,
"headers": {
@@ -251,39 +251,35 @@ class HasheousHandler(MetadataHandler):
)
]
# Select the largest file by size, as it is most likely to be the main ROM file.
# This increases the accuracy of metadata lookups, since the largest file is
# expected to have the correct and complete hash values for external services.
first_file = max(filtered_files, key=lambda f: f.file_size_bytes, default=None)
if first_file is None:
return fallback_rom
# The lookup endpoint accepts the hashes of all top-level files, which
# increases the accuracy of metadata lookups by letting Hasheous match
# against any of them.
data: list[dict] = []
for file in filtered_files:
file_hashes: dict[str, str | None]
if file.chd_sha1_hash:
# CHD files are indexed by disc-data SHA1 only
# Raw file MD5/CRC are hashes of the container and won't match
file_hashes = {"sha1": file.chd_sha1_hash}
else:
file_hashes = {
"md5": file.md5_hash,
"sha1": file.sha1_hash,
"crc": file.crc_hash,
}
if first_file.chd_sha1_hash:
# For CHD files, Hasheous indexes by disc-data SHA1 only.
# Raw file MD5/CRC are hashes of the container and won't match.
md5_hash = None
sha1_hash = first_file.chd_sha1_hash
crc_hash = None
else:
md5_hash = first_file.md5_hash
sha1_hash = first_file.sha1_hash
crc_hash = first_file.crc_hash
# Drop empty hashes and skip files that have none.
file_hashes = {key: value for key, value in file_hashes.items() if value}
if file_hashes:
data.append(file_hashes)
if not (md5_hash or sha1_hash or crc_hash):
if not data:
log.warning(
"No hashes provided for Hasheous lookup. "
"At least one of md5_hash, sha1_hash, or crc_hash is required."
"At least one of md5, sha1, or crc is required."
)
return fallback_rom
data = {}
if md5_hash:
data["mD5"] = md5_hash
if sha1_hash:
data["shA1"] = sha1_hash
if crc_hash:
data["crc"] = crc_hash
hasheous_game = await self._request(
self.games_endpoint,
params={

View File

@@ -309,3 +309,70 @@ async def test_scan_rom_unmatched_skips_ra_when_id_and_metadata_exist(
mock_get_rom.assert_not_called()
# Existing ra_id should be preserved
assert result.ra_id == 2774
def _top_level_rom_file(**kwargs) -> RomFile:
    """Create a RomFile that is already marked as top-level.

    The `is_top_level` cached_property is seeded with True directly in the
    instance dict, so the file passes lookup_rom's filtering without a
    persisted rom behind it. All keyword arguments are forwarded to the
    RomFile constructor.
    """
    rom_file = RomFile(file_path="n64/Game", **kwargs)
    # Writing into the instance dict pre-populates the cached_property value.
    rom_file.__dict__.update(is_top_level=True)
    return rom_file
@patch.object(meta_hasheous_handler, "_request", new_callable=AsyncMock)
@patch.object(meta_hasheous_handler, "is_enabled", return_value=True)
async def test_lookup_rom_sends_all_top_level_file_hashes(
    mock_is_enabled, mock_request
):
    """Verify the payload lookup_rom hands to `_request`.

    Every top-level file must contribute one entry to a list payload. A file
    with a chd_sha1_hash contributes that hash alone, and files with zero
    size or without any hash contribute nothing.
    """
    mock_request.return_value = {}

    # Regular file: all three raw hashes are expected in the payload.
    regular_file = _top_level_rom_file(
        file_name="disc1.bin",
        file_size_bytes=100,
        md5_hash="md5one",
        sha1_hash="sha1one",
        crc_hash="crcone",
    )
    # CHD file: only chd_sha1_hash should be sent, raw md5/crc ignored.
    chd_file = _top_level_rom_file(
        file_name="disc2.chd",
        file_size_bytes=200,
        md5_hash="ignoredmd5",
        crc_hash="ignoredcrc",
        chd_sha1_hash="chdsha1",
    )
    # Zero-size file: must be filtered out entirely.
    empty_file = _top_level_rom_file(
        file_name="empty.bin",
        file_size_bytes=0,
        md5_hash="zeromd5",
    )
    # No hashes at all: must be skipped.
    unhashed_file = _top_level_rom_file(file_name="nohash.bin", file_size_bytes=50)

    result = await meta_hasheous_handler.lookup_rom(
        "n64", [regular_file, chd_file, empty_file, unhashed_file]
    )

    assert result["hasheous_id"] is None
    mock_request.assert_called_once()

    expected_payload = [
        {"md5": "md5one", "sha1": "sha1one", "crc": "crcone"},
        {"sha1": "chdsha1"},
    ]
    assert mock_request.call_args.kwargs["data"] == expected_payload
@patch.object(meta_hasheous_handler, "_request", new_callable=AsyncMock)
@patch.object(meta_hasheous_handler, "is_enabled", return_value=True)
async def test_lookup_rom_skips_request_when_no_hashes(mock_is_enabled, mock_request):
    """Verify lookup_rom makes no API request when no file has a usable hash."""
    # A single non-empty top-level file that carries no hash of any kind.
    unhashed_file = _top_level_rom_file(file_name="nohash.bin", file_size_bytes=50)

    result = await meta_hasheous_handler.lookup_rom("n64", [unhashed_file])

    assert result["hasheous_id"] is None
    mock_request.assert_not_called()