diff --git a/backend/handler/metadata/hasheous_handler.py b/backend/handler/metadata/hasheous_handler.py index aa13da365..b32bc3855 100644 --- a/backend/handler/metadata/hasheous_handler.py +++ b/backend/handler/metadata/hasheous_handler.py @@ -153,7 +153,7 @@ class HasheousHandler(MetadataHandler): url: str, method: str = "POST", params: dict | None = None, - data: dict | None = None, + data: dict | list | None = None, ) -> dict: httpx_client = ctx_httpx_client.get() @@ -172,7 +172,7 @@ class HasheousHandler(MetadataHandler): ) # Prepare request kwargs - request_kwargs = { + request_kwargs: dict[str, Any] = { "url": url, "params": params, "headers": { @@ -251,39 +251,35 @@ class HasheousHandler(MetadataHandler): ) ] - # Select the largest file by size, as it is most likely to be the main ROM file. - # This increases the accuracy of metadata lookups, since the largest file is - # expected to have the correct and complete hash values for external services. - first_file = max(filtered_files, key=lambda f: f.file_size_bytes, default=None) - if first_file is None: - return fallback_rom + # The lookup endpoint accepts the hashes of all top-level files, which + # increases the accuracy of metadata lookups by letting Hasheous match + # against any of them. + data: list[dict] = [] + for file in filtered_files: + file_hashes: dict[str, str | None] + if file.chd_sha1_hash: + # CHD files are indexed by disc-data SHA1 only + # Raw file MD5/CRC are hashes of the container and won't match + file_hashes = {"sha1": file.chd_sha1_hash} + else: + file_hashes = { + "md5": file.md5_hash, + "sha1": file.sha1_hash, + "crc": file.crc_hash, + } - if first_file.chd_sha1_hash: - # For CHD files, Hasheous indexes by disc-data SHA1 only. - # Raw file MD5/CRC are hashes of the container and won't match. - md5_hash = None - sha1_hash = first_file.chd_sha1_hash - crc_hash = None - else: - md5_hash = first_file.md5_hash - sha1_hash = first_file.sha1_hash - crc_hash = first_file.crc_hash + # Drop empty hashes and skip files that have none. + file_hashes = {key: value for key, value in file_hashes.items() if value} + if file_hashes: + data.append(file_hashes) - if not (md5_hash or sha1_hash or crc_hash): + if not data: log.warning( "No hashes provided for Hasheous lookup. " - "At least one of md5_hash, sha1_hash, or crc_hash is required." + "At least one of md5, sha1, or crc is required." ) return fallback_rom - data = {} - if md5_hash: - data["mD5"] = md5_hash - if sha1_hash: - data["shA1"] = sha1_hash - if crc_hash: - data["crc"] = crc_hash - hasheous_game = await self._request( self.games_endpoint, params={ diff --git a/backend/tests/handler/test_fastapi.py b/backend/tests/handler/test_fastapi.py index f2c1b3b43..1235797cd 100644 --- a/backend/tests/handler/test_fastapi.py +++ b/backend/tests/handler/test_fastapi.py @@ -309,3 +309,70 @@ async def test_scan_rom_unmatched_skips_ra_when_id_and_metadata_exist( mock_get_rom.assert_not_called() # Existing ra_id should be preserved assert result.ra_id == 2774 + + +def _top_level_rom_file(**kwargs) -> RomFile: + """Build a RomFile whose `is_top_level` cached_property is pre-seeded to + True, so it passes lookup_rom's filtering without a persisted rom.""" + file = RomFile(file_path="n64/Game", **kwargs) + file.__dict__["is_top_level"] = True + return file + + +@patch.object(meta_hasheous_handler, "_request", new_callable=AsyncMock) +@patch.object(meta_hasheous_handler, "is_enabled", return_value=True) +async def test_lookup_rom_sends_all_top_level_file_hashes( + mock_is_enabled, mock_request +): + """lookup_rom must send the hashes of every top-level file as a list, + using chd_sha1_hash (and only it) for files that have one, and skipping + files with no hashes or zero size.""" + mock_request.return_value = {} + + files = [ + _top_level_rom_file( + file_name="disc1.bin", + file_size_bytes=100, + md5_hash="md5one", + sha1_hash="sha1one", + crc_hash="crcone", + ), + # CHD file: only chd_sha1_hash should be sent, raw md5/crc ignored. + _top_level_rom_file( + file_name="disc2.chd", + file_size_bytes=200, + md5_hash="ignoredmd5", + crc_hash="ignoredcrc", + chd_sha1_hash="chdsha1", + ), + # Zero-size file: must be filtered out entirely. + _top_level_rom_file( + file_name="empty.bin", + file_size_bytes=0, + md5_hash="zeromd5", + ), + # No hashes at all: must be skipped. + _top_level_rom_file(file_name="nohash.bin", file_size_bytes=50), + ] + + result = await meta_hasheous_handler.lookup_rom("n64", files) + + assert result["hasheous_id"] is None + mock_request.assert_called_once() + sent_data = mock_request.call_args.kwargs["data"] + assert sent_data == [ + {"md5": "md5one", "sha1": "sha1one", "crc": "crcone"}, + {"sha1": "chdsha1"}, + ] + + +@patch.object(meta_hasheous_handler, "_request", new_callable=AsyncMock) +@patch.object(meta_hasheous_handler, "is_enabled", return_value=True) +async def test_lookup_rom_skips_request_when_no_hashes(mock_is_enabled, mock_request): + """lookup_rom must not hit the API when no file has any usable hash.""" + files = [_top_level_rom_file(file_name="nohash.bin", file_size_bytes=50)] + + result = await meta_hasheous_handler.lookup_rom("n64", files) + + assert result["hasheous_id"] is None + mock_request.assert_not_called()