diff --git a/backend/adapters/services/rahasher.py b/backend/adapters/services/rahasher.py index afd25b58b..cb3414e5b 100644 --- a/backend/adapters/services/rahasher.py +++ b/backend/adapters/services/rahasher.py @@ -11,6 +11,48 @@ from utils.filesystem import COMPRESSED_FILE_EXTENSIONS RAHASHER_VALID_HASH_REGEX = re.compile(r"[0-9a-f]{32}") +# Disc descriptor / playlist files that point RAHasher at the real data +# tracks. Handing RAHasher one of these reproduces the hash a shell-expanded +# glob would, in priority order: a per-disc descriptor (.cue/.gdi) wins over +# an .m3u playlist so single-disc folders resolve to their own disc. +RA_DISC_DESCRIPTOR_EXTENSIONS: tuple[str, ...] = (".cue", ".gdi", ".m3u") + + +def _pick_ra_file(folder: Path) -> Path | None: + """Pick a single real file to hand RAHasher for a folder-based ROM. + + RAHasher is launched without a shell, so it never expands a ``/*`` glob — + it must be given a concrete file. Prefer a disc descriptor + (``.cue``/``.gdi``/``.m3u``), which RAHasher follows to the referenced + tracks; otherwise fall back to the largest file in the folder (a raw + ``.iso``/``.bin`` image, or the main file of a multi-file cartridge set). + Returns ``None`` when the folder is missing or holds no files. + """ + if not folder.is_dir(): + return None + try: + files = [f for f in folder.iterdir() if f.is_file()] + except (OSError, PermissionError): + return None + if not files: + return None + + def _size(p: Path) -> int: + try: + return p.stat().st_size + except (OSError, PermissionError): + return -1 + + for ext in RA_DISC_DESCRIPTOR_EXTENSIONS: + matches = [f for f in files if f.name.lower().endswith(ext)] + if matches: + picked = max(matches, key=_size) + return picked if _size(picked) >= 0 else None + + picked = max(files, key=_size) + return picked if _size(picked) >= 0 else None + + # Platforms whose hash algorithm requires an on-disk disc image # (ISO9660/bin+cue/CHD). When the source file is an archive, RAHasher falls # back to "buffer hash" mode which these consoles don't support, failing @@ -120,11 +162,13 @@ class RAHasherService: ) return "" - # For folder-based multi-file ROMs the path ends with /*. RAHasher - # cannot process compressed files via a glob — it fails with "Could not - # open file". When the folder contains archives, switch to hashing the - # largest archive directly (cartridge platforms support buffer hashing), - # or skip entirely for disc platforms that don't. + # For folder-based multi-file ROMs the path ends with /*. RAHasher is + # launched without a shell (create_subprocess_exec), so it never expands + # the glob — it receives the literal "*" and fails ("Could not open + # track/file"). Resolve "/*" to a single real file: hash the largest + # archive directly when the folder holds archives (or skip for disc + # platforms that can't buffer-hash them), otherwise pick a disc + # descriptor / track via _pick_ra_file. if file_path.endswith("/*"): folder = Path(file_path[:-2]) @@ -151,6 +195,15 @@ class RAHasherService: max, archive_files, key=lambda f: f.stat().st_size ) file_path = str(largest) + else: + # Folder of uncompressed disc tracks (the standard Redump + # .cue + .bin layout, .gdi sets, multi-bin) or a multi-file + # cartridge set. RAHasher never expands the "/*" glob itself, + # so resolve it to a single real file — the disc descriptor + # when present, otherwise the largest track. + resolved = await asyncio.to_thread(_pick_ra_file, folder) + if resolved is not None: + file_path = str(resolved) log.debug( f"Executing {hl('RAHasher', color=LIGHTMAGENTA)} for platform: {hl(platform['slug'], color=LIGHTMAGENTA)} - file: {hl(file_path)}" diff --git a/backend/tests/adapters/services/test_rahasher.py b/backend/tests/adapters/services/test_rahasher.py index eb358802e..481c7ed46 100644 --- a/backend/tests/adapters/services/test_rahasher.py +++ b/backend/tests/adapters/services/test_rahasher.py @@ -9,6 +9,7 @@ from adapters.services.rahasher import ( RAHASHER_VALID_HASH_REGEX, RAHasherError, RAHasherService, + _pick_ra_file, ) from handler.metadata.base_handler import UniversalPlatformSlug as UPS @@ -324,10 +325,12 @@ class TestRAHasherArchiveSkip: async def test_does_not_skip_wildcard_folder_without_archives_on_disc_platform( self, service: RAHasherService, tmp_path ): - """Folder-based ROM with only raw disc images must still go through RAHasher.""" + """Folder-based ROM with only raw disc images must still go through RAHasher, + handed the real descriptor file rather than the unexpanded glob.""" platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX] (tmp_path / "disc1.bin").write_bytes(b"fake") - (tmp_path / "disc1.cue").write_bytes(b"fake") + cue = tmp_path / "disc1.cue" + cue.write_bytes(b"fake") mock_proc = AsyncMock() mock_proc.wait.return_value = 0 @@ -344,6 +347,10 @@ class TestRAHasherArchiveSkip: assert result == "a1b2c3d4e5f6789012345678901234ab" mock_subprocess.assert_called_once() + # RAHasher must be handed the .cue descriptor, never the literal "/*" + call_args = mock_subprocess.call_args[0] + assert str(cue) in call_args + assert not any(str(a).endswith("/*") for a in call_args) @pytest.mark.asyncio async def test_cartridge_platform_folder_with_archives_hashes_largest( @@ -399,6 +406,174 @@ class TestRAHasherArchiveSkip: mock_subprocess.assert_called_once() +class TestPickRAFile: + """Unit tests for picking a single real file to hand RAHasher for a + folder-based ROM (the standard Redump .cue + .bin layout, .gdi sets, etc.).""" + + def test_returns_none_for_missing_folder(self, tmp_path): + assert _pick_ra_file(tmp_path / "does-not-exist") is None + + def test_returns_none_for_empty_folder(self, tmp_path): + assert _pick_ra_file(tmp_path) is None + + def test_prefers_cue_over_bin(self, tmp_path): + """A bin/cue disc: hand RAHasher the .cue, which references the tracks.""" + (tmp_path / "game.bin").write_bytes(b"x" * 5000) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + def test_prefers_cue_for_multi_bin_disc(self, tmp_path): + """Multi-track disc with several .bin files and one .cue → the .cue.""" + for i in range(1, 4): + (tmp_path / f"game (Track {i}).bin").write_bytes(b"x" * (1000 * i)) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 80) + + assert _pick_ra_file(tmp_path) == cue + + def test_prefers_gdi_for_dreamcast(self, tmp_path): + """Dreamcast GD-ROM: the .gdi descriptor points at the track files.""" + (tmp_path / "track01.bin").write_bytes(b"x" * 4000) + (tmp_path / "track02.raw").write_bytes(b"x" * 9000) + gdi = tmp_path / "game.gdi" + gdi.write_bytes(b"x" * 60) + + assert _pick_ra_file(tmp_path) == gdi + + def test_picks_m3u_when_only_descriptor(self, tmp_path): + """A folder whose only descriptor is an .m3u playlist resolves to it.""" + (tmp_path / "data.bin").write_bytes(b"x" * 4000) + m3u = tmp_path / "game.m3u" + m3u.write_bytes(b"disc.cue\n") + + assert _pick_ra_file(tmp_path) == m3u + + def test_cue_wins_over_m3u(self, tmp_path): + """When both a .cue and an .m3u exist, the per-disc .cue is preferred.""" + m3u = tmp_path / "game.m3u" + m3u.write_bytes(b"x" * 4000) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + def test_falls_back_to_largest_file_without_descriptor(self, tmp_path): + """No descriptor (e.g. raw .iso, or a multi-file cartridge set): the + largest file is the best single-file guess.""" + (tmp_path / "small.dat").write_bytes(b"x" * 100) + big = tmp_path / "game.iso" + big.write_bytes(b"x" * 9000) + + assert _pick_ra_file(tmp_path) == big + + def test_descriptor_match_is_case_insensitive(self, tmp_path): + (tmp_path / "game.bin").write_bytes(b"x" * 5000) + cue = tmp_path / "GAME.CUE" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + def test_ignores_subdirectories(self, tmp_path): + (tmp_path / "subdir").mkdir() + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + +class TestRAHasherWildcardFolderResolution: + """Folder-based disc ROMs (.cue + .bin) must resolve the "/*" glob to a real + descriptor file before spawning RAHasher — it is launched without a shell, + so it never expands the glob itself (GitHub issue #3497).""" + + @pytest.fixture + def service(self): + return RAHasherService() + + @pytest.mark.asyncio + async def test_resolves_glob_to_cue_descriptor( + self, service: RAHasherService, tmp_path + ): + platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX] + (tmp_path / "game.bin").write_bytes(b"x" * 5000) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + mock_proc = AsyncMock() + mock_proc.wait.return_value = 0 + mock_proc.stdout.read.return_value = b"8c8db8c41e2a4c7d0ef7fc1c2f74ef7a\n" + mock_proc.stderr = None + + with patch( + "asyncio.create_subprocess_exec", return_value=mock_proc + ) as mock_subprocess: + result = await service.calculate_hash( + {"ra_id": platform_id, "slug": "psx"}, + f"{tmp_path}/*", + ) + + assert result == "8c8db8c41e2a4c7d0ef7fc1c2f74ef7a" + mock_subprocess.assert_called_once_with( + "RAHasher", + str(platform_id), + str(cue), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + @pytest.mark.asyncio + async def test_resolves_glob_to_largest_file_without_descriptor( + self, service: RAHasherService, tmp_path + ): + """Folder-stored cartridge set / raw image with no descriptor: hand + RAHasher the largest single file rather than the unexpanded glob.""" + platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.GBA] + (tmp_path / "small.dat").write_bytes(b"x" * 100) + big = tmp_path / "game.gba" + big.write_bytes(b"x" * 9000) + + mock_proc = AsyncMock() + mock_proc.wait.return_value = 0 + mock_proc.stdout.read.return_value = b"a1b2c3d4e5f6789012345678901234ab\n" + mock_proc.stderr = None + + with patch( + "asyncio.create_subprocess_exec", return_value=mock_proc + ) as mock_subprocess: + result = await service.calculate_hash( + {"ra_id": platform_id, "slug": "gba"}, + f"{tmp_path}/*", + ) + + assert result == "a1b2c3d4e5f6789012345678901234ab" + call_args = mock_subprocess.call_args[0] + assert str(big) in call_args + assert not any(str(a).endswith("/*") for a in call_args) + + @pytest.mark.asyncio + async def test_unresolvable_empty_folder_does_not_crash( + self, service: RAHasherService, tmp_path + ): + """An empty/unresolvable folder leaves the glob untouched (nothing to + hash) and still returns gracefully.""" + platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX] + + mock_proc = AsyncMock() + mock_proc.wait.return_value = 1 + mock_proc.stdout.read.return_value = b"" + mock_proc.stderr.read.return_value = b"Could not open track" + + with patch("asyncio.create_subprocess_exec", return_value=mock_proc): + result = await service.calculate_hash( + {"ra_id": platform_id, "slug": "psx"}, + f"{tmp_path}/*", + ) + + assert result == "" + + class TestRAHasherError: """Test the RAHasherError exception."""