From f4e39c19bcaca2a312295fab83ad74dc1c5d15e8 Mon Sep 17 00:00:00 2001 From: Spinnich Date: Tue, 9 Jun 2026 13:13:07 +0000 Subject: [PATCH 1/2] fix(ra): hash folder-based disc ROMs (.cue + .bin) for RetroAchievements Folder-stored disc games (the standard Redump .cue + .bin layout, .gdi sets, multi-bin, etc.) never matched RetroAchievements. roms_handler builds an `ra_path` ending in `/*` when the folder has no .chd, and RAHasherService only rescued that glob for folders containing archives. A folder of plain uncompressed tracks fell through, so the literal `*` reached RAHasher via create_subprocess_exec (no shell to expand it), which failed with "Could not open track" and stored an empty ra_hash. Resolve the `/*` glob to a single real file before spawning RAHasher: prefer a disc descriptor (.cue/.gdi/.m3u), which RAHasher follows to the referenced tracks, otherwise fall back to the largest file in the folder (raw .iso/.bin, or the main file of a multi-file cartridge set). This mirrors the existing "pick the largest .chd" handling for CHD folders. Fixes #3497. Co-Authored-By: Claude Opus 4.8 --- backend/adapters/services/rahasher.py | 50 ++++- .../tests/adapters/services/test_rahasher.py | 179 +++++++++++++++++- 2 files changed, 222 insertions(+), 7 deletions(-) diff --git a/backend/adapters/services/rahasher.py b/backend/adapters/services/rahasher.py index afd25b58b..1cb1a172e 100644 --- a/backend/adapters/services/rahasher.py +++ b/backend/adapters/services/rahasher.py @@ -11,6 +11,35 @@ from utils.filesystem import COMPRESSED_FILE_EXTENSIONS RAHASHER_VALID_HASH_REGEX = re.compile(r"[0-9a-f]{32}") +# Disc descriptor / playlist files that point RAHasher at the real data +# tracks. Handing RAHasher one of these reproduces the hash a shell-expanded +# glob would, in priority order: a per-disc descriptor (.cue/.gdi) wins over +# an .m3u playlist so single-disc folders resolve to their own disc. +RA_DISC_DESCRIPTOR_EXTENSIONS: tuple[str, ...] = (".cue", ".gdi", ".m3u") + + +def _pick_ra_file(folder: Path) -> Path | None: + """Pick a single real file to hand RAHasher for a folder-based ROM. + + RAHasher is launched without a shell, so it never expands a ``/*`` glob — + it must be given a concrete file. Prefer a disc descriptor + (``.cue``/``.gdi``/``.m3u``), which RAHasher follows to the referenced + tracks; otherwise fall back to the largest file in the folder (a raw + ``.iso``/``.bin`` image, or the main file of a multi-file cartridge set). + Returns ``None`` when the folder is missing or holds no files. + """ + if not folder.is_dir(): + return None + files = [f for f in folder.iterdir() if f.is_file()] + if not files: + return None + for ext in RA_DISC_DESCRIPTOR_EXTENSIONS: + matches = [f for f in files if f.name.lower().endswith(ext)] + if matches: + return max(matches, key=lambda f: f.stat().st_size) + return max(files, key=lambda f: f.stat().st_size) + + # Platforms whose hash algorithm requires an on-disk disc image # (ISO9660/bin+cue/CHD). When the source file is an archive, RAHasher falls # back to "buffer hash" mode which these consoles don't support, failing @@ -120,11 +149,13 @@ class RAHasherService: ) return "" - # For folder-based multi-file ROMs the path ends with /*. RAHasher - # cannot process compressed files via a glob — it fails with "Could not - # open file". When the folder contains archives, switch to hashing the - # largest archive directly (cartridge platforms support buffer hashing), - # or skip entirely for disc platforms that don't. + # For folder-based multi-file ROMs the path ends with /*. RAHasher is + # launched without a shell (create_subprocess_exec), so it never expands + # the glob — it receives the literal "*" and fails ("Could not open + # track/file"). Resolve "/*" to a single real file: hash the largest + # archive directly when the folder holds archives (or skip for disc + # platforms that can't buffer-hash them), otherwise pick a disc + # descriptor / track via _pick_ra_file. if file_path.endswith("/*"): folder = Path(file_path[:-2]) @@ -151,6 +182,15 @@ class RAHasherService: max, archive_files, key=lambda f: f.stat().st_size ) file_path = str(largest) + else: + # Folder of uncompressed disc tracks (the standard Redump + # .cue + .bin layout, .gdi sets, multi-bin) or a multi-file + # cartridge set. RAHasher never expands the "/*" glob itself, + # so resolve it to a single real file — the disc descriptor + # when present, otherwise the largest track. + resolved = await asyncio.to_thread(_pick_ra_file, folder) + if resolved is not None: + file_path = str(resolved) log.debug( f"Executing {hl('RAHasher', color=LIGHTMAGENTA)} for platform: {hl(platform['slug'], color=LIGHTMAGENTA)} - file: {hl(file_path)}" diff --git a/backend/tests/adapters/services/test_rahasher.py b/backend/tests/adapters/services/test_rahasher.py index eb358802e..481c7ed46 100644 --- a/backend/tests/adapters/services/test_rahasher.py +++ b/backend/tests/adapters/services/test_rahasher.py @@ -9,6 +9,7 @@ from adapters.services.rahasher import ( RAHASHER_VALID_HASH_REGEX, RAHasherError, RAHasherService, + _pick_ra_file, ) from handler.metadata.base_handler import UniversalPlatformSlug as UPS @@ -324,10 +325,12 @@ class TestRAHasherArchiveSkip: async def test_does_not_skip_wildcard_folder_without_archives_on_disc_platform( self, service: RAHasherService, tmp_path ): - """Folder-based ROM with only raw disc images must still go through RAHasher.""" + """Folder-based ROM with only raw disc images must still go through RAHasher, + handed the real descriptor file rather than the unexpanded glob.""" platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX] (tmp_path / "disc1.bin").write_bytes(b"fake") - (tmp_path / "disc1.cue").write_bytes(b"fake") + cue = tmp_path / "disc1.cue" + cue.write_bytes(b"fake") mock_proc = AsyncMock() mock_proc.wait.return_value = 0 @@ -344,6 +347,10 @@ class TestRAHasherArchiveSkip: assert result == "a1b2c3d4e5f6789012345678901234ab" mock_subprocess.assert_called_once() + # RAHasher must be handed the .cue descriptor, never the literal "/*" + call_args = mock_subprocess.call_args[0] + assert str(cue) in call_args + assert not any(str(a).endswith("/*") for a in call_args) @pytest.mark.asyncio async def test_cartridge_platform_folder_with_archives_hashes_largest( @@ -399,6 +406,174 @@ class TestRAHasherArchiveSkip: mock_subprocess.assert_called_once() +class TestPickRAFile: + """Unit tests for picking a single real file to hand RAHasher for a + folder-based ROM (the standard Redump .cue + .bin layout, .gdi sets, etc.).""" + + def test_returns_none_for_missing_folder(self, tmp_path): + assert _pick_ra_file(tmp_path / "does-not-exist") is None + + def test_returns_none_for_empty_folder(self, tmp_path): + assert _pick_ra_file(tmp_path) is None + + def test_prefers_cue_over_bin(self, tmp_path): + """A bin/cue disc: hand RAHasher the .cue, which references the tracks.""" + (tmp_path / "game.bin").write_bytes(b"x" * 5000) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + def test_prefers_cue_for_multi_bin_disc(self, tmp_path): + """Multi-track disc with several .bin files and one .cue → the .cue.""" + for i in range(1, 4): + (tmp_path / f"game (Track {i}).bin").write_bytes(b"x" * (1000 * i)) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 80) + + assert _pick_ra_file(tmp_path) == cue + + def test_prefers_gdi_for_dreamcast(self, tmp_path): + """Dreamcast GD-ROM: the .gdi descriptor points at the track files.""" + (tmp_path / "track01.bin").write_bytes(b"x" * 4000) + (tmp_path / "track02.raw").write_bytes(b"x" * 9000) + gdi = tmp_path / "game.gdi" + gdi.write_bytes(b"x" * 60) + + assert _pick_ra_file(tmp_path) == gdi + + def test_picks_m3u_when_only_descriptor(self, tmp_path): + """A folder whose only descriptor is an .m3u playlist resolves to it.""" + (tmp_path / "data.bin").write_bytes(b"x" * 4000) + m3u = tmp_path / "game.m3u" + m3u.write_bytes(b"disc.cue\n") + + assert _pick_ra_file(tmp_path) == m3u + + def test_cue_wins_over_m3u(self, tmp_path): + """When both a .cue and an .m3u exist, the per-disc .cue is preferred.""" + m3u = tmp_path / "game.m3u" + m3u.write_bytes(b"x" * 4000) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + def test_falls_back_to_largest_file_without_descriptor(self, tmp_path): + """No descriptor (e.g. raw .iso, or a multi-file cartridge set): the + largest file is the best single-file guess.""" + (tmp_path / "small.dat").write_bytes(b"x" * 100) + big = tmp_path / "game.iso" + big.write_bytes(b"x" * 9000) + + assert _pick_ra_file(tmp_path) == big + + def test_descriptor_match_is_case_insensitive(self, tmp_path): + (tmp_path / "game.bin").write_bytes(b"x" * 5000) + cue = tmp_path / "GAME.CUE" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + def test_ignores_subdirectories(self, tmp_path): + (tmp_path / "subdir").mkdir() + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + assert _pick_ra_file(tmp_path) == cue + + +class TestRAHasherWildcardFolderResolution: + """Folder-based disc ROMs (.cue + .bin) must resolve the "/*" glob to a real + descriptor file before spawning RAHasher — it is launched without a shell, + so it never expands the glob itself (GitHub issue #3497).""" + + @pytest.fixture + def service(self): + return RAHasherService() + + @pytest.mark.asyncio + async def test_resolves_glob_to_cue_descriptor( + self, service: RAHasherService, tmp_path + ): + platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX] + (tmp_path / "game.bin").write_bytes(b"x" * 5000) + cue = tmp_path / "game.cue" + cue.write_bytes(b"x" * 50) + + mock_proc = AsyncMock() + mock_proc.wait.return_value = 0 + mock_proc.stdout.read.return_value = b"8c8db8c41e2a4c7d0ef7fc1c2f74ef7a\n" + mock_proc.stderr = None + + with patch( + "asyncio.create_subprocess_exec", return_value=mock_proc + ) as mock_subprocess: + result = await service.calculate_hash( + {"ra_id": platform_id, "slug": "psx"}, + f"{tmp_path}/*", + ) + + assert result == "8c8db8c41e2a4c7d0ef7fc1c2f74ef7a" + mock_subprocess.assert_called_once_with( + "RAHasher", + str(platform_id), + str(cue), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + @pytest.mark.asyncio + async def test_resolves_glob_to_largest_file_without_descriptor( + self, service: RAHasherService, tmp_path + ): + """Folder-stored cartridge set / raw image with no descriptor: hand + RAHasher the largest single file rather than the unexpanded glob.""" + platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.GBA] + (tmp_path / "small.dat").write_bytes(b"x" * 100) + big = tmp_path / "game.gba" + big.write_bytes(b"x" * 9000) + + mock_proc = AsyncMock() + mock_proc.wait.return_value = 0 + mock_proc.stdout.read.return_value = b"a1b2c3d4e5f6789012345678901234ab\n" + mock_proc.stderr = None + + with patch( + "asyncio.create_subprocess_exec", return_value=mock_proc + ) as mock_subprocess: + result = await service.calculate_hash( + {"ra_id": platform_id, "slug": "gba"}, + f"{tmp_path}/*", + ) + + assert result == "a1b2c3d4e5f6789012345678901234ab" + call_args = mock_subprocess.call_args[0] + assert str(big) in call_args + assert not any(str(a).endswith("/*") for a in call_args) + + @pytest.mark.asyncio + async def test_unresolvable_empty_folder_does_not_crash( + self, service: RAHasherService, tmp_path + ): + """An empty/unresolvable folder leaves the glob untouched (nothing to + hash) and still returns gracefully.""" + platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX] + + mock_proc = AsyncMock() + mock_proc.wait.return_value = 1 + mock_proc.stdout.read.return_value = b"" + mock_proc.stderr.read.return_value = b"Could not open track" + + with patch("asyncio.create_subprocess_exec", return_value=mock_proc): + result = await service.calculate_hash( + {"ra_id": platform_id, "slug": "psx"}, + f"{tmp_path}/*", + ) + + assert result == "" + + class TestRAHasherError: """Test the RAHasherError exception.""" From 8b9c3d316696986742691b6b0d9525433eebb711 Mon Sep 17 00:00:00 2001 From: Georges-Antoine Assi <3247106+gantoine@users.noreply.github.com> Date: Wed, 10 Jun 2026 07:01:19 -0400 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- backend/adapters/services/rahasher.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/backend/adapters/services/rahasher.py b/backend/adapters/services/rahasher.py index 1cb1a172e..cb3414e5b 100644 --- a/backend/adapters/services/rahasher.py +++ b/backend/adapters/services/rahasher.py @@ -30,14 +30,27 @@ def _pick_ra_file(folder: Path) -> Path | None: """ if not folder.is_dir(): return None - files = [f for f in folder.iterdir() if f.is_file()] + try: + files = [f for f in folder.iterdir() if f.is_file()] + except (OSError, PermissionError): + return None if not files: return None + + def _size(p: Path) -> int: + try: + return p.stat().st_size + except (OSError, PermissionError): + return -1 + for ext in RA_DISC_DESCRIPTOR_EXTENSIONS: matches = [f for f in files if f.name.lower().endswith(ext)] if matches: - return max(matches, key=lambda f: f.stat().st_size) - return max(files, key=lambda f: f.stat().st_size) + picked = max(matches, key=_size) + return picked if _size(picked) >= 0 else None + + picked = max(files, key=_size) + return picked if _size(picked) >= 0 else None # Platforms whose hash algorithm requires an on-disk disc image