Merge pull request #3500 from Spinnich/fix/ra-hash-folder-disc-games

fix(ra): hash folder-based disc ROMs (.cue + .bin) for RetroAchievements
This commit is contained in:
Georges-Antoine Assi
2026-06-10 07:27:55 -04:00
committed by GitHub
2 changed files with 235 additions and 7 deletions

View File

@@ -11,6 +11,48 @@ from utils.filesystem import COMPRESSED_FILE_EXTENSIONS
RAHASHER_VALID_HASH_REGEX = re.compile(r"[0-9a-f]{32}")
# Disc descriptor / playlist files that point RAHasher at the real data
# tracks. Handing RAHasher one of these reproduces the hash a shell-expanded
# glob would, in priority order: a per-disc descriptor (.cue/.gdi) wins over
# an .m3u playlist so single-disc folders resolve to their own disc.
RA_DISC_DESCRIPTOR_EXTENSIONS: tuple[str, ...] = (".cue", ".gdi", ".m3u")
def _pick_ra_file(folder: Path) -> Path | None:
"""Pick a single real file to hand RAHasher for a folder-based ROM.
RAHasher is launched without a shell, so it never expands a ``/*`` glob —
it must be given a concrete file. Prefer a disc descriptor
(``.cue``/``.gdi``/``.m3u``), which RAHasher follows to the referenced
tracks; otherwise fall back to the largest file in the folder (a raw
``.iso``/``.bin`` image, or the main file of a multi-file cartridge set).
Returns ``None`` when the folder is missing or holds no files.
"""
if not folder.is_dir():
return None
try:
files = [f for f in folder.iterdir() if f.is_file()]
except (OSError, PermissionError):
return None
if not files:
return None
def _size(p: Path) -> int:
try:
return p.stat().st_size
except (OSError, PermissionError):
return -1
for ext in RA_DISC_DESCRIPTOR_EXTENSIONS:
matches = [f for f in files if f.name.lower().endswith(ext)]
if matches:
picked = max(matches, key=_size)
return picked if _size(picked) >= 0 else None
picked = max(files, key=_size)
return picked if _size(picked) >= 0 else None
# Platforms whose hash algorithm requires an on-disk disc image
# (ISO9660/bin+cue/CHD). When the source file is an archive, RAHasher falls
# back to "buffer hash" mode which these consoles don't support, failing
@@ -120,11 +162,13 @@ class RAHasherService:
)
return ""
# For folder-based multi-file ROMs the path ends with /*. RAHasher
# cannot process compressed files via a glob — it fails with "Could not
# open file". When the folder contains archives, switch to hashing the
# largest archive directly (cartridge platforms support buffer hashing),
# or skip entirely for disc platforms that don't.
# For folder-based multi-file ROMs the path ends with /*. RAHasher is
# launched without a shell (create_subprocess_exec), so it never expands
# the glob — it receives the literal "*" and fails ("Could not open
# track/file"). Resolve "/*" to a single real file: hash the largest
# archive directly when the folder holds archives (or skip for disc
# platforms that can't buffer-hash them), otherwise pick a disc
# descriptor / track via _pick_ra_file.
if file_path.endswith("/*"):
folder = Path(file_path[:-2])
@@ -151,6 +195,15 @@ class RAHasherService:
max, archive_files, key=lambda f: f.stat().st_size
)
file_path = str(largest)
else:
# Folder of uncompressed disc tracks (the standard Redump
# .cue + .bin layout, .gdi sets, multi-bin) or a multi-file
# cartridge set. RAHasher never expands the "/*" glob itself,
# so resolve it to a single real file — the disc descriptor
# when present, otherwise the largest track.
resolved = await asyncio.to_thread(_pick_ra_file, folder)
if resolved is not None:
file_path = str(resolved)
log.debug(
f"Executing {hl('RAHasher', color=LIGHTMAGENTA)} for platform: {hl(platform['slug'], color=LIGHTMAGENTA)} - file: {hl(file_path)}"

View File

@@ -9,6 +9,7 @@ from adapters.services.rahasher import (
RAHASHER_VALID_HASH_REGEX,
RAHasherError,
RAHasherService,
_pick_ra_file,
)
from handler.metadata.base_handler import UniversalPlatformSlug as UPS
@@ -324,10 +325,12 @@ class TestRAHasherArchiveSkip:
async def test_does_not_skip_wildcard_folder_without_archives_on_disc_platform(
self, service: RAHasherService, tmp_path
):
"""Folder-based ROM with only raw disc images must still go through RAHasher."""
"""Folder-based ROM with only raw disc images must still go through RAHasher,
handed the real descriptor file rather than the unexpanded glob."""
platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX]
(tmp_path / "disc1.bin").write_bytes(b"fake")
(tmp_path / "disc1.cue").write_bytes(b"fake")
cue = tmp_path / "disc1.cue"
cue.write_bytes(b"fake")
mock_proc = AsyncMock()
mock_proc.wait.return_value = 0
@@ -344,6 +347,10 @@ class TestRAHasherArchiveSkip:
assert result == "a1b2c3d4e5f6789012345678901234ab"
mock_subprocess.assert_called_once()
# RAHasher must be handed the .cue descriptor, never the literal "/*"
call_args = mock_subprocess.call_args[0]
assert str(cue) in call_args
assert not any(str(a).endswith("/*") for a in call_args)
@pytest.mark.asyncio
async def test_cartridge_platform_folder_with_archives_hashes_largest(
@@ -399,6 +406,174 @@ class TestRAHasherArchiveSkip:
mock_subprocess.assert_called_once()
class TestPickRAFile:
"""Unit tests for picking a single real file to hand RAHasher for a
folder-based ROM (the standard Redump .cue + .bin layout, .gdi sets, etc.)."""
def test_returns_none_for_missing_folder(self, tmp_path):
assert _pick_ra_file(tmp_path / "does-not-exist") is None
def test_returns_none_for_empty_folder(self, tmp_path):
assert _pick_ra_file(tmp_path) is None
def test_prefers_cue_over_bin(self, tmp_path):
"""A bin/cue disc: hand RAHasher the .cue, which references the tracks."""
(tmp_path / "game.bin").write_bytes(b"x" * 5000)
cue = tmp_path / "game.cue"
cue.write_bytes(b"x" * 50)
assert _pick_ra_file(tmp_path) == cue
def test_prefers_cue_for_multi_bin_disc(self, tmp_path):
"""Multi-track disc with several .bin files and one .cue → the .cue."""
for i in range(1, 4):
(tmp_path / f"game (Track {i}).bin").write_bytes(b"x" * (1000 * i))
cue = tmp_path / "game.cue"
cue.write_bytes(b"x" * 80)
assert _pick_ra_file(tmp_path) == cue
def test_prefers_gdi_for_dreamcast(self, tmp_path):
"""Dreamcast GD-ROM: the .gdi descriptor points at the track files."""
(tmp_path / "track01.bin").write_bytes(b"x" * 4000)
(tmp_path / "track02.raw").write_bytes(b"x" * 9000)
gdi = tmp_path / "game.gdi"
gdi.write_bytes(b"x" * 60)
assert _pick_ra_file(tmp_path) == gdi
def test_picks_m3u_when_only_descriptor(self, tmp_path):
"""A folder whose only descriptor is an .m3u playlist resolves to it."""
(tmp_path / "data.bin").write_bytes(b"x" * 4000)
m3u = tmp_path / "game.m3u"
m3u.write_bytes(b"disc.cue\n")
assert _pick_ra_file(tmp_path) == m3u
def test_cue_wins_over_m3u(self, tmp_path):
"""When both a .cue and an .m3u exist, the per-disc .cue is preferred."""
m3u = tmp_path / "game.m3u"
m3u.write_bytes(b"x" * 4000)
cue = tmp_path / "game.cue"
cue.write_bytes(b"x" * 50)
assert _pick_ra_file(tmp_path) == cue
def test_falls_back_to_largest_file_without_descriptor(self, tmp_path):
"""No descriptor (e.g. raw .iso, or a multi-file cartridge set): the
largest file is the best single-file guess."""
(tmp_path / "small.dat").write_bytes(b"x" * 100)
big = tmp_path / "game.iso"
big.write_bytes(b"x" * 9000)
assert _pick_ra_file(tmp_path) == big
def test_descriptor_match_is_case_insensitive(self, tmp_path):
(tmp_path / "game.bin").write_bytes(b"x" * 5000)
cue = tmp_path / "GAME.CUE"
cue.write_bytes(b"x" * 50)
assert _pick_ra_file(tmp_path) == cue
def test_ignores_subdirectories(self, tmp_path):
(tmp_path / "subdir").mkdir()
cue = tmp_path / "game.cue"
cue.write_bytes(b"x" * 50)
assert _pick_ra_file(tmp_path) == cue
class TestRAHasherWildcardFolderResolution:
"""Folder-based disc ROMs (.cue + .bin) must resolve the "/*" glob to a real
descriptor file before spawning RAHasher — it is launched without a shell,
so it never expands the glob itself (GitHub issue #3497)."""
@pytest.fixture
def service(self):
return RAHasherService()
@pytest.mark.asyncio
async def test_resolves_glob_to_cue_descriptor(
self, service: RAHasherService, tmp_path
):
platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX]
(tmp_path / "game.bin").write_bytes(b"x" * 5000)
cue = tmp_path / "game.cue"
cue.write_bytes(b"x" * 50)
mock_proc = AsyncMock()
mock_proc.wait.return_value = 0
mock_proc.stdout.read.return_value = b"8c8db8c41e2a4c7d0ef7fc1c2f74ef7a\n"
mock_proc.stderr = None
with patch(
"asyncio.create_subprocess_exec", return_value=mock_proc
) as mock_subprocess:
result = await service.calculate_hash(
{"ra_id": platform_id, "slug": "psx"},
f"{tmp_path}/*",
)
assert result == "8c8db8c41e2a4c7d0ef7fc1c2f74ef7a"
mock_subprocess.assert_called_once_with(
"RAHasher",
str(platform_id),
str(cue),
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
@pytest.mark.asyncio
async def test_resolves_glob_to_largest_file_without_descriptor(
self, service: RAHasherService, tmp_path
):
"""Folder-stored cartridge set / raw image with no descriptor: hand
RAHasher the largest single file rather than the unexpanded glob."""
platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.GBA]
(tmp_path / "small.dat").write_bytes(b"x" * 100)
big = tmp_path / "game.gba"
big.write_bytes(b"x" * 9000)
mock_proc = AsyncMock()
mock_proc.wait.return_value = 0
mock_proc.stdout.read.return_value = b"a1b2c3d4e5f6789012345678901234ab\n"
mock_proc.stderr = None
with patch(
"asyncio.create_subprocess_exec", return_value=mock_proc
) as mock_subprocess:
result = await service.calculate_hash(
{"ra_id": platform_id, "slug": "gba"},
f"{tmp_path}/*",
)
assert result == "a1b2c3d4e5f6789012345678901234ab"
call_args = mock_subprocess.call_args[0]
assert str(big) in call_args
assert not any(str(a).endswith("/*") for a in call_args)
@pytest.mark.asyncio
async def test_unresolvable_empty_folder_does_not_crash(
self, service: RAHasherService, tmp_path
):
"""An empty/unresolvable folder leaves the glob untouched (nothing to
hash) and still returns gracefully."""
platform_id = PLATFORM_SLUG_TO_RETROACHIEVEMENTS_ID[UPS.PSX]
mock_proc = AsyncMock()
mock_proc.wait.return_value = 1
mock_proc.stdout.read.return_value = b""
mock_proc.stderr.read.return_value = b"Could not open track"
with patch("asyncio.create_subprocess_exec", return_value=mock_proc):
result = await service.calculate_hash(
{"ra_id": platform_id, "slug": "psx"},
f"{tmp_path}/*",
)
assert result == ""
class TestRAHasherError:
"""Test the RAHasherError exception."""