feat(hashing): add RAR support to multi-file archive composite hashing

Add read_rar_archive_files via the existing 7zz binary (which natively
reads RAR3/RAR5 archives), and collapse the per-extension reader dispatch
into an ARCHIVE_READERS dict so that supporting a future format takes a
single new entry. Also extract a small _make_file_hash helper to remove
the repeated nested ternaries in the inner loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Georges-Antoine Assi
2026-05-27 09:09:37 -04:00
parent 438c03facc
commit f255b5a7d9
2 changed files with 44 additions and 40 deletions

View File

@@ -24,7 +24,7 @@ from exceptions.fs_exceptions import (
from handler.metadata.base_handler import UniversalPlatformSlug as UPS
from models.platform import Platform
from models.rom import Rom, RomFile, RomFileCategory
from utils.archive_7zip import read_7z_archive_files
from utils.archive_7zip import read_7z_archive_files, read_rar_archive_files
from utils.archives import (
detect_mime_type,
extract_chd_hash,
@@ -113,6 +113,29 @@ DEFAULT_CRC_C = 0
# Digests of freshly constructed hashers that have been fed no bytes. A running
# hasher whose digest still equals one of these has not hashed any data yet.
DEFAULT_MD5_H_DIGEST = hashlib.md5(usedforsecurity=False).digest()
DEFAULT_SHA1_H_DIGEST = hashlib.sha1(usedforsecurity=False).digest()
# Multi-file archive readers, keyed by lowercased file extension (leading dot
# included). Each reader is invoked as
# reader(archive_path, excluded_names, excluded_exts); supporting another
# archive format only requires adding an entry here.
ARCHIVE_READERS = {
".zip": read_zip_archive_files,
".tar": read_tar_archive_files,
".7z": read_7z_archive_files,
".rar": read_rar_archive_files,
}
def _make_file_hash(
    crc_c: int, md5_h: Any, sha1_h: Any, chd_sha1_hash: str = ""
) -> FileHash:
    """Assemble a FileHash from running CRC32 / MD5 / SHA1 state.

    A hash field is emitted as an empty string when its accumulator is still
    equal to the module-level default (DEFAULT_CRC_C, DEFAULT_MD5_H_DIGEST,
    DEFAULT_SHA1_H_DIGEST); otherwise it carries the hex form of the value.

    Args:
        crc_c: Running CRC32 value.
        md5_h: MD5 hasher object exposing ``digest()`` and ``hexdigest()``.
        sha1_h: SHA1 hasher object exposing ``digest()`` and ``hexdigest()``.
        chd_sha1_hash: Passed through to the result unchanged.

    Returns:
        The populated FileHash.
    """
    crc_hex = ""
    if crc_c != DEFAULT_CRC_C:
        crc_hex = crc32_to_hex(crc_c)

    md5_hex = ""
    if md5_h.digest() != DEFAULT_MD5_H_DIGEST:
        md5_hex = md5_h.hexdigest()

    sha1_hex = ""
    if sha1_h.digest() != DEFAULT_SHA1_H_DIGEST:
        sha1_hex = sha1_h.hexdigest()

    return FileHash(
        crc_hash=crc_hex,
        md5_hash=md5_hex,
        sha1_hash=sha1_hex,
        chd_sha1_hash=chd_sha1_hash,
    )
VERSION_TAG_REGEX = re.compile(r"^(?:version|ver|v)[\s_-]?(.*)", re.I)
REGION_TAG_REGEX = re.compile(r"^reg[\s|-](.*)$", re.I)
REVISION_TAG_REGEX = re.compile(r"^rev[\s|-](.*)$", re.I)
@@ -410,32 +433,15 @@ class FSRomsHandler(FSHandler):
file_hash=file_hash,
)
)
elif hashable_platform and rom_ext in {".zip", ".tar", ".7z"}:
elif hashable_platform and rom_ext in ARCHIVE_READERS:
# Multi-file archive: compute per-file individual hashes + composite,
# mirroring the folder-based multi-part ROM behaviour above.
archive_entries: list[tuple[str, int, list[bytes]]] = []
if rom_ext == ".zip":
archive_entries = await asyncio.to_thread(
read_zip_archive_files,
rom_dir,
DEFAULT_EXCLUDED_FILES,
DEFAULT_EXCLUDED_EXTENSIONS,
)
elif rom_ext == ".tar":
archive_entries = await asyncio.to_thread(
read_tar_archive_files,
rom_dir,
DEFAULT_EXCLUDED_FILES,
DEFAULT_EXCLUDED_EXTENSIONS,
)
elif rom_ext == ".7z":
archive_entries = await asyncio.to_thread(
read_7z_archive_files,
rom_dir,
DEFAULT_EXCLUDED_FILES,
DEFAULT_EXCLUDED_EXTENSIONS,
)
archive_entries = await asyncio.to_thread(
ARCHIVE_READERS[rom_ext],
rom_dir,
DEFAULT_EXCLUDED_FILES,
DEFAULT_EXCLUDED_EXTENSIONS,
)
if archive_entries:
archive_mtime = (await AnyioPath(rom_dir).stat()).st_mtime
@@ -451,26 +457,12 @@ class FSRomsHandler(FSHandler):
rom_crc_c = binascii.crc32(chunk, rom_crc_c)
rom_md5_h.update(chunk)
rom_sha1_h.update(chunk)
file_hash = FileHash(
crc_hash=crc32_to_hex(crc_c) if crc_c != DEFAULT_CRC_C else "",
md5_hash=(
md5_h.hexdigest()
if md5_h.digest() != DEFAULT_MD5_H_DIGEST
else ""
),
sha1_hash=(
sha1_h.hexdigest()
if sha1_h.digest() != DEFAULT_SHA1_H_DIGEST
else ""
),
chd_sha1_hash="",
)
rom_files.append(
self._build_rom_file(
rom=rom,
rom_path=Path(rel_roms_path),
file_name=internal_name,
file_hash=file_hash,
file_hash=_make_file_hash(crc_c, md5_h, sha1_h),
file_size_bytes=entry_size,
last_modified=archive_mtime,
)

View File

@@ -189,3 +189,15 @@ def read_7z_archive_files(
output.append((name, size, chunks))
return output
def read_rar_archive_files(
    file_path: Path,
    excluded_names: list[str],
    excluded_exts: list[str],
) -> list[tuple[str, int, list[bytes]]]:
    """Return the eligible files contained in a RAR archive.

    There is no RAR-specific implementation: the arguments are forwarded
    unchanged to read_7z_archive_files, which drives the 7zz binary. 7zz can
    read RAR archives (v3-v5) natively, though it cannot write them.

    Entries come back exactly as read_7z_archive_files produces them
    (sorted by internal path, ASCII order).
    """
    rar_entries = read_7z_archive_files(file_path, excluded_names, excluded_exts)
    return rar_entries