From f255b5a7d91eba7cca1d11ab33ef2d22b06bdbb5 Mon Sep 17 00:00:00 2001 From: Georges-Antoine Assi Date: Wed, 27 May 2026 09:09:37 -0400 Subject: [PATCH] feat(hashing): add RAR support to multi-file archive composite hashing Add read_rar_archive_files via the existing 7zz binary (which natively handles RAR3/RAR5 read), and collapse the per-extension reader dispatch into an ARCHIVE_READERS dict so future formats are one entry away. Also extract a small _make_file_hash helper to remove the repeated nested ternaries in the inner loop. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/handler/filesystem/roms_handler.py | 72 ++++++++++------------ backend/utils/archive_7zip.py | 12 ++++ 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/backend/handler/filesystem/roms_handler.py b/backend/handler/filesystem/roms_handler.py index f073e34b8..a9aae000f 100644 --- a/backend/handler/filesystem/roms_handler.py +++ b/backend/handler/filesystem/roms_handler.py @@ -24,7 +24,7 @@ from exceptions.fs_exceptions import ( from handler.metadata.base_handler import UniversalPlatformSlug as UPS from models.platform import Platform from models.rom import Rom, RomFile, RomFileCategory -from utils.archive_7zip import read_7z_archive_files +from utils.archive_7zip import read_7z_archive_files, read_rar_archive_files from utils.archives import ( detect_mime_type, extract_chd_hash, @@ -113,6 +113,29 @@ DEFAULT_CRC_C = 0 DEFAULT_MD5_H_DIGEST = hashlib.md5(usedforsecurity=False).digest() DEFAULT_SHA1_H_DIGEST = hashlib.sha1(usedforsecurity=False).digest() +# Multi-file archive readers, keyed by lowercased file extension. +ARCHIVE_READERS = { + ".zip": read_zip_archive_files, + ".tar": read_tar_archive_files, + ".7z": read_7z_archive_files, + ".rar": read_rar_archive_files, +} + + +def _make_file_hash( + crc_c: int, md5_h: Any, sha1_h: Any, chd_sha1_hash: str = "" +) -> FileHash: + """Build a FileHash, blanking each field whose hasher state is still the default.""" + return FileHash( + crc_hash=crc32_to_hex(crc_c) if crc_c != DEFAULT_CRC_C else "", + md5_hash=md5_h.hexdigest() if md5_h.digest() != DEFAULT_MD5_H_DIGEST else "", + sha1_hash=( + sha1_h.hexdigest() if sha1_h.digest() != DEFAULT_SHA1_H_DIGEST else "" + ), + chd_sha1_hash=chd_sha1_hash, + ) + + VERSION_TAG_REGEX = re.compile(r"^(?:version|ver|v)[\s_-]?(.*)", re.I) REGION_TAG_REGEX = re.compile(r"^reg[\s|-](.*)$", re.I) REVISION_TAG_REGEX = re.compile(r"^rev[\s|-](.*)$", re.I) @@ -410,32 +433,15 @@ class FSRomsHandler(FSHandler): file_hash=file_hash, ) ) - elif hashable_platform and rom_ext in {".zip", ".tar", ".7z"}: + elif hashable_platform and rom_ext in ARCHIVE_READERS: # Multi-file archive: compute per-file individual hashes + composite, # mirroring the folder-based multi-part ROM behaviour above. - archive_entries: list[tuple[str, int, list[bytes]]] = [] - - if rom_ext == ".zip": - archive_entries = await asyncio.to_thread( - read_zip_archive_files, - rom_dir, - DEFAULT_EXCLUDED_FILES, - DEFAULT_EXCLUDED_EXTENSIONS, - ) - elif rom_ext == ".tar": - archive_entries = await asyncio.to_thread( - read_tar_archive_files, - rom_dir, - DEFAULT_EXCLUDED_FILES, - DEFAULT_EXCLUDED_EXTENSIONS, - ) - elif rom_ext == ".7z": - archive_entries = await asyncio.to_thread( - read_7z_archive_files, - rom_dir, - DEFAULT_EXCLUDED_FILES, - DEFAULT_EXCLUDED_EXTENSIONS, - ) + archive_entries = await asyncio.to_thread( + ARCHIVE_READERS[rom_ext], + rom_dir, + DEFAULT_EXCLUDED_FILES, + DEFAULT_EXCLUDED_EXTENSIONS, + ) if archive_entries: archive_mtime = (await AnyioPath(rom_dir).stat()).st_mtime @@ -451,26 +457,12 @@ class FSRomsHandler(FSHandler): rom_crc_c = binascii.crc32(chunk, rom_crc_c) rom_md5_h.update(chunk) rom_sha1_h.update(chunk) - file_hash = FileHash( - crc_hash=crc32_to_hex(crc_c) if crc_c != DEFAULT_CRC_C else "", - md5_hash=( - md5_h.hexdigest() - if md5_h.digest() != DEFAULT_MD5_H_DIGEST - else "" - ), - sha1_hash=( - sha1_h.hexdigest() - if sha1_h.digest() != DEFAULT_SHA1_H_DIGEST - else "" - ), - chd_sha1_hash="", - ) rom_files.append( self._build_rom_file( rom=rom, rom_path=Path(rel_roms_path), file_name=internal_name, - file_hash=file_hash, + file_hash=_make_file_hash(crc_c, md5_h, sha1_h), file_size_bytes=entry_size, last_modified=archive_mtime, ) diff --git a/backend/utils/archive_7zip.py b/backend/utils/archive_7zip.py index 7377a7769..fddc783e0 100644 --- a/backend/utils/archive_7zip.py +++ b/backend/utils/archive_7zip.py @@ -189,3 +189,15 @@ def read_7z_archive_files( output.append((name, size, chunks)) return output + + +def read_rar_archive_files( + file_path: Path, + excluded_names: list[str], + excluded_exts: list[str], +) -> list[tuple[str, int, list[bytes]]]: + """Read all eligible files from a RAR archive, sorted by internal path (ASCII). + + Delegates to the 7zz binary, which natively supports RAR (v3-v5, read-only). + """ + return read_7z_archive_files(file_path, excluded_names, excluded_exts)