Merge branch 'master' into find-best-match

This commit is contained in:
Georges-Antoine Assi
2025-08-08 16:55:39 -04:00
106 changed files with 114 additions and 352 deletions

View File

@@ -20,7 +20,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
ca-certificates \
libmagic-dev \
p7zip \
p7zip-full \
tzdata \
libbz2-dev \
libssl-dev \

View File

@@ -1,7 +0,0 @@
from handler.tests.conftest import ( # noqa
admin_user,
clear_database,
platform,
rom,
setup_database,
)

View File

@@ -1,7 +0,0 @@
from handler.tests.conftest import ( # noqa
admin_user,
clear_database,
editor_user,
setup_database,
viewer_user,
)

View File

@@ -12,7 +12,6 @@ from pathlib import Path
from typing import IO, Any, Final, Literal, TypedDict
import magic
import py7zr
import zipfile_inflate64 # trunk-ignore(ruff/F401): Patches zipfile to support Enhanced Deflate
from config import LIBRARY_BASE_PATH
from config.config_manager import config_manager as cm
@@ -23,13 +22,7 @@ from exceptions.fs_exceptions import (
from handler.metadata.base_hander import UniversalPlatformSlug as UPS
from models.platform import Platform
from models.rom import Rom, RomFile, RomFileCategory
from py7zr.exceptions import (
Bad7zFile,
DecompressionError,
PasswordRequired,
UnsupportedCompressionMethodError,
)
from utils.archive_7zip import CallbackIOFactory
from utils.archive_7zip import process_file_7z
from utils.filesystem import iter_files
from utils.hashing import crc32_to_hex
@@ -177,39 +170,11 @@ def read_gz_file(file_path: Path) -> Iterator[bytes]:
def process_7z_file(
    file_path: Path,
    fn_hash_update: Callable[[bytes | bytearray], None],
    fn_hash_read: Callable[[int | None], bytes] | None = None,
) -> None:
    """Feed the contents of a 7zip archive to ``fn_hash_update``.

    The work is delegated to ``process_file_7z`` (imported from
    ``utils.archive_7zip``). Hashes are updated through the callback, so they
    hold their final value when this function returns.

    Args:
        file_path: Path to the 7z file.
        fn_hash_update: Callback invoked with each chunk of data to hash.
        fn_hash_read: Ignored. Still accepted so that callers passing the
            read callback used by the former py7zr-based implementation
            keep working.
    """
    process_file_7z(file_path=file_path, fn_hash_update=fn_hash_update)
def read_bz2_file(file_path: Path) -> Iterator[bytes]:
@@ -479,7 +444,6 @@ class FSRomsHandler(FSHandler):
mime = magic.Magic(mime=True)
try:
file_type = mime.from_file(file_path)
file_type = None
crc_c = 0
md5_h = hashlib.md5(usedforsecurity=False)
@@ -513,7 +477,6 @@ class FSRomsHandler(FSHandler):
process_7z_file(
file_path=file_path,
fn_hash_update=update_hashes,
fn_hash_read=lambda size: sha1_h.digest(),
)
elif extension == ".bz2" or file_type == "application/x-bzip2":

View File

@@ -1,6 +0,0 @@
from handler.tests.conftest import ( # noqa
admin_user,
clear_database,
editor_user,
setup_database,
)

View File

@@ -1,12 +0,0 @@
from handler.tests.conftest import ( # noqa
admin_user,
clear_database,
editor_user,
platform,
rom,
save,
screenshot,
setup_database,
state,
viewer_user,
)

View File

@@ -1,5 +1,6 @@
[pytest]
asyncio_mode = auto
testpaths = tests
env =
ROMM_BASE_PATH=romm_test
DB_NAME=romm_test

View File

@@ -1,20 +1,8 @@
from datetime import timedelta
import pytest
from endpoints.auth import ACCESS_TOKEN_EXPIRE_MINUTES, REFRESH_TOKEN_EXPIRE_DAYS
from handler.auth import oauth_handler
from handler.tests.conftest import ( # noqa
admin_user,
clear_database,
editor_user,
platform,
rom,
save,
setup_database,
state,
viewer_user,
)
from ..auth import ACCESS_TOKEN_EXPIRE_MINUTES, REFRESH_TOKEN_EXPIRE_DAYS
@pytest.fixture()

View File

View File

@@ -1,58 +1,86 @@
from collections.abc import Callable
# trunk-ignore-all(bandit/B404)
from py7zr import Py7zIO, WriterFactory
import subprocess
import tempfile
from collections.abc import Callable, Iterator
from pathlib import Path
SEVEN_ZIP_PATH = "/usr/bin/7z"
class CallbackIO(Py7zIO):
"""Py7zIO implementation that calls a callback on write and read."""
def process_file_7z(
    file_path: Path,
    fn_hash_update: Callable[[bytes | bytearray], None],
    *,
    timeout: float = 60,
) -> None:
    """Feed the first file stored in a 7zip archive to ``fn_hash_update``.

    The system 7z binary lists the archive, the first non-directory entry is
    extracted into a temporary directory, and its bytes are handed to
    ``fn_hash_update`` chunk by chunk. Whenever that is not possible (7z binary
    missing, listing or extraction failing or timing out, no file entry in
    the archive), the raw bytes of the archive file itself are fed to the
    callback instead.

    Args:
        file_path: Path to the 7z file.
        fn_hash_update: Callback to update hashes with data chunks.
        timeout: Seconds allowed for each 7z invocation.
    """
    entry_name = _list_first_file_entry(file_path, timeout)
    if entry_name is not None:
        with tempfile.TemporaryDirectory() as temp_dir:
            extracted = _extract_entry(file_path, entry_name, Path(temp_dir), timeout)
            if extracted is not None:
                for chunk in read_basic_file(extracted):
                    fn_hash_update(chunk)
                return

    # Nothing usable came out of 7z: hash the archive file as-is.
    for chunk in read_basic_file(file_path):
        fn_hash_update(chunk)


def _list_first_file_entry(file_path: Path, timeout: float) -> str | None:
    """Return the name of the first file entry of the archive, or None if 7z cannot list it."""
    try:
        result = subprocess.run(
            [SEVEN_ZIP_PATH, "l", "-slt", "-ba", "--", str(file_path)],
            capture_output=True,
            text=True,
            # Entry names that are not valid in the locale encoding must not
            # raise; surrogateescape lets them round-trip back to 7z unchanged.
            errors="surrogateescape",
            check=True,
            timeout=timeout,
            shell=False,  # trunk-ignore(bandit/B603): 7z path is hardcoded, file names follow `--`
        )
    except (
        subprocess.TimeoutExpired,
        subprocess.CalledProcessError,
        FileNotFoundError,
    ):
        return None
    return _parse_first_file_entry(result.stdout)


def _parse_first_file_entry(listing: str) -> str | None:
    """Return the path of the first non-directory record in ``7z l -slt -ba`` output."""
    path: str | None = None
    is_dir = False
    for line in listing.split("\n"):
        if not line.strip():
            # A blank line closes the current record.
            if path and not is_dir:
                return path
            path, is_dir = None, False
            continue

        # Split on the first " = " only, so names containing " = " stay intact.
        key, sep, value = line.partition(" = ")
        if not sep:
            continue
        key = key.strip()
        if key == "Path":
            if path and not is_dir:
                return path
            path, is_dir = value, False
        elif key == "Folder":
            is_dir = is_dir or value.strip() == "+"
        elif key == "Attributes":
            is_dir = is_dir or value.lstrip().startswith("D")

    return path if path and not is_dir else None


def _extract_entry(
    file_path: Path,
    entry_name: str,
    out_dir: Path,
    timeout: float,
) -> Path | None:
    """Extract ``entry_name`` into ``out_dir`` and return the extracted file, or None on failure."""
    try:
        subprocess.run(
            [
                SEVEN_ZIP_PATH,
                "e",
                f"-o{out_dir}",
                "-y",
                # The entry name comes from the archive: `--` stops switch
                # parsing so a name starting with "-" is taken as a file name.
                "--",
                str(file_path),
                entry_name,
            ],
            capture_output=True,
            check=True,
            timeout=timeout,
            shell=False,  # trunk-ignore(bandit/B603): 7z path is hardcoded, file names follow `--`
        )
    except (
        subprocess.TimeoutExpired,
        subprocess.CalledProcessError,
        FileNotFoundError,
    ):
        return None

    # `7z e` extracts without directory names, so only the final path
    # component of the entry exists inside out_dir.
    extracted = out_dir / Path(entry_name).name
    # Only hash a regular file; never follow a symlink stored in the archive.
    if extracted.is_file() and not extracted.is_symlink():
        return extracted
    return None
def read_basic_file(file_path: Path) -> Iterator[bytes]:
    """Lazily yield the raw bytes of ``file_path`` in blocks of at most 8192 bytes.

    Args:
        file_path: Path of the file to read in binary mode.

    Yields:
        Consecutive non-empty chunks of the file; nothing for an empty file.
    """
    with open(file_path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        yield from iter(lambda: stream.read(8192), b"")

View File

@@ -1,23 +0,0 @@
**/node_modules
node_modules/
**/venv
venv/
**/__pycache__
__pycache__/
**/assets/library
../frontend/assets/library/
**/.env
.env
**/romm_test
romm_test/
**/romm_mock
romm_mock/
**/.pytest_cache
.pytest_cache/

View File

@@ -34,13 +34,11 @@ RUN npm run build
FROM python-alias AS backend-build
# git is needed to install streaming-form-data fork
# linux-headers is needed to install py7zr
# libpq-dev is needed to build psycopg-c
# mariadb-connector-c-dev is needed to build mariadb-connector
RUN apk add --no-cache \
gcc \
git \
linux-headers \
libpq-dev \
mariadb-connector-c-dev \
musl-dev
@@ -55,6 +53,10 @@ RUN uv sync --locked --no-cache
FROM backend-build AS backend-dev-build
# linux-headers is needed to install psutil
RUN apk add --no-cache \
linux-headers
RUN uv sync --locked --no-cache --all-extras
@@ -85,7 +87,7 @@ RUN git clone --recursive --branch "${RALIBRETRO_VERSION}" --depth 1 https://git
FROM alpine:${ALPINE_VERSION} AS emulator-stage
RUN apk add --no-cache \
7zip \
p7zip \
wget
ARG EMULATORJS_VERSION=4.2.3

View File

@@ -0,0 +1,18 @@
# Configuration
**/.env
# Frontend
**/assets/library/
**/node_modules/
# Backend
backend/tests/
**/venv/
**/.venv/
**/.mypy_cache/
**/.pytest_cache/
**/__pycache__/
# Testing
**/romm_test/
**/romm_mock/

Some files were not shown because too many files have changed in this diff Show More