mirror of
https://github.com/rommapp/romm.git
synced 2026-06-27 22:35:57 +00:00
feat(fs): hardlink import/export assets when possible, harden sync init
Importer (gamelist/launchbox file:// flows) and exporters (gamelist.xml, metadata.pegasus.txt local exports) now hardlink media assets when source and destination share a filesystem, falling back transparently to a copy on EXDEV / EPERM / EOPNOTSUPP / EMLINK / EACCES (cross-device, FAT32, exFAT, network mounts, etc.). Saves disk space and is effectively instantaneous on large files (videos, manuals, miximages). Covers keep a real copy (allow_link=False) because _store_cover resizes the small cover in place via PIL.Image.save, which would truncate the shared inode and corrupt the user's source image. Also makes FSSyncHandler tolerate a missing/unwritable /romm/sync at startup: an OSError from mkdir now logs a warning instead of crashing the whole app at module-import time. Sync calls still fail at use time if the mount remains broken — the right place to surface the error. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,8 +16,9 @@ from anyio import open_file
|
||||
from starlette.datastructures import UploadFile
|
||||
|
||||
from config.config_manager import config_manager as cm
|
||||
from logger.logger import log
|
||||
from models.base import FILE_NAME_MAX_LENGTH
|
||||
from utils.filesystem import iter_directories, iter_files
|
||||
from utils.filesystem import iter_directories, iter_files, link_or_copy_file
|
||||
|
||||
TAG_REGEX = re.compile(r"\(([^)]+)\)|\[([^]]+)\]")
|
||||
EXTENSION_REGEX = re.compile(r"\.(([a-z]+\.)*\w+)$")
|
||||
@@ -148,13 +149,22 @@ class Asset(Enum):
|
||||
|
||||
|
||||
class FSHandler:
|
||||
def __init__(self, base_path: str):
|
||||
def __init__(self, base_path: str, tolerate_missing_base: bool = False):
|
||||
self.base_path = Path(base_path).resolve()
|
||||
self._locks: dict[str, asyncio.Lock] = {}
|
||||
self._lock_mutex = asyncio.Lock()
|
||||
|
||||
# Create base directory synchronously during initialization
|
||||
self.base_path.mkdir(parents=True, exist_ok=True)
|
||||
# Create base directory synchronously during initialization.
|
||||
try:
|
||||
self.base_path.mkdir(parents=True, exist_ok=True)
|
||||
except OSError:
|
||||
if not tolerate_missing_base:
|
||||
raise
|
||||
|
||||
log.warning(
|
||||
f"Could not create or access {self.base_path}; "
|
||||
"feature will be unavailable until the directory is writable."
|
||||
)
|
||||
|
||||
async def _get_file_lock(self, file_path: str) -> asyncio.Lock:
|
||||
"""Get or create a lock for a specific file path."""
|
||||
@@ -487,13 +497,23 @@ class FSHandler:
|
||||
|
||||
return await open_file(full_path, "rb")
|
||||
|
||||
async def copy_file(self, source_full_path: Path, dest_path: str) -> None:
|
||||
async def copy_file(
|
||||
self,
|
||||
source_full_path: Path,
|
||||
dest_path: str,
|
||||
allow_link: bool = True,
|
||||
) -> None:
|
||||
"""
|
||||
Copy a file from source to destination.
|
||||
|
||||
Args:
|
||||
source_full_path: Absolute path to the source file
|
||||
dest_path: Relative path to the destination file
|
||||
allow_link: If True (default), try a hardlink first and fall back to
|
||||
a copy when the link isn't possible (cross-device, unsupported
|
||||
filesystem, etc.). Pass False when the caller will mutate the
|
||||
destination in place, since mutating a hardlinked file also
|
||||
mutates the source — see `_store_cover`'s resize step.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If source file does not exist
|
||||
@@ -518,7 +538,10 @@ class FSHandler:
|
||||
# Create destination directory if needed
|
||||
dest_parent_anyio_path = AnyioPath(str(dest_full_path.parent))
|
||||
await dest_parent_anyio_path.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(str(source_full_path), str(dest_full_path))
|
||||
if allow_link:
|
||||
link_or_copy_file(source_full_path, dest_full_path)
|
||||
else:
|
||||
shutil.copy2(str(source_full_path), str(dest_full_path))
|
||||
|
||||
async def move_file_or_folder(self, source_path: str, dest_path: str) -> None:
|
||||
"""
|
||||
|
||||
@@ -129,7 +129,10 @@ class FSResourcesHandler(FSHandler):
|
||||
log.warning(f"Cover file not found: {url_cover}")
|
||||
return None
|
||||
dest_path = f"{cover_file}/{size.value}.png"
|
||||
await self.copy_file(resolved, dest_path)
|
||||
# allow_link=False: small-size covers get resized in place
|
||||
# below, which would mutate the user's source image if the
|
||||
# destination were a hardlink.
|
||||
await self.copy_file(resolved, dest_path, allow_link=False)
|
||||
|
||||
if ENABLE_SCHEDULED_CONVERT_IMAGES_TO_WEBP:
|
||||
self.image_converter.convert_to_webp(
|
||||
|
||||
@@ -12,7 +12,7 @@ class FSSyncHandler(FSHandler):
|
||||
"""Filesystem handler for sync folder operations (File Transfer mode)."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(base_path=SYNC_BASE_PATH)
|
||||
super().__init__(base_path=SYNC_BASE_PATH, tolerate_missing_base=True)
|
||||
|
||||
def build_incoming_path(
|
||||
self, device_id: str, platform_slug: str | None = None
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import errno
|
||||
import shutil
|
||||
import tempfile
|
||||
from io import BytesIO
|
||||
@@ -515,3 +516,105 @@ class TestFSHandler:
|
||||
dirs = await handler.list_directories(".")
|
||||
for i in range(5):
|
||||
assert f"test_dir_{i}" not in dirs
|
||||
|
||||
async def test_copy_file_hardlinks_by_default(
|
||||
self, handler: FSHandler, sample_file_content
|
||||
):
|
||||
"""copy_file defaults to allow_link=True: when source and dest are on
|
||||
the same filesystem, the destination is a hardlink to the source."""
|
||||
source_full = handler.base_path / "source.bin"
|
||||
source_full.write_bytes(sample_file_content)
|
||||
|
||||
await handler.copy_file(source_full, "dest.bin")
|
||||
|
||||
dest_full = handler.base_path / "dest.bin"
|
||||
assert dest_full.read_bytes() == sample_file_content
|
||||
assert source_full.stat().st_ino == dest_full.stat().st_ino
|
||||
|
||||
async def test_copy_file_allow_link_false_does_real_copy(
|
||||
self, handler: FSHandler, sample_file_content
|
||||
):
|
||||
"""allow_link=False forces a real copy — content matches but inodes
|
||||
differ, so mutating dest won't affect source. This is the path
|
||||
`_store_cover` uses to keep PIL's in-place resize from corrupting the
|
||||
user's source image."""
|
||||
source_full = handler.base_path / "source.bin"
|
||||
source_full.write_bytes(sample_file_content)
|
||||
|
||||
await handler.copy_file(source_full, "dest.bin", allow_link=False)
|
||||
|
||||
dest_full = handler.base_path / "dest.bin"
|
||||
assert dest_full.read_bytes() == sample_file_content
|
||||
assert source_full.stat().st_ino != dest_full.stat().st_ino
|
||||
|
||||
async def test_copy_file_allow_link_falls_back_to_copy_on_exdev(
|
||||
self, handler: FSHandler, sample_file_content
|
||||
):
|
||||
"""When os.link raises EXDEV (cross-filesystem), copy_file transparently
|
||||
falls back to a real copy and the result is still a valid file."""
|
||||
source_full = handler.base_path / "source.bin"
|
||||
source_full.write_bytes(sample_file_content)
|
||||
|
||||
with patch(
|
||||
"utils.filesystem.os.link",
|
||||
side_effect=OSError(errno.EXDEV, "Cross-device link"),
|
||||
):
|
||||
await handler.copy_file(source_full, "dest.bin")
|
||||
|
||||
dest_full = handler.base_path / "dest.bin"
|
||||
assert dest_full.read_bytes() == sample_file_content
|
||||
assert source_full.stat().st_ino != dest_full.stat().st_ino
|
||||
|
||||
async def test_copy_file_nonexistent_source(self, handler: FSHandler):
|
||||
"""Missing source must raise FileNotFoundError regardless of link mode."""
|
||||
with pytest.raises(FileNotFoundError, match="Source file not found"):
|
||||
await handler.copy_file(handler.base_path / "missing.bin", "dest.bin")
|
||||
|
||||
|
||||
class TestFSHandlerTolerateMissingBase:
|
||||
"""Tests for the tolerate_missing_base flag, which lets optional features
|
||||
(like the sync folder) come up degraded instead of crashing the whole app
|
||||
when the base path can't be created."""
|
||||
|
||||
def test_default_raises_on_mkdir_failure(self):
|
||||
"""Default behavior: an OSError from mkdir propagates, so misconfigured
|
||||
critical paths (resources, library) still fail loudly at startup."""
|
||||
with patch.object(
|
||||
Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied")
|
||||
):
|
||||
with pytest.raises(PermissionError):
|
||||
FSHandler("/some/unwritable/path")
|
||||
|
||||
def test_tolerate_missing_base_swallows_oserror(self, tmp_path, caplog):
|
||||
"""With tolerate_missing_base=True, mkdir failure is logged but does
|
||||
not raise — the handler instance is still constructed so module-level
|
||||
imports don't crash."""
|
||||
target = tmp_path / "missing"
|
||||
|
||||
with patch.object(
|
||||
Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied")
|
||||
):
|
||||
handler = FSHandler(str(target), tolerate_missing_base=True)
|
||||
|
||||
# Handler exists and base_path is set, even though the directory
|
||||
# could not be created.
|
||||
assert handler.base_path == target.resolve()
|
||||
assert not handler.base_path.exists()
|
||||
|
||||
def test_tolerate_missing_base_still_creates_when_possible(self, tmp_path):
|
||||
"""tolerate_missing_base=True should not change behavior when mkdir
|
||||
succeeds — the directory must still be created normally."""
|
||||
target = tmp_path / "new_dir"
|
||||
assert not target.exists()
|
||||
|
||||
handler = FSHandler(str(target), tolerate_missing_base=True)
|
||||
|
||||
assert handler.base_path.exists()
|
||||
assert handler.base_path.is_dir()
|
||||
|
||||
def test_tolerate_missing_base_reraises_non_oserror(self, tmp_path):
|
||||
"""Only OSError gets swallowed; programming errors (e.g. a RuntimeError
|
||||
from corrupted state) must still surface."""
|
||||
with patch.object(Path, "mkdir", side_effect=RuntimeError("unexpected")):
|
||||
with pytest.raises(RuntimeError, match="unexpected"):
|
||||
FSHandler(str(tmp_path / "x"), tolerate_missing_base=True)
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
"""Tests for filesystem sync handler."""
|
||||
|
||||
import errno
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -133,3 +135,21 @@ class TestFSSyncHandler:
|
||||
def test_remove_incoming_file_nonexistent(self, handler: FSSyncHandler):
|
||||
# Should not raise for nonexistent files
|
||||
handler.remove_incoming_file("/nonexistent/path/file.sav")
|
||||
|
||||
|
||||
class TestFSSyncHandlerStartup:
|
||||
"""Sync is an optional feature: if /romm/sync isn't writable (bad mount,
|
||||
wrong ownership), the app must still boot. Failures should surface when
|
||||
sync is actually used, not at module-import time."""
|
||||
|
||||
def test_init_does_not_raise_when_base_path_unwritable(self):
|
||||
"""Regression test for the PermissionError on /romm/sync at boot.
|
||||
FSSyncHandler must construct successfully even when mkdir fails."""
|
||||
with patch.object(
|
||||
Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied")
|
||||
):
|
||||
handler = FSSyncHandler()
|
||||
|
||||
assert handler is not None
|
||||
# base_path is set even though the directory wasn't created.
|
||||
assert isinstance(handler.base_path, Path)
|
||||
|
||||
134
backend/tests/utils/test_filesystem.py
Normal file
134
backend/tests/utils/test_filesystem.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Tests for utils.filesystem helpers."""
|
||||
|
||||
import errno
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from utils.filesystem import link_or_copy_file
|
||||
|
||||
|
||||
class TestLinkOrCopyFile:
|
||||
"""Test the hardlink-with-copy-fallback helper used by importers and exporters."""
|
||||
|
||||
def test_same_filesystem_creates_hardlink(self, tmp_path):
|
||||
"""When source and dest are on the same filesystem, the helper creates a
|
||||
hardlink — source and dest share an inode and st_nlink reflects both."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"payload")
|
||||
dest = tmp_path / "dest.bin"
|
||||
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
assert dest.read_bytes() == b"payload"
|
||||
# Hardlink: shared inode, link count >= 2
|
||||
assert source.stat().st_ino == dest.stat().st_ino
|
||||
assert source.stat().st_nlink >= 2
|
||||
|
||||
def test_falls_back_to_copy_on_exdev(self, tmp_path):
|
||||
"""When os.link raises EXDEV (cross-device), the helper falls back to
|
||||
shutil.copy2 — content matches but inodes differ."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"payload")
|
||||
dest = tmp_path / "dest.bin"
|
||||
|
||||
exdev = OSError(errno.EXDEV, "Cross-device link")
|
||||
|
||||
with patch("utils.filesystem.os.link", side_effect=exdev):
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
assert dest.read_bytes() == b"payload"
|
||||
# Real copy: separate inodes
|
||||
assert source.stat().st_ino != dest.stat().st_ino
|
||||
|
||||
def test_falls_back_to_copy_on_eperm(self, tmp_path):
|
||||
"""EPERM (filesystem doesn't permit hardlinks, e.g. FAT32) must also
|
||||
trigger the copy fallback."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"payload")
|
||||
dest = tmp_path / "dest.bin"
|
||||
|
||||
eperm = OSError(errno.EPERM, "Operation not permitted")
|
||||
|
||||
with patch("utils.filesystem.os.link", side_effect=eperm):
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
assert dest.read_bytes() == b"payload"
|
||||
assert source.stat().st_ino != dest.stat().st_ino
|
||||
|
||||
def test_reraises_non_fallback_oserror(self, tmp_path):
|
||||
"""An OSError that isn't in the fallback set (e.g. ENOSPC — disk full)
|
||||
must propagate; we don't want to mask real disk errors."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"payload")
|
||||
dest = tmp_path / "dest.bin"
|
||||
|
||||
enospc = OSError(errno.ENOSPC, "No space left on device")
|
||||
|
||||
with patch("utils.filesystem.os.link", side_effect=enospc):
|
||||
with pytest.raises(OSError) as excinfo:
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
assert excinfo.value.errno == errno.ENOSPC
|
||||
assert not dest.exists()
|
||||
|
||||
def test_mutation_after_link_affects_source(self, tmp_path):
|
||||
"""Document hardlink semantics: writing through the dest path with
|
||||
O_TRUNC truncates the shared inode and therefore mutates the source.
|
||||
This is the exact hazard `_store_cover` avoids with allow_link=False."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"original")
|
||||
dest = tmp_path / "dest.bin"
|
||||
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
# Truncating-write through dest affects source (same inode).
|
||||
with open(dest, "wb") as f:
|
||||
f.write(b"mutated")
|
||||
|
||||
assert source.read_bytes() == b"mutated"
|
||||
|
||||
def test_mutation_after_copy_does_not_affect_source(self, tmp_path):
|
||||
"""When the helper falls back to copy, the inodes are independent —
|
||||
mutation through dest leaves the source untouched."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"original")
|
||||
dest = tmp_path / "dest.bin"
|
||||
|
||||
with patch(
|
||||
"utils.filesystem.os.link",
|
||||
side_effect=OSError(errno.EXDEV, "Cross-device link"),
|
||||
):
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
with open(dest, "wb") as f:
|
||||
f.write(b"mutated")
|
||||
|
||||
assert source.read_bytes() == b"original"
|
||||
|
||||
def test_dest_already_exists_raises(self, tmp_path):
|
||||
"""os.link raises EEXIST when dest already exists; that's not in the
|
||||
fallback set, so it propagates. Callers are responsible for handling
|
||||
the already-exists case before calling this helper."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"payload")
|
||||
dest = tmp_path / "dest.bin"
|
||||
dest.write_bytes(b"existing")
|
||||
|
||||
with pytest.raises(OSError) as excinfo:
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
assert excinfo.value.errno == errno.EEXIST
|
||||
|
||||
def test_helper_uses_os_link_first(self, tmp_path):
|
||||
"""Sanity-check that the helper calls os.link before any copy logic —
|
||||
guards against a future refactor regressing to copy-only."""
|
||||
source = tmp_path / "source.bin"
|
||||
source.write_bytes(b"payload")
|
||||
dest = tmp_path / "dest.bin"
|
||||
|
||||
with patch("utils.filesystem.os.link", wraps=os.link) as link_spy:
|
||||
link_or_copy_file(source, dest)
|
||||
|
||||
link_spy.assert_called_once_with(source, dest)
|
||||
@@ -1,5 +1,7 @@
|
||||
import errno
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
@@ -37,6 +39,41 @@ def iter_directories(path: str, recursive: bool = False) -> Iterator[tuple[Path,
|
||||
break
|
||||
|
||||
|
||||
# errno values that mean "hardlink not possible here, fall back to copy".
|
||||
# EXDEV: cross-device link. EPERM: filesystem doesn't permit/support hardlinks
|
||||
# (e.g. FAT32, exFAT, some network mounts). EOPNOTSUPP/ENOTSUP: same, on BSD/macOS.
|
||||
# EMLINK: source already has the maximum number of hardlinks for the filesystem.
|
||||
_LINK_FALLBACK_ERRNOS: frozenset[int] = frozenset(
|
||||
e
|
||||
for e in (
|
||||
getattr(errno, "EXDEV", None),
|
||||
getattr(errno, "EPERM", None),
|
||||
getattr(errno, "EOPNOTSUPP", None),
|
||||
getattr(errno, "ENOTSUP", None),
|
||||
getattr(errno, "EMLINK", None),
|
||||
getattr(errno, "EACCES", None),
|
||||
)
|
||||
if e is not None
|
||||
)
|
||||
|
||||
|
||||
def link_or_copy_file(source: Path, dest: Path) -> None:
|
||||
"""Hardlink ``source`` to ``dest``, falling back to a copy on cross-device
|
||||
or unsupported-link errors. Caller is responsible for creating ``dest.parent``.
|
||||
|
||||
Hardlinking is preferred because it's instantaneous and uses no extra disk
|
||||
space, but only works within a single filesystem. If linking isn't possible,
|
||||
we transparently fall back to ``shutil.copy2`` (preserving metadata).
|
||||
"""
|
||||
try:
|
||||
os.link(source, dest)
|
||||
return
|
||||
except OSError as exc:
|
||||
if exc.errno not in _LINK_FALLBACK_ERRNOS:
|
||||
raise
|
||||
shutil.copy2(source, dest)
|
||||
|
||||
|
||||
INVALID_CHARS_HYPHENS = re.compile(r"[\\/:|]")
|
||||
INVALID_CHARS_EMPTY = re.compile(r'[*?"<>+]')
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import shutil
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from xml.etree.ElementTree import ( # trunk-ignore(bandit/B405)
|
||||
@@ -17,6 +16,7 @@ from handler.database import db_platform_handler, db_rom_handler
|
||||
from handler.filesystem import fs_platform_handler, fs_resource_handler
|
||||
from logger.logger import log
|
||||
from models.rom import Rom
|
||||
from utils.filesystem import link_or_copy_file
|
||||
|
||||
# Map gamelist asset keys to subdirectory names inside assets/
|
||||
ASSET_DIRS: dict[str, str] = {
|
||||
@@ -151,14 +151,14 @@ class GamelistExporter:
|
||||
return refs
|
||||
|
||||
def _copy_asset(self, source: Path, dest: Path) -> bool:
|
||||
"""Copy a file from source to dest. Returns True on success."""
|
||||
"""Place ``source`` at ``dest`` via hardlink (same filesystem) or copy
|
||||
(otherwise). Returns True on success."""
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
if dest.exists():
|
||||
return True
|
||||
|
||||
try:
|
||||
with open(source, "rb") as src, open(dest, "wb") as dst:
|
||||
shutil.copyfileobj(src, dst)
|
||||
link_or_copy_file(source, dest)
|
||||
return True
|
||||
except OSError as e:
|
||||
log.warning(f"Failed to copy {source} -> {dest}: {e}")
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import shutil
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
@@ -8,6 +7,7 @@ from handler.database import db_platform_handler, db_rom_handler
|
||||
from handler.filesystem import fs_platform_handler, fs_resource_handler
|
||||
from logger.logger import log
|
||||
from models.rom import Rom
|
||||
from utils.filesystem import link_or_copy_file
|
||||
|
||||
# Map Pegasus asset keys to subdirectory names inside assets/
|
||||
ASSET_DIRS: dict[str, str] = {
|
||||
@@ -177,14 +177,14 @@ class PegasusExporter:
|
||||
return "\n".join(lines)
|
||||
|
||||
def _copy_asset(self, source: Path, dest: Path) -> bool:
|
||||
"""Copy a file from source to dest using raw read/write. Returns True on success."""
|
||||
"""Place ``source`` at ``dest`` via hardlink (same filesystem) or copy
|
||||
(otherwise). Returns True on success."""
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
if dest.exists():
|
||||
return True
|
||||
|
||||
try:
|
||||
with open(source, "rb") as src, open(dest, "wb") as dst:
|
||||
shutil.copyfileobj(src, dst)
|
||||
link_or_copy_file(source, dest)
|
||||
return True
|
||||
except OSError as e:
|
||||
log.warning(f"Failed to copy {source} -> {dest}: {e}")
|
||||
|
||||
Reference in New Issue
Block a user