feat(fs): hardlink import/export assets when possible, harden sync init

Importer (gamelist/launchbox file:// flows) and exporters (gamelist.xml,
metadata.pegasus.txt local exports) now hardlink media assets when source
and destination share a filesystem, falling back transparently to a copy
on EXDEV / EPERM / EOPNOTSUPP / EMLINK / EACCES (cross-device, FAT32,
exFAT, network mounts, etc.). Saves disk space and is effectively
instantaneous on large files (videos, manuals, miximages).

Covers keep a real copy (allow_link=False) because _store_cover resizes
the small cover in place via PIL.Image.save, which would truncate the
shared inode and corrupt the user's source image.

Also makes FSSyncHandler tolerate a missing/unwritable /romm/sync at
startup: an OSError from mkdir now logs a warning instead of crashing
the whole app at module-import time. Sync calls still fail at use time
if the mount remains broken — the right place to surface the error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Georges-Antoine Assi
2026-05-18 07:38:11 -04:00
parent c7ecf5d197
commit 757fafae5f
9 changed files with 336 additions and 16 deletions

View File

@@ -16,8 +16,9 @@ from anyio import open_file
from starlette.datastructures import UploadFile
from config.config_manager import config_manager as cm
from logger.logger import log
from models.base import FILE_NAME_MAX_LENGTH
from utils.filesystem import iter_directories, iter_files
from utils.filesystem import iter_directories, iter_files, link_or_copy_file
TAG_REGEX = re.compile(r"\(([^)]+)\)|\[([^]]+)\]")
EXTENSION_REGEX = re.compile(r"\.(([a-z]+\.)*\w+)$")
@@ -148,13 +149,22 @@ class Asset(Enum):
class FSHandler:
def __init__(self, base_path: str):
def __init__(self, base_path: str, tolerate_missing_base: bool = False):
self.base_path = Path(base_path).resolve()
self._locks: dict[str, asyncio.Lock] = {}
self._lock_mutex = asyncio.Lock()
# Create base directory synchronously during initialization
self.base_path.mkdir(parents=True, exist_ok=True)
# Create base directory synchronously during initialization.
try:
self.base_path.mkdir(parents=True, exist_ok=True)
except OSError:
if not tolerate_missing_base:
raise
log.warning(
f"Could not create or access {self.base_path}; "
"feature will be unavailable until the directory is writable."
)
async def _get_file_lock(self, file_path: str) -> asyncio.Lock:
"""Get or create a lock for a specific file path."""
@@ -487,13 +497,23 @@ class FSHandler:
return await open_file(full_path, "rb")
async def copy_file(self, source_full_path: Path, dest_path: str) -> None:
async def copy_file(
self,
source_full_path: Path,
dest_path: str,
allow_link: bool = True,
) -> None:
"""
Copy a file from source to destination.
Args:
source_full_path: Absolute path to the source file
dest_path: Relative path to the destination file
allow_link: If True (default), try a hardlink first and fall back to
a copy when the link isn't possible (cross-device, unsupported
filesystem, etc.). Pass False when the caller will mutate the
destination in place, since mutating a hardlinked file also
mutates the source — see `_store_cover`'s resize step.
Raises:
FileNotFoundError: If source file does not exist
@@ -518,7 +538,10 @@ class FSHandler:
# Create destination directory if needed
dest_parent_anyio_path = AnyioPath(str(dest_full_path.parent))
await dest_parent_anyio_path.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(source_full_path), str(dest_full_path))
if allow_link:
link_or_copy_file(source_full_path, dest_full_path)
else:
shutil.copy2(str(source_full_path), str(dest_full_path))
async def move_file_or_folder(self, source_path: str, dest_path: str) -> None:
"""

View File

@@ -129,7 +129,10 @@ class FSResourcesHandler(FSHandler):
log.warning(f"Cover file not found: {url_cover}")
return None
dest_path = f"{cover_file}/{size.value}.png"
await self.copy_file(resolved, dest_path)
# allow_link=False: small-size covers get resized in place
# below, which would mutate the user's source image if the
# destination were a hardlink.
await self.copy_file(resolved, dest_path, allow_link=False)
if ENABLE_SCHEDULED_CONVERT_IMAGES_TO_WEBP:
self.image_converter.convert_to_webp(

View File

@@ -12,7 +12,7 @@ class FSSyncHandler(FSHandler):
"""Filesystem handler for sync folder operations (File Transfer mode)."""
def __init__(self) -> None:
super().__init__(base_path=SYNC_BASE_PATH)
super().__init__(base_path=SYNC_BASE_PATH, tolerate_missing_base=True)
def build_incoming_path(
self, device_id: str, platform_slug: str | None = None

View File

@@ -1,4 +1,5 @@
import asyncio
import errno
import shutil
import tempfile
from io import BytesIO
@@ -515,3 +516,105 @@ class TestFSHandler:
dirs = await handler.list_directories(".")
for i in range(5):
assert f"test_dir_{i}" not in dirs
async def test_copy_file_hardlinks_by_default(
self, handler: FSHandler, sample_file_content
):
"""copy_file defaults to allow_link=True: when source and dest are on
the same filesystem, the destination is a hardlink to the source."""
source_full = handler.base_path / "source.bin"
source_full.write_bytes(sample_file_content)
await handler.copy_file(source_full, "dest.bin")
dest_full = handler.base_path / "dest.bin"
assert dest_full.read_bytes() == sample_file_content
assert source_full.stat().st_ino == dest_full.stat().st_ino
async def test_copy_file_allow_link_false_does_real_copy(
self, handler: FSHandler, sample_file_content
):
"""allow_link=False forces a real copy — content matches but inodes
differ, so mutating dest won't affect source. This is the path
`_store_cover` uses to keep PIL's in-place resize from corrupting the
user's source image."""
source_full = handler.base_path / "source.bin"
source_full.write_bytes(sample_file_content)
await handler.copy_file(source_full, "dest.bin", allow_link=False)
dest_full = handler.base_path / "dest.bin"
assert dest_full.read_bytes() == sample_file_content
assert source_full.stat().st_ino != dest_full.stat().st_ino
async def test_copy_file_allow_link_falls_back_to_copy_on_exdev(
self, handler: FSHandler, sample_file_content
):
"""When os.link raises EXDEV (cross-filesystem), copy_file transparently
falls back to a real copy and the result is still a valid file."""
source_full = handler.base_path / "source.bin"
source_full.write_bytes(sample_file_content)
with patch(
"utils.filesystem.os.link",
side_effect=OSError(errno.EXDEV, "Cross-device link"),
):
await handler.copy_file(source_full, "dest.bin")
dest_full = handler.base_path / "dest.bin"
assert dest_full.read_bytes() == sample_file_content
assert source_full.stat().st_ino != dest_full.stat().st_ino
async def test_copy_file_nonexistent_source(self, handler: FSHandler):
"""Missing source must raise FileNotFoundError regardless of link mode."""
with pytest.raises(FileNotFoundError, match="Source file not found"):
await handler.copy_file(handler.base_path / "missing.bin", "dest.bin")
class TestFSHandlerTolerateMissingBase:
"""Tests for the tolerate_missing_base flag, which lets optional features
(like the sync folder) come up degraded instead of crashing the whole app
when the base path can't be created."""
def test_default_raises_on_mkdir_failure(self):
"""Default behavior: an OSError from mkdir propagates, so misconfigured
critical paths (resources, library) still fail loudly at startup."""
with patch.object(
Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied")
):
with pytest.raises(PermissionError):
FSHandler("/some/unwritable/path")
def test_tolerate_missing_base_swallows_oserror(self, tmp_path, caplog):
"""With tolerate_missing_base=True, mkdir failure is logged but does
not raise — the handler instance is still constructed so module-level
imports don't crash."""
target = tmp_path / "missing"
with patch.object(
Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied")
):
handler = FSHandler(str(target), tolerate_missing_base=True)
# Handler exists and base_path is set, even though the directory
# could not be created.
assert handler.base_path == target.resolve()
assert not handler.base_path.exists()
def test_tolerate_missing_base_still_creates_when_possible(self, tmp_path):
"""tolerate_missing_base=True should not change behavior when mkdir
succeeds — the directory must still be created normally."""
target = tmp_path / "new_dir"
assert not target.exists()
handler = FSHandler(str(target), tolerate_missing_base=True)
assert handler.base_path.exists()
assert handler.base_path.is_dir()
def test_tolerate_missing_base_reraises_non_oserror(self, tmp_path):
"""Only OSError gets swallowed; programming errors (e.g. a RuntimeError
from corrupted state) must still surface."""
with patch.object(Path, "mkdir", side_effect=RuntimeError("unexpected")):
with pytest.raises(RuntimeError, match="unexpected"):
FSHandler(str(tmp_path / "x"), tolerate_missing_base=True)

View File

@@ -1,9 +1,11 @@
"""Tests for filesystem sync handler."""
import errno
import os
import shutil
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
@@ -133,3 +135,21 @@ class TestFSSyncHandler:
def test_remove_incoming_file_nonexistent(self, handler: FSSyncHandler):
# Should not raise for nonexistent files
handler.remove_incoming_file("/nonexistent/path/file.sav")
class TestFSSyncHandlerStartup:
"""Sync is an optional feature: if /romm/sync isn't writable (bad mount,
wrong ownership), the app must still boot. Failures should surface when
sync is actually used, not at module-import time."""
def test_init_does_not_raise_when_base_path_unwritable(self):
"""Regression test for the PermissionError on /romm/sync at boot.
FSSyncHandler must construct successfully even when mkdir fails."""
with patch.object(
Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied")
):
handler = FSSyncHandler()
assert handler is not None
# base_path is set even though the directory wasn't created.
assert isinstance(handler.base_path, Path)

View File

@@ -0,0 +1,134 @@
"""Tests for utils.filesystem helpers."""
import errno
import os
from unittest.mock import patch
import pytest
from utils.filesystem import link_or_copy_file
class TestLinkOrCopyFile:
"""Test the hardlink-with-copy-fallback helper used by importers and exporters."""
def test_same_filesystem_creates_hardlink(self, tmp_path):
"""When source and dest are on the same filesystem, the helper creates a
hardlink — source and dest share an inode and st_nlink reflects both."""
source = tmp_path / "source.bin"
source.write_bytes(b"payload")
dest = tmp_path / "dest.bin"
link_or_copy_file(source, dest)
assert dest.read_bytes() == b"payload"
# Hardlink: shared inode, link count >= 2
assert source.stat().st_ino == dest.stat().st_ino
assert source.stat().st_nlink >= 2
def test_falls_back_to_copy_on_exdev(self, tmp_path):
"""When os.link raises EXDEV (cross-device), the helper falls back to
shutil.copy2 — content matches but inodes differ."""
source = tmp_path / "source.bin"
source.write_bytes(b"payload")
dest = tmp_path / "dest.bin"
exdev = OSError(errno.EXDEV, "Cross-device link")
with patch("utils.filesystem.os.link", side_effect=exdev):
link_or_copy_file(source, dest)
assert dest.read_bytes() == b"payload"
# Real copy: separate inodes
assert source.stat().st_ino != dest.stat().st_ino
def test_falls_back_to_copy_on_eperm(self, tmp_path):
"""EPERM (filesystem doesn't permit hardlinks, e.g. FAT32) must also
trigger the copy fallback."""
source = tmp_path / "source.bin"
source.write_bytes(b"payload")
dest = tmp_path / "dest.bin"
eperm = OSError(errno.EPERM, "Operation not permitted")
with patch("utils.filesystem.os.link", side_effect=eperm):
link_or_copy_file(source, dest)
assert dest.read_bytes() == b"payload"
assert source.stat().st_ino != dest.stat().st_ino
def test_reraises_non_fallback_oserror(self, tmp_path):
"""An OSError that isn't in the fallback set (e.g. ENOSPC — disk full)
must propagate; we don't want to mask real disk errors."""
source = tmp_path / "source.bin"
source.write_bytes(b"payload")
dest = tmp_path / "dest.bin"
enospc = OSError(errno.ENOSPC, "No space left on device")
with patch("utils.filesystem.os.link", side_effect=enospc):
with pytest.raises(OSError) as excinfo:
link_or_copy_file(source, dest)
assert excinfo.value.errno == errno.ENOSPC
assert not dest.exists()
def test_mutation_after_link_affects_source(self, tmp_path):
"""Document hardlink semantics: writing through the dest path with
O_TRUNC truncates the shared inode and therefore mutates the source.
This is the exact hazard `_store_cover` avoids with allow_link=False."""
source = tmp_path / "source.bin"
source.write_bytes(b"original")
dest = tmp_path / "dest.bin"
link_or_copy_file(source, dest)
# Truncating-write through dest affects source (same inode).
with open(dest, "wb") as f:
f.write(b"mutated")
assert source.read_bytes() == b"mutated"
def test_mutation_after_copy_does_not_affect_source(self, tmp_path):
"""When the helper falls back to copy, the inodes are independent —
mutation through dest leaves the source untouched."""
source = tmp_path / "source.bin"
source.write_bytes(b"original")
dest = tmp_path / "dest.bin"
with patch(
"utils.filesystem.os.link",
side_effect=OSError(errno.EXDEV, "Cross-device link"),
):
link_or_copy_file(source, dest)
with open(dest, "wb") as f:
f.write(b"mutated")
assert source.read_bytes() == b"original"
def test_dest_already_exists_raises(self, tmp_path):
"""os.link raises EEXIST when dest already exists; that's not in the
fallback set, so it propagates. Callers are responsible for handling
the already-exists case before calling this helper."""
source = tmp_path / "source.bin"
source.write_bytes(b"payload")
dest = tmp_path / "dest.bin"
dest.write_bytes(b"existing")
with pytest.raises(OSError) as excinfo:
link_or_copy_file(source, dest)
assert excinfo.value.errno == errno.EEXIST
def test_helper_uses_os_link_first(self, tmp_path):
"""Sanity-check that the helper calls os.link before any copy logic —
guards against a future refactor regressing to copy-only."""
source = tmp_path / "source.bin"
source.write_bytes(b"payload")
dest = tmp_path / "dest.bin"
with patch("utils.filesystem.os.link", wraps=os.link) as link_spy:
link_or_copy_file(source, dest)
link_spy.assert_called_once_with(source, dest)

View File

@@ -1,5 +1,7 @@
import errno
import os
import re
import shutil
from collections.abc import Iterator
from pathlib import Path
@@ -37,6 +39,41 @@ def iter_directories(path: str, recursive: bool = False) -> Iterator[tuple[Path,
break
# errno values that mean "hardlink not possible here, fall back to copy".
# EXDEV: cross-device link. EPERM: filesystem doesn't permit/support hardlinks
# (e.g. FAT32, exFAT, some network mounts). EOPNOTSUPP/ENOTSUP: same, on BSD/macOS.
# EMLINK: source already has the maximum number of hardlinks for the filesystem.
_LINK_FALLBACK_ERRNOS: frozenset[int] = frozenset(
e
for e in (
getattr(errno, "EXDEV", None),
getattr(errno, "EPERM", None),
getattr(errno, "EOPNOTSUPP", None),
getattr(errno, "ENOTSUP", None),
getattr(errno, "EMLINK", None),
getattr(errno, "EACCES", None),
)
if e is not None
)
def link_or_copy_file(source: Path, dest: Path) -> None:
"""Hardlink ``source`` to ``dest``, falling back to a copy on cross-device
or unsupported-link errors. Caller is responsible for creating ``dest.parent``.
Hardlinking is preferred because it's instantaneous and uses no extra disk
space, but only works within a single filesystem. If linking isn't possible,
we transparently fall back to ``shutil.copy2`` (preserving metadata).
"""
try:
os.link(source, dest)
return
except OSError as exc:
if exc.errno not in _LINK_FALLBACK_ERRNOS:
raise
shutil.copy2(source, dest)
INVALID_CHARS_HYPHENS = re.compile(r"[\\/:|]")
INVALID_CHARS_EMPTY = re.compile(r'[*?"<>+]')

View File

@@ -1,4 +1,3 @@
import shutil
from datetime import UTC, datetime
from pathlib import Path
from xml.etree.ElementTree import ( # trunk-ignore(bandit/B405)
@@ -17,6 +16,7 @@ from handler.database import db_platform_handler, db_rom_handler
from handler.filesystem import fs_platform_handler, fs_resource_handler
from logger.logger import log
from models.rom import Rom
from utils.filesystem import link_or_copy_file
# Map gamelist asset keys to subdirectory names inside assets/
ASSET_DIRS: dict[str, str] = {
@@ -151,14 +151,14 @@ class GamelistExporter:
return refs
def _copy_asset(self, source: Path, dest: Path) -> bool:
"""Copy a file from source to dest. Returns True on success."""
"""Place ``source`` at ``dest`` via hardlink (same filesystem) or copy
(otherwise). Returns True on success."""
dest.parent.mkdir(parents=True, exist_ok=True)
if dest.exists():
return True
try:
with open(source, "rb") as src, open(dest, "wb") as dst:
shutil.copyfileobj(src, dst)
link_or_copy_file(source, dest)
return True
except OSError as e:
log.warning(f"Failed to copy {source} -> {dest}: {e}")

View File

@@ -1,4 +1,3 @@
import shutil
from datetime import UTC, datetime
from pathlib import Path
@@ -8,6 +7,7 @@ from handler.database import db_platform_handler, db_rom_handler
from handler.filesystem import fs_platform_handler, fs_resource_handler
from logger.logger import log
from models.rom import Rom
from utils.filesystem import link_or_copy_file
# Map Pegasus asset keys to subdirectory names inside assets/
ASSET_DIRS: dict[str, str] = {
@@ -177,14 +177,14 @@ class PegasusExporter:
return "\n".join(lines)
def _copy_asset(self, source: Path, dest: Path) -> bool:
"""Copy a file from source to dest using raw read/write. Returns True on success."""
"""Place ``source`` at ``dest`` via hardlink (same filesystem) or copy
(otherwise). Returns True on success."""
dest.parent.mkdir(parents=True, exist_ok=True)
if dest.exists():
return True
try:
with open(source, "rb") as src, open(dest, "wb") as dst:
shutil.copyfileobj(src, dst)
link_or_copy_file(source, dest)
return True
except OSError as e:
log.warning(f"Failed to copy {source} -> {dest}: {e}")