mirror of
https://github.com/rommapp/romm.git
synced 2026-06-27 22:35:57 +00:00
fix(scan): prevent duplicate ROM entries from racing scans
The patcher uploads the patched ROM and then fires a platform scan to register it. When a second scan runs against the same platform around the same time (a filesystem-watcher rescan, a scheduled rescan, or another manual scan on a multi-worker setup), both scans could see the new file as absent from the DB and each insert it, producing two identical library entries for one patched file. A platform folder can't physically hold two entries with the same name, so a ROM is uniquely identified by (platform_id, fs_name). Enforce that with a unique index instead of the previous plain index, which makes the duplicate impossible. The scan's early ROM insert now adopts the row created by a concurrent scan (catching the integrity error and skipping) instead of failing, and ROM rename pre-checks for a name collision so it returns a clean 409 rather than hitting the constraint. Includes a migration that removes any pre-existing duplicates (keeping the lowest id; dependents cascade) before upgrading the index to unique. Fixes #3590 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_0135UV8Xn2XHkRhjzhm9UptP
This commit is contained in:
72
backend/alembic/versions/0091_unique_platform_fs_name.py
Normal file
72
backend/alembic/versions/0091_unique_platform_fs_name.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Enforce unique (platform_id, fs_name) on roms
|
||||
|
||||
A platform folder can't physically hold two entries with the same name, so a
|
||||
ROM is uniquely identified by (platform_id, fs_name). The index covering this
|
||||
pair was non-unique, which let two concurrent scans (e.g. the manual scan the
|
||||
patcher fires after uploading a patched ROM, racing a filesystem-watcher or
|
||||
scheduled scan) both insert the same ROM, producing duplicate library entries.
|
||||
|
||||
This migration removes any pre-existing duplicates (keeping the lowest id) and
|
||||
upgrades the index to unique so the duplicate can never be created again.
|
||||
|
||||
Revision ID: 0091_unique_platform_fs_name
|
||||
Revises: 0090_roms_sibling_cover_index
|
||||
Create Date: 2026-06-25 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0091_unique_platform_fs_name"
|
||||
down_revision = "0090_roms_sibling_cover_index"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
INDEX_NAME = "idx_roms_platform_id_fs_name"
|
||||
INDEX_COLUMNS = ["platform_id", "fs_name"]
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
connection = op.get_bind()
|
||||
|
||||
# Drop duplicate roms sharing (platform_id, fs_name), keeping the lowest id.
|
||||
# The nested derived table is required so MySQL/MariaDB don't reject reading
|
||||
# the same table being deleted from; it's a valid no-op on PostgreSQL too.
|
||||
# Dependent rows (files, user props, assets, collections, ...) are removed by
|
||||
# their ON DELETE CASCADE foreign keys.
|
||||
connection.execute(
|
||||
sa.text(
|
||||
"""
|
||||
DELETE FROM roms
|
||||
WHERE id NOT IN (
|
||||
SELECT keep_id FROM (
|
||||
SELECT MIN(id) AS keep_id
|
||||
FROM roms
|
||||
GROUP BY platform_id, fs_name
|
||||
) AS keepers
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
with op.batch_alter_table("roms", schema=None) as batch_op:
|
||||
batch_op.drop_index(INDEX_NAME, if_exists=True)
|
||||
batch_op.create_index(
|
||||
INDEX_NAME,
|
||||
INDEX_COLUMNS,
|
||||
unique=True,
|
||||
if_not_exists=True,
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
with op.batch_alter_table("roms", schema=None) as batch_op:
|
||||
batch_op.drop_index(INDEX_NAME, if_exists=True)
|
||||
batch_op.create_index(
|
||||
INDEX_NAME,
|
||||
INDEX_COLUMNS,
|
||||
unique=False,
|
||||
if_not_exists=True,
|
||||
)
|
||||
@@ -1484,6 +1484,16 @@ async def update_rom(
|
||||
# Re-parse tags from the filename so region/language/revision/version/tags
|
||||
# stay in sync whenever the fs_name changes.
|
||||
if new_fs_name != rom.fs_name:
|
||||
# (platform_id, fs_name) is unique, so reject a rename that would collide
|
||||
# with another ROM on this platform before the DB update would fail.
|
||||
if db_rom_handler.get_roms_by_fs_name(
|
||||
platform_id=rom.platform_id, fs_names={new_fs_name}
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail=f"A ROM named '{new_fs_name}' already exists on this platform",
|
||||
)
|
||||
|
||||
parsed_tags = fs_rom_handler.parse_tags(new_fs_name)
|
||||
cleaned_data.update(
|
||||
{
|
||||
|
||||
@@ -9,6 +9,7 @@ import pydash
|
||||
import socketio # type: ignore
|
||||
from rq import Worker
|
||||
from rq.job import Job
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from config import DEV_MODE, REDIS_URL, SCAN_TIMEOUT, SCAN_WORKERS, TASK_RESULT_TTL
|
||||
from config.config_manager import MetadataMediaType
|
||||
@@ -252,22 +253,31 @@ async def _identify_rom(
|
||||
# Create the entry early so we have the ID
|
||||
newly_added: bool = rom is None
|
||||
if not rom:
|
||||
rom = db_rom_handler.add_rom(
|
||||
Rom(
|
||||
fs_name=fs_rom["fs_name"],
|
||||
fs_path=roms_path,
|
||||
regions=parsed_tags.regions,
|
||||
revision=parsed_tags.revision,
|
||||
version=parsed_tags.version,
|
||||
languages=parsed_tags.languages,
|
||||
tags=parsed_tags.other_tags,
|
||||
platform_id=platform.id,
|
||||
name=fs_rom["fs_name"],
|
||||
url_cover="",
|
||||
url_manual="",
|
||||
url_screenshots=[],
|
||||
try:
|
||||
rom = db_rom_handler.add_rom(
|
||||
Rom(
|
||||
fs_name=fs_rom["fs_name"],
|
||||
fs_path=roms_path,
|
||||
regions=parsed_tags.regions,
|
||||
revision=parsed_tags.revision,
|
||||
version=parsed_tags.version,
|
||||
languages=parsed_tags.languages,
|
||||
tags=parsed_tags.other_tags,
|
||||
platform_id=platform.id,
|
||||
name=fs_rom["fs_name"],
|
||||
url_cover="",
|
||||
url_manual="",
|
||||
url_screenshots=[],
|
||||
)
|
||||
)
|
||||
)
|
||||
except IntegrityError:
|
||||
# A concurrent scan already created this ROM ((platform_id, fs_name)
|
||||
# is unique). That scan owns the full processing of this file, so
|
||||
# skip it here instead of inserting a duplicate library entry.
|
||||
log.debug(
|
||||
f"Skipping {hl(fs_rom['fs_name'])}: already created by a concurrent scan"
|
||||
)
|
||||
return
|
||||
|
||||
# Build rom files object before scanning
|
||||
should_update_files = _should_get_rom_files(
|
||||
|
||||
@@ -208,7 +208,10 @@ class Rom(BaseModel):
|
||||
libretro_id: Mapped[str | None] = mapped_column(String(length=64), default=None)
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name"),
|
||||
# A platform folder can't hold two entries with the same name on disk,
|
||||
# so (platform_id, fs_name) is unique. Enforcing it also stops racing
|
||||
# scans from inserting the same ROM twice.
|
||||
Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name", unique=True),
|
||||
# Covers the sibling_roms view self-join
|
||||
Index(
|
||||
"idx_roms_sibling_cover",
|
||||
|
||||
@@ -4,7 +4,11 @@ Bulk `update()` bypasses the ORM `@validates` hooks, so `update_rom` keeps
|
||||
the columns derived from `name` / `fs_name` in sync explicitly.
|
||||
"""
|
||||
|
||||
from handler.database import db_rom_handler
|
||||
import pytest
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from handler.database import db_platform_handler, db_rom_handler
|
||||
from models.platform import Platform
|
||||
from models.rom import Rom
|
||||
|
||||
|
||||
@@ -44,3 +48,38 @@ class TestUpdateRomDerivedColumns:
|
||||
# A pinned custom key is never clobbered by a name change.
|
||||
assert updated.name == "The New Name 2"
|
||||
assert updated.name_sort_key == "pinned"
|
||||
|
||||
|
||||
def _make_rom(platform: Platform, fs_name: str) -> Rom:
|
||||
return Rom(
|
||||
platform_id=platform.id,
|
||||
fs_name=fs_name,
|
||||
fs_path=f"{platform.slug}/roms",
|
||||
name=fs_name,
|
||||
url_cover="",
|
||||
url_manual="",
|
||||
url_screenshots=[],
|
||||
)
|
||||
|
||||
|
||||
class TestUniquePlatformFsName:
|
||||
"""A platform folder can't hold two entries with the same name, so the DB
|
||||
rejects a second ROM with the same (platform_id, fs_name). This is what
|
||||
stops racing scans (e.g. after the patcher uploads a patched ROM) from
|
||||
creating duplicate library entries."""
|
||||
|
||||
def test_duplicate_platform_fs_name_rejected(self, platform: Platform):
|
||||
db_rom_handler.add_rom(_make_rom(platform, "Patched Game.gba"))
|
||||
|
||||
with pytest.raises(IntegrityError):
|
||||
db_rom_handler.add_rom(_make_rom(platform, "Patched Game.gba"))
|
||||
|
||||
def test_same_fs_name_other_platform_allowed(self, platform: Platform):
|
||||
other = db_platform_handler.add_platform(
|
||||
Platform(name="other", slug="other_slug", fs_slug="other_slug")
|
||||
)
|
||||
|
||||
first = db_rom_handler.add_rom(_make_rom(platform, "Patched Game.gba"))
|
||||
second = db_rom_handler.add_rom(_make_rom(other, "Patched Game.gba"))
|
||||
|
||||
assert first.id != second.id
|
||||
|
||||
Reference in New Issue
Block a user