fix(scan): prevent duplicate ROM entries from racing scans

The patcher uploads the patched ROM and then fires a platform scan to
register it. When a second scan runs against the same platform around the
same time (a filesystem-watcher rescan, a scheduled rescan, or another
manual scan on a multi-worker setup), both scans could see the new file as
absent from the DB and each insert it, producing two identical library
entries for one patched file.

A platform folder can't physically hold two entries with the same name, so
a ROM is uniquely identified by (platform_id, fs_name). Enforce that with a
unique index instead of the previous plain index, which makes the duplicate
impossible. The scan's early ROM insert now adopts the row created by a
concurrent scan (catching the integrity error and skipping) instead of
failing, and ROM rename pre-checks for a name collision so it returns a
clean 409 rather than hitting the constraint.

Includes a migration that removes any pre-existing duplicates (keeping the
lowest id; dependents cascade) before upgrading the index to unique.

Fixes #3590

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_0135UV8Xn2XHkRhjzhm9UptP
This commit is contained in:
Claude
2026-06-25 23:01:37 +00:00
parent 4d8d85c0ff
commit e4660aca4c
5 changed files with 151 additions and 17 deletions

View File

@@ -0,0 +1,72 @@
"""Enforce unique (platform_id, fs_name) on roms
A platform folder can't physically hold two entries with the same name, so a
ROM is uniquely identified by (platform_id, fs_name). The index covering this
pair was non-unique, which let two concurrent scans (e.g. the manual scan the
patcher fires after uploading a patched ROM, racing a filesystem-watcher or
scheduled scan) both insert the same ROM, producing duplicate library entries.
This migration removes any pre-existing duplicates (keeping the lowest id) and
upgrades the index to unique so the duplicate can never be created again.
Revision ID: 0091_unique_platform_fs_name
Revises: 0090_roms_sibling_cover_index
Create Date: 2026-06-25 00:00:00.000000
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "0091_unique_platform_fs_name"
down_revision = "0090_roms_sibling_cover_index"
branch_labels = None
depends_on = None
INDEX_NAME = "idx_roms_platform_id_fs_name"
INDEX_COLUMNS = ["platform_id", "fs_name"]
def upgrade() -> None:
connection = op.get_bind()
# Drop duplicate roms sharing (platform_id, fs_name), keeping the lowest id.
# The nested derived table is required so MySQL/MariaDB don't reject reading
# the same table being deleted from; it's a valid no-op on PostgreSQL too.
# Dependent rows (files, user props, assets, collections, ...) are removed by
# their ON DELETE CASCADE foreign keys.
connection.execute(
sa.text(
"""
DELETE FROM roms
WHERE id NOT IN (
SELECT keep_id FROM (
SELECT MIN(id) AS keep_id
FROM roms
GROUP BY platform_id, fs_name
) AS keepers
)
"""
)
)
with op.batch_alter_table("roms", schema=None) as batch_op:
batch_op.drop_index(INDEX_NAME, if_exists=True)
batch_op.create_index(
INDEX_NAME,
INDEX_COLUMNS,
unique=True,
if_not_exists=True,
)
def downgrade() -> None:
with op.batch_alter_table("roms", schema=None) as batch_op:
batch_op.drop_index(INDEX_NAME, if_exists=True)
batch_op.create_index(
INDEX_NAME,
INDEX_COLUMNS,
unique=False,
if_not_exists=True,
)

View File

@@ -1484,6 +1484,16 @@ async def update_rom(
# Re-parse tags from the filename so region/language/revision/version/tags
# stay in sync whenever the fs_name changes.
if new_fs_name != rom.fs_name:
# (platform_id, fs_name) is unique, so reject a rename that would collide
# with another ROM on this platform before the DB update would fail.
if db_rom_handler.get_roms_by_fs_name(
platform_id=rom.platform_id, fs_names={new_fs_name}
):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"A ROM named '{new_fs_name}' already exists on this platform",
)
parsed_tags = fs_rom_handler.parse_tags(new_fs_name)
cleaned_data.update(
{

View File

@@ -9,6 +9,7 @@ import pydash
import socketio # type: ignore
from rq import Worker
from rq.job import Job
from sqlalchemy.exc import IntegrityError
from config import DEV_MODE, REDIS_URL, SCAN_TIMEOUT, SCAN_WORKERS, TASK_RESULT_TTL
from config.config_manager import MetadataMediaType
@@ -252,22 +253,31 @@ async def _identify_rom(
# Create the entry early so we have the ID
newly_added: bool = rom is None
if not rom:
rom = db_rom_handler.add_rom(
Rom(
fs_name=fs_rom["fs_name"],
fs_path=roms_path,
regions=parsed_tags.regions,
revision=parsed_tags.revision,
version=parsed_tags.version,
languages=parsed_tags.languages,
tags=parsed_tags.other_tags,
platform_id=platform.id,
name=fs_rom["fs_name"],
url_cover="",
url_manual="",
url_screenshots=[],
try:
rom = db_rom_handler.add_rom(
Rom(
fs_name=fs_rom["fs_name"],
fs_path=roms_path,
regions=parsed_tags.regions,
revision=parsed_tags.revision,
version=parsed_tags.version,
languages=parsed_tags.languages,
tags=parsed_tags.other_tags,
platform_id=platform.id,
name=fs_rom["fs_name"],
url_cover="",
url_manual="",
url_screenshots=[],
)
)
)
except IntegrityError:
# A concurrent scan already created this ROM ((platform_id, fs_name)
# is unique). That scan owns the full processing of this file, so
# skip it here instead of inserting a duplicate library entry.
log.debug(
f"Skipping {hl(fs_rom['fs_name'])}: already created by a concurrent scan"
)
return
# Build rom files object before scanning
should_update_files = _should_get_rom_files(

View File

@@ -208,7 +208,10 @@ class Rom(BaseModel):
libretro_id: Mapped[str | None] = mapped_column(String(length=64), default=None)
__table_args__ = (
Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name"),
# A platform folder can't hold two entries with the same name on disk,
# so (platform_id, fs_name) is unique. Enforcing it also stops racing
# scans from inserting the same ROM twice.
Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name", unique=True),
# Covers the sibling_roms view self-join
Index(
"idx_roms_sibling_cover",

View File

@@ -4,7 +4,11 @@ Bulk `update()` bypasses the ORM `@validates` hooks, so `update_rom` keeps
the columns derived from `name` / `fs_name` in sync explicitly.
"""
from handler.database import db_rom_handler
import pytest
from sqlalchemy.exc import IntegrityError
from handler.database import db_platform_handler, db_rom_handler
from models.platform import Platform
from models.rom import Rom
@@ -44,3 +48,38 @@ class TestUpdateRomDerivedColumns:
# A pinned custom key is never clobbered by a name change.
assert updated.name == "The New Name 2"
assert updated.name_sort_key == "pinned"
def _make_rom(platform: Platform, fs_name: str) -> Rom:
return Rom(
platform_id=platform.id,
fs_name=fs_name,
fs_path=f"{platform.slug}/roms",
name=fs_name,
url_cover="",
url_manual="",
url_screenshots=[],
)
class TestUniquePlatformFsName:
"""A platform folder can't hold two entries with the same name, so the DB
rejects a second ROM with the same (platform_id, fs_name). This is what
stops racing scans (e.g. after the patcher uploads a patched ROM) from
creating duplicate library entries."""
def test_duplicate_platform_fs_name_rejected(self, platform: Platform):
db_rom_handler.add_rom(_make_rom(platform, "Patched Game.gba"))
with pytest.raises(IntegrityError):
db_rom_handler.add_rom(_make_rom(platform, "Patched Game.gba"))
def test_same_fs_name_other_platform_allowed(self, platform: Platform):
other = db_platform_handler.add_platform(
Platform(name="other", slug="other_slug", fs_slug="other_slug")
)
first = db_rom_handler.add_rom(_make_rom(platform, "Patched Game.gba"))
second = db_rom_handler.add_rom(_make_rom(other, "Patched Game.gba"))
assert first.id != second.id