diff --git a/backend/alembic/versions/0090_roms_sibling_cover_index.py b/backend/alembic/versions/0090_roms_sibling_cover_index.py new file mode 100644 index 000000000..ca1608901 --- /dev/null +++ b/backend/alembic/versions/0090_roms_sibling_cover_index.py @@ -0,0 +1,51 @@ +"""Add covering index for the sibling_roms self-join + +The sibling_roms view self-joins roms on platform_id plus a 7-way OR over +metadata id columns (igdb_id, moby_id, ss_id, launchbox_id, ra_id, hasheous_id, +tgdb_id). When the join is filtered by a parent rom, the OR can't be +index-driven (the compared values come from the joined row, not constants), so +each parent scanned every rom on its platform and read wide rows just to +evaluate the OR. This composite index lets the join seek the platform partition +and resolve the OR from the index alone, without hydrating the wide rows. + +Revision ID: 0090_roms_sibling_cover_index +Revises: 0089_client_tokens_device_id +Create Date: 2026-06-24 00:00:00.000000 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "0090_roms_sibling_cover_index" +down_revision = "0089_client_tokens_device_id" +branch_labels = None +depends_on = None + +INDEX_NAME = "idx_roms_sibling_cover" +INDEX_COLUMNS = [ + "platform_id", + "igdb_id", + "moby_id", + "ss_id", + "launchbox_id", + "ra_id", + "hasheous_id", + "tgdb_id", + "id", +] + + +def upgrade() -> None: + with op.batch_alter_table("roms", schema=None) as batch_op: + batch_op.create_index( + INDEX_NAME, + INDEX_COLUMNS, + unique=False, + if_not_exists=True, + ) + + +def downgrade() -> None: + with op.batch_alter_table("roms", schema=None) as batch_op: + batch_op.drop_index(INDEX_NAME, if_exists=True) diff --git a/backend/endpoints/roms/__init__.py b/backend/endpoints/roms/__init__.py index 534a7dd3b..d4940dea7 100644 --- a/backend/endpoints/roms/__init__.py +++ b/backend/endpoints/roms/__init__.py @@ -101,6 +101,27 @@ def safe_int_or_none(value: Any) -> int | None: return safe_int(value) +def build_unscoped_sidecar_cache_key( + user_id: int, + order_by: str, + order_dir: str, + group_by_meta_id: bool, + is_unscoped: bool, +) -> str | None: + """Cache key for the unscoped library sidecars (char index, filter values, + rom id index). Returns None for scoped/searched sets, which are computed live. + The computed values depend on user, ordering and grouping, so all are part + of the key. + """ + if not is_unscoped: + return None + + return ( + f"all:u{user_id}" + f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}" + ) + + class RomUpdateForm(BaseModel): igdb_id: str | None = Field(default=None, description="IGDB game ID.") sgdb_id: str | None = Field(default=None, description="SteamGridDB game ID.") @@ -552,27 +573,46 @@ def get_roms( include_file_stats=True, ) - # Cache only the unscoped library scan; scoped/searched sets are narrower and computed live. + # Cache only the fully unscoped library scan; any narrowing parameter makes + # the result set narrower, so it is computed live. The sidecar cache key + # encodes only user/order/grouping, not the filters, so every filter applied + # to `query` below must gate caching here or a narrowed list leaks under the + # shared "all" key. Bool flags use `is not None` since False is an active + # filter. Logic operators are omitted: they only matter when their list + # filter is set, which is already covered. is_unscoped = not ( search_term or platform_ids or collection_id or virtual_collection_id or smart_collection_id + or genres + or franchises + or collections + or companies + or age_ratings + or statuses + or regions + or languages + or player_counts + or updated_after + or matched is not None + or favorite is not None + or duplicate is not None + or last_played is not None + or playable is not None + or has_ra is not None + or missing is not None + or verified is not None ) # Get the char index for the roms char_index_dict = {} if with_char_index: - # The computed positions depend on ordering and grouping, so those - # must be part of the cache key. Otherwise switching sort - # direction/column (or toggling grouping) reuses a stale index and - # the AlphaStrip highlights the wrong letters. - char_index_cache_key = ( - f"all:u{request.user.id}" - f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}" - if is_unscoped - else None + # Switching sort direction/column (or toggling grouping) must not reuse + # a stale index, or the AlphaStrip highlights the wrong letters. + char_index_cache_key = build_unscoped_sidecar_cache_key( + request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped ) char_index = db_rom_handler.with_char_index( query=query, @@ -605,11 +645,8 @@ def get_roms( smart_collection_id=smart_collection_id, search_term=search_term, ) - cache_key = ( - f"all:u{request.user.id}" - f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}" - if is_unscoped - else None + cache_key = build_unscoped_sidecar_cache_key( + request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped ) query_filters = db_rom_handler.with_filter_values( query=filter_query, @@ -618,9 +655,18 @@ def get_roms( # trunk-ignore(mypy/typeddict-item) filter_values = RomFiltersDict(**query_filters) - # Get all ROM IDs in order for the additional data + # The full ordered id list backs virtual scroll, so it's computed over the + # whole result set on every request. Memoise the unscoped library scan (same + # key scheme as the other sidecars); scoped/searched sets stay live. + rom_id_index_cache_key = build_unscoped_sidecar_cache_key( + request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped + ) + rom_id_index = db_rom_handler.get_rom_id_index( + query=query, cache_key=rom_id_index_cache_key + ) + + # Hydrate the requested page and its additional data with sync_session.begin() as session: - rom_id_index = session.scalars(query.with_only_columns(Rom.id)).all() # type: ignore def _transform(items: Sequence[Rom]) -> list[SimpleRomSchema]: rom_ids = [i.id for i in items] diff --git a/backend/handler/database/roms_handler.py b/backend/handler/database/roms_handler.py index 479ebb9f3..a47622467 100644 --- a/backend/handler/database/roms_handler.py +++ b/backend/handler/database/roms_handler.py @@ -842,24 +842,34 @@ class DBRomsHandler(DBBaseHandler): ) # Create a subquery that identifies the primary ROM in each group - # Priority order: is_main_sibling (desc), then by fs_name_no_ext (asc) - base_subquery = query.subquery() + # Priority order: is_main_sibling (desc), then by fs_name_no_ext (asc). + # Materialize only the columns the dedup window needs (not all of + # Rom, whose JSON metadata blobs make the derived table huge), and + # drop the carried-over ORDER BY the window doesn't use. + base_subquery = ( + query.order_by(None) + .with_only_columns( + Rom.id, + Rom.fs_name_no_ext, + Rom.platform_id, + Rom.igdb_id, + Rom.ss_id, + Rom.moby_id, + Rom.ra_id, + Rom.hasheous_id, + Rom.launchbox_id, + Rom.tgdb_id, + Rom.flashpoint_id, + ) + .subquery() + ) + # Only id and the row number flow downstream; the partition/order + # inputs are read straight from base_subquery, so keeping them out of + # this SELECT keeps the window's temp table narrow (carrying the wide + # fs_name_no_ext through it spilled the sort to disk). group_subquery = ( select(base_subquery.c.id) .select_from(base_subquery) - .with_only_columns( - base_subquery.c.id, - base_subquery.c.fs_name_no_ext, - base_subquery.c.platform_id, - base_subquery.c.igdb_id, - base_subquery.c.ss_id, - base_subquery.c.moby_id, - base_subquery.c.ra_id, - base_subquery.c.hasheous_id, - base_subquery.c.launchbox_id, - base_subquery.c.tgdb_id, - base_subquery.c.flashpoint_id, - ) .outerjoin( RomUser, and_( @@ -1125,45 +1135,64 @@ class DBRomsHandler(DBBaseHandler): if not isinstance(order_by_attr.type, (String, Text)): order_by_attr = Rom.name_sort_key - # Apply the same direction the main query uses so the position - # numbers we emit (and the per-letter min position downstream) - # match the actual order the client paginates over. Without this - # the frontend AlphaStrip would highlight the wrong letter when - # order_dir=desc. - order_window = ( - order_by_attr.desc() if order_dir.lower() == "desc" else order_by_attr.asc() - ) - - # Get the row number and first letter for each item - subquery = ( - query.with_only_columns(Rom.id, Rom.name) # type: ignore - .add_columns( # type: ignore - func.substring( - order_by_attr, - 1, - 1, - ).label("letter"), - func.row_number().over(order_by=order_window).label("position"), + # The alpha-strip only needs each first letter's starting offset, not a + # positional number for every row. Counting rows per letter and + # accumulating those counts avoids row_number() over the whole library, + # which forced a full materialization + filesort on large libraries. + descending = order_dir.lower() == "desc" + letter = func.substring(order_by_attr, 1, 1) + counts = ( + query.with_only_columns( # type: ignore + letter.label("letter"), func.count().label("count") ) - .subquery() + .group_by(letter) + .order_by(letter.desc() if descending else letter.asc()) ) - # Get the minimum position for each letter - rows = ( - session.query( - subquery.c.letter, func.min(subquery.c.position - 1).label("position") - ) - .filter(subquery.c.letter.isnot(None)) - .group_by(subquery.c.letter) - .order_by(subquery.c.letter) - .all() - ) - - result = [(letter, int(position)) for letter, position in rows] + # Walk the letters in the same direction the client paginates over, so + # each letter's offset is the count of rows that sort before it. + result: list[tuple[str, int]] = [] + offset = 0 + for value, count in session.execute(counts).all(): + if value is not None: + result.append((value, offset)) + offset += count + result.sort(key=lambda entry: entry[0]) if redis_key is not None and version is not None: _store_versioned_cache(redis_key, version, result) return result + @begin_session + def get_rom_id_index( + self, + query: Query, + *, + cache_key: str | None = None, + session: Session = None, # type: ignore + ) -> list[int]: + """Return every matching rom id in query order. + + The list backs the gallery's virtual scroll, so it spans the whole + result set (not a page) and is recomputed on every request. Building it + runs the sibling-dedup window over the full library, so the unscoped + case is memoised under the same versioned cache as the other gallery + sidecars. + """ + redis_key: str | None = None + version: str | None = None + if cache_key: + version = _filter_values_cache_version() + redis_key = f"rom_id_index:{cache_key}:v{version}" + cached = sync_cache.get(redis_key) + if cached is not None: + return json.loads(cached) + + ids = list(session.scalars(query.with_only_columns(Rom.id)).all()) # type: ignore + + if redis_key is not None and version is not None: + _store_versioned_cache(redis_key, version, ids) + return ids + @begin_session def get_roms_by_fs_name( self, diff --git a/backend/models/rom.py b/backend/models/rom.py index f428bb702..104979f2b 100644 --- a/backend/models/rom.py +++ b/backend/models/rom.py @@ -209,6 +209,19 @@ class Rom(BaseModel): __table_args__ = ( Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name"), + # Covers the sibling_roms view self-join + Index( + "idx_roms_sibling_cover", + "platform_id", + "igdb_id", + "moby_id", + "ss_id", + "launchbox_id", + "ra_id", + "hasheous_id", + "tgdb_id", + "id", + ), Index("idx_roms_name", "name"), Index("idx_roms_name_sort_key", "name_sort_key"), Index("idx_roms_igdb_id", "igdb_id"), diff --git a/backend/tests/handler/database/test_roms_filter_cache.py b/backend/tests/handler/database/test_roms_filter_cache.py index f5b83ab2b..3f8213e35 100644 --- a/backend/tests/handler/database/test_roms_filter_cache.py +++ b/backend/tests/handler/database/test_roms_filter_cache.py @@ -178,6 +178,35 @@ class TestCacheHitMatchesMiss: # Same consumed shape as the miss (the endpoint folds this into a dict). assert dict(hit) == dict(miss) == {"t": 0} + def test_get_rom_id_index_hit_matches_miss(self, rom: Rom): + query = cast(Query[Rom], select(Rom)) + cache_key = "all:test-idindex" + + miss = db_rom_handler.get_rom_id_index(query=query, cache_key=cache_key) + + version = _filter_values_cache_version() + redis_key = f"rom_id_index:{cache_key}:v{version}" + assert sync_cache.get(redis_key) is not None + assert miss == [rom.id] + + # Add a ROM; a cache hit must ignore it until the version bumps. + with session_factory.begin() as s: + s.add( + Rom( + platform_id=rom.platform_id, + name="Another", + slug="another-slug-idx", + fs_name="another-idx.zip", + fs_name_no_tags="another-idx", + fs_name_no_ext="another-idx", + fs_extension="zip", + fs_path=rom.fs_path, + ) + ) + + hit = db_rom_handler.get_rom_id_index(query=query, cache_key=cache_key) + assert hit == miss == [rom.id] + class TestInvalidateFilterValuesCache: def test_deletes_prior_version_keys_and_set(self, rom_with_metadata: Rom): diff --git a/backend/tools/generate_test_data.py b/backend/tools/generate_test_data.py index 72c376390..0c26319a9 100644 --- a/backend/tools/generate_test_data.py +++ b/backend/tools/generate_test_data.py @@ -714,11 +714,6 @@ def main() -> int: parser.add_argument( "--password", default="password", help="Plain password for every seeded user" ) - parser.add_argument( - "--user-prefix", - default="loadtest", - help="Prefix for seeded usernames (kept unique by id)", - ) parser.add_argument( "--wipe", action="store_true", @@ -852,11 +847,11 @@ def main() -> int: # Suffix with the assigned id so usernames/emails never collide with # rows already in the target database. if i == 0: - username, role = f"{args.user_prefix}_admin_{uid}", Role.ADMIN + username, role = f"admin_{uid}", Role.ADMIN elif i <= 2: - username, role = f"{args.user_prefix}_editor_{uid}", Role.EDITOR + username, role = f"editor_{uid}", Role.EDITOR else: - username, role = f"{args.user_prefix}_viewer_{uid}", Role.VIEWER + username, role = f"viewer_{uid}", Role.VIEWER created = rand_past(rng, now) user_rows.append( {