Merge pull request #3595 from rommapp/perf/roms-list-query-optimizations

perf(roms): speed up the gallery/search list endpoint on large libraries
This commit is contained in:
Georges-Antoine Assi
2026-06-25 09:18:42 -04:00
committed by GitHub
6 changed files with 235 additions and 72 deletions

View File

@@ -0,0 +1,51 @@
"""Add covering index for the sibling_roms self-join
The sibling_roms view self-joins roms on platform_id plus a 7-way OR over
metadata id columns (igdb_id, moby_id, ss_id, launchbox_id, ra_id, hasheous_id,
tgdb_id). When the join is filtered by a parent rom, the OR can't be
index-driven (the compared values come from the joined row, not constants), so
each parent scanned every rom on its platform and read wide rows just to
evaluate the OR. This composite index lets the join seek the platform partition
and resolve the OR from the index alone, without hydrating the wide rows.
Revision ID: 0090_roms_sibling_cover_index
Revises: 0089_client_tokens_device_id
Create Date: 2026-06-24 00:00:00.000000
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "0090_roms_sibling_cover_index"
down_revision = "0089_client_tokens_device_id"
branch_labels = None
depends_on = None
INDEX_NAME = "idx_roms_sibling_cover"
INDEX_COLUMNS = [
"platform_id",
"igdb_id",
"moby_id",
"ss_id",
"launchbox_id",
"ra_id",
"hasheous_id",
"tgdb_id",
"id",
]
def upgrade() -> None:
with op.batch_alter_table("roms", schema=None) as batch_op:
batch_op.create_index(
INDEX_NAME,
INDEX_COLUMNS,
unique=False,
if_not_exists=True,
)
def downgrade() -> None:
with op.batch_alter_table("roms", schema=None) as batch_op:
batch_op.drop_index(INDEX_NAME, if_exists=True)

View File

@@ -101,6 +101,27 @@ def safe_int_or_none(value: Any) -> int | None:
return safe_int(value)
def build_unscoped_sidecar_cache_key(
user_id: int,
order_by: str,
order_dir: str,
group_by_meta_id: bool,
is_unscoped: bool,
) -> str | None:
"""Cache key for the unscoped library sidecars (char index, filter values,
rom id index). Returns None for scoped/searched sets, which are computed live.
The computed values depend on user, ordering and grouping, so all are part
of the key.
"""
if not is_unscoped:
return None
return (
f"all:u{user_id}"
f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}"
)
class RomUpdateForm(BaseModel):
igdb_id: str | None = Field(default=None, description="IGDB game ID.")
sgdb_id: str | None = Field(default=None, description="SteamGridDB game ID.")
@@ -552,27 +573,46 @@ def get_roms(
include_file_stats=True,
)
# Cache only the unscoped library scan; scoped/searched sets are narrower and computed live.
# Cache only the fully unscoped library scan; any narrowing parameter makes
# the result set narrower, so it is computed live. The sidecar cache key
# encodes only user/order/grouping, not the filters, so every filter applied
# to `query` below must gate caching here or a narrowed list leaks under the
# shared "all" key. Bool flags use `is not None` since False is an active
# filter. Logic operators are omitted: they only matter when their list
# filter is set, which is already covered.
is_unscoped = not (
search_term
or platform_ids
or collection_id
or virtual_collection_id
or smart_collection_id
or genres
or franchises
or collections
or companies
or age_ratings
or statuses
or regions
or languages
or player_counts
or updated_after
or matched is not None
or favorite is not None
or duplicate is not None
or last_played is not None
or playable is not None
or has_ra is not None
or missing is not None
or verified is not None
)
# Get the char index for the roms
char_index_dict = {}
if with_char_index:
# The computed positions depend on ordering and grouping, so those
# must be part of the cache key. Otherwise switching sort
# direction/column (or toggling grouping) reuses a stale index and
# the AlphaStrip highlights the wrong letters.
char_index_cache_key = (
f"all:u{request.user.id}"
f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}"
if is_unscoped
else None
# Switching sort direction/column (or toggling grouping) must not reuse
# a stale index, or the AlphaStrip highlights the wrong letters.
char_index_cache_key = build_unscoped_sidecar_cache_key(
request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped
)
char_index = db_rom_handler.with_char_index(
query=query,
@@ -605,11 +645,8 @@ def get_roms(
smart_collection_id=smart_collection_id,
search_term=search_term,
)
cache_key = (
f"all:u{request.user.id}"
f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}"
if is_unscoped
else None
cache_key = build_unscoped_sidecar_cache_key(
request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped
)
query_filters = db_rom_handler.with_filter_values(
query=filter_query,
@@ -618,9 +655,18 @@ def get_roms(
# trunk-ignore(mypy/typeddict-item)
filter_values = RomFiltersDict(**query_filters)
# Get all ROM IDs in order for the additional data
# The full ordered id list backs virtual scroll, so it's computed over the
# whole result set on every request. Memoise the unscoped library scan (same
# key scheme as the other sidecars); scoped/searched sets stay live.
rom_id_index_cache_key = build_unscoped_sidecar_cache_key(
request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped
)
rom_id_index = db_rom_handler.get_rom_id_index(
query=query, cache_key=rom_id_index_cache_key
)
# Hydrate the requested page and its additional data
with sync_session.begin() as session:
rom_id_index = session.scalars(query.with_only_columns(Rom.id)).all() # type: ignore
def _transform(items: Sequence[Rom]) -> list[SimpleRomSchema]:
rom_ids = [i.id for i in items]

View File

@@ -842,24 +842,34 @@ class DBRomsHandler(DBBaseHandler):
)
# Create a subquery that identifies the primary ROM in each group
# Priority order: is_main_sibling (desc), then by fs_name_no_ext (asc)
base_subquery = query.subquery()
# Priority order: is_main_sibling (desc), then by fs_name_no_ext (asc).
# Materialize only the columns the dedup window needs (not all of
# Rom, whose JSON metadata blobs make the derived table huge), and
# drop the carried-over ORDER BY the window doesn't use.
base_subquery = (
query.order_by(None)
.with_only_columns(
Rom.id,
Rom.fs_name_no_ext,
Rom.platform_id,
Rom.igdb_id,
Rom.ss_id,
Rom.moby_id,
Rom.ra_id,
Rom.hasheous_id,
Rom.launchbox_id,
Rom.tgdb_id,
Rom.flashpoint_id,
)
.subquery()
)
# Only id and the row number flow downstream; the partition/order
# inputs are read straight from base_subquery, so keeping them out of
# this SELECT keeps the window's temp table narrow (carrying the wide
# fs_name_no_ext through it spilled the sort to disk).
group_subquery = (
select(base_subquery.c.id)
.select_from(base_subquery)
.with_only_columns(
base_subquery.c.id,
base_subquery.c.fs_name_no_ext,
base_subquery.c.platform_id,
base_subquery.c.igdb_id,
base_subquery.c.ss_id,
base_subquery.c.moby_id,
base_subquery.c.ra_id,
base_subquery.c.hasheous_id,
base_subquery.c.launchbox_id,
base_subquery.c.tgdb_id,
base_subquery.c.flashpoint_id,
)
.outerjoin(
RomUser,
and_(
@@ -1125,45 +1135,64 @@ class DBRomsHandler(DBBaseHandler):
if not isinstance(order_by_attr.type, (String, Text)):
order_by_attr = Rom.name_sort_key
# Apply the same direction the main query uses so the position
# numbers we emit (and the per-letter min position downstream)
# match the actual order the client paginates over. Without this
# the frontend AlphaStrip would highlight the wrong letter when
# order_dir=desc.
order_window = (
order_by_attr.desc() if order_dir.lower() == "desc" else order_by_attr.asc()
)
# Get the row number and first letter for each item
subquery = (
query.with_only_columns(Rom.id, Rom.name) # type: ignore
.add_columns( # type: ignore
func.substring(
order_by_attr,
1,
1,
).label("letter"),
func.row_number().over(order_by=order_window).label("position"),
# The alpha-strip only needs each first letter's starting offset, not a
# positional number for every row. Counting rows per letter and
# accumulating those counts avoids row_number() over the whole library,
# which forced a full materialization + filesort on large libraries.
descending = order_dir.lower() == "desc"
letter = func.substring(order_by_attr, 1, 1)
counts = (
query.with_only_columns( # type: ignore
letter.label("letter"), func.count().label("count")
)
.subquery()
.group_by(letter)
.order_by(letter.desc() if descending else letter.asc())
)
# Get the minimum position for each letter
rows = (
session.query(
subquery.c.letter, func.min(subquery.c.position - 1).label("position")
)
.filter(subquery.c.letter.isnot(None))
.group_by(subquery.c.letter)
.order_by(subquery.c.letter)
.all()
)
result = [(letter, int(position)) for letter, position in rows]
# Walk the letters in the same direction the client paginates over, so
# each letter's offset is the count of rows that sort before it.
result: list[tuple[str, int]] = []
offset = 0
for value, count in session.execute(counts).all():
if value is not None:
result.append((value, offset))
offset += count
result.sort(key=lambda entry: entry[0])
if redis_key is not None and version is not None:
_store_versioned_cache(redis_key, version, result)
return result
@begin_session
def get_rom_id_index(
self,
query: Query,
*,
cache_key: str | None = None,
session: Session = None, # type: ignore
) -> list[int]:
"""Return every matching rom id in query order.
The list backs the gallery's virtual scroll, so it spans the whole
result set (not a page) and is recomputed on every request. Building it
runs the sibling-dedup window over the full library, so the unscoped
case is memoised under the same versioned cache as the other gallery
sidecars.
"""
redis_key: str | None = None
version: str | None = None
if cache_key:
version = _filter_values_cache_version()
redis_key = f"rom_id_index:{cache_key}:v{version}"
cached = sync_cache.get(redis_key)
if cached is not None:
return json.loads(cached)
ids = list(session.scalars(query.with_only_columns(Rom.id)).all()) # type: ignore
if redis_key is not None and version is not None:
_store_versioned_cache(redis_key, version, ids)
return ids
@begin_session
def get_roms_by_fs_name(
self,

View File

@@ -209,6 +209,19 @@ class Rom(BaseModel):
__table_args__ = (
Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name"),
# Covers the sibling_roms view self-join
Index(
"idx_roms_sibling_cover",
"platform_id",
"igdb_id",
"moby_id",
"ss_id",
"launchbox_id",
"ra_id",
"hasheous_id",
"tgdb_id",
"id",
),
Index("idx_roms_name", "name"),
Index("idx_roms_name_sort_key", "name_sort_key"),
Index("idx_roms_igdb_id", "igdb_id"),

View File

@@ -178,6 +178,35 @@ class TestCacheHitMatchesMiss:
# Same consumed shape as the miss (the endpoint folds this into a dict).
assert dict(hit) == dict(miss) == {"t": 0}
def test_get_rom_id_index_hit_matches_miss(self, rom: Rom):
query = cast(Query[Rom], select(Rom))
cache_key = "all:test-idindex"
miss = db_rom_handler.get_rom_id_index(query=query, cache_key=cache_key)
version = _filter_values_cache_version()
redis_key = f"rom_id_index:{cache_key}:v{version}"
assert sync_cache.get(redis_key) is not None
assert miss == [rom.id]
# Add a ROM; a cache hit must ignore it until the version bumps.
with session_factory.begin() as s:
s.add(
Rom(
platform_id=rom.platform_id,
name="Another",
slug="another-slug-idx",
fs_name="another-idx.zip",
fs_name_no_tags="another-idx",
fs_name_no_ext="another-idx",
fs_extension="zip",
fs_path=rom.fs_path,
)
)
hit = db_rom_handler.get_rom_id_index(query=query, cache_key=cache_key)
assert hit == miss == [rom.id]
class TestInvalidateFilterValuesCache:
def test_deletes_prior_version_keys_and_set(self, rom_with_metadata: Rom):

View File

@@ -714,11 +714,6 @@ def main() -> int:
parser.add_argument(
"--password", default="password", help="Plain password for every seeded user"
)
parser.add_argument(
"--user-prefix",
default="loadtest",
help="Prefix for seeded usernames (kept unique by id)",
)
parser.add_argument(
"--wipe",
action="store_true",
@@ -852,11 +847,11 @@ def main() -> int:
# Suffix with the assigned id so usernames/emails never collide with
# rows already in the target database.
if i == 0:
username, role = f"{args.user_prefix}_admin_{uid}", Role.ADMIN
username, role = f"admin_{uid}", Role.ADMIN
elif i <= 2:
username, role = f"{args.user_prefix}_editor_{uid}", Role.EDITOR
username, role = f"editor_{uid}", Role.EDITOR
else:
username, role = f"{args.user_prefix}_viewer_{uid}", Role.VIEWER
username, role = f"viewer_{uid}", Role.VIEWER
created = rand_past(rng, now)
user_rows.append(
{