mirror of
https://github.com/rommapp/romm.git
synced 2026-06-28 06:46:00 +00:00
Merge pull request #3595 from rommapp/perf/roms-list-query-optimizations
perf(roms): speed up the gallery/search list endpoint on large libraries
This commit is contained in:
51
backend/alembic/versions/0090_roms_sibling_cover_index.py
Normal file
51
backend/alembic/versions/0090_roms_sibling_cover_index.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Add covering index for the sibling_roms self-join
|
||||
|
||||
The sibling_roms view self-joins roms on platform_id plus a 7-way OR over
|
||||
metadata id columns (igdb_id, moby_id, ss_id, launchbox_id, ra_id, hasheous_id,
|
||||
tgdb_id). When the join is filtered by a parent rom, the OR can't be
|
||||
index-driven (the compared values come from the joined row, not constants), so
|
||||
each parent scanned every rom on its platform and read wide rows just to
|
||||
evaluate the OR. This composite index lets the join seek the platform partition
|
||||
and resolve the OR from the index alone, without hydrating the wide rows.
|
||||
|
||||
Revision ID: 0090_roms_sibling_cover_index
|
||||
Revises: 0089_client_tokens_device_id
|
||||
Create Date: 2026-06-24 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0090_roms_sibling_cover_index"
|
||||
down_revision = "0089_client_tokens_device_id"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
INDEX_NAME = "idx_roms_sibling_cover"
|
||||
INDEX_COLUMNS = [
|
||||
"platform_id",
|
||||
"igdb_id",
|
||||
"moby_id",
|
||||
"ss_id",
|
||||
"launchbox_id",
|
||||
"ra_id",
|
||||
"hasheous_id",
|
||||
"tgdb_id",
|
||||
"id",
|
||||
]
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
with op.batch_alter_table("roms", schema=None) as batch_op:
|
||||
batch_op.create_index(
|
||||
INDEX_NAME,
|
||||
INDEX_COLUMNS,
|
||||
unique=False,
|
||||
if_not_exists=True,
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
with op.batch_alter_table("roms", schema=None) as batch_op:
|
||||
batch_op.drop_index(INDEX_NAME, if_exists=True)
|
||||
@@ -101,6 +101,27 @@ def safe_int_or_none(value: Any) -> int | None:
|
||||
return safe_int(value)
|
||||
|
||||
|
||||
def build_unscoped_sidecar_cache_key(
|
||||
user_id: int,
|
||||
order_by: str,
|
||||
order_dir: str,
|
||||
group_by_meta_id: bool,
|
||||
is_unscoped: bool,
|
||||
) -> str | None:
|
||||
"""Cache key for the unscoped library sidecars (char index, filter values,
|
||||
rom id index). Returns None for scoped/searched sets, which are computed live.
|
||||
The computed values depend on user, ordering and grouping, so all are part
|
||||
of the key.
|
||||
"""
|
||||
if not is_unscoped:
|
||||
return None
|
||||
|
||||
return (
|
||||
f"all:u{user_id}"
|
||||
f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}"
|
||||
)
|
||||
|
||||
|
||||
class RomUpdateForm(BaseModel):
|
||||
igdb_id: str | None = Field(default=None, description="IGDB game ID.")
|
||||
sgdb_id: str | None = Field(default=None, description="SteamGridDB game ID.")
|
||||
@@ -552,27 +573,46 @@ def get_roms(
|
||||
include_file_stats=True,
|
||||
)
|
||||
|
||||
# Cache only the unscoped library scan; scoped/searched sets are narrower and computed live.
|
||||
# Cache only the fully unscoped library scan; any narrowing parameter makes
|
||||
# the result set narrower, so it is computed live. The sidecar cache key
|
||||
# encodes only user/order/grouping, not the filters, so every filter applied
|
||||
# to `query` below must gate caching here or a narrowed list leaks under the
|
||||
# shared "all" key. Bool flags use `is not None` since False is an active
|
||||
# filter. Logic operators are omitted: they only matter when their list
|
||||
# filter is set, which is already covered.
|
||||
is_unscoped = not (
|
||||
search_term
|
||||
or platform_ids
|
||||
or collection_id
|
||||
or virtual_collection_id
|
||||
or smart_collection_id
|
||||
or genres
|
||||
or franchises
|
||||
or collections
|
||||
or companies
|
||||
or age_ratings
|
||||
or statuses
|
||||
or regions
|
||||
or languages
|
||||
or player_counts
|
||||
or updated_after
|
||||
or matched is not None
|
||||
or favorite is not None
|
||||
or duplicate is not None
|
||||
or last_played is not None
|
||||
or playable is not None
|
||||
or has_ra is not None
|
||||
or missing is not None
|
||||
or verified is not None
|
||||
)
|
||||
|
||||
# Get the char index for the roms
|
||||
char_index_dict = {}
|
||||
if with_char_index:
|
||||
# The computed positions depend on ordering and grouping, so those
|
||||
# must be part of the cache key. Otherwise switching sort
|
||||
# direction/column (or toggling grouping) reuses a stale index and
|
||||
# the AlphaStrip highlights the wrong letters.
|
||||
char_index_cache_key = (
|
||||
f"all:u{request.user.id}"
|
||||
f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}"
|
||||
if is_unscoped
|
||||
else None
|
||||
# Switching sort direction/column (or toggling grouping) must not reuse
|
||||
# a stale index, or the AlphaStrip highlights the wrong letters.
|
||||
char_index_cache_key = build_unscoped_sidecar_cache_key(
|
||||
request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped
|
||||
)
|
||||
char_index = db_rom_handler.with_char_index(
|
||||
query=query,
|
||||
@@ -605,11 +645,8 @@ def get_roms(
|
||||
smart_collection_id=smart_collection_id,
|
||||
search_term=search_term,
|
||||
)
|
||||
cache_key = (
|
||||
f"all:u{request.user.id}"
|
||||
f":o{order_by.lower()}:d{order_dir.lower()}:g{int(group_by_meta_id)}"
|
||||
if is_unscoped
|
||||
else None
|
||||
cache_key = build_unscoped_sidecar_cache_key(
|
||||
request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped
|
||||
)
|
||||
query_filters = db_rom_handler.with_filter_values(
|
||||
query=filter_query,
|
||||
@@ -618,9 +655,18 @@ def get_roms(
|
||||
# trunk-ignore(mypy/typeddict-item)
|
||||
filter_values = RomFiltersDict(**query_filters)
|
||||
|
||||
# Get all ROM IDs in order for the additional data
|
||||
# The full ordered id list backs virtual scroll, so it's computed over the
|
||||
# whole result set on every request. Memoise the unscoped library scan (same
|
||||
# key scheme as the other sidecars); scoped/searched sets stay live.
|
||||
rom_id_index_cache_key = build_unscoped_sidecar_cache_key(
|
||||
request.user.id, order_by, order_dir, group_by_meta_id, is_unscoped
|
||||
)
|
||||
rom_id_index = db_rom_handler.get_rom_id_index(
|
||||
query=query, cache_key=rom_id_index_cache_key
|
||||
)
|
||||
|
||||
# Hydrate the requested page and its additional data
|
||||
with sync_session.begin() as session:
|
||||
rom_id_index = session.scalars(query.with_only_columns(Rom.id)).all() # type: ignore
|
||||
|
||||
def _transform(items: Sequence[Rom]) -> list[SimpleRomSchema]:
|
||||
rom_ids = [i.id for i in items]
|
||||
|
||||
@@ -842,24 +842,34 @@ class DBRomsHandler(DBBaseHandler):
|
||||
)
|
||||
|
||||
# Create a subquery that identifies the primary ROM in each group
|
||||
# Priority order: is_main_sibling (desc), then by fs_name_no_ext (asc)
|
||||
base_subquery = query.subquery()
|
||||
# Priority order: is_main_sibling (desc), then by fs_name_no_ext (asc).
|
||||
# Materialize only the columns the dedup window needs (not all of
|
||||
# Rom, whose JSON metadata blobs make the derived table huge), and
|
||||
# drop the carried-over ORDER BY the window doesn't use.
|
||||
base_subquery = (
|
||||
query.order_by(None)
|
||||
.with_only_columns(
|
||||
Rom.id,
|
||||
Rom.fs_name_no_ext,
|
||||
Rom.platform_id,
|
||||
Rom.igdb_id,
|
||||
Rom.ss_id,
|
||||
Rom.moby_id,
|
||||
Rom.ra_id,
|
||||
Rom.hasheous_id,
|
||||
Rom.launchbox_id,
|
||||
Rom.tgdb_id,
|
||||
Rom.flashpoint_id,
|
||||
)
|
||||
.subquery()
|
||||
)
|
||||
# Only id and the row number flow downstream; the partition/order
|
||||
# inputs are read straight from base_subquery, so keeping them out of
|
||||
# this SELECT keeps the window's temp table narrow (carrying the wide
|
||||
# fs_name_no_ext through it spilled the sort to disk).
|
||||
group_subquery = (
|
||||
select(base_subquery.c.id)
|
||||
.select_from(base_subquery)
|
||||
.with_only_columns(
|
||||
base_subquery.c.id,
|
||||
base_subquery.c.fs_name_no_ext,
|
||||
base_subquery.c.platform_id,
|
||||
base_subquery.c.igdb_id,
|
||||
base_subquery.c.ss_id,
|
||||
base_subquery.c.moby_id,
|
||||
base_subquery.c.ra_id,
|
||||
base_subquery.c.hasheous_id,
|
||||
base_subquery.c.launchbox_id,
|
||||
base_subquery.c.tgdb_id,
|
||||
base_subquery.c.flashpoint_id,
|
||||
)
|
||||
.outerjoin(
|
||||
RomUser,
|
||||
and_(
|
||||
@@ -1125,45 +1135,64 @@ class DBRomsHandler(DBBaseHandler):
|
||||
if not isinstance(order_by_attr.type, (String, Text)):
|
||||
order_by_attr = Rom.name_sort_key
|
||||
|
||||
# Apply the same direction the main query uses so the position
|
||||
# numbers we emit (and the per-letter min position downstream)
|
||||
# match the actual order the client paginates over. Without this
|
||||
# the frontend AlphaStrip would highlight the wrong letter when
|
||||
# order_dir=desc.
|
||||
order_window = (
|
||||
order_by_attr.desc() if order_dir.lower() == "desc" else order_by_attr.asc()
|
||||
)
|
||||
|
||||
# Get the row number and first letter for each item
|
||||
subquery = (
|
||||
query.with_only_columns(Rom.id, Rom.name) # type: ignore
|
||||
.add_columns( # type: ignore
|
||||
func.substring(
|
||||
order_by_attr,
|
||||
1,
|
||||
1,
|
||||
).label("letter"),
|
||||
func.row_number().over(order_by=order_window).label("position"),
|
||||
# The alpha-strip only needs each first letter's starting offset, not a
|
||||
# positional number for every row. Counting rows per letter and
|
||||
# accumulating those counts avoids row_number() over the whole library,
|
||||
# which forced a full materialization + filesort on large libraries.
|
||||
descending = order_dir.lower() == "desc"
|
||||
letter = func.substring(order_by_attr, 1, 1)
|
||||
counts = (
|
||||
query.with_only_columns( # type: ignore
|
||||
letter.label("letter"), func.count().label("count")
|
||||
)
|
||||
.subquery()
|
||||
.group_by(letter)
|
||||
.order_by(letter.desc() if descending else letter.asc())
|
||||
)
|
||||
|
||||
# Get the minimum position for each letter
|
||||
rows = (
|
||||
session.query(
|
||||
subquery.c.letter, func.min(subquery.c.position - 1).label("position")
|
||||
)
|
||||
.filter(subquery.c.letter.isnot(None))
|
||||
.group_by(subquery.c.letter)
|
||||
.order_by(subquery.c.letter)
|
||||
.all()
|
||||
)
|
||||
|
||||
result = [(letter, int(position)) for letter, position in rows]
|
||||
# Walk the letters in the same direction the client paginates over, so
|
||||
# each letter's offset is the count of rows that sort before it.
|
||||
result: list[tuple[str, int]] = []
|
||||
offset = 0
|
||||
for value, count in session.execute(counts).all():
|
||||
if value is not None:
|
||||
result.append((value, offset))
|
||||
offset += count
|
||||
result.sort(key=lambda entry: entry[0])
|
||||
if redis_key is not None and version is not None:
|
||||
_store_versioned_cache(redis_key, version, result)
|
||||
return result
|
||||
|
||||
@begin_session
|
||||
def get_rom_id_index(
|
||||
self,
|
||||
query: Query,
|
||||
*,
|
||||
cache_key: str | None = None,
|
||||
session: Session = None, # type: ignore
|
||||
) -> list[int]:
|
||||
"""Return every matching rom id in query order.
|
||||
|
||||
The list backs the gallery's virtual scroll, so it spans the whole
|
||||
result set (not a page) and is recomputed on every request. Building it
|
||||
runs the sibling-dedup window over the full library, so the unscoped
|
||||
case is memoised under the same versioned cache as the other gallery
|
||||
sidecars.
|
||||
"""
|
||||
redis_key: str | None = None
|
||||
version: str | None = None
|
||||
if cache_key:
|
||||
version = _filter_values_cache_version()
|
||||
redis_key = f"rom_id_index:{cache_key}:v{version}"
|
||||
cached = sync_cache.get(redis_key)
|
||||
if cached is not None:
|
||||
return json.loads(cached)
|
||||
|
||||
ids = list(session.scalars(query.with_only_columns(Rom.id)).all()) # type: ignore
|
||||
|
||||
if redis_key is not None and version is not None:
|
||||
_store_versioned_cache(redis_key, version, ids)
|
||||
return ids
|
||||
|
||||
@begin_session
|
||||
def get_roms_by_fs_name(
|
||||
self,
|
||||
|
||||
@@ -209,6 +209,19 @@ class Rom(BaseModel):
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name"),
|
||||
# Covers the sibling_roms view self-join
|
||||
Index(
|
||||
"idx_roms_sibling_cover",
|
||||
"platform_id",
|
||||
"igdb_id",
|
||||
"moby_id",
|
||||
"ss_id",
|
||||
"launchbox_id",
|
||||
"ra_id",
|
||||
"hasheous_id",
|
||||
"tgdb_id",
|
||||
"id",
|
||||
),
|
||||
Index("idx_roms_name", "name"),
|
||||
Index("idx_roms_name_sort_key", "name_sort_key"),
|
||||
Index("idx_roms_igdb_id", "igdb_id"),
|
||||
|
||||
@@ -178,6 +178,35 @@ class TestCacheHitMatchesMiss:
|
||||
# Same consumed shape as the miss (the endpoint folds this into a dict).
|
||||
assert dict(hit) == dict(miss) == {"t": 0}
|
||||
|
||||
def test_get_rom_id_index_hit_matches_miss(self, rom: Rom):
|
||||
query = cast(Query[Rom], select(Rom))
|
||||
cache_key = "all:test-idindex"
|
||||
|
||||
miss = db_rom_handler.get_rom_id_index(query=query, cache_key=cache_key)
|
||||
|
||||
version = _filter_values_cache_version()
|
||||
redis_key = f"rom_id_index:{cache_key}:v{version}"
|
||||
assert sync_cache.get(redis_key) is not None
|
||||
assert miss == [rom.id]
|
||||
|
||||
# Add a ROM; a cache hit must ignore it until the version bumps.
|
||||
with session_factory.begin() as s:
|
||||
s.add(
|
||||
Rom(
|
||||
platform_id=rom.platform_id,
|
||||
name="Another",
|
||||
slug="another-slug-idx",
|
||||
fs_name="another-idx.zip",
|
||||
fs_name_no_tags="another-idx",
|
||||
fs_name_no_ext="another-idx",
|
||||
fs_extension="zip",
|
||||
fs_path=rom.fs_path,
|
||||
)
|
||||
)
|
||||
|
||||
hit = db_rom_handler.get_rom_id_index(query=query, cache_key=cache_key)
|
||||
assert hit == miss == [rom.id]
|
||||
|
||||
|
||||
class TestInvalidateFilterValuesCache:
|
||||
def test_deletes_prior_version_keys_and_set(self, rom_with_metadata: Rom):
|
||||
|
||||
@@ -714,11 +714,6 @@ def main() -> int:
|
||||
parser.add_argument(
|
||||
"--password", default="password", help="Plain password for every seeded user"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--user-prefix",
|
||||
default="loadtest",
|
||||
help="Prefix for seeded usernames (kept unique by id)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--wipe",
|
||||
action="store_true",
|
||||
@@ -852,11 +847,11 @@ def main() -> int:
|
||||
# Suffix with the assigned id so usernames/emails never collide with
|
||||
# rows already in the target database.
|
||||
if i == 0:
|
||||
username, role = f"{args.user_prefix}_admin_{uid}", Role.ADMIN
|
||||
username, role = f"admin_{uid}", Role.ADMIN
|
||||
elif i <= 2:
|
||||
username, role = f"{args.user_prefix}_editor_{uid}", Role.EDITOR
|
||||
username, role = f"editor_{uid}", Role.EDITOR
|
||||
else:
|
||||
username, role = f"{args.user_prefix}_viewer_{uid}", Role.VIEWER
|
||||
username, role = f"viewer_{uid}", Role.VIEWER
|
||||
created = rand_past(rng, now)
|
||||
user_rows.append(
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user