From d5ffeeeddb668c4f2fc78c1617b6d1c8cba0f653 Mon Sep 17 00:00:00 2001 From: Daniel Bonofiglio Date: Tue, 16 Jun 2026 20:32:14 -0300 Subject: [PATCH] fix(search): full-text indexes+caching Adds a few new indexes to handle full-text searches instead of doing `ILIKE` matching, improving performance substantially. Alongside that, a few other things were done in order to improve search performance, such as caching filter values so they're not computed on each request to /api/roms. Overall, this should have a very noticeable impact on large collections when using the search feature. --- backend/alembic/env.py | 9 + .../versions/0083_add_roms_search_indexes.py | 61 +++++ .../versions/0084_add_roms_name_index.py | 30 +++ .../versions/0085_add_roms_name_sort_key.py | 67 +++++ backend/endpoints/roms/__init__.py | 64 +++-- backend/endpoints/sockets/scan.py | 3 + backend/handler/database/roms_handler.py | 231 ++++++++++++++---- backend/models/rom.py | 31 +++ backend/tests/conftest.py | 3 + backend/tests/handler/test_db_handler.py | 49 ++++ .../components/common/Game/VirtualTable.vue | 15 +- frontend/src/services/api/rom.ts | 2 +- frontend/src/stores/roms.ts | 6 +- 13 files changed, 504 insertions(+), 67 deletions(-) create mode 100644 backend/alembic/versions/0083_add_roms_search_indexes.py create mode 100644 backend/alembic/versions/0084_add_roms_name_index.py create mode 100644 backend/alembic/versions/0085_add_roms_name_sort_key.py diff --git a/backend/alembic/env.py b/backend/alembic/env.py index 4a0296ba2..b124934ae 100644 --- a/backend/alembic/env.py +++ b/backend/alembic/env.py @@ -41,6 +41,15 @@ def include_object(object, name, type_, reflected, compare_to): ]: # Virtual table return False + # Skip DB-specific search indexes in autogenerate + # to avoid false drop/create operations + if type_ == "index" and name in ( + "idx_roms_name_fs_name_fulltext", + "idx_roms_name_trgm", + "idx_roms_fs_name_trgm", + ): + return False + return True diff --git 
a/backend/alembic/versions/0083_add_roms_search_indexes.py b/backend/alembic/versions/0083_add_roms_search_indexes.py new file mode 100644 index 000000000..fc51e9c61 --- /dev/null +++ b/backend/alembic/versions/0083_add_roms_search_indexes.py @@ -0,0 +1,61 @@ +"""Add db-specific search indexes on roms.name and roms.fs_name + +Revision ID: 0083_add_roms_search_indexes +Revises: 0082_save_origin_device +Create Date: 2026-06-16 00:00:00.000000 + +""" + +import sqlalchemy as sa +from alembic import op + +from utils.database import is_mariadb, is_mysql, is_postgresql + +# revision identifiers, used by Alembic. +revision = "0083_add_roms_search_indexes" +down_revision = "0082_save_origin_device" +branch_labels = None +depends_on = None + +FULLTEXT_INDEX_NAME = "idx_roms_name_fs_name_fulltext" +PG_NAME_INDEX = "idx_roms_name_trgm" +PG_FS_NAME_INDEX = "idx_roms_fs_name_trgm" + + +def upgrade() -> None: + bind = op.get_bind() + + if is_mysql(bind) or is_mariadb(bind): + op.execute( + sa.text( + f"CREATE FULLTEXT INDEX {FULLTEXT_INDEX_NAME} " + "ON roms (name, fs_name)" + ) + ) + elif is_postgresql(bind): + # pg_trgm is a trusted extension since PostgreSQL 13, so a non-superuser + # with CREATE on the database can install it. + op.execute(sa.text("CREATE EXTENSION IF NOT EXISTS pg_trgm")) + op.execute( + sa.text( + f"CREATE INDEX IF NOT EXISTS {PG_NAME_INDEX} " + "ON roms USING gin (name gin_trgm_ops)" + ) + ) + op.execute( + sa.text( + f"CREATE INDEX IF NOT EXISTS {PG_FS_NAME_INDEX} " + "ON roms USING gin (fs_name gin_trgm_ops)" + ) + ) + + +def downgrade() -> None: + bind = op.get_bind() + + if is_mysql(bind) or is_mariadb(bind): + op.execute(sa.text(f"DROP INDEX {FULLTEXT_INDEX_NAME} ON roms")) + elif is_postgresql(bind): + # Leave the pg_trgm extension in place; other objects may depend on it. 
+ op.execute(sa.text(f"DROP INDEX IF EXISTS {PG_FS_NAME_INDEX}")) + op.execute(sa.text(f"DROP INDEX IF EXISTS {PG_NAME_INDEX}")) diff --git a/backend/alembic/versions/0084_add_roms_name_index.py b/backend/alembic/versions/0084_add_roms_name_index.py new file mode 100644 index 000000000..6f007f895 --- /dev/null +++ b/backend/alembic/versions/0084_add_roms_name_index.py @@ -0,0 +1,30 @@ +"""Add index on roms.name + +Revision ID: 0084_add_roms_name_index +Revises: 0083_add_roms_search_indexes +Create Date: 2026-06-16 00:00:00.000000 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "0084_add_roms_name_index" +down_revision = "0083_add_roms_search_indexes" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + with op.batch_alter_table("roms", schema=None) as batch_op: + batch_op.create_index( + "idx_roms_name", + ["name"], + unique=False, + if_not_exists=True, + ) + + +def downgrade() -> None: + with op.batch_alter_table("roms", schema=None) as batch_op: + batch_op.drop_index("idx_roms_name", if_exists=True) diff --git a/backend/alembic/versions/0085_add_roms_name_sort_key.py b/backend/alembic/versions/0085_add_roms_name_sort_key.py new file mode 100644 index 000000000..c2f232d7c --- /dev/null +++ b/backend/alembic/versions/0085_add_roms_name_sort_key.py @@ -0,0 +1,67 @@ +"""Add precomputed name_sort_key column for natural-sort ordering + +Ordering the gallery by name previously applied a per-row regexp (strip +articles, zero-pad numbers) that can't use an index, forcing a full sort. This +stores that key in an indexed column so name sorting — including deep-offset +pages — uses idx_roms_name_sort_key instead. + +Revision ID: 0085_add_roms_name_sort_key +Revises: 0084_add_roms_name_index +Create Date: 2026-06-16 00:00:00.000000 + +""" + +import sqlalchemy as sa +from alembic import op + +from models.rom import NAME_SORT_KEY_MAX_LENGTH, compute_name_sort_key + +# revision identifiers, used by Alembic. 
+revision = "0085_add_roms_name_sort_key" +down_revision = "0084_add_roms_name_index" +branch_labels = None +depends_on = None + +_BACKFILL_BATCH = 1000 + + +def upgrade() -> None: + op.add_column( + "roms", + sa.Column( + "name_sort_key", + sa.String(length=NAME_SORT_KEY_MAX_LENGTH), + nullable=True, + ), + ) + + bind = op.get_bind() + roms = sa.table( + "roms", + sa.column("id", sa.Integer), + sa.column("name", sa.String), + sa.column("name_sort_key", sa.String), + ) + + rows = bind.execute(sa.select(roms.c.id, roms.c.name)).fetchall() + update_stmt = ( + roms.update() + .where(roms.c.id == sa.bindparam("_id")) + .values(name_sort_key=sa.bindparam("_key")) + ) + for start in range(0, len(rows), _BACKFILL_BATCH): + batch = rows[start : start + _BACKFILL_BATCH] + bind.execute( + update_stmt, + [ + {"_id": row.id, "_key": compute_name_sort_key(row.name)} + for row in batch + ], + ) + + op.create_index("idx_roms_name_sort_key", "roms", ["name_sort_key"]) + + +def downgrade() -> None: + op.drop_index("idx_roms_name_sort_key", table_name="roms") + op.drop_column("roms", "name_sort_key") diff --git a/backend/endpoints/roms/__init__.py b/backend/endpoints/roms/__init__.py index 1a5773310..46fdb631a 100644 --- a/backend/endpoints/roms/__init__.py +++ b/backend/endpoints/roms/__init__.py @@ -25,7 +25,7 @@ from fastapi import ( status, ) from fastapi.responses import Response -from fastapi_pagination.ext.sqlalchemy import paginate +from fastapi_pagination import resolve_params from fastapi_pagination.limit_offset import LimitOffsetPage, LimitOffsetParams from pydantic import BaseModel, Field from starlette.responses import FileResponse @@ -470,8 +470,13 @@ def get_roms( ] = "any", order_by: Annotated[ str, - Query(description="Field to order results by."), - ] = "name", + Query( + description=( + "Field to order results by. Leave empty to order by search relevance " + "(when a search term is given) and fall back to name." 
+ ), + ), + ] = "", order_dir: Annotated[ str, Query(description="Order direction, either 'asc' or 'desc'."), @@ -492,6 +497,7 @@ def get_roms( user_id=request.user.id, order_by=order_by.lower(), order_dir=order_dir.lower(), + search_term=search_term, ) # Filter down the query @@ -535,11 +541,22 @@ def get_roms( include_file_stats=True, ) + # Cache only the unscoped library scan; scoped/searched sets are narrower and computed live. + is_unscoped = not ( + search_term + or platform_ids + or collection_id + or virtual_collection_id + or smart_collection_id + ) + # Get the char index for the roms char_index_dict = {} if with_char_index: char_index = db_rom_handler.with_char_index( - query=query, order_by_attr=order_by_attr + query=query, + order_by_attr=order_by_attr, + cache_key=f"all:u{request.user.id}" if is_unscoped else None, ) char_index_dict = {char: index for (char, index) in char_index} @@ -566,7 +583,10 @@ def get_roms( smart_collection_id=smart_collection_id, search_term=search_term, ) - query_filters = db_rom_handler.with_filter_values(query=filter_query) + query_filters = db_rom_handler.with_filter_values( + query=filter_query, + cache_key=f"all:u{request.user.id}" if is_unscoped else None, + ) # trunk-ignore(mypy/typeddict-item) filter_values = RomFiltersDict(**query_filters) @@ -595,15 +615,23 @@ def get_roms( for item in items ] - return paginate( - session, - query, - transformer=_transform, - additional_data={ - "char_index": char_index_dict, - "rom_id_index": rom_id_index, - "filter_values": filter_values, - }, + params = resolve_params() + total = len(rom_id_index) + page_ids = list(rom_id_index[params.offset : params.offset + params.limit]) + if page_ids: + page_rows = session.scalars(query.where(Rom.id.in_(page_ids))).all() + rows_by_id = {rom.id: rom for rom in page_rows} + page_items = [rows_by_id[i] for i in page_ids if i in rows_by_id] + else: + page_items = [] + + return CustomLimitOffsetPage.create( + _transform(page_items), + params, + 
total=total, + char_index=char_index_dict, + rom_id_index=list(rom_id_index), + filter_values=filter_values, ) @@ -1156,6 +1184,7 @@ async def update_rom( if not rom: raise RomNotFoundInDatabaseException(id) + db_rom_handler.invalidate_filter_values_cache() return DetailedRomSchema.from_orm_with_request(rom, request) provided_fields = form_data.model_fields_set @@ -1488,6 +1517,7 @@ async def update_rom( if meta_playmatch_handler.is_manual_match(form_data.model_fields_set): fire_and_forget(meta_playmatch_handler.submit_manual_match_suggestion(rom)) + db_rom_handler.invalidate_filter_values_cache() return DetailedRomSchema.from_orm_with_request(rom, request) @@ -1578,6 +1608,9 @@ async def delete_roms( failed_items += 1 errors.append(f"Failed to delete ROM {id}: {str(e)}") + if successful_items: + db_rom_handler.invalidate_filter_values_cache() + return { "successful_items": successful_items, "failed_items": failed_items, @@ -1627,4 +1660,7 @@ async def update_rom_user( rom_user = db_rom_handler.update_rom_user(db_rom_user.id, cleaned_data) + if "hidden" in cleaned_data: + db_rom_handler.invalidate_filter_values_cache() + return RomUserSchema.model_validate(rom_user) diff --git a/backend/endpoints/sockets/scan.py b/backend/endpoints/sockets/scan.py index c765a8355..35d4e325f 100644 --- a/backend/endpoints/sockets/scan.py +++ b/backend/endpoints/sockets/scan.py @@ -756,6 +756,9 @@ async def scan_platforms( log.info(f"{emoji.EMOJI_CHECK_MARK} Scan completed") + # The library changed; drop cached filter values. 
+ db_rom_handler.invalidate_filter_values_cache() + # Export metadata files if enabled in config config = cm.get_config() platforms_by_slug = {p.fs_slug: p for p in db_platform_handler.get_platforms()} diff --git a/backend/handler/database/roms_handler.py b/backend/handler/database/roms_handler.py index 9e85122c8..35e45c75f 100644 --- a/backend/handler/database/roms_handler.py +++ b/backend/handler/database/roms_handler.py @@ -1,8 +1,11 @@ import functools +import json +import re from collections.abc import Iterable, Sequence from datetime import datetime from typing import Any +from redis.exceptions import WatchError from sqlalchemy import ( Integer, Row, @@ -37,9 +40,18 @@ from sqlalchemy.sql.selectable import Select from config import ROMM_DB_DRIVER from decorators.database import begin_session from handler.metadata.base_handler import UniversalPlatformSlug as UPS +from handler.redis_handler import sync_cache from models.assets import Save, Screenshot, State from models.platform import Platform -from models.rom import Rom, RomFile, RomMetadata, RomNote, RomUser, SiblingRom +from models.rom import ( + Rom, + RomFile, + RomMetadata, + RomNote, + RomUser, + SiblingRom, + compute_name_sort_key, +) from utils.database import ( json_array_contains_all, json_array_contains_any, @@ -102,7 +114,53 @@ RUFFLE_SUPPORTED_PLATFORMS = [ UPS.BROWSER, ] -STRIP_ARTICLES_REGEX = r"^(the|a|an)\s+" +# Used to remove native full-text SQL operators +FULLTEXT_BOOLEAN_OPERATORS_REGEX = re.compile(r'[+\-~<>()"@*]') + +# 3 is the default minimum size in InnoDB +FULLTEXT_MIN_TOKEN_SIZE = 3 + +# Cached ROM filter values (genres/franchises/etc.) 
so it doesn't get +# recomputed on every call to /api/roms +ROM_FILTERS_CACHE_VERSION_KEY = "filter_values:ver" +ROM_FILTERS_CACHE_KEYS_PREFIX = "filter_values:keys" +ROM_FILTERS_CACHE_TTL = 60 * 60 * 24 * 7 # 7 days + + +def _cache_value_to_str(value: Any) -> str | None: + if value is None: + return None + if isinstance(value, bytes): + return value.decode() + return str(value) + + +def _filter_values_cache_version() -> str: + return _cache_value_to_str(sync_cache.get(ROM_FILTERS_CACHE_VERSION_KEY)) or "0" + + +def _filter_values_cache_keys_key(version: str) -> str: + return f"{ROM_FILTERS_CACHE_KEYS_PREFIX}:v{version}" + + +def _store_versioned_cache(redis_key: str, version: str, result: Any) -> None: + version_keys_set = _filter_values_cache_keys_key(version) + with sync_cache.pipeline() as pipe: + try: + pipe.watch(ROM_FILTERS_CACHE_VERSION_KEY) + current_version = ( + _cache_value_to_str(pipe.get(ROM_FILTERS_CACHE_VERSION_KEY)) or "0" + ) + if current_version != version: + pipe.unwatch() + else: + pipe.multi() + pipe.set(redis_key, json.dumps(result), ex=ROM_FILTERS_CACHE_TTL) + pipe.sadd(version_keys_set, redis_key) + pipe.expire(version_keys_set, ROM_FILTERS_CACHE_TTL) + pipe.execute() + except WatchError: + pass def _create_metadata_id_case( @@ -310,19 +368,53 @@ class DBRomsHandler(DBBaseHandler): return query.filter(Rom.id.in_(smart_collection.rom_ids)) return query + def _build_fulltext_boolean_query(self, term: str) -> str | None: + words = FULLTEXT_BOOLEAN_OPERATORS_REGEX.sub(" ", term).split() + if not words or any(len(word) < FULLTEXT_MIN_TOKEN_SIZE for word in words): + return None + return " ".join(f"+{word}*" for word in words) + + def _build_fulltext_relevance(self, search_term: str) -> str | None: + parts: list[str] = [] + for term in search_term.split("|"): + words = FULLTEXT_BOOLEAN_OPERATORS_REGEX.sub(" ", term).split() + if len(words) > 1: + parts.append('"' + " ".join(words) + '"') + return " ".join(parts) if parts else None + def 
_filter_by_search_term(self, query: Query, search_term: str): terms = [term.strip() for term in search_term.split("|")] - conditions = [ - condition - for term in terms - for condition in ( - Rom.fs_name.ilike(f"%{term}%"), - Rom.name.ilike(f"%{term}%"), - ) - if term - ] + terms = [term for term in terms if term] + if not terms: + return query - return query.filter(or_(*conditions)) + if ROMM_DB_DRIVER in ("mariadb", "mysql"): + match_clauses: list[Any] | None = [] + for idx, term in enumerate(terms): + boolean_query = self._build_fulltext_boolean_query(term) + if boolean_query is None: + match_clauses = None + break + param = f"fulltext_search_{idx}" + match_clauses.append( + text( + f"MATCH(roms.name, roms.fs_name) " + f"AGAINST(:{param} IN BOOLEAN MODE)" + ).bindparams(**{param: boolean_query}) + ) + if match_clauses: + return query.filter(or_(*match_clauses)) + + # PostgreSQL path, and ILIKE fallback when full-text search can't be used + term_conditions = [] + for term in terms: + word_conditions = [ + or_(Rom.fs_name.ilike(f"%{word}%"), Rom.name.ilike(f"%{word}%")) + for word in term.split() + ] + if word_conditions: + term_conditions.append(and_(*word_conditions)) + return query.filter(or_(*term_conditions)) def _filter_by_matched(self, query: Query, value: bool) -> Query: """Filter based on whether the rom is matched to a metadata provider. 
@@ -861,8 +953,9 @@ class DBRomsHandler(DBBaseHandler): def get_roms_query( self, *, - order_by: str = "name", + order_by: str = "", order_dir: str = "asc", + search_term: str | None = None, user_id: int | None = None, session: Session = None, # type: ignore ) -> tuple[Query[Rom], Any]: @@ -884,26 +977,37 @@ class DBRomsHandler(DBBaseHandler): else: order_attr = Rom.name + # Sort by the indexed `name_sort_key`: the lowercased name without a leading + # article (the, a, an) and with digit runs zero-padded for natural ordering + if order_attr is Rom.name: + order_attr = Rom.name_sort_key + order_attr_column = order_attr - # Ignore case when the order attribute is a number - if isinstance(order_attr.type, (String, Text)): - # Remove any leading articles - order_attr = func.trim( - func.lower(order_attr).regexp_replace(STRIP_ARTICLES_REGEX, "") - ) - - # Pad numbers with leading zeros to ensure natural sorting - order_attr = order_attr.regexp_replace( - r"(\d+)", r"00000000000\1" - ).regexp_replace(r"0*(\d{12})", r"\1") - if order_dir.lower() == "desc": order_attr = order_attr.desc() else: order_attr = order_attr.asc() - return query.order_by(order_attr), order_attr_column # type: ignore + relevance_clause = None + if search_term and ROMM_DB_DRIVER in ("mariadb", "mysql"): + relevance = self._build_fulltext_relevance(search_term) + if relevance: + relevance_clause = text( + "MATCH(roms.name, roms.fs_name) " + "AGAINST(:relevance IN BOOLEAN MODE) DESC" + ).bindparams(relevance=relevance) + + if order_by: # explicit sort wins, relevance breaks ties + order_clauses = [order_attr] + if relevance_clause is not None: + order_clauses.append(relevance_clause) + else: # no sort selected: relevance leads, name is the tiebreaker + order_clauses = [order_attr] + if relevance_clause is not None: + order_clauses.insert(0, relevance_clause) + + return query.order_by(*order_clauses), order_attr_column # type: ignore @begin_session def get_roms_scalar( @@ -914,8 +1018,9 @@ class DBRomsHandler(DBBaseHandler): **kwargs, ) -> 
Sequence[Rom]: query, _ = self.get_roms_query( - order_by=kwargs.get("order_by", "name"), + order_by=kwargs.get("order_by", ""), order_dir=kwargs.get("order_dir", "asc"), + search_term=kwargs.get("search_term", None), user_id=kwargs.get("user_id", None), ) @@ -966,22 +1071,25 @@ class DBRomsHandler(DBBaseHandler): self, query: Query, order_by_attr: Any, + *, + cache_key: str | None = None, session: Session = None, # type: ignore - ) -> list[Row[tuple[str, int]]]: - if isinstance(order_by_attr.type, (String, Text)): - # Remove any leading articles - order_by_attr = func.trim( - func.lower(order_by_attr).regexp_replace(STRIP_ARTICLES_REGEX, "") - ) - else: - order_by_attr = func.trim( - func.lower(Rom.name).regexp_replace(STRIP_ARTICLES_REGEX, "") - ) + ) -> list[tuple[str, int]]: + redis_key: str | None = None + version: str | None = None + if cache_key: + version = _filter_values_cache_version() + redis_key = f"char_index:{cache_key}:v{version}" + cached = sync_cache.get(redis_key) + if cached is not None: + return json.loads(cached) - # Pad numbers with leading zeros to ensure natural sorting - order_by_attr = order_by_attr.regexp_replace( - r"(\d+)", r"00000000000\1" - ).regexp_replace(r"0*(\d{12})", r"\1") + # Drop any ordering carried over from the main query (e.g. search relevance). + # This builds its own positional ordering below. 
+ query = query.order_by(None) + + if not isinstance(order_by_attr.type, (String, Text)): + order_by_attr = Rom.name_sort_key # Get the row number and first letter for each item subquery = ( @@ -998,7 +1106,7 @@ class DBRomsHandler(DBBaseHandler): ) # Get the minimum position for each letter - return ( + rows = ( session.query( subquery.c.letter, func.min(subquery.c.position - 1).label("position") ) @@ -1008,6 +1116,11 @@ class DBRomsHandler(DBBaseHandler): .all() ) + result = [[letter, int(position)] for letter, position in rows] + if redis_key is not None and version is not None: + _store_versioned_cache(redis_key, version, result) + return result + @begin_session def get_roms_by_fs_name( self, @@ -1048,6 +1161,10 @@ class DBRomsHandler(DBBaseHandler): data: dict, session: Session = None, # type: ignore ) -> Rom: + # Bulk update() bypasses the ORM before_update event, so keep the + # precomputed sort key in sync whenever the name changes. + if "name" in data: + data = {**data, "name_sort_key": compute_name_sort_key(data["name"])} session.execute( update(Rom) .where(Rom.id == id) @@ -1532,16 +1649,39 @@ class DBRomsHandler(DBBaseHandler): "platforms": sorted(platforms), } + def invalidate_filter_values_cache(self) -> None: + old_version = str(int(sync_cache.incr(ROM_FILTERS_CACHE_VERSION_KEY)) - 1) + old_keys_set = _filter_values_cache_keys_key(old_version) + old_cache_keys = [ + key + for raw_key in sync_cache.smembers(old_keys_set) + if (key := _cache_value_to_str(raw_key)) is not None + ] + if old_cache_keys: + sync_cache.delete(*old_cache_keys) + sync_cache.delete(old_keys_set) + @begin_session def with_filter_values( self, query: Query, + *, + cache_key: str | None = None, session: Session = None, # type: ignore ) -> dict: """ Returns the list of filters given the current subset of ROMs in the query """ - ids_subq = query.with_only_columns(Rom.id).scalar_subquery() # type: ignore + redis_key: str | None = None + version: str | None = None + if cache_key: + 
version = _filter_values_cache_version() + redis_key = f"filter_values:{cache_key}:v{version}" + cached = sync_cache.get(redis_key) + if cached is not None: + return json.loads(cached) + + ids_subq = query.order_by(None).with_only_columns(Rom.id).scalar_subquery() # type: ignore statement = ( select( @@ -1561,7 +1701,10 @@ class DBRomsHandler(DBBaseHandler): .where(Rom.id.in_(ids_subq)) ) - return self._collect_filter_values(session, statement) + result = self._collect_filter_values(session, statement) + if redis_key is not None and version is not None: + _store_versioned_cache(redis_key, version, result) + return result @begin_session def get_rom_filters( diff --git a/backend/models/rom.py b/backend/models/rom.py index e12f3c06f..29d3c5234 100644 --- a/backend/models/rom.py +++ b/backend/models/rom.py @@ -2,6 +2,7 @@ from __future__ import annotations import copy import enum +import re from datetime import datetime from functools import cached_property from typing import TYPE_CHECKING, Any, TypedDict @@ -17,6 +18,7 @@ from sqlalchemy import ( Text, UniqueConstraint, and_, + event, func, or_, select, @@ -32,6 +34,19 @@ from models.base import ( ) from utils.database import CustomJSON +# Max length of the precomputed natural-sort key column. 
+NAME_SORT_KEY_MAX_LENGTH = 500 +ARTICLE_PREFIX_RE = re.compile(r"^(the|a|an)\s+") +DIGIT_RUN_RE = re.compile(r"\d+") + +def compute_name_sort_key(name: str | None) -> str: + """Precompute the natural-sort key stored in `Rom.name_sort_key` + """ + value = (name or "").lower() + value = ARTICLE_PREFIX_RE.sub("", value).strip() + value = DIGIT_RUN_RE.sub(lambda m: m.group(0).zfill(12), value) + return value[:NAME_SORT_KEY_MAX_LENGTH] + if TYPE_CHECKING: from models.assets import Save, Screenshot, State from models.collection import Collection @@ -181,6 +196,8 @@ class Rom(BaseModel): __table_args__ = ( Index("idx_roms_platform_id_fs_name", "platform_id", "fs_name"), + Index("idx_roms_name", "name"), + Index("idx_roms_name_sort_key", "name_sort_key"), Index("idx_roms_igdb_id", "igdb_id"), Index("idx_roms_moby_id", "moby_id"), Index("idx_roms_ss_id", "ss_id"), @@ -203,6 +220,9 @@ class Rom(BaseModel): fs_size_bytes: Mapped[int] = mapped_column(BigInteger(), default=0) name: Mapped[str | None] = mapped_column(String(length=350)) + name_sort_key: Mapped[str | None] = mapped_column( + String(length=NAME_SORT_KEY_MAX_LENGTH), default=None + ) slug: Mapped[str | None] = mapped_column(String(length=400)) summary: Mapped[str | None] = mapped_column(Text) igdb_metadata: Mapped[dict[str, Any] | None] = mapped_column( @@ -494,6 +514,17 @@ Rom.top_level_file_count = column_property( ) +@event.listens_for(Rom, "before_insert") +@event.listens_for(Rom, "before_update") +def _populate_rom_name_sort_key(mapper, connection, target: Rom) -> None: + """Keep `name_sort_key` in sync for ORM inserts/updates. + + Bulk `update()` statements bypass ORM events, so they have to + set it explicitly. 
+ """ + target.name_sort_key = compute_name_sort_key(target.name) + + class RomUserStatus(enum.StrEnum): INCOMPLETE = "incomplete" # Started but not finished FINISHED = "finished" # Reached the end of the game diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index c44efeee1..aa144ee5e 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -52,6 +52,9 @@ def clear_database(): s.query(Platform).delete(synchronize_session="evaluate") s.query(User).delete(synchronize_session="evaluate") + # Drop any cached gallery filter values to keep tests isolated. + db_rom_handler.invalidate_filter_values_cache() + @pytest.fixture(scope="module") def vcr_config(): diff --git a/backend/tests/handler/test_db_handler.py b/backend/tests/handler/test_db_handler.py index b100def43..bdd6636db 100644 --- a/backend/tests/handler/test_db_handler.py +++ b/backend/tests/handler/test_db_handler.py @@ -219,6 +219,55 @@ def test_filter_by_search_term_with_multiple_terms(platform: Platform): assert actual_rom_ids_single == expected_rom_ids_single +def test_filter_by_search_term_multi_word_and_ranking(platform: Platform): + def _add(name: str) -> Rom: + fs = name.replace(" ", "_") + return db_rom_handler.add_rom( + Rom( + platform_id=platform.id, + name=name, + slug=name.lower().replace(" ", "-"), + fs_name=f"{fs}.zip", + fs_name_no_tags=fs, + fs_name_no_ext=fs, + fs_extension="zip", + fs_path=f"{platform.slug}/roms", + ) + ) + + ff = _add("Final Fantasy") + ff7 = _add("Final Fantasy VII") + fantasy_final = _add("Fantasy Final") # both words, reversed order + _add("Final Combat") # only "final" + _add("Angelique - Voice Fantasy") # only "fantasy" + _add("Super Mario World") # neither word + + results = db_rom_handler.get_roms_scalar(search_term="final fantasy") + result_ids = [r.id for r in results] + + # Only titles containing BOTH words appear (AND semantics). 
+ assert set(result_ids) == {ff.id, ff7.id, fantasy_final.id} + + # Exact-order phrase matches rank above the reversed-order match. + assert result_ids.index(ff.id) < result_ids.index(fantasy_final.id) + assert result_ids.index(ff7.id) < result_ids.index(fantasy_final.id) + + # The relevance ORDER BY must also survive the group_by_meta_id subquery + # wrapping used by the gallery (each ROM here is its own group). + grouped = db_rom_handler.get_roms_scalar( + search_term="final fantasy", group_by_meta_id=True + ) + assert {r.id for r in grouped} == {ff.id, ff7.id, fantasy_final.id} + + # An explicit sort takes priority over relevance: ordering by name asc puts + # "Fantasy Final" first (relevance is only the tiebreaker here). + explicit = db_rom_handler.get_roms_scalar( + search_term="final fantasy", order_by="name", order_dir="asc" + ) + explicit_ids = [r.id for r in explicit] + assert explicit_ids.index(fantasy_final.id) < explicit_ids.index(ff.id) + + def test_sibling_roms_empty_fs_name_no_tags_not_matched(platform: Platform): """ROMs with empty fs_name_no_tags should NOT be matched as siblings. 
diff --git a/frontend/src/components/common/Game/VirtualTable.vue b/frontend/src/components/common/Game/VirtualTable.vue index f0d1f4d09..05ec8830a 100644 --- a/frontend/src/components/common/Game/VirtualTable.vue +++ b/frontend/src/components/common/Game/VirtualTable.vue @@ -130,12 +130,17 @@ function updateSelectedRom(rom: SimpleRom) { type SortBy = { key: keyof SimpleRom; order: "asc" | "desc" }[]; function updateOptions({ sortBy }: { sortBy: SortBy }) { - if (!sortBy[0]) return; - const { key, order } = sortBy[0]; - romsStore.resetPagination(); - romsStore.setOrderBy(key); - romsStore.setOrderDir(order); + if (sortBy[0]) { + const { key, order } = sortBy[0]; + romsStore.setOrderBy(key); + romsStore.setOrderDir(order); + } else { + // Clear the `orderBy` key when the user removes + // the sort column from the table + romsStore.setOrderBy(null); + romsStore.setOrderDir("asc"); + } romsStore.fetchRoms(); } diff --git a/frontend/src/services/api/rom.ts b/frontend/src/services/api/rom.ts index 254390c5c..a34f0145f 100644 --- a/frontend/src/services/api/rom.ts +++ b/frontend/src/services/api/rom.ts @@ -185,7 +185,7 @@ async function getRoms({ searchTerm = null, limit = 72, offset = 0, - orderBy = "name", + orderBy = null, orderDir = "asc", filterMatched = null, filterFavorites = null, diff --git a/frontend/src/stores/roms.ts b/frontend/src/stores/roms.ts index ff0f7590c..47a7d4a76 100644 --- a/frontend/src/stores/roms.ts +++ b/frontend/src/stores/roms.ts @@ -25,7 +25,7 @@ export type SimpleRom = SimpleRomSchema; export type SearchRom = SearchRomSchema; export type DetailedRom = DetailedRomSchema; -const orderByStorage = useLocalStorage("roms.orderBy", "name"); +const orderByStorage = useLocalStorage("roms.orderBy", null); const orderDirStorage = useLocalStorage("roms.orderDir", "asc"); const defaultRomsState = { @@ -48,7 +48,7 @@ const defaultRomsState = { characterIndex: {} as Record, selectedCharacter: null as string | null, romIdIndex: [] as number[], - orderBy: 
orderByStorage.value as keyof SimpleRom, + orderBy: orderByStorage.value as keyof SimpleRom | null, orderDir: orderDirStorage.value as "asc" | "desc", }; @@ -335,7 +335,7 @@ export default defineStore("roms", { this.selectedIDs = new Set(); this.lastSelectedIndex = -1; }, - setOrderBy(orderBy: keyof SimpleRom) { + setOrderBy(orderBy: keyof SimpleRom | null) { this.orderBy = orderBy; orderByStorage.value = orderBy; },