diff --git a/backend/handler/metadata/base_hander.py b/backend/handler/metadata/base_hander.py index 9fe5602b6..5c6552a36 100644 --- a/backend/handler/metadata/base_hander.py +++ b/backend/handler/metadata/base_hander.py @@ -52,7 +52,7 @@ PS2_SERIAL_INDEX_KEY: Final = "romm:ps2_serial_index" PSP_SERIAL_INDEX_KEY: Final = "romm:psp_serial_index" LEADING_ARTICLE_PATTERN = re.compile(r"^(a|an|the)\b") -COMMA_ARTICLE_PATTERN = re.compile(r",\b(a|an|the)\b") +COMMA_ARTICLE_PATTERN = re.compile(r",\s(a|an|the)\b$") NON_WORD_SPACE_PATTERN = re.compile(r"[^\w\s]") MULTIPLE_SPACE_PATTERN = re.compile(r"\s+") @@ -61,24 +61,28 @@ CHAR_REMOVAL_TABLE = str.maketrans("_'\"", " ") # This caches results to avoid repeated normalization of the same search term @lru_cache(maxsize=1024) -def _normalize_search_term(name: str) -> str: +def _normalize_search_term( + name: str, remove_articles: bool = True, remove_punctuation: bool = True +) -> str: # Single translate operation name = name.lower().translate(CHAR_REMOVAL_TABLE) # Remove articles (combined if possible) - name = LEADING_ARTICLE_PATTERN.sub("", name) - name = COMMA_ARTICLE_PATTERN.sub("", name) + if remove_articles: + name = LEADING_ARTICLE_PATTERN.sub("", name) + name = COMMA_ARTICLE_PATTERN.sub("", name) # Remove punctuation and normalize spaces in one step - name = NON_WORD_SPACE_PATTERN.sub("", name) - name = MULTIPLE_SPACE_PATTERN.sub(" ", name).strip() + if remove_punctuation: + name = NON_WORD_SPACE_PATTERN.sub("", name) + name = MULTIPLE_SPACE_PATTERN.sub(" ", name) # Unicode normalization and accent removal if any(ord(c) > 127 for c in name): # Only if non-ASCII chars present normalized = unicodedata.normalize("NFD", name) name = "".join(c for c in normalized if not unicodedata.combining(c)) - return name + return name.strip() class MetadataHandler: @@ -93,8 +97,10 @@ class MetadataHandler: def normalize_cover_url(self, url: str) -> str: return url if not url else f"https:{url.replace('https:', '')}" - def 
normalize_search_term(self, name: str) -> str: - return _normalize_search_term(name) + def normalize_search_term( + self, name: str, remove_articles: bool = True, remove_punctuation: bool = True + ) -> str: + return _normalize_search_term(name, remove_articles, remove_punctuation) async def _ps2_opl_format(self, match: re.Match[str], search_term: str) -> str: serial_code = match.group(1) diff --git a/backend/handler/metadata/igdb_handler.py b/backend/handler/metadata/igdb_handler.py index e8773fe3c..1e744089c 100644 --- a/backend/handler/metadata/igdb_handler.py +++ b/backend/handler/metadata/igdb_handler.py @@ -507,7 +507,6 @@ class IGDBHandler(MetadataHandler): rom = await self._search_rom(search_term, platform_igdb_id) # IGDB search is fuzzy so no need to split the search term by special characters - if not rom: return fallback_rom diff --git a/backend/handler/metadata/sgdb_handler.py b/backend/handler/metadata/sgdb_handler.py index e504b145f..775482abd 100644 --- a/backend/handler/metadata/sgdb_handler.py +++ b/backend/handler/metadata/sgdb_handler.py @@ -47,17 +47,18 @@ class SGDBBaseHandler(MetadataHandler): return list(filter(None, results)) async def get_details_by_name(self, game_name: str) -> SGDBRom: - search_term = self.normalize_search_term(game_name) + search_term = self.normalize_search_term(game_name, remove_articles=False) games = await self.sgdb_service.search_games(term=search_term) if not games: log.debug(f"Could not find '{search_term}' on SteamGridDB") return SGDBRom(sgdb_id=None) # SGDB search is fuzzy so no need to split the search term by special characters - for game in games: game_name_lower = game["name"].lower() - game_name_normalized = self.normalize_search_term(game["name"]) + game_name_normalized = self.normalize_search_term( + game["name"], remove_articles=False + ) if ( game_name_lower == search_term.lower() diff --git a/backend/handler/metadata/ss_handler.py b/backend/handler/metadata/ss_handler.py index b5dc17416..10c838e84 
100644 --- a/backend/handler/metadata/ss_handler.py +++ b/backend/handler/metadata/ss_handler.py @@ -386,7 +386,10 @@ class SSHandler(MetadataHandler): search_term = await self._mame_format(search_term) fallback_rom = SSRom(ss_id=None, name=search_term) - normalized_search_term = self.normalize_search_term(search_term) + # SS API requires punctuation to match, so keep it in the search term + normalized_search_term = self.normalize_search_term( + search_term, remove_punctuation=False + ) res = await self._search_rom(normalized_search_term, platform_ss_id) # SS API doesn't handle some special characters well diff --git a/frontend/assets/scrappers/sgdb.png b/frontend/assets/scrappers/sgdb.png index 21d61c346..57c7a14c8 100644 Binary files a/frontend/assets/scrappers/sgdb.png and b/frontend/assets/scrappers/sgdb.png differ