mirror of
https://github.com/rommapp/romm.git
synced 2026-06-29 23:35:47 +00:00
add options to normalization
This commit is contained in:
@@ -52,7 +52,7 @@ PS2_SERIAL_INDEX_KEY: Final = "romm:ps2_serial_index"
|
||||
PSP_SERIAL_INDEX_KEY: Final = "romm:psp_serial_index"
|
||||
|
||||
# Patterns used when normalizing search terms. They are case-sensitive and
# only list lowercase articles, so the term must be lowercased before use.

# An article ("a", "an", "the") at the very start of the term.
LEADING_ARTICLE_PATTERN = re.compile(r"^(a|an|the)\b")
# A trailing ", a" / ", an" / ", the" (e.g. "legend of zelda, the").
# Anchored to the end of the term so a comma followed by an article in the
# middle of a title is left untouched.
COMMA_ARTICLE_PATTERN = re.compile(r",\s(a|an|the)\b$")
# Any character that is neither a word character nor whitespace.
NON_WORD_SPACE_PATTERN = re.compile(r"[^\w\s]")
# One or more consecutive whitespace characters.
MULTIPLE_SPACE_PATTERN = re.compile(r"\s+")
|
||||
|
||||
@@ -61,24 +61,28 @@ CHAR_REMOVAL_TABLE = str.maketrans("_'\"", " ")
|
||||
|
||||
# Results are cached so the same search term is not normalized repeatedly.
@lru_cache(maxsize=1024)
def _normalize_search_term(
    name: str, remove_articles: bool = True, remove_punctuation: bool = True
) -> str:
    """Return a lowercased, accent-free form of *name* for metadata searches.

    Args:
        name: Raw title or search term.
        remove_articles: When true, drop a leading "a"/"an"/"the" and a
            trailing ", a"/", an"/", the".
        remove_punctuation: When true, drop every character that is neither
            a word character nor whitespace.

    Returns:
        The normalized term, with whitespace runs collapsed to one space and
        no leading or trailing whitespace.
    """
    # Lowercase and apply the module-level CHAR_REMOVAL_TABLE in one pass.
    # Trim afterwards: the article patterns are anchored to the start/end of
    # the string, so whitespace at either edge would stop them from matching.
    name = name.lower().translate(CHAR_REMOVAL_TABLE).strip()

    if remove_articles:
        name = LEADING_ARTICLE_PATTERN.sub("", name)
        name = COMMA_ARTICLE_PATTERN.sub("", name)

    if remove_punctuation:
        name = NON_WORD_SPACE_PATTERN.sub("", name)

    # Collapse whitespace on every path: the translate step and the removals
    # above can leave runs of spaces even when punctuation is kept.
    name = MULTIPLE_SPACE_PATTERN.sub(" ", name)

    # Accent removal, skipped entirely for pure-ASCII terms.
    if any(ord(c) > 127 for c in name):
        # NFD splits accented characters into base character + combining
        # marks; dropping the combining marks leaves the base characters.
        decomposed = unicodedata.normalize("NFD", name)
        name = "".join(c for c in decomposed if not unicodedata.combining(c))

    return name.strip()
|
||||
|
||||
|
||||
class MetadataHandler:
|
||||
@@ -93,8 +97,10 @@ class MetadataHandler:
|
||||
def normalize_cover_url(self, url: str) -> str:
    """Return *url* with an ``https:`` scheme.

    A falsy *url* (empty string or ``None``) is returned unchanged.
    Protocol-relative URLs (``//host/path``) get ``https:`` prepended, and a
    leading ``https:`` or ``http:`` scheme is replaced by ``https:``.

    Only a scheme at the very start of the string is rewritten, so a URL
    that embeds another URL (e.g. in its query string) is not corrupted.
    """
    if not url:
        return url

    for scheme in ("https:", "http:"):
        if url.startswith(scheme):
            return f"https:{url[len(scheme):]}"

    return f"https:{url}"
|
||||
|
||||
def normalize_search_term(
    self, name: str, remove_articles: bool = True, remove_punctuation: bool = True
) -> str:
    """Normalize *name* through the cached module-level helper.

    ``remove_articles`` and ``remove_punctuation`` are forwarded unchanged to
    ``_normalize_search_term``.
    """
    normalized = _normalize_search_term(name, remove_articles, remove_punctuation)
    return normalized
|
||||
|
||||
async def _ps2_opl_format(self, match: re.Match[str], search_term: str) -> str:
|
||||
serial_code = match.group(1)
|
||||
|
||||
@@ -507,7 +507,6 @@ class IGDBHandler(MetadataHandler):
|
||||
rom = await self._search_rom(search_term, platform_igdb_id)
|
||||
|
||||
# IGDB search is fuzzy so no need to split the search term by special characters
|
||||
|
||||
if not rom:
|
||||
return fallback_rom
|
||||
|
||||
|
||||
@@ -47,17 +47,18 @@ class SGDBBaseHandler(MetadataHandler):
|
||||
return list(filter(None, results))
|
||||
|
||||
async def get_details_by_name(self, game_name: str) -> SGDBRom:
|
||||
search_term = self.normalize_search_term(game_name)
|
||||
search_term = self.normalize_search_term(game_name, remove_articles=False)
|
||||
games = await self.sgdb_service.search_games(term=search_term)
|
||||
if not games:
|
||||
log.debug(f"Could not find '{search_term}' on SteamGridDB")
|
||||
return SGDBRom(sgdb_id=None)
|
||||
|
||||
# SGDB search is fuzzy so no need to split the search term by special characters
|
||||
|
||||
for game in games:
|
||||
game_name_lower = game["name"].lower()
|
||||
game_name_normalized = self.normalize_search_term(game["name"])
|
||||
game_name_normalized = self.normalize_search_term(
|
||||
game["name"], remove_articles=False
|
||||
)
|
||||
|
||||
if (
|
||||
game_name_lower == search_term.lower()
|
||||
|
||||
@@ -386,7 +386,10 @@ class SSHandler(MetadataHandler):
|
||||
search_term = await self._mame_format(search_term)
|
||||
fallback_rom = SSRom(ss_id=None, name=search_term)
|
||||
|
||||
normalized_search_term = self.normalize_search_term(search_term)
|
||||
## SS API requires punctuation to match
|
||||
normalized_search_term = self.normalize_search_term(
|
||||
search_term, remove_punctuation=False
|
||||
)
|
||||
res = await self._search_rom(normalized_search_term, platform_ss_id)
|
||||
|
||||
# SS API doesn't handle some special characters well
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 39 KiB |
Reference in New Issue
Block a user