From a9c9d658bbc24604909dcefc7af25e80bfe9100e Mon Sep 17 00:00:00 2001 From: Georges-Antoine Assi Date: Thu, 21 Sep 2023 09:30:02 -0400 Subject: [PATCH] Aggressive removal of file extensions and tags --- backend/utils/__init__.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/backend/utils/__init__.py b/backend/utils/__init__.py index 24525e8c1..3967ba7a3 100644 --- a/backend/utils/__init__.py +++ b/backend/utils/__init__.py @@ -95,9 +95,20 @@ def parse_tags(file_name: str) -> tuple: def get_file_name_with_no_tags(file_name: str) -> str: - # Use .rsplit to remove only the file extension - return re.sub(r"[\(\[].*?[\)\]]", "", file_name.rsplit(".", 1)[0]).strip() + # \[[^\]]+\]: Matches tags enclosed in square brackets, e.g., [rel-1] + # \([^)]+\): Matches tags enclosed in parentheses, e.g., (USA) + # (\.\w+)+$: Matches one or more file extensions, e.g., .zip or .nkit.iso + tags_extension_regex = r"(\s*\[[^\]]+\]\s*|\s*\([^)]+\)\s*)*(\.\w+)+$" + + # The regex is aggressive and may remove some of the title, + # but that's preferred over leaving tags/extensions in the title + return re.sub(tags_extension_regex, "", file_name).strip() def get_file_extension(rom: dict) -> str: - return rom["file_name"].split(".")[-1] if not rom["multi"] else "" + extension_regex = r"(\.\w+)+$" + return ( + re.search(extension_regex, rom["file_name"]).group(0) + if not rom["multi"] + else "" + )