misc: Move cache initialization to startup script

Guarantee that cache is initialized during startup, and only once,
instead of every time a `MetadataHandler` object is instantiated.

Also, improve logic to determine `fixtures` paths.
This commit is contained in:
Michael Manganiello
2025-08-13 12:26:15 -03:00
parent 57e9a9f0fd
commit 80291f4be1
7 changed files with 125 additions and 101 deletions

View File

@@ -1,13 +1,12 @@
import enum
import json
import os
import re
import unicodedata
from functools import lru_cache
from itertools import batched
from pathlib import Path
from typing import Final, NotRequired, TypedDict
from handler.redis_handler import async_cache, sync_cache
from handler.redis_handler import async_cache
from logger.logger import log
from strsimpy.jaro_winkler import JaroWinkler
from tasks.scheduled.update_switch_titledb import (
@@ -19,22 +18,7 @@ from tasks.scheduled.update_switch_titledb import (
jarowinkler = JaroWinkler()
def conditionally_set_cache(
    index_key: str, filename: str, parent_dir: str = os.path.dirname(__file__)
) -> None:
    """Populate a Redis hash from a JSON fixture file unless the key already exists.

    Args:
        index_key: Redis key of the hash to populate.
        filename: Name of the JSON file inside the ``fixtures`` directory.
        parent_dir: Directory that contains the ``fixtures`` directory;
            defaults to the directory of this module.

    Any exception raised while checking, reading or writing is logged as a
    warning and suppressed, so this function never raises to its caller.
    """
    try:
        fixtures_path = os.path.join(parent_dir, "fixtures")
        if sync_cache.exists(index_key):
            return

        # Read through a context manager so the file handle is always closed,
        # and decode explicitly as UTF-8 instead of relying on the locale.
        with open(
            os.path.join(fixtures_path, filename), encoding="utf-8"
        ) as fixture_file:
            index_data = json.load(fixture_file)

        with sync_cache.pipeline() as pipe:
            # One HSET per batch of 2000 entries, all sent by a single execute()
            for data_batch in batched(index_data.items(), 2000, strict=False):
                data_map = {k: json.dumps(v) for k, v in data_batch}
                pipe.hset(index_key, mapping=data_map)
            pipe.execute()
    except Exception as e:
        # Log the error but don't fail - this allows migrations to run even if Redis is not available
        log.warning(f"Failed to initialize cache for {index_key}: {e}")
METADATA_FIXTURES_DIR: Final = Path(__file__).parent / "fixtures"
# These are loaded into the cache in update_switch_titledb_task
SWITCH_TITLEDB_REGEX: Final = re.compile(r"(70[0-9]{12})")
@@ -96,14 +80,6 @@ def _normalize_search_term(
class MetadataHandler:
def __init__(self):
    """Call ``conditionally_set_cache`` for each bundled metadata index."""
    # Initialize cache data lazily when the handler is first instantiated
    fixture_indexes = (
        (MAME_XML_KEY, "mame_index.json"),
        (PS2_OPL_KEY, "ps2_opl_index.json"),
        (PS1_SERIAL_INDEX_KEY, "ps1_serial_index.json"),
        (PS2_SERIAL_INDEX_KEY, "ps2_serial_index.json"),
        (PSP_SERIAL_INDEX_KEY, "psp_serial_index.json"),
    )
    for index_key, fixture_name in fixture_indexes:
        conditionally_set_cache(index_key, fixture_name)
def normalize_cover_url(self, url: str) -> str:
    """Return ``url`` prefixed with ``https:``, or ``url`` unchanged when it is falsy.

    Every existing ``https:`` occurrence is stripped first, so an already
    prefixed URL does not end up with the scheme twice.
    """
    if not url:
        return url

    without_scheme = url.replace("https:", "")
    return f"https:{without_scheme}"

View File

@@ -1,11 +1,10 @@
from __future__ import annotations
import json
import os
from functools import cached_property
from typing import TYPE_CHECKING
from pathlib import Path
from typing import TYPE_CHECKING, Final
from handler.metadata.base_hander import conditionally_set_cache
from handler.redis_handler import sync_cache
from models.base import (
FILE_EXTENSION_MAX_LENGTH,
@@ -19,6 +18,7 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
if TYPE_CHECKING:
from models.platform import Platform
FIRMWARE_FIXTURES_DIR: Final = Path(__file__).parent / "fixtures"
KNOWN_BIOS_KEY = "romm:known_bios_files"
@@ -47,13 +47,6 @@ class Firmware(BaseModel):
missing_from_fs: Mapped[bool] = mapped_column(default=False, nullable=False)
def __init__(self, **kwargs):
    """Initialize the model, then call ``conditionally_set_cache`` for the known-BIOS index.

    The keyword arguments are forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)

    # The fixtures directory is resolved relative to this module's directory
    module_dir = os.path.dirname(__file__)
    conditionally_set_cache(KNOWN_BIOS_KEY, "known_bios_files.json", module_dir)
@property
def platform_slug(self) -> str:
    """Slug of the platform referenced by ``self.platform``."""
    related_platform = self.platform
    return related_platform.slug

View File

@@ -9,12 +9,23 @@ from config import (
ENABLE_SCHEDULED_UPDATE_SWITCH_TITLEDB,
SENTRY_DSN,
)
from handler.metadata.base_hander import (
MAME_XML_KEY,
METADATA_FIXTURES_DIR,
PS1_SERIAL_INDEX_KEY,
PS2_OPL_KEY,
PS2_SERIAL_INDEX_KEY,
PSP_SERIAL_INDEX_KEY,
)
from handler.redis_handler import async_cache
from logger.logger import log
from models.firmware import FIRMWARE_FIXTURES_DIR, KNOWN_BIOS_KEY
from opentelemetry import trace
from tasks.scheduled.scan_library import scan_library_task
from tasks.scheduled.update_launchbox_metadata import update_launchbox_metadata_task
from tasks.scheduled.update_switch_titledb import update_switch_titledb_task
from utils import get_version
from utils.cache import conditionally_set_cache
from utils.context import initialize_context
tracer = trace.get_tracer(__name__)
@@ -38,6 +49,32 @@ async def main() -> None:
log.info("Starting scheduled update launchbox metadata")
update_launchbox_metadata_task.init()
log.info("Initializing cache with fixtures data")
await conditionally_set_cache(
async_cache, MAME_XML_KEY, METADATA_FIXTURES_DIR / "mame_index.json"
)
await conditionally_set_cache(
async_cache, PS2_OPL_KEY, METADATA_FIXTURES_DIR / "ps2_opl_index.json"
)
await conditionally_set_cache(
async_cache,
PS1_SERIAL_INDEX_KEY,
METADATA_FIXTURES_DIR / "ps1_serial_index.json",
)
await conditionally_set_cache(
async_cache,
PS2_SERIAL_INDEX_KEY,
METADATA_FIXTURES_DIR / "ps2_serial_index.json",
)
await conditionally_set_cache(
async_cache,
PSP_SERIAL_INDEX_KEY,
METADATA_FIXTURES_DIR / "psp_serial_index.json",
)
await conditionally_set_cache(
async_cache, KNOWN_BIOS_KEY, FIRMWARE_FIXTURES_DIR / "known_bios_files.json"
)
log.info("Startup tasks completed")

View File

@@ -1,6 +1,6 @@
import json
import re
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, patch
import pytest
from handler.metadata.base_hander import (
@@ -20,7 +20,6 @@ from handler.metadata.base_hander import (
MetadataHandler,
UniversalPlatformSlug,
_normalize_search_term,
conditionally_set_cache,
)
from handler.redis_handler import async_cache
@@ -98,73 +97,12 @@ class TestNormalizeSearchTerm:
assert cache_info2.hits == cache_info1.hits + 1
class TestConditionallySetCache:
    """Checks for conditionally_set_cache against a mocked sync Redis client."""

    def test_cache_not_exists_loads_data(self):
        """A missing key leads to a pipelined load of the fixture data."""
        with patch("handler.metadata.base_hander.sync_cache") as fake_cache:
            fake_cache.exists.return_value = False

            # Wire the pipeline context manager so `with ... as pipe` yields our mock
            pipe = MagicMock()
            pipeline_cm = fake_cache.pipeline.return_value
            pipeline_cm.__enter__.return_value = pipe
            pipeline_cm.__exit__.return_value = None

            conditionally_set_cache(MAME_XML_KEY, "mame_index.json")

            fake_cache.exists.assert_called_once_with(MAME_XML_KEY)
            fake_cache.pipeline.assert_called_once()
            pipe.hset.assert_called()
            pipe.execute.assert_called_once()

    def test_cache_exists_skips_loading(self):
        """An existing key means no pipeline is ever opened."""
        with patch("handler.metadata.base_hander.sync_cache") as fake_cache:
            fake_cache.exists.return_value = True

            conditionally_set_cache(MAME_XML_KEY, "mame_index.json")

            fake_cache.exists.assert_called_once_with(MAME_XML_KEY)
            fake_cache.pipeline.assert_not_called()

    def test_exception_handling(self):
        """A fixture file that cannot be loaded is swallowed before any pipeline use."""
        with patch("handler.metadata.base_hander.sync_cache") as fake_cache:
            fake_cache.exists.return_value = False

            conditionally_set_cache(MAME_XML_KEY, "nonexistent.json")

            fake_cache.pipeline.assert_not_called()
class TestMetadataHandlerInit:
    """Test MetadataHandler initialization."""

    @patch("handler.metadata.base_hander.conditionally_set_cache")
    def test_init_calls_cache_setup(self, mock_conditionally_set_cache):
        """Test that initialization calls cache setup for all required indexes."""
        MetadataHandler()

        expected_calls = [
            (MAME_XML_KEY, "mame_index.json"),
            (PS2_OPL_KEY, "ps2_opl_index.json"),
            (PS1_SERIAL_INDEX_KEY, "ps1_serial_index.json"),
            (PS2_SERIAL_INDEX_KEY, "ps2_serial_index.json"),
            (PSP_SERIAL_INDEX_KEY, "psp_serial_index.json"),
        ]

        assert mock_conditionally_set_cache.call_count == 5

        # Compare as sets rather than testing membership call by call: with a
        # membership check, one index set up twice could hide another that was
        # never set up at all. Together with the count above, set equality
        # means every expected index was requested exactly once.
        actual_calls = {
            call.args for call in mock_conditionally_set_cache.call_args_list
        }
        assert actual_calls == set(expected_calls)
class TestMetadataHandlerMethods:
"""Test MetadataHandler instance methods."""
@pytest.fixture
def handler(self):
with patch("handler.metadata.base_hander.conditionally_set_cache"):
return MetadataHandler()
return MetadataHandler()
def test_normalize_cover_url_with_url(self, handler: MetadataHandler):
"""Test URL normalization with valid URL."""

View File

@@ -0,0 +1,55 @@
from unittest.mock import AsyncMock
from handler.metadata.base_hander import MAME_XML_KEY, METADATA_FIXTURES_DIR
from handler.redis_handler import async_cache
from redis.asyncio import Redis as AsyncRedis
from utils.cache import conditionally_set_cache
class TestConditionallySetCache:
    """Checks for the async conditionally_set_cache helper."""

    @staticmethod
    def _patch_exists(mocker, *, present: bool):
        """Patch ``AsyncRedis.exists`` so that awaiting it yields ``present``."""
        return mocker.patch.object(
            AsyncRedis, "exists", side_effect=AsyncMock(return_value=present)
        )

    async def test_cache_not_exists_loads_data(self, mocker):
        """A missing key leads to a pipelined load of the fixture data."""
        exists_mock = self._patch_exists(mocker, present=False)
        pipe = AsyncMock()
        pipeline_mock = mocker.patch.object(AsyncRedis, "pipeline")
        # `async with cache.pipeline() as pipe` must hand back our AsyncMock
        pipeline_mock.return_value.__aenter__.return_value = pipe

        fixture_file = METADATA_FIXTURES_DIR / "mame_index.json"
        await conditionally_set_cache(async_cache, MAME_XML_KEY, fixture_file)

        exists_mock.assert_called_once_with(MAME_XML_KEY)
        pipeline_mock.return_value.__aenter__.assert_called_once()
        pipe.hset.assert_called()
        pipe.execute.assert_called_once()

    async def test_cache_exists_skips_loading(self, mocker):
        """An existing key means no pipeline is ever opened."""
        exists_mock = self._patch_exists(mocker, present=True)
        pipeline_mock = mocker.patch.object(AsyncRedis, "pipeline")

        fixture_file = METADATA_FIXTURES_DIR / "mame_index.json"
        await conditionally_set_cache(async_cache, MAME_XML_KEY, fixture_file)

        exists_mock.assert_called_once_with(MAME_XML_KEY)
        pipeline_mock.assert_not_called()

    async def test_exception_handling(self, mocker):
        """A fixture file that cannot be loaded is swallowed before any pipeline use."""
        self._patch_exists(mocker, present=False)
        pipeline_mock = mocker.patch.object(AsyncRedis, "pipeline")

        missing_file = METADATA_FIXTURES_DIR / "nonexistent.json"
        await conditionally_set_cache(async_cache, MAME_XML_KEY, missing_file)

        pipeline_mock.assert_not_called()

24
backend/utils/cache.py Normal file
View File

@@ -0,0 +1,24 @@
import json
from itertools import batched
from pathlib import Path
from anyio import open_file
from logger.logger import log
from redis.asyncio import Redis as AsyncRedis
async def conditionally_set_cache(cache: AsyncRedis, key: str, file_path: Path) -> None:
    """Set the content of a JSON file to the cache, if it does not already exist.

    Each top-level entry of the JSON document is stored as a field of the
    Redis hash at ``key``, with its value re-serialized as a JSON string.

    Args:
        cache: Async Redis client used for the existence check and the writes.
        key: Redis key of the hash to populate.
        file_path: Path to the JSON file to load.

    Any exception raised while checking, reading or writing is logged as a
    warning and suppressed, so this coroutine never raises to its caller.
    """
    try:
        if await cache.exists(key):
            return

        # Decode explicitly as UTF-8 rather than depending on the process locale
        async with await open_file(file_path, "r", encoding="utf-8") as file:
            index_data = json.loads(await file.read())

        async with cache.pipeline() as pipe:
            # One HSET per batch of 2000 entries, all sent by a single execute()
            for data_batch in batched(index_data.items(), 2000, strict=False):
                data_map = {k: json.dumps(v) for k, v in data_batch}
                await pipe.hset(key, mapping=data_map)
            await pipe.execute()
    except Exception as e:
        # Log the error but don't fail, so the caller can proceed even if
        # Redis or the fixture file is not available
        log.warning(f"Failed to initialize cache for {key}: {e}")

View File

@@ -284,6 +284,7 @@ else
error_log "Failed to run database migrations"
fi
# Startup process requires database and cache to be already available
run_startup
# main loop