# File: romm/backend/utils/update_hltb_api_url.py (Python, 136 lines, 4.5 KiB)

# Usage: uv run python -m utils.update_hltb_api_url
"""
Utility script that updates the stored HowLongToBeat API URL by discovering
the site's dynamic search endpoint from its website.
"""
import re
import sys
from pathlib import Path
from urllib.parse import urljoin

import httpx

from utils.context import create_httpx_client
# Patterns are compiled once at import time and reused by the functions below.

# <script src="..."> pointing at the Next.js `_app` chunk in its usual location,
# e.g. "/_next/static/chunks/pages/_app-<hash>.js".
APP_JS_REGEX = re.compile(
    r'src=["\']'
    r'(?P<path>\/_next\/static\/chunks\/pages\/_app[^"\']+\.js)'
    r'["\']'
)

# Looser variant: any script `src` whose path contains "_app" and ends in ".js".
APP_JS_FALLBACK_REGEX = re.compile(
    r'src=["\']'
    r'(?P<path>[^"\']*_app[^"\']+\.js)'
    r'["\']'
)

# Matches `"/api/<endpoint>/".concat("<part1>").concat("<part2>")` in the
# JavaScript and captures the endpoint name plus the two token fragments.
ENDPOINT_TOKEN_REGEX = re.compile(
    r'/api/(?P<endpoint>[a-zA-Z0-9_-]+)/["\']'
    r'\.concat\(["\'](?P<part1>[0-9a-zA-Z]+)["\']\)'
    r'\.concat\(["\'](?P<part2>[0-9a-zA-Z]+)["\']\)'
)
def fetch_hltb_app_script(base_url: str = "https://howlongtobeat.com") -> str | None:
    """Download the JavaScript of the site's Next.js ``_app`` chunk.

    The homepage HTML is fetched first and scanned for the ``<script src=...>``
    that references the ``_app`` chunk; that script is then downloaded.

    Args:
        base_url: Root URL of the site to scrape. A trailing slash is tolerated.

    Returns:
        The text of the ``_app`` JavaScript chunk, or ``None`` when no matching
        script tag is found in the homepage or when an HTTP/transport error
        occurs (the error is reported on stderr).
    """
    request_timeout = 15  # passed as `timeout=` to both requests
    # Normalise so "https://host" and "https://host/" yield the same URL.
    homepage_url = f"{base_url.rstrip('/')}/"

    try:
        with create_httpx_client() as client:
            # 1) Fetch homepage HTML
            resp = client.get(homepage_url, timeout=request_timeout)
            resp.raise_for_status()
            html = resp.text
            print(f"Fetched homepage: {homepage_url}")

            # 2) Find the Next.js _app chunk (typical pattern:
            #    "/_next/static/chunks/pages/_app-<hash>.js"); if that fails,
            #    fall back to any script path containing "_app" ending in .js.
            app_js_match = APP_JS_REGEX.search(html) or APP_JS_FALLBACK_REGEX.search(
                html
            )
            if not app_js_match:
                print("Could not locate HLTB _app JS chunk.")
                return None

            app_js_path = app_js_match.group("path")
            print(f"Located app JS path: {app_js_path}")

            # urljoin resolves the script reference the way a browser would:
            # absolute URLs are kept, protocol-relative ("//cdn/...") URLs
            # inherit the scheme, and root-relative or relative paths are
            # resolved against the homepage URL.
            app_js_url = urljoin(homepage_url, app_js_path)
            print(f"Constructed app JS URL: {app_js_url}")

            # 3) Download the _app JS chunk
            js_resp = client.get(app_js_url, timeout=request_timeout)
            js_resp.raise_for_status()
            js_code = js_resp.text
            print(f"Downloaded app JS chunk (size: {len(js_code)} chars)")
            return js_code
    except (httpx.RequestError, httpx.HTTPStatusError) as e:
        print(f"Error fetching HLTB app script: {e}", file=sys.stderr)
        return None
def discover_hltb_endpoint(base_url: str = "https://howlongtobeat.com") -> str | None:
    """Discover the current HLTB API endpoint by fetching and parsing the app script.

    Args:
        base_url: Root URL of the site. A trailing slash is tolerated.

    Returns:
        The resolved URL in the form ``<base_url>/api/<endpoint>/<part1><part2>``,
        or ``None`` when the app script cannot be fetched, the endpoint/token
        pattern is not found in it, or an ``OSError`` is raised along the way.
    """
    try:
        # 1) Fetch the app script
        js_code = fetch_hltb_app_script(base_url)
        if not js_code:
            print("Could not fetch HLTB app script; using default search endpoint")
            return None

        # 2) Extract the endpoint and tokens from the app script
        token_match = ENDPOINT_TOKEN_REGEX.search(js_code)
        if not token_match:
            print(
                "Could not extract HLTB endpoint and tokens from _app JS; using default search endpoint"
            )
            return None

        endpoint, part1, part2 = token_match.group("endpoint", "part1", "part2")
        print(f"Extracted endpoint: {endpoint}")
        print(f"Extracted token part1: {part1}")
        print(f"Extracted token part2: {part2}")

        # 3) Build the search URL. Strip a trailing slash from the base so the
        #    result never contains "//api/", matching how the script URL is
        #    built in fetch_hltb_app_script.
        search_url = f"{base_url.rstrip('/')}/api/{endpoint}/{part1}{part2}"
        print(f"Resolved HLTB search endpoint: {search_url}")
        return search_url
    except OSError as e:  # IOError is an alias of OSError, so one name suffices
        print(
            f"Unexpected error discovering HLTB endpoint from site: {e}",
            file=sys.stderr,
        )
        return None
def main() -> None:
    """Discover the current HLTB API URL and write it to the fixture file.

    The URL is written to ``handler/metadata/fixtures/hltb_api_url`` relative
    to the parent of this file's directory.

    Exits the process with status 1 when discovery fails or when the fixture
    file cannot be written; failure messages go to stderr.
    """
    print("Starting HLTB API URL discovery...")
    search_url = discover_hltb_endpoint()
    if not search_url:
        print("Failed to discover HLTB API URL", file=sys.stderr)
        sys.exit(1)

    # Write to the expected location
    fixture_path = (
        Path(__file__).parent.parent
        / "handler"
        / "metadata"
        / "fixtures"
        / "hltb_api_url"
    )
    try:
        # Explicit encoding keeps the output independent of the platform locale.
        fixture_path.write_text(search_url, encoding="utf-8")
    except OSError as e:
        print(f"Error writing to fixture file: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Successfully updated HLTB API URL to: {search_url}")
    print(f"Written to: {fixture_path}")


if __name__ == "__main__":
    main()