optimize tv releases fetching

This commit is contained in:
FuzzyGrim
2025-07-11 21:11:03 +02:00
parent a789e10a44
commit 0d938c087d
2 changed files with 102 additions and 82 deletions

View File

@@ -24,35 +24,36 @@ def fetch_releases(user=None, items_to_process=None):
if not items_to_process:
return "No items to process"
events_bulk, skipped_items = process_items(items_to_process)
save_events(events_bulk)
events_bulk = process_items(items_to_process)
items_updated = save_events(events_bulk)
cleanup_invalid_events(events_bulk)
return generate_final_message(items_to_process, skipped_items)
return generate_final_message(items_to_process, items_updated)
def process_items(items_to_process):
"""Process items and categorize them."""
events_bulk = []
anime_to_process = []
skipped_items = set()
for item in items_to_process:
if item.media_type == MediaTypes.ANIME.value:
anime_to_process.append(item)
elif item.media_type == MediaTypes.TV.value:
process_tv(item, events_bulk, skipped_items)
process_tv(item, events_bulk)
elif item.media_type == MediaTypes.COMIC.value:
process_comic(item, events_bulk, skipped_items)
process_comic(item, events_bulk)
else:
process_other(item, events_bulk, skipped_items)
process_other(item, events_bulk)
process_anime_bulk(anime_to_process, events_bulk, skipped_items)
return events_bulk, skipped_items
process_anime_bulk(anime_to_process, events_bulk)
return events_bulk
def save_events(events_bulk):
"""Save events in bulk with proper conflict handling."""
items_updated = set()
# Get all existing events that match our bulk data
existing_events = Event.objects.filter(
item__in=[e.item for e in events_bulk],
@@ -75,6 +76,9 @@ def save_events(events_bulk):
to_update = []
for event in events_bulk:
if event.item not in items_updated:
items_updated.add(event.item)
if event.content_number is not None:
key = (event.item_id, event.content_number)
if key in existing_with_content:
@@ -104,32 +108,27 @@ def save_events(events_bulk):
len(to_update),
)
return items_updated
def generate_final_message(items_to_process, skipped_items):
def generate_final_message(items_to_process, items_updated):
"""Generate the final message summarizing the results."""
successful_items = [item for item in items_to_process if item not in skipped_items]
final_message_parts = []
processed_details = "\n".join(
f" - {item} ({item.get_media_type_display()})" for item in items_to_process
)
if successful_items:
if items_updated:
success_details = "\n".join(
f" - {item} ({item.get_media_type_display()})" for item in successful_items
f" - {item} ({item.get_media_type_display()})" for item in items_updated
)
final_message_parts.append(
f"Releases have been fetched for the following items:\n{success_details}",
)
if skipped_items:
skipped_details = "\n".join(
f" - {item} ({item.get_media_type_display()})" for item in skipped_items
)
final_message_parts.append(
f"The following items were skipped due to errors:\n{skipped_details}",
return (
f"Processed {len(items_to_process)} items:\n{processed_details}\n\n"
f"Releases updated for {len(items_updated)} items:\n{success_details}"
)
return (
"\n\n".join(final_message_parts)
if final_message_parts
else "No releases have been fetched for any items."
f"Processed {len(items_to_process)} items:\n{processed_details}\n\n"
f"No releases have been updated."
)
@@ -194,20 +193,25 @@ def get_items_to_process(user=None):
items = Item.objects.filter(query).distinct()
return exclude_items_to_fetch(items)
return filter_items_to_fetch(items)
def exclude_items_to_fetch(items):
def filter_items_to_fetch(items):
"""Filter items that need calendar events according to specific rules.
1. Always include if item has no events
2. For items with events:
- If comic: only include if latest event is within 365 days
- If TV show: always include
- If TV show: ignore if all seasons aired in the past
- Other media types: only include if has future events
"""
now = timezone.now()
one_year_ago = now - timezone.timedelta(days=365)
sentinel_datetime = datetime.min.replace(tzinfo=ZoneInfo("UTC"))
# Handle TV items with a single optimized query
tv_items = items.filter(media_type=MediaTypes.TV.value)
tv_items_to_include = get_tv_items_to_include(tv_items, now, sentinel_datetime)
# Subquery for future events
future_events = Event.objects.filter(
@@ -221,21 +225,58 @@ def exclude_items_to_fetch(items):
item__media_type=MediaTypes.COMIC.value,
).order_by("-datetime")
return items.annotate(
annotated = items.annotate(
has_future_events=Exists(future_events),
latest_comic_event_datetime=Subquery(latest_comic_event.values("datetime")[:1]),
).filter(
Q(event__isnull=True) # No events at all - always include
| (
Q(media_type=MediaTypes.COMIC.value)
& Q(latest_comic_event_datetime__gte=one_year_ago)
) # Comics with recent events
| Q(media_type=MediaTypes.TV.value) # TV shows - always include
| Q(has_future_events=True), # Other media types with future events
)
tv_q = Q(id__in=tv_items_to_include)
def process_anime_bulk(items, events_bulk, skipped_items):
comic_q = Q(media_type=MediaTypes.COMIC.value) & (
Q(event__isnull=True) | Q(latest_comic_event_datetime__gte=one_year_ago)
)
other_q = ~Q(media_type__in=[MediaTypes.TV.value, MediaTypes.COMIC.value]) & (
Q(event__isnull=True) | Q(has_future_events=True)
)
return annotated.filter(tv_q | comic_q | other_q).distinct()
def get_tv_items_to_include(tv_items, now, sentinel_datetime):
"""Return list of TV item ids that passed the season checks."""
if not tv_items.exists():
return []
tv_media_ids = list(tv_items.values_list("media_id", flat=True))
season_events = Event.objects.filter(
item__media_id__in=tv_media_ids,
item__media_type=MediaTypes.SEASON.value,
).select_related("item")
events_by_show = {}
for ev in season_events:
key = (ev.item.media_id, ev.item.source)
events_by_show.setdefault(key, []).append(ev)
tv_items_to_include = []
for tv_item in tv_items:
key = (tv_item.media_id, tv_item.source)
show_events = events_by_show.get(key, [])
if (
not show_events
or any(ev.datetime >= now for ev in show_events)
or all(ev.datetime == sentinel_datetime for ev in show_events)
):
tv_items_to_include.append(tv_item.id)
return tv_items_to_include
def process_anime_bulk(items, events_bulk):
"""Process multiple anime items and add events to the event list."""
if not items:
return
@@ -265,7 +306,7 @@ def process_anime_bulk(items, events_bulk, skipped_items):
item.title,
item.media_id,
)
process_other(item, events_bulk, skipped_items)
process_other(item, events_bulk)
def get_anime_schedule_bulk(media_ids):
@@ -352,7 +393,7 @@ def get_anime_schedule_bulk(media_ids):
return all_data
def process_tv(tv_item, events_bulk, skipped_items):
def process_tv(tv_item, events_bulk):
"""Process TV item and create events for all seasons and episodes.
Only processes:
@@ -366,6 +407,7 @@ def process_tv(tv_item, events_bulk, skipped_items):
seasons_to_process = get_seasons_to_process(tv_item)
if not seasons_to_process:
logger.info("%s - No seasons need processing", tv_item)
return
# Fetch and process season data
@@ -376,12 +418,8 @@ def process_tv(tv_item, events_bulk, skipped_items):
"Failed to fetch metadata for %s",
tv_item,
)
if tv_item not in skipped_items:
skipped_items.add(tv_item)
except Exception:
logger.exception("Error processing %s", tv_item)
if tv_item not in skipped_items:
skipped_items.add(tv_item)
def get_seasons_to_process(tv_item):
@@ -424,7 +462,6 @@ def get_seasons_to_process(tv_item):
seasons_to_process.append(season_num)
if not seasons_to_process:
logger.info("%s - No seasons need processing", tv_item)
return []
logger.info(
@@ -617,7 +654,7 @@ def get_tvmaze_response(tvdb_id):
return {}
def process_comic(item, events_bulk, skipped_items):
def process_comic(item, events_bulk):
"""Process comic item and add events to the event list."""
logger.info("Fetching releases for %s", item)
try:
@@ -631,9 +668,6 @@ def process_comic(item, events_bulk, skipped_items):
"Failed to fetch metadata for %s",
item,
)
if item not in skipped_items:
skipped_items.add(item)
return
# get latest event
latest_event = Event.objects.filter(item=item).order_by("-datetime").first()
@@ -650,8 +684,6 @@ def process_comic(item, events_bulk, skipped_items):
"Failed to fetch issue metadata for %s",
item,
)
if item not in skipped_items:
skipped_items.add(item)
return
if issue_metadata["store_date"]:
@@ -670,7 +702,7 @@ def process_comic(item, events_bulk, skipped_items):
)
def process_other(item, events_bulk, skipped_items):
def process_other(item, events_bulk):
"""Process other types of items and add events to the event list."""
logger.info("Fetching releases for %s", item)
try:
@@ -684,8 +716,6 @@ def process_other(item, events_bulk, skipped_items):
"Failed to fetch metadata for %s",
item,
)
if item not in skipped_items:
skipped_items.add(item)
return
date_key = media_type_config.get_date_key(item.media_type)

View File

@@ -153,7 +153,7 @@ class ReloadCalendarTaskTests(TestCase):
):
"""Test fetch_releases with all media types."""
# Setup mocks
mock_process_tv.side_effect = lambda _, events_bulk, __: events_bulk.append(
mock_process_tv.side_effect = lambda _, events_bulk: events_bulk.append(
Event(
item=self.season_item,
content_number=1,
@@ -161,7 +161,7 @@ class ReloadCalendarTaskTests(TestCase):
),
)
mock_process_other.side_effect = (
lambda item, events_bulk, __: events_bulk.append(
lambda item, events_bulk: events_bulk.append(
Event(
item=item,
content_number=1,
@@ -170,7 +170,7 @@ class ReloadCalendarTaskTests(TestCase):
)
)
# Setup mock for process_anime_bulk to create events for anime items
mock_process_anime_bulk.side_effect = lambda items, events_bulk, __: [
mock_process_anime_bulk.side_effect = lambda items, events_bulk: [
events_bulk.append(
Event(
item=item,
@@ -215,7 +215,7 @@ class ReloadCalendarTaskTests(TestCase):
"""Test fetch_releases with specific items to process."""
# Setup mock
mock_process_other.side_effect = (
lambda item, events_bulk, _: events_bulk.append(
lambda item, events_bulk: events_bulk.append(
Event(
item=item,
content_number=1,
@@ -398,8 +398,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_tv(self.tv_item, events_bulk, skipped_items)
process_tv(self.tv_item, events_bulk)
# Verify events were added
self.assertEqual(len(events_bulk), 6)
@@ -425,8 +424,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_other(self.movie_item, events_bulk, skipped_items)
process_other(self.movie_item, events_bulk)
# Verify event was added
self.assertEqual(len(events_bulk), 1)
@@ -453,8 +451,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_other(self.book_item, events_bulk, skipped_items)
process_other(self.book_item, events_bulk)
# Verify event was added
self.assertEqual(len(events_bulk), 1)
@@ -478,8 +475,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_other(self.manga_item, events_bulk, skipped_items)
process_other(self.manga_item, events_bulk)
# Verify event was added
self.assertEqual(len(events_bulk), 1)
@@ -714,8 +710,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_other(self.movie_item, events_bulk, skipped_items)
process_other(self.movie_item, events_bulk)
# Verify no events were added
self.assertEqual(len(events_bulk), 0)
@@ -730,8 +725,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_other(self.movie_item, events_bulk, skipped_items)
process_other(self.movie_item, events_bulk)
# Verify no events were added
self.assertEqual(len(events_bulk), 0)
@@ -762,7 +756,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process anime items
events_bulk = []
process_anime_bulk([self.anime_item], events_bulk, set())
process_anime_bulk([self.anime_item], events_bulk)
# Verify events were added
self.assertEqual(len(events_bulk), 1)
@@ -788,7 +782,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process anime items
events_bulk = []
process_anime_bulk([self.anime_item], events_bulk, set())
process_anime_bulk([self.anime_item], events_bulk)
# Verify anime added from default source
self.assertEqual(len(events_bulk), 1)
@@ -810,8 +804,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item - should not raise exception
events_bulk = []
skipped_items = set()
process_other(self.movie_item, events_bulk, skipped_items)
process_other(self.movie_item, events_bulk)
# Verify no events were added
self.assertEqual(len(events_bulk), 0)
@@ -843,8 +836,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_comic(comic_item, events_bulk, skipped_items)
process_comic(comic_item, events_bulk)
# Verify event was added
self.assertEqual(len(events_bulk), 1)
@@ -889,8 +881,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_comic(comic_item, events_bulk, skipped_items)
process_comic(comic_item, events_bulk)
# Verify event was added
self.assertEqual(len(events_bulk), 1)
@@ -928,8 +919,7 @@ class ReloadCalendarTaskTests(TestCase):
# Process the item
events_bulk = []
skipped_items = []
process_comic(comic_item, events_bulk, skipped_items)
process_comic(comic_item, events_bulk)
# Verify no event was added
self.assertEqual(len(events_bulk), 0)