Files
mailcow-logs-viewer/backend/app/scheduler.py
2026-01-14 23:02:39 +02:00

1608 lines
63 KiB
Python

"""
Background scheduler
"""
import logging
import asyncio
import hashlib
import re
import httpx
from datetime import datetime, timedelta, timezone
from typing import Set, Optional, List, Dict, Any
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.cron import CronTrigger
from sqlalchemy.orm import Session
from sqlalchemy import desc, or_
from sqlalchemy.exc import IntegrityError
from .config import settings, set_cached_active_domains
from .database import get_db_context
from .mailcow_api import mailcow_api
from .models import PostfixLog, RspamdLog, NetfilterLog, MessageCorrelation
from .correlation import detect_direction, parse_postfix_message
from .models import DomainDNSCheck
from .routers.domains import check_domain_dns, save_dns_check_to_db
from .services.dmarc_imap_service import sync_dmarc_reports_from_imap
from .services.dmarc_notifications import send_dmarc_error_notification
from .services import geoip_service
from .services.geoip_downloader import (
update_geoip_database_if_needed,
is_license_configured,
get_geoip_status
)
from .services.geoip_downloader import is_license_configured
logger = logging.getLogger(__name__)
# Job execution tracking
# In-memory status map for every scheduled job. Each entry records:
#   last_run: datetime (UTC) of the most recent state change
#   status:   'idle' | 'running' | 'success' | 'failed'
#   error:    last error message, or None
# Mutated only via update_job_status(); read via get_job_status().
job_status = {
    'fetch_logs': {'last_run': None, 'status': 'idle', 'error': None},
    'complete_correlations': {'last_run': None, 'status': 'idle', 'error': None},
    'update_final_status': {'last_run': None, 'status': 'idle', 'error': None},
    'expire_correlations': {'last_run': None, 'status': 'idle', 'error': None},
    'cleanup_logs': {'last_run': None, 'status': 'idle', 'error': None},
    'check_app_version': {'last_run': None, 'status': 'idle', 'error': None},
    'dns_check': {'last_run': None, 'status': 'idle', 'error': None},
    'update_geoip': {'last_run': None, 'status': 'idle', 'error': None},
    'dmarc_imap_sync': {'last_run': None, 'status': 'idle', 'error': None}
}
def update_job_status(job_name: str, status: str, error: Optional[str] = None):
    """Record the latest execution state for a scheduled job.

    Args:
        job_name: Key into the module-level ``job_status`` map.
        status: New state, e.g. 'running', 'success', 'failed' or 'idle'.
        error: Optional error message for failed runs.

    Note: replaces the whole entry, stamping last_run with the current UTC time.
    """
    job_status[job_name] = {
        'last_run': datetime.now(timezone.utc),
        'status': status,
        'error': error
    }
def get_job_status():
    """Return the live in-memory status map for all scheduled jobs."""
    return job_status
# App version cache (shared with status router)
# Written by check_app_version_update(); exposed via get_app_version_cache().
app_version_cache = {
    "checked_at": None,            # datetime (UTC) of the last GitHub check
    "current_version": None,       # Will be set on first check (from .version)
    "latest_version": None,        # latest release tag seen on GitHub ('unknown' on error)
    "update_available": False,
    "changelog": None              # release notes body from GitHub
}
def _version_tuple(version: str) -> Optional[tuple]:
    """Best-effort parse of a dotted numeric version ('1.2.3') into an int tuple.

    Returns None when the string is not purely dotted integers (e.g.
    '1.2.3-beta' or 'unknown'), letting the caller fall back to a plain
    string comparison.
    """
    try:
        return tuple(int(part) for part in version.split('.'))
    except (AttributeError, ValueError):
        return None


async def check_app_version_update():
    """
    Check for app version updates from GitHub and update the cache.
    This function is called by the scheduler and can also be called from the API endpoint.
    """
    update_job_status('check_app_version', 'running')
    global app_version_cache
    # Get current version from VERSION file
    try:
        from .version import __version__
        current_version = __version__
        app_version_cache["current_version"] = current_version
    except Exception as e:
        logger.error(f"Failed to read current version: {e}")
        update_job_status('check_app_version', 'failed', str(e))
        return
    logger.info("Checking app version and updates from GitHub...")
    # Check GitHub for latest version
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.get(
                "https://api.github.com/repos/ShlomiPorush/mailcow-logs-viewer/releases/latest"
            )
            if response.status_code == 200:
                release_data = response.json()
                latest_version = release_data.get('tag_name', 'unknown')
                # Remove 'v' prefix if present
                if latest_version.startswith('v'):
                    latest_version = latest_version[1:]
                changelog = release_data.get('body', '')
                app_version_cache["latest_version"] = latest_version
                app_version_cache["changelog"] = changelog
                # BUGFIX: the old plain inequality flagged an "update" whenever
                # the local version merely differed (e.g. a dev build ahead of
                # the last release). Compare numerically when both sides parse
                # and only flag when the release is strictly newer; fall back
                # to the old inequality for non-numeric tags.
                current_t = _version_tuple(current_version)
                latest_t = _version_tuple(latest_version)
                if current_t is not None and latest_t is not None:
                    app_version_cache["update_available"] = latest_t > current_t
                else:
                    app_version_cache["update_available"] = current_version != latest_version
                logger.info(f"App version check: Current={current_version}, Latest={latest_version}")
                update_job_status('check_app_version', 'success')
            else:
                logger.warning(f"GitHub API returned status {response.status_code}")
                app_version_cache["latest_version"] = "unknown"
                app_version_cache["update_available"] = False
                update_job_status('check_app_version', 'failed', f"GitHub API returned {response.status_code}")
    except Exception as e:
        logger.error(f"Failed to check GitHub for app updates: {e}")
        app_version_cache["latest_version"] = "unknown"
        app_version_cache["update_available"] = False
        update_job_status('check_app_version', 'failed', str(e))
    app_version_cache["checked_at"] = datetime.now(timezone.utc)
def get_app_version_cache():
    """Return the module-level app version cache (consumed by the API endpoint)."""
    return app_version_cache
scheduler = AsyncIOScheduler()
# Process-local dedup caches of already-imported log entries. Each fetcher
# builds a string key per entry and skips keys it has seen before; the sets
# are cleared once they exceed ~10k entries to bound memory use.
seen_postfix: Set[str] = set()
seen_rspamd: Set[str] = set()
seen_netfilter: Set[str] = set()
# UTC timestamp of the most recent run of each fetcher (set at fetch start).
last_fetch_run_time: Dict[str, Optional[datetime]] = {
    'postfix': None,
    'rspamd': None,
    'netfilter': None
}
def is_blacklisted(email: Optional[str]) -> bool:
    """
    Check if email is in blacklist.
    Args:
        email: Email address to check (None/empty is never blacklisted)
    Returns:
        True if blacklisted, False otherwise
    """
    if not email:
        return False
    configured = settings.blacklist_emails_list
    if not configured:
        return False
    candidate = email.lower().strip()
    blocked = candidate in configured
    if blocked:
        logger.debug(f"Blacklist: blocking {candidate}")
    return blocked
async def fetch_and_store_postfix():
    """Fetch Postfix logs from API and store in DB.

    Two passes over the fetched batch:
      1. Collect queue IDs whose sender or recipient is blacklisted and purge
         any already-stored logs/correlations for those queues.
      2. Import the remaining entries, deduplicating via the process-local
         ``seen_postfix`` set and the DB unique constraint (IntegrityError).
    """
    last_fetch_run_time['postfix'] = datetime.now(timezone.utc)
    try:
        logs = await mailcow_api.get_postfix_logs(count=settings.fetch_count_postfix)
        if not logs:
            return
        with get_db_context() as db:
            new_count = 0
            skipped_count = 0
            skipped_blacklist = 0
            blacklisted_queue_ids: Set[str] = set()
            # Pass 1: identify queues touching a blacklisted address
            for log_entry in logs:
                message = log_entry.get('message', '')
                parsed = parse_postfix_message(message)
                queue_id = parsed.get('queue_id')
                if not queue_id:
                    continue
                sender = parsed.get('sender')
                recipient = parsed.get('recipient')
                if is_blacklisted(sender) or is_blacklisted(recipient):
                    blacklisted_queue_ids.add(queue_id)
                    logger.debug(f"Blacklist: Queue ID {queue_id} marked for deletion (sender={sender}, recipient={recipient})")
            # Delete existing logs with blacklisted queue IDs
            if blacklisted_queue_ids:
                deleted_count = db.query(PostfixLog).filter(
                    PostfixLog.queue_id.in_(blacklisted_queue_ids)
                ).delete(synchronize_session=False)
                # Also delete correlations for these queue IDs
                db.query(MessageCorrelation).filter(
                    MessageCorrelation.queue_id.in_(blacklisted_queue_ids)
                ).delete(synchronize_session=False)
                if deleted_count > 0:
                    logger.info(f"[BLACKLIST] Deleted {deleted_count} Postfix logs for {len(blacklisted_queue_ids)} blacklisted queue IDs")
                db.commit()
            # Pass 2: import new entries
            for log_entry in logs:
                try:
                    time_str = str(log_entry.get('time', ''))
                    message = log_entry.get('message', '')
                    # time + first 100 chars of the message = process-local dedup key
                    unique_id = f"{time_str}:{message[:100]}"
                    if unique_id in seen_postfix:
                        skipped_count += 1
                        continue
                    # Parse message for fields
                    parsed = parse_postfix_message(message)
                    queue_id = parsed.get('queue_id')
                    # Skip if queue ID is blacklisted
                    if queue_id and queue_id in blacklisted_queue_ids:
                        skipped_blacklist += 1
                        seen_postfix.add(unique_id)
                        continue
                    # Parse timestamp with timezone
                    timestamp = datetime.fromtimestamp(
                        int(log_entry.get('time', 0)),
                        tz=timezone.utc
                    )
                    sender = parsed.get('sender')
                    recipient = parsed.get('recipient')
                    postfix_log = PostfixLog(
                        time=timestamp,
                        program=log_entry.get('program'),
                        priority=log_entry.get('priority'),
                        message=message,
                        queue_id=queue_id,
                        message_id=parsed.get('message_id'),
                        sender=sender,
                        recipient=recipient,
                        status=parsed.get('status'),
                        relay=parsed.get('relay'),
                        delay=parsed.get('delay'),
                        dsn=parsed.get('dsn'),
                        raw_data=log_entry
                    )
                    db.add(postfix_log)
                    # Flush immediately so a DB-level duplicate raises here,
                    # inside this entry's try block
                    db.flush()
                    seen_postfix.add(unique_id)
                    new_count += 1
                except IntegrityError:
                    # Duplicate log - skip silently
                    db.rollback()
                    seen_postfix.add(unique_id)
                    skipped_count += 1
                    continue
                except Exception as e:
                    logger.error(f"Error processing Postfix log: {e}")
                    db.rollback()
                    continue
            db.commit()
            if new_count > 0 or skipped_count > 0:
                msg = f"[OK] Imported {new_count} Postfix logs"
                if skipped_count > 0:
                    msg += f" (skipped {skipped_count} duplicates)"
                if skipped_blacklist > 0:
                    msg += f" (skipped {skipped_blacklist} blacklisted)"
                logger.info(msg)
        # Bound the dedup cache so it cannot grow without limit
        if len(seen_postfix) > 10000:
            seen_postfix.clear()
    except Exception as e:
        logger.error(f"[ERROR] Postfix fetch error: {e}")
async def fetch_and_store_rspamd():
    """Fetch Rspamd logs from API and store in DB.

    Deduplicates per process via ``seen_rspamd``, skips entries whose sender
    or any recipient is blacklisted, enriches stored rows with GeoIP data
    when the databases are available, and purges correlations / Postfix logs
    already linked to blacklisted message IDs.
    """
    last_fetch_run_time['rspamd'] = datetime.now(timezone.utc)
    try:
        logs = await mailcow_api.get_rspamd_logs(count=settings.fetch_count_rspamd)
        if not logs:
            return
        with get_db_context() as db:
            new_count = 0
            skipped_blacklist = 0
            # message-ids from this batch that hit the blacklist; used below
            # to purge previously-stored correlations and Postfix logs
            blacklisted_message_ids: Set[str] = set()
            for log_entry in logs:
                try:
                    unix_time = log_entry.get('unix_time', 0)
                    message_id = log_entry.get('message-id', '')
                    # Rspamd reports 'undef' when the Message-ID header is absent
                    if message_id == 'undef' or not message_id:
                        message_id = None
                    sender = log_entry.get('sender_smtp')
                    recipients = log_entry.get('rcpt_smtp', [])
                    # Process-local dedup key
                    unique_id = f"{unix_time}:{message_id if message_id else 'no-id'}"
                    if unique_id in seen_rspamd:
                        continue
                    if is_blacklisted(sender):
                        skipped_blacklist += 1
                        seen_rspamd.add(unique_id)
                        if message_id:
                            blacklisted_message_ids.add(message_id)
                        continue
                    if recipients and any(is_blacklisted(r) for r in recipients):
                        skipped_blacklist += 1
                        seen_rspamd.add(unique_id)
                        if message_id:
                            blacklisted_message_ids.add(message_id)
                        continue
                    timestamp = datetime.fromtimestamp(unix_time, tz=timezone.utc)
                    direction = detect_direction(log_entry)
                    rspamd_log = RspamdLog(
                        time=timestamp,
                        message_id=message_id,
                        sender_smtp=sender,
                        sender_mime=log_entry.get('sender_mime', sender),
                        recipients_smtp=recipients,
                        recipients_mime=log_entry.get('rcpt_mime', recipients),
                        subject=log_entry.get('subject'),
                        score=log_entry.get('score', 0.0),
                        required_score=log_entry.get('required_score', 15.0),
                        action=log_entry.get('action', 'unknown'),
                        symbols=log_entry.get('symbols', {}),
                        # Any action that rejects or tags the mail counts as spam
                        is_spam=(log_entry.get('action') in ['reject', 'add header', 'rewrite subject']),
                        has_auth=('MAILCOW_AUTH' in log_entry.get('symbols', {})),
                        direction=direction,
                        ip=log_entry.get('ip'),
                        user=log_entry.get('user'),
                        size=log_entry.get('size'),
                        raw_data=log_entry
                    )
                    # Enrich with GeoIP info when the databases are present
                    if geoip_service.is_geoip_available() and rspamd_log.ip:
                        geo_info = geoip_service.lookup_ip(rspamd_log.ip)
                        rspamd_log.country_code = geo_info.get('country_code')
                        rspamd_log.country_name = geo_info.get('country_name')
                        rspamd_log.city = geo_info.get('city')
                        rspamd_log.asn = geo_info.get('asn')
                        rspamd_log.asn_org = geo_info.get('asn_org')
                    db.add(rspamd_log)
                    seen_rspamd.add(unique_id)
                    new_count += 1
                except Exception as e:
                    logger.error(f"Error processing Rspamd log: {e}")
                    continue
            # Purge previously-stored data tied to blacklisted message IDs
            if blacklisted_message_ids:
                correlations_to_delete = db.query(MessageCorrelation).filter(
                    MessageCorrelation.message_id.in_(blacklisted_message_ids)
                ).all()
                # Collect queue IDs first so linked Postfix logs can be removed too
                queue_ids_to_delete = set()
                for corr in correlations_to_delete:
                    if corr.queue_id:
                        queue_ids_to_delete.add(corr.queue_id)
                deleted_corr = db.query(MessageCorrelation).filter(
                    MessageCorrelation.message_id.in_(blacklisted_message_ids)
                ).delete(synchronize_session=False)
                if queue_ids_to_delete:
                    deleted_postfix = db.query(PostfixLog).filter(
                        PostfixLog.queue_id.in_(queue_ids_to_delete)
                    ).delete(synchronize_session=False)
                    if deleted_postfix > 0:
                        logger.info(f"[BLACKLIST] Deleted {deleted_postfix} Postfix logs linked to blacklisted messages")
                if deleted_corr > 0:
                    logger.info(f"[BLACKLIST] Deleted {deleted_corr} correlations for blacklisted message IDs")
            db.commit()
            if new_count > 0:
                msg = f"[OK] Imported {new_count} Rspamd logs"
                if skipped_blacklist > 0:
                    msg += f" (skipped {skipped_blacklist} blacklisted)"
                logger.info(msg)
        # Bound the dedup cache so it cannot grow without limit
        if len(seen_rspamd) > 10000:
            seen_rspamd.clear()
    except Exception as e:
        logger.error(f"[ERROR] Rspamd fetch error: {e}")
def parse_netfilter_message(message: str, priority: Optional[str] = None) -> Dict[str, Any]:
    """Parse a netfilter (fail2ban-style) log message into structured fields.

    Args:
        message: Raw log message text.
        priority: Optional syslog priority; 'crit' defaults the action to
            'ban' when no ban/unban keyword is found in the message.

    Returns:
        Dict with any of: 'ip', 'username', 'auth_method', 'rule_id',
        'attempts_left', and always an 'action' of
        'ban' | 'unban' | 'warning' | 'info'.
    """
    result: Dict[str, Any] = {}
    message_lower = message.lower()
    # IP extraction: try patterns in priority order, first hit wins.
    ip_patterns = [
        r'^(\d+\.\d+\.\d+\.\d+)',                              # message starts with an IP
        r'until\s+(\d+\.\d+\.\d+\.\d+)',                       # "... until <ip>"
        r'\[(\d+\.\d+\.\d+\.\d+)\]',                           # bracketed IP
    ]
    for pattern in ip_patterns:
        ip_match = re.search(pattern, message)
        if ip_match:
            result['ip'] = ip_match.group(1)
            break
    if not result.get('ip'):
        # This also covers CIDR bans ("Banning 10.0.0.0/24"): the pattern
        # matches the address part, so the old separate CIDR branch was
        # unreachable dead code and has been removed (same behavior).
        banned_match = re.search(r'Ban(?:ned|ning)\s+(\d+\.\d+\.\d+\.\d+)', message, re.IGNORECASE)
        if banned_match:
            result['ip'] = banned_match.group(1)
    username_match = re.search(r'sasl_username=([^\s,\)]+)', message)
    if username_match:
        result['username'] = username_match.group(1)
    auth_match = re.search(r'SASL\s+(\w+)', message)
    if auth_match:
        result['auth_method'] = f"SASL {auth_match.group(1)}"
    rule_match = re.search(r'rule id\s+(\d+)', message)
    if rule_match:
        result['rule_id'] = int(rule_match.group(1))
    attempts_match = re.search(r'(\d+)\s+more\s+attempt', message)
    if attempts_match:
        result['attempts_left'] = int(attempts_match.group(1))
    # Action classification. Check "unban" first with word boundaries so
    # "banning" inside "unbanning" is not misread as a ban.
    if re.search(r'\bunban(?:ning)?\b', message_lower):
        result['action'] = 'unban'
    elif re.search(r'\bban(?:ning|ned)\b', message_lower):
        # "N more attempts" means a pre-ban warning, not a ban yet
        result['action'] = 'warning' if 'more attempts' in message_lower else 'ban'
    elif priority and priority.lower() == 'crit':
        # For crit priority, default to ban if not already set
        result['action'] = 'ban'
    elif 'warning' in message_lower:
        result['action'] = 'warning'
    else:
        result['action'] = 'info'
    return result
async def fetch_and_store_netfilter():
    """Fetch Netfilter logs from API and store in DB.

    Netfilter entries have no stable unique ID, so duplicates are filtered
    both via the process-local ``seen_netfilter`` cache and via a DB lookup
    on (message, time, priority), which survives restarts.
    """
    last_fetch_run_time['netfilter'] = datetime.now(timezone.utc)
    try:
        logger.debug(f"[NETFILTER] Starting fetch (count: {settings.fetch_count_netfilter})")
        logs = await mailcow_api.get_netfilter_logs(count=settings.fetch_count_netfilter)
        if not logs:
            logger.debug("[NETFILTER] No logs returned from API")
            return
        logger.debug(f"[NETFILTER] Received {len(logs)} logs from API")
        with get_db_context() as db:
            new_count = 0
            skipped_count = 0
            for log_entry in logs:
                try:
                    time_val = log_entry.get('time', 0)
                    message = log_entry.get('message', '')
                    priority = log_entry.get('priority', 'info')
                    # Process-local dedup key
                    unique_id = f"{time_val}:{priority}:{message}"
                    if unique_id in seen_netfilter:
                        skipped_count += 1
                        continue
                    timestamp = datetime.fromtimestamp(time_val, tz=timezone.utc)
                    # DB-level dedup (covers entries imported by earlier runs)
                    existing = db.query(NetfilterLog).filter(
                        NetfilterLog.message == message,
                        NetfilterLog.time == timestamp,
                        NetfilterLog.priority == priority
                    ).first()
                    if existing:
                        skipped_count += 1
                        seen_netfilter.add(unique_id)
                        continue
                    # Extract IP / username / action etc. from the raw message
                    parsed = parse_netfilter_message(message, priority=priority)
                    netfilter_log = NetfilterLog(
                        time=timestamp,
                        priority=priority,
                        message=message,
                        ip=parsed.get('ip'),
                        username=parsed.get('username'),
                        auth_method=parsed.get('auth_method'),
                        action=parsed.get('action'),
                        rule_id=parsed.get('rule_id'),
                        attempts_left=parsed.get('attempts_left'),
                        raw_data=log_entry
                    )
                    db.add(netfilter_log)
                    seen_netfilter.add(unique_id)
                    new_count += 1
                except Exception as e:
                    logger.error(f"[NETFILTER] Error processing log entry: {e}")
                    continue
            db.commit()
            if new_count > 0:
                logger.info(f"[OK] Imported {new_count} Netfilter logs (skipped {skipped_count} duplicates)")
            elif skipped_count > 0:
                logger.debug(f"[NETFILTER] All {skipped_count} logs were duplicates, nothing new to import")
        # Bound the dedup cache so it cannot grow without limit
        if len(seen_netfilter) > 10000:
            logger.debug("[NETFILTER] Clearing seen_netfilter cache (size > 10000)")
            seen_netfilter.clear()
    except Exception as e:
        logger.error(f"[ERROR] Netfilter fetch error: {e}", exc_info=True)
async def fetch_all_logs():
    """Run the three log fetchers concurrently and track the combined job status.

    Individual fetcher failures are logged per source (gather uses
    return_exceptions so one failure never cancels the others); only an
    error in this orchestration itself marks the job 'failed'.
    """
    try:
        update_job_status('fetch_logs', 'running')
        logger.debug("[FETCH] Starting fetch_all_logs")
        fetchers = {
            "Postfix": fetch_and_store_postfix(),
            "Rspamd": fetch_and_store_rspamd(),
            "Netfilter": fetch_and_store_netfilter(),
        }
        outcomes = await asyncio.gather(*fetchers.values(), return_exceptions=True)
        for log_type, outcome in zip(fetchers, outcomes):
            if isinstance(outcome, Exception):
                logger.error(f"[ERROR] {log_type} fetch failed: {outcome}", exc_info=outcome)
        logger.debug("[FETCH] Completed fetch_all_logs")
        update_job_status('fetch_logs', 'success')
    except Exception as e:
        update_job_status('fetch_logs', 'failed', str(e))
        logger.error(f"[ERROR] Fetch all logs error: {e}", exc_info=True)
async def cleanup_blacklisted_queues():
    """
    Clean up Postfix queues where the recipient is blacklisted.
    This handles the BCC scenario:
    - Same message-id appears with multiple queue-ids
    - One queue is for the real recipient
    - Another queue is for the BCC address (which is blacklisted)
    We need to delete ALL logs for queues where the recipient is blacklisted,
    so that correlation only finds the "real" queue.
    """
    blacklist = settings.blacklist_emails_list
    if not blacklist:
        return
    try:
        with get_db_context() as db:
            # Collect every queue_id that ever delivered to a blacklisted address
            blacklisted_queue_ids = set()
            for email in blacklist:
                logs_with_blacklisted_recipient = db.query(PostfixLog).filter(
                    PostfixLog.recipient == email,
                    PostfixLog.queue_id.isnot(None)
                ).all()
                for log in logs_with_blacklisted_recipient:
                    if log.queue_id:
                        blacklisted_queue_ids.add(log.queue_id)
            if not blacklisted_queue_ids:
                return
            # Delete ALL logs for those queues, not just the matching rows,
            # so correlation can no longer see the BCC queue at all
            deleted_count = 0
            for queue_id in blacklisted_queue_ids:
                count = db.query(PostfixLog).filter(
                    PostfixLog.queue_id == queue_id
                ).delete(synchronize_session=False)
                deleted_count += count
            db.commit()
            if deleted_count > 0:
                logger.info(f"[CLEANUP] Cleaned up {deleted_count} Postfix logs from {len(blacklisted_queue_ids)} blacklisted BCC queues")
    except Exception as e:
        logger.error(f"[ERROR] Blacklisted queue cleanup error: {e}")
async def run_correlation():
    """
    Main correlation job - links Rspamd logs with Postfix logs.
    Strategy:
    1. Clean up blacklisted BCC queues first
    2. Find Rspamd logs without correlation_key
    3. For each, find Postfix logs with same message_id
    4. Get queue_id and find ALL related Postfix logs
    5. Create MessageCorrelation (if doesn't exist)
    Note: Also checks blacklist for legacy logs that were imported before blacklist was set.
    """
    # Step 1: Clean up blacklisted BCC queues before correlating
    await cleanup_blacklisted_queues()
    try:
        with get_db_context() as db:
            # Newest first, capped at 100 per run to keep each pass short
            uncorrelated_rspamd = db.query(RspamdLog).filter(
                RspamdLog.correlation_key.is_(None),
                RspamdLog.message_id.isnot(None),
                RspamdLog.message_id != '',
                RspamdLog.message_id != 'undef'
            ).order_by(desc(RspamdLog.time)).limit(100).all()
            if not uncorrelated_rspamd:
                return
            correlated_count = 0
            skipped_blacklist = 0
            for rspamd_log in uncorrelated_rspamd:
                try:
                    # Legacy rows imported before the blacklist existed:
                    # tag them so they are never picked up again
                    if is_blacklisted(rspamd_log.sender_smtp):
                        rspamd_log.correlation_key = "BLACKLISTED"
                        db.commit()
                        skipped_blacklist += 1
                        continue
                    if rspamd_log.recipients_smtp:
                        recipients = rspamd_log.recipients_smtp
                        if any(is_blacklisted(r) for r in recipients):
                            rspamd_log.correlation_key = "BLACKLISTED"
                            db.commit()
                            skipped_blacklist += 1
                            continue
                    result = correlate_single_message(db, rspamd_log)
                    if result:
                        correlated_count += 1
                except Exception as e:
                    logger.warning(f"Correlation failed for rspamd {rspamd_log.id}: {e}")
                    db.rollback()
                    continue
            if correlated_count > 0:
                logger.info(f"[LINK] Correlated {correlated_count} messages")
            if skipped_blacklist > 0:
                logger.info(f"[INFO] Skipped {skipped_blacklist} blacklisted messages")
    except Exception as e:
        logger.error(f"[ERROR] Correlation job error: {e}")
def correlate_single_message(db: Session, rspamd_log: RspamdLog) -> Optional[MessageCorrelation]:
    """
    Correlate a single Rspamd log with Postfix logs.
    Steps:
    1. Check if correlation already exists for this message_id
    2. Find Postfix logs with same message_id => get queue_id
    3. Find ALL Postfix logs with that queue_id
    4. Create or update correlation

    Returns the new or pre-existing MessageCorrelation, or None when the
    Rspamd log carries no message_id. Commits on success; on a unique-key
    race it rolls back and returns the row the other writer created.
    """
    message_id = rspamd_log.message_id
    if not message_id:
        return None
    # Step 1: Check if correlation already exists
    existing = db.query(MessageCorrelation).filter(
        MessageCorrelation.message_id == message_id
    ).first()
    if existing:
        # Just update the rspamd log with correlation key
        rspamd_log.correlation_key = existing.correlation_key
        if not existing.rspamd_log_id:
            existing.rspamd_log_id = rspamd_log.id
        existing.last_seen = datetime.now(timezone.utc)
        db.commit()
        return existing
    # Step 2: Find Postfix logs with this message_id
    postfix_with_msgid = db.query(PostfixLog).filter(
        PostfixLog.message_id == message_id
    ).all()
    # Get queue_id from Postfix logs (first log that carries one)
    queue_id = None
    for plog in postfix_with_msgid:
        if plog.queue_id:
            queue_id = plog.queue_id
            break
    # Step 3: Find ALL Postfix logs with this queue_id
    all_postfix_logs: List[PostfixLog] = []
    if queue_id:
        all_postfix_logs = db.query(PostfixLog).filter(
            PostfixLog.queue_id == queue_id
        ).all()
    # Step 4: Double-check no correlation exists (race condition protection)
    existing_check = db.query(MessageCorrelation).filter(
        MessageCorrelation.message_id == message_id
    ).first()
    if existing_check:
        # Another process created it, just link and return
        rspamd_log.correlation_key = existing_check.correlation_key
        if not existing_check.rspamd_log_id:
            existing_check.rspamd_log_id = rspamd_log.id
        db.commit()
        return existing_check
    # Create correlation key: deterministic hash of the message_id so all
    # writers derive the same key independently
    correlation_key = hashlib.sha256(f"msgid:{message_id}".encode()).hexdigest()
    # Get recipient
    recipients = rspamd_log.recipients_smtp or []
    first_recipient = recipients[0] if recipients else None
    # Determine final status from Postfix logs
    # (bounced/rejected are terminal; 'sent' maps to 'delivered')
    final_status = None
    for plog in all_postfix_logs:
        if plog.status:
            if plog.status in ['bounced', 'rejected']:
                final_status = plog.status
                break
            elif plog.status == 'deferred' and not final_status:
                final_status = plog.status
            elif plog.status == 'sent' and not final_status:
                final_status = 'delivered'
    # Use Rspamd action if no Postfix status
    if not final_status:
        if rspamd_log.action == 'reject':
            final_status = 'rejected'
        elif rspamd_log.is_spam:
            final_status = 'spam'
    # Check if email was delivered locally (relay=dovecot + both sender and recipient are local domains)
    # This is the definitive way to determine if email is internal
    direction = rspamd_log.direction
    # Check if sender and recipient are both local domains
    from .correlation import extract_domain, is_local_domain
    sender_domain = extract_domain(rspamd_log.sender_smtp)
    recipients = rspamd_log.recipients_smtp or []
    sender_is_local = sender_domain and is_local_domain(sender_domain)
    all_recipients_local = True
    if recipients:
        for recipient in recipients:
            recipient_domain = extract_domain(recipient)
            if not recipient_domain or not is_local_domain(recipient_domain):
                all_recipients_local = False
                break
    else:
        all_recipients_local = False
    # Only mark as internal if: relay=dovecot AND sender is local AND all recipients are local
    if sender_is_local and all_recipients_local:
        for plog in all_postfix_logs:
            if plog.relay and 'dovecot' in plog.relay.lower():
                direction = 'internal'
                rspamd_log.direction = 'internal'
                break
    # Get earliest timestamp (ensure timezone-aware)
    now = datetime.now(timezone.utc)
    first_seen = rspamd_log.time
    if first_seen and first_seen.tzinfo is None:
        first_seen = first_seen.replace(tzinfo=timezone.utc)
    if not first_seen:
        first_seen = now
    try:
        # Create correlation
        correlation = MessageCorrelation(
            correlation_key=correlation_key,
            message_id=message_id,
            queue_id=queue_id,
            sender=rspamd_log.sender_smtp,
            recipient=first_recipient,
            subject=rspamd_log.subject,
            direction=direction,
            final_status=final_status,
            rspamd_log_id=rspamd_log.id,
            postfix_log_ids=[plog.id for plog in all_postfix_logs] if all_postfix_logs else [],
            first_seen=first_seen,
            last_seen=now,
            is_complete=bool(queue_id and all_postfix_logs)
        )
        db.add(correlation)
        db.flush()  # Try to insert - will fail if duplicate
        # Update rspamd log with correlation key
        rspamd_log.correlation_key = correlation_key
        # Update all postfix logs with correlation key
        for plog in all_postfix_logs:
            plog.correlation_key = correlation_key
        db.commit()
        logger.debug(f"Created correlation for {message_id[:40]}... (queue: {queue_id}, {len(all_postfix_logs)} postfix logs)")
        return correlation
    except Exception as e:
        # Handle race condition - another process created the correlation
        db.rollback()
        # Try to find and return the existing one
        existing = db.query(MessageCorrelation).filter(
            MessageCorrelation.message_id == message_id
        ).first()
        if existing:
            rspamd_log.correlation_key = existing.correlation_key
            db.commit()
            return existing
        # Re-raise if it's a different error
        raise
async def complete_incomplete_correlations():
    """
    Complete correlations that are missing Postfix logs.
    This handles the case where rspamd was processed before postfix logs arrived:
    re-query Postfix by message_id, resolve the queue_id, attach all Postfix
    logs for that queue, and derive a final status (bounced/rejected win,
    'sent' maps to 'delivered').

    Only correlations younger than settings.max_correlation_age_minutes are
    retried; older ones are handled by expire_old_correlations().
    """
    update_job_status('complete_correlations', 'running')
    try:
        with get_db_context() as db:
            # Find incomplete correlations (have message_id but missing queue_id or postfix logs)
            # Use naive datetime for comparison since DB stores naive UTC
            cutoff_time = datetime.utcnow() - timedelta(
                minutes=settings.max_correlation_age_minutes
            )
            incomplete = db.query(MessageCorrelation).filter(
                MessageCorrelation.is_complete == False,
                MessageCorrelation.message_id.isnot(None),
                MessageCorrelation.created_at >= cutoff_time
            ).limit(100).all()
            if not incomplete:
                # BUGFIX: previously returned while the job was still marked
                # 'running'; a no-op run is still a successful run.
                update_job_status('complete_correlations', 'success')
                return
            completed_count = 0
            for correlation in incomplete:
                try:
                    # Find Postfix logs with this message_id
                    postfix_with_msgid = db.query(PostfixLog).filter(
                        PostfixLog.message_id == correlation.message_id
                    ).all()
                    if not postfix_with_msgid:
                        continue
                    # Get queue_id from whichever log carries one
                    queue_id = None
                    for plog in postfix_with_msgid:
                        if plog.queue_id:
                            queue_id = plog.queue_id
                            break
                    if not queue_id:
                        continue
                    # Find ALL Postfix logs with this queue_id
                    all_postfix = db.query(PostfixLog).filter(
                        PostfixLog.queue_id == queue_id
                    ).all()
                    # Update correlation
                    correlation.queue_id = queue_id
                    correlation.postfix_log_ids = [plog.id for plog in all_postfix]
                    correlation.is_complete = True
                    correlation.last_seen = datetime.now(timezone.utc)
                    # Update final status (bounced/rejected take precedence)
                    for plog in all_postfix:
                        if plog.status:
                            if plog.status in ['bounced', 'rejected']:
                                correlation.final_status = plog.status
                                break
                            elif plog.status == 'deferred' and correlation.final_status not in ['bounced', 'rejected']:
                                correlation.final_status = plog.status
                            elif plog.status == 'sent' and not correlation.final_status:
                                correlation.final_status = 'delivered'
                    # Back-fill the correlation key onto the Postfix rows
                    for plog in all_postfix:
                        plog.correlation_key = correlation.correlation_key
                    completed_count += 1
                except Exception as e:
                    logger.warning(f"Failed to complete correlation {correlation.id}: {e}")
                    continue
            db.commit()
            if completed_count > 0:
                logger.info(f"[OK] Completed {completed_count} correlations")
        update_job_status('complete_correlations', 'success')
    except Exception as e:
        logger.error(f"[ERROR] Complete correlations error: {e}")
        update_job_status('complete_correlations', 'failed', str(e))
async def expire_old_correlations():
    """
    SEPARATE JOB: Mark old incomplete correlations as "expired".
    This runs independently to ensure old incomplete correlations get expired even if
    the complete_incomplete_correlations job has issues.
    Only marks incomplete correlations (is_complete == False) as expired.
    Complete correlations with non-final statuses (None, 'deferred', etc.) are left as-is,
    as they may have legitimate statuses that don't need to be changed.
    Uses datetime.utcnow() (naive) to match the naive datetime in created_at.
    """
    update_job_status('expire_correlations', 'running')
    try:
        with get_db_context() as db:
            # Use naive datetime for comparison (DB stores naive UTC)
            old_cutoff = datetime.utcnow() - timedelta(
                minutes=settings.max_correlation_age_minutes
            )
            # Find old incomplete correlations and mark them as expired
            expired_correlations = db.query(MessageCorrelation).filter(
                MessageCorrelation.is_complete == False,
                MessageCorrelation.created_at < old_cutoff
            ).all()
            if not expired_correlations:
                # BUGFIX: previously returned while the job was still marked
                # 'running'; a no-op run is still a successful run.
                update_job_status('expire_correlations', 'success')
                return
            expired_count = 0
            for corr in expired_correlations:
                corr.is_complete = True  # Mark as complete so we stop trying
                corr.final_status = "expired"  # Set status to expired
                expired_count += 1
            db.commit()
            if expired_count > 0:
                logger.info(f"[EXPIRED] Marked {expired_count} correlations as expired (older than {settings.max_correlation_age_minutes}min)")
        update_job_status('expire_correlations', 'success')
    except Exception as e:
        logger.error(f"[ERROR] Expire correlations error: {e}")
        update_job_status('expire_correlations', 'failed', str(e))
async def update_final_status_for_correlations():
    """
    Background job to update final_status for correlations that don't have one yet.
    This handles the case where Postfix logs (especially status=sent) arrive after
    the initial correlation was created. The job:
    1. Finds correlations without a definitive final_status
    2. Only checks correlations within Max Correlation Age
    3. Looks for new Postfix logs that may have arrived
    4. Updates final_status, postfix_log_ids, and correlation_key
    This runs independently from correlation creation to ensure we catch
    late-arriving Postfix logs.
    """
    update_job_status('update_final_status', 'running')
    try:
        with get_db_context() as db:
            # Only check correlations within Max Correlation Age
            # (naive datetime: created_at is stored as naive UTC)
            cutoff_time = datetime.utcnow() - timedelta(
                minutes=settings.max_correlation_age_minutes
            )
            # Find correlations that:
            # 1. Are within the correlation age limit
            # 2. Have a queue_id (so we can check Postfix logs)
            # 3. Don't have a definitive final_status yet
            # We exclude 'delivered', 'bounced', 'rejected', 'expired' as these are final
            # We check None, 'deferred', 'spam', and other non-final statuses
            correlations_to_check = db.query(MessageCorrelation).filter(
                MessageCorrelation.created_at >= cutoff_time,
                MessageCorrelation.queue_id.isnot(None),
                or_(
                    MessageCorrelation.final_status.is_(None),
                    MessageCorrelation.final_status.notin_(['delivered', 'bounced', 'rejected', 'expired'])
                )
            ).limit(500).all()
            if not correlations_to_check:
                # BUGFIX: previously returned while the job was still marked
                # 'running'; a no-op run is still a successful run.
                update_job_status('update_final_status', 'success')
                return
            updated_count = 0
            for correlation in correlations_to_check:
                try:
                    # Get all Postfix logs for this queue_id
                    all_postfix = db.query(PostfixLog).filter(
                        PostfixLog.queue_id == correlation.queue_id
                    ).all()
                    if not all_postfix:
                        continue
                    # Determine best final status from all Postfix logs
                    # Priority: bounced > rejected > sent (delivered) > deferred
                    new_final_status = correlation.final_status
                    for plog in all_postfix:
                        if plog.status:
                            if plog.status in ['bounced', 'rejected']:
                                new_final_status = plog.status
                                break  # Highest priority, stop here
                            elif plog.status == 'sent':
                                # 'sent' (delivered) is better than 'deferred' or None
                                if new_final_status not in ['bounced', 'rejected', 'delivered']:
                                    new_final_status = 'delivered'
                            elif plog.status == 'deferred' and new_final_status not in ['bounced', 'rejected', 'delivered']:
                                new_final_status = 'deferred'
                    # Update postfix_log_ids - add any missing logs
                    current_ids = list(correlation.postfix_log_ids or [])
                    ids_added = 0
                    for plog in all_postfix:
                        if plog.id and plog.id not in current_ids:
                            current_ids.append(plog.id)
                            ids_added += 1
                    if ids_added > 0:
                        correlation.postfix_log_ids = current_ids
                    # Back-fill correlation_key onto ALL Postfix logs
                    for plog in all_postfix:
                        if not plog.correlation_key or plog.correlation_key != correlation.correlation_key:
                            plog.correlation_key = correlation.correlation_key
                    # Update if we found a better status or added logs
                    if (new_final_status and new_final_status != correlation.final_status) or ids_added > 0:
                        old_status = correlation.final_status
                        correlation.final_status = new_final_status
                        correlation.last_seen = datetime.now(timezone.utc)
                        updated_count += 1
                        if ids_added > 0:
                            logger.debug(f"Updated correlation {correlation.id}: added {ids_added} logs, status {old_status} -> {new_final_status}")
                        else:
                            logger.debug(f"Updated final_status for correlation {correlation.id} ({correlation.message_id[:40] if correlation.message_id else 'no-id'}...): {old_status} -> {new_final_status}")
                except Exception as e:
                    logger.warning(f"Failed to update final_status for correlation {correlation.id}: {e}")
                    continue
            db.commit()
            if updated_count > 0:
                logger.info(f"[STATUS] Updated final_status for {updated_count} correlations")
        update_job_status('update_final_status', 'success')
    except Exception as e:
        logger.error(f"[ERROR] Update final status error: {e}")
        update_job_status('update_final_status', 'failed', str(e))
async def update_geoip_database():
    """Background job: update the MaxMind GeoIP databases if they are stale.

    Records 'idle' when no MaxMind license key is configured (GeoIP features
    disabled), otherwise delegates to the downloader service, which decides
    per-database whether a refresh is needed, and records the outcome.
    """
    try:
        update_job_status('update_geoip', 'running')
        # No license key -> nothing to download.
        # (is_license_configured / update_geoip_database_if_needed are already
        # imported at module top; the previous local re-import was redundant.)
        if not is_license_configured():
            update_job_status('update_geoip', 'idle', 'License key not configured')
            return
        # Previously both branches of an if/else reported 'success' — the
        # per-database 'updated' flags made no difference, so just report it.
        update_geoip_database_if_needed()
        update_job_status('update_geoip', 'success')
    except Exception as e:
        logger.error(f"GeoIP update failed: {e}")
        update_job_status('update_geoip', 'failed', str(e))
async def dmarc_imap_sync_job():
    """
    Scheduled job to sync DMARC reports from IMAP mailbox.
    Runs every hour (configurable via DMARC_IMAP_INTERVAL).

    Before syncing, any DMARCSync row still marked 'running' (from a crashed
    or interrupted previous run) is flipped to 'failed' so the UI never shows
    a permanently stuck job.
    """
    if not settings.dmarc_imap_enabled:
        logger.debug("DMARC IMAP sync is disabled, skipping")
        return
    # Global cleanup to ensure no other job is stuck in 'running' state.
    # BUG FIX: the previous code imported SessionLocal from a placeholder
    # module ("your_app.database") that does not exist, so this cleanup
    # always raised ImportError and was silently swallowed. Use the
    # project's get_db_context (already imported at module top) instead.
    try:
        from .models import DMARCSync  # NOTE(review): confirm model lives in .models
        with get_db_context() as db:
            db.query(DMARCSync).filter(DMARCSync.status == 'running').update({
                "status": "failed",
                "error_message": "Stale job cleaned by scheduler"
            })
            db.commit()
    except Exception as cleanup_err:
        # Best-effort: a failed cleanup must not block the actual sync.
        logger.warning(f"Background cleanup failed: {cleanup_err}")
    # Start the current job
    update_job_status('dmarc_imap_sync', 'running')
    try:
        logger.info("Starting DMARC IMAP sync...")
        # Execute the actual IMAP sync logic
        result = sync_dmarc_reports_from_imap(sync_type='auto')
        if result.get('status') == 'error':
            error_msg = result.get('error_message', 'Unknown error')
            logger.error(f"DMARC IMAP sync failed: {error_msg}")
            update_job_status('dmarc_imap_sync', 'failed', error_msg)
            # Notify admins about failed report emails, if SMTP is configured.
            failed_emails = result.get('failed_emails')
            if failed_emails and settings.notification_smtp_configured:
                try:
                    send_dmarc_error_notification(failed_emails, result.get('sync_id'))
                except Exception as e:
                    logger.error(f"Failed to send error notification: {e}")
        else:
            # Sync finished successfully
            logger.info(f"DMARC IMAP sync completed: {result.get('reports_created', 0)} created")
            update_job_status('dmarc_imap_sync', 'success')
    except Exception as e:
        # Catch-all for unexpected crashes
        logger.error(f"DMARC IMAP sync job error: {e}", exc_info=True)
        update_job_status('dmarc_imap_sync', 'failed', str(e))
    finally:
        # Ensure the state is never left as 'running' if the code reaches here
        logger.debug("DMARC IMAP sync job cycle finished")
# =============================================================================
# CLEANUP
# =============================================================================
async def cleanup_old_logs():
    """Purge log and correlation rows older than the configured retention window."""
    update_job_status('cleanup_logs', 'running')
    try:
        with get_db_context() as db:
            cutoff_date = datetime.now(timezone.utc) - timedelta(
                days=settings.retention_days
            )
            # (model, timestamp column) pairs pruned against the same cutoff.
            prune_targets = (
                (PostfixLog, PostfixLog.time),
                (RspamdLog, RspamdLog.time),
                (NetfilterLog, NetfilterLog.time),
                (MessageCorrelation, MessageCorrelation.first_seen),
            )
            total = 0
            for model, ts_column in prune_targets:
                total += db.query(model).filter(ts_column < cutoff_date).delete()
            db.commit()
            if total > 0:
                logger.info(f"[CLEANUP] Cleaned up {total} old entries")
        update_job_status('cleanup_logs', 'success')
    except Exception as e:
        logger.error(f"[ERROR] Cleanup error: {e}")
        update_job_status('cleanup_logs', 'failed', str(e))
def cleanup_blacklisted_data():
    """
    One-time cleanup of existing blacklisted data.

    Invoked at startup to purge correlations, Postfix logs, and Rspamd logs
    that involve a blacklisted email address (data imported before the
    blacklist was configured). No-op when the blacklist is empty.
    """
    blacklist = settings.blacklist_emails_list
    if not blacklist:
        logger.info("[BLACKLIST] No blacklist configured, skipping cleanup")
        return
    logger.info(f"[BLACKLIST] Running startup cleanup for {len(blacklist)} blacklisted emails...")
    try:
        with get_db_context() as db:
            # 1. Drop correlations whose sender or recipient is blacklisted
            #    (case-insensitive exact match via ilike).
            removed_correlations = 0
            for address in blacklist:
                for column in (MessageCorrelation.sender, MessageCorrelation.recipient):
                    removed_correlations += db.query(MessageCorrelation).filter(
                        column.ilike(address)
                    ).delete(synchronize_session=False)
            if removed_correlations > 0:
                logger.info(f"[BLACKLIST] Deleted {removed_correlations} correlations with blacklisted emails")
            # 2. Collect queue IDs of Postfix logs touching a blacklisted address.
            tainted_queue_ids: Set[str] = set()
            for address in blacklist:
                for column in (PostfixLog.sender, PostfixLog.recipient):
                    rows = db.query(PostfixLog.queue_id).filter(
                        column.ilike(address),
                        PostfixLog.queue_id.isnot(None)
                    ).distinct().all()
                    tainted_queue_ids.update(row[0] for row in rows if row[0])
            # 3. Remove every Postfix log for those queue IDs, plus any
            #    correlations still referencing them.
            if tainted_queue_ids:
                removed_postfix = db.query(PostfixLog).filter(
                    PostfixLog.queue_id.in_(tainted_queue_ids)
                ).delete(synchronize_session=False)
                if removed_postfix > 0:
                    logger.info(f"[BLACKLIST] Deleted {removed_postfix} Postfix logs from {len(tainted_queue_ids)} blacklisted queue IDs")
                removed_extra = db.query(MessageCorrelation).filter(
                    MessageCorrelation.queue_id.in_(tainted_queue_ids)
                ).delete(synchronize_session=False)
                if removed_extra > 0:
                    logger.info(f"[BLACKLIST] Deleted {removed_extra} additional correlations")
            # 4. Remove Rspamd logs sent by blacklisted addresses.
            removed_rspamd = 0
            for address in blacklist:
                removed_rspamd += db.query(RspamdLog).filter(
                    RspamdLog.sender_smtp.ilike(address)
                ).delete(synchronize_session=False)
            if removed_rspamd > 0:
                logger.info(f"[BLACKLIST] Deleted {removed_rspamd} Rspamd logs with blacklisted senders")
            db.commit()
            logger.info("[BLACKLIST] Startup cleanup completed")
    except Exception as e:
        logger.error(f"[BLACKLIST] Cleanup error: {e}")
async def check_all_domains_dns_background():
    """Background job: run a full DNS check for every active Mailcow domain.

    Fetches the domain list from the Mailcow API, checks and persists DNS
    data for each active domain, throttling between checks. The job status
    is always finalized — including when the API returns no domains.
    """
    logger.info("Starting background DNS check...")
    update_job_status('dns_check', 'running')
    try:
        domains = await mailcow_api.get_domains()
        if not domains:
            # BUG FIX: the previous early return left job status stuck at
            # 'running' forever; finalize it before bailing out.
            logger.info("DNS check completed: 0 domains")
            update_job_status('dns_check', 'success')
            return
        checked_count = 0
        for domain_data in domains:
            domain_name = domain_data.get('domain_name')
            # Skip unnamed or inactive domains.
            if not domain_name or domain_data.get('active', 0) != 1:
                continue
            try:
                dns_data = await check_domain_dns(domain_name)
                with get_db_context() as db:
                    await save_dns_check_to_db(db, domain_name, dns_data, is_full_check=True)
                checked_count += 1
                # Small delay between domains to be gentle on resolvers.
                await asyncio.sleep(0.5)
            except Exception as e:
                # One failing domain must not abort the whole sweep.
                logger.error(f"Failed DNS check for {domain_name}: {e}")
        logger.info(f"DNS check completed: {checked_count} domains")
        update_job_status('dns_check', 'success')
    except Exception as e:
        logger.error(f"Background DNS check failed: {e}")
        update_job_status('dns_check', 'failed', str(e))
async def sync_local_domains():
    """
    Sync local domains from Mailcow API. Runs every 6 hours.

    Returns:
        bool: True when the cached active-domain list was refreshed,
        False when the API returned nothing or the call failed.
    """
    logger.info("Starting background local domains sync...")
    update_job_status('sync_local_domains', 'running')
    try:
        active_domains = await mailcow_api.get_active_domains()
        if active_domains:
            set_cached_active_domains(active_domains)
            logger.info(f"✓ Local domains synced: {len(active_domains)} domains")
            update_job_status('sync_local_domains', 'success')
            return True
        else:
            logger.warning("⚠ No active domains retrieved")
            # BUG FIX: this branch previously called str(e) with `e`
            # undefined (NameError); report a meaningful message instead.
            update_job_status('sync_local_domains', 'failed', 'No active domains retrieved')
            return False
    except Exception as e:
        logger.error(f"✗ Failed to sync local domains: {e}")
        update_job_status('sync_local_domains', 'failed', str(e))
        return False
# =============================================================================
# SCHEDULER SETUP
# =============================================================================
def start_scheduler():
    """Register all background jobs and start the APScheduler instance.

    Jobs: log fetching, correlation (create/complete/expire/status-update),
    daily cleanup, version check, DNS checks, local-domain sync, and —
    when configured — GeoIP updates and DMARC IMAP sync. Also runs the
    one-time blacklist cleanup before scheduling anything.

    Raises:
        Exception: re-raised after logging if scheduler setup fails.
    """
    try:
        # Run one-time blacklist cleanup on startup
        cleanup_blacklisted_data()
        # Job 1: Fetch logs from API (every fetch_interval seconds)
        scheduler.add_job(
            fetch_all_logs,
            trigger=IntervalTrigger(seconds=settings.fetch_interval),
            id='fetch_logs',
            name='Fetch Mailcow Logs',
            replace_existing=True,
            max_instances=1
        )
        # Job 2: Run correlation (every 30 seconds, after logs are imported)
        scheduler.add_job(
            run_correlation,
            trigger=IntervalTrigger(seconds=30),
            id='run_correlation',
            name='Correlate Logs',
            replace_existing=True,
            max_instances=1
        )
        # Job 3: Complete incomplete correlations (every 2 minutes)
        scheduler.add_job(
            complete_incomplete_correlations,
            trigger=IntervalTrigger(seconds=settings.correlation_check_interval),
            id='complete_correlations',
            name='Complete Correlations',
            replace_existing=True,
            max_instances=1
        )
        # Job 4: Expire old incomplete correlations (every 1 minute)
        # This is separate to ensure old correlations get expired reliably
        scheduler.add_job(
            expire_old_correlations,
            trigger=IntervalTrigger(seconds=60),
            id='expire_correlations',
            name='Expire Old Correlations',
            replace_existing=True,
            max_instances=1
        )
        # Job 5: Update final status for correlations (every correlation_check_interval)
        # This handles late-arriving Postfix logs (e.g., status=sent) that arrive
        # after the initial correlation was created
        scheduler.add_job(
            update_final_status_for_correlations,
            trigger=IntervalTrigger(seconds=settings.correlation_check_interval),
            id='update_final_status',
            name='Update Final Status',
            replace_existing=True,
            max_instances=1
        )
        # Job 6: Cleanup old logs (daily at 2 AM)
        scheduler.add_job(
            cleanup_old_logs,
            trigger=CronTrigger(hour=2, minute=0),
            id='cleanup_logs',
            name='Cleanup Old Logs',
            replace_existing=True
        )
        # Job 7: Check app version updates (every 6 hours, starting immediately)
        scheduler.add_job(
            check_app_version_update,
            trigger=IntervalTrigger(hours=6),
            id='check_app_version',
            name='Check App Version Updates',
            replace_existing=True,
            max_instances=1,
            next_run_time=datetime.now(timezone.utc)
        )
        # Job 8: DNS Check
        scheduler.add_job(
            check_all_domains_dns_background,
            trigger=IntervalTrigger(hours=6),
            id='dns_check_background',
            name='DNS Check (All Domains)',
            replace_existing=True,
            max_instances=1
        )
        # Job 8b: Initial DNS check on startup
        scheduler.add_job(
            check_all_domains_dns_background,
            'date',
            run_date=datetime.now(timezone.utc) + timedelta(seconds=60),
            id='dns_check_startup',
            name='DNS Check (Startup)'
        )
        # Job 9: Sync local domains (every 6 hours)
        scheduler.add_job(
            sync_local_domains,
            IntervalTrigger(hours=6),
            id='sync_local_domains',
            name='Sync Local Domains',
            replace_existing=True,
            max_instances=1,
            next_run_time=datetime.now(timezone.utc)
        )
        # Job 11: Update GeoIP database (weekly, Sunday at 3 AM)
        # Only runs if MaxMind license key is configured
        if is_license_configured():
            scheduler.add_job(
                update_geoip_database,
                trigger=CronTrigger(day_of_week='sun', hour=3, minute=0),
                id='update_geoip',
                name='Update GeoIP',
                replace_existing=True
            )
            # Run initial check on startup (after 60 seconds to let everything settle)
            scheduler.add_job(
                update_geoip_database,
                'date',
                run_date=datetime.now(timezone.utc) + timedelta(seconds=60),
                id='geoip_startup',
                name='GeoIP Check (Startup)'
            )
            logger.info(" [GEOIP] Initial GeoIP check scheduled (60 seconds after startup)")
        else:
            logger.info(" [GEOIP] MaxMind license key not configured, GeoIP features disabled")
        # Job 12: DMARC IMAP Sync - runs at configured interval (default: hourly)
        if settings.dmarc_imap_enabled:
            scheduler.add_job(
                dmarc_imap_sync_job,
                IntervalTrigger(seconds=settings.dmarc_imap_interval),
                id='dmarc_imap_sync',
                name='DMARC IMAP Sync',
                replace_existing=True
            )
            logger.info(f"Scheduled DMARC IMAP sync job (interval: {settings.dmarc_imap_interval}s)")
            # Run once on startup if configured
            if settings.dmarc_imap_run_on_startup:
                scheduler.add_job(
                    dmarc_imap_sync_job,
                    'date',
                    # BUG FIX: use a timezone-aware datetime like every other
                    # scheduled run_date (was naive datetime.now()).
                    run_date=datetime.now(timezone.utc) + timedelta(seconds=30),
                    id='dmarc_imap_sync_startup',
                    name='DMARC IMAP Sync (Startup)'
                )
                logger.info("Scheduled initial DMARC IMAP sync on startup")
        scheduler.start()
        logger.info("[OK] Scheduler started")
        logger.info(f"   [INFO] Import: every {settings.fetch_interval}s")
        logger.info(f"   [LINK] Correlation: every 30s")
        logger.info(f"   [COMPLETE] Incomplete correlations: every {settings.correlation_check_interval}s")
        logger.info(f"   [STATUS] Update final status: every {settings.correlation_check_interval}s (max age: {settings.max_correlation_age_minutes}min)")
        logger.info(f"   [EXPIRE] Old correlations: every 60s (expire after {settings.max_correlation_age_minutes}min)")
        logger.info(f"   [VERSION] Check app version updates: every 6 hours")
        logger.info(f"   [DNS] Check all domains DNS: every 6 hours")
        logger.info("   [GEOIP] Update GeoIP database: weekly (Sunday 3 AM)")
        if settings.dmarc_imap_enabled:
            logger.info(f"   [DMARC] IMAP sync: every {settings.dmarc_imap_interval // 60} minutes")
        else:
            logger.info("   [DMARC] IMAP sync: disabled")
        # Log blacklist status
        blacklist = settings.blacklist_emails_list
        if blacklist:
            logger.info(f"   [INFO] Blacklist: {len(blacklist)} emails")
            for email in blacklist[:5]:  # Show first 5
                logger.info(f"      - {email}")
            if len(blacklist) > 5:
                logger.info(f"      ... and {len(blacklist) - 5} more")
        else:
            logger.info("   [INFO] Blacklist: disabled (no emails configured)")
    except Exception as e:
        logger.error(f"[ERROR] Failed to start scheduler: {e}")
        raise
def stop_scheduler():
    """Shut down the background scheduler if it is currently running."""
    try:
        if not scheduler.running:
            return
        scheduler.shutdown(wait=False)
        logger.info("Scheduler stopped")
    except Exception as e:
        logger.error(f"Error stopping scheduler: {e}")