Root cause of the 2GB prod import.db: the sync_run_orders audit junction recorded every order on every run; under the 1-minute scheduler ~98% of 21.7M rows were no-op ALREADY_IMPORTED re-observations. NSSM stdout/stderr also grew unbounded (rotation never applied to the live service). Changes: - sqlite_service: skip ALREADY_IMPORTED rows in sync_run_orders (write-side guard, _SKIP_JUNCTION_STATUSES); add prune_sync_history(retention_days) with incremental_vacuum. - maintenance_service (new): cleanup_old_logs + run_daily_maintenance. - scheduler_service: start_maintenance_job (daily CronTrigger). - main.py: RotatingFileHandler (sync_comenzi_current.log, 10MB x5) instead of a new timestamped file per start; schedule daily maintenance + one-shot catch-up at startup. - scripts/db_maintenance.py (new): one-shot prune + VACUUM + log cleanup, plain sqlite3, invoked by deploy.ps1 while the service is stopped. - deploy.ps1: stop -> run db_maintenance.py -> (re)apply NSSM AppRotate* idempotently -> start, so rotation reaches pre-existing services. Retention defaults: 7 days history, 7 days logs. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
76 lines
2.8 KiB
Python
76 lines
2.8 KiB
Python
"""Periodic maintenance: prune audit history + clean up old log files.
|
|
|
|
Keeps the SQLite DB and the logs/ directory from growing unbounded. The audit
|
|
tables (sync_runs, sync_run_orders) were the only DB growth source under the
|
|
1-minute scheduler; business tables (orders, order_items) are never touched.
|
|
|
|
The one-shot heavy reclaim (full VACUUM, run while the service is stopped) lives
|
|
in scripts/db_maintenance.py and is invoked by deploy.ps1.
|
|
"""
|
|
import logging
|
|
import os
|
|
import time
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
# Default number of days of audit history to keep; used as the default
# `history_days` argument of run_daily_maintenance.
DEFAULT_HISTORY_RETENTION_DAYS = 7
|
|
# Default number of days a log file may go unmodified before it is deleted;
# used as the default for cleanup_old_logs and run_daily_maintenance.
DEFAULT_LOG_RETENTION_DAYS = 7
|
|
|
|
|
|
def _logs_dir() -> str:
|
|
"""Absolute path to the repo-root logs/ directory (matches main.py)."""
|
|
here = os.path.dirname(os.path.abspath(__file__))
|
|
return os.path.join(os.path.abspath(os.path.join(here, "..", "..", "..")), "logs")
|
|
|
|
|
|
def cleanup_old_logs(retention_days: int = DEFAULT_LOG_RETENTION_DAYS,
                     log_dir: str | None = None) -> int:
    """Remove stale log files and report how many were deleted.

    A directory entry is a candidate when its name contains ``.log`` anywhere
    (so both ``something.log`` and rotated names such as ``something.log.3``
    match) and it is a regular file. A candidate is deleted when its
    modification time is older than ``retention_days`` days before now.

    Args:
        retention_days: Age threshold in days; files modified more recently
            than this are kept.
        log_dir: Directory to scan. When omitted or empty, the path returned
            by ``_logs_dir()`` is used.

    Returns:
        The number of files deleted. Returns 0 when the directory does not
        exist.

    A failure on an individual file (``OSError``) is logged as a warning and
    does not stop the scan.
    """
    target_dir = log_dir or _logs_dir()
    if not os.path.isdir(target_dir):
        return 0

    # Files last modified before this timestamp are considered expired.
    oldest_allowed = time.time() - retention_days * 86400
    deleted = 0

    candidates = [entry for entry in os.listdir(target_dir) if ".log" in entry]
    for entry in candidates:
        full_path = os.path.join(target_dir, entry)
        try:
            expired = (os.path.isfile(full_path)
                       and os.path.getmtime(full_path) < oldest_allowed)
            if expired:
                os.remove(full_path)
                deleted += 1
        except OSError as err:
            # Best effort: report the file that failed and move on.
            logger.warning(f"cleanup_old_logs: could not remove {entry}: {err}")

    if deleted:
        logger.info(f"cleanup_old_logs: removed {deleted} file(s) older than "
                    f"{retention_days}d from {target_dir}")
    return deleted
|
|
|
|
|
|
async def run_daily_maintenance(
        history_days: int = DEFAULT_HISTORY_RETENTION_DAYS,
        log_days: int = DEFAULT_LOG_RETENTION_DAYS) -> dict:
    """Run the two daily maintenance steps and collect their outcomes.

    Step 1 awaits ``sqlite_service.prune_sync_history(history_days)``; step 2
    calls ``cleanup_old_logs(log_days)``. Each step has its own exception
    handler, so a failure in the first does not prevent the second from
    running.

    Args:
        history_days: Retention window passed to ``prune_sync_history``.
        log_days: Retention window passed to ``cleanup_old_logs``.

    Returns:
        A dict with ``"db"`` (the value returned by ``prune_sync_history``) or
        ``"db_error"`` (the exception text), followed by ``"logs_removed"``
        (count of deleted files) or ``"logs_error"`` (the exception text).
    """
    # Imported at call time, as in the original implementation.
    from . import sqlite_service

    summary: dict = {}

    # Step 1: prune audit history in the database.
    try:
        db_outcome = await sqlite_service.prune_sync_history(history_days)
    except Exception as exc:
        logger.warning(f"run_daily_maintenance: prune_sync_history failed: {exc}")
        summary["db_error"] = str(exc)
    else:
        summary["db"] = db_outcome

    # Step 2: delete old log files, regardless of how step 1 went.
    try:
        removed_count = cleanup_old_logs(log_days)
    except Exception as exc:
        logger.warning(f"run_daily_maintenance: cleanup_old_logs failed: {exc}")
        summary["logs_error"] = str(exc)
    else:
        summary["logs_removed"] = removed_count

    return summary
|