feat(maintenance): guard DB + log growth (Option B + daily prune + rotation)
Root cause of the 2GB prod import.db: the sync_run_orders audit junction recorded every order on every run; under the 1-minute scheduler, ~98% of 21.7M rows were no-op ALREADY_IMPORTED re-observations. NSSM stdout/stderr also grew unbounded (rotation was never applied to the live service).

Changes:
- sqlite_service: skip ALREADY_IMPORTED rows in sync_run_orders (write-side guard, _SKIP_JUNCTION_STATUSES); add prune_sync_history(retention_days) with incremental_vacuum.
- maintenance_service (new): cleanup_old_logs + run_daily_maintenance.
- scheduler_service: start_maintenance_job (daily CronTrigger).
- main.py: RotatingFileHandler (sync_comenzi_current.log, 10MB x5) instead of a new timestamped file per start; schedule daily maintenance + one-shot catch-up at startup.
- scripts/db_maintenance.py (new): one-shot prune + VACUUM + log cleanup, plain sqlite3, invoked by deploy.ps1 while the service is stopped.
- deploy.ps1: stop -> run db_maintenance.py -> (re)apply NSSM AppRotate* idempotently -> start, so that rotation reaches pre-existing services.

Retention defaults: 7 days history, 7 days logs.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
75
api/app/services/maintenance_service.py
Normal file
75
api/app/services/maintenance_service.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Periodic maintenance: prune audit history + clean up old log files.
|
||||
|
||||
Keeps the SQLite DB and the logs/ directory from growing unbounded. The audit
|
||||
tables (sync_runs, sync_run_orders) were the only DB growth source under the
|
||||
1-minute scheduler; business tables (orders, order_items) are never touched.
|
||||
|
||||
The one-shot heavy reclaim (full VACUUM, run while the service is stopped) lives
|
||||
in scripts/db_maintenance.py and is invoked by deploy.ps1.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default retention (in days) that run_daily_maintenance passes to
# sqlite_service.prune_sync_history.
DEFAULT_HISTORY_RETENTION_DAYS = 7
|
||||
# Default age (in days) beyond which cleanup_old_logs deletes a log file.
DEFAULT_LOG_RETENTION_DAYS = 7
|
||||
|
||||
|
||||
def _logs_dir() -> str:
    """Return the absolute path of the ``logs`` directory three levels up.

    The path is derived from this file's own location: the directory that
    contains this module, walked up three parents, then ``logs`` appended.
    The directory is not created or checked for existence here.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    three_up = os.path.join(module_dir, "..", "..", "..")
    # abspath() collapses the ".." segments into a normalized absolute path.
    root_dir = os.path.abspath(three_up)
    return os.path.join(root_dir, "logs")
|
||||
|
||||
|
||||
def cleanup_old_logs(retention_days: int = DEFAULT_LOG_RETENTION_DAYS,
                     log_dir: str | None = None) -> int:
    """Remove stale log files from a directory and report how many went.

    A directory entry is a candidate when its name contains the substring
    ``.log`` (so both ``something.log`` and rotated names such as
    ``something.log.3`` match). A candidate is deleted when it is a regular
    file whose modification time is older than ``retention_days`` days.

    Args:
        retention_days: Maximum age, in days, of a file that is kept.
        log_dir: Directory to scan. When falsy, the path returned by
            ``_logs_dir()`` is used.

    Returns:
        The number of files deleted. ``0`` if the directory does not exist.

    An ``OSError`` raised while inspecting or deleting a single file is
    logged as a warning and does not stop the scan.
    """
    if not log_dir:
        log_dir = _logs_dir()
    if not os.path.isdir(log_dir):
        return 0

    # Files last modified before this timestamp are considered expired.
    expiry_ts = time.time() - retention_days * 86400
    removed = 0

    for name in os.listdir(log_dir):
        if ".log" in name:
            candidate = os.path.join(log_dir, name)
            try:
                if not os.path.isfile(candidate):
                    continue
                if os.path.getmtime(candidate) >= expiry_ts:
                    continue
                os.remove(candidate)
                removed += 1
            except OSError as e:
                # Best effort: report the problem and move on to the next file.
                logger.warning(f"cleanup_old_logs: could not remove {name}: {e}")

    if removed:
        logger.info(f"cleanup_old_logs: removed {removed} file(s) older than "
                    f"{retention_days}d from {log_dir}")
    return removed
|
||||
|
||||
|
||||
async def run_daily_maintenance(
        history_days: int = DEFAULT_HISTORY_RETENTION_DAYS,
        log_days: int = DEFAULT_LOG_RETENTION_DAYS) -> dict:
    """Run both maintenance steps and summarize their outcomes.

    Step 1 awaits ``sqlite_service.prune_sync_history(history_days)``;
    step 2 calls ``cleanup_old_logs(log_days)``. Each step has its own
    exception handler, so step 2 still runs when step 1 raises.

    Args:
        history_days: Retention passed to ``prune_sync_history``.
        log_days: Retention passed to ``cleanup_old_logs``.

    Returns:
        A dict with ``"db"`` (the value returned by ``prune_sync_history``)
        or ``"db_error"`` (the exception text), followed by
        ``"logs_removed"`` (count of deleted files) or ``"logs_error"``
        (the exception text).
    """
    # Imported inside the function rather than at module top level.
    from . import sqlite_service

    summary: dict = {}

    try:
        pruned = await sqlite_service.prune_sync_history(history_days)
    except Exception as e:
        logger.warning(f"run_daily_maintenance: prune_sync_history failed: {e}")
        summary["db_error"] = str(e)
    else:
        summary["db"] = pruned

    try:
        removed_count = cleanup_old_logs(log_days)
    except Exception as e:
        logger.warning(f"run_daily_maintenance: cleanup_old_logs failed: {e}")
        summary["logs_error"] = str(e)
    else:
        summary["logs_removed"] = removed_count

    return summary
|
||||
Reference in New Issue
Block a user