feat(maintenance): guard DB + log growth (Option B + daily prune + rotation)
Root cause of the 2GB prod import.db: the sync_run_orders audit junction recorded every order on every run; under the 1-minute scheduler ~98% of 21.7M rows were no-op ALREADY_IMPORTED re-observations. NSSM stdout/stderr also grew unbounded (rotation never applied to the live service). Changes: - sqlite_service: skip ALREADY_IMPORTED rows in sync_run_orders (write-side guard, _SKIP_JUNCTION_STATUSES); add prune_sync_history(retention_days) with incremental_vacuum. - maintenance_service (new): cleanup_old_logs + run_daily_maintenance. - scheduler_service: start_maintenance_job (daily CronTrigger). - main.py: RotatingFileHandler (sync_comenzi_current.log, 10MB x5) instead of a new timestamped file per start; schedule daily maintenance + one-shot catch-up at startup. - scripts/db_maintenance.py (new): one-shot prune + VACUUM + log cleanup, plain sqlite3, invoked by deploy.ps1 while the service is stopped. - deploy.ps1: stop -> run db_maintenance.py -> (re)apply NSSM AppRotate* idempotently -> start, so rotation reaches pre-existing services. Retention defaults: 7 days history, 7 days logs. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
102
scripts/db_maintenance.py
Normal file
102
scripts/db_maintenance.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
One-shot SQLite + log maintenance, invoked by deploy.ps1 while the GoMagVending
|
||||
service is stopped.
|
||||
|
||||
What it does:
|
||||
1. Prune audit history older than --history-days (sync_runs, sync_run_orders,
|
||||
orphaned sync_phase_failures). Business tables (orders, order_items) are
|
||||
NEVER touched.
|
||||
2. Enable PRAGMA auto_vacuum=INCREMENTAL and run a full VACUUM to reclaim disk.
|
||||
3. Delete log files older than --log-days from logs/.
|
||||
|
||||
Plain sqlite3 only — no app imports, no Oracle, no event loop — so it runs even
|
||||
if the app/Oracle env isn't set up.
|
||||
|
||||
Usage:
|
||||
python scripts/db_maintenance.py # defaults: 7/7 days
|
||||
python scripts/db_maintenance.py --history-days 7 --log-days 7
|
||||
python scripts/db_maintenance.py --db C:\\path\\import.db
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Repository root: one level above the directory that contains this script.
REPO_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir)
)
# Default SQLite database location, relative to the repository root.
DEFAULT_DB = os.path.join(REPO_ROOT, "api", "data", "import.db")
# Default directory scanned for old log files.
DEFAULT_LOGS = os.path.join(REPO_ROOT, "logs")
|
||||
|
||||
|
||||
def prune_and_vacuum(db_path: str, history_days: int) -> None:
    """Delete old sync audit rows from the SQLite DB, then VACUUM the file.

    The cutoff is a ``YYYY-MM-DD`` string for local "now" minus
    ``history_days``; a run is considered old when the first 10 characters of
    its ``started_at`` sort before that string. In order, this removes:

    1. ``sync_run_orders`` rows belonging to old runs,
    2. the old ``sync_runs`` rows themselves,
    3. ``sync_phase_failures`` rows whose ``run_id`` is no longer present in
       ``sync_runs``.

    After committing, it sets ``auto_vacuum=INCREMENTAL``, runs a full
    ``VACUUM`` and prints a one-line summary (rows deleted, file size before
    and after, VACUUM duration).

    Args:
        db_path: Path to the SQLite database file; it must already exist.
        history_days: Number of days of run history to keep.

    Raises:
        OSError: If the size of ``db_path`` cannot be read.
        sqlite3.Error: If any statement fails; the connection is still closed.
    """

    def size_mb() -> float:
        # File size of the database in MiB (1048576 bytes).
        return os.path.getsize(db_path) / 1048576.0

    cutoff = (datetime.now() - timedelta(days=history_days)).strftime("%Y-%m-%d")
    before = size_mb()

    db = sqlite3.connect(db_path, timeout=120)
    try:
        # Junction rows first: the subquery needs the sync_runs rows that the
        # next statement deletes.
        junction = db.execute(
            "DELETE FROM sync_run_orders WHERE sync_run_id IN "
            "(SELECT run_id FROM sync_runs WHERE substr(started_at,1,10) < ?)",
            (cutoff,),
        ).rowcount
        runs = db.execute(
            "DELETE FROM sync_runs WHERE substr(started_at,1,10) < ?",
            (cutoff,),
        ).rowcount
        # Failures whose parent run no longer exists.
        db.execute(
            "DELETE FROM sync_phase_failures "
            "WHERE run_id NOT IN (SELECT run_id FROM sync_runs)"
        )
        db.commit()

        # Switch to autocommit before VACUUM, which cannot run inside an open
        # transaction. The auto_vacuum mode change only takes effect on the
        # next VACUUM, so the PRAGMA must come first.
        db.isolation_level = None
        db.execute("PRAGMA auto_vacuum=INCREMENTAL")
        vacuum_started = time.time()
        db.execute("VACUUM")
        vac = time.time() - vacuum_started
    finally:
        db.close()

    after = size_mb()
    print(f"[db_maintenance] cutoff<{cutoff} runs_deleted={runs} "
          f"junction_deleted={junction} size {before:.1f}MB -> {after:.1f}MB "
          f"(VACUUM {vac:.1f}s)")
|
||||
|
||||
|
||||
def cleanup_logs(log_dir: str, log_days: int) -> None:
    """Delete log files in ``log_dir`` last modified more than ``log_days`` ago.

    Only regular files directly inside ``log_dir`` are considered, and only
    when the name carries ``.log`` as an extension component: either the name
    ends with ``.log`` or it contains ``.log.`` (rotated backups such as
    ``app.log.1``). A bare substring match on ``.log`` would also select
    unrelated files like ``notes.logic.txt``, so it is deliberately not used.

    Removal is best-effort: a file that cannot be inspected or removed is
    reported and skipped. A summary line with the number of removed files is
    printed at the end. If ``log_dir`` is not a directory, a message is
    printed and nothing else happens.

    Args:
        log_dir: Directory to scan (not recursive).
        log_days: Age threshold in days, compared against each file's mtime.
    """
    if not os.path.isdir(log_dir):
        print(f"[db_maintenance] logs dir not found: {log_dir}")
        return

    cutoff = time.time() - log_days * 86400
    removed = 0
    for name in os.listdir(log_dir):
        # Require ".log" as a real extension component, not just a substring.
        if not (name.endswith(".log") or ".log." in name):
            continue
        path = os.path.join(log_dir, name)
        try:
            if os.path.isfile(path) and os.path.getmtime(path) < cutoff:
                os.remove(path)
                removed += 1
        except OSError as e:
            # Keep going: one locked or vanished file must not stop the sweep.
            print(f"[db_maintenance] could not remove {name}: {e}")
    print(f"[db_maintenance] removed {removed} log file(s) older than {log_days}d")
|
||||
|
||||
|
||||
def main() -> int:
    """Parse command-line options and run DB pruning followed by log cleanup.

    Options: ``--db`` (database path), ``--logs-dir`` (log directory),
    ``--history-days`` and ``--log-days`` (both integers, default 7).

    A missing database file is not an error: a note is written to stderr and
    the prune step is skipped. Log cleanup runs in both cases.

    Returns:
        Always 0, used as the process exit status.
    """
    parser = argparse.ArgumentParser(description="SQLite + log maintenance")
    option_table = (
        ("--db", {"default": DEFAULT_DB}),
        ("--logs-dir", {"default": DEFAULT_LOGS}),
        ("--history-days", {"type": int, "default": 7}),
        ("--log-days", {"type": int, "default": 7}),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()

    if os.path.exists(args.db):
        prune_and_vacuum(args.db, args.history_days)
    else:
        # Non-fatal: a fresh install may not have a DB yet.
        print(f"[db_maintenance] DB not found, skipping: {args.db}", file=sys.stderr)

    cleanup_logs(args.logs_dir, args.log_days)
    return 0
|
||||
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user