feat(sync): sync_phase_failures table for escalation tracking

New table sync_phase_failures(run_id, phase, error_summary, created_at)
with index on (phase, created_at). Minimal schema — no raw payload, no
PII — stores just enough to answer "did phase X fail in the last N
runs?" for the escalation check and the /api/sync/health pill.

Helpers in sqlite_service:
  record_phase_failure(run_id, phase, error_summary)
    INSERT OR REPLACE semantics (one row per run+phase), then prunes
    failure rows whose run is not among the 100 most recent sync_runs.
    error_summary is defensively clipped to 500 chars.
  get_recent_phase_failures(limit=3) → {phase: failure_count} over the
    `limit` most recent sync_runs (selected by started_at DESC).

6 unit tests cover creation, counting, pruning, empty state,
idempotency, and limit semantics.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-22 09:02:08 +00:00
parent 47a6bd83a4
commit 1e4e3279f7
3 changed files with 184 additions and 0 deletions

View File

@@ -186,6 +186,15 @@ CREATE TABLE IF NOT EXISTS anaf_cache (
denumire_anaf TEXT,
checked_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS sync_phase_failures (
run_id TEXT NOT NULL REFERENCES sync_runs(run_id),
phase TEXT NOT NULL,
error_summary TEXT,
created_at TEXT DEFAULT (datetime('now')),
PRIMARY KEY (run_id, phase)
);
CREATE INDEX IF NOT EXISTS idx_spf_phase_time ON sync_phase_failures(phase, created_at);
"""
# Module-level SQLite database path holder; None at import time
# (presumably assigned during initialisation — confirm against the code that sets it).
_sqlite_db_path = None

View File

@@ -842,6 +842,60 @@ async def add_order_items(order_number: str, items: list):
await db.close()
# ── sync phase failure tracking ───────────────────
async def record_phase_failure(run_id: str, phase: str, error_summary: str) -> None:
    """Store a failure marker for one phase of one sync run, then prune old rows.

    The marker is written with INSERT OR REPLACE, so recording the same
    (run_id, phase) pair again overwrites the earlier row. Afterwards every
    failure row whose run_id is not among the 100 most recent sync_runs
    (by started_at) is deleted. Both statements are committed together and
    the connection is closed whether or not they succeed.

    Callers (_phase_wrap in sync_service) must keep `error_summary` short —
    error type plus message only, never a raw payload or PII. The stored rows
    feed the escalation check and the /api/sync/health dashboard pill.

    Args:
        run_id: Identifier of the sync run in which the phase failed.
        phase: Name of the failed phase.
        error_summary: Short failure description; truncated to 500 characters.
            A falsy value (None or "") is stored as NULL.
    """
    upsert_sql = """INSERT OR REPLACE INTO sync_phase_failures (run_id, phase, error_summary)
VALUES (?, ?, ?)"""
    prune_sql = """
DELETE FROM sync_phase_failures
WHERE run_id NOT IN (
SELECT run_id FROM sync_runs ORDER BY started_at DESC LIMIT 100
)
"""
    conn = await get_sqlite()
    try:
        # Clip defensively so an oversized message can never bloat the table.
        summary = None
        if error_summary:
            summary = error_summary[:500]
        await conn.execute(upsert_sql, (run_id, phase, summary))
        await conn.execute(prune_sql)
        await conn.commit()
    finally:
        await conn.close()
async def get_recent_phase_failures(limit: int = 3) -> dict[str, int]:
    """Count recorded failures per phase across the most recent sync runs.

    Only failure rows belonging to the `limit` newest sync_runs (by
    started_at) are counted. Consumers are the escalation check (>=3
    consecutive failures on the same phase halts the next sync) and the
    /api/sync/health dashboard pill.

    Args:
        limit: How many of the most recent sync runs to inspect.

    Returns:
        A mapping of phase name to number of failure rows in that window.
        Phases with no failure rows in the window are absent from the mapping.
    """
    count_sql = """
SELECT phase, COUNT(*) AS cnt
FROM sync_phase_failures
WHERE run_id IN (
SELECT run_id FROM sync_runs ORDER BY started_at DESC LIMIT ?
)
GROUP BY phase
"""
    conn = await get_sqlite()
    try:
        result = await conn.execute(count_sql, (limit,))
        failures_by_phase: dict[str, int] = {}
        # Each record is (phase, cnt), matching the SELECT column order.
        for record in await result.fetchall():
            failures_by_phase[record[0]] = record[1]
        return failures_by_phase
    finally:
        await conn.close()
async def get_order_items(order_number: str) -> list:
"""Fetch items for one order."""
db = await get_sqlite()