feat(sync): persistent append-only error history log

_log_order_error_history(order_number, msg) writes to
logs/sync_errors_history.log via a dedicated RotatingFileHandler
(100MB × 12 backups). Logger is lazy-initialised and non-propagating
so it doesn't pollute the root logger.

Purpose: orders.error_message is overwritten when a retry succeeds,
so the history log preserves permanent audit of every malformed-order
event regardless of later outcome. Helper never raises — callers are
already in a degraded path.

3 unit tests: append semantics, multi-order, exception isolation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-22 08:54:11 +00:00
parent 38498bec6d
commit d7610a6f33
2 changed files with 132 additions and 0 deletions

View File

@@ -1,5 +1,7 @@
import json import json
import logging import logging
import logging.handlers
import os
from datetime import datetime from datetime import datetime
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
from ..database import get_sqlite, get_sqlite_sync from ..database import get_sqlite, get_sqlite_sync
@@ -18,6 +20,61 @@ def _now_str():
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Dedicated append-only logger for per-order errors.
# orders.error_message is overwritten when retry succeeds — this file
# keeps the permanent audit trail.
_error_history_logger: logging.Logger | None = None
def _get_error_history_logger() -> logging.Logger:
"""Lazily-initialised logger writing to logs/sync_errors_history.log.
Append-only. Rolls over at 100MB with 12 kept backups (~monthly cadence
under prod load).
"""
global _error_history_logger
if _error_history_logger is not None:
return _error_history_logger
lg = logging.getLogger("sync_errors_history")
lg.setLevel(logging.INFO)
lg.propagate = False
# Find project root by walking up from this file
here = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(here, "..", "..", ".."))
logs_dir = os.path.join(project_root, "logs")
os.makedirs(logs_dir, exist_ok=True)
log_path = os.path.join(logs_dir, "sync_errors_history.log")
if not any(
isinstance(h, logging.handlers.RotatingFileHandler)
and getattr(h, "baseFilename", "") == log_path
for h in lg.handlers
):
handler = logging.handlers.RotatingFileHandler(
log_path, maxBytes=100 * 1024 * 1024, backupCount=12, encoding="utf-8"
)
handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
lg.addHandler(handler)
_error_history_logger = lg
return lg
def _log_order_error_history(order_number: str, error_msg: str) -> None:
"""Append an order-level failure line to the permanent error history log.
Called from save_orders_batch + add_order_items on MALFORMED fallback,
so the evidence survives later retry-success overwrites of
orders.error_message.
"""
try:
_get_error_history_logger().warning(f"ORDER_FAIL {order_number}: {error_msg}")
except Exception as e:
logger.warning(f"_log_order_error_history failed for {order_number}: {e}")
async def create_sync_run(run_id: str, json_files: int = 0): async def create_sync_run(run_id: str, json_files: int = 0):
"""Create a new sync run record.""" """Create a new sync run record."""
db = await get_sqlite() db = await get_sqlite()

View File

@@ -0,0 +1,75 @@
"""Tests for _log_order_error_history — permanent audit trail."""
import os
import sys
import logging
import logging.handlers
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from app.services import sqlite_service
@pytest.fixture
def reset_logger(tmp_path, monkeypatch):
"""Redirect error history log into tmp_path for isolation."""
sqlite_service._error_history_logger = None
lg = logging.getLogger("sync_errors_history")
for h in list(lg.handlers):
lg.removeHandler(h)
logs_dir = tmp_path / "logs"
logs_dir.mkdir()
target = logs_dir / "sync_errors_history.log"
def fake_get_logger():
if sqlite_service._error_history_logger is not None:
return sqlite_service._error_history_logger
inner = logging.getLogger("sync_errors_history")
inner.setLevel(logging.INFO)
inner.propagate = False
handler = logging.handlers.RotatingFileHandler(
str(target), maxBytes=100 * 1024 * 1024, backupCount=12, encoding="utf-8"
)
handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
inner.addHandler(handler)
sqlite_service._error_history_logger = inner
return inner
monkeypatch.setattr(sqlite_service, "_get_error_history_logger", fake_get_logger)
yield target
sqlite_service._error_history_logger = None
lg = logging.getLogger("sync_errors_history")
for h in list(lg.handlers):
h.close()
lg.removeHandler(h)
@pytest.mark.unit
def test_log_order_error_history_writes_line(reset_logger):
sqlite_service._log_order_error_history("485224762", "UNIQUE constraint failed")
logging.shutdown()
content = reset_logger.read_text(encoding="utf-8")
assert "ORDER_FAIL 485224762" in content
assert "UNIQUE constraint failed" in content
@pytest.mark.unit
def test_log_order_error_history_appends(reset_logger):
sqlite_service._log_order_error_history("1", "err-a")
sqlite_service._log_order_error_history("2", "err-b")
sqlite_service._log_order_error_history("2", "err-b-retry")
logging.shutdown()
content = reset_logger.read_text(encoding="utf-8")
assert "ORDER_FAIL 1: err-a" in content
assert "ORDER_FAIL 2: err-b" in content
# Two entries for order 2 — append-only guarantee
assert content.count("ORDER_FAIL 2:") == 2
@pytest.mark.unit
def test_log_order_error_history_swallows_errors(monkeypatch):
"""Callable must never raise — caller is already in a degraded path."""
def boom():
raise RuntimeError("disk full")
monkeypatch.setattr(sqlite_service, "_get_error_history_logger", boom)
sqlite_service._log_order_error_history("X", "ignored")