feat(validation): add structural pre-flight validator

validate_structural(order) runs before the save_orders_batch insert.
Catches malformed payloads (MISSING_FIELD, INVALID_DATE, EMPTY_ITEMS,
INVALID_QUANTITY, INVALID_PRICE) that would otherwise crash the batch
insert or downstream pipeline. 17 unit tests cover each rule.

Does NOT validate SKU existence — that would be redundant with the
_dedup_items_by_sku pass-through and the validate_skus Oracle lookup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-22 08:52:32 +00:00
parent f6d283b743
commit 38498bec6d
2 changed files with 232 additions and 0 deletions

View File

@@ -1,11 +1,77 @@
import asyncio
import logging
import math
from datetime import datetime

from .. import database
from . import sqlite_service
logger = logging.getLogger(__name__)
def validate_structural(order: dict) -> tuple[bool, str | None, str | None]:
"""Pre-flight structural validator used by save_orders_batch.
Returns (True, None, None) on pass, (False, error_type, error_msg) on fail.
Rules are intentionally minimal — only catches malformed payloads that
would crash downstream inserts. Semantic checks (SKU existence, price
comparison, etc.) are handled in later phases.
"""
if not isinstance(order, dict):
return False, "MISSING_FIELD", f"order is not a dict: {type(order).__name__}"
order_number = order.get("order_number")
if order_number is None or str(order_number).strip() == "":
return False, "MISSING_FIELD", "order_number is missing or empty"
raw_date = order.get("order_date")
if raw_date in (None, ""):
return False, "INVALID_DATE", "order_date is missing or empty"
if isinstance(raw_date, datetime):
pass
elif isinstance(raw_date, str):
parsed = None
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d"):
try:
parsed = datetime.strptime(raw_date, fmt)
break
except ValueError:
continue
if parsed is None:
try:
parsed = datetime.fromisoformat(raw_date.replace("Z", "+00:00"))
except ValueError:
return False, "INVALID_DATE", f"order_date not parseable: {raw_date!r}"
else:
return False, "INVALID_DATE", f"order_date wrong type: {type(raw_date).__name__}"
items = order.get("items")
if not items or not isinstance(items, list):
return False, "EMPTY_ITEMS", "items missing or not a non-empty list"
for idx, item in enumerate(items):
if not isinstance(item, dict):
return False, "EMPTY_ITEMS", f"item[{idx}] is not a dict"
qty_raw = item.get("quantity")
if qty_raw is None or qty_raw == "":
return False, "INVALID_QUANTITY", f"item[{idx}] quantity missing"
try:
qty = float(qty_raw)
except (TypeError, ValueError):
return False, "INVALID_QUANTITY", f"item[{idx}] quantity not numeric: {qty_raw!r}"
if qty <= 0:
return False, "INVALID_QUANTITY", f"item[{idx}] quantity not > 0: {qty}"
price_raw = item.get("price")
if price_raw is None or price_raw == "":
return False, "INVALID_PRICE", f"item[{idx}] price missing"
try:
float(price_raw)
except (TypeError, ValueError):
return False, "INVALID_PRICE", f"item[{idx}] price not numeric: {price_raw!r}"
return True, None, None
async def reconcile_unresolved_missing_skus(conn=None) -> dict:
"""Revalidate all resolved=0 SKUs in missing_skus against Oracle.
Fail-soft: logs warning and returns zero if Oracle is unavailable.