feat(import): T9 canonicalize_row + build_key partajat (idempotency)

- canonicalize_row: VIN upper, odometru strip ".0" (Excel float coercion),
  data strip — INAINTE de validare si cheie (§3.4bis)
- build_key: aplica account_or_default(None->1) inainte de hash (OV-2):
  canal API (None) si canal import (1) produc aceeasi cheie
- build_key_legacy: helper dual-lookup pentru randuri DB vechi (pre-T9)
- router.py: POST /v1/prezentari foloseste build_key(account_id, canonicalize_row(content))
- 14 teste: canonicalizare, cross-canal, dedup float/int odometru, legacy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-06-16 20:15:59 +00:00
parent 80897ccbb1
commit 4ea21a034e
3 changed files with 293 additions and 5 deletions

View File

@@ -22,7 +22,7 @@ from pydantic import BaseModel, Field
from ...auth import resolve_account_id from ...auth import resolve_account_id
from ...crypto import encrypt_creds from ...crypto import encrypt_creds
from ...db import get_connection from ...db import get_connection
from ...idempotency import idempotency_key from ...idempotency import build_key, canonicalize_row, idempotency_key
from ...mapping import ( from ...mapping import (
account_or_default, account_or_default,
load_mapping, load_mapping,
@@ -63,7 +63,17 @@ def create_prezentari(
mapping = load_mapping(conn, acct) mapping = load_mapping(conn, acct)
for prez in req.prezentari: for prez in req.prezentari:
content = prez.model_dump() content = prez.model_dump()
key = idempotency_key(account_id, content) # T9/OV-2: canonicalize_row inaintea build_key (odometru strip ".0", VIN upper).
# build_key aplica account_or_default(account_id) inainte de hash:
# None si 1 colapseaza la aceeasi cheie (canal API + canal import).
canon = canonicalize_row(content)
key = build_key(account_id, canon)
# Aplica normalizarea si in content (odometru canonicalizat inainte de validare, §3.4bis)
content.update({
"vin": canon["vin"],
"nr_inmatriculare": canon["nr_inmatriculare"],
"odometru_final": canon["odometru_final"],
})
existing = conn.execute( existing = conn.execute(
"SELECT id, status, id_prezentare FROM submissions WHERE idempotency_key=?", "SELECT id, status, id_prezentare FROM submissions WHERE idempotency_key=?",
(key,), (key,),

View File

@@ -2,6 +2,23 @@
RAR nu are camp nr. comanda si accepta duplicate -> dedup-ul e in sarcina noastra RAR nu are camp nr. comanda si accepta duplicate -> dedup-ul e in sarcina noastra
(plan.md sect. 14). Hash stabil peste o reprezentare canonica a prezentarii. (plan.md sect. 14). Hash stabil peste o reprezentare canonica a prezentarii.
Treapta 2 (T9 + OV-2): extrage canonicalize_row + build_key ca helpere publice
partajate intre canalul API si canalul import.
- canonicalize_row: normeaza VIN/nr/odometru (strip ".0" Excel coercion) INAINTE
de validare (§3.4bis) si INAINTE de cheie.
- build_key: aplica account_or_default INAINTE de hash (None si 1 => o cheie).
OV-2 — skew account_id: routerul vechi pasa account_id AS-PASSED (None pe canal API
fara auth). Randurile se stocau sub account_or_default=1, dar cheia includea None.
Acelasi rand logic venit din import (account_id=1) producea o cheie diferita, deci
verificarea already_sent nu il gasea si randul ajungea a doua oara la FINALIZATA.
Fix: build_key normalizeaza INTOTDEAUNA la account_or_default inainte de hash.
Migrare DB productie (OV-2): randurile existente cu cheie-None nu mai sunt gasite de
build_key nou. Strategie documentata: dual-lookup la already_sent (incearca cheia
noua, apoi cheia legacy). In dev nu exista date reale; la first-deploy productie
se poate face recompute-keys o singura data.
""" """
from __future__ import annotations from __future__ import annotations
@@ -20,10 +37,88 @@ def _op_identity(p: Any) -> str:
return (get("cod_op_service", "") or "").strip() return (get("cod_op_service", "") or "").strip()
def canonicalize_row(raw: dict[str, Any]) -> dict[str, Any]:
    """Return the canonical form of a raw row.

    Intended to be called BEFORE validation and before ``build_key`` so that
    both operate on the same normalized values.

    Normalization rules:
      - ``vin``, ``nr_inmatriculare``: coerced to ``str``, stripped, upper-cased.
        Coercion keeps a non-string cell (e.g. a numeric spreadsheet cell) from
        raising ``AttributeError`` here; rejecting it is left to validation.
      - ``odometru_final``: coerced to ``str`` and stripped; an exact ``".0"``
        suffix after an integer part (Excel numeric coercion, 123456.0 ->
        "123456") is dropped. Any other fractional part ("123456.50") is kept
        verbatim. Per the original note, this is needed so an ``isdigit()``-based
        integer parser in validation does not reject the float-looking string.
      - ``data_prestatie``: coerced to ``str`` and stripped only; conversion to
        YYYY-MM-DD is not done here.
      - ``prestatii``: shallow-copied into a new list, items untouched.

    Args:
        raw: Raw row as a mapping; every key is optional.

    Returns:
        A new dict with exactly the keys ``vin``, ``nr_inmatriculare``,
        ``data_prestatie``, ``odometru_final`` and ``prestatii``. Falsy/missing
        text fields become ``""``; a missing odometer becomes ``""`` (``0`` is
        preserved as ``"0"``); missing ``prestatii`` becomes ``[]``.
    """
    vin = str(raw.get("vin") or "").strip().upper()
    nr = str(raw.get("nr_inmatriculare") or "").strip().upper()

    # Only None means "absent" for the odometer, so a literal 0 survives as "0".
    odo_raw = raw.get("odometru_final")
    odo_s = "" if odo_raw is None else str(odo_raw).strip()
    whole, dot, frac = odo_s.partition(".")
    # Strip only a pure ".0" coercion suffix; anything else is left for
    # validation to judge.
    if dot and frac == "0" and whole.lstrip("-").isdigit():
        odo_s = whole

    data = str(raw.get("data_prestatie") or "").strip()
    prestatii = list(raw.get("prestatii") or [])

    return {
        "vin": vin,
        "nr_inmatriculare": nr,
        "data_prestatie": data,
        "odometru_final": odo_s,
        "prestatii": prestatii,
    }
def build_key(account_id: int | None, canon: dict[str, Any]) -> str:
    """Compute the SHA-256 idempotency key shared by the API and import channels.

    ``account_or_default`` is applied to ``account_id`` before hashing (OV-2),
    so that — per the module notes — ``None`` and ``1`` collapse to the same
    key and one logical row arriving through different channels is not sent
    twice.

    Args:
        account_id: Account id as passed by the caller; may be ``None``.
        canon: Canonicalized row (normally the output of ``canonicalize_row``).

    Returns:
        Hex digest of the SHA-256 over a compact, key-sorted JSON rendering of
        the account and the significant row fields.
    """
    # Imported locally to avoid a circular import (mapping imports from
    # idempotency via the validator).
    from .mapping import account_or_default

    resolved_account = account_or_default(account_id)
    operations = [_op_identity(item) for item in (canon.get("prestatii") or [])]
    operations.sort()

    payload = {
        "account_id": resolved_account,
        "vin": canon.get("vin", ""),
        "nr_inmatriculare": canon.get("nr_inmatriculare", ""),
        "data_prestatie": canon.get("data_prestatie"),
        "odometru_final": canon.get("odometru_final", ""),
        "prestatii": operations,
    }
    serialized = json.dumps(
        payload,
        sort_keys=True,
        ensure_ascii=False,
        separators=(",", ":"),
    )
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
def idempotency_key(account_id: int | None, prezentare: dict[str, Any]) -> str:
    """Return the SHA-256 idempotency key for a raw presentation dict.

    Backward-compatible wrapper: the presentation is first run through
    ``canonicalize_row`` and the result is hashed by ``build_key``. Fields that
    ``canonicalize_row`` does not carry over (e.g. ``obs`` and ``b64Image``,
    which are cosmetic and do not define the uniqueness of the declaration)
    do not influence the key.

    NOTE: after OV-2, ``account_id=None`` and ``account_id=1`` yield the SAME
    key (via ``account_or_default`` inside ``build_key``). Old rows stored
    under a ``None``-based key are not covered automatically; use a
    dual-lookup or a one-off key recompute when migrating production.
    """
    return build_key(account_id, canonicalize_row(prezentare))
def build_key_legacy(account_id: int | None, prezentare: dict[str, Any]) -> str:
"""Cheia in formatul vechi (account_id AS-PASSED, fara canonicalize).
Folosita EXCLUSIV pentru dual-lookup la already_sent pe DB cu randuri vechi
(dinainte de T9). Nu folosi pentru randuri noi.
""" """
canonic = { canonic = {
"account_id": account_id, "account_id": account_id,
@@ -31,9 +126,6 @@ def idempotency_key(account_id: int | None, prezentare: dict[str, Any]) -> str:
"nr_inmatriculare": (prezentare.get("nr_inmatriculare") or "").strip().upper(), "nr_inmatriculare": (prezentare.get("nr_inmatriculare") or "").strip().upper(),
"data_prestatie": prezentare.get("data_prestatie"), "data_prestatie": prezentare.get("data_prestatie"),
"odometru_final": str(prezentare.get("odometru_final") or "").strip(), "odometru_final": str(prezentare.get("odometru_final") or "").strip(),
# Identitatea operatiei = codul RAR daca exista, altfel codul intern ROAAUTO
# (hibrid): doua trimiteri ale aceleiasi comenzi dedup corect indiferent de
# forma in care vin codurile.
"prestatii": sorted(_op_identity(p) for p in (prezentare.get("prestatii") or [])), "prestatii": sorted(_op_identity(p) for p in (prezentare.get("prestatii") or [])),
} }
blob = json.dumps(canonic, sort_keys=True, ensure_ascii=False, separators=(",", ":")) blob = json.dumps(canonic, sort_keys=True, ensure_ascii=False, separators=(",", ":"))

186
tests/test_canonicalize.py Normal file
View File

@@ -0,0 +1,186 @@
"""Teste T9: canonicalize_row + build_key partajat (idempotency).
Verify:
(a) cross-canal: build_key(API canal-None) == build_key(import canal-rezolvat) pentru
acelasi rand logic.
(b) regresie: strategia cheilor vechi (dual-lookup legacy) acoperita de test.
(c) canonicalize taie ".0" din odometru inainte de validare.
"""
from __future__ import annotations
import os
import tempfile
import pytest
from app.idempotency import (
build_key,
build_key_legacy,
canonicalize_row,
idempotency_key,
)
# --- canonicalize_row ---
def test_canonicalize_vin_upper():
    """VIN and registration number come back upper-cased."""
    row = dict(
        vin="wvwzzz1kzaw000123",
        nr_inmatriculare="b999tst",
        data_prestatie="2026-06-15",
        odometru_final="123456",
    )
    result = canonicalize_row(row)
    assert result["vin"] == "WVWZZZ1KZAW000123"
    assert result["nr_inmatriculare"] == "B999TST"
def test_canonicalize_odometru_strip_dot_zero():
    """The string '123456.0' (Excel float rendering) becomes '123456'."""
    row = dict(
        vin="X",
        nr_inmatriculare="Y",
        data_prestatie="2026-01-01",
        odometru_final="123456.0",
    )
    assert canonicalize_row(row)["odometru_final"] == "123456"
def test_canonicalize_odometru_numeric_float():
    """A real float 123456.0 becomes the string '123456'."""
    row = dict(
        vin="X",
        nr_inmatriculare="Y",
        data_prestatie="2026-01-01",
        odometru_final=123456.0,
    )
    assert canonicalize_row(row)["odometru_final"] == "123456"
def test_canonicalize_odometru_int_unchanged():
    """An integer 123456 is rendered as '123456' without alteration."""
    row = dict(
        vin="X",
        nr_inmatriculare="Y",
        data_prestatie="2026-01-01",
        odometru_final=123456,
    )
    assert canonicalize_row(row)["odometru_final"] == "123456"
def test_canonicalize_odometru_50_unchanged():
    """'123456.50' is not a pure Excel coercion, so it is left as is."""
    row = dict(
        vin="X",
        nr_inmatriculare="Y",
        data_prestatie="2026-01-01",
        odometru_final="123456.50",
    )
    assert canonicalize_row(row)["odometru_final"] == "123456.50"
def test_canonicalize_odometru_none():
    """A row without an odometer value canonicalizes to an empty string."""
    row = dict(vin="X", nr_inmatriculare="Y", data_prestatie="2026-01-01")
    assert canonicalize_row(row)["odometru_final"] == ""
def test_canonicalize_data_strip():
    """Surrounding whitespace is stripped from the service date."""
    row = dict(
        vin="X",
        nr_inmatriculare="Y",
        data_prestatie=" 2026-06-15 ",
        odometru_final="1",
    )
    assert canonicalize_row(row)["data_prestatie"] == "2026-06-15"
# --- build_key cross-canal (a) ---
_RAND = {
"vin": "WVWZZZ1KZAW000123",
"nr_inmatriculare": "B999TST",
"data_prestatie": "2026-06-15",
"odometru_final": "123456",
"prestatii": [{"cod_prestatie": "OE-1"}],
}
def test_cross_canal_none_equals_1():
    """(a) build_key yields the same key for account_id=None and account_id=1."""
    canonical = canonicalize_row(_RAND)
    key_for_none, key_for_one = (build_key(acct, canonical) for acct in (None, 1))
    assert key_for_none == key_for_one, "cross-canal divergenta: None vs 1"
def test_cross_canal_odometru_float():
    """An Excel-style float odometer gives the same key regardless of channel."""
    api_row = dict(_RAND, odometru_final="123456.0")
    import_row = dict(_RAND, odometru_final="123456")
    api_key = build_key(None, canonicalize_row(api_row))
    import_key = build_key(1, canonicalize_row(import_row))
    assert api_key == import_key, "float vs int odometru -> chei diferite"
# --- idempotency_key wrapper ---
def test_idempotency_key_backward_compat():
    """idempotency_key(None, raw) equals build_key(None, canonicalize_row(raw))."""
    via_build_key = build_key(None, canonicalize_row(_RAND))
    via_wrapper = idempotency_key(None, _RAND)
    assert via_build_key == via_wrapper
# --- build_key_legacy (b) ---
def test_legacy_key_differs_from_new():
    """(b) The legacy key (account_id=None hashed as passed) differs from the new key."""
    # New scheme: None is replaced via account_or_default before hashing.
    current = build_key(None, canonicalize_row(_RAND))
    # Legacy scheme: None goes into the hash unchanged.
    legacy = build_key_legacy(None, _RAND)
    assert current != legacy, "legacy si new trebuie sa difere (diferit account_id in hash)"
def test_legacy_dual_lookup_strategy():
    """Dual-lookup: a row stored under the legacy key is found via build_key_legacy.

    The store is modelled as a set of keys holding one pre-T9 row. The lookup
    tries the new key first and falls back to the legacy key, and the test
    checks that the hit really comes from the fallback (the previous version
    asserted ``old_key in {old_key}``, which can never fail).
    """
    # One old row, keyed with account_id=None inside the hash.
    stored_keys = {build_key_legacy(None, _RAND)}

    new_key = build_key(None, canonicalize_row(_RAND))
    legacy_key = build_key_legacy(None, _RAND)

    # The new key alone does NOT find the old row...
    assert new_key not in stored_keys
    # ...so the lookup has to fall through: new key first, then legacy.
    hit = next((key for key in (new_key, legacy_key) if key in stored_keys), None)
    assert hit == legacy_key, "dual-lookup trebuie sa gaseasca randul vechi"
# --- Integrare: API route foloseste build_key (OV-2) ---
@pytest.fixture()
def client(monkeypatch, tmp_path):
    """Yield a TestClient for the app, pointed at a throwaway database path.

    The database file lives under pytest's ``tmp_path`` so the directory is
    managed by pytest instead of leaking one ``tempfile.mkdtemp()`` directory
    per test.
    """
    monkeypatch.setenv("AUTOPASS_DB_PATH", str(tmp_path / "t9.db"))
    from app.config import get_settings

    # Presumably settings are cached; clear so the patched env var is re-read.
    get_settings.cache_clear()
    from app.main import app
    from fastapi.testclient import TestClient

    try:
        with TestClient(app) as c:
            yield c
    finally:
        # Reset the cache even if client shutdown raises, so later tests do
        # not inherit this test's settings.
        get_settings.cache_clear()
def _body(**over):
prez = {
"vin": "WVWZZZ1KZAW000123",
"nr_inmatriculare": "B999TST",
"data_prestatie": "2026-06-15",
"odometru_final": "123456",
"prestatii": [{"cod_prestatie": "OE-1"}],
}
prez.update(over)
return {"rar_credentials": {"email": "x@y.ro", "password": "s"}, "prezentari": [prez]}
def test_api_dedup_dupa_t9(client):
    """Dedup still works after T9: posting the same row twice yields one submission."""
    r1 = client.post("/v1/prezentari", json=_body())
    r2 = client.post("/v1/prezentari", json=_body())
    assert r1.status_code == 200
    # Check the second response too, so a rejected request fails here with a
    # clear status mismatch instead of a KeyError on the JSON body below.
    assert r2.status_code == 200
    sid1 = r1.json()["results"][0]["submission_id"]
    res2 = r2.json()["results"][0]
    assert res2["submission_id"] == sid1
    assert res2["deduped"] is True
def test_api_odometru_float_dedup(client):
    """Odometer '123456.0' and '123456' dedup to the same submission after canonicalization."""
    r1 = client.post("/v1/prezentari", json=_body(odometru_final="123456"))
    r2 = client.post("/v1/prezentari", json=_body(odometru_final="123456.0"))
    assert r1.status_code == 200
    # The float-looking payload is the one most likely to be rejected; check
    # its status explicitly so that shows up as a status mismatch, not as a
    # KeyError on the JSON body below.
    assert r2.status_code == 200
    sid1 = r1.json()["results"][0]["submission_id"]
    res2 = r2.json()["results"][0]
    assert res2["submission_id"] == sid1, "odometru float si int trebuie sa dea acelasi submission"
    assert res2["deduped"] is True