Files
rar-autopass/app/mapping.py
Claude Agent 14e1c463f0 feat(errors): erori pe 3 niveluri (problema+cauza+fix) pe API si UI (PRD 5.4)
Catalog central pur app/errors.py ca sursa unica cod->{problema,fix},
consumat de API+UI+worker. Aditiv (field/message pastrate la octet) +
rar_error stocat superset. Scope: fluxul de declarare; login/signup/CSRF
neatinse. labels.parse_erori degradeaza gratios; UI progresiv AA light+dark.
631 teste.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-23 10:28:09 +00:00

449 lines
18 KiB
Python

"""Mapare operatie ROAAUTO -> cod prestatie RAR + fuzzy lookup pentru editor.
Contract (varianta hibrida, decis 2026-06-15): un item de prestatie poate veni
fie cu `cod_prestatie` (cod RAR direct, ca pana acum), fie cu `cod_op_service`
(cod intern ROAAUTO) + `denumire`. La ingestie incercam sa rezolvam codul intern
prin `operations_mapping`; daca nu exista mapare -> submission `needs_mapping`
(nu se trimite la RAR), iar operatia apare in editorul web unde userul o mapeaza
cu ajutorul unei sugestii fuzzy pe nomenclatorul RAR. La salvarea maparii,
submission-urile blocate pe acel cod se re-rezolva automat.
Functiile de la inceput (normalize/suggest/resolve) sunt PURE (fara DB/HTTP),
unit-testabile direct. Cele cu `conn` sunt helpere de persistenta.
"""
from __future__ import annotations
import json
import unicodedata
from typing import Any
from rapidfuzz import fuzz, process
from . import errors as err_mod
from .nomenclator_seed import FALLBACK_NOMENCLATOR
from .validation import validate_prezentare
# Default account for as long as API-key auth (CORE) is not implemented:
# ingestions arrive with account_id NULL and are attributed to the account
# seeded in the schema (id=1).
DEFAULT_ACCOUNT_ID = 1
# Below this score (0..100) no suggestion is preselected — the user picks manually.
SUGGEST_MIN_SCORE = 60
# --------------------------------------------------------------------------- #
# Pur: normalizare + fuzzy + rezolvare #
# --------------------------------------------------------------------------- #
def normalize_for_match(value: object) -> str:
    """Return *value* upper-cased, without diacritics, with whitespace collapsed.

    Falsy input (``None``, ``""``, ``0``) normalises to the empty string.
    The text is NFKD-decomposed and every combining mark is dropped, so
    Romanian diacritics do not distort fuzzy scores:
    'Reparație motor' -> 'REPARATIE MOTOR'.
    """
    text = str(value) if value else ""
    decomposed = unicodedata.normalize("NFKD", text)
    # combining() is 0 for base characters and non-zero for accent marks.
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    words = "".join(base_chars).upper().split()
    return " ".join(words)
def suggest_codes(
    denumire: object,
    nomenclator: list[dict],
    *,
    limit: int = 5,
) -> list[dict]:
    """Rank RAR codes by fuzzy similarity to a ROAAUTO operation name.

    `nomenclator` is a list of rows shaped {cod_prestatie, nume_prestatie};
    rows whose `cod_prestatie` is missing or empty are ignored. Returns at
    most `limit` dicts {cod_prestatie, nume_prestatie, score}, as ordered by
    `process.extract`. When the normalised name is empty, the first `limit`
    usable rows are returned in their given order, each copied with
    ``score`` 0.0.
    """
    needle = normalize_for_match(denumire)
    usable = [row for row in nomenclator if (row.get("cod_prestatie") or "")]
    if not needle:
        return [{**row, "score": 0.0} for row in usable[:limit]]

    names_by_code: dict = {}
    rows_by_code: dict = {}
    for row in usable:
        code = row["cod_prestatie"]
        names_by_code[code] = normalize_for_match(row.get("nume_prestatie"))
        rows_by_code[code] = row

    # token_sort_ratio (not token_set_ratio): rewards covering as many words of
    # the name as possible instead of scoring 100 for any subset (e.g.
    # "REPARATIE" and "REPARATIE ODOMETRU" would tie under set_ratio).
    matches = process.extract(
        needle,
        names_by_code,
        scorer=fuzz.token_sort_ratio,
        limit=limit,
    )

    # With a mapping as choices, each match is (value, score, key); key = cod_prestatie.
    suggestions: list[dict] = []
    for _name, score, code in matches:
        suggestions.append(
            {
                "cod_prestatie": code,
                "nume_prestatie": rows_by_code[code].get("nume_prestatie"),
                "score": float(score),
            }
        )
    return suggestions
def resolve_prestatii(
    prestatii: list[dict] | None,
    mapping: dict[str, str],
) -> tuple[list[dict], list[dict]]:
    """Fill in `cod_prestatie` from the op->code mapping wherever it is missing.

    Rules (hybrid contract):
    - item with `cod_prestatie`: kept, with the code stripped and upper-cased.
    - item without a code whose stripped `cod_op_service` is a key of
      `mapping`: `cod_prestatie` is set to the mapped value.
    - item without a code, with an op that is not mapped: `cod_prestatie` is
      set to None and the op is reported as unmapped.
    - item with neither code nor op: copied through untouched.

    Returns ``(resolved, unmapped)``. Every resolved item is a shallow copy of
    the input item, so the original fields (cod_op_service/denumire) survive
    for later re-resolution. `unmapped` is a list of
    {cod_op_service, denumire} for the editor. ``None`` input yields two
    empty lists.
    """
    resolved_items: list[dict] = []
    missing: list[dict] = []
    for raw in prestatii or []:
        entry = dict(raw)
        direct_code = (entry.get("cod_prestatie") or "").strip().upper()
        op_code = (entry.get("cod_op_service") or "").strip()
        if direct_code:
            entry["cod_prestatie"] = direct_code
        elif op_code:
            if op_code in mapping:
                entry["cod_prestatie"] = mapping[op_code]
            else:
                entry["cod_prestatie"] = None
                missing.append(
                    {"cod_op_service": op_code, "denumire": entry.get("denumire")}
                )
        # No code and no op: left as is; per the original note, content
        # validation is expected to report the empty/missing code.
        resolved_items.append(entry)
    return resolved_items, missing
# --------------------------------------------------------------------------- #
# Persistenta (conn) #
# --------------------------------------------------------------------------- #
def account_or_default(account_id: int | None) -> int:
    """Return `account_id`, or ``DEFAULT_ACCOUNT_ID`` when it is ``None``."""
    if account_id is None:
        return DEFAULT_ACCOUNT_ID
    return account_id
def account_scope_clause(account_id: int) -> tuple[str, list]:
    """Build the SQL fragment + params that scope a query to one account.

    Intended for tables whose `account_id` is nullable. The fragment matches
    rows owned by `account_id` and, only when `account_id` is 1, rows whose
    `account_id` IS NULL (rule: NULL belongs to account 1, legacy/OV-2).

    Per the original note: use it ONLY on `submissions` (nullable
    account_id); on `operations_mapping` (account_id NOT NULL) a plain
    ``WHERE account_id=?`` is the right filter.

    Returns ``(sql_fragment, params)``; the fragment has two placeholders and
    both are bound to `account_id`.
    """
    fragment = "(account_id = ? OR (account_id IS NULL AND ? = 1))"
    params = [account_id] * 2
    return fragment, params
def seed_nomenclator_if_empty(conn) -> int:
    """Insert the fallback nomenclature ONLY when `nomenclator_rar` is empty.

    Guarantees the fuzzy editor has codes to suggest offline (per the
    original note, the worker later overwrites them with live data).
    The count row is read by column name, so `conn` must return name-indexable
    rows.

    Returns ``len(FALLBACK_NOMENCLATOR)`` when seeding ran, otherwise 0.
    """
    existing = conn.execute("SELECT COUNT(*) AS n FROM nomenclator_rar").fetchone()["n"]
    if not existing:
        conn.executemany(
            "INSERT OR IGNORE INTO nomenclator_rar (cod_prestatie, nume_prestatie) VALUES (?, ?)",
            FALLBACK_NOMENCLATOR,
        )
        return len(FALLBACK_NOMENCLATOR)
    return 0
def upsert_nomenclator(conn, items: list[dict]) -> int:
    """Upsert the live RAR nomenclature into `nomenclator_rar`.

    `items` uses the API shape, with tolerant key lookup: the code is read
    from codPrestatie / cod_prestatie / cod and the name from
    numePrestatie / nume_prestatie / nume (first truthy value wins).
    Codes are stripped and upper-cased, names are stripped.

    Skipped silently: non-dict entries and entries whose code is missing or
    blank *after* normalisation (a whitespace-only code would otherwise be
    stored as an empty `cod_prestatie`).

    Returns the number of rows sent to the upsert (0 when nothing is usable,
    including ``None``/empty `items`).
    """
    rows: list[tuple[str, str]] = []
    for it in items or []:
        if not isinstance(it, dict):
            continue
        cod = it.get("codPrestatie") or it.get("cod_prestatie") or it.get("cod")
        nume = it.get("numePrestatie") or it.get("nume_prestatie") or it.get("nume")
        # Validate the normalised code, not the raw value: "   " is truthy
        # but normalises to "", which must never become a nomenclature row.
        cod_norm = str(cod or "").strip().upper()
        if not cod_norm:
            continue
        rows.append((cod_norm, str(nume or "").strip()))
    if not rows:
        return 0
    conn.executemany(
        "INSERT INTO nomenclator_rar (cod_prestatie, nume_prestatie, updated_at) "
        "VALUES (?, ?, datetime('now')) "
        "ON CONFLICT(cod_prestatie) DO UPDATE SET nume_prestatie=excluded.nume_prestatie, "
        "updated_at=datetime('now')",
        rows,
    )
    return len(rows)
def load_nomenclator(conn) -> list[dict]:
    """Return every `nomenclator_rar` row as a plain dict, ordered by code.

    Each dict has the keys `cod_prestatie` and `nume_prestatie`. Rows are
    converted with ``dict(row)``, so `conn` must yield mapping-like rows.
    """
    cursor = conn.execute(
        "SELECT cod_prestatie, nume_prestatie FROM nomenclator_rar ORDER BY cod_prestatie"
    )
    return list(map(dict, cursor.fetchall()))
def load_mapping(conn, account_id: int | None) -> dict[str, str]:
    """Return {cod_op_service -> cod_prestatie} for one account.

    A ``None`` `account_id` is replaced via `account_or_default` before the
    `operations_mapping` lookup.
    """
    acct = account_or_default(account_id)
    cursor = conn.execute(
        "SELECT cod_op_service, cod_prestatie FROM operations_mapping WHERE account_id=?",
        (acct,),
    )
    by_op: dict[str, str] = {}
    for row in cursor.fetchall():
        by_op[row["cod_op_service"]] = row["cod_prestatie"]
    return by_op
def load_mapping_meta(conn, account_id: int | None) -> dict[str, dict]:
    """Return {cod_op_service -> {cod_prestatie, auto_send}} for one account.

    T6/OV-1: extended variant of the op->code mapping that also carries the
    per-operation `auto_send` flag, coerced to ``bool``. A ``None``
    `account_id` is replaced via `account_or_default`.
    """
    acct = account_or_default(account_id)
    cursor = conn.execute(
        "SELECT cod_op_service, cod_prestatie, auto_send FROM operations_mapping WHERE account_id=?",
        (acct,),
    )
    meta_by_op: dict[str, dict] = {}
    for row in cursor.fetchall():
        meta_by_op[row["cod_op_service"]] = {
            "cod_prestatie": row["cod_prestatie"],
            "auto_send": bool(row["auto_send"]),
        }
    return meta_by_op
def classify_prezentare(
    content: dict,
    mapping: dict[str, str],
    mapping_meta: dict[str, dict],
) -> dict:
    """Classify one presentation without touching the DB.

    Per the original note, this mirrors the logic of create_prezentari and is
    shared by POST /v1/prezentari and POST /v1/prezentari/valideaza so that
    both routes reach the same verdict (dry-run invariant, PRD 5.2).

    Steps, in order:
    1. shallow-copy `content` and overwrite vin / nr_inmatriculare /
       odometru_final with the values from `canonicalize_row`;
    2. resolve the service items against `mapping`;
    3. any unmapped op -> ``needs_mapping``; otherwise validation errors ->
       ``needs_data``; otherwise a mapped op with auto_send off ->
       ``needs_mapping`` (manual review); otherwise ``queued``.

    Returns {"status", "rar_error", "resolved", "unmapped", "errors",
    "content"}: `rar_error` is a JSON string (None when queued) and
    `content` is the updated copy.
    """
    # Local import: avoids a circular import between mapping and idempotency.
    from .idempotency import canonicalize_row

    updated = dict(content)
    canon = canonicalize_row(updated)
    for field in ("vin", "nr_inmatriculare", "odometru_final"):
        updated[field] = canon[field]

    resolved, unmapped = resolve_prestatii(updated.get("prestatii"), mapping)
    updated["prestatii"] = resolved

    # Defaults describe the happy path; each branch below overrides them.
    status = "queued"
    rar_error = None
    errors: list[dict] = []

    if unmapped:
        status = "needs_mapping"
        coduri = ", ".join((u.get("cod_op_service") or "") for u in unmapped)
        detail = err_mod.eroare("COD_NEMAPAT", cauza=f"Coduri fara mapare RAR: {coduri}")
        rar_error = json.dumps({"unmapped": unmapped, **detail}, ensure_ascii=False)
    else:
        errors = validate_prezentare(updated)
        if errors:
            status = "needs_data"
            rar_error = json.dumps(errors, ensure_ascii=False)
        elif has_no_auto_send(resolved, mapping_meta):
            status = "needs_mapping"
            mesaj = "cod mapat cu auto_send=0; review manual inainte de trimitere"
            detail = err_mod.eroare("AUTO_SEND_OPRIT", cauza=mesaj)
            rar_error = json.dumps({"auto_send": mesaj, **detail}, ensure_ascii=False)

    return {
        "status": status,
        "rar_error": rar_error,
        "resolved": resolved,
        "unmapped": unmapped,
        "errors": errors,
        "content": updated,
    }
def has_no_auto_send(resolved: list[dict], mapping_meta: dict[str, dict]) -> bool:
    """Tell whether any item carries an op whose mapping has auto_send off.

    T6/OV-1: a freshly mapped code with auto_send=0 must not be sent
    automatically. Only items with a non-blank `cod_op_service` that is a key
    of `mapping_meta` are considered; items without an op never trigger.
    """
    ops = ((item.get("cod_op_service") or "").strip() for item in resolved)
    return any(
        op and op in mapping_meta and not mapping_meta[op]["auto_send"]
        for op in ops
    )
def pending_unmapped(conn, account_id=None) -> list[dict]:
    """List distinct unmapped operations, aggregated from `needs_mapping` submissions.

    account_id=None (default): global listing, kept for web/routes.py
    back-compat. Per the original note, new API callers MUST pass an explicit
    account_id; the global mode leaks across accounts and is reserved for the
    internal dashboard.
    account_id=int: filtered in SQL via `account_scope_clause`, which also
    includes legacy rows (account_id IS NULL belongs to account 1, OV-2).

    Rows whose `payload_json` cannot be parsed, or parses to something other
    than a JSON object, are skipped so one bad row cannot break the listing.

    Returns one dict per (account, op): {account_id, cod_op_service,
    denumire, blocked, suggestions}, where `blocked` is the number of distinct
    submissions waiting on the op and `suggestions` comes from
    `suggest_codes`. Sorted by `blocked` descending, then by op code.
    """
    nomenclator = load_nomenclator(conn)
    if account_id is not None:
        scope_sql, scope_params = account_scope_clause(account_id)
        rows = conn.execute(
            f"SELECT id, account_id, payload_json FROM submissions "
            f"WHERE status='needs_mapping' AND {scope_sql}",
            scope_params,
        ).fetchall()
    else:
        rows = conn.execute(
            "SELECT id, account_id, payload_json FROM submissions WHERE status='needs_mapping'"
        ).fetchall()

    agg: dict[tuple[int, str], dict[str, Any]] = {}
    for r in rows:
        acct = r["account_id"] if r["account_id"] is not None else DEFAULT_ACCOUNT_ID
        try:
            content = json.loads(r["payload_json"])
        except (ValueError, TypeError):
            continue
        if not isinstance(content, dict):
            # Valid JSON but not an object (null, list, number...): there is
            # no "prestatii" key to read, so treat it like a malformed row.
            continue
        for item in content.get("prestatii") or []:
            if not isinstance(item, dict):
                continue
            if (item.get("cod_prestatie") or ""):
                continue  # already has a RAR code
            op = (item.get("cod_op_service") or "").strip()
            if not op:
                continue
            key = (acct, op)
            entry = agg.setdefault(
                key,
                {"account_id": acct, "cod_op_service": op, "denumire": item.get("denumire"), "blocked": 0, "_ids": set()},
            )
            # Keep the first non-empty name seen for this op.
            if not entry["denumire"] and item.get("denumire"):
                entry["denumire"] = item.get("denumire")
            entry["_ids"].add(r["id"])

    out: list[dict] = []
    for entry in agg.values():
        # "_ids" is internal bookkeeping: collapse it into the distinct count.
        entry["blocked"] = len(entry.pop("_ids"))
        entry["suggestions"] = suggest_codes(entry["denumire"], nomenclator, limit=5)
        out.append(entry)
    out.sort(key=lambda e: (-e["blocked"], e["cod_op_service"]))
    return out
def save_mapping(conn, account_id: int | None, cod_op_service: str, cod_prestatie: str, auto_send: bool) -> None:
    """Upsert one op->code mapping (conflict target: account_id + cod_op_service).

    The op is stripped; the RAR code is stripped and upper-cased; `auto_send`
    is stored as 1/0. A ``None`` `account_id` is replaced via
    `account_or_default`.

    Raises:
        ValueError: when the op or the code is empty after normalisation.
    """
    acct = account_or_default(account_id)
    op = (cod_op_service or "").strip()
    cod = (cod_prestatie or "").strip().upper()
    if not (op and cod):
        raise ValueError("cod_op_service si cod_prestatie sunt obligatorii")
    sql = (
        "INSERT INTO operations_mapping (account_id, cod_op_service, cod_prestatie, auto_send) "
        "VALUES (?, ?, ?, ?) "
        "ON CONFLICT(account_id, cod_op_service) DO UPDATE SET "
        "cod_prestatie=excluded.cod_prestatie, auto_send=excluded.auto_send"
    )
    auto_flag = int(bool(auto_send))
    conn.execute(sql, (acct, op, cod, auto_flag))
def reresolve_account(conn, account_id: int | None, batch_id: int | None = None) -> dict[str, int]:
    """Re-resolve an account's `needs_mapping` submissions after a mapping change.

    For each selected submission the current mapping is applied, then:
    - ops still unmapped -> stays `needs_mapping`, reason refreshed
      (counted as ``still_blocked``);
    - T6/OV-1: a mapped op with auto_send=0 -> stays `needs_mapping` with a
      "manual review" reason instead of being queued (``review_manual``);
    - content validation fails -> `needs_data` with the errors (``needs_data``);
    - otherwise -> `queued`, with rar_error cleared and the backoff reset
      (retry_count=0, next_attempt_at=NULL) (``requeued``).

    Scope:
    - account: filtered with `account_scope_clause`, because
      submissions.account_id is nullable and NULL rows belong to account 1;
    - T7: ``batch_id`` given -> only that batch; ``batch_id=None`` -> only
      the API channel (rows with batch_id IS NULL), never import batches.

    Rows whose payload is not parseable JSON, or is not a JSON object, are
    skipped and not counted.

    Returns {"requeued", "still_blocked", "needs_data", "review_manual"}.
    """
    acct = account_or_default(account_id)
    mapping_meta = load_mapping_meta(conn, acct)
    mapping = {op: meta["cod_prestatie"] for op, meta in mapping_meta.items()}

    # A bare "account_id = ?" would miss legacy NULL rows that pending_unmapped
    # lists under account 1, leaving them blocked after the mapping is saved.
    scope_sql, scope_params = account_scope_clause(acct)
    if batch_id is not None:
        # T7: explicit import commit -> only the given batch; rows from other
        # batches or from the API feed are not touched.
        rows = conn.execute(
            "SELECT id, payload_json FROM submissions "
            f"WHERE status='needs_mapping' AND {scope_sql} AND batch_id=?",
            [*scope_params, batch_id],
        ).fetchall()
    else:
        # POST /v1/mapari (manual save): API channel only (batch_id IS NULL).
        # T7/R1: saving a mapping must NOT re-queue rows from import batches;
        # those are re-resolved only on their own explicit commit.
        rows = conn.execute(
            "SELECT id, payload_json FROM submissions "
            f"WHERE status='needs_mapping' AND {scope_sql} AND batch_id IS NULL",
            scope_params,
        ).fetchall()

    stats = {"requeued": 0, "still_blocked": 0, "needs_data": 0, "review_manual": 0}
    for r in rows:
        try:
            content = json.loads(r["payload_json"])
        except (ValueError, TypeError):
            continue
        if not isinstance(content, dict):
            # Valid JSON but not an object: nothing to resolve, skip like a
            # malformed payload instead of failing the whole run.
            continue

        resolved, unmapped = resolve_prestatii(content.get("prestatii"), mapping)
        content["prestatii"] = resolved
        payload_json = json.dumps(content, ensure_ascii=False)

        if unmapped:
            # Same rar_error shape classify_prezentare stores for this state.
            coduri = ", ".join((u.get("cod_op_service") or "") for u in unmapped)
            rar_error = json.dumps(
                {"unmapped": unmapped, **err_mod.eroare("COD_NEMAPAT", cauza=f"Coduri fara mapare RAR: {coduri}")},
                ensure_ascii=False,
            )
            conn.execute(
                "UPDATE submissions SET payload_json=?, rar_error=?, updated_at=datetime('now') WHERE id=?",
                (payload_json, rar_error, r["id"]),
            )
            stats["still_blocked"] += 1
            continue

        # T6/OV-1: check auto_send before re-queuing.
        if has_no_auto_send(resolved, mapping_meta):
            mesaj = "cod mapat cu auto_send=0; review manual inainte de trimitere"
            rar_error = json.dumps(
                {"auto_send": mesaj, **err_mod.eroare("AUTO_SEND_OPRIT", cauza=mesaj)},
                ensure_ascii=False,
            )
            conn.execute(
                "UPDATE submissions SET payload_json=?, rar_error=?, updated_at=datetime('now') WHERE id=?",
                (payload_json, rar_error, r["id"]),
            )
            stats["review_manual"] += 1
            continue

        errors = validate_prezentare(content)
        if errors:
            conn.execute(
                "UPDATE submissions SET status='needs_data', payload_json=?, rar_error=?, "
                "updated_at=datetime('now') WHERE id=?",
                (payload_json, json.dumps(errors, ensure_ascii=False), r["id"]),
            )
            stats["needs_data"] += 1
        else:
            conn.execute(
                "UPDATE submissions SET status='queued', payload_json=?, rar_error=NULL, "
                "retry_count=0, next_attempt_at=NULL, updated_at=datetime('now') WHERE id=?",
                (payload_json, r["id"]),
            )
            stats["requeued"] += 1
    return stats