feat(anaf): add CUI checksum validation + sanitize at import time

Romanian CUI check digit algorithm (key 753217532) validates CUIs
before ANAF lookup. New sanitize_cui() fixes OCR typos (O→0, I→1)
and verifies checksum, logging warnings for invalid CUIs.

Applied at both ANAF batch verification and per-order import steps.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-04-02 14:38:58 +00:00
parent 2ec1fc0f19
commit b64a99d4e6
2 changed files with 57 additions and 4 deletions

View File

@@ -22,10 +22,61 @@ def strip_ro_prefix(cod_fiscal: str) -> str:
def validate_cui(bare_cui: str) -> bool:
    """Return True when *bare_cui* has the shape of a bare CUI.

    A bare CUI (no ``RO`` prefix) must consist of 2 to 10 ASCII digits.
    Empty or ``None`` input is rejected.

    Only ASCII digits are accepted: ``str.isdigit()`` alone is also true for
    characters such as superscript digits or Arabic-Indic numerals, which are
    not valid CUI characters and can make ``int()`` raise later on.
    """
    if not bare_cui:
        return False
    if not (bare_cui.isascii() and bare_cui.isdigit()):
        return False
    return 2 <= len(bare_cui) <= 10
# Romanian CUI test key: 9 weights, right-aligned with the digits that precede the check digit.
_CUI_KEY = [7, 5, 3, 2, 1, 7, 5, 3, 2]
def validate_cui_checksum(bare_cui: str) -> bool:
    """Return True when the last digit of *bare_cui* is its correct control digit.

    The digits before the control digit are left-padded with zeros to nine
    positions and multiplied position by position with ``_CUI_KEY``
    (7-5-3-2-1-7-5-3-2). The expected control digit is
    ``(weighted_sum * 10) % 11``, where a remainder of 10 counts as 0.

    Values rejected by ``validate_cui`` are reported as invalid.
    """
    if not validate_cui(bare_cui):
        return False
    # Everything except the final character is the weighted body.
    body = bare_cui[:-1].zfill(9)
    weighted_sum = sum(int(ch) * weight for ch, weight in zip(body, _CUI_KEY))
    # The trailing "% 10" folds a remainder of 10 into control digit 0.
    expected = (weighted_sum * 10) % 11 % 10
    return expected == int(bare_cui[-1])
def sanitize_cui(raw_cf: str) -> tuple[str, str | None]:
    """Normalise a fiscal code and check it, returning ``(cui, warning)``.

    The value is first passed through ``strip_ro_prefix`` (defined outside
    this block; presumably it also applies the OCR fixes such as O→0 —
    confirm against its implementation). The outcome is then decided in order:

    1. Empty after normalisation: returned as-is, no warning.
    2. Normalised value passes format and checksum: returned, no warning.
    3. The upper-cased input with only a leading ``RO`` removed differs from
       the normalised value and passes format and checksum: that value is
       returned, no warning.
    4. Normalised value has a valid format but a wrong control digit:
       returned with a checksum warning.
    5. Otherwise: normalised value returned with an invalid-characters warning.
    """
    bare = strip_ro_prefix(raw_cf)
    if not bare:
        return bare, None

    well_formed = validate_cui(bare)
    if well_formed and validate_cui_checksum(bare):
        return bare, None

    # Second chance: same input with only the prefix removed, no further fixes.
    prefix_only = re.sub(r'^RO\s*', '', raw_cf.strip().upper())
    if (
        prefix_only != bare
        and validate_cui(prefix_only)
        and validate_cui_checksum(prefix_only)
    ):
        return prefix_only, None

    if well_formed:
        # Right shape, wrong control digit.
        warning = f"CUI {bare} nu trece verificarea cifrei de control"
    else:
        # Still contains characters that are not digits (or has a bad length).
        warning = f"CUI {raw_cf!r} contine caractere invalide dupa sanitizare: {bare!r}"
    return bare, warning
async def check_vat_status_batch(cui_list: list[str], date: str = None) -> dict[str, dict]:

View File

@@ -659,7 +659,7 @@ async def run_sync(id_pol: int = None, id_sectie: int = None, run_id: str = None
is_ro = (order.billing.country or "").strip().lower() == "romania"
if order.billing.is_company and order.billing.company_code and is_ro:
raw_cf = import_service.clean_web_text(order.billing.company_code) or ""
bare = anaf_service.strip_ro_prefix(raw_cf)
bare, _ = anaf_service.sanitize_cui(raw_cf)
if anaf_service.validate_cui(bare):
company_cuis.add(bare)
@@ -702,7 +702,9 @@ async def run_sync(id_pol: int = None, id_sectie: int = None, run_id: str = None
raw_cf = ""
if order.billing.is_company and order.billing.company_code:
raw_cf = import_service.clean_web_text(order.billing.company_code) or ""
bare_cui = anaf_service.strip_ro_prefix(raw_cf)
bare_cui, cui_warning = anaf_service.sanitize_cui(raw_cf)
if cui_warning:
_log_line(run_id, f"#{order.number} WARN: {cui_warning}")
anaf_data_for_order = cached_results.get(bare_cui)
if anaf_data_for_order and anaf_data_for_order.get("scpTVA") is not None:
correct_cf = anaf_service.determine_correct_cod_fiscal(bare_cui, anaf_data_for_order["scpTVA"])