Compare commits

...

3 Commits

Author SHA1 Message Date
Claude Agent
0992744490 refactor(anaf): remove dead code in sanitize_cui, fix empty test
Remove unreachable OCR-skip fallback (raw_bare can't be all-digits
if strip_ro_prefix changed it via OCR fix). Add real test for the
checksum result==10→0 branch using CUI 14186770.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 14:46:10 +00:00
Claude Agent
1d871c8215 test(anaf): add 45 tests for CUI validation, checksum, and sanitization
Covers strip_ro_prefix (OCR fixes), validate_cui (format),
validate_cui_checksum (Romanian algorithm with key 753217532),
and sanitize_cui (end-to-end with warnings). Verified against
12 real CUIs from production orders.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 14:42:32 +00:00
Claude Agent
b64a99d4e6 feat(anaf): add CUI checksum validation + sanitize at import time
Romanian CUI check digit algorithm (key 753217532) validates CUIs
before ANAF lookup. New sanitize_cui() fixes OCR typos (O→0, I→1)
and verifies checksum, logging warnings for invalid CUIs.

Applied at both ANAF batch verification and per-order import steps.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 14:38:58 +00:00
3 changed files with 263 additions and 4 deletions

View File

@@ -22,10 +22,55 @@ def strip_ro_prefix(cod_fiscal: str) -> str:
def validate_cui(bare_cui: str) -> bool:
    """Validate the format of a bare CUI (no ``RO`` prefix).

    A well-formed CUI consists of 2 to 10 ASCII decimal digits. The check
    digit is NOT verified here; see ``validate_cui_checksum`` for that.

    Args:
        bare_cui: Candidate CUI. ``None`` and the empty string are rejected.

    Returns:
        ``True`` when the format is valid, ``False`` otherwise.
    """
    if not bare_cui:
        return False
    # str.isdigit() on its own also accepts non-ASCII digit characters
    # (superscript digits, full-width digits, ...). Superscripts make int()
    # raise ValueError later on, so require plain ASCII as well.
    return bare_cui.isascii() and bare_cui.isdigit() and 2 <= len(bare_cui) <= 10


# Romanian CUI check key: 9 weights, right-aligned with the digits that
# precede the check digit.
_CUI_KEY = [7, 5, 3, 2, 1, 7, 5, 3, 2]


def validate_cui_checksum(bare_cui: str) -> bool:
    """Validate the CUI check digit using the Romanian algorithm.

    Algorithm: take all digits except the last one, left-pad them with zeros
    to 9 digits, multiply position-wise by the key 753217532 and sum the
    products. ``(sum * 10) % 11`` is the expected check digit, with a result
    of 10 mapped to 0.

    Args:
        bare_cui: Candidate CUI without the ``RO`` prefix.

    Returns:
        ``True`` when the format is valid and the last digit matches the
        computed check digit; ``False`` otherwise (including bad format).
    """
    if not validate_cui(bare_cui):
        return False

    *body, check_digit = (int(ch) for ch in bare_cui)
    # Right-align the body against the key by padding with leading zeros.
    padded = [0] * (len(_CUI_KEY) - len(body)) + body
    total = sum(digit * weight for digit, weight in zip(padded, _CUI_KEY))

    expected = (total * 10) % 11
    if expected == 10:
        # A remainder of 10 is not a single digit; the algorithm maps it to 0.
        expected = 0
    return expected == check_digit
def sanitize_cui(raw_cf: str) -> tuple[str, str | None]:
    """Sanitize a raw fiscal code and validate it as a CUI.

    The raw value is normalised with ``strip_ro_prefix`` and the normalised
    value is then checked for format and check digit.

    Args:
        raw_cf: Fiscal code as received (may carry an ``RO`` prefix,
            whitespace or OCR typos).

    Returns:
        A ``(clean_cui, warning)`` tuple. ``clean_cui`` is ALWAYS the
        normalised value, even when validation fails; the raw input is never
        returned. ``warning`` is ``None`` when the value is empty or fully
        valid, otherwise a message describing which check failed.
    """
    bare = strip_ro_prefix(raw_cf)
    if not bare:
        # Nothing to validate; an empty code is not reported as a problem.
        return bare, None

    if not validate_cui(bare):
        # Covers any format failure (non-digit characters or bad length).
        return bare, f"CUI {raw_cf!r} contine caractere invalide dupa sanitizare: {bare!r}"

    if not validate_cui_checksum(bare):
        # Well-formed, but the check digit does not match.
        return bare, f"CUI {bare} nu trece verificarea cifrei de control"

    return bare, None
async def check_vat_status_batch(cui_list: list[str], date: str = None) -> dict[str, dict]: async def check_vat_status_batch(cui_list: list[str], date: str = None) -> dict[str, dict]:

View File

@@ -659,7 +659,7 @@ async def run_sync(id_pol: int = None, id_sectie: int = None, run_id: str = None
is_ro = (order.billing.country or "").strip().lower() == "romania" is_ro = (order.billing.country or "").strip().lower() == "romania"
if order.billing.is_company and order.billing.company_code and is_ro: if order.billing.is_company and order.billing.company_code and is_ro:
raw_cf = import_service.clean_web_text(order.billing.company_code) or "" raw_cf = import_service.clean_web_text(order.billing.company_code) or ""
bare = anaf_service.strip_ro_prefix(raw_cf) bare, _ = anaf_service.sanitize_cui(raw_cf)
if anaf_service.validate_cui(bare): if anaf_service.validate_cui(bare):
company_cuis.add(bare) company_cuis.add(bare)
@@ -702,7 +702,9 @@ async def run_sync(id_pol: int = None, id_sectie: int = None, run_id: str = None
raw_cf = "" raw_cf = ""
if order.billing.is_company and order.billing.company_code: if order.billing.is_company and order.billing.company_code:
raw_cf = import_service.clean_web_text(order.billing.company_code) or "" raw_cf = import_service.clean_web_text(order.billing.company_code) or ""
bare_cui = anaf_service.strip_ro_prefix(raw_cf) bare_cui, cui_warning = anaf_service.sanitize_cui(raw_cf)
if cui_warning:
_log_line(run_id, f"#{order.number} WARN: {cui_warning}")
anaf_data_for_order = cached_results.get(bare_cui) anaf_data_for_order = cached_results.get(bare_cui)
if anaf_data_for_order and anaf_data_for_order.get("scpTVA") is not None: if anaf_data_for_order and anaf_data_for_order.get("scpTVA") is not None:
correct_cf = anaf_service.determine_correct_cod_fiscal(bare_cui, anaf_data_for_order["scpTVA"]) correct_cf = anaf_service.determine_correct_cod_fiscal(bare_cui, anaf_data_for_order["scpTVA"])

View File

@@ -0,0 +1,212 @@
"""
CUI Validation Tests
====================
Tests for Romanian CUI sanitization, checksum validation, and OCR typo correction.
Run:
cd api && python -m pytest tests/test_cui_validation.py -v
"""
import os
import sys
import tempfile
import pytest
pytestmark = pytest.mark.unit
# --- Set env vars BEFORE any app import ---
# All file-based settings point into a throwaway temp directory and the
# Oracle credentials are placeholders.
_tmpdir = tempfile.mkdtemp()
os.environ.update(
    {
        "FORCE_THIN_MODE": "true",
        "SQLITE_DB_PATH": os.path.join(_tmpdir, "test_cui.db"),
        "ORACLE_DSN": "dummy",
        "ORACLE_USER": "dummy",
        "ORACLE_PASSWORD": "dummy",
        "JSON_OUTPUT_DIR": _tmpdir,
    }
)

# Put the directory two levels above this file on sys.path so that the
# ``app`` package can be imported.
_api_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _api_dir not in sys.path:
    sys.path.insert(0, _api_dir)
from app.services.anaf_service import (
strip_ro_prefix,
validate_cui,
validate_cui_checksum,
sanitize_cui,
)
# ===========================================================================
# strip_ro_prefix
# ===========================================================================
class TestStripRoPrefix:
    """Prefix removal and OCR-typo normalisation done by strip_ro_prefix."""

    def test_basic_ro_prefix(self):
        stripped = strip_ro_prefix("RO15134434")
        assert stripped == "15134434"

    def test_ro_with_space(self):
        stripped = strip_ro_prefix("RO 15134434")
        assert stripped == "15134434"

    def test_lowercase_ro(self):
        stripped = strip_ro_prefix("ro15134434")
        assert stripped == "15134434"

    def test_no_prefix(self):
        stripped = strip_ro_prefix("15134434")
        assert stripped == "15134434"

    def test_whitespace(self):
        stripped = strip_ro_prefix(" RO15134434 ")
        assert stripped == "15134434"

    def test_empty(self):
        stripped = strip_ro_prefix("")
        assert stripped == ""

    def test_none(self):
        stripped = strip_ro_prefix(None)
        assert stripped == ""

    def test_ocr_fix_O_to_0(self):
        """A letter O inside the CUI is read as the digit 0."""
        stripped = strip_ro_prefix("49O33O51")
        assert stripped == "49033051"

    def test_ocr_fix_I_to_1(self):
        """A letter I inside the CUI is read as the digit 1."""
        stripped = strip_ro_prefix("I5134434")
        assert stripped == "15134434"

    def test_ocr_fix_L_to_1(self):
        """A letter L inside the CUI is read as the digit 1."""
        stripped = strip_ro_prefix("L5134434")
        assert stripped == "15134434"

    def test_ocr_fix_combined_with_ro(self):
        """The RO prefix is dropped, then the remainder gets the OCR fix."""
        stripped = strip_ro_prefix("RO49O33O51")
        assert stripped == "49033051"

    def test_ro_prefix_not_affected_by_ocr(self):
        """The 'RO' prefix is removed before the OCR translation is applied."""
        stripped = strip_ro_prefix("Ro 50519951")
        assert stripped == "50519951"
# ===========================================================================
# validate_cui
# ===========================================================================
class TestValidateCui:
    """Format-only checks of validate_cui: digits only, length 2 to 10."""

    def test_valid_short(self):
        outcome = validate_cui("12")
        assert outcome is True

    def test_valid_10_digits(self):
        outcome = validate_cui("1234567890")
        assert outcome is True

    def test_too_short(self):
        outcome = validate_cui("1")
        assert outcome is False

    def test_too_long(self):
        outcome = validate_cui("12345678901")
        assert outcome is False

    def test_non_digits(self):
        outcome = validate_cui("49O33O51")
        assert outcome is False

    def test_empty(self):
        outcome = validate_cui("")
        assert outcome is False

    def test_none(self):
        outcome = validate_cui(None)
        assert outcome is False
# ===========================================================================
# validate_cui_checksum
# ===========================================================================
class TestValidateCuiChecksum:
    """Test Romanian CUI check digit algorithm (key 753217532)."""

    @pytest.mark.parametrize("cui,name", [
        ("49033051", "MATTEO&OANA CAFFE 2022 SRL"),
        ("15134434", "AUTOKLASS CENTER SRL"),
        ("44741316", "OLLY'S HOUSE IECEA MARE SRL"),
        ("45484539", "S OFFICE VENDING SRL"),
        ("8722253", "VENUS ALIMCOM SRL"),
        ("3738836", "AUSTRAL TRADE SRL"),
        ("37567030", "CONVER URBAN SRL"),
        ("45350367", "TURCHI GARAGE SRL"),
        ("3601803", "known company"),
        ("18189442", "known company"),
        ("45093662", "CARTON PREMIUM SRL"),
        ("50519951", "SERCO CAFFE COMPANY"),
    ])
    def test_valid_cuis(self, cui, name):
        """Each listed CUI must pass the checksum; name only labels failures."""
        assert validate_cui_checksum(cui) is True, f"CUI {cui} ({name}) should pass checksum"

    @pytest.mark.parametrize("cui", [
        "49033052",  # last digit wrong (should be 1)
        "15134435",  # last digit wrong (should be 4)
        "44741310",  # last digit wrong (should be 6)
    ])
    def test_invalid_checksum(self, cui):
        """A valid body with a wrong final digit must be rejected."""
        assert validate_cui_checksum(cui) is False

    def test_invalid_format_rejected(self):
        """Values failing the format check never reach the checksum maths."""
        assert validate_cui_checksum("ABC") is False
        assert validate_cui_checksum("") is False
        assert validate_cui_checksum("1") is False

    def test_checksum_result_10_becomes_0(self):
        """When (sum*10)%11 == 10, check digit should be 0.

        CUI 14186770: body=1418677, padded=001418677,
        sum=0+0+3+8+1+56+30+21+14=133, 1330%11=10 → check=0.
        """
        assert validate_cui_checksum("14186770") is True
        # Wrong check digit for same body
        assert validate_cui_checksum("14186771") is False
# ===========================================================================
# sanitize_cui
# ===========================================================================
class TestSanitizeCui:
    """End-to-end checks of sanitize_cui: cleaned value plus optional warning."""

    def test_clean_cui_no_warning(self):
        cleaned, warning = sanitize_cui("RO15134434")
        assert (cleaned, warning) == ("15134434", None)

    def test_ocr_typo_fixed_no_warning(self):
        """The O→0 fix yields a CUI with a valid checksum, so no warning."""
        cleaned, warning = sanitize_cui("49O33O51")
        assert (cleaned, warning) == ("49033051", None)

    def test_ocr_typo_with_ro_prefix(self):
        cleaned, warning = sanitize_cui("RO49O33O51")
        assert (cleaned, warning) == ("49033051", None)

    def test_valid_format_bad_checksum_warns(self):
        # Same body as a valid CUI, but with a wrong check digit.
        cleaned, warning = sanitize_cui("49033052")
        assert cleaned == "49033052"
        assert warning is not None
        assert "nu trece verificarea" in warning

    def test_invalid_format_warns(self):
        # Only the warning is checked here; the cleaned value is not asserted.
        _, warning = sanitize_cui("ABCDEF")
        assert warning is not None
        assert "caractere invalide" in warning

    def test_empty_no_warning(self):
        cleaned, warning = sanitize_cui("")
        assert (cleaned, warning) == ("", None)

    def test_bare_cui_no_prefix(self):
        cleaned, warning = sanitize_cui("45484539")
        assert (cleaned, warning) == ("45484539", None)

    def test_with_spaces(self):
        cleaned, warning = sanitize_cui(" RO 8722253 ")
        assert (cleaned, warning) == ("8722253", None)

    def test_ro_space_format(self):
        """CUI written like 'Ro 50519951', as seen in real GoMag data."""
        cleaned, warning = sanitize_cui("Ro 50519951")
        assert (cleaned, warning) == ("50519951", None)