Compare commits
3 Commits
2ec1fc0f19
...
0992744490
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0992744490 | ||
|
|
1d871c8215 | ||
|
|
b64a99d4e6 |
@@ -22,10 +22,55 @@ def strip_ro_prefix(cod_fiscal: str) -> str:
|
||||
|
||||
|
||||
def validate_cui(bare_cui: str) -> bool:
    """Validate the format of a bare CUI: ASCII digits only, length 2-10.

    Args:
        bare_cui: Candidate fiscal code. ``None``, the empty string and
            non-string values are rejected rather than raising.

    Returns:
        True when the value is a string of 2 to 10 ASCII digits,
        False otherwise.
    """
    if not isinstance(bare_cui, str) or not bare_cui:
        return False
    # str.isdigit() alone also accepts non-ASCII digits such as "²" or the
    # fullwidth "１"; some of those cannot even be parsed by int(), so the
    # value must be restricted to ASCII before it is treated as numeric.
    return bare_cui.isascii() and bare_cui.isdigit() and 2 <= len(bare_cui) <= 10
|
||||
|
||||
|
||||
# Romanian CUI test key: 9 weights, right-aligned with the CUI digits that
# remain once the trailing check digit has been removed.
_CUI_KEY = [7, 5, 3, 2, 1, 7, 5, 3, 2]
|
||||
|
||||
|
||||
def validate_cui_checksum(bare_cui: str) -> bool:
    """Validate the check digit of a bare CUI using the Romanian algorithm.

    The last digit of the CUI is the check digit. The remaining digits are
    left-padded with zeros to the length of the key (9 positions) and
    multiplied position by position with the key 7-5-3-2-1-7-5-3-2. The
    expected check digit is ``(sum_of_products * 10) % 11``, where a result
    of 10 is mapped to 0.

    Args:
        bare_cui: Candidate fiscal code, digits only.

    Returns:
        True when the value passes ``validate_cui`` and its last digit
        matches the computed check digit, False otherwise.
    """
    # The explicit ASCII guard keeps int() below from raising ValueError on
    # characters such as "²", which str.isdigit() reports as digits.
    if not validate_cui(bare_cui) or not bare_cui.isascii():
        return False

    *body, check_digit = (int(ch) for ch in bare_cui)
    # Right-align the body against the key by padding with leading zeros.
    padded = [0] * (len(_CUI_KEY) - len(body)) + body
    total = sum(digit * weight for digit, weight in zip(padded, _CUI_KEY))

    expected = (total * 10) % 11
    if expected == 10:
        expected = 0
    return expected == check_digit
|
||||
|
||||
|
||||
def sanitize_cui(raw_cf: str) -> tuple[str, str | None]:
    """Sanitize and validate a fiscal code. Returns (clean_cui, warning_or_none).

    The raw value is normalized with ``strip_ro_prefix`` and the normalized
    value is what is always returned, never the raw input. Outcomes:

    * empty after normalization          -> ``(bare, None)``
    * valid format and valid check digit -> ``(bare, None)``
    * valid format, wrong check digit    -> ``(bare, warning)``
    * invalid format                     -> ``(bare, warning)``

    NOTE(review): the accompanying tests suggest ``strip_ro_prefix`` also
    fixes OCR typos (O->0, I/L->1); confirm against its implementation.

    Args:
        raw_cf: Fiscal code as received, possibly with an "RO" prefix.

    Returns:
        The normalized CUI and either ``None`` or a warning message.
    """
    bare = strip_ro_prefix(raw_cf)
    if not bare:
        return bare, None

    # Format is checked first, so each validator runs at most once here.
    if not validate_cui(bare):
        return bare, f"CUI {raw_cf!r} contine caractere invalide dupa sanitizare: {bare!r}"

    if validate_cui_checksum(bare):
        return bare, None

    # Well-formed, but the check digit does not match.
    return bare, f"CUI {bare} nu trece verificarea cifrei de control"
|
||||
|
||||
|
||||
async def check_vat_status_batch(cui_list: list[str], date: str = None) -> dict[str, dict]:
|
||||
|
||||
@@ -659,7 +659,7 @@ async def run_sync(id_pol: int = None, id_sectie: int = None, run_id: str = None
|
||||
is_ro = (order.billing.country or "").strip().lower() == "romania"
|
||||
if order.billing.is_company and order.billing.company_code and is_ro:
|
||||
raw_cf = import_service.clean_web_text(order.billing.company_code) or ""
|
||||
bare = anaf_service.strip_ro_prefix(raw_cf)
|
||||
bare, _ = anaf_service.sanitize_cui(raw_cf)
|
||||
if anaf_service.validate_cui(bare):
|
||||
company_cuis.add(bare)
|
||||
|
||||
@@ -702,7 +702,9 @@ async def run_sync(id_pol: int = None, id_sectie: int = None, run_id: str = None
|
||||
raw_cf = ""
|
||||
if order.billing.is_company and order.billing.company_code:
|
||||
raw_cf = import_service.clean_web_text(order.billing.company_code) or ""
|
||||
bare_cui = anaf_service.strip_ro_prefix(raw_cf)
|
||||
bare_cui, cui_warning = anaf_service.sanitize_cui(raw_cf)
|
||||
if cui_warning:
|
||||
_log_line(run_id, f"#{order.number} WARN: {cui_warning}")
|
||||
anaf_data_for_order = cached_results.get(bare_cui)
|
||||
if anaf_data_for_order and anaf_data_for_order.get("scpTVA") is not None:
|
||||
correct_cf = anaf_service.determine_correct_cod_fiscal(bare_cui, anaf_data_for_order["scpTVA"])
|
||||
|
||||
212
api/tests/test_cui_validation.py
Normal file
212
api/tests/test_cui_validation.py
Normal file
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
CUI Validation Tests
|
||||
====================
|
||||
Tests for Romanian CUI sanitization, checksum validation, and OCR typo correction.
|
||||
|
||||
Run:
|
||||
cd api && python -m pytest tests/test_cui_validation.py -v
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
# --- Environment must be configured BEFORE any app module is imported ---
_tmpdir = tempfile.mkdtemp()
os.environ.update(
    {
        "FORCE_THIN_MODE": "true",
        "SQLITE_DB_PATH": os.path.join(_tmpdir, "test_cui.db"),
        "ORACLE_DSN": "dummy",
        "ORACLE_USER": "dummy",
        "ORACLE_PASSWORD": "dummy",
        "JSON_OUTPUT_DIR": _tmpdir,
    }
)

# Make the parent of this tests directory importable so `app.*` resolves.
_api_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _api_dir not in sys.path:
    sys.path.insert(0, _api_dir)
|
||||
|
||||
from app.services.anaf_service import (
|
||||
strip_ro_prefix,
|
||||
validate_cui,
|
||||
validate_cui_checksum,
|
||||
sanitize_cui,
|
||||
)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# strip_ro_prefix
|
||||
# ===========================================================================
|
||||
|
||||
class TestStripRoPrefix:
    """Normalization cases for strip_ro_prefix."""

    def test_basic_ro_prefix(self):
        cleaned = strip_ro_prefix("RO15134434")
        assert cleaned == "15134434"

    def test_ro_with_space(self):
        cleaned = strip_ro_prefix("RO 15134434")
        assert cleaned == "15134434"

    def test_lowercase_ro(self):
        cleaned = strip_ro_prefix("ro15134434")
        assert cleaned == "15134434"

    def test_no_prefix(self):
        cleaned = strip_ro_prefix("15134434")
        assert cleaned == "15134434"

    def test_whitespace(self):
        cleaned = strip_ro_prefix(" RO15134434 ")
        assert cleaned == "15134434"

    def test_empty(self):
        cleaned = strip_ro_prefix("")
        assert cleaned == ""

    def test_none(self):
        cleaned = strip_ro_prefix(None)
        assert cleaned == ""

    def test_ocr_fix_O_to_0(self):
        """A letter O inside the CUI is expected to come back as digit 0."""
        cleaned = strip_ro_prefix("49O33O51")
        assert cleaned == "49033051"

    def test_ocr_fix_I_to_1(self):
        """A letter I inside the CUI is expected to come back as digit 1."""
        cleaned = strip_ro_prefix("I5134434")
        assert cleaned == "15134434"

    def test_ocr_fix_L_to_1(self):
        """A letter L inside the CUI is expected to come back as digit 1."""
        cleaned = strip_ro_prefix("L5134434")
        assert cleaned == "15134434"

    def test_ocr_fix_combined_with_ro(self):
        """The RO prefix is dropped and the OCR fix applies to the rest."""
        cleaned = strip_ro_prefix("RO49O33O51")
        assert cleaned == "49033051"

    def test_ro_prefix_not_affected_by_ocr(self):
        """The 'Ro' prefix must be removed, not rewritten by the OCR fix."""
        cleaned = strip_ro_prefix("Ro 50519951")
        assert cleaned == "50519951"
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# validate_cui
|
||||
# ===========================================================================
|
||||
|
||||
class TestValidateCui:
    """Format-only checks: digits and a length between 2 and 10."""

    def test_valid_short(self):
        accepted = validate_cui("12")
        assert accepted is True

    def test_valid_10_digits(self):
        accepted = validate_cui("1234567890")
        assert accepted is True

    def test_too_short(self):
        accepted = validate_cui("1")
        assert accepted is False

    def test_too_long(self):
        accepted = validate_cui("12345678901")
        assert accepted is False

    def test_non_digits(self):
        accepted = validate_cui("49O33O51")
        assert accepted is False

    def test_empty(self):
        accepted = validate_cui("")
        assert accepted is False

    def test_none(self):
        accepted = validate_cui(None)
        assert accepted is False
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# validate_cui_checksum
|
||||
# ===========================================================================
|
||||
|
||||
class TestValidateCuiChecksum:
    """Test Romanian CUI check digit algorithm (key 753217532)."""

    @pytest.mark.parametrize("cui,name", [
        ("49033051", "MATTEO&OANA CAFFE 2022 SRL"),
        ("15134434", "AUTOKLASS CENTER SRL"),
        ("44741316", "OLLY'S HOUSE IECEA MARE SRL"),
        ("45484539", "S OFFICE VENDING SRL"),
        ("8722253", "VENUS ALIMCOM SRL"),
        ("3738836", "AUSTRAL TRADE SRL"),
        ("37567030", "CONVER URBAN SRL"),
        ("45350367", "TURCHI GARAGE SRL"),
        ("3601803", "known company"),
        ("18189442", "known company"),
        ("45093662", "CARTON PREMIUM SRL"),
        ("50519951", "SERCO CAFFE COMPANY"),
    ])
    def test_valid_cuis(self, cui, name):
        """Each listed CUI is expected to pass the check-digit validation."""
        assert validate_cui_checksum(cui) is True, f"CUI {cui} ({name}) should pass checksum"

    @pytest.mark.parametrize("cui", [
        "49033052",  # last digit wrong (should be 1)
        "15134435",  # last digit wrong
        "44741310",  # last digit wrong
    ])
    def test_invalid_checksum(self, cui):
        """A well-formed CUI with a wrong final digit must be rejected."""
        assert validate_cui_checksum(cui) is False

    def test_invalid_format_rejected(self):
        """Values that fail format validation never reach the checksum step."""
        assert validate_cui_checksum("ABC") is False
        assert validate_cui_checksum("") is False
        assert validate_cui_checksum("1") is False

    def test_checksum_result_10_becomes_0(self):
        """When (sum*10)%11 == 10, check digit should be 0.

        CUI 14186770: body=1418677, padded=001418677,
        key=753217532, products=0+0+3+8+1+56+30+21+14=133,
        1330%11=10 -> check=0.
        """
        assert validate_cui_checksum("14186770") is True
        # Wrong check digit for same body
        assert validate_cui_checksum("14186771") is False
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# sanitize_cui
|
||||
# ===========================================================================
|
||||
|
||||
class TestSanitizeCui:
    """End-to-end cases for sanitize_cui: returned value plus warning."""

    def test_clean_cui_no_warning(self):
        cleaned, note = sanitize_cui("RO15134434")
        assert cleaned == "15134434"
        assert note is None

    def test_ocr_typo_fixed_no_warning(self):
        """After the O->0 fix the checksum is valid, so no warning is expected."""
        cleaned, note = sanitize_cui("49O33O51")
        assert cleaned == "49033051"
        assert note is None

    def test_ocr_typo_with_ro_prefix(self):
        cleaned, note = sanitize_cui("RO49O33O51")
        assert cleaned == "49033051"
        assert note is None

    def test_valid_format_bad_checksum_warns(self):
        # The final digit is deliberately wrong.
        cleaned, note = sanitize_cui("49033052")
        assert cleaned == "49033052"
        assert note is not None
        assert "nu trece verificarea" in note

    def test_invalid_format_warns(self):
        _, note = sanitize_cui("ABCDEF")
        assert note is not None
        assert "caractere invalide" in note

    def test_empty_no_warning(self):
        cleaned, note = sanitize_cui("")
        assert cleaned == ""
        assert note is None

    def test_bare_cui_no_prefix(self):
        cleaned, note = sanitize_cui("45484539")
        assert cleaned == "45484539"
        assert note is None

    def test_with_spaces(self):
        cleaned, note = sanitize_cui(" RO 8722253 ")
        assert cleaned == "8722253"
        assert note is None

    def test_ro_space_format(self):
        """Input shaped like 'Ro 50519951', as seen in real GoMag data."""
        cleaned, note = sanitize_cui("Ro 50519951")
        assert cleaned == "50519951"
        assert note is None
|
||||
Reference in New Issue
Block a user