fix(ocr): Improve CUI matching and vendor name extraction

- Add CUI variant matching for Romanian fiscal codes (handles "RO22891860",
  "RO 22891860", and "22891860" formats) in both sync_service and validation
- Fix vendor name extraction to distinguish the "SC." company prefix
  (Societate Comercială) from "SC" used as a staircase marker in addresses
- Remove problematic TVA pattern that was incorrectly matching percentage values
- Add docTR Plus engine option to dropdown with "(recomandat)" label

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-04 05:34:31 +02:00
parent f1f6760bef
commit 2f7ef55868
4 changed files with 61 additions and 9 deletions

View File

@@ -1129,14 +1129,21 @@ class OCRValidationEngine:
# Helper to search database for CUI
async def lookup_cui_in_db(digits: str) -> Optional[tuple[str, str]]:
"""Search both synced and local suppliers for CUI."""
# Search patterns: with and without RO prefix
search_patterns = [digits, f"RO{digits}"]
# Search patterns: with and without RO prefix, with and without space
# Database may have: "22891860", "RO22891860", "RO 22891860"
search_patterns = [
digits, # Just digits: 22891860
f"RO{digits}", # With RO prefix: RO22891860
f"RO {digits}", # With RO prefix and space: RO 22891860
digits.lstrip('0'), # Handle leading zeros
]
# Search synced_suppliers first (more data)
stmt = select(SyncedSupplier.fiscal_code, SyncedSupplier.name).where(
or_(
SyncedSupplier.fiscal_code == digits,
SyncedSupplier.fiscal_code == f"RO{digits}",
SyncedSupplier.fiscal_code == f"RO {digits}", # With space
SyncedSupplier.fiscal_code == digits.lstrip('0'), # Handle leading zeros
)
).limit(1)
@@ -1150,6 +1157,7 @@ class OCRValidationEngine:
or_(
LocalSupplier.fiscal_code == digits,
LocalSupplier.fiscal_code == f"RO{digits}",
LocalSupplier.fiscal_code == f"RO {digits}", # With space
LocalSupplier.fiscal_code == digits.lstrip('0'),
)
).limit(1)