""" UNLIMITED KEYS S.R.L. store profile for OCR extraction. Key duplication service. Notable for CASH (NUMERAR) payments. """ import re from decimal import Decimal, InvalidOperation from typing import List, Dict, Any, Optional, Tuple from .base import BaseStoreProfile from . import ProfileRegistry @ProfileRegistry.register class UnlimitedKeysProfile(BaseStoreProfile): """ UNLIMITED KEYS S.R.L. - standard TVA profile with NUMERAR payment. Key characteristics: - Standard TVA format (single rate, any percentage) - Key duplication service - NUMERAR (cash) payment common - different from most stores! - May also accept CARD - OCR often reads "TVA" as "TUA" - need OCR error variants """ CUI_LIST = ["18993187"] NAME_PATTERNS = ["UNLIMITED KEYS", "UNLIMITED", "UNL1MITED", "UNLIMITED KEYS SRL"] STORE_NAME = "UNLIMITED KEYS S.R.L." # Standard TVA patterns - including OCR error variants (TVA -> TUA) TVA_PATTERNS = [ # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY" (including TUA OCR error) r'T[UV]A\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,\s]+)', # "A - XX,XX% = YY,YY" r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,\s]+)', # "TVA XX% YY,YY" (simple format, includes TUA) r'T[UV]A\s+(\d{1,2})\s*%\s*([\d.,\s]+)', # "XX.XX% TUA*A YY.YY" (OCR format with TUA*A or TUA) r'(\d{1,2})[.,]\d{0,2}\s*%\s*T[UV]A\*?[A-D]?\s*([\d.,\s]+)', # "TOTAL TUA: YY.YY" (total TVA amount only) r'TOTAL\s+T[UV]A\s*:?\s*([\d.,\s]+)', ] # TOTAL patterns for UNLIMITED KEYS (handles "80 .00" format) TOTAL_PATTERNS = [ # "SUMA TOTALA: 80 .00" (with space before decimal) (r'SUMA\s+TOTALA\s*:?\s*([\d\s.,]+)', 0.98), # "TOTALA: 80,00" (r'TOTALA\s*:?\s*([\d.,]+)', 0.95), # Standard TOTAL patterns from base class (r'TOTAL\s+(?:DE\s+PLATA|ACHITAT|LEI)\s*:?\s*([\d.,]+)', 0.95), (r'TOTAL\s*:?\s*([\d.,]+)', 0.90), ] # Payment patterns - NUMERAR is primary for this store PAYMENT_PATTERNS = [ # "NUMERAR 80.00" or "NUMERAR: 80.00" (r'NUMERAR\s*:?\s*([\d.,\s]+)', 'NUMERAR', 0.98), # "CARD 80.00" or "CARD: 80.00" (r'CARD\s*:?\s*([\d.,\s]+)', 'CARD', 0.95), ] # Client CUI patterns - specific to this receipt format CLIENT_CUI_PATTERNS = [ # "CIF CLIENT:1879855" (exact format from OCR) (r'CIF\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.99), # "CLIENT CIF: ROXXXXXXX" (r'CLIENT\s+CIF\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.98), # "C.I.F. CLIENT: XXXXXXX" (r'C\.?I\.?F\.?\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.98), ] # Override client markers to be less strict CLIENT_MARKERS = [ r'CIF\s+CLIENT', r'CLIENT\s+CIF', r'C\.?I\.?F\.?\s+CLIENT', r'CLIENT\s*:', ] def extract_total(self, text: str) -> Tuple[Optional[Decimal], float]: """ Extract total amount from receipt text. Handles UNLIMITED KEYS format with space before decimal (e.g., "80 .00"). Args: text: Raw OCR text from receipt Returns: Tuple of (total_amount, confidence) or (None, 0.0) """ text_upper = text.upper() for pattern, confidence in self.TOTAL_PATTERNS: match = re.search(pattern, text_upper, re.IGNORECASE) if match: try: # Clean up amount string (remove spaces, fix decimal) amount_str = match.group(1) # Remove spaces that might appear before decimal amount_str = re.sub(r'\s+', '', amount_str) amount = self._parse_decimal(amount_str) if amount and amount > 0: return (amount, confidence) except (ValueError, InvalidOperation): continue return (None, 0.0) def extract_tva_entries(self, text: str) -> List[dict]: """ Extract TVA entries from receipt text. Handles OCR errors where TVA is read as TUA. Args: text: Raw OCR text from receipt Returns: List of TVA entries with code, percent, and amount """ entries = [] text_upper = text.upper() # Pattern 4: "XX.XX% TUA*A YY.YY" - common OCR format pattern4 = self.TVA_PATTERNS[3] match = re.search(pattern4, text_upper) if match: try: percent = int(match.group(1)) amount_str = re.sub(r'\s+', '', match.group(2)) amount = self._parse_decimal(amount_str) if amount and amount > 0: entries.append({ 'code': 'A', 'percent': percent, 'amount': amount }) return entries except (ValueError, InvalidOperation, IndexError): pass # Pattern 5: "TOTAL TUA: YY.YY" - fallback to total TVA pattern5 = self.TVA_PATTERNS[4] match = re.search(pattern5, text_upper) if match: try: amount_str = re.sub(r'\s+', '', match.group(1)) amount = self._parse_decimal(amount_str) if amount and amount > 0: # Infer percent from amount vs total ratio entries.append({ 'code': 'A', 'percent': 19, # Standard Romanian TVA rate 'amount': amount }) return entries except (ValueError, InvalidOperation, IndexError): pass # Try coded patterns for pattern in self.TVA_PATTERNS[:3]: for match in re.finditer(pattern, text_upper, re.IGNORECASE): try: groups = match.groups() if len(groups) == 3: code = groups[0].upper() percent = int(groups[1]) amount_str = re.sub(r'\s+', '', groups[2]) else: code = 'A' percent = int(groups[0]) amount_str = re.sub(r'\s+', '', groups[1]) amount = self._parse_decimal(amount_str) if amount and amount > 0: entries.append({ 'code': code, 'percent': percent, 'amount': amount }) return entries except (ValueError, InvalidOperation, IndexError): continue return entries def extract_payment_methods(self, text: str) -> List[dict]: """ Extract payment methods from receipt text. Handles NUMERAR (cash) as primary payment for this store. Args: text: Raw OCR text from receipt Returns: List of payment methods with method, amount, and confidence """ payments = [] text_upper = text.upper() for pattern, method, confidence in self.PAYMENT_PATTERNS: match = re.search(pattern, text_upper, re.IGNORECASE) if match: try: amount_str = re.sub(r'\s+', '', match.group(1)) amount = self._parse_decimal(amount_str) if amount and amount > 0: payments.append({ 'method': method, 'amount': amount, 'confidence': confidence }) except (ValueError, InvalidOperation): continue return payments def extract_client_cui(self, text: str) -> Tuple[Optional[str], float]: """ Extract client CUI from receipt text. Handles "CIF CLIENT:1879855" format specific to this store. Args: text: Raw OCR text from receipt Returns: Tuple of (cui, confidence) or (None, 0.0) """ text_upper = text.upper() # Check for client markers has_client = any( re.search(marker, text_upper, re.IGNORECASE) for marker in self.CLIENT_MARKERS ) if not has_client: return (None, 0.0) # Try client CUI patterns for pattern, confidence in self.CLIENT_CUI_PATTERNS: match = re.search(pattern, text_upper, re.IGNORECASE) if match: cui = match.group(1) # Clean up: remove RO prefix, spaces cui_digits = re.sub(r'[^0-9]', '', cui) if 6 <= len(cui_digits) <= 10: return (cui_digits, confidence) return (None, 0.0) def get_validation_hints(self) -> Dict[str, Any]: """Return UNLIMITED KEYS-specific validation hints.""" return { "has_multi_rate_tva": False, "card_equals_total": False, # May be NUMERAR (cash) "has_client_cui": True, # May have client CUI "has_efactura": False, "is_non_vat_payer": False, "common_payment": "NUMERAR", # Cash payments common }