"""
ELECTROBERING S.R.L. store profile for OCR extraction.

Electronics and home supplies store.

Receipt structure:
- TVA format: "TOTAL TVA A - - 19%" with amount on next line
- "TOTAL TVA BON" with total TVA amount
- Client CUI: "CIF CLIENT: XXXXXXX"
"""

import re
from decimal import Decimal, InvalidOperation
from typing import List, Dict, Any, Tuple, Optional

from .base import BaseStoreProfile
from . import ProfileRegistry


@ProfileRegistry.register
class ElectroberingProfile(BaseStoreProfile):
    """
    ELECTROBERING S.R.L. - standard TVA profile with multiline support.

    Key characteristics:
    - TVA format with rate on one line, amount on next
    - Double-dash separators common (OCR artifact)
    - May have client CUI for B2B purchases
    - CARD payment typical
    """

    CUI_LIST = ["2744937"]
    NAME_PATTERNS = ["ELECTROBERING", "ELECTR0BERING", "ELECTROBERING SRL"]
    STORE_NAME = "ELECTROBERING S.R.L."

    # ELECTROBERING TVA patterns (handles double-dash and multiline)
    TVA_PATTERNS = [
        # "TOTAL TVA A - - 19%" with amount on next line
        r'TOTAL\s+TVA\s*([A-D])\s*[-\s]+(\d{1,2})\s*%',
        # "TOTAL TVA A 19%" without separator
        r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%',
        # Standard: "TVA A: XX% = YY,YY"
        r'TVA\s*([A-D])\s*[-:]?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
    ]

    # TOTAL TVA BON pattern (fallback)
    TOTAL_TVA_BON_PATTERN = r'TOTAL\s+TVA\s+BON'

    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Extract ELECTROBERING-specific TVA entries.

        ELECTROBERING receipts show TVA in multi-line format:
            "TOTAL TVA A - - 19%"
            "5.59"
            "TOTAL TVA BON"
            "5.59"

        Three strategies are tried in order; the first one that yields a
        positive amount wins and a single-entry list is returned:

        1. A "TOTAL TVA <code> <rate>%" line followed by its amount.
        2. A "TOTAL TVA BON" line followed by the total TVA amount.
        3. The inline form "TVA A: XX% = YY,YY" anywhere in the text.

        Args:
            text: Raw OCR text from receipt

        Returns:
            List of TVA entries with code, percent, and amount. Empty when
            the text is empty or no positive amount could be read.
        """
        if not text:
            return []

        text_upper = text.upper()
        lines = text_upper.split('\n')

        # Code/rate seen on a rate line whose amount could not be read; kept
        # so the TOTAL TVA BON fallback does not have to guess them.
        detected_rate: Optional[Tuple[str, int]] = None

        # Strategy 1: rate line, amount on the following line.
        for index, line in enumerate(lines):
            match = re.search(self.TVA_PATTERNS[0], line)
            if not match:
                continue
            code = match.group(1)
            percent = int(match.group(2))
            if detected_rate is None:
                detected_rate = (code, percent)
            amount = self._amount_after_line(lines, index)
            if amount is not None:
                return [{
                    'code': code,
                    'percent': percent,
                    'amount': amount
                }]

        # Strategy 2: "TOTAL TVA BON", amount on the following line.
        # Falls back to code A / 19% only when no rate line was recognised.
        code, percent = detected_rate or ('A', 19)
        for index, line in enumerate(lines):
            if not re.search(self.TOTAL_TVA_BON_PATTERN, line):
                continue
            amount = self._amount_after_line(lines, index)
            if amount is not None:
                return [{
                    'code': code,
                    'percent': percent,
                    'amount': amount
                }]

        # Strategy 3: inline format "TVA A: XX% = YY,YY".
        match = re.search(self.TVA_PATTERNS[2], text_upper)
        if match:
            amount = self._positive_amount(match.group(3))
            if amount is not None:
                return [{
                    'code': match.group(1),
                    'percent': int(match.group(2)),
                    'amount': amount
                }]

        return []

    def _amount_after_line(self, lines: List[str], index: int) -> Optional[Decimal]:
        """
        Read the amount that follows the marker line at ``index``.

        Blank lines directly after the marker are skipped, since OCR output
        may separate a label from its value with an empty line. Only the
        first non-blank line is considered.

        Returns:
            The positive amount, or None when there is no following text or
            it is not a positive number.
        """
        for candidate in lines[index + 1:]:
            stripped = candidate.strip()
            if stripped:
                return self._positive_amount(stripped)
        return None

    def _positive_amount(self, raw: str) -> Optional[Decimal]:
        """
        Parse ``raw`` with the inherited ``_parse_decimal`` helper.

        NOTE(review): assumes ``_parse_decimal`` returns a Decimal or None;
        confirm against BaseStoreProfile.

        Returns:
            The parsed value when it is greater than zero, otherwise None.
            Parse errors (ValueError / InvalidOperation) are treated as
            "no amount" rather than propagated.
        """
        try:
            amount = self._parse_decimal(raw)
        except (ValueError, InvalidOperation):
            return None
        if amount and amount > 0:
            return amount
        return None

    def get_validation_hints(self) -> Dict[str, Any]:
        """Return ELECTROBERING-specific validation hints."""
        return {
            "has_multi_rate_tva": False,
            "card_equals_total": True,
            "has_client_cui": True,  # May have client CUI for B2B
            "has_efactura": False,
            "is_non_vat_payer": False,
            "tva_on_separate_line": True,
        }