""" DEDEMAN store profile for OCR extraction. Dedeman receipts may include e-factura information and use standard TVA format. Large DIY retailer in Romania. """ import re from decimal import Decimal, InvalidOperation from typing import List, Dict, Any from .base import BaseStoreProfile from . import ProfileRegistry @ProfileRegistry.register class DedemanProfile(BaseStoreProfile): """ DEDEMAN SRL - standard TVA with e-factura support. Key characteristics: - Standard TVA format - May include e-factura reference number - Professional receipts for construction materials """ CUI_LIST = ["2816464"] NAME_PATTERNS = ["DEDEMAN", "DEDEMAN SRL", "OEDEMAN", "D3DEMAN"] # OCR variants STORE_NAME = "DEDEMAN SRL" # Standard TVA patterns (flexible - accepts any rate) TVA_PATTERNS = [ # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY" r'TVA\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)', # "A - XX,XX% = YY,YY" r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,]+)', # "TVA (XX%) YY,YY" r'TVA\s*\(?\s*(\d{1,2})\s*%\s*\)?\s*:?\s*([\d.,]+)', ] # E-factura pattern for reference extraction EFACTURA_PATTERN = r'e-?factura\s*:?\s*([A-Z0-9]+)' def extract_tva_entries(self, text: str) -> List[dict]: """ Extract Dedeman-specific TVA entries. Args: text: Raw OCR text from receipt Returns: List of TVA entries with code, percent, and amount """ entries = [] seen = set() # Try coded patterns first for pattern in self.TVA_PATTERNS[:2]: for match in re.finditer(pattern, text, re.IGNORECASE): try: code = match.group(1).upper() percent = int(match.group(2)) amount = self._parse_decimal(match.group(3)) if amount and amount > 0: entry_key = (code, percent) if entry_key not in seen: entries.append({ 'code': code, 'percent': percent, 'amount': amount }) seen.add(entry_key) except (ValueError, InvalidOperation, IndexError): continue # Fallback to simple format if not entries: simple_pattern = self.TVA_PATTERNS[2] for match in re.finditer(simple_pattern, text, re.IGNORECASE): try: percent = int(match.group(1)) amount = self._parse_decimal(match.group(2)) if amount and amount > 0: entries.append({ 'code': 'A', 'percent': percent, 'amount': amount }) break except (ValueError, InvalidOperation): continue return entries def extract_efactura_reference(self, text: str) -> str | None: """ Extract e-factura reference number if present. Args: text: Raw OCR text from receipt Returns: E-factura reference string or None """ match = re.search(self.EFACTURA_PATTERN, text, re.IGNORECASE) return match.group(1) if match else None def get_validation_hints(self) -> Dict[str, Any]: """Return Dedeman-specific validation hints.""" return { "has_multi_rate_tva": False, "card_equals_total": False, "has_client_cui": False, "has_efactura": True, "is_non_vat_payer": False, }