""" LIDL store profile for OCR extraction. Lidl receipts have a specific TVA format without hyphen/colon separators: TOTAL TVA 9,84 TVA A 21,00% 7,71 TVA B 11,00% 2,13 This profile handles multi-rate TVA extraction for Lidl receipts. """ import re from decimal import Decimal, InvalidOperation from typing import List, Dict, Any from .base import BaseStoreProfile from . import ProfileRegistry @ProfileRegistry.register class LidlProfile(BaseStoreProfile): """ LIDL DISCOUNT S.R.L. - multi-rate TVA profile. Key characteristics: - Multi-rate TVA (codes A, B, C, D with any percentage - patterns are flexible) - TVA format: "TVA A XX,XX% YY,YY" (code + percent + amount on same line) - Supports historical rates (19%, 9%, 5%) and current rates (21%, 11%) - CARD payment usually equals total - No client CUI on receipts """ CUI_LIST = ["22891860"] NAME_PATTERNS = ["LIDL", "LDL", "L1DL", "LIDL DISCOUNT"] # OCR variants STORE_NAME = "LIDL DISCOUNT S.R.L." # Lidl-specific TVA patterns # Format: "TVA A 21,00% 7,71" (code + percent + amount on same line) TVA_PATTERNS = [ # Primary: "TVA A 21,00% 7.71" with various spacing r'T[VU][AR]\s+([A-D])\s+(\d{1,2})[.,]?\d{0,2}\s*%\s+([\d.,]+)', # With backslash OCR artifact: "TVA A \21,00% 7.71" r'T[VU][AR]\s+([A-D])\s+\\?(\d{1,2})[.,]?\d{0,2}\s*%\s+([\d.,]+)', # IVA variant (rare OCR misread) r'IVA\s+([A-D])\s+(\d{1,2})[.,]?\d{0,2}\s*%\s+([\d.,]+)', ] def extract_tva_entries(self, text: str) -> List[dict]: """ Extract Lidl-specific TVA entries. Handles multiple TVA rates (A, B, C, D) commonly found on Lidl receipts. Uses deduplication to avoid counting the same entry twice from different patterns. Args: text: Raw OCR text from receipt Returns: List of TVA entries with code, percent, and amount """ entries = [] seen = set() # Deduplication key: (code, percent) for pattern in self.TVA_PATTERNS: for match in re.finditer(pattern, text, re.IGNORECASE): try: code = match.group(1).upper() percent = int(match.group(2)) amount = self._parse_decimal(match.group(3)) if amount and amount > 0: entry_key = (code, percent) if entry_key not in seen: entries.append({ 'code': code, 'percent': percent, 'amount': amount }) seen.add(entry_key) except (ValueError, InvalidOperation): continue return entries def get_validation_hints(self) -> Dict[str, Any]: """Return Lidl-specific validation hints.""" return { "has_multi_rate_tva": True, "card_equals_total": True, "has_client_cui": False, "has_efactura": False, "is_non_vat_payer": False, }