roa2web-service-auto/backend/modules/data_entry/services/ocr/profiles/base.py

"""
Base class for store-specific OCR extraction profiles.

Each store can have different receipt formats (TVA layout, total position, etc.).
Store profiles allow customizing extraction logic per-store for better accuracy.

Usage:
    from .base import BaseStoreProfile
    from . import ProfileRegistry

    @ProfileRegistry.register
    class LidlProfile(BaseStoreProfile):
        CUI_LIST = ["22891860"]
        NAME_PATTERNS = ["LIDL", "LDL"]

        def extract_tva_entries(self, text: str) -> List[dict]:
            # Custom Lidl TVA extraction logic
            ...
"""

import re
from abc import ABC
from decimal import Decimal, InvalidOperation
from typing import List, Optional, Tuple, Dict, Any
from datetime import date


class BaseStoreProfile(ABC):
    """
    Abstract base class for store-specific extraction profiles.

    Each profile defines:
    - CUI_LIST: CUI codes that identify this store (without RO prefix)
    - NAME_PATTERNS: OCR-tolerant name patterns for fallback matching
    - Custom extraction methods for TVA, total, date, etc.

    The ProfileRegistry uses CUI_LIST to lookup profiles during extraction.

    The regex tables below are class attributes that serve as generic
    defaults; a subclass customises extraction either by replacing a table
    or by overriding the corresponding ``extract_*`` method.
    """

    # -------------------------------------------------------------------------
    # Class attributes - override in subclasses
    # NOTE: the list defaults are shared by every subclass that does not
    # override them, so a subclass should assign a new list rather than
    # mutate the inherited one in place.
    # -------------------------------------------------------------------------

    # List of CUI codes (without RO prefix) that identify this store
    CUI_LIST: List[str] = []

    # OCR-tolerant name patterns for fallback matching
    NAME_PATTERNS: List[str] = []

    # Store display name
    STORE_NAME: str = "Unknown Store"

    # Flag for known non-VAT payer stores (skips TVA extraction):
    # when True, extract_tva_entries() returns [] without inspecting the text.
    IS_NON_VAT_PAYER: bool = False

    # -------------------------------------------------------------------------
    # Generic patterns - can be overridden in subclasses
    # -------------------------------------------------------------------------

    # Total amount patterns (confidence-weighted).
    # Each entry is (regex, confidence); capture group 1 is the amount text.
    # extract_total() searches the upper-cased receipt text with these in LIST
    # ORDER and returns the first capture that parses to an amount > 0 and
    # < MAX_PAYMENT, so position in the list - not the confidence value -
    # decides which pattern wins.
    TOTAL_PATTERNS = [
        # OCR-tolerant "TOTAL LEI": 0 for O, 3 for E, 1 or ! for I
        (r'T[O0]TAL[.\s]+L[E3][I1!]\s*:?\s*([\d\s.,]+)', 0.98),
        (r'TOTAL\s+LEI\s*([\d\s.,]+)', 0.98),
        # "TOTAL" whose first letter was dropped or read as O ("OTAL", "OOTAL")
        (r'[OT]?OTAL\s+LEI\s*([\d\s.,]+)', 0.95),
        (r'TOTAL\s*:?\s*([\d\s.,]+)\s*(?:RON|LEI)?', 0.95),
        (r'TOTAL\s+(?:RON|LEI)\s*([\d\s.,]+)', 0.95),
        (r'SUBTOTAL\s*([\d\s.,]+)', 0.90),
        (r'DE\s+PLATA\s*:?\s*([\d\s.,]+)', 0.90),
        (r'SUMA\s*:?\s*([\d\s.,]+)', 0.85),
    ]

    # Date patterns (confidence-weighted).
    # Each entry is (regex, confidence); capture group 1 is the whole date
    # string, which extract_date() hands to _parse_date(). Patterns are tried
    # in list order against the upper-cased text.
    DATE_PATTERNS = [
        # "DATA" label, also accepting the OCR misread "DRTA"
        (r'D[AR]TA\s*:?\s*(\d{2}[-./]\d{2}[-./]\d{4})', 0.98),
        (r'DATA\s*:?\s*(\d{2}[-./]\d{2}[-./]\d{4})', 0.98),
        # date followed by an "ORA hh:mm" time label (O may be read as 0)
        (r'(\d{2}[-./]\d{2}[-./]\d{4})\s+[O0]RA\s*:?\s*\d{2}:\d{2}', 0.95),
        # date followed directly by hh:mm
        (r'(\d{2}[-./]\d{2}[-./]\d{4})\s+\d{2}:\d{2}', 0.90),
        # bare day-first date
        (r'(\d{2}[-./]\d{2}[-./]\d{4})', 0.80),
        # bare year-first date
        (r'(\d{4}[-./]\d{2}[-./]\d{2})', 0.75),
    ]

    # Date patterns with OCR-introduced spaces (separate because format is different).
    # Each entry is (regex, confidence, order): the three numeric parts are
    # captured as separate groups and `order` ('ymd' or 'dmy') tells
    # extract_date() which group is the year, month and day. The separator is
    # '.' or ',' optionally followed by whitespace.
    DATE_PATTERNS_OCR_SPACES = [
        (r'(\d{4})[.,]\s*(\d{2})[.,]\s*(\d{2})\s+\d{2}:\d{2}', 0.92, 'ymd'),
        (r'(\d{4})[.,]\s*(\d{2})[.,]\s*(\d{2})', 0.85, 'ymd'),
        (r'(\d{2})[.,]\s*(\d{2})[.,]\s*(\d{4})\s+\d{2}:\d{2}', 0.92, 'dmy'),
        (r'(\d{2})[.,]\s*(\d{2})[.,]\s*(\d{4})', 0.85, 'dmy'),
    ]

    # Receipt number patterns (confidence-weighted).
    # Each entry is (regex, confidence); capture group 1 is the number.
    # extract_receipt_number() tries them in list order against the
    # upper-cased text and skips any capture shorter than 3 characters, so
    # list position (not the confidence value) decides priority.
    NUMBER_PATTERNS = [
        (r'NDS\s*:?\s*(\d+)', 0.98),
        (r'C3POS[-A-Z0-9]*[N:](\d{6,7})', 0.98),
        (r'C3POS.*?(\d{6,7})\b', 0.95),
        (r'BF\s*:\s*(\d{4,})', 0.96),
        (r'BF\s+(\d{4,})', 0.93),
        (r'NIVS\s*:?\s*(\d+)', 0.95),
        (r'NR\.?\s*BON\s*:?\s*(\d+)', 0.95),
        (r'BON\s+(?:FISCAL\s+)?NR\.?\s*:?\s*(\d+)', 0.95),
        (r'CHITANTA\s+NR\.?\s*:?\s*(\d+)', 0.95),
        (r'NR\.?\s+DOCUMENT\s*:?\s*(\d+)', 0.90),
        (r'ID\s*BF\s*:?\s*(\d+)', 0.90),
    ]

    # Payment method patterns (pattern, method_type, confidence)
    # Handles ALL payment types: CARD, NUMERAR, and card brand names.
    # method_type is either 'CARD' or 'NUMERAR'; capture group 1 is the amount.
    # NOTE(review): the consumer (extract_payment_methods) is only partially
    # visible in this section, so how order/confidence are used is not
    # documented here - confirm against that method.
    PAYMENT_PATTERNS = [
        # CARTE CREDIT variants (OMV/Petrom/Socar receipts)
        (r'CARTE\s+CREDIT\s*:?\s*([\d\s.,]+)', 'CARD', 0.98),
        (r'CARTE\s+CREDIT\s*:?\s*\n\s*([\d\s.,]+)', 'CARD', 0.97),
        (r'CARTE\s+DE\s+CREDIT\s*:?\s*([\d\s.,]+)', 'CARD', 0.98),
        (r'CARTE\s+DE\s+CREDIT\s*:?\s*\n\s*([\d\s.,]+)', 'CARD', 0.97),
        # CARD standard
        (r'(?:PLATA\s+)?CARD\s*[:\sA-Z]?\s*([\d\s.,]+)', 'CARD', 0.95),
        # Card brand names
        (r'VISA\s*:?\s*([\d\s.,]+)', 'CARD', 0.95),
        (r'MASTERCARD\s*:?\s*([\d\s.,]+)', 'CARD', 0.95),
        (r'MAESTR[O0]\s*:?\s*([\d\s.,]+)', 'CARD', 0.95),
        (r'CONTACTLESS\s*:?\s*([\d\s.,]+)', 'CARD', 0.90),
        (r'DEBIT\s*:?\s*([\d\s.,]+)', 'CARD', 0.90),
        (r'CREDIT\s*:?\s*([\d\s.,]+)', 'CARD', 0.88),
        # Cash variants
        (r'NUMERAR\s*:?\s*([\d\s.,]+)', 'NUMERAR', 0.95),
        (r'CASH\s*:?\s*([\d\s.,]+)', 'NUMERAR', 0.90),
        # Truncation recovery patterns (for OCR left-margin issues):
        # the keyword lost its leading letters ("RD"/"ARD" for CARD, "MERAR"
        # for NUMERAR), so these require a strict "digits + 2 decimals" amount.
        (r'(?:^|\n|\s)RD\s*:?\s*(\d{1,6}[.,]\d{2})\b', 'CARD', 0.70),
        (r'(?:^|\n|\s)ARD\s*:?\s*(\d{1,6}[.,]\d{2})\b', 'CARD', 0.75),
        (r'(?:^|\n|\s)MERAR\s*:?\s*(\d{1,6}[.,]\d{2})\b', 'NUMERAR', 0.70),
    ]

    # Client section markers (for B2B receipts) - More flexible patterns
    # Includes OCR corruption variants (LIENT, C IENT, L IENT).
    # Plain regex strings (no confidence value, no capture group): they only
    # signal that a client/buyer section is present.
    # NOTE(review): the code that consumes this list is not visible in this
    # section of the file.
    CLIENT_MARKERS = [
        r'C\.?\s*[I1]\.?\s*F\.?\s+CLIENT',    # "CIF CLIENT" (with or without colon)
        r'C\.?\s*U\.?\s*[I1]\.?\s+CLIENT',    # "CUI CLIENT"
        r'CLIENT\s+C\.?\s*[UI1]\.?\s*[IF1]',  # "CLIENT CIF" / "CLIENT CUI"
        r'CLIENT\s*:',                          # "CLIENT:"
        r'CUMPARATOR\s*:',                      # "CUMPARATOR:"
        r'BENEFICIAR\s*:',                      # "BENEFICIAR:"
        r'CUMP[AĂ]R[AĂ]TOR',                   # "CUMPARATOR" without colon
        r'COD\s+FISCAL\s+CLIENT',              # "COD FISCAL CLIENT"
        # OCR corruption patterns
        r'C[I1]F\s+[A-Z\s]{0,6}IENT\s*:',      # "CIF a IENT:", "CIF CL IENT:", "CIF L IENT:"
        r'C[I1]F\s+LIENT\s*:',                  # "CIF LIENT:" (missing C)
        r'LIENT\s*:',                           # "LIENT:" (missing C and I/L)
        # Brick-specific (I→L OCR error)
        r'CLIENT\s+C\.?U\.?[LI1]\.?\s*/',       # "CLIENT C.U.L./" (I read as L)
    ]

    # Client CUI patterns (pattern, confidence) - Comprehensive
    # Handles: docTR reordering, doubled letters, corruption, CUMPARATOR, Brick L/I swap.
    # Capture group 1 is the client's fiscal code. Patterns come in pairs:
    # one captures the code together with its optional "RO"/"R0" prefix, the
    # other places the prefix in a non-capturing group and captures digits only.
    # NOTE(review): the code that consumes this list is not visible in this
    # section of the file, so ordering/prefix handling should be confirmed there.
    CLIENT_CUI_PATTERNS = [
        # === CUI on line BEFORE CLIENT marker (docTR/OCR reordering) ===
        (r'(R[O0]\d{6,10})\s*\n\s*CLIENT\s+C\.?\s*U\.?\s*[I1]\.?', 0.99),
        (r'(R[O0]\d{6,10})\s*\n\s*CLIENT\s+C\.?\s*[I1]\.?\s*F\.?', 0.99),
        (r'(R[O0]\d{6,10})\s*:?\s*\n\s*CLIENT', 0.98),
        # === "CIF I CLIENT:" format (OCR extra chars) ===
        (r'C[I1]F\s+[A-Z]*\s*CLIENT\s*:?\s*(R[O0]\d{6,10})', 0.98),
        (r'C[I1]F\s+[A-Z]*\s*CLIENT\s*:?\s*(R[O0]?\d{6,10})', 0.97),
        # === CIF CLIENT: (reversed - CIF before CLIENT) ===
        (r'C\.?\s*[I1]\.?\s*F\.?\s+CLIENT\s*:?\s*(R[O0]?\d{6,10})', 0.98),
        (r'C\.?\s*U\.?\s*[I1]\.?\s+CLIENT\s*:?\s*(R[O0]?\d{6,10})', 0.98),
        (r'C\.?\s*[I1]\.?\s*F\.?\s+CLIENT\s*:?\s*(?:R[O0])?(\d{6,10})', 0.98),
        (r'C\.?\s*U\.?\s*[I1]\.?\s+CLIENT\s*:?\s*(?:R[O0])?(\d{6,10})', 0.98),
        # === CLIENT C.U.I/C.I.F. (slash variants) ===
        (r'CLIENT\s+C\.?\s*U\.?\s*[I1]\.?\s*/\s*C\.?\s*[I1]\.?\s*F\.?\s*:?\s*(R[O0]?\d{6,10})', 0.97),
        (r'CLIENT\s+C\.?\s*U\.?\s*[I1]\.?\s*/\s*C\.?\s*[I1]\.?\s*F\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.97),
        # === Doubled letters (docTR artifact: "C.U U.I") ===
        (r'CLIENT\s+C\.?\s*U\.?\s*U?\.?\s*[I1]\.?\s*/\s*C\.?\s*[I1]\.?\s*F\.?\s*:?\s*(R[O0]?\d{6,10})', 0.96),
        (r'CLIENT\s+C\.?\s*U\.?\s*U?\.?\s*[I1]\.?\s*/\s*C\.?\s*[I1]\.?\s*F\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.96),
        # === CLIENT C.U.I. or CLIENT CUI (without slash) ===
        (r'CLIENT\s+C\.?\s*U\.?\s*[I1]\.?\s*:?\s*(R[O0]?\d{6,10})', 0.96),
        (r'CLIENT\s+C\.?\s*U\.?\s*[I1]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.96),
        (r'CLIENT\s+C\.?\s*[I1]\.?\s*F\.?\s*:?\s*(R[O0]?\d{6,10})', 0.96),
        (r'CLIENT\s+C\.?\s*[I1]\.?\s*F\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.96),
        # === Corrupted CLIENT after CIF (OCR errors) ===
        (r'CIF\s+[a-zA-Z\s]{2,8}IENT\s*:?\s*(R[O0]?\d{6,10})', 0.93),
        (r'CIF\s+[a-zA-Z\s]{2,8}IENT\s*:?\s*(?:R[O0])?(\d{6,10})', 0.93),
        (r'CIF\s+LIENT\s*:?\s*(R[O0]?\d{6,10})', 0.92),
        (r'CIF\s+LIENT\s*:?\s*(?:R[O0])?(\d{6,10})', 0.92),
        # === CUMPARATOR variants ===
        (r'CUMPARATOR\s+C\.?\s*U\.?\s*[I1]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.95),
        (r'CUMPARATOR\s+C\.?\s*[I1]\.?\s*F\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.95),
        # CUMPARATOR with CUI/CIF on next line
        (r'CUMPARATOR\s*:.*\n\s*C\.?\s*U\.?\s*[I1]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.93),
        (r'CUMPARATOR\s*:.*\n\s*C\.?\s*[I1]\.?\s*[FT]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.93),
        # CUMPARATOR with CUI/CIF two lines down
        (r'CUMPARATOR\s*:.*\n.*\n\s*C\.?\s*[I1]\.?\s*[FT]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.90),
        # === CLIENT on next line ===
        (r'CLIENT\s*:\s*\n\s*C\.?\s*U\.?\s*[I1]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.95),
        (r'CLIENT\s*:.*\n\s*C\.?\s*U\.?\s*[I1]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.90),
        # === Standard fallback patterns ===
        (r'CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.90),
        (r'COD\s+FISCAL\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.95),
        # === Brick-specific (I→L OCR error) ===
        # Matches: "CLIENT C.U.L./C.IF. :R01879855"
        (r'CLIENT\s+C\.?U\.?[LI1]\.?\s*/\s*C\.?[LI1]\.?F\.?\s*:?\s*(R?O?\d{6,10})', 0.99),
    ]

    # Company type indicators (for identifying company names).
    # Plain regex strings; the dotted abbreviations tolerate missing dots and
    # OCR-inserted spaces between the letters.
    # NOTE(review): the code that consumes this list is not visible in this
    # section of the file.
    COMPANY_INDICATORS = [
        r'\bS\.?\s*R\.?\s*L\.?\b',      # S.R.L. or S. R. L.
        r'\bS\.?\s*A\.?\b',              # S.A. or S. A.
        r'\bS\.?\s*N\.?\s*C\.?\b',      # S.N.C. or S. N. C.
        r'\bS\.?\s*C\.?\s*S\.?\b',      # S.C.S. or S. C. S.
        r'\bI\.?\s*I\.?\b',              # I.I. or I. I.
        r'\bP\.?\s*F\.?\s*A\.?\b',      # P.F.A. or P. F. A.
        r'\bS\.?\s*C\.?\s+[A-Z]',       # S.C. followed by company name
        r'HOLDING',
        r'COMPANY',
        r'GROUP',
    ]

    # Maximum reasonable payment amount (to filter OCR errors).
    # extract_total() only accepts a candidate total strictly below this value.
    MAX_PAYMENT = Decimal('100000')

    # -------------------------------------------------------------------------
    # TVA (VAT) patterns - ALL FORMATS unified
    # OCR tolerant: T[VU][AR] matches TVA, TUA, TVR
    #
    # Each entry is (regex, confidence, format_tag). Among the methods visible
    # in this section of the class, only two read this table:
    #   - _try_tva_inline()   -> entries tagged 'inline'
    #   - _try_tva_standard() -> entries tagged 'standard', 'bon', 'percent',
    #                            'coded', 'fallback', 'books' (in list order)
    # Both unpack the confidence value but do not use it. The 'reversed',
    # 'table', 'taxe' and 'multiline' entries are not read by those methods:
    # _try_tva_reversed(), _try_tva_table() and _try_tva_multiline() carry
    # their own hard-coded regexes.
    # -------------------------------------------------------------------------
    TVA_PATTERNS = [
        # === FORMAT 1: INLINE with code and percent (Lidl-style) ===
        # "TVA A 21,00% 7.71" or "TVA B 11,00% 2.13"
        (r'T[VU][AR]\s+([A-D])\s+(\d{1,2})[.,]?\d{0,2}\s*%\s+([\d.,]+)', 0.98, 'inline'),
        # same as above, tolerating a stray backslash before the percent
        (r'T[VU][AR]\s+([A-D])\s+\\?(\d{1,2})[.,]?\d{0,2}\s*%\s+([\d.,]+)', 0.97, 'inline'),
        (r'IVA\s+([A-D])\s+(\d{1,2})[.,]?\d{0,2}\s*%\s+([\d.,]+)', 0.95, 'inline'),

        # === FORMAT 2: REVERSED (Stepout-style) ===
        # "5.00% TUA*B" - percent BEFORE the TVA marker
        (r'(\d{1,2})[.,]\d{0,2}\s*%\s*T[UV]A\*?([A-D])', 0.97, 'reversed'),

        # === FORMAT 3: TABLE (OMV-style) ===
        # "A-21,00%  285,66  49,58" (code-percent, base amount, VAT amount)
        (r'([A-D])\s*[-:]\s*(\d{1,2})[.,\s]*\d{0,2}\s*%\s+([\d.,\s]+)\s+([\d.,\s]+)', 0.96, 'table'),
        (r'TOTAL\s+TAXE\s*:?\s*([\d.,\s]+)', 0.95, 'taxe'),

        # === FORMAT 4: MULTILINE (Brick/Electrobering) ===
        # "TOTAL TVA A - 19%" on one line, amount on the next line
        (r'TOTAL\s+TVA\s*([A-D])\s*[-\s]+(\d{1,2})\s*%', 0.96, 'multiline'),
        (r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%', 0.95, 'multiline'),
        (r'TOTAL\s+T[VU][AR]\s*([A-D])?\s*[-:]?\s*(\d{1,2})\s*%', 0.94, 'multiline'),

        # === FORMAT 5: STANDARD (from the extractor) ===
        (r'TOTAL\s+(?:T[VU][AR]|IVA)\s+BON\s*:?\s*([\d\s.,]+)', 0.98, 'bon'),
        (r'T[O0]TAL\s+(?:T[VU][AR]|IVA)\s*:?\s*([\d\s.,]+)', 0.95, 'standard'),
        (r'TOTAL\s+IVA\s*:?\s*([\d\s.,]+)', 0.95, 'standard'),
        (r'T[VU][AR]\s+(?:A\s*[-:]?\s*)?(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)', 0.95, 'percent'),
        (r'T[VU][AR]\s+[A-Z]\s*[-:]\s*(\d{1,2})\s*%\s*([\d\s.,]+)', 0.93, 'percent'),
        # 'books': the rate is a literal "5 %" in the regex, only the amount is captured
        (r'T[VU][AR]\s*[C5]\s*[-:]\s*5\s*%\s*:?\s*([\d\s.,]+)', 0.93, 'books'),
        (r'(?:T[VU][AR]|IVA)\s+5\s*%\s*:?\s*([\d\s.,]+)', 0.92, 'books'),

        # === FORMAT 6: CODED inline (with code A-D) ===
        (r'T[UV]A\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,\s]+)', 0.95, 'coded'),
        (r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,\s]+)', 0.93, 'coded'),

        # === FALLBACK patterns ===
        (r'T[O0]T[AE]L\s+(?:T[VUAI]+[AR]?|IVA)\s*:?\s*([\d\s.,]+)', 0.88, 'fallback'),
        (r'TOTAL\s+TA\s*[F\s]?\s*\d*\s*:?\s*([\d\s.,]+)', 0.85, 'fallback'),
        (r'(?:T[VU][AR]|IVA)\s*:?\s*([\d\s.,]+)', 0.85, 'fallback'),
        (r'TOTAL\s+T[UV]A\s*:?\s*([\d.,\s]+)', 0.90, 'standard'),
    ]

    # Non-VAT payer patterns - NEPLATITOR DE TVA
    # OCR variants: NEPLATTOR, NEPLATITOR, NEPLATOR, ANEPLATHTOR, MEPLATITOR.
    # Plain regex strings. _is_non_vat_payer() searches each one
    # case-insensitively and a single hit is enough to treat the receipt as
    # issued by a non-VAT payer (extract_tva_entries() then returns []).
    NON_VAT_PATTERNS = [
        r'NEPLAT\w*OR',           # NEPLATITOR, NEPLATTOR, NEPLATOR
        r'[ANM]EPLAT\w*O?R',      # OCR errors: ANEPLATHTOR, MEPLATITOR
        r'TOTAL\s+NEPLAT',        # TOTAL NEPLATITOR...
        r'TOTAL\s+[ANM]EPLAT',    # TOTAL ANEPLAT... (OCR error)
        r'SCUTIT\s*(?:DE\s+)?T[VU]A',  # SCUTIT DE TVA
        r'NEPLAT\w*\s+T[VU]A',    # NEPLATITOR TVA
        r'NEPLAT\w*\s+DE\s+T',    # NEPLATITOR DE T... (truncated)
    ]

    # CUI (fiscal code) patterns - VENDOR CUI (exclude CLIENT)
    # OCR errors: R0 instead of RO, C1F instead of CIF.
    # Each entry is (regex, confidence); capture group 1 is the code, with or
    # without its RO/R0 prefix depending on the pattern.
    # NOTE(review): the entries starting with '^' match at the start of every
    # line only if the caller searches with re.MULTILINE (otherwise only at
    # the very start of the text). The caller is not visible in this section
    # of the file - confirm the flag there.
    CUI_PATTERNS = [
        # CIF at start of line (definitely vendor)
        (r'^CIF\s*:?\s*(R[O0]?\d{6,10})', 0.98),
        (r'^CIF\s*:?\s*(\d{6,10})', 0.97),
        (r'^C[I1]F\s*:?\s*(R[O0]?\d{6,10})', 0.95),
        (r'^C[I1]F\s*:?\s*(\d{6,10})', 0.94),
        # CIF not preceded by CLIENT (negative lookbehind)
        (r'(?<!CLIENT\s)(?<!LIENT\s)CIF\s*:?\s*(R[O0]?\d{6,10})', 0.95),
        (r'(?<!CLIENT\s)(?<!LIENT\s)CIF\s*:?\s*(\d{6,10})', 0.94),
        # Standalone CIF with word boundary
        (r'\bC[I1]F\s*:?\s*(R[O0]?\d{6,10})\b', 0.90),
        (r'\bC[I1]F\s*:?\s*(\d{6,10})\b', 0.89),
        # COD FISCAL (vendor)
        (r'COD\s+FISCAL\s*:?\s*(R[O0]?\d{6,10})', 0.90),
        (r'COD\s+FISCAL\s*:?\s*(\d{6,10})', 0.89),
        # C. I. F. format with SPACES (OCR artifact)
        (r'C\.\s*I\.\s*F\.?\s*[:\s]+(R[O0]?\d{6,10})', 0.92),
        (r'C\.\s*I\.\s*F\.?\s*[:\s]+(\d{6,10})', 0.91),
        # C.I.F. format (with dots, no spaces)
        (r'(?<!CLIENT\s)C\.[I1]\.F\.?\s*:?\s*(R[O0]?\d{6,10})', 0.88),
        (r'(?<!CLIENT\s)C\.[I1]\.F\.?\s*:?\s*(\d{6,10})', 0.87),
        # CUI format
        (r'(?<!CLIENT\s)C\.?U\.?[I1]\.?\s*:?\s*(R[O0]?\d{6,10})', 0.85),
        (r'(?<!CLIENT\s)C\.?U\.?[I1]\.?\s*:?\s*(\d{6,10})', 0.84),
        # Lidl format: "Cod Identificare fiscala" (OCR corrupted)
        (r'[IC](?:od|ed)\s*Identific[a-z/]*\s*(R[O0]\d{6,10})', 0.90),
        # Generic: anything with "fiscal" followed by RO + digits
        (r'fiscal[a-z]*\s*:?\s*(R[O0]\d{6,10})', 0.85),
    ]

    # CUI REVERSED format (number BEFORE label):
    # the code sits on the line above its "C.I.F." label.
    CUI_REVERSED_PATTERNS = [
        (r'(R[O0]\d{6,10})\s*\n\s*C\.?\s*I\.?\s*F\.?', 0.98),
        (r'(\d{6,10})\s*\n\s*C\.?\s*I\.?\s*F\.?', 0.95),
    ]

    # Items count patterns - NR POZ ART IN BON
    # Each entry is (regex, confidence); capture group 1 is the item count.
    # Several variants accept the label with its leading letters missing
    # ("OZ ART") or with the count on the line before/after the label.
    # NOTE(review): the code that consumes this list is not visible in this
    # section of the file.
    ITEMS_COUNT_PATTERNS = [
        (r'NR\.?\s*P?[O0]Z\.?\s*ART\.?\s*(?:IN\s+BON)?\s*:?\s*(\d+)', 0.98),
        (r'(\d{1,2})\s*\n\s*[O0]Z\.?\s*ART', 0.95),
        (r'[O0]Z\.?\s*ART\.?\s*(?:IN\s+BON)?\s*:?\s*[\n\s]*(\d+)', 0.93),
        (r'NR\.?\s*(?:P?[O0]Z\.?)?\s*ART(?:ICOLE)?\.?\s*(?:IN\s+BON)?\s*:?\s*[\n\s]*(\d+)', 0.90),
        (r'ARTIC[O0]LE\s*:?\s*(\d+)', 0.88),
        (r'(?:^|\s|:)P?[O0]Z\.?\s*(?:ART)?\.?\s*(?:IN\s+BON)?\s*:?\s*(\d{1,3})(?:\s|$)', 0.85),
    ]

    # Series patterns - Romanian fiscal receipt series
    # Each entry is (regex, confidence); capture group 1 is the series value
    # (1-4 letters after "SERIE", or 4 digits after a "Z:" / "BF:" label).
    SERIES_PATTERNS = [
        (r'SERIE\s*:?\s*([A-Z]{1,4})', 0.90),
        (r'(?:^|\s)Z\s*:\s*(\d{4})', 0.85),
        (r'(?:^|\s)BF\s*:\s*(\d{4})', 0.85),
    ]

    # -------------------------------------------------------------------------
    # Extraction methods - override in subclasses as needed
    # -------------------------------------------------------------------------

    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Collect the VAT (TVA) breakdown of a receipt - GENERIC implementation.

        The text is upper-cased, numbers that OCR split after the decimal
        separator ("55, 22") are rejoined, and every format-specific helper
        is run:
        - inline (Lidl): "TVA A 21% 7.71"
        - reversed (Stepout): "5.00% TUA*B"
        - multiline: "TOTAL TVA A - 19%" with the amount on the next line
        - table (OMV): "A-21,00% 285,66 49,58"
        The standard/fallback patterns are consulted only when none of the
        helpers above produced a candidate.

        Args:
            text: Raw OCR text from receipt

        Returns:
            List of dicts with keys: code, percent, amount. At most one entry
            is kept per (code, percent) pair - the first candidate wins.
            Empty for non-VAT payers (class flag or text detection).
        """
        upper_text = text.upper()

        # Non-VAT payers never carry a TVA breakdown.
        if self.IS_NON_VAT_PAYER or self._is_non_vat_payer(upper_text):
            return []

        # Rejoin numbers the OCR split after the separator: "55, 22" -> "55.22".
        normalized = re.sub(r'(\d+)[.,]\s+(\d{2})', r'\1.\2', upper_text)
        text_lines = normalized.split('\n')

        # Run every format-specific helper and pool their results.
        found = [
            *self._try_tva_inline(normalized),
            *self._try_tva_reversed(normalized, text_lines),
            *self._try_tva_multiline(normalized, text_lines),
            *self._try_tva_table(normalized),
        ]

        # The generic patterns are a last resort only.
        if not found:
            found = list(self._try_tva_standard(normalized))

        # Keep the first positive-amount candidate for each (code, percent).
        unique: List[dict] = []
        used_keys = set()
        for candidate in found:
            key = (candidate.get('code', 'A'), candidate.get('percent', 19))
            if key in used_keys:
                continue
            if candidate.get('amount') and candidate['amount'] > 0:
                unique.append(candidate)
                used_keys.add(key)

        return unique

    def _is_non_vat_payer(self, text: str) -> bool:
        """Return True when any NON_VAT_PATTERNS regex occurs in `text` (case-insensitive)."""
        return any(
            re.search(marker, text, re.IGNORECASE) is not None
            for marker in self.NON_VAT_PATTERNS
        )

    def _try_tva_inline(self, text: str) -> List[dict]:
        """
        Collect Lidl-style inline VAT lines such as 'TVA A 21,00% 7.71'.

        Only TVA_PATTERNS entries tagged 'inline' are used, and every
        occurrence of every such pattern is collected. Several inline
        patterns can match the same text, so the result may contain
        duplicates; no de-duplication is done here.

        Args:
            text: Receipt text to scan (matching is case-insensitive)

        Returns:
            List of dicts with keys: code, percent, amount (amount > 0)
        """
        results = []
        for regex, _confidence, fmt in self.TVA_PATTERNS:
            if fmt != 'inline':
                continue
            for hit in re.finditer(regex, text, re.IGNORECASE):
                try:
                    captured = hit.groups()
                    # Inline patterns capture (code, percent, amount).
                    if len(captured) < 3:
                        continue
                    vat_code = captured[0].upper() if captured[0] else 'A'
                    rate = int(captured[1])
                    value = self._parse_decimal(self._clean_ocr_number(captured[2]))
                    if value and value > 0:
                        results.append({
                            'code': vat_code,
                            'percent': rate,
                            'amount': value
                        })
                except (ValueError, InvalidOperation, IndexError):
                    # Unparseable candidate - skip it and keep scanning.
                    continue
        return results

    def _try_tva_reversed(self, text: str, lines: List[str]) -> List[dict]:
        """
        Collect Stepout-style VAT lines where the rate precedes the TVA
        marker, e.g. '5.00% TUA*B 2.00'.

        Each line is checked twice: first for the amount on the same line,
        then - only if that yielded nothing - for a line that ends right
        after the marker, in which case the whole following line is parsed
        as the amount. A missing code letter defaults to 'A'.

        Args:
            text: Full receipt text (not used; the scan is line based)
            lines: The receipt text split into lines

        Returns:
            List of dicts with keys: code, percent, amount (amount > 0)
        """
        same_line_regex = r'(\d{1,2})[.,]?\d{0,2}\s*%\s*T[UV][AR]\s*\*?\s*([A-D])?\s+([\d\s.,]+)'
        next_line_regex = r'(\d{1,2})[.,]?\d{0,2}\s*%\s*T[UV][AR]\s*\*?\s*([A-D])?$'
        parse_errors = (ValueError, InvalidOperation, IndexError)
        found = []

        for idx, row in enumerate(lines):
            # Variant 1: "5.00% TUA*B        2.00" - amount on the same line.
            inline_hit = re.search(same_line_regex, row, re.IGNORECASE)
            if inline_hit:
                try:
                    rate = int(inline_hit.group(1))
                    letter = inline_hit.group(2)
                    value = self._parse_decimal(inline_hit.group(3).strip())
                    if value and value > 0:
                        found.append({
                            'code': letter.upper() if letter else 'A',
                            'percent': rate,
                            'amount': value
                        })
                        continue  # this line is done, move to the next one
                except parse_errors:
                    pass  # fall through to the next-line variant

            # Variant 2: the line ends after the marker, amount on the NEXT line.
            wrapped_hit = re.search(next_line_regex, row, re.IGNORECASE)
            if not wrapped_hit or idx + 1 >= len(lines):
                continue
            try:
                rate = int(wrapped_hit.group(1))
                letter = wrapped_hit.group(2)
                value = self._parse_decimal(lines[idx + 1].strip())
                if value and value > 0:
                    found.append({
                        'code': letter.upper() if letter else 'A',
                        'percent': rate,
                        'amount': value
                    })
            except parse_errors:
                continue
        return found

    def _try_tva_multiline(self, text: str, lines: List[str]) -> List[dict]:
        """
        Find a VAT header such as 'TOTAL TVA A - 19%' whose amount is printed
        on the following line.

        The search stops at the first header whose next line parses to a
        positive amount, so the result holds at most one entry. A missing
        code letter defaults to 'A'.

        Args:
            text: Full receipt text (not used; the scan is line based)
            lines: The receipt text split into lines

        Returns:
            A one-element list with keys code, percent, amount - or [] when
            no header with a usable amount is found
        """
        # "TOTAL TVA A - 19%" / "TOTAL TVA A 19%" plus OCR-damaged variants.
        header_regexes = (
            r'TOTAL\s+TVA\s*([A-D])\s*[-\s]+(\d{1,2})\s*%',
            r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%',
            r'TOTAL\s+T[VU][AR]\s*([A-D])?\s*[-:]?\s*(\d{1,2})\s*%',
            r'O?TAL\s+[IT]VA\s*([A-D])\s*[-:]?\s*(\d{1,2})\s*%',
        )
        for idx, header in enumerate(lines):
            # Without a following line there is nowhere to read the amount from.
            has_next = idx + 1 < len(lines)
            for regex in header_regexes:
                hit = re.search(regex, header, re.IGNORECASE)
                if hit is None or not has_next:
                    continue
                try:
                    letter = hit.group(1)
                    rate = int(hit.group(2))
                    value = self._parse_decimal(lines[idx + 1].strip())
                    if value and value > 0:
                        return [{
                            'code': letter.upper() if letter else 'A',
                            'percent': rate,
                            'amount': value
                        }]
                except (ValueError, InvalidOperation, IndexError):
                    continue
        return []

    def _try_tva_table(self, text: str) -> List[dict]:
        """
        Collect OMV-style table rows such as 'A-21,00% 285,66 49,58'.

        Each row reads: code-percent, base amount, VAT amount; the last
        column is taken as the VAT amount. When no row yields an entry, a
        'TOTAL TAXE: 55,22' line is used instead and reported with code 'A'
        and the default rate of 19.

        Args:
            text: Receipt text to scan (matching is case-insensitive)

        Returns:
            List of dicts with keys: code, percent, amount (amount > 0)
        """
        row_regex = r'([A-D])\s*[-:]\s*(\d{1,2})[.,\s]*\d{0,2}\s*%\s+([\d.,\s]+)\s+([\d.,\s]+)'
        found = []
        for row in re.finditer(row_regex, text, re.IGNORECASE):
            try:
                # Group 4 is the last table column, i.e. the VAT amount.
                vat_value = self._parse_decimal(self._clean_ocr_number(row.group(4)))
                if vat_value and vat_value > 0:
                    found.append({
                        'code': row.group(1).upper(),
                        'percent': int(row.group(2)),
                        'amount': vat_value
                    })
            except (ValueError, InvalidOperation, IndexError):
                continue

        if found:
            return found

        # Fallback: a single "TOTAL TAXE: 55,22" summary line.
        taxes_hit = re.search(r'TOTAL\s+TAXE\s*:?\s*([\d.,\s]+)', text, re.IGNORECASE)
        if taxes_hit is None:
            return found
        try:
            total_value = self._parse_decimal(self._clean_ocr_number(taxes_hit.group(1)))
            if total_value and total_value > 0:
                found.append({
                    'code': 'A',
                    'percent': 19,  # Default rate
                    'amount': total_value
                })
        except (ValueError, InvalidOperation):
            pass
        return found

    def _try_tva_standard(self, text: str) -> List[dict]:
        """
        Try the standard/fallback TVA patterns and return at most one entry.

        TVA_PATTERNS entries tagged 'standard', 'bon', 'percent', 'coded',
        'fallback' or 'books' are tried in list order; only the first
        occurrence of each pattern is inspected, and the first one that
        yields a positive amount wins.

        How the captured groups are read:
        - (code, percent, amount): first group is alphabetic
        - (percent, amount): first group is numeric, code defaults to 'A'
        - (amount,): code defaults to 'A'; the rate is 5 for 'books'
          patterns (their regex only matches a literal "5 %") and 19 otherwise

        Args:
            text: Receipt text to scan (matching is case-insensitive)

        Returns:
            A one-element list with keys code, percent, amount - or [] when
            nothing usable is found
        """
        standard_fmts = ('standard', 'bon', 'percent', 'coded', 'fallback', 'books')
        for pattern, _confidence, fmt in self.TVA_PATTERNS:
            if fmt not in standard_fmts:
                continue
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue

            # The 'books' regexes hard-code "5 %" and capture only the amount,
            # so the rate is known even though it is not a capture group.
            # Every other amount-only pattern falls back to the default of 19.
            default_percent = 5 if fmt == 'books' else 19

            try:
                groups = match.groups()
                if len(groups) >= 2:
                    # Could be (percent, amount) or (code, percent, amount)
                    if groups[0] and groups[0].isalpha():
                        code = groups[0].upper()
                        percent = int(groups[1])
                        amount_str = groups[2] if len(groups) > 2 else None
                    else:
                        code = 'A'
                        first_is_rate = groups[0] and groups[0].isdigit()
                        percent = int(groups[0]) if first_is_rate else default_percent
                        amount_str = groups[1]
                elif len(groups) == 1:
                    # Just the amount
                    code = 'A'
                    percent = default_percent
                    amount_str = groups[0]
                else:
                    continue

                if not amount_str:
                    continue
                amount = self._parse_decimal(self._clean_ocr_number(amount_str))
                if amount and amount > 0:
                    return [{
                        'code': code,
                        'percent': percent,
                        'amount': amount
                    }]
            except (ValueError, InvalidOperation, IndexError):
                # Unparseable candidate - try the next pattern.
                continue
        return []

    def _clean_ocr_number(self, value: str) -> str:
        """
        Strip OCR-inserted spaces from a number string.

        Whitespace around '.' and ',' is dropped first ('55, 22' -> '55,22'),
        then every remaining space character is removed. A falsy input
        (None or '') yields ''.
        """
        if not value:
            return ""
        tightened = re.sub(r'\s*([.,])\s*', r'\1', value)
        return tightened.replace(' ', '')

    def extract_total(self, text: str) -> Tuple[Optional[Decimal], float]:
        """
        Locate the receipt's grand total.

        Two strategies run in order on the upper-cased text:
        1. "SUMA TOTALA" layouts, including the thermal-printer form where
           SUMA, TOTALA: and the amount sit on consecutive lines.
        2. The single-line TOTAL_PATTERNS regexes, tried in list order.

        A candidate is accepted only when it parses to an amount greater
        than zero and strictly below MAX_PAYMENT.

        Args:
            text: Raw OCR text from receipt

        Returns:
            Tuple of (amount, confidence) or (None, 0.0)
        """
        upper = text.upper()
        rows = upper.split('\n')
        row_count = len(rows)

        def acceptable(value):
            # One acceptance rule for every candidate: positive and below the cap.
            return value and value > 0 and value < self.MAX_PAYMENT

        # ---------------------------------------------------------------------
        # Strategy 1: "SUMA TOTALA", possibly spread over several lines
        # ---------------------------------------------------------------------
        for idx, raw_row in enumerate(rows):
            row = raw_row.strip()

            # A line containing "SUM" followed by A, Ă or whitespace opens a
            # window of up to four lines (this one included) that is searched
            # for "TOTALA".
            if re.search(r'S[UU]M[AĂ\s]', row):
                for probe_idx in range(idx, min(idx + 4, row_count)):
                    probe = rows[probe_idx].strip()

                    # "TOTALA:" / "TOTALA -" with the amount on the same line
                    labelled = re.search(r'T[O0]TALA\s*[:\-]\s*([\d\s.,]+)', probe)
                    if labelled:
                        value = self._parse_decimal(self._clean_ocr_number(labelled.group(1)))
                        if acceptable(value):
                            return (value, 0.98)

                    # "TOTALA" closing the line: the amount is on the next line
                    bare_label = re.search(r'T[O0]TALA\s*[:\-]?\s*$', probe)
                    if bare_label and probe_idx + 1 < row_count:
                        value = self._parse_decimal(rows[probe_idx + 1].strip())
                        if acceptable(value):
                            return (value, 0.97)

            # "SUMA TOTALA: <amount>" entirely on this line
            one_line = re.search(r'S[UU]M[AĂ]\s+T[O0]TALA\s*[:\-]\s*([\d\s.,]+)', row)
            if one_line:
                value = self._parse_decimal(self._clean_ocr_number(one_line.group(1)))
                if acceptable(value):
                    return (value, 0.98)

            # "SUMA TOTALA" closing the line: the amount is on the next line
            label_only = re.search(r'S[UU]M[AĂ]\s+T[O0]TALA\s*[:\-]?\s*$', row)
            if label_only and idx + 1 < row_count:
                value = self._parse_decimal(rows[idx + 1].strip())
                if acceptable(value):
                    return (value, 0.96)

        # ---------------------------------------------------------------------
        # Strategy 2: generic single-line patterns, in list order
        # ---------------------------------------------------------------------
        for regex, score in self.TOTAL_PATTERNS:
            hit = re.search(regex, upper)
            if hit is None:
                continue
            value = self._parse_decimal(hit.group(1))
            if acceptable(value):
                return (value, score)

        return (None, 0.0)

    def extract_date(self, text: str) -> Tuple[Optional[date], float]:
        """
        Extract receipt date from text.

        Args:
            text: Raw OCR text from receipt

        Returns:
            Tuple of (date, confidence) or (None, 0.0)
        """
        text_upper = text.upper()

        # Try standard patterns first
        for pattern, confidence in self.DATE_PATTERNS:
            match = re.search(pattern, text_upper)
            if match:
                parsed = self._parse_date(match.group(1))
                if parsed:
                    return (parsed, confidence)

        # Try OCR-corrupted patterns with spaces
        for pattern, confidence, fmt in self.DATE_PATTERNS_OCR_SPACES:
            match = re.search(pattern, text_upper)
            if match:
                try:
                    if fmt == 'ymd':
                        year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3))
                    else:  # dmy
                        day, month, year = int(match.group(1)), int(match.group(2)), int(match.group(3))

                    if 1 <= day <= 31 and 1 <= month <= 12 and 2000 <= year <= 2100:
                        return (date(year, month, day), confidence)
                except (ValueError, TypeError):
                    continue

        return (None, 0.0)

    def extract_receipt_number(self, text: str) -> Tuple[Optional[str], float]:
        """
        Extract receipt number from text.

        Args:
            text: Raw OCR text from receipt

        Returns:
            Tuple of (number, confidence) or (None, 0.0)
        """
        text_upper = text.upper()

        for pattern, confidence in self.NUMBER_PATTERNS:
            match = re.search(pattern, text_upper)
            if match:
                number = match.group(1).strip()
                if number and len(number) >= 3:
                    return (number, confidence)

        return (None, 0.0)

    def extract_payment_methods(self, text: str) -> List[dict]:
        """
        Extract payment methods (CARD/NUMERAR) from receipt.

        Supports:
        - Multiline patterns: "CARD\n78.00" (common in thermal receipts)
        - Multiple payments (split CARD + NUMERAR)
        - REST (change) detection to calculate actual CARD amount
        - Keyword-only CARD/NUMERAR that infers from total
        - Fallback for fiscal receipts without explicit payment

        Args:
            text: Raw OCR text from receipt

        Returns:
            List of dicts: [{'method': 'CARD'/'NUMERAR', 'amount': Decimal, 'confidence': float}]
        """
        text_upper = text.upper()
        lines = text_upper.split('\n')
        methods = []
        seen_entries = set()

        # =====================================================================
        # STEP 0: Try MULTILINE patterns first (thermal receipts)
        # Format: "CARD" on one line, amount on next line
        # =====================================================================
        for i, line in enumerate(lines):
            line_clean = line.strip()

            # Standalone CARD keyword (not part of MASTERCARD, etc.)
            if re.match(r'^CARD\s*$', line_clean):
                if i + 1 < len(lines):
                    next_line = lines[i + 1].strip()
                    # Must be a valid amount (not another keyword)
                    if re.match(r'^[\d\s.,]+$', next_line):
                        amount = self._parse_decimal(next_line)
                        if amount and amount > 0 and amount < self.MAX_PAYMENT:
                            entry_key = ('CARD', amount)
                            if entry_key not in seen_entries:
                                methods.append({
                                    'method': 'CARD',
                                    'amount': amount,
                                    'confidence': 0.95
                                })
                                seen_entries.add(entry_key)

            # Standalone NUMERAR keyword
            if re.match(r'^NUMERAR\s*$', line_clean):
                if i + 1 < len(lines):
                    next_line = lines[i + 1].strip()
                    if re.match(r'^[\d\s.,]+$', next_line):
                        amount = self._parse_decimal(next_line)
                        if amount and amount > 0 and amount < self.MAX_PAYMENT:
                            entry_key = ('NUMERAR', amount)
                            if entry_key not in seen_entries:
                                methods.append({
                                    'method': 'NUMERAR',
                                    'amount': amount,
                                    'confidence': 0.95
                                })
                                seen_entries.add(entry_key)

        # If multiline extraction found methods, return them
        if methods:
            return methods

        # =====================================================================
        # STEP 1: Try pattern-based extraction with explicit amounts
        # =====================================================================
        for pattern, method, confidence in self.PAYMENT_PATTERNS:
            for match in re.finditer(pattern, text_upper):
                try:
                    amount = self._parse_decimal(match.group(1))
                    if amount and amount > 0 and amount < self.MAX_PAYMENT:
                        entry_key = (method, amount)
                        if entry_key not in seen_entries:
                            methods.append({
                                'method': method,
                                'amount': amount,
                                'confidence': confidence
                            })
                            seen_entries.add(entry_key)
                except (ValueError, InvalidOperation):
                    continue

        # If we found explicit amounts, we're done
        if methods:
            return methods

        # Step 2: Try keyword-only detection with REST logic
        # Get total amount for inference
        total_amount, _ = self.extract_total(text)
        if not total_amount:
            return []

        # Check for payment keywords
        has_card = any(kw in text_upper for kw in ['CARD', 'CARTE CREDIT', 'VISA', 'MASTERCARD', 'DEBIT', 'CREDIT', 'CONTACTLESS'])
        has_numerar = any(kw in text_upper for kw in ['NUMERAR', 'CASH'])

        # Find REST (change) amount
        rest_amount = Decimal('0')
        for i, line in enumerate(lines):
            if 'REST' in line:
                # REST on same line: "REST 0.00" or "REST: 0.00"
                match = re.search(r'REST\s*:?\s*([\d.,]+)', line)
                if match:
                    rest_amount = self._parse_decimal(match.group(1)) or Decimal('0')
                elif i + 1 < len(lines):
                    # REST on separate line
                    rest_amount = self._parse_decimal(lines[i + 1].strip()) or Decimal('0')
                break

        # Calculate payment amounts
        if has_card:
            card_amount = total_amount - rest_amount
            if card_amount > 0:
                methods.append({
                    'method': 'CARD',
                    'amount': card_amount,
                    'confidence': 0.90
                })

        if has_numerar:
            if has_card and rest_amount > 0:
                # Mixed payment: NUMERAR is the change given back
                methods.append({
                    'method': 'NUMERAR',
                    'amount': rest_amount,
                    'confidence': 0.85
                })
            elif not has_card:
                # Cash only
                methods.append({
                    'method': 'NUMERAR',
                    'amount': total_amount,
                    'confidence': 0.90
                })

        # Step 3: Fallback for fiscal receipts without explicit payment
        if not methods and total_amount and total_amount > 0:
            is_fiscal = 'BON FISCAL' in text_upper or 'FISCAL' in text_upper
            if is_fiscal:
                # Default to CARD for business purchases (most common)
                methods.append({
                    'method': 'CARD',
                    'amount': total_amount,
                    'confidence': 0.70
                })

        return methods

    def extract_client_cui(self, text: str) -> Tuple[Optional[str], float]:
        """
        Extract client CUI from B2B receipts.

        Args:
            text: Raw OCR text from receipt

        Returns:
            Tuple of (cui, confidence) or (None, 0.0)
        """
        text_upper = text.upper()

        # First check if there's a CLIENT section
        has_client_section = any(
            re.search(marker, text_upper, re.IGNORECASE)
            for marker in self.CLIENT_MARKERS
        )

        if not has_client_section:
            return (None, 0.0)

        # Try to extract CUI
        for pattern, confidence in self.CLIENT_CUI_PATTERNS:
            match = re.search(pattern, text_upper, re.IGNORECASE | re.MULTILINE)
            if match:
                cui = match.group(1)
                # Normalize: remove RO prefix for storage
                cui_digits = re.sub(r'[^0-9]', '', cui)
                if 6 <= len(cui_digits) <= 10:
                    return (cui_digits, confidence)

        return (None, 0.0)

    def extract_client_name(self, text: str) -> Tuple[Optional[str], float]:
        """
        Extract client/buyer company name from B2B receipts.

        Args:
            text: Raw OCR text from receipt

        Returns:
            Tuple of (client_name, confidence) or (None, 0.0)
        """
        text_upper = text.upper()
        lines = text.split('\n')

        # First check if there's a CLIENT section
        client_section_idx = None
        for i, line in enumerate(lines):
            line_upper = line.upper().strip()
            if any(re.search(marker, line_upper, re.IGNORECASE) for marker in self.CLIENT_MARKERS):
                client_section_idx = i
                break

        if client_section_idx is None:
            return (None, 0.0)

        # Look for company name in CLIENT section
        line = lines[client_section_idx].strip()
        line_upper = line.upper()

        # Strategy 1: Check if name is on same line after ":"
        if ':' in line:
            name_part = line.split(':', 1)[1].strip()
            if name_part and len(name_part) >= 3:
                # Skip if it looks like a CUI (RO followed by digits)
                if re.match(r'^R[O0]?\d{6,10}$', name_part.upper()):
                    pass  # This is CUI, not name - continue to next strategy
                else:
                    # Check for company indicators
                    name_upper = name_part.upper()
                    if any(re.search(ind, name_upper) for ind in self.COMPANY_INDICATORS):
                        return (self._clean_company_name(name_part), 0.95)
                    elif len(name_part) >= 5 and not name_part.isdigit():
                        return (self._clean_company_name(name_part), 0.80)

        # Strategy 2: Check next line for company name
        if client_section_idx + 1 < len(lines):
            next_line = lines[client_section_idx + 1].strip()
            next_upper = next_line.upper()

            # Skip if it's a CUI/CIF line or looks like CUI
            if not re.search(r'C\.?\s*[UI]\.?\s*[IF]\.?', next_upper):
                if not re.match(r'^R[O0]?\d{6,10}$', next_upper):
                    if any(re.search(ind, next_upper) for ind in self.COMPANY_INDICATORS):
                        return (self._clean_company_name(next_line), 0.90)
                    elif len(next_line) >= 5 and not next_line.isdigit():
                        # Check it's not CUI/CIF/COD keywords
                        if not any(kw in next_upper for kw in ['CUI', 'CIF', 'COD', 'FISCAL']):
                            return (self._clean_company_name(next_line), 0.75)

        # Strategy 3: Look for any line with company indicators in CLIENT section region
        search_end = min(client_section_idx + 5, len(lines))
        for i in range(client_section_idx + 1, search_end):
            line = lines[i].strip()
            line_upper = line.upper()

            # Skip CUI/CIF lines
            if re.search(r'C\.?\s*[UI]\.?\s*[IF]\.?', line_upper):
                continue
            if re.match(r'^R[O0]?\d{6,10}$', line_upper):
                continue

            if any(re.search(ind, line_upper) for ind in self.COMPANY_INDICATORS):
                return (self._clean_company_name(line), 0.85)

        return (None, 0.0)

    @staticmethod
    def _clean_company_name(name: str) -> str:
        """Clean company name for storage."""
        if not name:
            return ""
        # Remove extra whitespace
        name = re.sub(r'\s+', ' ', name).strip()
        # Remove trailing punctuation except periods in S.R.L., S.A., etc.
        name = re.sub(r'[,;:]+$', '', name).strip()
        return name

    # -------------------------------------------------------------------------
    # Validation hints - override to customize validation behavior
    # -------------------------------------------------------------------------

    def get_validation_hints(self) -> Dict[str, Any]:
        """
        Return validation hints for this store.

        Returns:
            Dict with validation hints. Common keys:
            - has_multi_rate_tva: bool - Store uses multiple TVA rates
            - card_equals_total: bool - CARD payment equals total
            - has_client_cui: bool - Receipt includes client CUI
            - has_efactura: bool - Store uses e-factura format
            - is_non_vat_payer: bool - Store is not a VAT payer
        """
        return {}

    # -------------------------------------------------------------------------
    # Helper methods - available to all subclasses
    # -------------------------------------------------------------------------

    @staticmethod
    def _normalize_number(text: str) -> str:
        """
        Normalize a number string for Decimal conversion.

        Handles Romanian formats: "1.234,56" -> "1234.56"
        """
        if not text:
            return "0"

        # Remove spaces
        text = text.replace(" ", "")

        # Determine decimal separator
        last_comma = text.rfind(",")
        last_dot = text.rfind(".")

        if last_comma > last_dot:
            text = text.replace(".", "").replace(",", ".")
        elif last_dot > last_comma:
            text = text.replace(",", "")
        else:
            text = text.replace(",", ".")

        return text

    @staticmethod
    def _parse_decimal(text: str) -> Optional[Decimal]:
        """Parse a string to Decimal, handling various formats."""
        try:
            normalized = BaseStoreProfile._normalize_number(text)
            return Decimal(normalized)
        except (InvalidOperation, ValueError, TypeError):
            return None

    @staticmethod
    def _parse_date(text: str) -> Optional[date]:
        """
        Parse date string in various formats.

        Supports: DD-MM-YYYY, DD/MM/YYYY, DD.MM.YYYY, YYYY-MM-DD
        """
        if not text:
            return None

        # Normalize separators
        text = text.replace('/', '-').replace('.', '-')

        try:
            parts = text.split('-')
            if len(parts) != 3:
                return None

            # Determine format based on first part length
            if len(parts[0]) == 4:
                # YYYY-MM-DD
                year, month, day = int(parts[0]), int(parts[1]), int(parts[2])
            else:
                # DD-MM-YYYY
                day, month, year = int(parts[0]), int(parts[1]), int(parts[2])

            # Validate ranges
            if 1 <= day <= 31 and 1 <= month <= 12 and 2000 <= year <= 2100:
                return date(year, month, day)
        except (ValueError, TypeError, IndexError):
            pass

        return None

    @staticmethod
    def _clean_text(text: str) -> str:
        """Clean OCR text for pattern matching."""
        if not text:
            return ""
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'[\x00-\x09\x0b\x0c\x0e-\x1f\x7f]', '', text)
        return text.strip()

    # -------------------------------------------------------------------------
    # Magic methods
    # -------------------------------------------------------------------------

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} CUI={self.CUI_LIST}>"

    def __str__(self) -> str:
        return f"{self.STORE_NAME} ({', '.join(self.CUI_LIST)})"