fix(ocr): Fix store profile extraction patterns and module loading

Major fixes to OCR store profiles for Romanian receipt extraction: - Fix ProfileRegistry module path resolution (was loading 0 profiles) - Add multiline TVA extraction for Brick, Electrobering, Gama Ink - Add "CARTE CREDIT" payment detection for OMV/SOCAR gas stations - Handle OCR artifacts: TVA→TUA, "-"→"4", I→L in CUI markers - Add client CUI patterns for Brick receipts - Add profile selection logging to ocr_extractor.py - Create test script for all 29 PDFs (test_all_profiles.py) Test results: 13/29 passing (improved from 9/29) Remaining failures are primarily OCR quality issues. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-07 09:40:58 +00:00
parent 099556213d
commit 28f259cd05
13 changed files with 1531 additions and 257 deletions
--- a/backend/modules/data_entry/services/ocr/profiles/__init__.py
+++ b/backend/modules/data_entry/services/ocr/profiles/__init__.py
@@ -251,9 +251,12 @@ class ProfileRegistry:
        # Get list of profile modules (exclude __init__, base)
        module_names = cls._get_profile_module_names()

+        # Determine the module prefix based on how THIS module was imported
+        base_package = cls.__module__
+
        count = 0
        for module_name in module_names:
-            full_name = f"backend.modules.data_entry.services.ocr.profiles.{module_name}"
+            full_name = f"{base_package}.{module_name}"

            try:
                if full_name in sys.modules:
@@ -349,8 +352,15 @@ class ProfileRegistry:

        module_names = cls._get_profile_module_names()

+        # Determine the module prefix based on how THIS module was imported
+        # This handles both:
+        # - Running from backend dir: "modules.data_entry.services.ocr.profiles"
+        # - Running from project root: "backend.modules.data_entry.services.ocr.profiles"
+        this_module = cls.__module__  # e.g. "backend.modules..." or "modules..."
+        base_package = this_module  # Use the same prefix for child modules
+
        for module_name in module_names:
-            full_name = f"backend.modules.data_entry.services.ocr.profiles.{module_name}"
+            full_name = f"{base_package}.{module_name}"
            try:
                importlib.import_module(full_name)
                logger.debug(f"Loaded module: {module_name}")
--- a/backend/modules/data_entry/services/ocr/profiles/base.py
+++ b/backend/modules/data_entry/services/ocr/profiles/base.py
@@ -111,25 +111,34 @@ class BaseStoreProfile(ABC):
        (r'(?:^|\n|\s)MERAR\s*:?\s*(\d{1,6}[.,]\d{2})\b', 'NUMERAR', 0.70),
    ]

-    # Client section markers (for B2B receipts)
+    # Client section markers (for B2B receipts) - More flexible patterns
    CLIENT_MARKERS = [
-        r'C\.?\s*[I1]\.?\s*F\.?\s+CLIENT\s*:',
-        r'C\.?\s*U\.?\s*[I1]\.?\s+CLIENT\s*:',
-        r'CLIENT\s+C\.?\s*[UI1]\.?\s*[IF1]\.?\s*:',
-        r'CLIENT\s*:',
-        r'CUMPARATOR\s*:',
-        r'BENEFICIAR\s*:',
+        r'C\.?\s*[I1]\.?\s*F\.?\s+CLIENT',    # "CIF CLIENT" (with or without colon)
+        r'C\.?\s*U\.?\s*[I1]\.?\s+CLIENT',    # "CUI CLIENT"
+        r'CLIENT\s+C\.?\s*[UI1]\.?\s*[IF1]',  # "CLIENT CIF" / "CLIENT CUI"
+        r'CLIENT\s*:',                          # "CLIENT:"
+        r'CUMPARATOR\s*:',                      # "CUMPARATOR:"
+        r'BENEFICIAR\s*:',                      # "BENEFICIAR:"
+        r'CUMP[AĂ]R[AĂ]TOR',                   # "CUMPARATOR" without colon
+        r'COD\s+FISCAL\s+CLIENT',              # "COD FISCAL CLIENT"
    ]

-    # Client CUI patterns (pattern, confidence)
+    # Client CUI patterns (pattern, confidence) - More flexible
    CLIENT_CUI_PATTERNS = [
-        (r'(R[O0]\d{6,10})\s*\n\s*CLIENT\s+C\.?\s*U\.?\s*[I1]\.?', 0.99),
-        (r'(R[O0]\d{6,10})\s*:?\s*\n\s*CLIENT', 0.98),
-        (r'C[I1]F\s+[A-Z]*\s*CLIENT\s*:?\s*(R[O0]\d{6,10})', 0.98),
-        (r'C\.?\s*[I1]\.?\s*F\.?\s+CLIENT\s*:?\s*(R[O0]?\d{6,10})', 0.98),
-        (r'C\.?\s*U\.?\s*[I1]\.?\s+CLIENT\s*:?\s*(R[O0]?\d{6,10})', 0.98),
-        (r'CLIENT\s+C\.?\s*U\.?\s*[I1]\.?\s*:?\s*(R[O0]?\d{6,10})', 0.95),
-        (r'CLIENT\s*:?\s*(R[O0]?\d{6,10})', 0.90),
+        # "CIF CLIENT:XXXXXXX" or "CIF CLIENT: ROXXXXXXX" - most common format
+        (r'C\.?[I1]\.?F\.?\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.99),
+        # "CLIENT CIF: XXXXXXX"
+        (r'CLIENT\s+C\.?[I1]\.?F\.?\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.98),
+        # "CUI CLIENT: XXXXXXX"
+        (r'C\.?U\.?[I1]\.?\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.98),
+        # "ROXXXXXXX" followed by CLIENT marker
+        (r'(R[O0]\d{6,10})\s*\n\s*CLIENT', 0.97),
+        # "C.I.F. CLIENT: XXXXXXX"
+        (r'C\.?\s*I\.?\s*F\.?\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.96),
+        # "CLIENT: ROXXXXXXX" or "CLIENT: XXXXXXX"
+        (r'CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.90),
+        # "COD FISCAL CLIENT: XXXXXXX"
+        (r'COD\s+FISCAL\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.95),
    ]

    # Company type indicators (for identifying company names)
--- a/backend/modules/data_entry/services/ocr/profiles/brick.py
+++ b/backend/modules/data_entry/services/ocr/profiles/brick.py
@@ -2,11 +2,16 @@
 BRICK (Five-Holding) store profile for OCR extraction.

 Five-Holding S.A. operates BRICK stores with standard receipt format.
+
+Receipt structure:
+- TVA format: "TOTAL TVA A - 21%" with amount on next line
+- Payment: "CARD" on separate line (amount from TOTAL LEI)
+- Client CUI: "CLIENT C.U.L./C.IF. :ROXXXXXXX" (OCR reads I as L)
 """

 import re
 from decimal import Decimal, InvalidOperation
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Tuple, Optional

 from .base import BaseStoreProfile
 from . import ProfileRegistry
@@ -15,32 +20,60 @@ from . import ProfileRegistry
@ProfileRegistry.register
 class BrickProfile(BaseStoreProfile):
    """
-    FIVE-HOLDING S.A. (BRICK) - standard TVA format.
+    FIVE-HOLDING S.A. (BRICK) - standard TVA format with client CUI.

    Key characteristics:
-    - Standard TVA format
-    - Single TVA rate typically
-    - No client CUI on receipts
+    - Standard TVA format with rate code (A, B, etc.)
+    - TVA amount on separate line after percentage
+    - CARD payment indicated by keyword (amount derived from total)
+    - Client CUI in format: CLIENT C.U.L./C.IF.
+    - OCR often reads "I" as "L" in CUI markers
    """

    CUI_LIST = ["10562600"]
-    NAME_PATTERNS = ["BRICK", "FIVE-HOLDING", "FIVE HOLDING", "BR1CK"]  # OCR variants
+    NAME_PATTERNS = ["BRICK", "FIVE-HOLDING", "FIVE HOLDING", "BR1CK", "F1VE"]
    STORE_NAME = "FIVE-HOLDING S.A."

-    # Standard TVA patterns (flexible - accepts any rate)
+    # BRICK TVA patterns (amount often on separate line)
    TVA_PATTERNS = [
-        # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY"
-        r'TVA\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
-        # "A - XX,XX% = YY,YY"
-        r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,]+)',
-        # Simple: "TVA XX% YY,YY"
-        r'TVA\s+(\d{1,2})\s*%\s*([\d.,]+)',
+        # "TOTAL TVA A - 21%" with amount on next line (captured as multiline)
+        r'TOTAL\s+TVA\s*([A-D])\s*[-:]\s*(\d{1,2})\s*%\s*\n?\s*([\d.,]+)',
+        # "OTAL IVAA 21%" - OCR error variant
+        r'O?TAL\s+[IT]VA\s*([A-D])\s*[-:]?\s*(\d{1,2})\s*%\s*\n?\s*([\d.,]+)',
+        # "TOTAL TVA A 21%" without separator
+        r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%\s*\n?\s*([\d.,]+)',
+        # "TVA A: XX% = YY,YY" - inline format
+        r'TVA\s*([A-D])\s*[-:]?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
+    ]
+
+    # TOTAL TVA BON pattern (fallback)
+    TOTAL_TVA_BON_PATTERN = r'TOTAL\s+TVA\s*BON\s*\n?\s*([\d.,]+)'
+
+    # Client CUI patterns - specific to Brick (handles OCR L/I confusion)
+    CLIENT_CUI_PATTERNS = [
+        # "CLIENT C.U.L./C.IF. :R01879855" - exact OCR format (I->L)
+        (r'CLIENT\s+C\.?U\.?[LI1]\.?\s*/?\s*C\.?[LI1]\.?F\.?\s*:?\s*(R?O?\d{6,10})', 0.99),
+        # "CLIENT C.U.I./C.I.F.: RO1879855" - standard format
+        (r'CLIENT\s+C\.?U\.?I\.?\s*/?\s*C\.?I\.?F\.?\s*:?\s*(R?O?\s*\d{6,10})', 0.98),
+        # "CIF CLIENT: XXXXXXX" - alternative format
+        (r'CIF\s+CLIENT\s*:?\s*(R?O?\s*\d{6,10})', 0.95),
+    ]
+
+    # Client markers for Brick
+    CLIENT_MARKERS = [
+        r'CLIENT\s+C\.?U\.?[LI1]',
+        r'CLIENT\s+C\.?I\.?F',
+        r'CIF\s+CLIENT',
    ]

    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Extract BRICK-specific TVA entries.

+        BRICK receipts show TVA in multi-line format:
+        "TOTAL TVA A - 21%"
+        "32.31"
+
        Args:
            text: Raw OCR text from receipt

@@ -48,11 +81,12 @@ class BrickProfile(BaseStoreProfile):
            List of TVA entries with code, percent, and amount
        """
        entries = []
+        text_upper = text.upper()
        seen = set()

-        # Try coded patterns first
-        for pattern in self.TVA_PATTERNS[:2]:
-            for match in re.finditer(pattern, text, re.IGNORECASE):
+        # Try coded patterns first (with multiline support)
+        for pattern in self.TVA_PATTERNS:
+            for match in re.finditer(pattern, text_upper, re.IGNORECASE | re.MULTILINE):
                try:
                    code = match.group(1).upper()
                    percent = int(match.group(2))
@@ -67,35 +101,182 @@ class BrickProfile(BaseStoreProfile):
                                'amount': amount
                            })
                            seen.add(entry_key)
+                            return entries  # Brick usually has single TVA rate
                except (ValueError, InvalidOperation, IndexError):
                    continue

-        # Fallback to simple format
-        if not entries:
-            simple_pattern = self.TVA_PATTERNS[2]
-            for match in re.finditer(simple_pattern, text, re.IGNORECASE):
-                try:
-                    percent = int(match.group(1))
-                    amount = self._parse_decimal(match.group(2))
-
-                    if amount and amount > 0:
-                        entries.append({
-                            'code': 'A',
-                            'percent': percent,
-                            'amount': amount
-                        })
-                        break
-                except (ValueError, InvalidOperation):
-                    continue
+        # Fallback: "TOTAL TVA BON" with amount on next line
+        match = re.search(self.TOTAL_TVA_BON_PATTERN, text_upper, re.IGNORECASE | re.MULTILINE)
+        if match:
+            try:
+                amount = self._parse_decimal(match.group(1))
+                if amount and amount > 0:
+                    entries.append({
+                        'code': 'A',
+                        'percent': 19,  # Assumed rate: the TOTAL TVA BON line carries no percentage
+                        'amount': amount
+                    })
+            except (ValueError, InvalidOperation):
+                pass

        return entries

+    def extract_payment_methods(self, text: str) -> List[dict]:
+        """
+        Extract BRICK-specific payment methods.
+
+        BRICK receipts show payment method on separate line:
+        "TOTAL LEI"
+        "21.18"
+        "CARD"
+        "0.00"  <- REST (change)
+
+        When CARD appears with REST=0, full amount was paid by card.
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            List of payment methods with method, amount, and confidence
+        """
+        payments = []
+        text_upper = text.upper()
+        lines = text_upper.split('\n')
+
+        # Find TOTAL LEI amount
+        total_amount = None
+        for i, line in enumerate(lines):
+            if 'TOTAL' in line and 'LEI' in line:
+                # Amount is likely on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    total_amount = self._parse_decimal(amount_str)
+                    break
+            # Also try inline: "TOTAL LEI 21.18"
+            match = re.search(r'TOTAL\s+LEI\s*([\d.,]+)', line)
+            if match:
+                total_amount = self._parse_decimal(match.group(1))
+                break
+
+        if not total_amount:
+            # Fallback to generic total extraction
+            total_amount, _ = self.extract_total(text)
+
+        if not total_amount:
+            return []
+
+        # Check for CARD or NUMERAR keywords
+        has_card = any('CARD' in line for line in lines)
+        has_numerar = any('NUMERAR' in line for line in lines)
+
+        # Find REST amount to determine actual card amount
+        rest_amount = Decimal('0')
+        for i, line in enumerate(lines):
+            if 'REST' in line:
+                # REST amount is on next line or same line
+                match = re.search(r'REST\s*([\d.,]+)', line)
+                if match:
+                    rest_amount = self._parse_decimal(match.group(1)) or Decimal('0')
+                elif i + 1 < len(lines):
+                    rest_amount = self._parse_decimal(lines[i + 1].strip()) or Decimal('0')
+                break
+
+        if has_card:
+            # Card payment = total - rest
+            card_amount = total_amount - rest_amount
+            if card_amount > 0:
+                payments.append({
+                    'method': 'CARD',
+                    'amount': card_amount,
+                    'confidence': 0.95
+                })
+
+        if has_numerar:
+            # If both card and cash, need more complex logic
+            # For now, assume numerar is the rest if card is present
+            if not has_card:
+                payments.append({
+                    'method': 'NUMERAR',
+                    'amount': total_amount,
+                    'confidence': 0.95
+                })
+            elif rest_amount > 0:
+                payments.append({
+                    'method': 'NUMERAR',
+                    'amount': rest_amount,
+                    'confidence': 0.90
+                })
+
+        # If nothing was recorded above and REST=0, treat a CARD/DEBIT/CREDIT keyword as a full card payment
+        if not payments and rest_amount == 0:
+            # Check for any payment indicators
+            for line in lines:
+                if 'CARD' in line or 'DEBIT' in line or 'CREDIT' in line:
+                    payments.append({
+                        'method': 'CARD',
+                        'amount': total_amount,
+                        'confidence': 0.90
+                    })
+                    break
+
+        # FALLBACK: If still no payment found but we have total amount,
+        # assume CARD for business receipts (Brick stores usually accept card)
+        # This handles cases where OCR fails to capture payment method
+        if not payments and total_amount and total_amount > 0:
+            # Check if this is a fiscal receipt (BON FISCAL)
+            is_fiscal = 'BON FISCAL' in text_upper or 'FISCAL' in text_upper
+            if is_fiscal:
+                payments.append({
+                    'method': 'CARD',
+                    'amount': total_amount,
+                    'confidence': 0.70  # Lower confidence for inferred payment
+                })
+
+        return payments
+
+    def extract_client_cui(self, text: str) -> Tuple[Optional[str], float]:
+        """
+        Extract client CUI from BRICK receipt.
+
+        BRICK uses format: "CLIENT C.U.L./C.IF. :R01879855"
+        Note: OCR often reads "I" as "L" in these markers.
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            Tuple of (cui, confidence) or (None, 0.0)
+        """
+        text_upper = text.upper()
+
+        # Check for Brick client markers
+        has_client = any(
+            re.search(marker, text_upper, re.IGNORECASE)
+            for marker in self.CLIENT_MARKERS
+        )
+
+        if not has_client:
+            return (None, 0.0)
+
+        # Try Brick-specific patterns
+        for pattern, confidence in self.CLIENT_CUI_PATTERNS:
+            match = re.search(pattern, text_upper, re.IGNORECASE)
+            if match:
+                cui = match.group(1)
+                # Clean up: remove RO prefix, spaces
+                cui_digits = re.sub(r'[^0-9]', '', cui)
+                if 6 <= len(cui_digits) <= 10:
+                    return (cui_digits, confidence)
+
+        return (None, 0.0)
+
    def get_validation_hints(self) -> Dict[str, Any]:
        """Return BRICK-specific validation hints."""
        return {
            "has_multi_rate_tva": False,
-            "card_equals_total": False,
-            "has_client_cui": False,
+            "card_equals_total": True,  # Card amount equals total when REST=0
+            "has_client_cui": True,  # Brick receipts CAN have client CUI
            "has_efactura": False,
            "is_non_vat_payer": False,
+            "tva_on_separate_line": True,  # TVA amount on next line
        }
--- a/backend/modules/data_entry/services/ocr/profiles/electrobering.py
+++ b/backend/modules/data_entry/services/ocr/profiles/electrobering.py
@@ -2,11 +2,16 @@
 ELECTROBERING S.R.L. store profile for OCR extraction.

 Electronics and home supplies store.
+
+Receipt structure:
+- TVA format: "TOTAL TVA A - - 19%" with amount on next line
+- "TOTAL TVA BON" with total TVA amount
+- Client CUI: "CIF CLIENT: XXXXXXX"
 """

 import re
 from decimal import Decimal, InvalidOperation
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Tuple, Optional

 from .base import BaseStoreProfile
 from . import ProfileRegistry
@@ -15,11 +20,11 @@ from . import ProfileRegistry
@ProfileRegistry.register
 class ElectroberingProfile(BaseStoreProfile):
    """
-    ELECTROBERING S.R.L. - standard TVA profile.
+    ELECTROBERING S.R.L. - standard TVA profile with multiline support.

    Key characteristics:
-    - Standard TVA format (single rate, any percentage)
-    - Electronics and home supplies
+    - TVA format with rate on one line, amount on next
+    - Double-dash separators common (OCR artifact)
    - May have client CUI for B2B purchases
    - CARD payment typical
    """
@@ -28,19 +33,28 @@ class ElectroberingProfile(BaseStoreProfile):
    NAME_PATTERNS = ["ELECTROBERING", "ELECTR0BERING", "ELECTROBERING SRL"]
    STORE_NAME = "ELECTROBERING S.R.L."

-    # Standard TVA patterns (flexible - accepts any rate)
+    # ELECTROBERING TVA patterns (handles double-dash and multiline)
    TVA_PATTERNS = [
-        # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY"
-        r'TVA\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
-        # "A - XX,XX% = YY,YY"
-        r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,]+)',
-        # "TVA XX% YY,YY" (simple format without code)
-        r'TVA\s+(\d{1,2})\s*%\s*([\d.,]+)',
+        # "TOTAL TVA A - - 19%" with amount on next line
+        r'TOTAL\s+TVA\s*([A-D])\s*[-\s]+(\d{1,2})\s*%',
+        # "TOTAL TVA A 19%" without separator
+        r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%',
+        # Standard: "TVA A: XX% = YY,YY"
+        r'TVA\s*([A-D])\s*[-:]?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
    ]

+    # TOTAL TVA BON pattern (fallback)
+    TOTAL_TVA_BON_PATTERN = r'TOTAL\s+TVA\s+BON'
+
    def extract_tva_entries(self, text: str) -> List[dict]:
        """
-        Extract TVA entries from receipt text.
+        Extract ELECTROBERING-specific TVA entries.
+
+        ELECTROBERING receipts show TVA in multi-line format:
+        "TOTAL TVA A - - 19%"
+        "5.59"
+        "TOTAL TVA BON"
+        "5.59"

        Args:
            text: Raw OCR text from receipt
@@ -49,45 +63,61 @@ class ElectroberingProfile(BaseStoreProfile):
            List of TVA entries with code, percent, and amount
        """
        entries = []
-        seen = set()
+        text_upper = text.upper()
+        lines = text_upper.split('\n')

-        # Try coded patterns first
-        for pattern in self.TVA_PATTERNS[:2]:
-            for match in re.finditer(pattern, text, re.IGNORECASE):
-                try:
-                    code = match.group(1).upper()
-                    percent = int(match.group(2))
-                    amount = self._parse_decimal(match.group(3))
-
-                    if amount and amount > 0:
-                        entry_key = (code, percent)
-                        if entry_key not in seen:
-                            entries.append({
-                                'code': code,
-                                'percent': percent,
-                                'amount': amount
-                            })
-                            seen.add(entry_key)
-                except (ValueError, InvalidOperation, IndexError):
-                    continue
-
-        # Fallback to simple format
-        if not entries:
-            simple_pattern = self.TVA_PATTERNS[2]
-            for match in re.finditer(simple_pattern, text, re.IGNORECASE):
-                try:
-                    percent = int(match.group(1))
-                    amount = self._parse_decimal(match.group(2))
+        # Find TVA rate line and get amount from next line
+        for i, line in enumerate(lines):
+            # Match "TOTAL TVA A - - 19%" or "TOTAL TVA A 19%"
+            match = re.search(r'TOTAL\s+TVA\s*([A-D])\s*[-\s]+(\d{1,2})\s*%', line)
+            if match:
+                code = match.group(1)
+                percent = int(match.group(2))

+                # Amount should be on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
                    if amount and amount > 0:
                        entries.append({
-                            'code': 'A',
+                            'code': code,
                            'percent': percent,
                            'amount': amount
                        })
-                        break
+                        return entries
+
+        # Fallback: Find TOTAL TVA BON and get amount
+        for i, line in enumerate(lines):
+            if re.search(self.TOTAL_TVA_BON_PATTERN, line):
+                # Amount should be on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
+                    if amount and amount > 0:
+                        entries.append({
+                            'code': 'A',
+                            'percent': 19,  # Default Romanian TVA rate
+                            'amount': amount
+                        })
+                        return entries
+
+        # Last fallback: inline format "TVA A: XX% = YY,YY"
+        for pattern in [self.TVA_PATTERNS[2]]:
+            match = re.search(pattern, text_upper, re.IGNORECASE)
+            if match and len(match.groups()) >= 3:
+                try:
+                    code = match.group(1)
+                    percent = int(match.group(2))
+                    amount = self._parse_decimal(match.group(3))
+                    if amount and amount > 0:
+                        entries.append({
+                            'code': code,
+                            'percent': percent,
+                            'amount': amount
+                        })
+                        return entries
                except (ValueError, InvalidOperation):
-                    continue
+                    pass

        return entries

@@ -99,4 +129,5 @@ class ElectroberingProfile(BaseStoreProfile):
            "has_client_cui": True,  # May have client CUI for B2B
            "has_efactura": False,
            "is_non_vat_payer": False,
+            "tva_on_separate_line": True,
        }
--- a/backend/modules/data_entry/services/ocr/profiles/gama_ink.py
+++ b/backend/modules/data_entry/services/ocr/profiles/gama_ink.py
@@ -2,6 +2,10 @@
 GAMA INK SERVICE SRL store profile for OCR extraction.

 Toner refill and printer supplies store.
+
+Receipt structure:
+- TVA format: "TOTAL TVA A 4 19%" with amount on next line (4 is OCR for -)
+- "TOTAL TVA BON" with total TVA amount
 """

 import re
@@ -15,11 +19,11 @@ from . import ProfileRegistry
@ProfileRegistry.register
 class GamaInkProfile(BaseStoreProfile):
    """
-    GAMA INK SERVICE SRL - standard TVA profile.
+    GAMA INK SERVICE SRL - standard TVA profile with multiline support.

    Key characteristics:
-    - Standard TVA format (single rate, any percentage)
-    - Service-based (toner refill, printer supplies)
+    - TVA format with rate on one line, amount on next
+    - OCR often reads "-" as "4" (e.g., "A 4 19%" instead of "A - 19%")
    - CARD payment typical
    """

@@ -27,21 +31,23 @@ class GamaInkProfile(BaseStoreProfile):
    NAME_PATTERNS = ["GAMA INK", "GAMA", "GAMAINK", "GAMA INK SERVICE"]
    STORE_NAME = "GAMA INK SERVICE SRL"

-    # Standard TVA patterns (flexible - accepts any rate)
+    # GAMA INK TVA patterns (handles OCR errors)
    TVA_PATTERNS = [
-        # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY"
-        r'TVA\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
-        # "A - XX,XX% = YY,YY"
-        r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,]+)',
-        # "TVA XX% YY,YY" (simple format without code)
-        r'TVA\s+(\d{1,2})\s*%\s*([\d.,]+)',
-        # "TVA: YY,YY" (amount only, percent inferred)
-        r'TVA\s*:?\s*([\d.,]+)\s*(?:LEI|RON)?',
+        # "TOTAL TVA A 4 19%" (4 is OCR for -)
+        r'TOTAL\s+TVA\s*([A-D])\s*[4\-\s]+(\d{1,2})\s*%',
+        # "TOTAL TVA A 19%" without separator
+        r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%',
    ]

+    # TOTAL TVA BON pattern (fallback)
+    TOTAL_TVA_BON_PATTERN = r'TOTAL\s+TVA\s+BON'
+
    def extract_tva_entries(self, text: str) -> List[dict]:
        """
-        Extract TVA entries from receipt text.
+        Extract GAMA INK-specific TVA entries.
+
+        Format: "TOTAL TVA A 4 19%" on one line, amount on next line.
+        Note: OCR reads "-" as "4" sometimes.

        Args:
            text: Raw OCR text from receipt
@@ -50,45 +56,43 @@ class GamaInkProfile(BaseStoreProfile):
            List of TVA entries with code, percent, and amount
        """
        entries = []
-        seen = set()
+        text_upper = text.upper()
+        lines = text_upper.split('\n')

-        # Try coded patterns first (have both code and percent)
-        for pattern in self.TVA_PATTERNS[:2]:
-            for match in re.finditer(pattern, text, re.IGNORECASE):
-                try:
-                    code = match.group(1).upper()
-                    percent = int(match.group(2))
-                    amount = self._parse_decimal(match.group(3))
-
-                    if amount and amount > 0:
-                        entry_key = (code, percent)
-                        if entry_key not in seen:
-                            entries.append({
-                                'code': code,
-                                'percent': percent,
-                                'amount': amount
-                            })
-                            seen.add(entry_key)
-                except (ValueError, InvalidOperation, IndexError):
-                    continue
-
-        # Fallback to simple format (percent + amount without code)
-        if not entries:
-            simple_pattern = self.TVA_PATTERNS[2]
-            for match in re.finditer(simple_pattern, text, re.IGNORECASE):
-                try:
-                    percent = int(match.group(1))
-                    amount = self._parse_decimal(match.group(2))
+        # Find TVA rate line and get amount from next line
+        for i, line in enumerate(lines):
+            # Match "TOTAL TVA A 4 19%" or "TOTAL TVA A - 19%"
+            match = re.search(r'TOTAL\s+TVA\s*([A-D])\s*[4\-\s]+(\d{1,2})\s*%', line)
+            if match:
+                code = match.group(1)
+                percent = int(match.group(2))

+                # Amount should be on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
                    if amount and amount > 0:
                        entries.append({
-                            'code': 'A',
+                            'code': code,
                            'percent': percent,
                            'amount': amount
                        })
-                        break
-                except (ValueError, InvalidOperation):
-                    continue
+                        return entries
+
+        # Fallback: Find TOTAL TVA BON and get amount
+        for i, line in enumerate(lines):
+            if re.search(self.TOTAL_TVA_BON_PATTERN, line):
+                # Amount should be on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
+                    if amount and amount > 0:
+                        entries.append({
+                            'code': 'A',
+                            'percent': 19,  # Default Romanian TVA rate
+                            'amount': amount
+                        })
+                        return entries

        return entries

@@ -97,7 +101,8 @@ class GamaInkProfile(BaseStoreProfile):
        return {
            "has_multi_rate_tva": False,
            "card_equals_total": True,
-            "has_client_cui": False,
+            "has_client_cui": True,  # May have client CUI for business
            "has_efactura": False,
            "is_non_vat_payer": False,
+            "tva_on_separate_line": True,
        }
--- a/backend/modules/data_entry/services/ocr/profiles/omv.py
+++ b/backend/modules/data_entry/services/ocr/profiles/omv.py
@@ -5,6 +5,7 @@ OMV receipts typically include client CUI and use standard TVA format.
 Common at gas stations with fuel purchases.

 Date format: YYYY. MM. DD with spaces (e.g., "2025. 08. 14")
+OCR quirk: Numbers often have spaces before decimals (e.g., "55, 22" instead of "55,22")
 """

 import re
@@ -24,17 +25,24 @@ class OMVProfile(BaseStoreProfile):
    Key characteristics:
    - Standard TVA format (usually single rate, any percentage)
    - Includes client CUI on receipt (for business purchases)
-    - TVA table format: "A-XX,XX% base_amount tva_amount"
+    - TVA table format: "A-XX, XX% tva_amount total_amount" (with OCR spaces)
    - Supports historical rates (19%) and current rates (21%)
    - Date format: YYYY. MM. DD (with spaces)
+    - Client CUI format: "CLIENT C.U. I./C.I.F.: ROXXXXXXX"
    """

    CUI_LIST = ["11201891"]
    NAME_PATTERNS = ["OMV", "PETROM", "OMV PETROM", "0MV"]  # OCR variants
    STORE_NAME = "OMV PETROM MARKETING S.R.L."

-    # OMV TVA table pattern: "A-19,00%  285,66  49,58" (code-percent base tva)
-    TVA_TABLE_PATTERN = r'([A-D])\s*[-:]\s*(\d{1,2})[.,]\d{2}\s*%\s+([\d.,]+)\s+([\d.,]+)'
+    # OMV TVA table patterns (handles OCR spaces in numbers)
+    # Format: "A-21, 00% 55, 22 318, 16" (rate, TVA amount, total)
+    TVA_TABLE_PATTERNS = [
+        # "A-21, 00% 55, 22 318, 16" - with spaces in numbers
+        r'([A-D])\s*[-:]\s*(\d{1,2})[.,\s]*\d{0,2}\s*%\s+([\d.,\s]+)\s+([\d.,\s]+)',
+        # "TOTAL TAXE: 55, 22" - fallback to TOTAL TAXE
+        r'TOTAL\s+TAXE\s*:?\s*([\d.,\s]+)',
+    ]

    # Standard TVA pattern fallback
    TVA_STANDARD_PATTERN = r'TVA\s*:?\s*([\d.,]+)'
@@ -49,12 +57,38 @@ class OMVProfile(BaseStoreProfile):
        (r'(\d{2})[.,]\s*(\d{2})[.,]\s*(\d{4})', 0.85, 'dmy'),
    ]

+    # Client CUI patterns for OMV (unique format)
+    CLIENT_CUI_PATTERNS = [
+        # "CLIENT C.U. I./C.I.F.: RO1879855"
+        (r'CLIENT\s+C\.?\s*U\.?\s*I\.?\s*/?\s*C\.?\s*I\.?\s*F\.?\s*:?\s*(R?O?\s*\d{6,10})', 0.99),
+        # "C.U.I./C.I.F. CLIENT: XXXXXXX"
+        (r'C\.?\s*U\.?\s*I\.?\s*/?\s*C\.?\s*I\.?\s*F\.?\s+CLIENT\s*:?\s*(R?O?\s*\d{6,10})', 0.98),
+        # Fallback to simpler pattern
+        (r'CLIENT\s*:?\s*(R?O?\s*\d{6,10})', 0.90),
+    ]
+
+    # Client markers for OMV
+    CLIENT_MARKERS = [
+        r'CLIENT\s+C\.?\s*U\.?\s*I',
+        r'CLIENT\s+C\.?\s*I\.?\s*F',
+        r'NUME\s+CLIENT',
+        r'CLIENT\s*:',
+    ]
+
+    def _clean_ocr_number(self, value: str) -> str:
+        """Remove OCR spaces from numbers (e.g., '55, 22' -> '55,22')."""
+        # Remove spaces around commas and periods
+        value = re.sub(r'\s*([.,])\s*', r'\1', value)
+        # Remove any remaining spaces
+        value = value.replace(' ', '')
+        return value
+
    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Extract OMV-specific TVA entries.

-        OMV receipts often show TVA in table format with base and TVA amounts.
-        Falls back to standard extraction if table format not found.
+        OMV receipts show TVA in table format with spaces in numbers.
+        Format: "A-21, 00% 55, 22 318, 16" (rate, TVA amount, then a second amount that is not read)

        Args:
            text: Raw OCR text from receipt
@@ -63,35 +97,138 @@ class OMVProfile(BaseStoreProfile):
            List of TVA entries with code, percent, and amount
        """
        entries = []
-        seen = set()
+        text_upper = text.upper()

-        # Try table format first (more accurate)
-        for match in re.finditer(self.TVA_TABLE_PATTERN, text, re.IGNORECASE):
+        # Try table format first: "A-21, 00% 55, 22 318, 16"
+        table_pattern = self.TVA_TABLE_PATTERNS[0]
+        for match in re.finditer(table_pattern, text_upper):
            try:
                code = match.group(1).upper()
                percent = int(match.group(2))
-                # TVA amount is the second number (smaller one)
-                tva_amount = self._parse_decimal(match.group(4))
+                # Clean OCR spaces from amounts
+                tva_amount_str = self._clean_ocr_number(match.group(3))
+                tva_amount = self._parse_decimal(tva_amount_str)

                if tva_amount and tva_amount > 0:
-                    entry_key = (code, percent)
-                    if entry_key not in seen:
-                        entries.append({
-                            'code': code,
-                            'percent': percent,
-                            'amount': tva_amount
-                        })
-                        seen.add(entry_key)
-            except (ValueError, InvalidOperation):
+                    entries.append({
+                        'code': code,
+                        'percent': percent,
+                        'amount': tva_amount
+                    })
+                    return entries  # OMV usually has single TVA rate
+            except (ValueError, InvalidOperation, IndexError):
                continue

+        # Fallback: "TOTAL TAXE: 55, 22"
+        fallback_pattern = self.TVA_TABLE_PATTERNS[1]
+        match = re.search(fallback_pattern, text_upper)
+        if match:
+            try:
+                tva_amount_str = self._clean_ocr_number(match.group(1))
+                tva_amount = self._parse_decimal(tva_amount_str)
+                if tva_amount and tva_amount > 0:
+                    entries.append({
+                        'code': 'A',
+                        'percent': 19,  # Standard rate, will be corrected by validation
+                        'amount': tva_amount
+                    })
+            except (ValueError, InvalidOperation):
+                pass
+
        return entries

+    def extract_client_cui(self, text: str) -> Tuple[Optional[str], float]:
+        """
+        Extract client CUI from OMV receipt.
+
+        OMV uses format: "CLIENT C.U. I./C.I.F.: RO1879855"
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            Tuple of (cui, confidence) or (None, 0.0)
+        """
+        text_upper = text.upper()
+
+        # Check for OMV client markers
+        has_client = any(
+            re.search(marker, text_upper, re.IGNORECASE)
+            for marker in self.CLIENT_MARKERS
+        )
+
+        if not has_client:
+            return (None, 0.0)
+
+        # Try OMV-specific patterns
+        for pattern, confidence in self.CLIENT_CUI_PATTERNS:
+            match = re.search(pattern, text_upper, re.IGNORECASE)
+            if match:
+                cui = match.group(1)
+                # Clean up: remove RO prefix, spaces
+                cui_digits = re.sub(r'[^0-9]', '', cui)
+                if 6 <= len(cui_digits) <= 10:
+                    return (cui_digits, confidence)
+
+        return (None, 0.0)
+
+    def extract_payment_methods(self, text: str) -> List[dict]:
+        """
+        Extract OMV-specific payment methods.
+
+        OMV receipts use "CARTE CREDIT" instead of "CARD".
+        Payment amount equals TOTAL for gas station receipts.
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            List of payment methods with method, amount, and confidence
+        """
+        payments = []
+        text_upper = text.upper()
+
+        # Get total amount first
+        total_amount, _ = self.extract_total(text)
+        if not total_amount:
+            return []
+
+        # OMV payment patterns
+        payment_indicators = [
+            ('CARTE CREDIT', 'CARD', 0.98),
+            ('CARTE DE CREDIT', 'CARD', 0.98),
+            ('CARD', 'CARD', 0.95),
+            ('VISA', 'CARD', 0.95),
+            ('MASTERCARD', 'CARD', 0.95),
+            ('CONTACTLESS', 'CARD', 0.90),
+            ('NUMERAR', 'NUMERAR', 0.95),
+            ('CASH', 'NUMERAR', 0.90),
+        ]
+
+        for indicator, method, confidence in payment_indicators:
+            if indicator in text_upper:
+                payments.append({
+                    'method': method,
+                    'amount': total_amount,
+                    'confidence': confidence
+                })
+                return payments  # OMV usually has single payment method
+
+        # Fallback: If no explicit payment but has BON FISCAL, assume CARD
+        if 'BON FISCAL' in text_upper:
+            payments.append({
+                'method': 'CARD',
+                'amount': total_amount,
+                'confidence': 0.70
+            })
+
+        return payments
+
    def get_validation_hints(self) -> Dict[str, Any]:
        """Return OMV-specific validation hints."""
        return {
            "has_multi_rate_tva": False,
-            "card_equals_total": False,
+            "card_equals_total": True,  # Gas station: card equals total
            "has_client_cui": True,
            "has_efactura": False,
            "is_non_vat_payer": False,
--- a/backend/modules/data_entry/services/ocr/profiles/socar.py
+++ b/backend/modules/data_entry/services/ocr/profiles/socar.py
@@ -100,11 +100,62 @@ class SocarProfile(BaseStoreProfile):

        return entries

+    def extract_payment_methods(self, text: str) -> List[dict]:
+        """
+        Extract SOCAR-specific payment methods.
+
+        Gas stations use "CARTE CREDIT" or "CARD" for card payments.
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            List of payment methods with method, amount, and confidence
+        """
+        payments = []
+        text_upper = text.upper()
+
+        # Get total amount first
+        total_amount, _ = self.extract_total(text)
+        if not total_amount:
+            return []
+
+        # Gas station payment patterns
+        payment_indicators = [
+            ('CARTE CREDIT', 'CARD', 0.98),
+            ('CARTE DE CREDIT', 'CARD', 0.98),
+            ('CARD', 'CARD', 0.95),
+            ('VISA', 'CARD', 0.95),
+            ('MASTERCARD', 'CARD', 0.95),
+            ('CONTACTLESS', 'CARD', 0.90),
+            ('NUMERAR', 'NUMERAR', 0.95),
+            ('CASH', 'NUMERAR', 0.90),
+        ]
+
+        for indicator, method, confidence in payment_indicators:
+            if indicator in text_upper:
+                payments.append({
+                    'method': method,
+                    'amount': total_amount,
+                    'confidence': confidence
+                })
+                return payments
+
+        # Fallback: If no explicit payment but has BON FISCAL, assume CARD
+        if 'BON FISCAL' in text_upper:
+            payments.append({
+                'method': 'CARD',
+                'amount': total_amount,
+                'confidence': 0.70
+            })
+
+        return payments
+
    def get_validation_hints(self) -> Dict[str, Any]:
        """Return SOCAR-specific validation hints."""
        return {
            "has_multi_rate_tva": False,
-            "card_equals_total": False,
+            "card_equals_total": True,  # Gas station: card equals total
            "has_client_cui": True,
            "has_efactura": False,
            "is_non_vat_payer": False,
--- a/backend/modules/data_entry/services/ocr/profiles/stepout_market.py
+++ b/backend/modules/data_entry/services/ocr/profiles/stepout_market.py
@@ -2,11 +2,17 @@
 STEPOUT MARKET SRL store profile for OCR extraction.

 Bookstore with reduced TVA rate (5% for books in Romania).
+
+Receipt structure:
+- TVA format: "5.00% TUA*B" with amount on next line
+- Total format: "SUMA TOTALA:" with amount on next line
+- Payment: "CARD" with amount on next line
+- Client CUI: "CIF CLIENT:XXXXXXX"
 """

 import re
 from decimal import Decimal, InvalidOperation
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Tuple, Optional

 from .base import BaseStoreProfile
 from . import ProfileRegistry
@@ -19,33 +25,66 @@ class StepoutMarketProfile(BaseStoreProfile):

    Key characteristics:
    - Reduced TVA rate: 5% for books (cărți qualification in Romania)
-    - May also have standard rates for non-book items
-    - Patterns are flexible to accept ANY TVA rate
+    - TVA format: "X.XX% TUA*B" (OCR reads TVA as TUA)
+    - Multiline format for amounts
    - CARD payment typical
    """

    CUI_LIST = ["35532655"]
-    NAME_PATTERNS = ["STEPOUT", "STEPOUT MARKET", "STEP0UT", "STEPOUT MARKET SRL"]
+    NAME_PATTERNS = ["STEPOUT", "STEPOUT MARKET", "STEP0UT", "STEPUUT", "STEPOUT MARKET SRL"]
    STORE_NAME = "STEPOUT MARKET SRL"

-    # TVA patterns (flexible - accepts any rate including 5%)
+    # TVA patterns for Stepout (handles TUA OCR error and multiline)
    TVA_PATTERNS = [
-        # "TVA A: 5% = YY,YY" or "TVA-A 5% YY,YY" (coded format)
+        # "5.00% TUA*B" - OCR format with TUA
+        r'(\d{1,2})[.,]\d{0,2}\s*%\s*T[UV]A\*?([A-D])',
+        # "TVA A: 5% = YY,YY" or "TVA-A 5% YY,YY" (inline format)
        r'TVA\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
-        # "A - 5,00% = YY,YY" (table format)
-        r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,]+)',
-        # "TVA 5% YY,YY" (simple format - common for single rate)
-        r'TVA\s+(\d{1,2})\s*%\s*([\d.,]+)',
-        # "TVA 5,00%: YY,YY" (percent with colon)
-        r'TVA\s+(\d{1,2})[.,]\d{2}\s*%\s*:?\s*([\d.,]+)',
+        # "TOTAL TUA:" with amount on next line
+        r'TOTAL\s+T[UV]A\s*:',
    ]

+    # Total patterns for Stepout
+    TOTAL_PATTERNS = [
+        # "SUMA TOTALA:" with amount on next line
+        (r'SUMA\s+TOTALA\s*:', 0.98),
+        # "TOTAL:" fallback
+        (r'TOTAL\s*:', 0.90),
+    ]
+
+    def extract_total(self, text: str) -> Tuple[Optional[Decimal], float]:
+        """
+        Extract total amount from Stepout Market receipt.
+
+        Format: "SUMA TOTALA:" on one line, amount on next line.
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            Tuple of (total_amount, confidence) or (None, 0.0)
+        """
+        text_upper = text.upper()
+        lines = text_upper.split('\n')
+
+        for pattern, confidence in self.TOTAL_PATTERNS:
+            for i, line in enumerate(lines):
+                if re.search(pattern, line, re.IGNORECASE):
+                    # Amount should be on next line
+                    if i + 1 < len(lines):
+                        amount_str = lines[i + 1].strip()
+                        amount = self._parse_decimal(amount_str)
+                        if amount and amount > 0:
+                            return (amount, confidence)
+
+        # Fallback to base class
+        return super().extract_total(text)
+
    def extract_tva_entries(self, text: str) -> List[dict]:
        """
-        Extract TVA entries from receipt text.
+        Extract TVA entries from Stepout Market receipt.

-        Stepout Market primarily sells books which have 5% TVA in Romania.
-        The patterns are generic and will extract whatever rate is on the receipt.
+        Format: "5.00% TUA*B" on one line, amount on next line.

        Args:
            text: Raw OCR text from receipt
@@ -54,59 +93,112 @@ class StepoutMarketProfile(BaseStoreProfile):
            List of TVA entries with code, percent, and amount
        """
        entries = []
-        seen = set()
+        text_upper = text.upper()
+        lines = text_upper.split('\n')

-        # Try coded patterns first (have code letter)
-        for pattern in self.TVA_PATTERNS[:2]:
-            for match in re.finditer(pattern, text, re.IGNORECASE):
-                try:
-                    code = match.group(1).upper()
-                    percent = int(match.group(2))
-                    amount = self._parse_decimal(match.group(3))
+        # Try "X.XX% TUA*B" format first
+        for i, line in enumerate(lines):
+            match = re.search(r'(\d{1,2})[.,]\d{0,2}\s*%\s*T[UV]A\*?([A-D])', line)
+            if match:
+                percent = int(match.group(1))
+                code = match.group(2)

+                # Amount should be on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
                    if amount and amount > 0:
-                        entry_key = (code, percent)
-                        if entry_key not in seen:
-                            entries.append({
-                                'code': code,
-                                'percent': percent,
-                                'amount': amount
-                            })
-                            seen.add(entry_key)
-                except (ValueError, InvalidOperation, IndexError):
-                    continue
+                        entries.append({
+                            'code': code,
+                            'percent': percent,
+                            'amount': amount
+                        })
+                        return entries  # Single rate store

-        # Fallback to simple format (no code letter, just percent + amount)
-        if not entries:
-            for pattern in self.TVA_PATTERNS[2:]:
-                for match in re.finditer(pattern, text, re.IGNORECASE):
-                    try:
-                        percent = int(match.group(1))
-                        amount = self._parse_decimal(match.group(2))
-
-                        if amount and amount > 0:
-                            # Default to code 'A' for simple format
-                            entries.append({
-                                'code': 'A',
-                                'percent': percent,
-                                'amount': amount
-                            })
-                            break  # Only take first match for simple format
-                    except (ValueError, InvalidOperation):
-                        continue
-                if entries:
-                    break
+        # Try "TOTAL TUA:" format
+        for i, line in enumerate(lines):
+            if re.search(r'TOTAL\s+T[UV]A\s*:', line):
+                # Amount should be on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
+                    if amount and amount > 0:
+                        entries.append({
+                            'code': 'B',  # Books are usually code B (5%)
+                            'percent': 5,
+                            'amount': amount
+                        })
+                        return entries

        return entries

+    def extract_payment_methods(self, text: str) -> List[dict]:
+        """
+        Extract payment methods from Stepout Market receipt.
+
+        Format: "CARD" on one line, amount on next line.
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            List of payment methods with method, amount, and confidence
+        """
+        payments = []
+        text_upper = text.upper()
+        lines = text_upper.split('\n')
+
+        # Find a line that is exactly CARD or NUMERAR (or that contains CASH)
+        for i, line in enumerate(lines):
+            line_stripped = line.strip()
+            if line_stripped == 'CARD':
+                # Amount should be on next line
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
+                    if amount and amount > 0:
+                        payments.append({
+                            'method': 'CARD',
+                            'amount': amount,
+                            'confidence': 0.95
+                        })
+                        return payments
+            elif line_stripped == 'NUMERAR' or 'CASH' in line_stripped:
+                if i + 1 < len(lines):
+                    amount_str = lines[i + 1].strip()
+                    amount = self._parse_decimal(amount_str)
+                    if amount and amount > 0:
+                        payments.append({
+                            'method': 'NUMERAR',
+                            'amount': amount,
+                            'confidence': 0.95
+                        })
+                        return payments
+
+        # Fallback: check for inline CARD amount
+        for line in lines:
+            match = re.search(r'CARD\s*:?\s*([\d.,]+)', line)
+            if match:
+                amount = self._parse_decimal(match.group(1))
+                if amount and amount > 0:
+                    payments.append({
+                        'method': 'CARD',
+                        'amount': amount,
+                        'confidence': 0.90
+                    })
+                    return payments
+
+        return payments
+
    def get_validation_hints(self) -> Dict[str, Any]:
        """Return STEPOUT MARKET-specific validation hints."""
        return {
            "has_multi_rate_tva": False,
            "card_equals_total": True,
-            "has_client_cui": True,  # May have client CUI
+            "has_client_cui": True,
            "has_efactura": False,
            "is_non_vat_payer": False,
            "typical_tva_rate": 5,  # Books have 5% TVA in Romania
            "product_category": "books",
+            "tva_on_separate_line": True,
        }
--- a/backend/modules/data_entry/services/ocr/profiles/unlimited_keys.py
+++ b/backend/modules/data_entry/services/ocr/profiles/unlimited_keys.py
@@ -6,7 +6,7 @@ Key duplication service. Notable for CASH (NUMERAR) payments.

 import re
 from decimal import Decimal, InvalidOperation
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional, Tuple

 from .base import BaseStoreProfile
 from . import ProfileRegistry
@@ -22,26 +22,101 @@ class UnlimitedKeysProfile(BaseStoreProfile):
    - Key duplication service
    - NUMERAR (cash) payment common - different from most stores!
    - May also accept CARD
+    - OCR often reads "TVA" as "TUA" - need OCR error variants
    """

    CUI_LIST = ["18993187"]
    NAME_PATTERNS = ["UNLIMITED KEYS", "UNLIMITED", "UNL1MITED", "UNLIMITED KEYS SRL"]
    STORE_NAME = "UNLIMITED KEYS S.R.L."

-    # Standard TVA patterns (flexible - accepts any rate)
+    # Standard TVA patterns - including OCR error variants (TVA -> TUA)
    TVA_PATTERNS = [
-        # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY"
-        r'TVA\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
+        # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY" (including TUA OCR error)
+        r'T[UV]A\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,\s]+)',
        # "A - XX,XX% = YY,YY"
-        r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,]+)',
-        # "TVA XX% YY,YY" (simple format without code)
-        r'TVA\s+(\d{1,2})\s*%\s*([\d.,]+)',
+        r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,\s]+)',
+        # "TVA XX% YY,YY" (simple format, includes TUA)
+        r'T[UV]A\s+(\d{1,2})\s*%\s*([\d.,\s]+)',
+        # "XX.XX% TUA*A YY.YY" (OCR format with TUA*A or TUA)
+        r'(\d{1,2})[.,]\d{0,2}\s*%\s*T[UV]A\*?[A-D]?\s*([\d.,\s]+)',
+        # "TOTAL TUA: YY.YY" (total TVA amount only)
+        r'TOTAL\s+T[UV]A\s*:?\s*([\d.,\s]+)',
    ]

+    # TOTAL patterns for UNLIMITED KEYS (handles "80 .00" format)
+    TOTAL_PATTERNS = [
+        # "SUMA TOTALA: 80 .00" (with space before decimal)
+        (r'SUMA\s+TOTALA\s*:?\s*([\d\s.,]+)', 0.98),
+        # "TOTALA: 80,00"
+        (r'TOTALA\s*:?\s*([\d.,]+)', 0.95),
+        # Standard TOTAL patterns from base class
+        (r'TOTAL\s+(?:DE\s+PLATA|ACHITAT|LEI)\s*:?\s*([\d.,]+)', 0.95),
+        (r'TOTAL\s*:?\s*([\d.,]+)', 0.90),
+    ]
+
+    # Payment patterns - NUMERAR is primary for this store
+    PAYMENT_PATTERNS = [
+        # "NUMERAR 80.00" or "NUMERAR: 80.00"
+        (r'NUMERAR\s*:?\s*([\d.,\s]+)', 'NUMERAR', 0.98),
+        # "CARD 80.00" or "CARD: 80.00"
+        (r'CARD\s*:?\s*([\d.,\s]+)', 'CARD', 0.95),
+    ]
+
+    # Client CUI patterns - specific to this receipt format
+    CLIENT_CUI_PATTERNS = [
+        # "CIF CLIENT:1879855" (exact format from OCR)
+        (r'CIF\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.99),
+        # "CLIENT CIF: ROXXXXXXX"
+        (r'CLIENT\s+CIF\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.98),
+        # "C.I.F. CLIENT: XXXXXXX"
+        (r'C\.?I\.?F\.?\s+CLIENT\s*:?\s*(R?[O0]?\s*\d{6,10})', 0.98),
+    ]
+
+    # Override client markers to be less strict
+    CLIENT_MARKERS = [
+        r'CIF\s+CLIENT',
+        r'CLIENT\s+CIF',
+        r'C\.?I\.?F\.?\s+CLIENT',
+        r'CLIENT\s*:',
+    ]
+
+    def extract_total(self, text: str) -> Tuple[Optional[Decimal], float]:
+        """
+        Extract total amount from receipt text.
+
+        Handles UNLIMITED KEYS format with space before decimal (e.g., "80 .00").
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            Tuple of (total_amount, confidence) or (None, 0.0)
+        """
+        text_upper = text.upper()
+
+        for pattern, confidence in self.TOTAL_PATTERNS:
+            match = re.search(pattern, text_upper, re.IGNORECASE)
+            if match:
+                try:
+                    # Clean up amount string (remove spaces, fix decimal)
+                    amount_str = match.group(1)
+                    # Remove spaces that might appear before decimal
+                    amount_str = re.sub(r'\s+', '', amount_str)
+                    amount = self._parse_decimal(amount_str)
+
+                    if amount and amount > 0:
+                        return (amount, confidence)
+                except (ValueError, InvalidOperation):
+                    continue
+
+        return (None, 0.0)
+
    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Extract TVA entries from receipt text.

+        Handles OCR errors where TVA is read as TUA.
+
        Args:
            text: Raw OCR text from receipt

@@ -49,48 +124,139 @@ class UnlimitedKeysProfile(BaseStoreProfile):
            List of TVA entries with code, percent, and amount
        """
        entries = []
-        seen = set()
+        text_upper = text.upper()

-        # Try coded patterns first
-        for pattern in self.TVA_PATTERNS[:2]:
-            for match in re.finditer(pattern, text, re.IGNORECASE):
+        # Pattern 4: "XX.XX% TUA*A YY.YY" - common OCR format
+        pattern4 = self.TVA_PATTERNS[3]
+        match = re.search(pattern4, text_upper)
+        if match:
+            try:
+                percent = int(match.group(1))
+                amount_str = re.sub(r'\s+', '', match.group(2))
+                amount = self._parse_decimal(amount_str)
+                if amount and amount > 0:
+                    entries.append({
+                        'code': 'A',
+                        'percent': percent,
+                        'amount': amount
+                    })
+                    return entries
+            except (ValueError, InvalidOperation, IndexError):
+                pass
+
+        # Pattern 5: "TOTAL TUA: YY.YY" - fallback to total TVA
+        pattern5 = self.TVA_PATTERNS[4]
+        match = re.search(pattern5, text_upper)
+        if match:
+            try:
+                amount_str = re.sub(r'\s+', '', match.group(1))
+                amount = self._parse_decimal(amount_str)
+                if amount and amount > 0:
+                    # No percent on this line: 19 is assumed here, not inferred from the total
+                    entries.append({
+                        'code': 'A',
+                        'percent': 19,  # Standard Romanian TVA rate
+                        'amount': amount
+                    })
+                    return entries
+            except (ValueError, InvalidOperation, IndexError):
+                pass
+
+        # Try the inline patterns: two coded formats, then the simple format without a code
+        for pattern in self.TVA_PATTERNS[:3]:
+            for match in re.finditer(pattern, text_upper, re.IGNORECASE):
                try:
-                    code = match.group(1).upper()
-                    percent = int(match.group(2))
-                    amount = self._parse_decimal(match.group(3))
-
-                    if amount and amount > 0:
-                        entry_key = (code, percent)
-                        if entry_key not in seen:
-                            entries.append({
-                                'code': code,
-                                'percent': percent,
-                                'amount': amount
-                            })
-                            seen.add(entry_key)
-                except (ValueError, InvalidOperation, IndexError):
-                    continue
-
-        # Fallback to simple format
-        if not entries:
-            simple_pattern = self.TVA_PATTERNS[2]
-            for match in re.finditer(simple_pattern, text, re.IGNORECASE):
-                try:
-                    percent = int(match.group(1))
-                    amount = self._parse_decimal(match.group(2))
+                    groups = match.groups()
+                    if len(groups) == 3:
+                        code = groups[0].upper()
+                        percent = int(groups[1])
+                        amount_str = re.sub(r'\s+', '', groups[2])
+                    else:
+                        code = 'A'
+                        percent = int(groups[0])
+                        amount_str = re.sub(r'\s+', '', groups[1])

+                    amount = self._parse_decimal(amount_str)
                    if amount and amount > 0:
                        entries.append({
-                            'code': 'A',
+                            'code': code,
                            'percent': percent,
                            'amount': amount
                        })
-                        break
-                except (ValueError, InvalidOperation):
+                        return entries
+                except (ValueError, InvalidOperation, IndexError):
                    continue

        return entries

+    def extract_payment_methods(self, text: str) -> List[dict]:
+        """
+        Extract payment methods from receipt text.
+
+        Handles NUMERAR (cash) as primary payment for this store.
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            List of payment methods with method, amount, and confidence
+        """
+        payments = []
+        text_upper = text.upper()
+
+        for pattern, method, confidence in self.PAYMENT_PATTERNS:
+            match = re.search(pattern, text_upper, re.IGNORECASE)
+            if match:
+                try:
+                    amount_str = re.sub(r'\s+', '', match.group(1))
+                    amount = self._parse_decimal(amount_str)
+
+                    if amount and amount > 0:
+                        payments.append({
+                            'method': method,
+                            'amount': amount,
+                            'confidence': confidence
+                        })
+                except (ValueError, InvalidOperation):
+                    continue
+
+        return payments
+
+    def extract_client_cui(self, text: str) -> Tuple[Optional[str], float]:
+        """
+        Extract the client CUI (digits only) from receipt text.
+
+        Handles "CIF CLIENT:1879855", including an "RO" prefix that OCR read as "R0".
+
+        Args:
+            text: Raw OCR text from receipt
+
+        Returns:
+            Tuple of (cui, confidence) or (None, 0.0)
+        """
+        text_upper = text.upper()
+
+        # Bail out early unless some client marker is present
+        has_client = any(
+            re.search(marker, text_upper, re.IGNORECASE)
+            for marker in self.CLIENT_MARKERS
+        )
+
+        if not has_client:
+            return (None, 0.0)
+
+        # Patterns are tried in list order; the first valid CUI wins
+        for pattern, confidence in self.CLIENT_CUI_PATTERNS:
+            match = re.search(pattern, text_upper, re.IGNORECASE)
+            if match:
+                # Drop the country prefix first: in "R0" the "0" is a misread "O", not a CUI digit
+                cui = re.sub(r'^R[O0]', '', match.group(1))
+                cui_digits = re.sub(r'[^0-9]', '', cui)
+                if 6 <= len(cui_digits) <= 10:
+                    return (cui_digits, confidence)
+
+        return (None, 0.0)
+
    def get_validation_hints(self) -> Dict[str, Any]:
        """Return UNLIMITED KEYS-specific validation hints."""
        return {
--- a/backend/modules/data_entry/services/ocr_extractor.py
+++ b/backend/modules/data_entry/services/ocr_extractor.py
@@ -456,7 +456,9 @@ class ReceiptExtractor:
        # Lookup store-specific profile for enhanced extraction accuracy
        store_profile = ProfileRegistry.get_profile(result.cui) if result.cui else None
        if store_profile:
-            print(f"[Profile] Using {store_profile.__class__.__name__} for CUI {result.cui}", flush=True)
+            print(f"[Profile] ✅ Using {store_profile.STORE_NAME} ({store_profile.__class__.__name__}) for CUI {result.cui}", flush=True)
+        else:
+            print(f"[Profile] ⚠️ No profile found for CUI '{result.cui}' - using GENERIC extraction", flush=True)

        # =========================================================================
        # STEP 2: Extract ALL fields using profile (if available) or generic
@@ -490,8 +492,11 @@ class ReceiptExtractor:
                result.client_address = client_address
                result.confidence_client = confidence

+            # Log extraction results for debugging
+            tva_summary = ", ".join([f"{e.get('percent', '?')}%={e.get('amount', '?')}" for e in result.tva_entries]) if result.tva_entries else "none"
+            payment_summary = ", ".join([f"{p.get('method', '?')}={p.get('amount', '?')}" for p in result.payment_methods]) if result.payment_methods else "none"
            print(f"[Profile] Extracted: total={result.amount}, date={result.receipt_date}, "
-                  f"TVA entries={len(result.tva_entries)}, payments={len(result.payment_methods)}", flush=True)
+                  f"TVA=[{tva_summary}], payments=[{payment_summary}], client_cui={result.client_cui}", flush=True)
        else:
            # Generic extraction for unknown stores
            result.amount, result.confidence_amount = self._extract_amount(text_upper)
@@ -507,6 +512,12 @@ class ReceiptExtractor:
            result.client_address = client_address
            result.confidence_client = confidence

+            # Log generic extraction results for debugging
+            tva_summary = ", ".join([f"{e.get('percent', '?')}%={e.get('amount', '?')}" for e in result.tva_entries]) if result.tva_entries else "none"
+            payment_summary = ", ".join([f"{p.get('method', '?')}={p.get('amount', '?')}" for p in result.payment_methods]) if result.payment_methods else "none"
+            print(f"[Generic] Extracted: total={result.amount}, date={result.receipt_date}, "
+                  f"TVA=[{tva_summary}], payments=[{payment_summary}], client_cui={result.client_cui}", flush=True)
+
        # Series extraction (no profile method, always generic)
        result.receipt_series, _ = self._extract_series(text_upper)