# roa2web-service-auto/backend/modules/data_entry/services/ocr/profiles/gama_ink.py

"""
GAMA INK SERVICE SRL store profile for OCR extraction.

Toner refill and printer supplies store.

Receipt structure:
- TVA format: "TOTAL TVA A 4 19%" with the amount on the next line (the "4" is an OCR misread of "-")
- "TOTAL TVA BON" with total TVA amount
"""

import re
from decimal import Decimal, InvalidOperation
from typing import List, Dict, Any

from .base import BaseStoreProfile
from . import ProfileRegistry


@ProfileRegistry.register
class GamaInkProfile(BaseStoreProfile):
    """
    GAMA INK SERVICE SRL - standard TVA profile with multiline support.

    Key characteristics:
    - TVA rate is printed on one line, the TVA amount on the following line
    - OCR often reads "-" as "4" (e.g., "A 4 19%" instead of "A - 19%")
    - CARD payment typical
    """

    CUI_LIST = ["17741882"]
    NAME_PATTERNS = ["GAMA INK", "GAMA", "GAMAINK", "GAMA INK SERVICE"]
    STORE_NAME = "GAMA INK SERVICE SRL"

    # GAMA INK TVA rate-line patterns, tried in order by extract_tva_entries.
    # Group 1 is the TVA code letter (A-D), group 2 the percentage digits.
    TVA_PATTERNS = [
        # "TOTAL TVA A - 19%" and its OCR variant "TOTAL TVA A 4 19%":
        # any run of "4", "-" or whitespace is accepted as the separator.
        # NOTE: because "4" counts as separator, a rate whose first digit is 4
        # (e.g. "49%") is read as its last digit only.
        r'TOTAL\s+TVA\s*([A-D])\s*[4\-\s]+(\d{1,2})\s*%',
        # "TOTAL TVA A 19%" (whitespace-only separator). Every line this
        # matches is already matched by the pattern above; kept unchanged in
        # case other code reads TVA_PATTERNS.
        r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%',
    ]

    # "TOTAL TVA BON" marker line, used as a fallback when no rate line
    # yields a usable amount.
    TOTAL_TVA_BON_PATTERN = r'TOTAL\s+TVA\s+BON'

    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Extract GAMA INK-specific TVA entries.

        The text is upper-cased and scanned line by line. The first line that
        matches one of ``TVA_PATTERNS`` and is followed by a line parsing to a
        positive amount produces the single returned entry. If no such pair
        exists, the first ``TOTAL TVA BON`` line followed by a positive amount
        is used instead, with code ``'A'`` and percent ``19``.

        Args:
            text: Raw OCR text from receipt. Empty or missing text yields an
                empty list.

        Returns:
            A list holding at most one dict with keys ``code``, ``percent``
            and ``amount``; empty when nothing could be extracted.
        """
        if not text:
            # Nothing to scan; also avoids calling .upper() on None.
            return []

        lines = text.upper().split('\n')

        # Primary path: rate line ("TOTAL TVA A 4 19%") + amount on next line.
        for index, line in enumerate(lines):
            rate_match = self._match_tva_rate(line)
            if rate_match is None:
                continue
            amount = self._amount_on_next_line(lines, index)
            if amount is not None:
                return [{
                    'code': rate_match.group(1),
                    'percent': int(rate_match.group(2)),
                    'amount': amount
                }]

        # Fallback: "TOTAL TVA BON" marker + amount on next line.
        for index, line in enumerate(lines):
            if not re.search(self.TOTAL_TVA_BON_PATTERN, line):
                continue
            amount = self._amount_on_next_line(lines, index)
            if amount is not None:
                return [{
                    'code': 'A',
                    # The marker line carries no rate, so one is assumed.
                    # NOTE(review): confirm 19 is still the intended default.
                    'percent': 19,  # Default Romanian TVA rate
                    'amount': amount
                }]

        return []

    def _match_tva_rate(self, line: str):
        """Return the first ``TVA_PATTERNS`` match found in *line*, or None."""
        for pattern in self.TVA_PATTERNS:
            found = re.search(pattern, line)
            if found:
                return found
        return None

    def _amount_on_next_line(self, lines: List[str], index: int) -> Any:
        """
        Parse the line following ``lines[index]`` as an amount.

        Returns the value produced by ``self._parse_decimal`` when it is
        truthy and greater than zero; otherwise None (including when
        ``index`` is the last line).
        """
        following = index + 1
        if following >= len(lines):
            return None
        amount = self._parse_decimal(lines[following].strip())
        if amount and amount > 0:
            return amount
        return None

    def get_validation_hints(self) -> Dict[str, Any]:
        """Return GAMA INK-specific validation hints as a flag dictionary."""
        return {
            "has_multi_rate_tva": False,
            "card_equals_total": True,
            "has_client_cui": True,  # May have client CUI for business
            "has_efactura": False,
            "is_non_vat_payer": False,
            "tva_on_separate_line": True,
        }