""" GAMA INK SERVICE SRL store profile for OCR extraction. Toner refill and printer supplies store. Receipt structure: - TVA format: "TOTAL TVA A 4 19%" with amount on next line (4 is OCR for -) - "TOTAL TVA BON" with total TVA amount """ import re from decimal import Decimal, InvalidOperation from typing import List, Dict, Any from .base import BaseStoreProfile from . import ProfileRegistry @ProfileRegistry.register class GamaInkProfile(BaseStoreProfile): """ GAMA INK SERVICE SRL - standard TVA profile with multiline support. Key characteristics: - TVA format with rate on one line, amount on next - OCR often reads "-" as "4" (e.g., "A 4 19%" instead of "A - 19%") - CARD payment typical """ CUI_LIST = ["17741882"] NAME_PATTERNS = ["GAMA INK", "GAMA", "GAMAINK", "GAMA INK SERVICE"] STORE_NAME = "GAMA INK SERVICE SRL" # GAMA INK TVA patterns (handles OCR errors) TVA_PATTERNS = [ # "TOTAL TVA A 4 19%" (4 is OCR for -) r'TOTAL\s+TVA\s*([A-D])\s*[4\-\s]+(\d{1,2})\s*%', # "TOTAL TVA A - 19%" r'TOTAL\s+TVA\s+([A-D])\s+(\d{1,2})\s*%', ] # TOTAL TVA BON pattern (fallback) TOTAL_TVA_BON_PATTERN = r'TOTAL\s+TVA\s+BON' def extract_tva_entries(self, text: str) -> List[dict]: """ Extract GAMA INK-specific TVA entries. Format: "TOTAL TVA A 4 19%" on one line, amount on next line. Note: OCR reads "-" as "4" sometimes. Args: text: Raw OCR text from receipt Returns: List of TVA entries with code, percent, and amount """ entries = [] text_upper = text.upper() lines = text_upper.split('\n') # Find TVA rate line and get amount from next line for i, line in enumerate(lines): # Match "TOTAL TVA A 4 19%" or "TOTAL TVA A - 19%" match = re.search(r'TOTAL\s+TVA\s*([A-D])\s*[4\-\s]+(\d{1,2})\s*%', line) if match: code = match.group(1) percent = int(match.group(2)) # Amount should be on next line if i + 1 < len(lines): amount_str = lines[i + 1].strip() amount = self._parse_decimal(amount_str) if amount and amount > 0: entries.append({ 'code': code, 'percent': percent, 'amount': amount }) return entries # Fallback: Find TOTAL TVA BON and get amount for i, line in enumerate(lines): if re.search(self.TOTAL_TVA_BON_PATTERN, line): # Amount should be on next line if i + 1 < len(lines): amount_str = lines[i + 1].strip() amount = self._parse_decimal(amount_str) if amount and amount > 0: entries.append({ 'code': 'A', 'percent': 19, # Default Romanian TVA rate 'amount': amount }) return entries return entries def get_validation_hints(self) -> Dict[str, Any]: """Return GAMA INK-specific validation hints.""" return { "has_multi_rate_tva": False, "card_equals_total": True, "has_client_cui": True, # May have client CUI for business "has_efactura": False, "is_non_vat_payer": False, "tva_on_separate_line": True, }