""" OMV Petrom store profile for OCR extraction. OMV receipts typically include client CUI and use standard TVA format. Common at gas stations with fuel purchases. Date format: YYYY. MM. DD with spaces (e.g., "2025. 08. 14") """ import re from datetime import date from decimal import Decimal, InvalidOperation from typing import List, Dict, Any, Tuple, Optional from .base import BaseStoreProfile from . import ProfileRegistry @ProfileRegistry.register class OMVProfile(BaseStoreProfile): """ OMV PETROM MARKETING S.R.L. - standard TVA with client CUI. Key characteristics: - Standard TVA format (usually single rate, any percentage) - Includes client CUI on receipt (for business purchases) - TVA table format: "A-XX,XX% base_amount tva_amount" - Supports historical rates (19%) and current rates (21%) - Date format: YYYY. MM. DD (with spaces) """ CUI_LIST = ["11201891"] NAME_PATTERNS = ["OMV", "PETROM", "OMV PETROM", "0MV"] # OCR variants STORE_NAME = "OMV PETROM MARKETING S.R.L." # OMV TVA table pattern: "A-19,00% 285,66 49,58" (code-percent base tva) TVA_TABLE_PATTERN = r'([A-D])\s*[-:]\s*(\d{1,2})[.,]\d{2}\s*%\s+([\d.,]+)\s+([\d.,]+)' # Standard TVA pattern fallback TVA_STANDARD_PATTERN = r'TVA\s*:?\s*([\d.,]+)' # OMV specific: prioritize YYYY. MM. DD format with spaces DATE_PATTERNS_OCR_SPACES = [ # YYYY. MM. DD with time (OMV format) (r'(\d{4})[.,]\s*(\d{2})[.,]\s*(\d{2})\s+\d{2}:\d{2}', 0.98, 'ymd'), (r'(\d{4})[.,]\s*(\d{2})[.,]\s*(\d{2})', 0.95, 'ymd'), # Fallback to DD. MM. YYYY (r'(\d{2})[.,]\s*(\d{2})[.,]\s*(\d{4})\s+\d{2}:\d{2}', 0.92, 'dmy'), (r'(\d{2})[.,]\s*(\d{2})[.,]\s*(\d{4})', 0.85, 'dmy'), ] def extract_tva_entries(self, text: str) -> List[dict]: """ Extract OMV-specific TVA entries. OMV receipts often show TVA in table format with base and TVA amounts. Falls back to standard extraction if table format not found. Args: text: Raw OCR text from receipt Returns: List of TVA entries with code, percent, and amount """ entries = [] seen = set() # Try table format first (more accurate) for match in re.finditer(self.TVA_TABLE_PATTERN, text, re.IGNORECASE): try: code = match.group(1).upper() percent = int(match.group(2)) # TVA amount is the second number (smaller one) tva_amount = self._parse_decimal(match.group(4)) if tva_amount and tva_amount > 0: entry_key = (code, percent) if entry_key not in seen: entries.append({ 'code': code, 'percent': percent, 'amount': tva_amount }) seen.add(entry_key) except (ValueError, InvalidOperation): continue return entries def get_validation_hints(self) -> Dict[str, Any]: """Return OMV-specific validation hints.""" return { "has_multi_rate_tva": False, "card_equals_total": False, "has_client_cui": True, "has_efactura": False, "is_non_vat_payer": False, "tva_table_format": True, }