Files
roa2web-service-auto/backend/modules/data_entry/services/ocr/profiles/omv.py
Claude Agent 099556213d feat(ocr): Add modular store profiles with hot-reload support
## Store Profiles System
- Add ProfileRegistry for CUI-based profile lookup
- Add BaseStoreProfile with generic extraction patterns
- Implement hot-reload via POST /api/data-entry/ocr/profiles/reload

## 12 Store Profiles
- LIDL: Multi-rate TVA (A, B, C, D codes)
- OMV, SOCAR: B2B with client CUI, YYYY.MM.DD dates
- BRICK, DEDEMAN: Standard TVA, e-factura support
- KINETERRA, BEST PRINT: Non-VAT payers (returns [])
- STEPOUT MARKET: TVA 5% (books/reduced rate)
- UNLIMITED KEYS: NUMERAR payment detection
- GAMA INK, ELECTROBERING, PICTUS VELUM: Standard TVA

## Flexible TVA Patterns
- All patterns use (\d{1,2})% to accept any rate
- Supports historical (19%, 9%, 5%) and current (21%, 11%)

## Payment Methods Fix
- Fixed base.py to support multiple payments of same type
- Changed deduplication from method-only to (method, amount) tuple
- Returns separate entries for split payments

## Tools
- Add generate_store_profile.py for automatic profile generation
- Analyzes PDFs via OCR API and detects patterns

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-06 23:07:07 +00:00

100 lines
3.4 KiB
Python

"""
OMV Petrom store profile for OCR extraction.
OMV receipts typically include client CUI and use standard TVA format.
Common at gas stations with fuel purchases.
Date format: YYYY. MM. DD with spaces (e.g., "2025. 08. 14")
"""
import re
from datetime import date
from decimal import Decimal, InvalidOperation
from typing import List, Dict, Any, Tuple, Optional
from .base import BaseStoreProfile
from . import ProfileRegistry
@ProfileRegistry.register
class OMVProfile(BaseStoreProfile):
    """
    Store profile for OMV PETROM MARKETING S.R.L. receipts.

    What this profile declares:
    - One CUI plus several name spellings, including the OCR misread "0MV".
    - A TVA table regex for rows such as "A-19,00% 285,66 49,58"
      (rate code, percentage, base amount, TVA amount).
    - Date regexes that try YYYY. MM. DD before DD. MM. YYYY.
    - Validation hints describing a single-rate receipt with a client CUI.

    NOTE(review): TVA_STANDARD_PATTERN is defined here but nothing in this
    class references it; presumably it is meant for a fallback path that
    lives elsewhere (or was never wired up) - confirm.
    """

    # Identification data for this store.
    CUI_LIST = ["11201891"]
    NAME_PATTERNS = ["OMV", "PETROM", "OMV PETROM", "0MV"]  # "0MV" covers an OCR misread
    STORE_NAME = "OMV PETROM MARKETING S.R.L."

    # Table row: code letter A-D, '-' or ':', percentage with two decimals,
    # '%', then two numeric columns (base amount, TVA amount).
    # Only the integer part of the percentage is captured.
    TVA_TABLE_PATTERN = r'([A-D])\s*[-:]\s*(\d{1,2})[.,]\d{2}\s*%\s+([\d.,]+)\s+([\d.,]+)'

    # Plain "TVA: <amount>" form. Not used by any method of this class.
    TVA_STANDARD_PATTERN = r'TVA\s*:?\s*([\d.,]+)'

    # Date regexes tolerant of spaces after the separators.
    # Each tuple is (regex, number, order tag); presumably the number is a
    # confidence score and the tag names the capture-group order - verify
    # against the consumer of this attribute.
    DATE_PATTERNS_OCR_SPACES = [
        # Year-first variants (the OMV layout), with and without a time
        (r'(\d{4})[.,]\s*(\d{2})[.,]\s*(\d{2})\s+\d{2}:\d{2}', 0.98, 'ymd'),
        (r'(\d{4})[.,]\s*(\d{2})[.,]\s*(\d{2})', 0.95, 'ymd'),
        # Day-first variants, tried afterwards
        (r'(\d{2})[.,]\s*(\d{2})[.,]\s*(\d{4})\s+\d{2}:\d{2}', 0.92, 'dmy'),
        (r'(\d{2})[.,]\s*(\d{2})[.,]\s*(\d{4})', 0.85, 'dmy'),
    ]

    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Collect TVA entries from table-style rows in the receipt text.

        Every match of TVA_TABLE_PATTERN (case-insensitive) is inspected.
        A row is kept when its TVA amount parses to a positive value and
        its (code, percent) pair has not been seen before; later rows with
        the same pair are ignored.

        NOTE(review): no fallback is performed here. If no table row
        matches, the result is an empty list.

        Args:
            text: Raw OCR text from receipt

        Returns:
            List of dicts with keys 'code' (upper-case letter), 'percent'
            (int) and 'amount' (value returned by self._parse_decimal),
            in order of first appearance.
        """
        results = []
        recorded_keys = set()

        for row in re.finditer(self.TVA_TABLE_PATTERN, text, re.IGNORECASE):
            # Third capture group is the base amount, which is not reported.
            code_raw, percent_raw, _base_raw, amount_raw = row.groups()
            try:
                rate_code = code_raw.upper()
                rate_percent = int(percent_raw)
                # _parse_decimal is not defined in this class; presumably
                # inherited from BaseStoreProfile - verify.
                vat_value = self._parse_decimal(amount_raw)
                # The comparison stays inside the try so that an
                # InvalidOperation raised by it is handled like a parse error.
                if not vat_value or not (vat_value > 0):
                    continue
            except (ValueError, InvalidOperation):
                continue

            key = (rate_code, rate_percent)
            if key in recorded_keys:
                continue

            recorded_keys.add(key)
            results.append({
                'code': rate_code,
                'percent': rate_percent,
                'amount': vat_value
            })

        return results

    def get_validation_hints(self) -> Dict[str, Any]:
        """Return the fixed set of validation flags for OMV receipts."""
        hints = {
            "has_multi_rate_tva": False,
            "card_equals_total": False,
            "has_client_cui": True,
            "has_efactura": False,
            "is_non_vat_payer": False,
            "tva_table_format": True,
        }
        return hints