## Store Profiles System
- Add ProfileRegistry for CUI-based profile lookup
- Add BaseStoreProfile with generic extraction patterns
- Implement hot-reload via POST /api/data-entry/ocr/profiles/reload
## 12 Store Profiles
- LIDL: Multi-rate TVA (A, B, C, D codes)
- OMV, SOCAR: B2B with client CUI, YYYY.MM.DD dates
- BRICK, DEDEMAN: Standard TVA, e-factura support
- KINETERRA, BEST PRINT: Non-VAT payers (returns [])
- STEPOUT MARKET: TVA 5% (books/reduced rate)
- UNLIMITED KEYS: NUMERAR payment detection
- GAMA INK, ELECTROBERING, PICTUS VELUM: Standard TVA
## Flexible TVA Patterns
- All patterns use (\d{1,2})% to accept any rate
- Supports both historical (19%, 9%, 5%) and current (21%, 11%) TVA rates
## Payment Methods Fix
- Fixed base.py to support multiple payments of same type
- Changed deduplication from method-only to (method, amount) tuple
- Returns separate entries for split payments
## Tools
- Add generate_store_profile.py for automatic profile generation
- Analyzes PDFs via OCR API and detects patterns
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
119 lines
3.8 KiB
Python
119 lines
3.8 KiB
Python
"""
|
|
DEDEMAN store profile for OCR extraction.
|
|
|
|
Dedeman receipts may include e-factura information and use standard TVA format.
|
|
Large DIY retailer in Romania.
|
|
"""
|
|
|
|
import re
|
|
from decimal import Decimal, InvalidOperation
|
|
from typing import List, Dict, Any
|
|
|
|
from .base import BaseStoreProfile
|
|
from . import ProfileRegistry
|
|
|
|
|
|
@ProfileRegistry.register
class DedemanProfile(BaseStoreProfile):
    """
    OCR extraction profile for DEDEMAN SRL receipts.

    The profile bundles:
    - identification data (CUI and store-name spellings, including OCR misreads)
    - regular expressions for coded ("TVA A: 19% = 1,23") and uncoded
      ("TVA (19%) 1,23") TVA lines
    - a regular expression for the optional e-factura reference
    """

    CUI_LIST = ["2816464"]
    NAME_PATTERNS = ["DEDEMAN", "DEDEMAN SRL", "OEDEMAN", "D3DEMAN"]  # OCR variants
    STORE_NAME = "DEDEMAN SRL"

    # TVA line formats, rate-agnostic (any one- or two-digit percentage).
    # Order matters: indexes 0 and 1 are the coded formats, index 2 is the
    # uncoded fallback used by extract_tva_entries().
    TVA_PATTERNS = [
        # "TVA A: XX% = YY,YY" or "TVA-A XX% YY,YY"
        r'TVA\s*[-:]?\s*([A-D])\s*:?\s*(\d{1,2})\s*%\s*[=:]?\s*([\d.,]+)',
        # "A - XX,XX% = YY,YY"
        r'([A-D])\s*[-:]\s*(\d{1,2})[.,]?\d{0,2}\s*%\s*[=:]?\s*([\d.,]+)',
        # "TVA (XX%) YY,YY"
        r'TVA\s*\(?\s*(\d{1,2})\s*%\s*\)?\s*:?\s*([\d.,]+)',
    ]

    # Captures the alphanumeric token that follows an "e-factura" label.
    EFACTURA_PATTERN = r'e-?factura\s*:?\s*([A-Z0-9]+)'

    def extract_tva_entries(self, text: str) -> List[dict]:
        """
        Collect the TVA lines found in a Dedeman receipt.

        The two coded patterns are scanned first; each distinct
        (code, percent) pair is reported once, keeping the first amount seen.
        Only when that yields nothing is the uncoded pattern tried, and then
        just the first usable match is reported, under code 'A'.

        Args:
            text: Raw OCR text from receipt

        Returns:
            List of dicts with 'code', 'percent' and 'amount' keys; empty
            when no TVA line with a positive amount is recognised.
        """
        collected = []
        recorded = set()

        for coded_regex in self.TVA_PATTERNS[:2]:
            for hit in re.finditer(coded_regex, text, re.IGNORECASE):
                try:
                    letter = hit.group(1).upper()
                    rate = int(hit.group(2))
                    # _parse_decimal is not defined in this class; presumably
                    # inherited from BaseStoreProfile.
                    value = self._parse_decimal(hit.group(3))
                    usable = bool(value) and value > 0
                except (ValueError, InvalidOperation, IndexError):
                    continue

                pair = (letter, rate)
                if not usable or pair in recorded:
                    continue

                recorded.add(pair)
                collected.append({
                    'code': letter,
                    'percent': rate,
                    'amount': value
                })

        if collected:
            return collected

        # No coded TVA lines: fall back to the uncoded "TVA (XX%) YY,YY" form.
        for hit in re.finditer(self.TVA_PATTERNS[2], text, re.IGNORECASE):
            try:
                rate = int(hit.group(1))
                value = self._parse_decimal(hit.group(2))
                usable = bool(value) and value > 0
            except (ValueError, InvalidOperation):
                continue

            if usable:
                collected.append({
                    'code': 'A',
                    'percent': rate,
                    'amount': value
                })
                # A single fallback entry is enough; stop at the first one.
                return collected

        return collected

    def extract_efactura_reference(self, text: str) -> str | None:
        """
        Look up the e-factura reference in the receipt text.

        Args:
            text: Raw OCR text from receipt

        Returns:
            The token captured after the first "e-factura" label
            (case-insensitive search), or None when the label is absent.
        """
        found = re.search(self.EFACTURA_PATTERN, text, re.IGNORECASE)
        if found is None:
            return None
        return found.group(1)

    def get_validation_hints(self) -> Dict[str, Any]:
        """Return the fixed set of validation flags for Dedeman receipts."""
        hints = dict(
            has_multi_rate_tva=False,
            card_equals_total=False,
            has_client_cui=False,
            has_efactura=True,
            is_non_vat_payer=False,
        )
        return hints
|