fix(ocr): Fix store profile extraction patterns and module loading
Major fixes to OCR store profiles for Romanian receipt extraction:

- Fix ProfileRegistry module path resolution (was loading 0 profiles)
- Add multiline TVA extraction for Brick, Electrobering, Gama Ink
- Add "CARTE CREDIT" payment detection for OMV/SOCAR gas stations
- Handle OCR artifacts: TVA→TUA, "-"→"4", I→L in CUI markers
- Add client CUI patterns for Brick receipts
- Add profile selection logging to ocr_extractor.py
- Create test script for all 29 PDFs (test_all_profiles.py)

Test results: 13/29 passing (improved from 9/29). Remaining failures are primarily OCR quality issues.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -100,11 +100,62 @@ class SocarProfile(BaseStoreProfile):
|
||||
|
||||
return entries
|
||||
|
||||
def extract_payment_methods(self, text: str) -> List[dict]:
    """
    Extract SOCAR-specific payment methods.

    Gas stations use "CARTE CREDIT" or "CARD" for card payments.

    The whole receipt total is attributed to the first indicator found,
    checked in priority order (most specific first). If no indicator is
    present but the text contains "BON FISCAL", a low-confidence CARD
    payment is assumed.

    Args:
        text: Raw OCR text from receipt

    Returns:
        List of payment methods with method, amount, and confidence.
        Contains at most one entry; empty when the text is empty, no total
        can be extracted, or nothing indicates a payment.
    """
    # Nothing to scan (also avoids calling .upper() on None).
    if not text:
        return []

    # Get total amount first; without it no payment can be reported.
    total_amount, _ = self.extract_total(text)
    if not total_amount:
        return []

    # OCR output often contains doubled spaces or line breaks inside
    # multi-word markers ("CARTE  CREDIT", "BON\nFISCAL"), so collapse every
    # whitespace run to a single space before substring matching.
    text_upper = ' '.join(text.upper().split())

    # Gas station payment patterns: (indicator, method, confidence).
    # Order matters: the first indicator present in the text wins.
    payment_indicators = (
        ('CARTE CREDIT', 'CARD', 0.98),
        ('CARTE DE CREDIT', 'CARD', 0.98),
        ('CARD', 'CARD', 0.95),
        ('VISA', 'CARD', 0.95),
        ('MASTERCARD', 'CARD', 0.95),
        ('CONTACTLESS', 'CARD', 0.90),
        ('NUMERAR', 'NUMERAR', 0.95),
        ('CASH', 'NUMERAR', 0.90),
    )

    for indicator, method, confidence in payment_indicators:
        if indicator in text_upper:
            return [{
                'method': method,
                'amount': total_amount,
                'confidence': confidence
            }]

    # Fallback: If no explicit payment but has BON FISCAL, assume CARD
    if 'BON FISCAL' in text_upper:
        return [{
            'method': 'CARD',
            'amount': total_amount,
            'confidence': 0.70
        }]

    return []
|
||||
|
||||
def get_validation_hints(self) -> Dict[str, Any]:
|
||||
"""Return SOCAR-specific validation hints."""
|
||||
return {
|
||||
"has_multi_rate_tva": False,
|
||||
"card_equals_total": False,
|
||||
"card_equals_total": True, # Gas station: card equals total
|
||||
"has_client_cui": True,
|
||||
"has_efactura": False,
|
||||
"is_non_vat_payer": False,
|
||||
|
||||
Reference in New Issue
Block a user