feat(data-entry): Add unified receipt form with OCR confidence tracking
New unified receipt creation system with:
- UnifiedReceiptForm component with inline OCR preview and confidence indicators
- Compact upload zone with drag-and-drop and camera support
- TVA and Payment fields with dynamic add/remove
- Supplier dual-field with autocomplete and OCR hint
- Receipt form sections with collapsible auxiliary data

Backend OCR improvements:
- Add confidence_tva and confidence_payment to extraction results
- Update TVA extraction to return confidence scores
- Include TVA (15%) and payment (10%) in overall_confidence calculation

Also includes:
- CSS design system rules documentation
- Port check helper function for service scripts
- Expanded design tokens documentation in CLAUDE.md

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -38,6 +38,8 @@ class ExtractionResult:
|
||||
confidence_date: float = 0.0
|
||||
confidence_vendor: float = 0.0
|
||||
confidence_client: float = 0.0
|
||||
confidence_tva: float = 0.0
|
||||
confidence_payment: float = 0.0
|
||||
raw_text: str = ""
|
||||
ocr_engine: str = "" # OCR engine used: paddleocr or tesseract
|
||||
processing_time_ms: int = 0 # Processing time in milliseconds
|
||||
@@ -51,12 +53,20 @@ class ExtractionResult:
|
||||
|
||||
@property
|
||||
def overall_confidence(self) -> float:
|
||||
"""Calculate weighted overall confidence score."""
|
||||
weights = {'amount': 0.4, 'date': 0.3, 'vendor': 0.3}
|
||||
"""Calculate weighted overall confidence score including TVA and payment."""
|
||||
weights = {
|
||||
'amount': 0.35, # Most important - receipt total
|
||||
'date': 0.20, # Receipt date
|
||||
'vendor': 0.20, # Vendor identification
|
||||
'tva': 0.15, # TVA extraction accuracy
|
||||
'payment': 0.10 # Payment method detection
|
||||
}
|
||||
return round(
|
||||
self.confidence_amount * weights['amount'] +
|
||||
self.confidence_date * weights['date'] +
|
||||
self.confidence_vendor * weights['vendor'],
|
||||
self.confidence_vendor * weights['vendor'] +
|
||||
self.confidence_tva * weights['tva'] +
|
||||
self.confidence_payment * weights['payment'],
|
||||
2
|
||||
)
|
||||
|
||||
@@ -468,9 +478,15 @@ class ReceiptExtractor:
|
||||
result.amount, result.confidence_amount = store_profile.extract_total(text_upper)
|
||||
result.receipt_date, result.confidence_date = store_profile.extract_date(text_upper)
|
||||
result.receipt_number, _ = store_profile.extract_receipt_number(text_upper)
|
||||
result.tva_entries = store_profile.extract_tva_entries(text_upper)
|
||||
result.tva_total = sum(e['amount'] for e in result.tva_entries) if result.tva_entries else None
|
||||
result.tva_entries, result.confidence_tva = store_profile.extract_tva_entries(text_upper)
|
||||
result.tva_total = sum((e['amount'] for e in result.tva_entries), Decimal(0)) if result.tva_entries else None
|
||||
result.payment_methods = store_profile.extract_payment_methods(text_upper)
|
||||
# Calculate payment confidence from individual payment method confidences
|
||||
if result.payment_methods:
|
||||
payment_confidences = [pm.get('confidence', 0.0) for pm in result.payment_methods]
|
||||
result.confidence_payment = max(payment_confidences) if payment_confidences else 0.0
|
||||
else:
|
||||
result.confidence_payment = 0.0
|
||||
|
||||
# Client data extraction via profile (CUI + name)
|
||||
profile_client_cui, cui_confidence = store_profile.extract_client_cui(text_upper)
|
||||
@@ -502,8 +518,8 @@ class ReceiptExtractor:
|
||||
result.amount, result.confidence_amount = self._extract_amount(text_upper)
|
||||
result.receipt_date, result.confidence_date = self._extract_date(text_upper)
|
||||
result.receipt_number, _ = self._extract_number(text_upper)
|
||||
result.tva_entries, result.tva_total = self._extract_tva_entries(text_upper)
|
||||
result.payment_methods = self._extract_payment_methods(text_upper)
|
||||
result.tva_entries, result.tva_total, result.confidence_tva = self._extract_tva_entries(text_upper)
|
||||
result.payment_methods, result.confidence_payment = self._extract_payment_methods(text_upper)
|
||||
|
||||
# Generic client extraction
|
||||
client_name, client_cui, client_address, confidence = self._extract_client_data(text_upper, text)
|
||||
@@ -1289,7 +1305,7 @@ class ReceiptExtractor:
|
||||
self,
|
||||
candidates: List[tuple],
|
||||
tva_bon_total: Optional[Decimal]
|
||||
) -> Tuple[List[dict], Optional[Decimal]]:
|
||||
) -> Tuple[List[dict], Optional[Decimal], float]:
|
||||
"""
|
||||
Select the best TVA candidate from collected candidates.
|
||||
|
||||
@@ -1303,10 +1319,10 @@ class ReceiptExtractor:
|
||||
tva_bon_total: Authoritative TOTAL TVA BON value (if extracted)
|
||||
|
||||
Returns:
|
||||
(best_entries, best_sum)
|
||||
(best_entries, best_sum, confidence)
|
||||
"""
|
||||
if not candidates:
|
||||
return [], None
|
||||
return [], None, 0.0
|
||||
|
||||
# Score each candidate
|
||||
scored = []
|
||||
@@ -1334,9 +1350,9 @@ class ReceiptExtractor:
|
||||
best = scored[0]
|
||||
print(f"[TVA Select] Winner: {best[1]} (score={best[0]:.1f})", flush=True)
|
||||
|
||||
return best[3], best[4]
|
||||
return best[3], best[4], best[2] # entries, sum, confidence
|
||||
|
||||
def _extract_tva_entries(self, text: str) -> Tuple[List[dict], Optional[Decimal]]:
|
||||
def _extract_tva_entries(self, text: str) -> Tuple[List[dict], Optional[Decimal], float]:
|
||||
"""
|
||||
Extract multiple TVA (VAT) entries from text.
|
||||
Romanian receipts can have multiple TVA rates (A=19%, B=9%, C=5%, D=0%).
|
||||
@@ -1345,11 +1361,12 @@ class ReceiptExtractor:
|
||||
- Try ALL patterns and collect candidates
|
||||
- Select best candidate based on matching TOTAL TVA BON
|
||||
|
||||
Returns (tva_entries, tva_total) where tva_entries is a list of:
|
||||
Returns (tva_entries, tva_total, confidence) where tva_entries is a list of:
|
||||
{'code': 'A', 'percent': 19, 'amount': Decimal('15.20')}
|
||||
"""
|
||||
tva_entries = []
|
||||
seen_entries = set() # To avoid duplicates
|
||||
confidence = 0.0 # Track extraction confidence
|
||||
|
||||
# Check for non-VAT payer (NEPLATITOR DE TVA) - TVA = 0
|
||||
# OCR variants: NEPLATTOR, NEPLATITOR, NEPLATOR, NEPLATTOR, ANEPLATHTOR, MEPLATITOR, etc.
|
||||
@@ -1366,8 +1383,8 @@ class ReceiptExtractor:
|
||||
]
|
||||
for pattern in non_vat_patterns:
|
||||
if re.search(pattern, text, re.IGNORECASE):
|
||||
# Non-VAT payer - return TVA = 0
|
||||
return [{'code': 'D', 'percent': 0, 'amount': Decimal('0.00')}], Decimal('0.00')
|
||||
# Non-VAT payer - return TVA = 0, high confidence
|
||||
return [{'code': 'D', 'percent': 0, 'amount': Decimal('0.00')}], Decimal('0.00'), 0.95
|
||||
|
||||
# Normalize spaces in numbers first (OCR may produce "32. 31" or "49, 58")
|
||||
normalized_text = re.sub(r'(\d+)[.,]\s+(\d{2})', r'\1.\2', text)
|
||||
@@ -1717,7 +1734,7 @@ class ReceiptExtractor:
|
||||
# === CANDIDATE SELECTION ===
|
||||
# Select best candidate using TOTAL TVA BON as authoritative reference
|
||||
if all_candidates:
|
||||
best_entries, best_sum = self._select_best_tva_candidate(all_candidates, tva_bon_total)
|
||||
best_entries, best_sum, confidence = self._select_best_tva_candidate(all_candidates, tva_bon_total)
|
||||
if best_entries:
|
||||
tva_entries = best_entries
|
||||
entries_sum = best_sum
|
||||
@@ -1725,7 +1742,7 @@ class ReceiptExtractor:
|
||||
# Calculate sum from entries (if not set by candidate selection)
|
||||
entries_sum = None
|
||||
if tva_entries:
|
||||
entries_sum = sum(entry['amount'] for entry in tva_entries)
|
||||
entries_sum = sum((entry['amount'] for entry in tva_entries), Decimal(0))
|
||||
|
||||
# Validate and correct TVA values
|
||||
tva_entries, tva_total = self._validate_and_correct_tva(
|
||||
@@ -1735,7 +1752,7 @@ class ReceiptExtractor:
|
||||
# Sort by code (A, B, C, D)
|
||||
tva_entries.sort(key=lambda x: x.get('code', 'Z'))
|
||||
|
||||
return tva_entries, tva_total
|
||||
return tva_entries, tva_total, confidence if tva_entries else 0.0
|
||||
|
||||
def _get_tva_code_from_percent(self, percent: int) -> str:
|
||||
"""Map TVA percentage to standard Romanian code.
|
||||
@@ -1843,7 +1860,7 @@ class ReceiptExtractor:
|
||||
tva_entries = corrected_entries
|
||||
|
||||
# Recalculate sum after corrections
|
||||
entries_sum = sum(entry['amount'] for entry in tva_entries) if tva_entries else None
|
||||
entries_sum = sum((entry['amount'] for entry in tva_entries), Decimal(0)) if tva_entries else None
|
||||
|
||||
# Validate sum against TOTAL TVA BON
|
||||
if tva_bon_total and entries_sum:
|
||||
@@ -1876,7 +1893,7 @@ class ReceiptExtractor:
|
||||
seen.add(key)
|
||||
unique_entries.append(entry)
|
||||
tva_entries = unique_entries
|
||||
entries_sum = sum(e['amount'] for e in tva_entries)
|
||||
entries_sum = sum((e['amount'] for e in tva_entries), Decimal(0))
|
||||
|
||||
# Final total
|
||||
tva_total = entries_sum if entries_sum else tva_bon_total
|
||||
@@ -2032,15 +2049,16 @@ class ReceiptExtractor:
|
||||
|
||||
return None
|
||||
|
||||
def _extract_payment_methods(self, text: str) -> List[dict]:
|
||||
def _extract_payment_methods(self, text: str) -> Tuple[List[dict], float]:
|
||||
"""
|
||||
Extract payment methods (CARD/NUMERAR) from receipt.
|
||||
These appear after TOTAL LEI and before TOTAL TVA section.
|
||||
|
||||
Returns list of: {'method': 'CARD'/'NUMERAR', 'amount': Decimal}
|
||||
Returns tuple of: (list of {'method': 'CARD'/'NUMERAR', 'amount': Decimal}, confidence)
|
||||
"""
|
||||
payment_methods = []
|
||||
seen_methods = set()
|
||||
max_confidence = 0.0
|
||||
|
||||
# Normalize spaces in numbers
|
||||
normalized_text = re.sub(r'(\d+)[.,]\s+(\d{2})', r'\1.\2', text)
|
||||
@@ -2071,13 +2089,15 @@ class ReceiptExtractor:
|
||||
'amount': amount
|
||||
})
|
||||
seen_methods.add(method)
|
||||
print(f"[Payment] Found {method}: {amount} (pattern matched)", flush=True)
|
||||
if confidence > max_confidence:
|
||||
max_confidence = confidence
|
||||
print(f"[Payment] Found {method}: {amount} (pattern matched, conf={confidence})", flush=True)
|
||||
elif amount >= self.MAX_REASONABLE_PAYMENT:
|
||||
print(f"[Payment] Rejected unreasonable amount {amount} for {method} (likely OCR error)", flush=True)
|
||||
except (InvalidOperation, ValueError):
|
||||
continue
|
||||
|
||||
return payment_methods
|
||||
return payment_methods, max_confidence if payment_methods else 0.0
|
||||
|
||||
def _validate_payment_methods(
|
||||
self, payment_methods: List[dict], total: Optional[Decimal]
|
||||
|
||||
Reference in New Issue
Block a user