feat: Add payment methods extraction, OCR improvements, and AutoComplete fix

Backend:
- Add payment_methods and payment_mode fields to Receipt model
- Add payment method extraction (CARD/NUMERAR) with auto-suggestion logic
- Improve OCR service with TVA validation and reverse calculation
- Remove the 50-result cap on supplier search in the nomenclature service; results are now unlimited and ordered alphabetically by name
- Add OCR fields migrations (ocr_raw_text, ocr_confidence, payment_mode)

Frontend:
- Fix AutoComplete to properly display supplier name after OCR
- Add payment methods display in OCR preview with suggested payment mode
- Improve ReceiptCreateView form handling and OCR data application

Database migrations:
- 20251215_add_ocr_fields_to_receipt.py
- 20251215_remove_partner_id.py
- 20251216_add_payment_mode.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-16 13:43:15 +02:00
parent 46d9be0c08
commit c1220e86a6
15 changed files with 734 additions and 94 deletions

View File

@@ -46,7 +46,7 @@ class NomenclatureService:
(SyncedSupplier.name.ilike(f"%{search}%")) |
(SyncedSupplier.fiscal_code.ilike(f"%{search}%"))
)
stmt = stmt.limit(50) # Limit results
stmt = stmt.order_by(SyncedSupplier.name) # Order alphabetically, no limit for AutoComplete
result = await session.execute(stmt)
suppliers = result.scalars().all()
@@ -59,34 +59,44 @@ class NomenclatureService:
(LocalSupplier.name.ilike(f"%{search}%")) |
(LocalSupplier.fiscal_code.ilike(f"%{search}%"))
)
local_stmt = local_stmt.limit(50)
local_stmt = local_stmt.order_by(LocalSupplier.name) # Order alphabetically
local_result = await session.execute(local_stmt)
local_suppliers = local_result.scalars().all()
# Combine both
# Combine both - no IDs needed, just text data for autocomplete
partners = []
for s in suppliers:
partners.append(PartnerOption(id=s.id, name=s.name, code=s.fiscal_code))
partners.append(PartnerOption(
name=s.name,
fiscal_code=s.fiscal_code,
address=s.address,
source="oracle"
))
for l in local_suppliers:
partners.append(PartnerOption(id=l.id, name=f"{l.name} (local)", code=l.fiscal_code))
partners.append(PartnerOption(
name=l.name, # No suffix - must match search results
fiscal_code=l.fiscal_code,
address=l.address,
source="local"
))
return partners
# Fallback to mock data for Phase 1
# Fallback to mock data for Phase 1 (when no synced data)
mock_partners = [
PartnerOption(id=1, name="OMV Petrom", code="RO123456"),
PartnerOption(id=2, name="Dedeman", code="RO789012"),
PartnerOption(id=3, name="Kaufland", code="RO345678"),
PartnerOption(id=4, name="Emag", code="RO901234"),
PartnerOption(id=5, name="Altex", code="RO567890"),
PartnerOption(name="OMV Petrom", fiscal_code="RO123456", source="mock"),
PartnerOption(name="Dedeman", fiscal_code="RO789012", source="mock"),
PartnerOption(name="Kaufland", fiscal_code="RO345678", source="mock"),
PartnerOption(name="Emag", fiscal_code="RO901234", source="mock"),
PartnerOption(name="Altex", fiscal_code="RO567890", source="mock"),
]
if search:
search_lower = search.lower()
mock_partners = [
p for p in mock_partners
if search_lower in p.name.lower() or (p.code and search_lower in p.code.lower())
if search_lower in p.name.lower() or (p.fiscal_code and search_lower in p.fiscal_code.lower())
]
return mock_partners

View File

@@ -2,6 +2,8 @@
import os
import logging
import threading
import time
from dataclasses import dataclass
from typing import List, Optional, Tuple
@@ -53,23 +55,26 @@ class OCREngine:
def __init__(self):
self._paddle = None
self._paddle_initialized = False
self._paddle_init_started = False
self._paddle_ready = threading.Event() # Signals when PaddleOCR is FULLY ready
self._paddle_init_lock = threading.Lock()
def _init_paddle_lazy(self):
"""Lazy initialize PaddleOCR on first use (avoids slow startup)."""
global PaddleOCR
if self._paddle_initialized:
return
with self._paddle_init_lock:
if self._paddle_init_started:
return # Already initializing or done
self._paddle_init_started = True
self._paddle_initialized = True
if PADDLE_AVAILABLE:
try:
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...")
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...", flush=True)
from paddleocr import PaddleOCR as _PaddleOCR
PaddleOCR = _PaddleOCR
print("Initializing PaddleOCR engine...")
print("Initializing PaddleOCR engine...", flush=True)
# PaddleOCR 3.x API - optimized for Romanian receipts
# Note: 'latin' not available in PaddleOCR 3.x, 'en' works well for receipts
self._paddle = PaddleOCR(
@@ -81,11 +86,51 @@ class OCREngine:
rec_batch_num=6, # Batch size for recognition
use_angle_cls=True, # Enable text angle classification
)
print("PaddleOCR initialized successfully with high-quality settings")
print("PaddleOCR initialized successfully with high-quality settings", flush=True)
except Exception as e:
print(f"Warning: Failed to initialize PaddleOCR: {e}")
print(f"Warning: Failed to initialize PaddleOCR: {e}", flush=True)
self._paddle = None
# Signal that initialization is complete (success or failure)
self._paddle_ready.set()
def wait_for_paddle(self, timeout: float = 30.0) -> bool:
    """
    Block until PaddleOCR initialization has finished or `timeout` elapses.

    Starts the lazy initialization itself when it has not been started yet,
    then waits on the ready event.

    Args:
        timeout: Maximum number of seconds to wait on the ready event
            (default 30s).

    Returns:
        True when a PaddleOCR engine instance is available; False when
        PaddleOCR is unavailable, the wait timed out, or the event fired
        without an engine being set.
    """
    # Guard clauses: nothing to wait for in either of these cases.
    if not PADDLE_AVAILABLE:
        return False
    if self._paddle is not None:
        return True

    # Nobody has kicked off initialization yet, so do it from here.
    if not self._paddle_init_started:
        self._init_paddle_lazy()

    print(f"[OCR] Waiting for PaddleOCR to be ready (max {timeout}s)...", flush=True)
    wait_began = time.time()
    signalled = self._paddle_ready.wait(timeout=timeout)
    elapsed = time.time() - wait_began

    # The event is set on success AND on failure, so the engine itself
    # must also be checked before reporting readiness.
    if not signalled or self._paddle is None:
        print(f"[OCR] PaddleOCR not ready after {elapsed:.1f}s (timeout or failed)", flush=True)
        return False

    print(f"[OCR] PaddleOCR ready after {elapsed:.1f}s", flush=True)
    return True
def is_paddle_ready(self) -> bool:
    """Report, without blocking, whether a PaddleOCR engine instance is currently set."""
    engine = self._paddle
    return engine is not None
def recognize(self, image: np.ndarray) -> OCRResult:
"""Perform OCR on preprocessed image."""
logger.info(f"[OCR] Starting recognition, image shape: {image.shape}, dtype: {image.dtype}")
@@ -107,6 +152,13 @@ class OCREngine:
def _paddle_recognize(self, image: np.ndarray) -> OCRResult:
"""Recognize text using PaddleOCR 3.x API."""
# Wait for PaddleOCR to be fully ready (handles background init)
if not self.wait_for_paddle(timeout=30.0):
logger.warning("[PaddleOCR] Not ready, falling back to Tesseract")
if TESSERACT_AVAILABLE:
return self._tesseract_recognize(image)
raise RuntimeError("PaddleOCR not ready and Tesseract not available")
try:
logger.info(f"[PaddleOCR] Processing image, shape: {image.shape}")

View File

@@ -170,14 +170,17 @@ class OCRService:
print(f"[OCR] PaddleOCR heavy failed: {e}", flush=True)
# ══════════════════════════════════════════════════════════════
# STEP 3: Tesseract fallback
# STEP 3: Tesseract - ONLY to complete missing fields
# Uses Tesseract-optimized preprocessing (binarized, high contrast)
# ══════════════════════════════════════════════════════════════
print("=" * 60, flush=True)
print("[OCR] STEP 3: Tesseract fallback", flush=True)
print("[OCR] STEP 3: Tesseract (complement only, not override)", flush=True)
print("=" * 60, flush=True)
try:
tesseract_result = self.ocr_engine._tesseract_recognize(light_img)
# Use Tesseract-specific preprocessing (Otsu binarization)
tesseract_img = self.preprocessor.preprocess_for_tesseract(image)
tesseract_result = self.ocr_engine._tesseract_recognize(tesseract_img)
if tesseract_result and tesseract_result.text:
extraction_tess = self.extractor.extract(tesseract_result.text)
extraction_tess.ocr_engine = "tesseract"
@@ -189,10 +192,17 @@ class OCRService:
print(f" - Date: {extraction_tess.receipt_date}", flush=True)
print(f" - CUI: {extraction_tess.cui}", flush=True)
extraction = self._merge_extractions(extraction, extraction_tess)
# IMPORTANT: Tesseract only COMPLETES missing fields, never overrides!
extraction = self._complement_extraction(extraction, extraction_tess)
except Exception as e:
print(f"[OCR] Tesseract failed: {e}", flush=True)
# ══════════════════════════════════════════════════════════════
# FINAL VALIDATION: Fix impossible values
# ══════════════════════════════════════════════════════════════
if extraction:
extraction = self._final_validation(extraction)
# Final result
if extraction is None:
return False, "No text detected", None
@@ -438,6 +448,122 @@ class OCRService:
print(f"[OCR] ✓ All 5 fields found with {ext.overall_confidence:.0%} confidence", flush=True)
return True
def _complement_extraction(
self,
primary: Optional[ExtractionResult],
secondary: Optional[ExtractionResult]
) -> ExtractionResult:
"""
Complement primary extraction with missing fields from secondary.
NEVER overrides existing values - only fills in gaps.
This is different from _merge_extractions which can override values.
"""
if primary is None and secondary is None:
return ExtractionResult()
if primary is None:
return secondary
if secondary is None:
return primary
print("[Complement] Adding missing fields from Tesseract...", flush=True)
# Only fill missing amount
if not primary.amount and secondary.amount:
primary.amount = secondary.amount
primary.confidence_amount = secondary.confidence_amount
print(f"[Complement] Added amount: {secondary.amount}", flush=True)
# Only fill missing date
if not primary.receipt_date and secondary.receipt_date:
primary.receipt_date = secondary.receipt_date
primary.confidence_date = secondary.confidence_date
print(f"[Complement] Added date: {secondary.receipt_date}", flush=True)
# Only fill missing vendor
if not primary.partner_name and secondary.partner_name:
primary.partner_name = secondary.partner_name
primary.confidence_vendor = secondary.confidence_vendor
print(f"[Complement] Added vendor: {secondary.partner_name}", flush=True)
# Only fill missing CUI
if not primary.cui and secondary.cui and self._is_valid_cui(secondary.cui):
primary.cui = secondary.cui
print(f"[Complement] Added CUI: {secondary.cui}", flush=True)
# Only fill missing TVA
if not primary.tva_entries and secondary.tva_entries:
primary.tva_entries = secondary.tva_entries
primary.tva_total = secondary.tva_total
print(f"[Complement] Added TVA: {secondary.tva_total}", flush=True)
# Only fill missing receipt number
if not primary.receipt_number and secondary.receipt_number:
primary.receipt_number = secondary.receipt_number
print(f"[Complement] Added number: {secondary.receipt_number}", flush=True)
# Only fill missing address
if not primary.address and secondary.address:
primary.address = secondary.address
print(f"[Complement] Added address: {secondary.address}", flush=True)
return primary
def _final_validation(self, extraction: ExtractionResult) -> ExtractionResult:
"""
Final validation and correction of impossible values.
Key rules:
1. TVA cannot be greater than TOTAL (it's always a fraction)
2. If TVA > TOTAL, recalculate TOTAL from TVA using known rates
3. Validate TVA entries sum equals TVA total
"""
print("[Final Validation] Checking extracted values...", flush=True)
# Rule 1: TVA cannot be greater than TOTAL
if extraction.tva_total and extraction.amount:
if extraction.tva_total > extraction.amount:
print(f"[Final Validation] TVA ({extraction.tva_total}) > TOTAL ({extraction.amount}) - IMPOSSIBLE!", flush=True)
# Calculate TOTAL from TVA using reverse formula:
# total = base + tva = tva * (100/rate + 1) = tva * (100 + rate) / rate
# For 9% TVA: total = tva * 109 / 9 = tva * 12.11
# For 19% TVA: total = tva * 119 / 19 = tva * 6.26
# For 21% TVA: total = tva * 121 / 21 = tva * 5.76
rate = 19 # Default rate assumption
if extraction.tva_entries:
# Use the rate from the first entry
rate = extraction.tva_entries[0].get('percent', 19)
if rate > 0:
# Formula: total = tva * (100 + rate) / rate
calculated_total = extraction.tva_total * (Decimal('100') + Decimal(str(rate))) / Decimal(str(rate))
calculated_total = calculated_total.quantize(Decimal('0.01'))
print(f"[Final Validation] Calculated TOTAL from TVA: {calculated_total} (using {rate}% rate)", flush=True)
extraction.amount = calculated_total
extraction.confidence_amount = 0.70 # Lower confidence for calculated value
# Rule 2: TVA cannot be more than ~25% of total (max Romanian rate is 21%)
if extraction.tva_total and extraction.amount:
tva_percent = extraction.tva_total / extraction.amount * Decimal('100')
if tva_percent > Decimal('25'):
print(f"[Final Validation] Warning: TVA is {tva_percent:.1f}% of total - suspicious", flush=True)
# Rule 3: Validate TVA entries sum
if extraction.tva_entries and extraction.tva_total:
entries_sum = sum(e.get('amount', Decimal('0')) for e in extraction.tva_entries)
tolerance = Decimal('0.05')
if abs(entries_sum - extraction.tva_total) > tolerance:
print(f"[Final Validation] TVA entries sum ({entries_sum}) != tva_total ({extraction.tva_total})", flush=True)
# Use the sum as it's more reliable
extraction.tva_total = entries_sum
print(f"[Final Validation] Done. Amount={extraction.amount}, TVA={extraction.tva_total}", flush=True)
return extraction
# Singleton instance
ocr_service = OCRService()

View File

@@ -20,6 +20,14 @@ from app.schemas.receipt import (
from app.services.expense_types import EXPENSE_TYPES, get_expense_type
# Payment mode to accounting account mapping.
# Keys are the recognised receipt payment_mode values; each value is an
# (account_code, account_name) tuple for the cash/bank side of the posting.
PAYMENT_MODE_ACCOUNTS = {
'casa': ('5311', 'Casa in lei'),  # cash desk, in lei
'banca': ('5121', 'Conturi la banci in lei'),  # bank accounts, in lei
'avans_decontare': ('542', 'Avansuri de trezorerie'),  # treasury advances
}
class ReceiptService:
"""Service for receipt business logic and workflow."""
@@ -151,21 +159,36 @@ class ReceiptService:
partner_id=receipt.partner_id,
))
# Credit: Cash/Bank
cash_account = receipt.cash_register_account or "5311"
cash_name = receipt.cash_register_name or "Casa in lei"
# Credit entry - based on payment_mode (new) or cash_register (legacy)
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
credit_account, credit_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
elif receipt.cash_register_account:
# Backwards compatibility for existing receipts
credit_account = receipt.cash_register_account
credit_name = receipt.cash_register_name or "Casa/Banca"
else:
# Default fallback
credit_account = "5311"
credit_name = "Casa in lei"
entries.append(AccountingEntryCreate(
entry_type=EntryType.CREDIT,
account_code=cash_account,
account_name=cash_name,
account_code=credit_account,
account_name=credit_name,
amount=amount,
))
else:
# Income: Debit cash/bank, Credit income account
# For now, simple income posting
cash_account = receipt.cash_register_account or "5311"
cash_name = receipt.cash_register_name or "Casa in lei"
# Based on payment_mode (new) or cash_register (legacy)
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
cash_account, cash_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
elif receipt.cash_register_account:
cash_account = receipt.cash_register_account
cash_name = receipt.cash_register_name or "Casa/Banca"
else:
cash_account = "5311"
cash_name = "Casa in lei"
# Debit: Cash/Bank
entries.append(AccountingEntryCreate(
@@ -211,8 +234,9 @@ class ReceiptService:
if not receipt.expense_type_code:
return False, "Expense type is required", None
if not receipt.cash_register_account:
return False, "Cash register is required", None
# Validate payment_mode or cash_register (backwards compatibility)
if not receipt.payment_mode and not receipt.cash_register_account:
return False, "Modul de plata este obligatoriu", None
# Generate accounting entries
entries = ReceiptService.generate_accounting_entries(receipt)
@@ -239,6 +263,7 @@ class ReceiptService:
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Approve receipt (PENDING_REVIEW → APPROVED).
Requires valid CUI (fiscal code) for approval.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
@@ -248,6 +273,10 @@ class ReceiptService:
if receipt.status != ReceiptStatus.PENDING_REVIEW:
return False, "Receipt is not pending review", None
# Validate CUI is present (required for Oracle import)
if not receipt.cui:
return False, "Trebuie completat codul fiscal (CUI) pentru aprobare", None
# Validate accounting entries
if not receipt.entries:
return False, "Receipt has no accounting entries", None

View File

@@ -267,9 +267,8 @@ class SyncService:
supplier = result.scalar_one_or_none()
if supplier:
# Return only text data - no IDs needed for autocomplete
return True, {
"id": supplier.id,
"oracle_id": supplier.oracle_id,
"name": supplier.name,
"fiscal_code": supplier.fiscal_code,
"address": supplier.address,
@@ -291,12 +290,11 @@ class SyncService:
local = result.scalar_one_or_none()
if local:
# Return only text data - no IDs needed for autocomplete
return True, {
"id": local.id,
"name": local.name,
"fiscal_code": local.fiscal_code,
"address": local.address,
"is_local": True,
}, "local"
# 3. Try live Oracle search (optional fallback for unsynced data)