feat: Add payment methods extraction, OCR improvements, and AutoComplete fix
Backend:
- Add payment_methods and payment_mode fields to Receipt model
- Add payment method extraction (CARD/NUMERAR) with auto-suggestion logic
- Improve OCR service with TVA validation and reverse calculation
- Fix nomenclature service supplier limit (was 50, now unlimited)
- Add OCR fields migrations (ocr_raw_text, ocr_confidence, payment_mode)

Frontend:
- Fix AutoComplete to properly display supplier name after OCR
- Add payment methods display in OCR preview with suggested payment mode
- Improve ReceiptCreateView form handling and OCR data application

Database migrations:
- 20251215_add_ocr_fields_to_receipt.py
- 20251215_remove_partner_id.py
- 20251216_add_payment_mode.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -46,7 +46,7 @@ class NomenclatureService:
|
||||
(SyncedSupplier.name.ilike(f"%{search}%")) |
|
||||
(SyncedSupplier.fiscal_code.ilike(f"%{search}%"))
|
||||
)
|
||||
stmt = stmt.limit(50) # Limit results
|
||||
stmt = stmt.order_by(SyncedSupplier.name) # Order alphabetically, no limit for AutoComplete
|
||||
|
||||
result = await session.execute(stmt)
|
||||
suppliers = result.scalars().all()
|
||||
@@ -59,34 +59,44 @@ class NomenclatureService:
|
||||
(LocalSupplier.name.ilike(f"%{search}%")) |
|
||||
(LocalSupplier.fiscal_code.ilike(f"%{search}%"))
|
||||
)
|
||||
local_stmt = local_stmt.limit(50)
|
||||
local_stmt = local_stmt.order_by(LocalSupplier.name) # Order alphabetically
|
||||
|
||||
local_result = await session.execute(local_stmt)
|
||||
local_suppliers = local_result.scalars().all()
|
||||
|
||||
# Combine both
|
||||
# Combine both - no IDs needed, just text data for autocomplete
|
||||
partners = []
|
||||
for s in suppliers:
|
||||
partners.append(PartnerOption(id=s.id, name=s.name, code=s.fiscal_code))
|
||||
partners.append(PartnerOption(
|
||||
name=s.name,
|
||||
fiscal_code=s.fiscal_code,
|
||||
address=s.address,
|
||||
source="oracle"
|
||||
))
|
||||
for l in local_suppliers:
|
||||
partners.append(PartnerOption(id=l.id, name=f"{l.name} (local)", code=l.fiscal_code))
|
||||
partners.append(PartnerOption(
|
||||
name=l.name, # No suffix - must match search results
|
||||
fiscal_code=l.fiscal_code,
|
||||
address=l.address,
|
||||
source="local"
|
||||
))
|
||||
|
||||
return partners
|
||||
|
||||
# Fallback to mock data for Phase 1
|
||||
# Fallback to mock data for Phase 1 (when no synced data)
|
||||
mock_partners = [
|
||||
PartnerOption(id=1, name="OMV Petrom", code="RO123456"),
|
||||
PartnerOption(id=2, name="Dedeman", code="RO789012"),
|
||||
PartnerOption(id=3, name="Kaufland", code="RO345678"),
|
||||
PartnerOption(id=4, name="Emag", code="RO901234"),
|
||||
PartnerOption(id=5, name="Altex", code="RO567890"),
|
||||
PartnerOption(name="OMV Petrom", fiscal_code="RO123456", source="mock"),
|
||||
PartnerOption(name="Dedeman", fiscal_code="RO789012", source="mock"),
|
||||
PartnerOption(name="Kaufland", fiscal_code="RO345678", source="mock"),
|
||||
PartnerOption(name="Emag", fiscal_code="RO901234", source="mock"),
|
||||
PartnerOption(name="Altex", fiscal_code="RO567890", source="mock"),
|
||||
]
|
||||
|
||||
if search:
|
||||
search_lower = search.lower()
|
||||
mock_partners = [
|
||||
p for p in mock_partners
|
||||
if search_lower in p.name.lower() or (p.code and search_lower in p.code.lower())
|
||||
if search_lower in p.name.lower() or (p.fiscal_code and search_lower in p.fiscal_code.lower())
|
||||
]
|
||||
|
||||
return mock_partners
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
import os
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
@@ -53,23 +55,26 @@ class OCREngine:
|
||||
|
||||
def __init__(self):
    """Create the engine's bookkeeping state; no PaddleOCR instance exists yet."""
    # Synchronisation primitives used around PaddleOCR start-up.
    self._paddle_init_lock = threading.Lock()
    self._paddle_ready = threading.Event()  # Signals when PaddleOCR is FULLY ready

    # Progress flags: nothing has been started or completed so far.
    self._paddle_init_started = False
    self._paddle_initialized = False

    # The PaddleOCR object itself; stays None until one has been built.
    self._paddle = None
|
||||
|
||||
def _init_paddle_lazy(self):
|
||||
"""Lazy initialize PaddleOCR on first use (avoids slow startup)."""
|
||||
global PaddleOCR
|
||||
|
||||
if self._paddle_initialized:
|
||||
return
|
||||
with self._paddle_init_lock:
|
||||
if self._paddle_init_started:
|
||||
return # Already initializing or done
|
||||
self._paddle_init_started = True
|
||||
|
||||
self._paddle_initialized = True
|
||||
if PADDLE_AVAILABLE:
|
||||
try:
|
||||
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...")
|
||||
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...", flush=True)
|
||||
from paddleocr import PaddleOCR as _PaddleOCR
|
||||
PaddleOCR = _PaddleOCR
|
||||
|
||||
print("Initializing PaddleOCR engine...")
|
||||
print("Initializing PaddleOCR engine...", flush=True)
|
||||
# PaddleOCR 3.x API - optimized for Romanian receipts
|
||||
# Note: 'latin' not available in PaddleOCR 3.x, 'en' works well for receipts
|
||||
self._paddle = PaddleOCR(
|
||||
@@ -81,11 +86,51 @@ class OCREngine:
|
||||
rec_batch_num=6, # Batch size for recognition
|
||||
use_angle_cls=True, # Enable text angle classification
|
||||
)
|
||||
print("PaddleOCR initialized successfully with high-quality settings")
|
||||
print("PaddleOCR initialized successfully with high-quality settings", flush=True)
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to initialize PaddleOCR: {e}")
|
||||
print(f"Warning: Failed to initialize PaddleOCR: {e}", flush=True)
|
||||
self._paddle = None
|
||||
|
||||
# Signal that initialization is complete (success or failure)
|
||||
self._paddle_ready.set()
|
||||
|
||||
def wait_for_paddle(self, timeout: float = 30.0) -> bool:
    """
    Wait for PaddleOCR to be fully initialized.

    If initialization has not been started yet, it is triggered here before
    waiting on the readiness event.

    Args:
        timeout: Max seconds to wait (default 30s)

    Returns:
        True if PaddleOCR is ready, False if timeout or unavailable
    """
    if not PADDLE_AVAILABLE:
        return False

    if self._paddle is not None:
        return True  # Already ready

    if not self._paddle_init_started:
        # Start initialization if not already started
        self._init_paddle_lazy()

    # Wait for initialization to complete
    print(f"[OCR] Waiting for PaddleOCR to be ready (max {timeout}s)...", flush=True)
    # Use the monotonic clock so the reported duration cannot jump or go
    # negative when the system wall clock is adjusted during the wait.
    start = time.monotonic()
    ready = self._paddle_ready.wait(timeout=timeout)
    elapsed = time.monotonic() - start

    # The event is also set when initialization failed, so the engine
    # object must be checked as well.
    if ready and self._paddle is not None:
        print(f"[OCR] PaddleOCR ready after {elapsed:.1f}s", flush=True)
        return True
    else:
        print(f"[OCR] PaddleOCR not ready after {elapsed:.1f}s (timeout or failed)", flush=True)
        return False
|
||||
|
||||
def is_paddle_ready(self) -> bool:
    """Report, without blocking, whether a PaddleOCR instance is currently held."""
    engine = self._paddle
    return engine is not None
|
||||
|
||||
def recognize(self, image: np.ndarray) -> OCRResult:
|
||||
"""Perform OCR on preprocessed image."""
|
||||
logger.info(f"[OCR] Starting recognition, image shape: {image.shape}, dtype: {image.dtype}")
|
||||
@@ -107,6 +152,13 @@ class OCREngine:
|
||||
|
||||
def _paddle_recognize(self, image: np.ndarray) -> OCRResult:
|
||||
"""Recognize text using PaddleOCR 3.x API."""
|
||||
# Wait for PaddleOCR to be fully ready (handles background init)
|
||||
if not self.wait_for_paddle(timeout=30.0):
|
||||
logger.warning("[PaddleOCR] Not ready, falling back to Tesseract")
|
||||
if TESSERACT_AVAILABLE:
|
||||
return self._tesseract_recognize(image)
|
||||
raise RuntimeError("PaddleOCR not ready and Tesseract not available")
|
||||
|
||||
try:
|
||||
logger.info(f"[PaddleOCR] Processing image, shape: {image.shape}")
|
||||
|
||||
|
||||
@@ -170,14 +170,17 @@ class OCRService:
|
||||
print(f"[OCR] PaddleOCR heavy failed: {e}", flush=True)
|
||||
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# STEP 3: Tesseract fallback
|
||||
# STEP 3: Tesseract - ONLY to complete missing fields
|
||||
# Uses Tesseract-optimized preprocessing (binarized, high contrast)
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
print("=" * 60, flush=True)
|
||||
print("[OCR] STEP 3: Tesseract fallback", flush=True)
|
||||
print("[OCR] STEP 3: Tesseract (complement only, not override)", flush=True)
|
||||
print("=" * 60, flush=True)
|
||||
|
||||
try:
|
||||
tesseract_result = self.ocr_engine._tesseract_recognize(light_img)
|
||||
# Use Tesseract-specific preprocessing (Otsu binarization)
|
||||
tesseract_img = self.preprocessor.preprocess_for_tesseract(image)
|
||||
tesseract_result = self.ocr_engine._tesseract_recognize(tesseract_img)
|
||||
if tesseract_result and tesseract_result.text:
|
||||
extraction_tess = self.extractor.extract(tesseract_result.text)
|
||||
extraction_tess.ocr_engine = "tesseract"
|
||||
@@ -189,10 +192,17 @@ class OCRService:
|
||||
print(f" - Date: {extraction_tess.receipt_date}", flush=True)
|
||||
print(f" - CUI: {extraction_tess.cui}", flush=True)
|
||||
|
||||
extraction = self._merge_extractions(extraction, extraction_tess)
|
||||
# IMPORTANT: Tesseract only COMPLETES missing fields, never overrides!
|
||||
extraction = self._complement_extraction(extraction, extraction_tess)
|
||||
except Exception as e:
|
||||
print(f"[OCR] Tesseract failed: {e}", flush=True)
|
||||
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# FINAL VALIDATION: Fix impossible values
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
if extraction:
|
||||
extraction = self._final_validation(extraction)
|
||||
|
||||
# Final result
|
||||
if extraction is None:
|
||||
return False, "No text detected", None
|
||||
@@ -438,6 +448,122 @@ class OCRService:
|
||||
print(f"[OCR] ✓ All 5 fields found with {ext.overall_confidence:.0%} confidence", flush=True)
|
||||
return True
|
||||
|
||||
def _complement_extraction(
    self,
    primary: Optional[ExtractionResult],
    secondary: Optional[ExtractionResult]
) -> ExtractionResult:
    """
    Fill the gaps of ``primary`` with values taken from ``secondary``.

    A field that already holds a truthy value on ``primary`` is never
    replaced; only empty fields are filled. ``primary`` is updated in place
    and returned. When one side is None the other is returned unchanged, and
    when both are None a fresh empty ExtractionResult is returned.

    Unlike _merge_extractions, this never overrides existing values.
    """
    # Degenerate inputs: nothing to combine.
    if primary is None:
        return ExtractionResult() if secondary is None else secondary
    if secondary is None:
        return primary

    print("[Complement] Adding missing fields from Tesseract...", flush=True)

    def gap(field):
        # True when primary lacks the field and secondary can supply it.
        return bool(not getattr(primary, field) and getattr(secondary, field))

    def take(*fields):
        # Copy the named attributes from secondary onto primary, in order.
        for field in fields:
            setattr(primary, field, getattr(secondary, field))

    # Amount, together with its confidence score
    if gap("amount"):
        take("amount", "confidence_amount")
        print(f"[Complement] Added amount: {secondary.amount}", flush=True)

    # Receipt date, together with its confidence score
    if gap("receipt_date"):
        take("receipt_date", "confidence_date")
        print(f"[Complement] Added date: {secondary.receipt_date}", flush=True)

    # Vendor name, together with its confidence score
    if gap("partner_name"):
        take("partner_name", "confidence_vendor")
        print(f"[Complement] Added vendor: {secondary.partner_name}", flush=True)

    # CUI is only accepted when it passes the validity check
    if gap("cui") and self._is_valid_cui(secondary.cui):
        take("cui")
        print(f"[Complement] Added CUI: {secondary.cui}", flush=True)

    # TVA breakdown travels with its total
    if gap("tva_entries"):
        take("tva_entries", "tva_total")
        print(f"[Complement] Added TVA: {secondary.tva_total}", flush=True)

    # Receipt number
    if gap("receipt_number"):
        take("receipt_number")
        print(f"[Complement] Added number: {secondary.receipt_number}", flush=True)

    # Address
    if gap("address"):
        take("address")
        print(f"[Complement] Added address: {secondary.address}", flush=True)

    return primary
|
||||
|
||||
def _final_validation(self, extraction: ExtractionResult) -> ExtractionResult:
    """
    Final validation and correction of impossible values.

    Rules, applied in this order:
    1. When TVA entries exist, their amounts must add up to tva_total
       (tolerance 0.05); on mismatch the sum replaces tva_total.
    2. TVA cannot be greater than TOTAL (it is always a fraction of it).
       If it is, TOTAL is recalculated from TVA using the rate of the first
       TVA entry (19% when no usable rate is present) and confidence_amount
       is lowered to 0.70.
    3. TVA above 25% of TOTAL is only reported as suspicious.

    The entry sum is reconciled first so that a misread tva_total cannot
    trigger a recalculation that overwrites a correct amount.

    Args:
        extraction: Result to validate; it is modified in place. TVA entries
            are read as mappings with 'percent' and 'amount' keys.

    Returns:
        The same extraction object, after corrections.
    """
    print("[Final Validation] Checking extracted values...", flush=True)

    # Rule 1: Validate TVA entries sum
    if extraction.tva_entries and extraction.tva_total:
        # A missing or None entry amount contributes nothing to the sum
        entries_sum = sum(
            (e.get('amount') or Decimal('0') for e in extraction.tva_entries),
            Decimal('0'),
        )
        tolerance = Decimal('0.05')
        if abs(entries_sum - extraction.tva_total) > tolerance:
            print(f"[Final Validation] TVA entries sum ({entries_sum}) != tva_total ({extraction.tva_total})", flush=True)
            # Use the sum as it's more reliable
            extraction.tva_total = entries_sum

    # Rule 2: TVA cannot be greater than TOTAL
    if extraction.tva_total and extraction.amount:
        if extraction.tva_total > extraction.amount:
            print(f"[Final Validation] TVA ({extraction.tva_total}) > TOTAL ({extraction.amount}) - IMPOSSIBLE!", flush=True)

            # Calculate TOTAL from TVA using reverse formula:
            # total = base + tva = tva * (100/rate + 1) = tva * (100 + rate) / rate
            # For 9% TVA: total = tva * 109 / 9 = tva * 12.11
            # For 19% TVA: total = tva * 119 / 19 = tva * 6.26
            # For 21% TVA: total = tva * 121 / 21 = tva * 5.76

            rate = 19  # Default rate assumption
            if extraction.tva_entries:
                # Use the rate from the first entry, unless it is missing/None
                first_percent = extraction.tva_entries[0].get('percent')
                if first_percent is not None:
                    rate = first_percent

            # Convert once; an unparseable or non-finite rate falls back to
            # the default. Decimal's InvalidOperation is an ArithmeticError.
            try:
                rate_dec = Decimal(str(rate))
            except ArithmeticError:
                rate_dec = None
            if rate_dec is None or not rate_dec.is_finite():
                rate, rate_dec = 19, Decimal('19')

            if rate_dec > 0:
                # Formula: total = tva * (100 + rate) / rate
                calculated_total = extraction.tva_total * (Decimal('100') + rate_dec) / rate_dec
                calculated_total = calculated_total.quantize(Decimal('0.01'))

                print(f"[Final Validation] Calculated TOTAL from TVA: {calculated_total} (using {rate}% rate)", flush=True)

                extraction.amount = calculated_total
                extraction.confidence_amount = 0.70  # Lower confidence for calculated value

    # Rule 3: TVA cannot be more than ~25% of total (max Romanian rate is 21%)
    if extraction.tva_total and extraction.amount:
        tva_percent = extraction.tva_total / extraction.amount * Decimal('100')
        if tva_percent > Decimal('25'):
            print(f"[Final Validation] Warning: TVA is {tva_percent:.1f}% of total - suspicious", flush=True)

    print(f"[Final Validation] Done. Amount={extraction.amount}, TVA={extraction.tva_total}", flush=True)
    return extraction
|
||||
|
||||
|
||||
# Singleton instance
|
||||
ocr_service = OCRService()
|
||||
|
||||
@@ -20,6 +20,14 @@ from app.schemas.receipt import (
|
||||
from app.services.expense_types import EXPENSE_TYPES, get_expense_type
|
||||
|
||||
|
||||
# Maps each supported payment mode to its accounting (account code, account name) pair
PAYMENT_MODE_ACCOUNTS = dict(
    casa=('5311', 'Casa in lei'),
    banca=('5121', 'Conturi la banci in lei'),
    avans_decontare=('542', 'Avansuri de trezorerie'),
)
|
||||
|
||||
|
||||
class ReceiptService:
|
||||
"""Service for receipt business logic and workflow."""
|
||||
|
||||
@@ -151,21 +159,36 @@ class ReceiptService:
|
||||
partner_id=receipt.partner_id,
|
||||
))
|
||||
|
||||
# Credit: Cash/Bank
|
||||
cash_account = receipt.cash_register_account or "5311"
|
||||
cash_name = receipt.cash_register_name or "Casa in lei"
|
||||
# Credit entry - based on payment_mode (new) or cash_register (legacy)
|
||||
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
|
||||
credit_account, credit_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
|
||||
elif receipt.cash_register_account:
|
||||
# Backwards compatibility for existing receipts
|
||||
credit_account = receipt.cash_register_account
|
||||
credit_name = receipt.cash_register_name or "Casa/Banca"
|
||||
else:
|
||||
# Default fallback
|
||||
credit_account = "5311"
|
||||
credit_name = "Casa in lei"
|
||||
|
||||
entries.append(AccountingEntryCreate(
|
||||
entry_type=EntryType.CREDIT,
|
||||
account_code=cash_account,
|
||||
account_name=cash_name,
|
||||
account_code=credit_account,
|
||||
account_name=credit_name,
|
||||
amount=amount,
|
||||
))
|
||||
|
||||
else:
|
||||
# Income: Debit cash/bank, Credit income account
|
||||
# For now, simple income posting
|
||||
cash_account = receipt.cash_register_account or "5311"
|
||||
cash_name = receipt.cash_register_name or "Casa in lei"
|
||||
# Based on payment_mode (new) or cash_register (legacy)
|
||||
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
|
||||
cash_account, cash_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
|
||||
elif receipt.cash_register_account:
|
||||
cash_account = receipt.cash_register_account
|
||||
cash_name = receipt.cash_register_name or "Casa/Banca"
|
||||
else:
|
||||
cash_account = "5311"
|
||||
cash_name = "Casa in lei"
|
||||
|
||||
# Debit: Cash/Bank
|
||||
entries.append(AccountingEntryCreate(
|
||||
@@ -211,8 +234,9 @@ class ReceiptService:
|
||||
if not receipt.expense_type_code:
|
||||
return False, "Expense type is required", None
|
||||
|
||||
if not receipt.cash_register_account:
|
||||
return False, "Cash register is required", None
|
||||
# Validate payment_mode or cash_register (backwards compatibility)
|
||||
if not receipt.payment_mode and not receipt.cash_register_account:
|
||||
return False, "Modul de plata este obligatoriu", None
|
||||
|
||||
# Generate accounting entries
|
||||
entries = ReceiptService.generate_accounting_entries(receipt)
|
||||
@@ -239,6 +263,7 @@ class ReceiptService:
|
||||
) -> Tuple[bool, str, Optional[Receipt]]:
|
||||
"""
|
||||
Approve receipt (PENDING_REVIEW → APPROVED).
|
||||
Requires valid CUI (fiscal code) for approval.
|
||||
"""
|
||||
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
|
||||
|
||||
@@ -248,6 +273,10 @@ class ReceiptService:
|
||||
if receipt.status != ReceiptStatus.PENDING_REVIEW:
|
||||
return False, "Receipt is not pending review", None
|
||||
|
||||
# Validate CUI is present (required for Oracle import)
|
||||
if not receipt.cui:
|
||||
return False, "Trebuie completat codul fiscal (CUI) pentru aprobare", None
|
||||
|
||||
# Validate accounting entries
|
||||
if not receipt.entries:
|
||||
return False, "Receipt has no accounting entries", None
|
||||
|
||||
@@ -267,9 +267,8 @@ class SyncService:
|
||||
supplier = result.scalar_one_or_none()
|
||||
|
||||
if supplier:
|
||||
# Return only text data - no IDs needed for autocomplete
|
||||
return True, {
|
||||
"id": supplier.id,
|
||||
"oracle_id": supplier.oracle_id,
|
||||
"name": supplier.name,
|
||||
"fiscal_code": supplier.fiscal_code,
|
||||
"address": supplier.address,
|
||||
@@ -291,12 +290,11 @@ class SyncService:
|
||||
local = result.scalar_one_or_none()
|
||||
|
||||
if local:
|
||||
# Return only text data - no IDs needed for autocomplete
|
||||
return True, {
|
||||
"id": local.id,
|
||||
"name": local.name,
|
||||
"fiscal_code": local.fiscal_code,
|
||||
"address": local.address,
|
||||
"is_local": True,
|
||||
}, "local"
|
||||
|
||||
# 3. Try live Oracle search (optional fallback for unsynced data)
|
||||
|
||||
Reference in New Issue
Block a user