feat: Add payment methods extraction, OCR improvements, and AutoComplete fix

Backend:
- Add payment_methods and payment_mode fields to Receipt model
- Add payment method extraction (CARD/NUMERAR) with auto-suggestion logic
- Improve OCR service with TVA validation and reverse calculation
- Fix nomenclature service supplier limit (was 50, now unlimited)
- Add OCR fields migrations (ocr_raw_text, ocr_confidence, payment_mode)

Frontend:
- Fix AutoComplete to properly display supplier name after OCR
- Add payment methods display in OCR preview with suggested payment mode
- Improve ReceiptCreateView form handling and OCR data application

Database migrations:
- 20251215_add_ocr_fields_to_receipt.py
- 20251215_remove_partner_id.py
- 20251216_add_payment_mode.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-16 13:43:15 +02:00
parent 46d9be0c08
commit c1220e86a6
15 changed files with 734 additions and 94 deletions

View File

@@ -2,6 +2,8 @@
import os
import logging
import threading
import time
from dataclasses import dataclass
from typing import List, Optional, Tuple
@@ -53,23 +55,26 @@ class OCREngine:
def __init__(self):
self._paddle = None
self._paddle_initialized = False
self._paddle_init_started = False
self._paddle_ready = threading.Event() # Signals when PaddleOCR is FULLY ready
self._paddle_init_lock = threading.Lock()
def _init_paddle_lazy(self):
"""Lazy initialize PaddleOCR on first use (avoids slow startup)."""
global PaddleOCR
if self._paddle_initialized:
return
with self._paddle_init_lock:
if self._paddle_init_started:
return # Already initializing or done
self._paddle_init_started = True
self._paddle_initialized = True
if PADDLE_AVAILABLE:
try:
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...")
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...", flush=True)
from paddleocr import PaddleOCR as _PaddleOCR
PaddleOCR = _PaddleOCR
print("Initializing PaddleOCR engine...")
print("Initializing PaddleOCR engine...", flush=True)
# PaddleOCR 3.x API - optimized for Romanian receipts
# Note: 'latin' not available in PaddleOCR 3.x, 'en' works well for receipts
self._paddle = PaddleOCR(
@@ -81,11 +86,51 @@ class OCREngine:
rec_batch_num=6, # Batch size for recognition
use_angle_cls=True, # Enable text angle classification
)
print("PaddleOCR initialized successfully with high-quality settings")
print("PaddleOCR initialized successfully with high-quality settings", flush=True)
except Exception as e:
print(f"Warning: Failed to initialize PaddleOCR: {e}")
print(f"Warning: Failed to initialize PaddleOCR: {e}", flush=True)
self._paddle = None
# Signal that initialization is complete (success or failure)
self._paddle_ready.set()
def wait_for_paddle(self, timeout: float = 30.0) -> bool:
"""
Wait for PaddleOCR to be fully initialized.
Args:
timeout: Max seconds to wait (default 30s)
Returns:
True if PaddleOCR is ready, False if timeout or unavailable
"""
if not PADDLE_AVAILABLE:
return False
if self._paddle is not None:
return True # Already ready
if not self._paddle_init_started:
# Start initialization if not already started
self._init_paddle_lazy()
# Wait for initialization to complete
print(f"[OCR] Waiting for PaddleOCR to be ready (max {timeout}s)...", flush=True)
start = time.time()
ready = self._paddle_ready.wait(timeout=timeout)
elapsed = time.time() - start
if ready and self._paddle is not None:
print(f"[OCR] PaddleOCR ready after {elapsed:.1f}s", flush=True)
return True
else:
print(f"[OCR] PaddleOCR not ready after {elapsed:.1f}s (timeout or failed)", flush=True)
return False
def is_paddle_ready(self) -> bool:
"""Check if PaddleOCR is ready without waiting."""
return self._paddle is not None
def recognize(self, image: np.ndarray) -> OCRResult:
"""Perform OCR on preprocessed image."""
logger.info(f"[OCR] Starting recognition, image shape: {image.shape}, dtype: {image.dtype}")
@@ -107,6 +152,13 @@ class OCREngine:
def _paddle_recognize(self, image: np.ndarray) -> OCRResult:
"""Recognize text using PaddleOCR 3.x API."""
# Wait for PaddleOCR to be fully ready (handles background init)
if not self.wait_for_paddle(timeout=30.0):
logger.warning("[PaddleOCR] Not ready, falling back to Tesseract")
if TESSERACT_AVAILABLE:
return self._tesseract_recognize(image)
raise RuntimeError("PaddleOCR not ready and Tesseract not available")
try:
logger.info(f"[PaddleOCR] Processing image, shape: {image.shape}")