def _serialize_payment_methods(payment_methods: Optional[List[Any]]) -> Optional[str]:
    """Serialize a list of payment methods to a JSON string for SQLite storage.

    Each entry may be a Pydantic model (anything exposing ``model_dump``), a
    ``dict``, or any other object accepted by ``dict()``. Input objects are
    never mutated; dict entries are shallow-copied first.

    ``Decimal`` amounts are written as strings (e.g. ``"50.00"``) so the exact
    monetary value and its scale survive the round trip instead of passing
    through binary floating point. Readers that validate ``amount`` as a
    ``Decimal`` accept both this string form and numeric values.

    Args:
        payment_methods: Entries to serialize, or ``None``.

    Returns:
        A JSON array string, or ``None`` when ``payment_methods`` is ``None``.
    """
    # Local import keeps this helper self-contained; harmless if the module
    # already imports Decimal at the top.
    from decimal import Decimal

    if payment_methods is None:
        return None

    serializable = []
    for pm in payment_methods:
        if hasattr(pm, 'model_dump'):
            item = pm.model_dump()
        elif isinstance(pm, dict):
            item = pm.copy()
        else:
            item = dict(pm)

        amount = item.get('amount')
        if isinstance(amount, Decimal):
            # Exact textual form: no float rounding, trailing zeros preserved.
            item['amount'] = str(amount)
        elif not isinstance(amount, (int, float, str)) and hasattr(amount, '__float__'):
            # Other numeric-like objects that json cannot encode natively.
            item['amount'] = float(amount)

        serializable.append(item)

    return json.dumps(serializable)
_serialize_payment_methods(update_data['payment_methods']) for field, value in update_data.items(): setattr(receipt, field, value) diff --git a/data-entry-app/backend/app/db/models/receipt.py b/data-entry-app/backend/app/db/models/receipt.py index 12f4d0b..2a829c0 100644 --- a/data-entry-app/backend/app/db/models/receipt.py +++ b/data-entry-app/backend/app/db/models/receipt.py @@ -29,6 +29,13 @@ class ReceiptStatus(str, Enum): SYNCED = "synced" # Synced to Oracle (Phase 2) +class PaymentMode(str, Enum): + """Payment mode - how the expense was paid.""" + CASA = "casa" # Numerar firma (5311) + BANCA = "banca" # Virament/POS (5121) + AVANS_DECONTARE = "avans_decontare" # Decont angajat (542) + + if TYPE_CHECKING: from .accounting_entry import AccountingEntry @@ -62,11 +69,15 @@ class Receipt(SQLModel, table=True): # Oracle references (nomenclatures) company_id: int - partner_id: Optional[int] = Field(default=None) - partner_name: Optional[str] = Field(default=None, max_length=200) # Cache for display + # partner_id removed - supplier data is text-only (partner_name, cui) + partner_name: Optional[str] = Field(default=None, max_length=200) # Supplier name from OCR/selection + cui: Optional[str] = Field(default=None, max_length=20) # Fiscal code from OCR + ocr_raw_text: Optional[str] = Field(default=None) # Raw OCR text for debugging + payment_methods: Optional[str] = Field(default=None, max_length=500) # JSON: [{"method":"CARD","amount":"50.00"}] cash_register_id: Optional[int] = Field(default=None) # Cash/Bank ID from Oracle cash_register_name: Optional[str] = Field(default=None, max_length=100) # Cache for display cash_register_account: Optional[str] = Field(default=None, max_length=20) # Account code (5311, 5121) + payment_mode: Optional[str] = Field(default=None, max_length=20) # PaymentMode value: casa/banca/avans_decontare # Workflow status: ReceiptStatus = Field(default=ReceiptStatus.DRAFT) diff --git a/data-entry-app/backend/app/routers/ocr.py 
b/data-entry-app/backend/app/routers/ocr.py index b5a99a5..ca30406 100644 --- a/data-entry-app/backend/app/routers/ocr.py +++ b/data-entry-app/backend/app/routers/ocr.py @@ -11,7 +11,7 @@ from app.db.database import get_session from app.db.crud.attachment import AttachmentCRUD from app.services.ocr_service import ocr_service from app.services.ocr_engine import OCREngine -from app.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry +from app.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry, PaymentMethod # Auth integration (will be protected by middleware) from auth.dependencies import get_current_user @@ -88,6 +88,21 @@ async def extract_from_image(file: UploadFile = File(...)): for e in result.tva_entries ] if result.tva_entries else [] + # Convert payment_methods from dict to PaymentMethod objects + from decimal import Decimal + payment_methods_list = [ + PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount']))) + for pm in result.payment_methods + ] if result.payment_methods else [] + + # Auto-suggest payment_mode based on detected methods + suggested_payment_mode = None + if payment_methods_list: + has_card = any(pm.method == 'CARD' for pm in payment_methods_list) + if has_card: + suggested_payment_mode = 'banca' + # NUMERAR -> no auto-suggestion, user chooses between casa/avans + data = ExtractionData( receipt_type=result.receipt_type, receipt_number=result.receipt_number, @@ -101,6 +116,8 @@ async def extract_from_image(file: UploadFile = File(...)): tva_total=result.tva_total, address=result.address, items_count=result.items_count, + payment_methods=payment_methods_list, + suggested_payment_mode=suggested_payment_mode, confidence_amount=result.confidence_amount, confidence_date=result.confidence_date, confidence_vendor=result.confidence_vendor, @@ -159,6 +176,21 @@ async def extract_from_attachment( for e in result.tva_entries ] if result.tva_entries else [] + # Convert payment_methods from dict 
to PaymentMethod objects + from decimal import Decimal + payment_methods_list = [ + PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount']))) + for pm in result.payment_methods + ] if result.payment_methods else [] + + # Auto-suggest payment_mode based on detected methods + suggested_payment_mode = None + if payment_methods_list: + has_card = any(pm.method == 'CARD' for pm in payment_methods_list) + if has_card: + suggested_payment_mode = 'banca' + # NUMERAR -> no auto-suggestion, user chooses between casa/avans + data = ExtractionData( receipt_type=result.receipt_type, receipt_number=result.receipt_number, @@ -172,6 +204,8 @@ async def extract_from_attachment( tva_total=result.tva_total, address=result.address, items_count=result.items_count, + payment_methods=payment_methods_list, + suggested_payment_mode=suggested_payment_mode, confidence_amount=result.confidence_amount, confidence_date=result.confidence_date, confidence_vendor=result.confidence_vendor, diff --git a/data-entry-app/backend/app/schemas/ocr.py b/data-entry-app/backend/app/schemas/ocr.py index ee2bf3b..78059cb 100644 --- a/data-entry-app/backend/app/schemas/ocr.py +++ b/data-entry-app/backend/app/schemas/ocr.py @@ -14,6 +14,12 @@ class TvaEntry(BaseModel): amount: Decimal = Field(description="TVA amount for this rate") +class PaymentMethod(BaseModel): + """Payment method entry from OCR.""" + method: str = Field(description="CARD or NUMERAR") + amount: Decimal = Field(description="Amount paid") + + class ExtractionData(BaseModel): """Extracted receipt data from OCR.""" @@ -32,6 +38,10 @@ class ExtractionData(BaseModel): address: Optional[str] = Field(default=None, description="Vendor address") items_count: Optional[int] = Field(default=None, description="Number of items/articles") + # Payment methods extracted from receipt + payment_methods: List[PaymentMethod] = Field(default=[], description="Payment methods from receipt (CARD, NUMERAR)") + suggested_payment_mode: Optional[str] = 
Field(default=None, description="Auto-suggested payment mode based on OCR (casa/banca)") + confidence_amount: float = Field(default=0.0, ge=0, le=1, description="Amount extraction confidence") confidence_date: float = Field(default=0.0, ge=0, le=1, description="Date extraction confidence") confidence_vendor: float = Field(default=0.0, ge=0, le=1, description="Vendor extraction confidence") diff --git a/data-entry-app/backend/app/schemas/receipt.py b/data-entry-app/backend/app/schemas/receipt.py index 98ca36b..eed3ba0 100644 --- a/data-entry-app/backend/app/schemas/receipt.py +++ b/data-entry-app/backend/app/schemas/receipt.py @@ -74,6 +74,12 @@ class TvaEntrySchema(BaseModel): amount: Decimal = Field(description="TVA amount for this rate") +class PaymentMethodSchema(BaseModel): + """Payment method entry (CARD/NUMERAR).""" + method: str = Field(description="Payment method: CARD or NUMERAR") + amount: Decimal = Field(description="Amount paid with this method") + + # ============ Receipt Schemas ============ class ReceiptBase(BaseModel): @@ -93,11 +99,15 @@ class ReceiptBase(BaseModel): # Other fields expense_type_code: Optional[str] = Field(default=None, max_length=20) company_id: int - partner_id: Optional[int] = None + # partner_id removed - supplier data is text-only (partner_name, cui) partner_name: Optional[str] = Field(default=None, max_length=200) + cui: Optional[str] = Field(default=None, max_length=20, description="Fiscal code (CUI) from OCR") + ocr_raw_text: Optional[str] = Field(default=None, description="Raw OCR text for debugging") + payment_methods: Optional[List[PaymentMethodSchema]] = Field(default=None, description="Payment methods from OCR") cash_register_id: Optional[int] = None cash_register_name: Optional[str] = Field(default=None, max_length=100) cash_register_account: Optional[str] = Field(default=None, max_length=20) + payment_mode: Optional[str] = Field(default=None, description="Payment mode: casa/banca/avans_decontare") class 
@field_validator('payment_methods', mode='before')
@classmethod
def parse_payment_methods(cls, v: Any) -> Optional[List[dict]]:
    """Deserialize ``payment_methods`` from its stored JSON string if needed.

    Runs before field validation. The value is either ``None``, an
    already-parsed list, or a JSON string.

    Args:
        v: Raw value of the ``payment_methods`` field.

    Returns:
        The list of payment-method entries, or ``None`` when the value is
        missing, is not valid JSON, or does not decode to a list.
    """
    if v is None:
        return None

    if isinstance(v, str):
        try:
            v = json.loads(v)
        except (json.JSONDecodeError, TypeError):
            return None

    # Valid JSON that is not an array (e.g. "{}" or "42") is treated like
    # corrupt data instead of being passed on to fail list validation.
    return v if isinstance(v, list) else None
Optional[str] = None + source: str = "oracle" # 'oracle' (synced) or 'local' class AccountOption(BaseModel): diff --git a/data-entry-app/backend/app/services/nomenclature_service.py b/data-entry-app/backend/app/services/nomenclature_service.py index 361cd90..d25546b 100644 --- a/data-entry-app/backend/app/services/nomenclature_service.py +++ b/data-entry-app/backend/app/services/nomenclature_service.py @@ -46,7 +46,7 @@ class NomenclatureService: (SyncedSupplier.name.ilike(f"%{search}%")) | (SyncedSupplier.fiscal_code.ilike(f"%{search}%")) ) - stmt = stmt.limit(50) # Limit results + stmt = stmt.order_by(SyncedSupplier.name) # Order alphabetically, no limit for AutoComplete result = await session.execute(stmt) suppliers = result.scalars().all() @@ -59,34 +59,44 @@ class NomenclatureService: (LocalSupplier.name.ilike(f"%{search}%")) | (LocalSupplier.fiscal_code.ilike(f"%{search}%")) ) - local_stmt = local_stmt.limit(50) + local_stmt = local_stmt.order_by(LocalSupplier.name) # Order alphabetically local_result = await session.execute(local_stmt) local_suppliers = local_result.scalars().all() - # Combine both + # Combine both - no IDs needed, just text data for autocomplete partners = [] for s in suppliers: - partners.append(PartnerOption(id=s.id, name=s.name, code=s.fiscal_code)) + partners.append(PartnerOption( + name=s.name, + fiscal_code=s.fiscal_code, + address=s.address, + source="oracle" + )) for l in local_suppliers: - partners.append(PartnerOption(id=l.id, name=f"{l.name} (local)", code=l.fiscal_code)) + partners.append(PartnerOption( + name=l.name, # No suffix - must match search results + fiscal_code=l.fiscal_code, + address=l.address, + source="local" + )) return partners - # Fallback to mock data for Phase 1 + # Fallback to mock data for Phase 1 (when no synced data) mock_partners = [ - PartnerOption(id=1, name="OMV Petrom", code="RO123456"), - PartnerOption(id=2, name="Dedeman", code="RO789012"), - PartnerOption(id=3, name="Kaufland", code="RO345678"), - 
PartnerOption(id=4, name="Emag", code="RO901234"), - PartnerOption(id=5, name="Altex", code="RO567890"), + PartnerOption(name="OMV Petrom", fiscal_code="RO123456", source="mock"), + PartnerOption(name="Dedeman", fiscal_code="RO789012", source="mock"), + PartnerOption(name="Kaufland", fiscal_code="RO345678", source="mock"), + PartnerOption(name="Emag", fiscal_code="RO901234", source="mock"), + PartnerOption(name="Altex", fiscal_code="RO567890", source="mock"), ] if search: search_lower = search.lower() mock_partners = [ p for p in mock_partners - if search_lower in p.name.lower() or (p.code and search_lower in p.code.lower()) + if search_lower in p.name.lower() or (p.fiscal_code and search_lower in p.fiscal_code.lower()) ] return mock_partners diff --git a/data-entry-app/backend/app/services/ocr_engine.py b/data-entry-app/backend/app/services/ocr_engine.py index 2af189c..ef10a90 100644 --- a/data-entry-app/backend/app/services/ocr_engine.py +++ b/data-entry-app/backend/app/services/ocr_engine.py @@ -2,6 +2,8 @@ import os import logging +import threading +import time from dataclasses import dataclass from typing import List, Optional, Tuple @@ -53,23 +55,26 @@ class OCREngine: def __init__(self): self._paddle = None - self._paddle_initialized = False + self._paddle_init_started = False + self._paddle_ready = threading.Event() # Signals when PaddleOCR is FULLY ready + self._paddle_init_lock = threading.Lock() def _init_paddle_lazy(self): """Lazy initialize PaddleOCR on first use (avoids slow startup).""" global PaddleOCR - if self._paddle_initialized: - return + with self._paddle_init_lock: + if self._paddle_init_started: + return # Already initializing or done + self._paddle_init_started = True - self._paddle_initialized = True if PADDLE_AVAILABLE: try: - print("Importing PaddleOCR (first use, may take ~15-20 seconds)...") + print("Importing PaddleOCR (first use, may take ~15-20 seconds)...", flush=True) from paddleocr import PaddleOCR as _PaddleOCR PaddleOCR = 
def wait_for_paddle(self, timeout: float = 30.0) -> bool:
    """Wait until PaddleOCR initialization has finished, or give up.

    Starts lazy initialization if it has not been started yet, then waits on
    the ``_paddle_ready`` event.

    Args:
        timeout: Max seconds to wait on the ready event (default 30s).

    Returns:
        True if a PaddleOCR engine instance is available. False if
        ``PADDLE_AVAILABLE`` is false, initialization finished without
        producing an engine, or the wait timed out.

    Note:
        When this call is the one that starts initialization,
        ``_init_paddle_lazy`` is invoked directly in the calling thread, so
        that work is not bounded by ``timeout``. The timeout only limits
        waiting for an initialization that is running elsewhere.
    """
    if not PADDLE_AVAILABLE:
        return False

    if self._paddle is not None:
        return True  # Already ready

    if not self._paddle_init_started:
        # _init_paddle_lazy re-checks the flag under its lock, so a racing
        # caller simply returns from it immediately.
        self._init_paddle_lazy()

    print(f"[OCR] Waiting for PaddleOCR to be ready (max {timeout}s)...", flush=True)
    # Monotonic clock: the elapsed time is unaffected by wall-clock changes.
    start = time.monotonic()
    ready = self._paddle_ready.wait(timeout=timeout)
    elapsed = time.monotonic() - start

    # The event is set on success AND on failure, so also check the engine.
    if ready and self._paddle is not None:
        print(f"[OCR] PaddleOCR ready after {elapsed:.1f}s", flush=True)
        return True

    print(f"[OCR] PaddleOCR not ready after {elapsed:.1f}s (timeout or failed)", flush=True)
    return False
logger.info(f"[OCR] Starting recognition, image shape: {image.shape}, dtype: {image.dtype}") @@ -107,6 +152,13 @@ class OCREngine: def _paddle_recognize(self, image: np.ndarray) -> OCRResult: """Recognize text using PaddleOCR 3.x API.""" + # Wait for PaddleOCR to be fully ready (handles background init) + if not self.wait_for_paddle(timeout=30.0): + logger.warning("[PaddleOCR] Not ready, falling back to Tesseract") + if TESSERACT_AVAILABLE: + return self._tesseract_recognize(image) + raise RuntimeError("PaddleOCR not ready and Tesseract not available") + try: logger.info(f"[PaddleOCR] Processing image, shape: {image.shape}") diff --git a/data-entry-app/backend/app/services/ocr_service.py b/data-entry-app/backend/app/services/ocr_service.py index 354bbb6..f771915 100644 --- a/data-entry-app/backend/app/services/ocr_service.py +++ b/data-entry-app/backend/app/services/ocr_service.py @@ -170,14 +170,17 @@ class OCRService: print(f"[OCR] PaddleOCR heavy failed: {e}", flush=True) # ══════════════════════════════════════════════════════════════ - # STEP 3: Tesseract fallback + # STEP 3: Tesseract - ONLY to complete missing fields + # Uses Tesseract-optimized preprocessing (binarized, high contrast) # ══════════════════════════════════════════════════════════════ print("=" * 60, flush=True) - print("[OCR] STEP 3: Tesseract fallback", flush=True) + print("[OCR] STEP 3: Tesseract (complement only, not override)", flush=True) print("=" * 60, flush=True) try: - tesseract_result = self.ocr_engine._tesseract_recognize(light_img) + # Use Tesseract-specific preprocessing (Otsu binarization) + tesseract_img = self.preprocessor.preprocess_for_tesseract(image) + tesseract_result = self.ocr_engine._tesseract_recognize(tesseract_img) if tesseract_result and tesseract_result.text: extraction_tess = self.extractor.extract(tesseract_result.text) extraction_tess.ocr_engine = "tesseract" @@ -189,10 +192,17 @@ class OCRService: print(f" - Date: {extraction_tess.receipt_date}", flush=True) 
print(f" - CUI: {extraction_tess.cui}", flush=True) - extraction = self._merge_extractions(extraction, extraction_tess) + # IMPORTANT: Tesseract only COMPLETES missing fields, never overrides! + extraction = self._complement_extraction(extraction, extraction_tess) except Exception as e: print(f"[OCR] Tesseract failed: {e}", flush=True) + # ══════════════════════════════════════════════════════════════ + # FINAL VALIDATION: Fix impossible values + # ══════════════════════════════════════════════════════════════ + if extraction: + extraction = self._final_validation(extraction) + # Final result if extraction is None: return False, "No text detected", None @@ -438,6 +448,122 @@ class OCRService: print(f"[OCR] ✓ All 5 fields found with {ext.overall_confidence:.0%} confidence", flush=True) return True + def _complement_extraction( + self, + primary: Optional[ExtractionResult], + secondary: Optional[ExtractionResult] + ) -> ExtractionResult: + """ + Complement primary extraction with missing fields from secondary. + NEVER overrides existing values - only fills in gaps. + + This is different from _merge_extractions which can override values. 
def _final_validation(self, extraction: "ExtractionResult") -> "ExtractionResult":
    """Sanity-check the extracted totals and repair impossible combinations.

    Steps, in order:
    1. Reconcile ``tva_total`` with the sum of the TVA entries. This runs
       first so the later checks never act on a TVA total that is about to
       be replaced. The sum is only used when every entry carries a usable
       amount and the sum is positive.
    2. TVA can never exceed TOTAL. If it does, TOTAL is recomputed from TVA
       as ``tva * (100 + rate) / rate``, using the first entry's rate
       (19% when no usable rate is present), and ``confidence_amount`` is
       lowered to 0.70. A non-positive rate skips the recomputation.
    3. Log a warning when TVA is more than 25% of TOTAL.

    Numeric values are coerced to ``Decimal`` for the computations, so
    float or string values do not raise ``TypeError``.

    Args:
        extraction: Extraction result to validate; modified in place.

    Returns:
        The same ``extraction`` object.
    """
    # Local import keeps the method self-contained; harmless if the module
    # already imports Decimal at the top.
    from decimal import Decimal

    def _to_decimal(value):
        """Return value as a finite Decimal, or None if it is unusable."""
        if value is None:
            return None
        try:
            number = value if isinstance(value, Decimal) else Decimal(str(value))
        except (ArithmeticError, ValueError):
            # decimal.InvalidOperation is an ArithmeticError subclass.
            return None
        return number if number.is_finite() else None

    print("[Final Validation] Checking extracted values...", flush=True)

    entries = extraction.tva_entries or []
    tva_total = _to_decimal(extraction.tva_total)

    # Step 1: reconcile tva_total with the per-rate entries.
    if entries and tva_total:
        entry_amounts = [_to_decimal(e.get('amount')) for e in entries]
        if all(a is not None for a in entry_amounts):
            entries_sum = sum(entry_amounts, Decimal('0'))
            tolerance = Decimal('0.05')
            if entries_sum > 0 and abs(entries_sum - tva_total) > tolerance:
                print(f"[Final Validation] TVA entries sum ({entries_sum}) != tva_total ({tva_total})", flush=True)
                # Use the sum as it's more reliable
                extraction.tva_total = entries_sum
                tva_total = entries_sum

    # Step 2: TVA cannot be greater than TOTAL.
    amount = _to_decimal(extraction.amount)
    if tva_total and amount and tva_total > amount:
        print(f"[Final Validation] TVA ({tva_total}) > TOTAL ({amount}) - IMPOSSIBLE!", flush=True)

        # total = base + tva = tva * (100 + rate) / rate
        #   9%  -> tva * 109 / 9
        #   19% -> tva * 119 / 19
        #   21% -> tva * 121 / 21
        rate = Decimal('19')  # Default rate assumption
        if entries:
            # Only the first entry's rate is used, so this is an
            # approximation for receipts with several TVA rates.
            first_rate = _to_decimal(entries[0].get('percent'))
            if first_rate is not None:
                rate = first_rate

        if rate > 0:
            calculated_total = (tva_total * (Decimal('100') + rate) / rate).quantize(Decimal('0.01'))
            print(f"[Final Validation] Calculated TOTAL from TVA: {calculated_total} (using {rate}% rate)", flush=True)

            extraction.amount = calculated_total
            extraction.confidence_amount = 0.70  # Lower confidence for calculated value
            amount = calculated_total

    # Step 3: TVA should not be more than ~25% of total.
    if tva_total and amount:
        tva_percent = tva_total / amount * Decimal('100')
        if tva_percent > Decimal('25'):
            print(f"[Final Validation] Warning: TVA is {tva_percent:.1f}% of total - suspicious", flush=True)

    print(f"[Final Validation] Done. Amount={extraction.amount}, TVA={extraction.tva_total}", flush=True)
    return extraction
account_code=credit_account, + account_name=credit_name, amount=amount, )) else: # Income: Debit cash/bank, Credit income account - # For now, simple income posting - cash_account = receipt.cash_register_account or "5311" - cash_name = receipt.cash_register_name or "Casa in lei" + # Based on payment_mode (new) or cash_register (legacy) + if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS: + cash_account, cash_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode] + elif receipt.cash_register_account: + cash_account = receipt.cash_register_account + cash_name = receipt.cash_register_name or "Casa/Banca" + else: + cash_account = "5311" + cash_name = "Casa in lei" # Debit: Cash/Bank entries.append(AccountingEntryCreate( @@ -211,8 +234,9 @@ class ReceiptService: if not receipt.expense_type_code: return False, "Expense type is required", None - if not receipt.cash_register_account: - return False, "Cash register is required", None + # Validate payment_mode or cash_register (backwards compatibility) + if not receipt.payment_mode and not receipt.cash_register_account: + return False, "Modul de plata este obligatoriu", None # Generate accounting entries entries = ReceiptService.generate_accounting_entries(receipt) @@ -239,6 +263,7 @@ class ReceiptService: ) -> Tuple[bool, str, Optional[Receipt]]: """ Approve receipt (PENDING_REVIEW → APPROVED). + Requires valid CUI (fiscal code) for approval. 
""" receipt = await ReceiptCRUD.get_by_id(session, receipt_id) @@ -248,6 +273,10 @@ class ReceiptService: if receipt.status != ReceiptStatus.PENDING_REVIEW: return False, "Receipt is not pending review", None + # Validate CUI is present (required for Oracle import) + if not receipt.cui: + return False, "Trebuie completat codul fiscal (CUI) pentru aprobare", None + # Validate accounting entries if not receipt.entries: return False, "Receipt has no accounting entries", None diff --git a/data-entry-app/backend/app/services/sync_service.py b/data-entry-app/backend/app/services/sync_service.py index 043f041..a3599c5 100644 --- a/data-entry-app/backend/app/services/sync_service.py +++ b/data-entry-app/backend/app/services/sync_service.py @@ -267,9 +267,8 @@ class SyncService: supplier = result.scalar_one_or_none() if supplier: + # Return only text data - no IDs needed for autocomplete return True, { - "id": supplier.id, - "oracle_id": supplier.oracle_id, "name": supplier.name, "fiscal_code": supplier.fiscal_code, "address": supplier.address, @@ -291,12 +290,11 @@ class SyncService: local = result.scalar_one_or_none() if local: + # Return only text data - no IDs needed for autocomplete return True, { - "id": local.id, "name": local.name, "fiscal_code": local.fiscal_code, "address": local.address, - "is_local": True, }, "local" # 3. 
Try live Oracle search (optional fallback for unsynced data) diff --git a/data-entry-app/backend/migrations/versions/20251215_add_ocr_fields_to_receipt.py b/data-entry-app/backend/migrations/versions/20251215_add_ocr_fields_to_receipt.py new file mode 100644 index 0000000..a50afaa --- /dev/null +++ b/data-entry-app/backend/migrations/versions/20251215_add_ocr_fields_to_receipt.py @@ -0,0 +1,35 @@ +"""add_ocr_fields_to_receipt + +Revision ID: 4b8e5f2a1d93 +Revises: 3a653da79002 +Create Date: 2025-12-15 10:00:00.000000+00:00 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import sqlmodel + + +# revision identifiers, used by Alembic. +revision: str = '4b8e5f2a1d93' +down_revision: Union[str, None] = '3a653da79002' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add OCR-related columns to receipts table + with op.batch_alter_table('receipts', schema=None) as batch_op: + batch_op.add_column(sa.Column('cui', sqlmodel.sql.sqltypes.AutoString(length=20), nullable=True)) + batch_op.add_column(sa.Column('ocr_raw_text', sa.Text(), nullable=True)) + batch_op.add_column(sa.Column('payment_methods', sqlmodel.sql.sqltypes.AutoString(length=500), nullable=True)) + + +def downgrade() -> None: + # Remove OCR-related columns from receipts table + with op.batch_alter_table('receipts', schema=None) as batch_op: + batch_op.drop_column('payment_methods') + batch_op.drop_column('ocr_raw_text') + batch_op.drop_column('cui') diff --git a/data-entry-app/backend/migrations/versions/20251215_remove_partner_id.py b/data-entry-app/backend/migrations/versions/20251215_remove_partner_id.py new file mode 100644 index 0000000..81456ec --- /dev/null +++ b/data-entry-app/backend/migrations/versions/20251215_remove_partner_id.py @@ -0,0 +1,29 @@ +"""Remove partner_id from receipts - supplier data is text-only + +Revision ID: 20251215_remove_partner_id +Revises: 
def upgrade() -> None:
    """Remove partner_id column - supplier data is now text-only (partner_name, cui)."""
    # Use batch mode like the other migrations on the 'receipts' table:
    # Alembic then recreates the table where the backend cannot alter it in
    # place, instead of emitting a bare ALTER TABLE ... DROP COLUMN.
    with op.batch_alter_table('receipts', schema=None) as batch_op:
        batch_op.drop_column('partner_id')


def downgrade() -> None:
    """Re-add partner_id column (nullable; previous values are not restored)."""
    with op.batch_alter_table('receipts', schema=None) as batch_op:
        batch_op.add_column(sa.Column('partner_id', sa.Integer(), nullable=True))
+revision = '20251216_payment_mode' +down_revision = '4b8e5f2a1d93' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Add payment_mode column and migrate existing data.""" + with op.batch_alter_table('receipts', schema=None) as batch_op: + batch_op.add_column(sa.Column('payment_mode', sa.String(length=20), nullable=True)) + + # Migrate existing data based on cash_register_account + op.execute(""" + UPDATE receipts + SET payment_mode = 'casa' + WHERE cash_register_account LIKE '531%' AND payment_mode IS NULL + """) + op.execute(""" + UPDATE receipts + SET payment_mode = 'banca' + WHERE cash_register_account LIKE '512%' AND payment_mode IS NULL + """) + op.execute(""" + UPDATE receipts + SET payment_mode = 'avans_decontare' + WHERE cash_register_account LIKE '542%' AND payment_mode IS NULL + """) + + +def downgrade() -> None: + """Remove payment_mode column.""" + with op.batch_alter_table('receipts', schema=None) as batch_op: + batch_op.drop_column('payment_mode') diff --git a/data-entry-app/frontend/src/components/ocr/OCRPreview.vue b/data-entry-app/frontend/src/components/ocr/OCRPreview.vue index fb058e4..89f1cb9 100644 --- a/data-entry-app/frontend/src/components/ocr/OCRPreview.vue +++ b/data-entry-app/frontend/src/components/ocr/OCRPreview.vue @@ -87,6 +87,24 @@ + +