feat: Migrate to ultrathin monolith architecture

Consolidate 3 separate applications (reports-app, data-entry-app, telegram-bot) into a unified
architecture with single backend and frontend:

Backend Changes:
- Unified FastAPI backend at backend/ with modular structure
- Modules: reports, data_entry, telegram in backend/modules/
- Centralized config.py and main.py with all routers registered
- Single worker mode (--workers 1) for Telegram bot compatibility
- Shared Oracle connection pool and JWT authentication
- Unified requirements.txt and environment configuration

Frontend Changes:
- Single Vue.js SPA with module-based routing
- Unified frontend at src/ with modules in src/modules/{reports,data-entry}/
- Shared components and stores in src/shared/
- Error boundaries for module isolation
- Dual API proxy in Vite for module communication

Infrastructure:
- New unified startup scripts: start-prod.sh, start-test.sh, start-backend.sh
- Environment templates: .env.dev.example, .env.test.example, .env.prod.example
- Updated deployment scripts for Windows IIS
- Simplified SSH tunnel management

Documentation:
- Comprehensive CLAUDE.md with architecture overview
- Module-specific docs in docs/{data-entry,telegram}/
- Architecture decision records in docs/ARCHITECTURE-DECISIONS.md
- Deployment guides consolidated in deployment/windows/docs/

This migration reduces complexity, improves maintainability, and enables easier
deployment while maintaining all existing functionality.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 23:48:14 +02:00
parent 2a101f1ef5
commit c5e051ad80
378 changed files with 7566 additions and 73730 deletions

View File

@@ -0,0 +1,11 @@
# Business logic services
from .receipt_service import ReceiptService
from .nomenclature_service import NomenclatureService
from .expense_types import EXPENSE_TYPES, ExpenseType
__all__ = [
"ReceiptService",
"NomenclatureService",
"EXPENSE_TYPES",
"ExpenseType",
]

View File

@@ -0,0 +1,101 @@
"""Predefined expense types for automatic accounting entry generation."""
from decimal import Decimal
from dataclasses import dataclass
from typing import Dict, Optional
@dataclass
class ExpenseType:
"""Expense type definition with accounting configuration."""
code: str
name: str
account_code: str
account_name: str
has_vat: bool
vat_percent: Decimal = Decimal("19")
vat_account: str = "4426"
# Predefined expense types
EXPENSE_TYPES: Dict[str, ExpenseType] = {
"FUEL": ExpenseType(
code="FUEL",
name="Combustibil",
account_code="6022",
account_name="Cheltuieli cu combustibilii",
has_vat=True,
),
"MATERIALS": ExpenseType(
code="MATERIALS",
name="Materiale consumabile",
account_code="6028",
account_name="Alte cheltuieli cu materiale consumabile",
has_vat=True,
),
"OFFICE": ExpenseType(
code="OFFICE",
name="Rechizite birou",
account_code="6024",
account_name="Cheltuieli privind materialele pentru ambalat",
has_vat=True,
),
"PHONE": ExpenseType(
code="PHONE",
name="Telefonie / Internet",
account_code="626",
account_name="Cheltuieli postale si taxe de telecomunicatii",
has_vat=True,
),
"PARKING": ExpenseType(
code="PARKING",
name="Parcare",
account_code="6022",
account_name="Cheltuieli cu combustibilii",
has_vat=True,
),
"FOOD": ExpenseType(
code="FOOD",
name="Alimentatie",
account_code="6028",
account_name="Alte cheltuieli cu materiale consumabile",
has_vat=False, # No deductible VAT for food
),
"TRANSPORT": ExpenseType(
code="TRANSPORT",
name="Transport",
account_code="624",
account_name="Cheltuieli cu transportul de bunuri si personal",
has_vat=True,
),
"OTHER": ExpenseType(
code="OTHER",
name="Altele",
account_code="628",
account_name="Alte cheltuieli cu serviciile executate de terti",
has_vat=True,
),
}
def get_expense_type(code: str) -> Optional[ExpenseType]:
"""Get expense type by code."""
return EXPENSE_TYPES.get(code)
def get_all_expense_types() -> Dict[str, ExpenseType]:
"""Get all expense types."""
return EXPENSE_TYPES.copy()
# Default cash register accounts
CASH_REGISTER_ACCOUNTS = {
"CASA": {
"code": "5311",
"name": "Casa in lei",
},
"BANCA": {
"code": "5121",
"name": "Conturi la banci in lei",
},
}

View File

@@ -0,0 +1,270 @@
"""Image preprocessing for optimal OCR results."""
from pathlib import Path
from typing import List
import numpy as np
import cv2
try:
import pdf2image
PDF_AVAILABLE = True
except ImportError:
PDF_AVAILABLE = False
class ImagePreprocessor:
"""Preprocess receipt images for OCR."""
def _add_safety_padding(self, image: np.ndarray, padding: int = 50) -> np.ndarray:
"""Add white padding around image to protect edge content during rotation.
This prevents left/right margin truncation in OCR by ensuring text near
edges isn't lost during deskew rotation.
"""
if len(image.shape) == 2:
# Grayscale
return cv2.copyMakeBorder(
image, padding, padding, padding, padding,
cv2.BORDER_CONSTANT, value=255
)
else:
# Color (BGR)
return cv2.copyMakeBorder(
image, padding, padding, padding, padding,
cv2.BORDER_CONSTANT, value=(255, 255, 255)
)
def load_image(self, path: Path) -> np.ndarray:
"""Load image from file."""
image = cv2.imread(str(path))
if image is None:
raise ValueError(f"Could not load image: {path}")
return image
def pdf_to_images(self, path: Path, dpi: int = 300) -> List[np.ndarray]:
"""
Convert PDF to images.
Args:
path: Path to PDF file
dpi: Resolution (300 = fast & good quality, 400 = better but slower)
"""
if not PDF_AVAILABLE:
raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")
images = pdf2image.convert_from_path(str(path), dpi=dpi)
return [np.array(img) for img in images]
def preprocess(self, image: np.ndarray, high_quality: bool = True) -> np.ndarray:
"""
Apply LIGHT preprocessing - better for clear PDFs.
Heavy binarization can destroy text on clear images.
"""
return self.preprocess_light(image)
def preprocess_light(self, image: np.ndarray) -> np.ndarray:
"""
Light preprocessing for CLEAR images (PDFs, good scans).
Preserves original quality, only enhances contrast.
"""
# 0. Add safety padding to protect edge content during deskew rotation
image = self._add_safety_padding(image)
# 1. Grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image.copy()
# 2a. Scale DOWN if any side exceeds 4000px (PaddleOCR limit)
height, width = gray.shape
max_side = max(height, width)
if max_side > 4000:
scale = 4000 / max_side
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
height, width = gray.shape
# 2b. Scale UP if too small
if width < 1500:
scale = 1500 / width
# Ensure we don't exceed 4000px after upscaling
new_width = int(width * scale)
new_height = int(height * scale)
if max(new_width, new_height) > 4000:
scale = 4000 / max(new_width, new_height)
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
# 3. Deskew
gray = self._deskew(gray)
# 4. Light contrast enhancement only
clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# NO binarization, NO morphological ops - preserve original quality
return enhanced
def preprocess_heavy(self, image: np.ndarray) -> np.ndarray:
"""
Heavy preprocessing for FADED thermal receipts.
Aggressive binarization to recover faded text.
"""
# 0. Add safety padding to protect edge content during deskew rotation
image = self._add_safety_padding(image)
# 1. Grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image.copy()
# 2a. Scale DOWN if any side exceeds 4000px (PaddleOCR limit)
height, width = gray.shape
max_side = max(height, width)
if max_side > 4000:
scale = 4000 / max_side
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
height, width = gray.shape
# 2b. Scale UP if too small (larger = better OCR)
if width < 1500:
scale = 1500 / width
# Ensure we don't exceed 4000px after upscaling
new_width = int(width * scale)
new_height = int(height * scale)
if max(new_width, new_height) > 4000:
scale = 4000 / max(new_width, new_height)
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
# 3. Deskew
gray = self._deskew(gray)
# 4. Contrast enhancement with CLAHE
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# 5. Denoise
denoised = cv2.fastNlMeansDenoising(enhanced, h=8, templateWindowSize=7, searchWindowSize=21)
# 6. Sharpening
gaussian = cv2.GaussianBlur(denoised, (0, 0), 2.0)
sharpened = cv2.addWeighted(denoised, 1.5, gaussian, -0.5, 0)
# 7. Adaptive thresholding (binarization)
binary = cv2.adaptiveThreshold(
sharpened, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
blockSize=11, C=5
)
# 8. Morphological operations
kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
result = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_close)
return result
def preprocess_for_tesseract(self, image: np.ndarray) -> np.ndarray:
"""
Tesseract-optimized preprocessing.
Tesseract works best with:
- Clean black text on white background (binarized)
- High DPI (scale up small images)
- Otsu thresholding (better than adaptive for clean documents)
"""
# 0. Add safety padding to protect edge content during deskew rotation
image = self._add_safety_padding(image)
# 1. Grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image.copy()
# 2. Scale for optimal Tesseract (target ~2000px width for receipts)
height, width = gray.shape
if width < 2000:
scale = 2000 / width
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
elif width > 3000:
scale = 3000 / width
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
# 3. Deskew
gray = self._deskew(gray)
# 4. Strong contrast enhancement
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# 5. Denoise before binarization
denoised = cv2.fastNlMeansDenoising(enhanced, h=10, templateWindowSize=7, searchWindowSize=21)
# 6. Otsu binarization (better than adaptive for clean PDFs)
_, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 7. Light morphological cleanup
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
cleaned = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
return cleaned
def get_all_variants(self, image: np.ndarray) -> List[np.ndarray]:
"""
Generate 2 preprocessing variants for OCR (fast mode).
Returns: [light_processed, heavy_processed]
"""
return [
self.preprocess_light(image),
self.preprocess_heavy(image),
]
def _deskew(self, image: np.ndarray) -> np.ndarray:
"""Correct image rotation/skew using Hough lines.
Uses expanded canvas to preserve all content during rotation,
preventing left/right margin truncation.
"""
edges = cv2.Canny(image, 50, 150, apertureSize=3)
lines = cv2.HoughLinesP(
edges, 1, np.pi / 180,
threshold=100, minLineLength=100, maxLineGap=10
)
if lines is None:
return image
angles = []
for line in lines:
x1, y1, x2, y2 = line[0]
angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
if abs(angle) < 45:
angles.append(angle)
if not angles:
return image
median_angle = np.median(angles)
if abs(median_angle) < 0.5:
return image
h, w = image.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
# Calculate new canvas size to fit entire rotated image (prevents edge truncation)
cos_angle = abs(np.cos(np.radians(median_angle)))
sin_angle = abs(np.sin(np.radians(median_angle)))
new_w = int(h * sin_angle + w * cos_angle)
new_h = int(h * cos_angle + w * sin_angle)
# Adjust rotation matrix for new canvas center
M[0, 2] += (new_w - w) / 2
M[1, 2] += (new_h - h) / 2
return cv2.warpAffine(
image, M, (new_w, new_h),
flags=cv2.INTER_CUBIC,
borderMode=cv2.BORDER_CONSTANT,
borderValue=255 # White background (grayscale)
)

View File

@@ -0,0 +1,234 @@
"""Service for fetching nomenclatures from Oracle (read-only)."""
from typing import List, Optional
from decimal import Decimal
from sqlmodel import select
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.schemas.receipt import (
PartnerOption,
AccountOption,
CashRegisterOption,
ExpenseTypeOption,
)
from backend.modules.data_entry.services.expense_types import EXPENSE_TYPES
from backend.modules.data_entry.db.models.nomenclature import SyncedSupplier, LocalSupplier, SyncedCashRegister
class NomenclatureService:
"""
Service for fetching nomenclatures.
In Phase 1 (MVP), some nomenclatures are hardcoded.
In Phase 2, these will be fetched from Oracle.
"""
@staticmethod
async def get_partners(
company_id: int,
search: Optional[str] = None,
session: Optional[AsyncSession] = None
) -> List[PartnerOption]:
"""
Get partners (suppliers/customers) for a company.
Phase 1: Returns mock data.
Phase 2: Returns synced data from SQLite (from Oracle sync).
Phase 3: Will fetch live from Oracle.
"""
# If session is provided, try to get from synced SQLite data
if session:
# Try to get from SQLite synced data
stmt = select(SyncedSupplier).where(SyncedSupplier.company_id == company_id)
if search:
stmt = stmt.where(
(SyncedSupplier.name.ilike(f"%{search}%")) |
(SyncedSupplier.fiscal_code.ilike(f"%{search}%"))
)
stmt = stmt.order_by(SyncedSupplier.name) # Order alphabetically, no limit for AutoComplete
result = await session.execute(stmt)
suppliers = result.scalars().all()
if suppliers:
# Also get local suppliers
local_stmt = select(LocalSupplier).where(LocalSupplier.company_id == company_id)
if search:
local_stmt = local_stmt.where(
(LocalSupplier.name.ilike(f"%{search}%")) |
(LocalSupplier.fiscal_code.ilike(f"%{search}%"))
)
local_stmt = local_stmt.order_by(LocalSupplier.name) # Order alphabetically
local_result = await session.execute(local_stmt)
local_suppliers = local_result.scalars().all()
# Combine both - no IDs needed, just text data for autocomplete
partners = []
for s in suppliers:
partners.append(PartnerOption(
name=s.name,
fiscal_code=s.fiscal_code,
address=s.address,
source="oracle"
))
for l in local_suppliers:
partners.append(PartnerOption(
name=l.name, # No suffix - must match search results
fiscal_code=l.fiscal_code,
address=l.address,
source="local"
))
return partners
# Fallback to mock data for Phase 1 (when no synced data)
mock_partners = [
PartnerOption(name="OMV Petrom", fiscal_code="RO123456", source="mock"),
PartnerOption(name="Dedeman", fiscal_code="RO789012", source="mock"),
PartnerOption(name="Kaufland", fiscal_code="RO345678", source="mock"),
PartnerOption(name="Emag", fiscal_code="RO901234", source="mock"),
PartnerOption(name="Altex", fiscal_code="RO567890", source="mock"),
]
if search:
search_lower = search.lower()
mock_partners = [
p for p in mock_partners
if search_lower in p.name.lower() or (p.fiscal_code and search_lower in p.fiscal_code.lower())
]
return mock_partners
@staticmethod
async def get_accounts(company_id: int, prefix: Optional[str] = None) -> List[AccountOption]:
"""
Get chart of accounts for a company.
Phase 1: Returns common expense/income accounts.
Phase 2: Will fetch from Oracle PLAN_CONTURI.
"""
# Common accounts for expenses and receipts
accounts = [
# Expense accounts (Class 6)
AccountOption(code="6022", name="Cheltuieli cu combustibilii"),
AccountOption(code="6024", name="Cheltuieli materiale pentru ambalat"),
AccountOption(code="6028", name="Alte cheltuieli cu materiale consumabile"),
AccountOption(code="624", name="Cheltuieli cu transportul de bunuri si personal"),
AccountOption(code="626", name="Cheltuieli postale si taxe telecomunicatii"),
AccountOption(code="628", name="Alte cheltuieli cu serviciile executate de terti"),
# VAT
AccountOption(code="4426", name="TVA deductibila"),
AccountOption(code="4427", name="TVA colectata"),
# Cash and Bank (Class 5)
AccountOption(code="5311", name="Casa in lei"),
AccountOption(code="5121", name="Conturi la banci in lei"),
# Income accounts (Class 7)
AccountOption(code="7588", name="Alte venituri din exploatare"),
]
if prefix:
accounts = [a for a in accounts if a.code.startswith(prefix)]
return accounts
@staticmethod
async def get_cash_registers(
company_id: int,
session: Optional[AsyncSession] = None
) -> List[CashRegisterOption]:
"""
Get cash registers and bank accounts for a company.
Phase 1: Returns default options.
Phase 2: Returns synced data from SQLite (from Oracle sync).
Phase 3: Will fetch live from Oracle NOM_CASE / NOM_BANCI.
"""
# If session is provided, try to get from synced SQLite data
if session:
stmt = select(SyncedCashRegister).where(SyncedCashRegister.company_id == company_id)
result = await session.execute(stmt)
registers = result.scalars().all()
if registers:
return [
CashRegisterOption(id=r.id, name=r.name, account_code=r.account_code)
for r in registers
]
# Fallback to default cash registers for Phase 1
return [
CashRegisterOption(id=1, name="Casa principala", account_code="5311"),
CashRegisterOption(id=2, name="Cont BCR", account_code="5121"),
CashRegisterOption(id=3, name="Cont BRD", account_code="5121"),
]
@staticmethod
async def get_expense_types() -> List[ExpenseTypeOption]:
"""
Get predefined expense types with their accounting configuration.
"""
return [
ExpenseTypeOption(
code=et.code,
name=et.name,
account_code=et.account_code,
has_vat=et.has_vat,
vat_percent=et.vat_percent,
)
for et in EXPENSE_TYPES.values()
]
@staticmethod
async def get_companies(username: str) -> List[dict]:
"""
Get companies accessible by user.
Phase 1: Returns mock data.
Phase 2: Will fetch from shared auth based on user permissions.
"""
# TODO: Integrate with shared auth to get user's companies
return [
{"id": 1, "name": "SC Test SRL", "cui": "RO12345678"},
{"id": 2, "name": "SC Demo SA", "cui": "RO87654321"},
]
# ============ Phase 2 Oracle Integration Methods ============
@staticmethod
async def _fetch_partners_oracle(company_id: int, search: Optional[str] = None) -> List[PartnerOption]:
"""
Fetch partners from Oracle NOM_PARTENERI.
Will be implemented in Phase 2.
"""
# TODO: Implement using shared oracle_pool
# Example query:
# SELECT ID_PART, DEN_PART, COD_FISCAL
# FROM {schema}.NOM_PARTENERI
# WHERE DEN_PART LIKE :search
raise NotImplementedError("Oracle integration pending - Phase 2")
@staticmethod
async def _fetch_accounts_oracle(company_id: int, prefix: Optional[str] = None) -> List[AccountOption]:
"""
Fetch chart of accounts from Oracle PLAN_CONTURI.
Will be implemented in Phase 2.
"""
# TODO: Implement using shared oracle_pool
raise NotImplementedError("Oracle integration pending - Phase 2")
@staticmethod
async def _fetch_cash_registers_oracle(company_id: int) -> List[CashRegisterOption]:
"""
Fetch cash registers from Oracle NOM_CASE / NOM_BANCI.
Will be implemented in Phase 2.
"""
# TODO: Implement using shared oracle_pool
raise NotImplementedError("Oracle integration pending - Phase 2")

View File

@@ -0,0 +1,295 @@
"""OCR engine wrapper for PaddleOCR and Tesseract."""
import os
import logging
import threading
import time
from dataclasses import dataclass
from typing import List, Optional, Tuple
import numpy as np
# Setup logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO) # Ensure logs are visible
# Disable PaddleOCR model source check for faster startup (PaddleX 3.x)
os.environ['PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Lazy imports - these will be imported on first use
PaddleOCR = None # Will be imported lazily
pytesseract = None # Will be imported lazily
# Check availability without importing heavy libraries
def _check_paddle_available() -> bool:
"""Check if paddleocr is installed without importing it."""
try:
import importlib.util
return importlib.util.find_spec("paddleocr") is not None
except Exception:
return False
def _check_tesseract_available() -> bool:
"""Check if pytesseract is installed without importing it."""
try:
import importlib.util
return importlib.util.find_spec("pytesseract") is not None
except Exception:
return False
PADDLE_AVAILABLE = _check_paddle_available()
TESSERACT_AVAILABLE = _check_tesseract_available()
@dataclass
class OCRResult:
"""Raw OCR result."""
text: str
confidence: float
boxes: List[dict]
engine: str = "" # OCR engine used: paddleocr or tesseract
class OCREngine:
"""Unified OCR engine with fallback support."""
def __init__(self):
self._paddle = None
self._paddle_init_started = False
self._paddle_ready = threading.Event() # Signals when PaddleOCR is FULLY ready
self._paddle_init_lock = threading.Lock()
def _init_paddle_lazy(self):
"""Lazy initialize PaddleOCR on first use (avoids slow startup)."""
global PaddleOCR
with self._paddle_init_lock:
if self._paddle_init_started:
return # Already initializing or done
self._paddle_init_started = True
if PADDLE_AVAILABLE:
try:
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...", flush=True)
from paddleocr import PaddleOCR as _PaddleOCR
PaddleOCR = _PaddleOCR
print("Initializing PaddleOCR engine...", flush=True)
# PaddleOCR 3.x API - optimized for Romanian receipts
# Note: 'latin' not available in PaddleOCR 3.x, 'en' works well for receipts
self._paddle = PaddleOCR(
lang='en', # 'en' handles Latin alphabet well for receipts
# High quality settings for better accuracy
det_db_thresh=0.3, # Lower threshold = detect more text (default 0.3)
det_db_box_thresh=0.5, # Box confidence threshold (default 0.5)
det_db_unclip_ratio=1.8, # Expand detected boxes slightly (default 1.5)
rec_batch_num=6, # Batch size for recognition
use_angle_cls=True, # Enable text angle classification
)
print("PaddleOCR initialized successfully with high-quality settings", flush=True)
except Exception as e:
print(f"Warning: Failed to initialize PaddleOCR: {e}", flush=True)
self._paddle = None
# Signal that initialization is complete (success or failure)
self._paddle_ready.set()
def wait_for_paddle(self, timeout: float = 30.0) -> bool:
"""
Wait for PaddleOCR to be fully initialized.
Args:
timeout: Max seconds to wait (default 30s)
Returns:
True if PaddleOCR is ready, False if timeout or unavailable
"""
if not PADDLE_AVAILABLE:
return False
if self._paddle is not None:
return True # Already ready
if not self._paddle_init_started:
# Start initialization if not already started
self._init_paddle_lazy()
# Wait for initialization to complete
print(f"[OCR] Waiting for PaddleOCR to be ready (max {timeout}s)...", flush=True)
start = time.time()
ready = self._paddle_ready.wait(timeout=timeout)
elapsed = time.time() - start
if ready and self._paddle is not None:
print(f"[OCR] PaddleOCR ready after {elapsed:.1f}s", flush=True)
return True
else:
print(f"[OCR] PaddleOCR not ready after {elapsed:.1f}s (timeout or failed)", flush=True)
return False
def is_paddle_ready(self) -> bool:
"""Check if PaddleOCR is ready without waiting."""
return self._paddle is not None
def recognize(self, image: np.ndarray) -> OCRResult:
"""Perform OCR on preprocessed image."""
logger.info(f"[OCR] Starting recognition, image shape: {image.shape}, dtype: {image.dtype}")
# Lazy init PaddleOCR on first call
self._init_paddle_lazy()
if PADDLE_AVAILABLE and self._paddle:
logger.info("[OCR] Using PaddleOCR engine")
return self._paddle_recognize(image)
elif TESSERACT_AVAILABLE:
logger.info("[OCR] Using Tesseract engine (PaddleOCR not available)")
return self._tesseract_recognize(image)
else:
logger.error("[OCR] No OCR engine available!")
raise RuntimeError(
"No OCR engine available. Install PaddleOCR or Tesseract."
)
def _paddle_recognize(self, image: np.ndarray) -> OCRResult:
"""Recognize text using PaddleOCR 3.x API."""
# Wait for PaddleOCR to be fully ready (handles background init)
if not self.wait_for_paddle(timeout=30.0):
logger.warning("[PaddleOCR] Not ready, falling back to Tesseract")
if TESSERACT_AVAILABLE:
return self._tesseract_recognize(image)
raise RuntimeError("PaddleOCR not ready and Tesseract not available")
try:
logger.info(f"[PaddleOCR] Processing image, shape: {image.shape}")
# PaddleOCR 3.x requires 3-channel images
if len(image.shape) == 2:
# Convert grayscale to 3-channel BGR
import cv2
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
logger.info(f"[PaddleOCR] Converted to BGR, new shape: {image.shape}")
# PaddleOCR 3.x uses predict() with new parameter names
logger.info("[PaddleOCR] Calling predict()...")
result = self._paddle.predict(image, use_textline_orientation=True)
logger.info(f"[PaddleOCR] predict() returned, result type: {type(result)}")
if not result or len(result) == 0:
logger.warning("[PaddleOCR] No results returned")
return OCRResult(text="", confidence=0.0, boxes=[], engine="paddleocr")
# PaddleOCR 3.x returns OCRResult objects with different structure
ocr_result = result[0]
# Extract texts and scores from the new format
rec_texts = ocr_result.get('rec_texts', [])
rec_scores = ocr_result.get('rec_scores', [])
dt_polys = ocr_result.get('dt_polys', [])
if not rec_texts:
return OCRResult(text="", confidence=0.0, boxes=[], engine="paddleocr")
boxes = []
for i, text in enumerate(rec_texts):
conf = rec_scores[i] if i < len(rec_scores) else 0.0
box = dt_polys[i].tolist() if i < len(dt_polys) else []
boxes.append({
'text': text,
'confidence': float(conf),
'box': box
})
avg_conf = sum(rec_scores) / len(rec_scores) if rec_scores else 0.0
text_result = '\n'.join(rec_texts)
logger.info(f"[PaddleOCR] SUCCESS - Found {len(rec_texts)} text lines, avg confidence: {avg_conf:.2%}")
logger.debug(f"[PaddleOCR] Raw text preview: {text_result[:200]}...")
return OCRResult(
text=text_result,
confidence=float(avg_conf),
boxes=boxes,
engine="paddleocr"
)
except Exception as e:
logger.error(f"[PaddleOCR] ERROR: {e}, falling back to Tesseract")
if TESSERACT_AVAILABLE:
return self._tesseract_recognize(image)
raise
def _tesseract_recognize(self, image: np.ndarray) -> OCRResult:
"""Recognize text using Tesseract."""
global pytesseract
logger.info(f"[Tesseract] Processing image, shape: {image.shape}")
# Lazy import pytesseract
if pytesseract is None:
logger.info("[Tesseract] Importing pytesseract...")
import pytesseract as _pytesseract
pytesseract = _pytesseract
# PSM 4: Single column (best for receipts)
config = '--psm 4 -l ron+eng'
text = pytesseract.image_to_string(image, config=config)
# Quick confidence estimate
data = pytesseract.image_to_data(image, config=config, output_type=pytesseract.Output.DICT)
confidences = [int(c) for c in data['conf'] if int(c) > 0]
avg_conf = sum(confidences) / len(confidences) / 100 if confidences else 0.0
logger.info(f"[Tesseract] Done: {len(text)} chars, conf: {avg_conf:.2%}")
return OCRResult(text=text, confidence=avg_conf, boxes=[], engine="tesseract")
def recognize_dual(self, image: np.ndarray) -> Tuple[OCRResult, Optional[OCRResult]]:
"""
Run both OCR engines and return both results.
Returns:
Tuple of (paddle_result, tesseract_result)
tesseract_result may be None if Tesseract is not available
"""
logger.info(f"[OCR Dual] Starting dual recognition, image shape: {image.shape}")
# Lazy init PaddleOCR
self._init_paddle_lazy()
paddle_result = None
tesseract_result = None
# Run PaddleOCR
if PADDLE_AVAILABLE and self._paddle:
try:
logger.info("[OCR Dual] Running PaddleOCR...")
paddle_result = self._paddle_recognize(image)
logger.info(f"[OCR Dual] PaddleOCR: {len(paddle_result.text)} chars, conf: {paddle_result.confidence:.2%}")
except Exception as e:
logger.error(f"[OCR Dual] PaddleOCR failed: {e}")
paddle_result = OCRResult(text="", confidence=0.0, boxes=[], engine="paddleocr")
# Run Tesseract
if TESSERACT_AVAILABLE:
try:
logger.info("[OCR Dual] Running Tesseract...")
tesseract_result = self._tesseract_recognize(image)
logger.info(f"[OCR Dual] Tesseract: {len(tesseract_result.text)} chars, conf: {tesseract_result.confidence:.2%}")
except Exception as e:
logger.error(f"[OCR Dual] Tesseract failed: {e}")
tesseract_result = OCRResult(text="", confidence=0.0, boxes=[], engine="tesseract")
# Fallback if PaddleOCR not available
if paddle_result is None:
if tesseract_result:
paddle_result = tesseract_result
else:
raise RuntimeError("No OCR engine available")
return paddle_result, tesseract_result
@staticmethod
def get_available_engines() -> List[str]:
"""Return list of available OCR engines."""
engines = []
if PADDLE_AVAILABLE:
engines.append('paddleocr')
if TESSERACT_AVAILABLE:
engines.append('tesseract')
return engines

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,569 @@
"""Main OCR service coordinating preprocessing, recognition, and extraction."""
import os
import re
import logging
# Disable PaddleOCR model source check for faster startup (PaddleX 3.x) - must be set before import
os.environ['PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK'] = 'True'
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor
from decimal import Decimal
from pathlib import Path
from typing import Optional, Tuple
from backend.modules.data_entry.services.ocr_engine import OCREngine
from backend.modules.data_entry.services.ocr_extractor import ReceiptExtractor, ExtractionResult
from backend.modules.data_entry.services.image_preprocessor import ImagePreprocessor
# Setup logging
logger = logging.getLogger(__name__)
class OCRService:
"""Service for OCR processing of receipt images."""
_executor = ThreadPoolExecutor(max_workers=2)
def __init__(self):
self.preprocessor = ImagePreprocessor()
self.ocr_engine = OCREngine()
self.extractor = ReceiptExtractor()
async def process_image(
self,
image_path: Path,
mime_type: str
) -> Tuple[bool, str, Optional[ExtractionResult]]:
"""
Process receipt image and extract structured data.
Args:
image_path: Path to the image file
mime_type: MIME type of the file
Returns:
Tuple of (success, message, extraction_result)
"""
try:
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
self._executor,
self._process_sync,
image_path,
mime_type
)
return result
except Exception as e:
return False, f"OCR processing failed: {str(e)}", None
def _process_sync(
self,
image_path: Path,
mime_type: str
) -> Tuple[bool, str, Optional[ExtractionResult]]:
"""Synchronous processing with ADAPTIVE OCR pipeline."""
start_time = time.time()
print(f"[OCR Service] Starting processing: {image_path}, mime: {mime_type}", flush=True)
# Load image
if mime_type == 'application/pdf':
try:
images = self.preprocessor.pdf_to_images(image_path)
if not images:
return False, "Failed to extract images from PDF", None
image = images[0]
except RuntimeError as e:
return False, str(e), None
else:
try:
image = self.preprocessor.load_image(image_path)
except ValueError as e:
return False, str(e), None
raw_texts = []
extraction = None
# ══════════════════════════════════════════════════════════════
# STEP 1: PaddleOCR + Light (fastest, best for clear PDFs)
# ══════════════════════════════════════════════════════════════
print("=" * 60, flush=True)
print("[OCR] STEP 1: PaddleOCR + Light preprocessing", flush=True)
print("=" * 60, flush=True)
light_img = self.preprocessor.preprocess_light(image)
try:
paddle_light = self.ocr_engine._paddle_recognize(light_img)
if paddle_light and paddle_light.text:
extraction = self.extractor.extract(paddle_light.text)
extraction.ocr_engine = "paddle-light"
raw_texts.append(f"═══ PaddleOCR (light, conf: {paddle_light.confidence:.0%}) ═══\n{paddle_light.text}")
# Log extraction results
print(f"[OCR] Step 1 Results:", flush=True)
print(f" - OCR Confidence: {paddle_light.confidence:.0%}", flush=True)
print(f" - Amount: {extraction.amount}", flush=True)
print(f" - Date: {extraction.receipt_date}", flush=True)
print(f" - Number: {extraction.receipt_number}", flush=True)
print(f" - CUI: {extraction.cui}", flush=True)
print(f" - TVA: {extraction.tva_total} (entries: {len(extraction.tva_entries) if extraction.tva_entries else 0})", flush=True)
print(f" - Overall Confidence: {extraction.overall_confidence:.0%}", flush=True)
# Early exit if complete
if self._is_extraction_complete(extraction):
extraction.raw_text = "\n\n".join(raw_texts)
elapsed_ms = int((time.time() - start_time) * 1000)
extraction.processing_time_ms = elapsed_ms
print(f"[OCR] ✓✓✓ EARLY EXIT at Step 1 - All fields found! ({elapsed_ms}ms) ✓✓✓", flush=True)
return True, "OCR complete (fast mode)", extraction
else:
print("[OCR] → Step 1 incomplete, continuing to Step 2...", flush=True)
except Exception as e:
print(f"[OCR] PaddleOCR light failed: {e}", flush=True)
extraction = ExtractionResult()
# ══════════════════════════════════════════════════════════════
# STEP 2: PaddleOCR + Heavy (for faded thermal receipts)
# ══════════════════════════════════════════════════════════════
print("=" * 60, flush=True)
print("[OCR] STEP 2: PaddleOCR + Heavy preprocessing", flush=True)
print("=" * 60, flush=True)
heavy_img = self.preprocessor.preprocess_heavy(image)
try:
paddle_heavy = self.ocr_engine._paddle_recognize(heavy_img)
if paddle_heavy and paddle_heavy.text:
extraction_heavy = self.extractor.extract(paddle_heavy.text)
extraction_heavy.ocr_engine = "paddle-heavy"
raw_texts.append(f"═══ PaddleOCR (heavy, conf: {paddle_heavy.confidence:.0%}) ═══\n{paddle_heavy.text}")
print(f"[OCR] Step 2 (Heavy) Results:", flush=True)
print(f" - OCR Confidence: {paddle_heavy.confidence:.0%}", flush=True)
print(f" - Amount: {extraction_heavy.amount}", flush=True)
print(f" - Date: {extraction_heavy.receipt_date}", flush=True)
print(f" - CUI: {extraction_heavy.cui}", flush=True)
# Merge with previous
extraction = self._merge_extractions(extraction, extraction_heavy)
print(f"[OCR] After merge:", flush=True)
print(f" - Amount: {extraction.amount}", flush=True)
print(f" - Date: {extraction.receipt_date}", flush=True)
print(f" - Number: {extraction.receipt_number}", flush=True)
print(f" - CUI: {extraction.cui}", flush=True)
print(f" - TVA: {extraction.tva_total}", flush=True)
print(f" - Overall Confidence: {extraction.overall_confidence:.0%}", flush=True)
if self._is_extraction_complete(extraction):
extraction.raw_text = "\n\n".join(raw_texts)
extraction.ocr_engine = "paddle-adaptive"
elapsed_ms = int((time.time() - start_time) * 1000)
extraction.processing_time_ms = elapsed_ms
print(f"[OCR] ✓✓✓ EARLY EXIT at Step 2 - All fields found after merge! ({elapsed_ms}ms) ✓✓✓", flush=True)
return True, "OCR complete (paddle dual)", extraction
else:
print("[OCR] → Step 2 incomplete, continuing to Step 3 (Tesseract)...", flush=True)
except Exception as e:
print(f"[OCR] PaddleOCR heavy failed: {e}", flush=True)
# ══════════════════════════════════════════════════════════════
# STEP 3: Tesseract - ONLY to complete missing fields
# Uses Tesseract-optimized preprocessing (binarized, high contrast)
# ══════════════════════════════════════════════════════════════
print("=" * 60, flush=True)
print("[OCR] STEP 3: Tesseract (complement only, not override)", flush=True)
print("=" * 60, flush=True)
try:
# Use Tesseract-specific preprocessing (Otsu binarization)
tesseract_img = self.preprocessor.preprocess_for_tesseract(image)
tesseract_result = self.ocr_engine._tesseract_recognize(tesseract_img)
if tesseract_result and tesseract_result.text:
extraction_tess = self.extractor.extract(tesseract_result.text)
extraction_tess.ocr_engine = "tesseract"
raw_texts.append(f"═══ Tesseract (conf: {tesseract_result.confidence:.0%}) ═══\n{tesseract_result.text}")
print(f"[OCR] Step 3 (Tesseract) Results:", flush=True)
print(f" - OCR Confidence: {tesseract_result.confidence:.0%}", flush=True)
print(f" - Amount: {extraction_tess.amount}", flush=True)
print(f" - Date: {extraction_tess.receipt_date}", flush=True)
print(f" - CUI: {extraction_tess.cui}", flush=True)
# IMPORTANT: Tesseract only COMPLETES missing fields, never overrides!
extraction = self._complement_extraction(extraction, extraction_tess)
except Exception as e:
print(f"[OCR] Tesseract failed: {e}", flush=True)
# ══════════════════════════════════════════════════════════════
# FINAL VALIDATION: Fix impossible values
# ══════════════════════════════════════════════════════════════
if extraction:
extraction = self._final_validation(extraction)
# Final result
if extraction is None:
return False, "No text detected", None
extraction.raw_text = "\n\n".join(raw_texts)
extraction.ocr_engine = "adaptive-full"
# Build result message
fields_found = []
if extraction.amount: fields_found.append("amount")
if extraction.receipt_date: fields_found.append("date")
if extraction.receipt_number: fields_found.append("number")
if extraction.cui: fields_found.append("CUI")
if extraction.tva_total or extraction.tva_entries: fields_found.append("TVA")
message = f"OCR complete (full pipeline). Found: {', '.join(fields_found) or 'no fields'}"
elapsed_ms = int((time.time() - start_time) * 1000)
extraction.processing_time_ms = elapsed_ms
print("=" * 60, flush=True)
print(f"[OCR] FINAL RESULT (full pipeline) - {elapsed_ms}ms", flush=True)
print("=" * 60, flush=True)
print(f" - Amount: {extraction.amount}", flush=True)
print(f" - Date: {extraction.receipt_date}", flush=True)
print(f" - Number: {extraction.receipt_number}", flush=True)
print(f" - CUI: {extraction.cui}", flush=True)
print(f" - TVA: {extraction.tva_total}", flush=True)
print(f" - Overall Confidence: {extraction.overall_confidence:.0%}", flush=True)
print(f" - Processing Time: {elapsed_ms}ms", flush=True)
print(f" - Message: {message}", flush=True)
return True, message, extraction
def _merge_extractions(
self,
paddle: Optional[ExtractionResult],
tesseract: Optional[ExtractionResult]
) -> ExtractionResult:
"""
Merge two extractions, picking best fields from each engine.
Strategy:
- For each field, prefer the one with higher confidence
- Use validation rules (CUI format, date validity, company indicators)
- Combine TVA entries if different
"""
result = ExtractionResult()
# Handle case where one is None
if paddle is None and tesseract is None:
return result
if paddle is None:
return tesseract
if tesseract is None:
return paddle
print("[Merge] Comparing PaddleOCR vs Tesseract extractions...", flush=True)
# === AMOUNT ===
# Pick higher confidence, both must be positive
if paddle.amount and tesseract.amount:
if paddle.confidence_amount >= tesseract.confidence_amount:
result.amount = paddle.amount
result.confidence_amount = paddle.confidence_amount
print(f"[Merge] Amount: PaddleOCR {paddle.amount} (conf: {paddle.confidence_amount:.0%})", flush=True)
else:
result.amount = tesseract.amount
result.confidence_amount = tesseract.confidence_amount
print(f"[Merge] Amount: Tesseract {tesseract.amount} (conf: {tesseract.confidence_amount:.0%})", flush=True)
elif paddle.amount:
result.amount = paddle.amount
result.confidence_amount = paddle.confidence_amount
elif tesseract.amount:
result.amount = tesseract.amount
result.confidence_amount = tesseract.confidence_amount
# === DATE ===
# Pick higher confidence, validate date reasonableness
if paddle.receipt_date and tesseract.receipt_date:
if paddle.confidence_date >= tesseract.confidence_date:
result.receipt_date = paddle.receipt_date
result.confidence_date = paddle.confidence_date
print(f"[Merge] Date: PaddleOCR {paddle.receipt_date}", flush=True)
else:
result.receipt_date = tesseract.receipt_date
result.confidence_date = tesseract.confidence_date
print(f"[Merge] Date: Tesseract {tesseract.receipt_date}", flush=True)
elif paddle.receipt_date:
result.receipt_date = paddle.receipt_date
result.confidence_date = paddle.confidence_date
elif tesseract.receipt_date:
result.receipt_date = tesseract.receipt_date
result.confidence_date = tesseract.confidence_date
# === VENDOR NAME ===
# Prefer one with company indicators (S.R.L., S.A., etc.)
paddle_has_indicator = self._has_company_indicator(paddle.partner_name)
tesseract_has_indicator = self._has_company_indicator(tesseract.partner_name)
if paddle.partner_name and tesseract.partner_name:
if paddle_has_indicator and not tesseract_has_indicator:
result.partner_name = paddle.partner_name
result.confidence_vendor = paddle.confidence_vendor
print(f"[Merge] Vendor: PaddleOCR '{paddle.partner_name}' (has company indicator)", flush=True)
elif tesseract_has_indicator and not paddle_has_indicator:
result.partner_name = tesseract.partner_name
result.confidence_vendor = tesseract.confidence_vendor
print(f"[Merge] Vendor: Tesseract '{tesseract.partner_name}' (has company indicator)", flush=True)
elif paddle.confidence_vendor >= tesseract.confidence_vendor:
result.partner_name = paddle.partner_name
result.confidence_vendor = paddle.confidence_vendor
print(f"[Merge] Vendor: PaddleOCR '{paddle.partner_name}' (higher conf)", flush=True)
else:
result.partner_name = tesseract.partner_name
result.confidence_vendor = tesseract.confidence_vendor
print(f"[Merge] Vendor: Tesseract '{tesseract.partner_name}' (higher conf)", flush=True)
elif paddle.partner_name:
result.partner_name = paddle.partner_name
result.confidence_vendor = paddle.confidence_vendor
elif tesseract.partner_name:
result.partner_name = tesseract.partner_name
result.confidence_vendor = tesseract.confidence_vendor
# === CUI (Fiscal Code) ===
# Validate format: 6-10 digits, prefer valid one
paddle_cui_valid = self._is_valid_cui(paddle.cui)
tesseract_cui_valid = self._is_valid_cui(tesseract.cui)
if paddle.cui and tesseract.cui:
if paddle_cui_valid and not tesseract_cui_valid:
result.cui = paddle.cui
print(f"[Merge] CUI: PaddleOCR {paddle.cui} (valid format)", flush=True)
elif tesseract_cui_valid and not paddle_cui_valid:
result.cui = tesseract.cui
print(f"[Merge] CUI: Tesseract {tesseract.cui} (valid format)", flush=True)
else:
# Both valid or both invalid - prefer PaddleOCR
result.cui = paddle.cui
print(f"[Merge] CUI: PaddleOCR {paddle.cui}", flush=True)
elif paddle.cui and paddle_cui_valid:
result.cui = paddle.cui
elif tesseract.cui and tesseract_cui_valid:
result.cui = tesseract.cui
elif paddle.cui:
result.cui = paddle.cui
elif tesseract.cui:
result.cui = tesseract.cui
# === TVA ENTRIES ===
# Prefer non-empty, use the one with more entries or higher amounts
if paddle.tva_entries and tesseract.tva_entries:
# Compare: prefer the one with actual amounts (not just 0)
paddle_total = sum(e.get('amount', Decimal('0')) for e in paddle.tva_entries)
tesseract_total = sum(e.get('amount', Decimal('0')) for e in tesseract.tva_entries)
if paddle_total >= tesseract_total:
result.tva_entries = paddle.tva_entries
result.tva_total = paddle.tva_total
print(f"[Merge] TVA: PaddleOCR (total: {paddle_total})", flush=True)
else:
result.tva_entries = tesseract.tva_entries
result.tva_total = tesseract.tva_total
print(f"[Merge] TVA: Tesseract (total: {tesseract_total})", flush=True)
elif paddle.tva_entries:
result.tva_entries = paddle.tva_entries
result.tva_total = paddle.tva_total
elif tesseract.tva_entries:
result.tva_entries = tesseract.tva_entries
result.tva_total = tesseract.tva_total
# === OTHER FIELDS ===
# Simple preference: paddle > tesseract
result.receipt_number = paddle.receipt_number or tesseract.receipt_number
result.receipt_series = paddle.receipt_series or tesseract.receipt_series
result.receipt_type = paddle.receipt_type or tesseract.receipt_type
result.items_count = paddle.items_count or tesseract.items_count
result.address = paddle.address or tesseract.address
result.description = paddle.description or tesseract.description
return result
def _has_company_indicator(self, name: Optional[str]) -> bool:
"""Check if vendor name has company type indicator (S.R.L., S.A., etc.)"""
if not name:
return False
name_upper = name.upper()
indicators = [
r'\bS\.?\s*R\.?\s*L\.?\b',
r'\bS\.?\s*A\.?\b',
r'\bS\.?\s*N\.?\s*C\.?\b',
r'\bP\.?\s*F\.?\s*A\.?\b',
r'\bI\.?\s*I\.?\b',
r'\bHOLDING\b',
r'\bGROUP\b',
r'\bCOMPANY\b',
]
for indicator in indicators:
if re.search(indicator, name_upper):
return True
return False
def _is_valid_cui(self, cui: Optional[str]) -> bool:
"""Validate CUI format: 6-10 digits."""
if not cui:
return False
# Remove any RO prefix
cui_clean = re.sub(r'^RO', '', cui.upper())
# Must be 6-10 digits
return bool(re.match(r'^\d{6,10}$', cui_clean))
def _is_extraction_complete(self, ext: ExtractionResult, min_confidence: float = 0.85) -> bool:
"""
Check if extraction has ALL required fields to skip further processing.
Required for early exit (ALL must be true):
- Overall confidence >= 85%
- ALL 5 critical fields present: number, date, amount, TVA, CUI
"""
# Must have high confidence
if ext.overall_confidence < min_confidence:
print(f"[OCR] Confidence {ext.overall_confidence:.0%} < {min_confidence:.0%} - continuing", flush=True)
return False
# Check all required fields
has_number = bool(ext.receipt_number)
has_date = bool(ext.receipt_date)
has_amount = bool(ext.amount)
has_tva = bool(ext.tva_total) or bool(ext.tva_entries)
has_cui = bool(ext.cui)
missing = []
if not has_number: missing.append("number")
if not has_date: missing.append("date")
if not has_amount: missing.append("amount")
if not has_tva: missing.append("TVA")
if not has_cui: missing.append("CUI")
if missing:
print(f"[OCR] Missing: {', '.join(missing)} - continuing", flush=True)
return False
print(f"[OCR] ✓ All 5 fields found with {ext.overall_confidence:.0%} confidence", flush=True)
return True
def _complement_extraction(
self,
primary: Optional[ExtractionResult],
secondary: Optional[ExtractionResult]
) -> ExtractionResult:
"""
Complement primary extraction with missing fields from secondary.
NEVER overrides existing values - only fills in gaps.
This is different from _merge_extractions which can override values.
"""
if primary is None and secondary is None:
return ExtractionResult()
if primary is None:
return secondary
if secondary is None:
return primary
print("[Complement] Adding missing fields from Tesseract...", flush=True)
# Only fill missing amount
if not primary.amount and secondary.amount:
primary.amount = secondary.amount
primary.confidence_amount = secondary.confidence_amount
print(f"[Complement] Added amount: {secondary.amount}", flush=True)
# Only fill missing date
if not primary.receipt_date and secondary.receipt_date:
primary.receipt_date = secondary.receipt_date
primary.confidence_date = secondary.confidence_date
print(f"[Complement] Added date: {secondary.receipt_date}", flush=True)
# Only fill missing vendor
if not primary.partner_name and secondary.partner_name:
primary.partner_name = secondary.partner_name
primary.confidence_vendor = secondary.confidence_vendor
print(f"[Complement] Added vendor: {secondary.partner_name}", flush=True)
# Only fill missing CUI
if not primary.cui and secondary.cui and self._is_valid_cui(secondary.cui):
primary.cui = secondary.cui
print(f"[Complement] Added CUI: {secondary.cui}", flush=True)
# Only fill missing TVA
if not primary.tva_entries and secondary.tva_entries:
primary.tva_entries = secondary.tva_entries
primary.tva_total = secondary.tva_total
print(f"[Complement] Added TVA: {secondary.tva_total}", flush=True)
# Only fill missing receipt number
if not primary.receipt_number and secondary.receipt_number:
primary.receipt_number = secondary.receipt_number
print(f"[Complement] Added number: {secondary.receipt_number}", flush=True)
# Only fill missing address
if not primary.address and secondary.address:
primary.address = secondary.address
print(f"[Complement] Added address: {secondary.address}", flush=True)
return primary
def _final_validation(self, extraction: ExtractionResult) -> ExtractionResult:
"""
Final validation and correction of impossible values.
Key rules:
1. TVA cannot be greater than TOTAL (it's always a fraction)
2. If TVA > TOTAL, recalculate TOTAL from TVA using known rates
3. Validate TVA entries sum equals TVA total
"""
print("[Final Validation] Checking extracted values...", flush=True)
# Rule 1: TVA cannot be greater than TOTAL
if extraction.tva_total and extraction.amount:
if extraction.tva_total > extraction.amount:
print(f"[Final Validation] TVA ({extraction.tva_total}) > TOTAL ({extraction.amount}) - IMPOSSIBLE!", flush=True)
# Calculate TOTAL from TVA using reverse formula:
# total = base + tva = tva * (100/rate + 1) = tva * (100 + rate) / rate
# For 9% TVA: total = tva * 109 / 9 = tva * 12.11
# For 19% TVA: total = tva * 119 / 19 = tva * 6.26
# For 21% TVA: total = tva * 121 / 21 = tva * 5.76
rate = 19 # Default rate assumption
if extraction.tva_entries:
# Use the rate from the first entry
rate = extraction.tva_entries[0].get('percent', 19)
if rate > 0:
# Formula: total = tva * (100 + rate) / rate
calculated_total = extraction.tva_total * (Decimal('100') + Decimal(str(rate))) / Decimal(str(rate))
calculated_total = calculated_total.quantize(Decimal('0.01'))
print(f"[Final Validation] Calculated TOTAL from TVA: {calculated_total} (using {rate}% rate)", flush=True)
extraction.amount = calculated_total
extraction.confidence_amount = 0.70 # Lower confidence for calculated value
# Rule 2: TVA cannot be more than ~25% of total (max Romanian rate is 21%)
if extraction.tva_total and extraction.amount:
tva_percent = extraction.tva_total / extraction.amount * Decimal('100')
if tva_percent > Decimal('25'):
print(f"[Final Validation] Warning: TVA is {tva_percent:.1f}% of total - suspicious", flush=True)
# Rule 3: Validate TVA entries sum
if extraction.tva_entries and extraction.tva_total:
entries_sum = sum(e.get('amount', Decimal('0')) for e in extraction.tva_entries)
tolerance = Decimal('0.05')
if abs(entries_sum - extraction.tva_total) > tolerance:
print(f"[Final Validation] TVA entries sum ({entries_sum}) != tva_total ({extraction.tva_total})", flush=True)
# Use the sum as it's more reliable
extraction.tva_total = entries_sum
print(f"[Final Validation] Done. Amount={extraction.amount}, TVA={extraction.tva_total}", flush=True)
return extraction
# Singleton instance
ocr_service = OCRService()

View File

@@ -0,0 +1,447 @@
"""Business logic service for receipts workflow."""
from decimal import Decimal, ROUND_HALF_UP
from typing import List, Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptStatus, ReceiptDirection
from backend.modules.data_entry.db.models.accounting_entry import EntryType
from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
from backend.modules.data_entry.schemas.receipt import (
ReceiptCreate,
ReceiptUpdate,
ReceiptFilter,
ReceiptResponse,
ReceiptListResponse,
AccountingEntryCreate,
)
from backend.modules.data_entry.services.expense_types import EXPENSE_TYPES, get_expense_type
# Payment mode to accounting account mapping
PAYMENT_MODE_ACCOUNTS = {
'casa': ('5311', 'Casa in lei'),
'banca': ('5121', 'Conturi la banci in lei'),
'avans_decontare': ('542', 'Avansuri de trezorerie'),
}
class ReceiptService:
"""Service for receipt business logic and workflow."""
@staticmethod
async def create_receipt(
session: AsyncSession,
data: ReceiptCreate,
created_by: str,
) -> Receipt:
"""Create a new receipt in DRAFT status."""
return await ReceiptCRUD.create(session, data, created_by)
@staticmethod
async def get_receipt(
session: AsyncSession,
receipt_id: int,
) -> Optional[Receipt]:
"""Get receipt by ID with all relationships."""
return await ReceiptCRUD.get_by_id(session, receipt_id, include_relations=True)
@staticmethod
async def get_receipts(
session: AsyncSession,
filters: ReceiptFilter,
) -> ReceiptListResponse:
"""Get paginated list of receipts."""
receipts, total = await ReceiptCRUD.get_list(session, filters)
pages = (total + filters.page_size - 1) // filters.page_size if total > 0 else 1
return ReceiptListResponse(
items=[ReceiptResponse.model_validate(r) for r in receipts],
total=total,
page=filters.page,
page_size=filters.page_size,
pages=pages,
)
@staticmethod
async def update_receipt(
session: AsyncSession,
receipt_id: int,
data: ReceiptUpdate,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Update receipt (only DRAFT status).
Returns (success, message, receipt).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if not await ReceiptCRUD.can_edit(receipt, username):
return False, "Cannot edit this receipt", None
updated = await ReceiptCRUD.update(session, receipt, data)
return True, "Receipt updated", updated
@staticmethod
async def delete_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str]:
"""
Delete receipt (only DRAFT status).
Returns (success, message).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found"
if not await ReceiptCRUD.can_delete(receipt, username):
return False, "Cannot delete this receipt"
await ReceiptCRUD.delete(session, receipt)
return True, "Receipt deleted"
@staticmethod
def generate_accounting_entries(receipt: Receipt) -> List[AccountingEntryCreate]:
"""
Generate accounting entries based on receipt data and expense type.
"""
entries: List[AccountingEntryCreate] = []
# Get expense type configuration
expense_type = get_expense_type(receipt.expense_type_code or "OTHER")
if not expense_type:
expense_type = EXPENSE_TYPES["OTHER"]
amount = Decimal(str(receipt.amount))
if receipt.direction == ReceiptDirection.CHELTUIALA:
# Expense: Debit expense account, Credit cash/bank
if expense_type.has_vat:
# Calculate net and VAT
vat_rate = expense_type.vat_percent / Decimal("100")
net_amount = (amount / (1 + vat_rate)).quantize(
Decimal("0.01"), rounding=ROUND_HALF_UP
)
vat_amount = amount - net_amount
# Debit: Expense account (net)
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=expense_type.account_code,
account_name=expense_type.account_name,
amount=net_amount,
))
# Debit: VAT deductible
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=expense_type.vat_account,
account_name="TVA deductibila",
amount=vat_amount,
))
else:
# No VAT - full amount to expense
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=expense_type.account_code,
account_name=expense_type.account_name,
amount=amount,
))
# Credit entry - based on payment_mode (new) or cash_register (legacy)
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
credit_account, credit_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
elif receipt.cash_register_account:
# Backwards compatibility for existing receipts
credit_account = receipt.cash_register_account
credit_name = receipt.cash_register_name or "Casa/Banca"
else:
# Default fallback
credit_account = "5311"
credit_name = "Casa in lei"
entries.append(AccountingEntryCreate(
entry_type=EntryType.CREDIT,
account_code=credit_account,
account_name=credit_name,
amount=amount,
))
else:
# Income: Debit cash/bank, Credit income account
# Based on payment_mode (new) or cash_register (legacy)
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
cash_account, cash_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
elif receipt.cash_register_account:
cash_account = receipt.cash_register_account
cash_name = receipt.cash_register_name or "Casa/Banca"
else:
cash_account = "5311"
cash_name = "Casa in lei"
# Debit: Cash/Bank
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=cash_account,
account_name=cash_name,
amount=amount,
))
# Credit: Income account (7xx - to be configured)
entries.append(AccountingEntryCreate(
entry_type=EntryType.CREDIT,
account_code="7588",
account_name="Alte venituri din exploatare",
amount=amount,
))
return entries
@staticmethod
async def submit_for_review(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Submit receipt for review (DRAFT/REJECTED → PENDING_REVIEW).
Generates accounting entries automatically.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if not await ReceiptCRUD.can_submit(receipt, username):
return False, "Cannot submit this receipt", None
# Check if receipt has at least one attachment
if not receipt.attachments:
return False, "Receipt must have at least one attachment", None
# Check required fields
if not receipt.expense_type_code:
return False, "Expense type is required", None
# Validate payment_mode or cash_register (backwards compatibility)
if not receipt.payment_mode and not receipt.cash_register_account:
return False, "Modul de plata este obligatoriu", None
# Generate accounting entries
entries = ReceiptService.generate_accounting_entries(receipt)
# Delete existing entries and create new ones
await AccountingEntryCRUD.delete_all_for_receipt(session, receipt_id)
await AccountingEntryCRUD.create_bulk(session, receipt_id, entries, is_auto_generated=True)
# Refresh receipt to clear stale relationship references after entry deletion
await session.refresh(receipt)
# Update status
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.PENDING_REVIEW
)
# Reload with entries
updated = await ReceiptCRUD.get_by_id(session, receipt_id)
return True, "Receipt submitted for review", updated
@staticmethod
async def approve_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Approve receipt (PENDING_REVIEW → APPROVED).
Requires valid CUI (fiscal code) for approval.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.PENDING_REVIEW:
return False, "Receipt is not pending review", None
# Validate CUI is present (required for Oracle import)
if not receipt.cui:
return False, "Trebuie completat codul fiscal (CUI) pentru aprobare", None
# Validate accounting entries
if not receipt.entries:
return False, "Receipt has no accounting entries", None
# Update status
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.APPROVED, reviewed_by=username
)
return True, "Receipt approved", updated
@staticmethod
async def unapprove_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Unapprove receipt (APPROVED → PENDING_REVIEW).
Returns receipt to pending review for corrections.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.APPROVED:
return False, "Receipt is not approved", None
# Update status back to pending review
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.PENDING_REVIEW
)
return True, "Receipt returned to pending review", updated
@staticmethod
async def reject_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
reason: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Reject receipt (PENDING_REVIEW → REJECTED).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.PENDING_REVIEW:
return False, "Receipt is not pending review", None
# Update status
updated = await ReceiptCRUD.update_status(
session,
receipt,
ReceiptStatus.REJECTED,
reviewed_by=username,
rejection_reason=reason,
)
return True, "Receipt rejected", updated
@staticmethod
async def resubmit_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Resubmit rejected receipt after corrections (REJECTED → PENDING_REVIEW).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.REJECTED:
return False, "Receipt is not rejected", None
if receipt.created_by != username:
return False, "Only the creator can resubmit", None
# Re-generate accounting entries
entries = ReceiptService.generate_accounting_entries(receipt)
await AccountingEntryCRUD.delete_all_for_receipt(session, receipt_id)
await AccountingEntryCRUD.create_bulk(session, receipt_id, entries, is_auto_generated=True)
# Refresh receipt to clear stale relationship references after entry deletion
await session.refresh(receipt)
# Update status
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.PENDING_REVIEW
)
# Reload with entries
updated = await ReceiptCRUD.get_by_id(session, receipt_id)
return True, "Receipt resubmitted for review", updated
@staticmethod
async def regenerate_entries(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, List[AccountingEntryCreate]]:
"""
Regenerate accounting entries for a receipt.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", []
if receipt.status not in [ReceiptStatus.DRAFT, ReceiptStatus.PENDING_REVIEW]:
return False, "Cannot regenerate entries for this receipt status", []
# Generate new entries
entries = ReceiptService.generate_accounting_entries(receipt)
# Replace existing entries
await AccountingEntryCRUD.delete_all_for_receipt(session, receipt_id)
await AccountingEntryCRUD.create_bulk(session, receipt_id, entries, is_auto_generated=True)
return True, "Entries regenerated", entries
@staticmethod
async def update_entries(
session: AsyncSession,
receipt_id: int,
entries: List[AccountingEntryCreate],
username: str,
) -> Tuple[bool, str, List]:
"""
Update accounting entries for a receipt (accountant action).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", []
if receipt.status != ReceiptStatus.PENDING_REVIEW:
return False, "Can only modify entries for receipts pending review", []
# Validate entries
is_valid, error = await AccountingEntryCRUD.validate_entries(entries)
if not is_valid:
return False, error, []
# Replace entries
updated_entries = await AccountingEntryCRUD.replace_all_for_receipt(
session, receipt_id, entries, username
)
return True, "Entries updated", updated_entries
@staticmethod
async def get_pending_count(
session: AsyncSession,
company_id: Optional[int] = None,
) -> int:
"""Get count of receipts pending review."""
receipts = await ReceiptCRUD.get_pending_review(session, company_id)
return len(receipts)

View File

@@ -0,0 +1,406 @@
"""Service for syncing nomenclatures from Oracle to SQLite."""
import sys
from pathlib import Path
from typing import Optional, List, Tuple
from datetime import datetime
import logging
from sqlmodel import select
from sqlalchemy.ext.asyncio import AsyncSession
# Path setup handled by main.py - this is redundant
# project_root = Path(__file__).parent.parent.parent.parent.parent
# sys.path.insert(0, str(project_root / "shared"))
from shared.database.oracle_pool import oracle_pool
from backend.modules.data_entry.db.models.nomenclature import SyncedSupplier, LocalSupplier, SyncedCashRegister
logger = logging.getLogger(__name__)
# Cache for schema lookups (populated dynamically from Oracle)
_schema_cache: dict[int, str] = {}
class SyncService:
"""Service for syncing nomenclatures from Oracle."""
@staticmethod
async def get_schema_for_company(company_id: int) -> Optional[str]:
"""
Get Oracle schema for company ID from V_NOM_FIRME view.
Results are cached in memory for performance.
"""
# Check cache first
if company_id in _schema_cache:
return _schema_cache[company_id]
try:
async with oracle_pool.get_connection() as connection:
with connection.cursor() as cursor:
cursor.execute("""
SELECT SCHEMA
FROM CONTAFIN_ORACLE.V_NOM_FIRME
WHERE ID_FIRMA = :company_id
""", {'company_id': company_id})
result = cursor.fetchone()
if result:
schema = result[0]
_schema_cache[company_id] = schema
logger.info(f"Resolved schema for company {company_id}: {schema}")
return schema
else:
logger.warning(f"No schema found for company {company_id}")
return None
except Exception as e:
logger.error(f"Error fetching schema for company {company_id}: {e}")
return None
@staticmethod
async def sync_suppliers(session: AsyncSession, company_id: int) -> Tuple[int, int]:
"""
Sync suppliers (furnizori, id_tip_part=17) from Oracle to SQLite.
Uses CORESP_TIP_PART joined with VNOM_PARTENERI view.
Returns (synced_count, error_count).
"""
schema = await SyncService.get_schema_for_company(company_id)
if not schema:
logger.warning(f"No schema mapping for company {company_id}")
return 0, 0
synced = 0
errors = 0
try:
async with oracle_pool.get_connection() as connection:
with connection.cursor() as cursor:
# Fetch active suppliers from Oracle
# id_tip_part = 17 means "furnizori" (suppliers)
# Using CORESP_TIP_PART to filter by partner type
cursor.execute(f"""
SELECT B.ID_PART, B.DENUMIRE, B.COD_FISCAL, B.ADRESA
FROM {schema}.CORESP_TIP_PART A
INNER JOIN {schema}.VNOM_PARTENERI B ON A.ID_PART = B.ID_PART
WHERE A.ID_TIP_PART = 17
AND (B.INACTIV = 0 OR B.INACTIV IS NULL)
AND B.ID_PART IS NOT NULL
ORDER BY B.DENUMIRE
""")
rows = cursor.fetchall()
for row in rows:
try:
oracle_id, name, fiscal_code, address = row
# Check if already exists
stmt = select(SyncedSupplier).where(
SyncedSupplier.oracle_id == oracle_id,
SyncedSupplier.company_id == company_id
)
result = await session.execute(stmt)
existing = result.scalar_one_or_none()
if existing:
# Update existing record
existing.name = name or ""
existing.fiscal_code = fiscal_code
existing.address = address
existing.synced_at = datetime.utcnow()
logger.debug(f"Updated supplier {oracle_id}: {name}")
else:
# Create new record
supplier = SyncedSupplier(
oracle_id=oracle_id,
company_id=company_id,
name=name or "",
fiscal_code=fiscal_code,
address=address,
)
session.add(supplier)
logger.debug(f"Created supplier {oracle_id}: {name}")
synced += 1
except Exception as e:
logger.error(f"Error processing supplier row {row}: {e}")
errors += 1
# Commit all changes
await session.commit()
logger.info(f"Synced {synced} suppliers for company {company_id}, {errors} errors")
except Exception as e:
logger.error(f"Error syncing suppliers for company {company_id}: {e}")
errors += 1
await session.rollback()
return synced, errors
@staticmethod
async def sync_cash_registers(session: AsyncSession, company_id: int) -> Tuple[int, int]:
"""
Sync cash registers and bank accounts from Oracle to SQLite.
Returns (synced_count, error_count).
Uses CORESP_TIP_PART with:
- id_tip_part = 22: CASA LEI
- id_tip_part = 23: CASA VALUTA
- id_tip_part = 24: BANCA LEI
- id_tip_part = 25: BANCA VALUTA
"""
schema = await SyncService.get_schema_for_company(company_id)
if not schema:
logger.warning(f"No schema mapping for company {company_id}")
return 0, 0
synced = 0
errors = 0
# Partner types mapping
# 22=CASA LEI, 23=CASA VALUTA -> cash
# 24=BANCA LEI, 25=BANCA VALUTA -> bank
partner_types = [22, 23, 24, 25]
try:
async with oracle_pool.get_connection() as connection:
with connection.cursor() as cursor:
# Fetch cash/bank partners from CORESP_TIP_PART
cursor.execute(f"""
SELECT B.ID_PART, B.DENUMIRE, A.ID_TIP_PART
FROM {schema}.CORESP_TIP_PART A
INNER JOIN {schema}.VNOM_PARTENERI B ON A.ID_PART = B.ID_PART
WHERE A.ID_TIP_PART IN (22, 23, 24, 25)
AND (B.INACTIV = 0 OR B.INACTIV IS NULL)
AND B.ID_PART IS NOT NULL
ORDER BY A.ID_TIP_PART, B.DENUMIRE
""")
rows = cursor.fetchall()
# Type mapping: 22=CASA LEI, 23=CASA VALUTA -> cash; 24=BANCA LEI, 25=BANCA VALUTA -> bank
type_mapping = {
22: ("cash", "CASA_LEI"),
23: ("cash", "CASA_VALUTA"),
24: ("bank", "BANCA_LEI"),
25: ("bank", "BANCA_VALUTA"),
}
for row in rows:
try:
oracle_id, name, tip_part_id = row
# Determine type based on partner type
register_type, account_code = type_mapping.get(tip_part_id, ("cash", "UNKNOWN"))
# Check if already exists
stmt = select(SyncedCashRegister).where(
SyncedCashRegister.oracle_id == oracle_id,
SyncedCashRegister.company_id == company_id
)
result = await session.execute(stmt)
existing = result.scalar_one_or_none()
if existing:
# Update existing record
existing.name = name or ""
existing.account_code = account_code
existing.register_type = register_type
existing.synced_at = datetime.utcnow()
logger.debug(f"Updated cash register {oracle_id}: {name}")
else:
# Create new record
cash_register = SyncedCashRegister(
oracle_id=oracle_id,
company_id=company_id,
name=name or "",
account_code=account_code,
register_type=register_type,
)
session.add(cash_register)
logger.debug(f"Created cash register {oracle_id}: {name}")
synced += 1
except Exception as e:
logger.error(f"Error processing cash register row {row}: {e}")
errors += 1
# Commit all changes
await session.commit()
logger.info(f"Synced {synced} cash registers for company {company_id}, {errors} errors")
except Exception as e:
logger.error(f"Error syncing cash registers for company {company_id}: {e}")
errors += 1
await session.rollback()
return synced, errors
@staticmethod
async def search_supplier(
session: AsyncSession,
company_id: int,
fiscal_code: Optional[str] = None,
name: Optional[str] = None
) -> Tuple[bool, Optional[dict], str]:
"""
Search for supplier in SQLite first, then Oracle if not found.
Returns (found, supplier_data, source).
Source can be: 'synced', 'local', 'not_found'
"""
# 1. Search in synced suppliers
if fiscal_code:
stmt = select(SyncedSupplier).where(
SyncedSupplier.company_id == company_id,
SyncedSupplier.fiscal_code == fiscal_code
)
elif name:
stmt = select(SyncedSupplier).where(
SyncedSupplier.company_id == company_id,
SyncedSupplier.name.ilike(f"%{name}%")
)
else:
return False, None, "no_query"
result = await session.execute(stmt)
supplier = result.scalar_one_or_none()
if supplier:
# Return only text data - no IDs needed for autocomplete
return True, {
"name": supplier.name,
"fiscal_code": supplier.fiscal_code,
"address": supplier.address,
}, "synced"
# 2. Search in local suppliers
if fiscal_code:
stmt = select(LocalSupplier).where(
LocalSupplier.company_id == company_id,
LocalSupplier.fiscal_code == fiscal_code
)
elif name:
stmt = select(LocalSupplier).where(
LocalSupplier.company_id == company_id,
LocalSupplier.name.ilike(f"%{name}%")
)
result = await session.execute(stmt)
local = result.scalar_one_or_none()
if local:
# Return only text data - no IDs needed for autocomplete
return True, {
"name": local.name,
"fiscal_code": local.fiscal_code,
"address": local.address,
}, "local"
# 3. Try live Oracle search (optional fallback for unsynced data)
# This is a fallback - ideally sync should be up to date
# TODO: Implement live Oracle search if needed
return False, None, "not_found"
@staticmethod
async def create_local_supplier(
session: AsyncSession,
company_id: int,
name: str,
fiscal_code: Optional[str],
address: Optional[str],
created_by: str
) -> LocalSupplier:
"""Create a local supplier entry from OCR data."""
supplier = LocalSupplier(
company_id=company_id,
name=name,
fiscal_code=fiscal_code,
address=address,
created_by=created_by,
)
session.add(supplier)
await session.commit()
await session.refresh(supplier)
logger.info(f"Created local supplier: {name} (CUI: {fiscal_code})")
return supplier
@staticmethod
async def get_all_suppliers(
session: AsyncSession,
company_id: int,
search: Optional[str] = None
) -> List[dict]:
"""
Get all suppliers (synced + local) for a company.
Used for dropdown/autocomplete in UI.
"""
suppliers = []
# Get synced suppliers
stmt = select(SyncedSupplier).where(SyncedSupplier.company_id == company_id)
if search:
stmt = stmt.where(
(SyncedSupplier.name.ilike(f"%{search}%")) |
(SyncedSupplier.fiscal_code.ilike(f"%{search}%"))
)
stmt = stmt.limit(50) # Limit results for performance
result = await session.execute(stmt)
synced = result.scalars().all()
for s in synced:
suppliers.append({
"id": s.id,
"oracle_id": s.oracle_id,
"name": s.name,
"fiscal_code": s.fiscal_code,
"source": "synced"
})
# Get local suppliers
stmt = select(LocalSupplier).where(LocalSupplier.company_id == company_id)
if search:
stmt = stmt.where(
(LocalSupplier.name.ilike(f"%{search}%")) |
(LocalSupplier.fiscal_code.ilike(f"%{search}%"))
)
stmt = stmt.limit(50)
result = await session.execute(stmt)
local = result.scalars().all()
for l in local:
suppliers.append({
"id": l.id,
"name": l.name,
"fiscal_code": l.fiscal_code,
"source": "local"
})
return suppliers
@staticmethod
async def get_all_cash_registers(
session: AsyncSession,
company_id: int
) -> List[dict]:
"""
Get all cash registers for a company.
Used for dropdown in UI.
"""
stmt = select(SyncedCashRegister).where(SyncedCashRegister.company_id == company_id)
result = await session.execute(stmt)
registers = result.scalars().all()
return [
{
"id": r.id,
"oracle_id": r.oracle_id,
"name": r.name,
"account_code": r.account_code,
"register_type": r.register_type
}
for r in registers
]