feat: Add multiple TVA entries support for Romanian receipts

- Add TvaEntry schema supporting multiple TVA rates (A, B, C, D codes)
- Update OCR extractor to extract multiple TVA entries from receipts
- Support both old (19%, 9%, 5%) and new Romanian rates (21%, 11% from Aug 2025)
- Add tva_breakdown, tva_total, items_count, vendor_address to Receipt model
- Update OCRPreview.vue to display TVA entries with rate badges
- Add "Detalii Suplimentare" section in ReceiptCreateView with editable TVA table
- Add TVA breakdown display in ReceiptDetailView
- Create database migration for new TVA columns

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-12 16:23:53 +02:00
parent 41ae97180e
commit 20448f7aa0
11 changed files with 1021 additions and 68 deletions

View File

@@ -51,6 +51,12 @@ class Receipt(SQLModel, table=True):
amount: Decimal = Field(decimal_places=2, max_digits=15) amount: Decimal = Field(decimal_places=2, max_digits=15)
description: Optional[str] = Field(default=None, max_length=500) description: Optional[str] = Field(default=None, max_length=500)
# TVA info (extracted from OCR) - stored as JSON for multiple entries
tva_breakdown: Optional[str] = Field(default=None, max_length=1000) # JSON: [{"code":"A","percent":19,"amount":"15.20"}]
tva_total: Optional[Decimal] = Field(default=None, decimal_places=2, max_digits=15)
items_count: Optional[int] = Field(default=None)
vendor_address: Optional[str] = Field(default=None, max_length=500)
# Expense type (for auto-generating accounting entries) # Expense type (for auto-generating accounting entries)
expense_type_code: Optional[str] = Field(default=None, max_length=20) expense_type_code: Optional[str] = Field(default=None, max_length=20)

View File

@@ -11,7 +11,7 @@ from app.db.database import get_session
from app.db.crud.attachment import AttachmentCRUD from app.db.crud.attachment import AttachmentCRUD
from app.services.ocr_service import ocr_service from app.services.ocr_service import ocr_service
from app.services.ocr_engine import OCREngine from app.services.ocr_engine import OCREngine
from app.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData from app.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry
router = APIRouter() router = APIRouter()
@@ -78,6 +78,12 @@ async def extract_from_image(file: UploadFile = File(...)):
raise HTTPException(status_code=422, detail=message) raise HTTPException(status_code=422, detail=message)
# Convert ExtractionResult to ExtractionData schema # Convert ExtractionResult to ExtractionData schema
# Convert tva_entries from dict to TvaEntry objects
tva_entries_schema = [
TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
for e in result.tva_entries
] if result.tva_entries else []
data = ExtractionData( data = ExtractionData(
receipt_type=result.receipt_type, receipt_type=result.receipt_type,
receipt_number=result.receipt_number, receipt_number=result.receipt_number,
@@ -87,6 +93,10 @@ async def extract_from_image(file: UploadFile = File(...)):
partner_name=result.partner_name, partner_name=result.partner_name,
cui=result.cui, cui=result.cui,
description=result.description, description=result.description,
tva_entries=tva_entries_schema,
tva_total=result.tva_total,
address=result.address,
items_count=result.items_count,
confidence_amount=result.confidence_amount, confidence_amount=result.confidence_amount,
confidence_date=result.confidence_date, confidence_date=result.confidence_date,
confidence_vendor=result.confidence_vendor, confidence_vendor=result.confidence_vendor,
@@ -137,6 +147,12 @@ async def extract_from_attachment(
raise HTTPException(status_code=422, detail=message) raise HTTPException(status_code=422, detail=message)
# Convert ExtractionResult to ExtractionData schema # Convert ExtractionResult to ExtractionData schema
# Convert tva_entries from dict to TvaEntry objects
tva_entries_schema = [
TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
for e in result.tva_entries
] if result.tva_entries else []
data = ExtractionData( data = ExtractionData(
receipt_type=result.receipt_type, receipt_type=result.receipt_type,
receipt_number=result.receipt_number, receipt_number=result.receipt_number,
@@ -146,6 +162,10 @@ async def extract_from_attachment(
partner_name=result.partner_name, partner_name=result.partner_name,
cui=result.cui, cui=result.cui,
description=result.description, description=result.description,
tva_entries=tva_entries_schema,
tva_total=result.tva_total,
address=result.address,
items_count=result.items_count,
confidence_amount=result.confidence_amount, confidence_amount=result.confidence_amount,
confidence_date=result.confidence_date, confidence_date=result.confidence_date,
confidence_vendor=result.confidence_vendor, confidence_vendor=result.confidence_vendor,

View File

@@ -2,11 +2,18 @@
from datetime import date from datetime import date
from decimal import Decimal from decimal import Decimal
from typing import Optional from typing import Optional, List
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
class TvaEntry(BaseModel):
    """Single TVA (VAT) entry extracted from a receipt by OCR.

    One entry describes one VAT rate group of the receipt: the letter code
    printed next to it, the rate in percent and the VAT amount charged at
    that rate.
    """

    # Letter code of the rate group; optional because the extractor may not
    # provide one for every entry.
    code: Optional[str] = Field(default=None, description="TVA code: A, B, C, D")
    # Covers both the old Romanian rates (19, 9, 5) and the rates in force
    # from Aug 2025 (21, 11), plus the zero rate.
    percent: int = Field(description="TVA percentage: 0, 5, 9, 11, 19, 21")
    # VAT amount charged at this rate.
    amount: Decimal = Field(description="TVA amount for this rate")
class ExtractionData(BaseModel): class ExtractionData(BaseModel):
"""Extracted receipt data from OCR.""" """Extracted receipt data from OCR."""
@@ -19,6 +26,12 @@ class ExtractionData(BaseModel):
cui: Optional[str] = Field(default=None, description="CUI (fiscal identification code)") cui: Optional[str] = Field(default=None, description="CUI (fiscal identification code)")
description: Optional[str] = Field(default=None, description="Optional description") description: Optional[str] = Field(default=None, description="Optional description")
# Additional extracted fields - Multiple TVA entries support
tva_entries: List[TvaEntry] = Field(default=[], description="List of TVA entries by rate (A, B, C, D)")
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
address: Optional[str] = Field(default=None, description="Vendor address")
items_count: Optional[int] = Field(default=None, description="Number of items/articles")
confidence_amount: float = Field(default=0.0, ge=0, le=1, description="Amount extraction confidence") confidence_amount: float = Field(default=0.0, ge=0, le=1, description="Amount extraction confidence")
confidence_date: float = Field(default=0.0, ge=0, le=1, description="Date extraction confidence") confidence_date: float = Field(default=0.0, ge=0, le=1, description="Date extraction confidence")
confidence_vendor: float = Field(default=0.0, ge=0, le=1, description="Vendor extraction confidence") confidence_vendor: float = Field(default=0.0, ge=0, le=1, description="Vendor extraction confidence")
@@ -30,18 +43,25 @@ class ExtractionData(BaseModel):
json_schema_extra = { json_schema_extra = {
"example": { "example": {
"receipt_type": "bon_fiscal", "receipt_type": "bon_fiscal",
"receipt_number": "12345", "receipt_number": "1360760",
"receipt_series": None, "receipt_series": "0146",
"receipt_date": "2024-01-15", "receipt_date": "2025-10-11",
"amount": 125.50, "amount": 186.16,
"partner_name": "MEGA IMAGE SRL", "partner_name": "FIVE-HOLDING S.A.",
"cui": "12345678", "cui": "10562600",
"description": None, "description": None,
"confidence_amount": 0.95, "tva_entries": [
"confidence_date": 0.90, {"code": "A", "percent": 19, "amount": 25.00},
"confidence_vendor": 0.75, {"code": "B", "percent": 9, "amount": 7.31}
"overall_confidence": 0.87, ],
"raw_text": "BON FISCAL\nMEGA IMAGE SRL\n..." "tva_total": 32.31,
"address": "JUD. CONSTANTA, MUN. CONSTANTA, STR. ION ROATA NR. 3",
"items_count": 17,
"confidence_amount": 0.98,
"confidence_date": 0.98,
"confidence_vendor": 0.95,
"overall_confidence": 0.97,
"raw_text": "FIVE-HOLDING S.A.\nCIF: RO10562600\n..."
} }
} }

View File

@@ -64,6 +64,15 @@ class AttachmentResponse(BaseModel):
uploaded_at: datetime uploaded_at: datetime
# ============ TVA Schema ============
class TvaEntrySchema(BaseModel):
    """Single TVA (VAT) entry of a receipt's TVA breakdown.

    Used as the element type of ``tva_breakdown`` on the receipt schemas:
    one entry per VAT rate group, carrying the letter code, the rate in
    percent and the VAT amount charged at that rate.
    """

    # Letter code of the rate group; may be omitted.
    code: Optional[str] = Field(default=None, description="TVA code: A, B, C, D")
    # Covers both the old Romanian rates (19, 9, 5) and the rates in force
    # from Aug 2025 (21, 11), plus the zero rate.
    percent: int = Field(description="TVA percentage: 0, 5, 9, 11, 19, 21")
    # VAT amount charged at this rate.
    amount: Decimal = Field(description="TVA amount for this rate")
# ============ Receipt Schemas ============ # ============ Receipt Schemas ============
class ReceiptBase(BaseModel): class ReceiptBase(BaseModel):
@@ -75,6 +84,12 @@ class ReceiptBase(BaseModel):
receipt_date: date receipt_date: date
amount: Decimal = Field(gt=0) amount: Decimal = Field(gt=0)
description: Optional[str] = Field(default=None, max_length=500) description: Optional[str] = Field(default=None, max_length=500)
# TVA info (multiple entries support)
tva_breakdown: Optional[List[TvaEntrySchema]] = Field(default=None, description="List of TVA entries")
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
items_count: Optional[int] = Field(default=None, description="Number of items")
vendor_address: Optional[str] = Field(default=None, max_length=500, description="Vendor address")
# Other fields
expense_type_code: Optional[str] = Field(default=None, max_length=20) expense_type_code: Optional[str] = Field(default=None, max_length=20)
company_id: int company_id: int
partner_id: Optional[int] = None partner_id: Optional[int] = None
@@ -98,6 +113,12 @@ class ReceiptUpdate(BaseModel):
receipt_date: Optional[date] = None receipt_date: Optional[date] = None
amount: Optional[Decimal] = Field(default=None, gt=0) amount: Optional[Decimal] = Field(default=None, gt=0)
description: Optional[str] = Field(default=None, max_length=500) description: Optional[str] = Field(default=None, max_length=500)
# TVA info (multiple entries support)
tva_breakdown: Optional[List[TvaEntrySchema]] = Field(default=None, description="List of TVA entries")
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
items_count: Optional[int] = Field(default=None, description="Number of items")
vendor_address: Optional[str] = Field(default=None, max_length=500, description="Vendor address")
# Other fields
expense_type_code: Optional[str] = Field(default=None, max_length=20) expense_type_code: Optional[str] = Field(default=None, max_length=20)
partner_id: Optional[int] = None partner_id: Optional[int] = None
partner_name: Optional[str] = Field(default=None, max_length=200) partner_name: Optional[str] = Field(default=None, max_length=200)

View File

@@ -23,24 +23,37 @@ class ImagePreprocessor:
raise ValueError(f"Could not load image: {path}") raise ValueError(f"Could not load image: {path}")
return image return image
def pdf_to_images(self, path: Path, dpi: int = 400) -> List[np.ndarray]:
    """
    Render every page of a PDF as an image array for OCR.

    Args:
        path: Path to the PDF file.
        dpi: Rendering resolution passed to pdf2image. The default of 400
            targets thermal receipts; higher values trade speed for quality.

    Returns:
        One numpy array per rendered page, in page order.

    Raises:
        RuntimeError: If pdf2image is not installed.
    """
    if not PDF_AVAILABLE:
        raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")

    # pdf2image expects a string path.
    pages = pdf2image.convert_from_path(str(path), dpi=dpi)
    return list(map(np.array, pages))
def preprocess(self, image: np.ndarray) -> np.ndarray: def preprocess(self, image: np.ndarray, high_quality: bool = True) -> np.ndarray:
""" """
Apply preprocessing pipeline for thermal receipt images. Apply preprocessing pipeline for thermal receipt images.
Pipeline: Pipeline:
1. Convert to grayscale 1. Convert to grayscale
2. Resize if too small (min 1000px width) 2. Resize if too small (min 1500px width for high quality)
3. Deskew (straighten rotated text) 3. Deskew (straighten rotated text)
4. Denoise (Non-local means) 4. Contrast enhancement (CLAHE)
5. Adaptive thresholding (binarization) 5. Denoise (Non-local means)
6. Morphological close (connect broken chars) 6. Sharpening (for clearer text edges)
7. Adaptive thresholding (binarization)
8. Morphological operations (connect broken chars)
Args:
image: Input image (BGR or grayscale)
high_quality: If True, apply more aggressive preprocessing
""" """
# 1. Grayscale # 1. Grayscale
if len(image.shape) == 3: if len(image.shape) == 3:
@@ -48,10 +61,11 @@ class ImagePreprocessor:
else: else:
gray = image.copy() gray = image.copy()
# 2. Resize if too small # 2. Resize if too small (larger = better OCR)
height, width = gray.shape height, width = gray.shape
if width < 1000: min_width = 1500 if high_quality else 1000
scale = 1000 / width if width < min_width:
scale = min_width / width
gray = cv2.resize( gray = cv2.resize(
gray, None, fx=scale, fy=scale, gray, None, fx=scale, fy=scale,
interpolation=cv2.INTER_CUBIC interpolation=cv2.INTER_CUBIC
@@ -60,24 +74,43 @@ class ImagePreprocessor:
# 3. Deskew # 3. Deskew
gray = self._deskew(gray) gray = self._deskew(gray)
# 4. Denoise # 4. Contrast enhancement with CLAHE (Contrast Limited Adaptive Histogram Equalization)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# 5. Denoise (slightly less aggressive to preserve text details)
denoised = cv2.fastNlMeansDenoising( denoised = cv2.fastNlMeansDenoising(
gray, h=10, enhanced, h=8, # Lower h = preserve more details
templateWindowSize=7, templateWindowSize=7,
searchWindowSize=21 searchWindowSize=21
) )
# 5. Adaptive thresholding # 6. Sharpening to enhance text edges
if high_quality:
# Unsharp mask for better text clarity
gaussian = cv2.GaussianBlur(denoised, (0, 0), 2.0)
sharpened = cv2.addWeighted(denoised, 1.5, gaussian, -0.5, 0)
else:
sharpened = denoised
# 7. Adaptive thresholding with optimized parameters
binary = cv2.adaptiveThreshold( binary = cv2.adaptiveThreshold(
denoised, 255, sharpened, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, cv2.THRESH_BINARY,
blockSize=15, C=8 blockSize=11, # Smaller block = better for small text
C=5 # Lower C = darker result, better for faded receipts
) )
# 6. Morphological close # 8. Morphological operations
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)) # Close small gaps in characters
result = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel) kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
result = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_close)
# Optional: Remove small noise spots
if high_quality:
kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
result = cv2.morphologyEx(result, cv2.MORPH_OPEN, kernel_open)
return result return result

View File

@@ -64,11 +64,17 @@ class OCREngine:
PaddleOCR = _PaddleOCR PaddleOCR = _PaddleOCR
print("Initializing PaddleOCR engine...") print("Initializing PaddleOCR engine...")
# PaddleOCR 3.x API - simplified parameters # PaddleOCR 3.x API - optimized for Romanian receipts
self._paddle = PaddleOCR( self._paddle = PaddleOCR(
lang='en', # Better for mixed text with numbers lang='en', # 'en' works better than 'ro' for mixed alphanumeric
# High quality settings for better accuracy
det_db_thresh=0.3, # Lower threshold = detect more text (default 0.3)
det_db_box_thresh=0.5, # Box confidence threshold (default 0.5)
det_db_unclip_ratio=1.8, # Expand detected boxes slightly (default 1.5)
rec_batch_num=6, # Batch size for recognition
use_angle_cls=True, # Enable text angle classification
) )
print("PaddleOCR initialized successfully") print("PaddleOCR initialized successfully with high-quality settings")
except Exception as e: except Exception as e:
print(f"Warning: Failed to initialize PaddleOCR: {e}") print(f"Warning: Failed to initialize PaddleOCR: {e}")
self._paddle = None self._paddle = None

View File

@@ -3,7 +3,7 @@
import re import re
from datetime import date, datetime from datetime import date, datetime
from decimal import Decimal, InvalidOperation from decimal import Decimal, InvalidOperation
from typing import Optional, Tuple from typing import Optional, Tuple, List
from dataclasses import dataclass, field from dataclasses import dataclass, field
@@ -18,6 +18,11 @@ class ExtractionResult:
partner_name: Optional[str] = None partner_name: Optional[str] = None
cui: Optional[str] = None cui: Optional[str] = None
description: Optional[str] = None description: Optional[str] = None
# Additional extracted fields - Multiple TVA entries support
tva_entries: List[dict] = field(default_factory=list) # [{code, percent, amount}]
tva_total: Optional[Decimal] = None
address: Optional[str] = None
items_count: Optional[int] = None
confidence_amount: float = 0.0 confidence_amount: float = 0.0
confidence_date: float = 0.0 confidence_date: float = 0.0
@@ -40,44 +45,158 @@ class ReceiptExtractor:
"""Extract receipt fields using pattern matching for Romanian receipts.""" """Extract receipt fields using pattern matching for Romanian receipts."""
# Total amount patterns (most specific first) # Total amount patterns (most specific first)
# Romanian receipts use various formats: TOTAL LEI, TOTAL:, TOTAL RON, etc.
# OCR often produces errors, so patterns must be tolerant
TOTAL_PATTERNS = [ TOTAL_PATTERNS = [
# Most common: TOTAL LEI followed by amount
(r'TOTAL\s+LEI\s*([\d\s.,]+)', 0.98),
(r'[OT]?OTAL\s+LEI\s*([\d\s.,]+)', 0.95), # OCR may miss first letter
# Standard patterns
(r'TOTAL\s*:?\s*([\d\s.,]+)\s*(?:RON|LEI)?', 0.95), (r'TOTAL\s*:?\s*([\d\s.,]+)\s*(?:RON|LEI)?', 0.95),
(r'TOTAL\s+(?:RON|LEI)\s*([\d\s.,]+)', 0.95), (r'TOTAL\s+(?:RON|LEI)\s*([\d\s.,]+)', 0.95),
# SUBTOTAL when TOTAL not found
(r'SUBTOTAL\s*([\d\s.,]+)', 0.90),
(r'[SB]?UBTOTAL\s*([\d\s.,]+)', 0.88), # OCR variations
# Payment methods
(r'DE\s+PLATA\s*:?\s*([\d\s.,]+)', 0.90), (r'DE\s+PLATA\s*:?\s*([\d\s.,]+)', 0.90),
(r'SUMA\s*:?\s*([\d\s.,]+)', 0.85), (r'SUMA\s*:?\s*([\d\s.,]+)', 0.85),
(r'PLATA\s+CARD\s*:?\s*([\d\s.,]+)', 0.85), (r'PLATA\s+CARD\s*:?\s*([\d\s.,]+)', 0.85),
(r'NUMERAR\s*:?\s*([\d\s.,]+)', 0.80), (r'NUMERAR\s*:?\s*([\d\s.,]+)', 0.80),
(r'REST\s*:?\s*([\d\s.,]+)', 0.70), # Sometimes total is near REST
] ]
# Date patterns # Fallback: Find the largest repeated amount (likely the total)
# This handles cases where OCR doesn't capture "TOTAL" keyword
# Date patterns - support dash, dot, and slash separators.
# Each entry is a (regex, confidence) pair; group 1 captures the date text.
# OCR may misread the DATA label as DRTA or DAIA, so the labelled pattern
# tolerates both the second and the third letter being wrong.
DATE_PATTERNS = [
    # DATA/DRTA/DAIA: DD-MM-YYYY (OCR tolerant)
    (r'D[AR][TI]A\s*:?\s*(\d{2}[-./]\d{2}[-./]\d{4})', 0.98),
    (r'DATA\s*:?\s*(\d{2}[-./]\d{2}[-./]\d{4})', 0.98),
    # Date followed by ORA (time) - OCR may produce 0RA
    (r'(\d{2}[-./]\d{2}[-./]\d{4})\s+[O0]RA\s*:?\s*\d{2}:\d{2}', 0.95),
    # Date followed by time without ORA keyword
    (r'(\d{2}[-./]\d{2}[-./]\d{4})\s+\d{2}:\d{2}', 0.90),
    # Standalone date
    (r'(\d{2}[-./]\d{2}[-./]\d{4})', 0.80),
    # YYYY-MM-DD format (less common)
    (r'(\d{4}[-./]\d{2}[-./]\d{2})', 0.75),
]
# Receipt number patterns # Receipt number patterns - Romanian fiscal receipt formats
# OCR may produce N instead of : or other errors
NUMBER_PATTERNS = [ NUMBER_PATTERNS = [
# NDS format (common in Romanian POS)
(r'NDS\s*:?\s*(\d+)', 0.98),
# C3POS terminal format - OCR may have N instead of : (C3POS-CT2N1360760)
(r'C3POS[-A-Z0-9]*[N:](\d{6,7})', 0.98), # CT2N1360760 format
(r'C3POS.*?(\d{6,7})\b', 0.95), # Any C3POS followed by 6-7 digit number
(r'CT2[N:]\s*(\d{6,})', 0.95), # CT2N prefix
# BF (Bon Fiscal) number
(r'BF\s*:?\s*(\d+)', 0.93),
# NIVS format
(r'NIVS\s*:?\s*(\d+)', 0.95),
# Standard NR BON formats
(r'NR\.?\s*BON\s*:?\s*(\d+)', 0.95), (r'NR\.?\s*BON\s*:?\s*(\d+)', 0.95),
(r'BON\s+(?:FISCAL\s+)?NR\.?\s*:?\s*(\d+)', 0.95), (r'BON\s+(?:FISCAL\s+)?NR\.?\s*:?\s*(\d+)', 0.95),
(r'CHITANTA\s+NR\.?\s*:?\s*(\d+)', 0.95), (r'CHITANTA\s+NR\.?\s*:?\s*(\d+)', 0.95),
# Document number
(r'NR\.?\s+DOCUMENT\s*:?\s*(\d+)', 0.90), (r'NR\.?\s+DOCUMENT\s*:?\s*(\d+)', 0.90),
(r'NR\.?\s*:?\s*(\d{4,})', 0.70), # ID BF format
(r'ID\s*BF\s*:?\s*(\d+)', 0.90),
# TD format (transaction ID)
(r'TD\s*:?\s*(\d+)', 0.85),
# 6-8 digit number (typical receipt number length)
(r'\b(\d{6,8})\b', 0.70),
# Generic long number at end (fallback)
(r'NR\.?\s*:?\s*(\d{4,})', 0.65),
] ]
# CUI (fiscal code) patterns # CUI (fiscal code) patterns - IMPORTANT: exclude CLIENT CUI
# CIF = Cod de Identificare Fiscală (vendor's tax ID)
# CLIENT C.U.I. = client's tax ID (should be ignored)
# OCR errors: R0 instead of RO, C1F instead of CIF
CUI_PATTERNS = [ CUI_PATTERNS = [
(r'C\.?U\.?I\.?\s*:?\s*(?:RO)?(\d{6,10})', 0.95), # CIF at start of line (definitely vendor) - tolerant to OCR errors
(r'C\.?I\.?F\.?\s*:?\s*(?:RO)?(\d{6,10})', 0.95), (r'^CIF\s*:?\s*(?:R[O0])?(\d{6,10})', 0.98),
(r'COD\s+FISCAL\s*:?\s*(?:RO)?(\d{6,10})', 0.90), (r'^C[I1]F\s*:?\s*(?:R[O0])?(\d{6,10})', 0.95), # C1F OCR error
(r'(?:RO)?(\d{6,10})\s*-?\s*(?:J|CUI)', 0.80), # CIF not preceded by CLIENT (negative lookbehind)
(r'(?<!CLIENT\s)(?<!LIENT\s)CIF\s*:?\s*(?:R[O0])?(\d{6,10})', 0.95),
# Standalone CIF: format with OCR tolerance
(r'\bC[I1]F\s*:?\s*(?:R[O0])?(\d{6,10})\b', 0.90),
# COD FISCAL (vendor)
(r'COD\s+FISCAL\s*:?\s*(?:R[O0])?(\d{6,10})', 0.90),
# C.I.F. format (with dots)
(r'(?<!CLIENT\s)C\.[I1]\.F\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.88),
# CUI format (less specific, use with caution)
(r'(?<!CLIENT\s)C\.?U\.?[I1]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.85),
] ]
# Series patterns # Series patterns - be strict to avoid false matches
SERIES_PATTERNS = [ SERIES_PATTERNS = [
(r'SERIE\s*:?\s*([A-Z]{1,4})', 0.90), (r'SERIE\s*:?\s*([A-Z]{1,4})', 0.90),
(r'([A-Z]{2,4})\s+NR\.?\s*\d+', 0.80), # Z: format from Romanian fiscal receipts (must be at start of line or after space)
(r'(?:^|\s)Z\s*:\s*(\d{4})', 0.85),
# BF series with explicit marker
(r'(?:^|\s)BF\s*:\s*(\d{4})', 0.85),
]
# TVA (VAT) patterns - OCR may produce TUA, TVR, etc.
TVA_PATTERNS = [
# TOTAL TVA BON format (OCR tolerant: TUA, TVR)
(r'TOTAL\s+T[VU][AR]\s+BON\s*:?\s*([\d\s.,]+)', 0.98),
(r'T[O0]TAL\s+T[VU][AR]\s*:?\s*([\d\s.,]+)', 0.95),
# TVA with percentage (OCR tolerant)
(r'T[VU][AR]\s+(?:A\s*[-:]?\s*)?(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)', 0.95),
(r'T[VU][AR]\s+[A-Z]\s*[-:]\s*(\d{1,2})\s*%\s*([\d\s.,]+)', 0.93),
# Simple TVA pattern
(r'T[VU][AR]\s*:?\s*([\d\s.,]+)', 0.85),
# Standalone percentage line near TVA
(r'(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)', 0.75),
]
# Items count patterns - OCR may produce OZ instead of POZ, etc.
# Number may be on separate line before or after the label.
# Each entry is a (regex, confidence) pair; group 1 captures the count.
ITEMS_COUNT_PATTERNS = [
    # NR. POZ. ART. IN BON: 17 (Romanian format with dots and spaces)
    # OCR tolerant: OZ instead of POZ, ARI instead of ART. The ARI variant
    # is only accepted where the preceding "OZ" anchors the label.
    (r'NR\.?\s*P?[O0]Z\.?\s*AR[TI]\.?\s*(?:IN\s+BON)?\s*:?\s*(\d+)', 0.98),
    # Number on line BEFORE "OZ. ART. IN BON:" - OCR sometimes reorders
    (r'(\d{1,2})\s*\n\s*[O0]Z\.?\s*AR[TI]', 0.95),
    # Number may be on next line after label
    (r'[O0]Z\.?\s*AR[TI]\.?\s*(?:IN\s+BON)?\s*:?\s*[\n\s]*(\d+)', 0.93),
    (r'NR\.?\s*(?:P?[O0]Z\.?)?\s*ART(?:ICOLE)?\.?\s*(?:IN\s+BON)?\s*:?\s*[\n\s]*(\d+)', 0.90),
    # Simpler patterns
    (r'ARTIC[O0]LE\s*:?\s*(\d+)', 0.88),
    (r'P?[O0]Z\s*:?\s*(\d+)', 0.85),
    # X articole/pozitii
    (r'(\d+)\s*(?:ARTIC[O0]LE|P[O0]ZITII|BUC)', 0.80),
]
# Address patterns (Romanian format)
ADDRESS_PATTERNS = [
# Street patterns
(r'(STR\.?\s+[A-Z0-9\s.,]+(?:NR\.?\s*\d+)?)', 0.90),
# Full address with JUD (county)
(r'(JUD\.?\s+[A-Z]+,?\s*(?:MUN\.?|OR\.?|COM\.?)?\s*[A-Z]+)', 0.85),
]
# Vendor name indicators (lines containing these are likely vendor names)
VENDOR_INDICATORS = [
r'\bS\.?R\.?L\.?\b', # S.R.L.
r'\bS\.?A\.?\b', # S.A.
r'\bS\.?N\.?C\.?\b', # S.N.C.
r'\bS\.?C\.?S\.?\b', # S.C.S.
r'\bI\.?I\.?\b', # I.I. (Individual)
r'\bP\.?F\.?A\.?\b', # P.F.A.
r'\bS\.?C\.?\b', # S.C.
r'HOLDING',
r'COMPANY',
r'GROUP',
r'MAGAZIN',
r'MARKET',
r'SHOP',
] ]
def extract(self, text: str) -> ExtractionResult: def extract(self, text: str) -> ExtractionResult:
@@ -86,13 +205,18 @@ class ReceiptExtractor:
result.raw_text = text result.raw_text = text
text_upper = text.upper() text_upper = text.upper()
# Extract fields # Extract core fields
result.amount, result.confidence_amount = self._extract_amount(text_upper) result.amount, result.confidence_amount = self._extract_amount(text_upper)
result.receipt_date, result.confidence_date = self._extract_date(text_upper) result.receipt_date, result.confidence_date = self._extract_date(text_upper)
result.receipt_number, _ = self._extract_number(text_upper) result.receipt_number, _ = self._extract_number(text_upper)
result.receipt_series, _ = self._extract_series(text_upper) result.receipt_series, _ = self._extract_series(text_upper)
result.partner_name, result.confidence_vendor = self._extract_vendor(text) result.partner_name, result.confidence_vendor = self._extract_vendor(text)
result.cui, _ = self._extract_cui(text_upper) result.cui, _ = self._extract_cui(text_upper, text)
# Extract additional fields - Multiple TVA entries
result.tva_entries, result.tva_total = self._extract_tva_entries(text_upper)
result.items_count = self._extract_items_count(text_upper)
result.address = self._extract_address(text_upper)
# Detect receipt type # Detect receipt type
result.receipt_type = self._detect_receipt_type(text_upper) result.receipt_type = self._detect_receipt_type(text_upper)
@@ -101,18 +225,85 @@ class ReceiptExtractor:
def _extract_amount(self, text: str) -> Tuple[Optional[Decimal], float]: def _extract_amount(self, text: str) -> Tuple[Optional[Decimal], float]:
"""Extract total amount from text.""" """Extract total amount from text."""
# First try standard patterns (TOTAL, SUBTOTAL, etc.)
for pattern, confidence in self.TOTAL_PATTERNS: for pattern, confidence in self.TOTAL_PATTERNS:
match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE) match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
if match: if match:
try: try:
amount_str = re.sub(r'[^\d.,]', '', match.group(1)) amount_str = re.sub(r'[^\d.,]', '', match.group(1))
# Handle Romanian number format (1.234,56)
amount_str = self._normalize_number(amount_str) amount_str = self._normalize_number(amount_str)
amount = Decimal(amount_str) amount = Decimal(amount_str)
if amount > 0: if amount > 0:
return amount, confidence return amount, confidence
except (InvalidOperation, ValueError): except (InvalidOperation, ValueError):
continue continue
# Strategy 2: Find amounts AFTER product lines end
# Products have pattern: "X BUC/ROLA X price = price"
# Total appears after all products
product_pattern = r'\d\s+(?:BUC|ROLA|ROLN|ROL)\s+X'
product_matches = list(re.finditer(product_pattern, text, re.IGNORECASE))
if product_matches:
# Get text after the last product line
last_product_pos = product_matches[-1].end()
after_products = text[last_product_pos:]
# Find standalone amounts on their own line after products
line_amount_pattern = r'^[\s]*(\d{2,4}[.,]\s*\d{2})[\s]*$'
standalone_amounts = []
for match in re.finditer(line_amount_pattern, after_products, re.MULTILINE):
try:
amount_str = match.group(1).replace(' ', '')
amount_str = self._normalize_number(amount_str)
amount = Decimal(amount_str)
if amount > 10: # Filter out small values
standalone_amounts.append(amount)
except (InvalidOperation, ValueError):
continue
if standalone_amounts:
# The largest standalone amount after products is likely the total
max_amount = max(standalone_amounts)
# Higher confidence if it appears multiple times
count = standalone_amounts.count(max_amount)
confidence = 0.85 if count >= 2 else 0.75
return max_amount, confidence
# Strategy 3: Find the most repeated large amount
# Normalize spaces in numbers (OCR may produce "186. 16")
normalized_text = re.sub(r'(\d+)[.,]\s+(\d{2})', r'\1.\2', text)
amount_pattern = r'(\d{2,4}[.,]\d{2})\b'
amounts = re.findall(amount_pattern, normalized_text)
if amounts:
from collections import Counter
amount_counts = Counter(amounts)
# Filter amounts that appear 2+ times and are > 20
candidates = []
for amt_str, count in amount_counts.items():
try:
amt = Decimal(self._normalize_number(amt_str))
if count >= 2 and amt > 20:
candidates.append((amt, count))
except (InvalidOperation, ValueError):
continue
if candidates:
# Return the LARGEST amount that appears multiple times
candidates.sort(key=lambda x: x[0], reverse=True)
return candidates[0][0], 0.65
# Last resort: Find any standalone large amount
line_amount_pattern = r'^[\s]*(\d{2,4}[.,]\s*\d{2})[\s]*$'
for match in re.finditer(line_amount_pattern, text, re.MULTILINE):
try:
amount_str = match.group(1).replace(' ', '')
amount_str = self._normalize_number(amount_str)
amount = Decimal(amount_str)
if amount > 50: # Higher threshold for fallback
return amount, 0.50
except (InvalidOperation, ValueError):
continue
return None, 0.0 return None, 0.0
def _normalize_number(self, num_str: str) -> str: def _normalize_number(self, num_str: str) -> str:
@@ -147,7 +338,8 @@ class ReceiptExtractor:
match = re.search(pattern, text) match = re.search(pattern, text)
if match: if match:
try: try:
date_str = match.group(1).replace('/', '.') # Normalize separators to dots
date_str = match.group(1).replace('/', '.').replace('-', '.')
# Try DD.MM.YYYY format first # Try DD.MM.YYYY format first
try: try:
@@ -181,23 +373,68 @@ class ReceiptExtractor:
return None, 0.0 return None, 0.0
def _extract_vendor(self, text: str) -> Tuple[Optional[str], float]: def _extract_vendor(self, text: str) -> Tuple[Optional[str], float]:
"""Extract vendor/partner name from text.""" """
Extract vendor/partner name from text.
Uses multiple strategies:
1. Look for lines with company type indicators (S.R.L., S.A., etc.)
2. Look for lines near CIF
3. Use first valid line as fallback
"""
lines = text.split('\n') lines = text.split('\n')
skip_keywords = [ skip_keywords = [
'BON', 'FISCAL', 'TOTAL', 'DATA', 'NR', 'ORA', 'BON', 'FISCAL', 'TOTAL', 'DATA', 'NR', 'ORA',
'SUBTOTAL', 'TVA', 'PLATA', 'CARD', 'NUMERAR', 'SUBTOTAL', 'TVA', 'PLATA', 'CARD', 'NUMERAR',
'RON', 'LEI', 'CHITANTA', 'REST' 'RON', 'LEI', 'CHITANTA', 'REST', 'CLIENT',
'OPERATOR', 'CASIER', 'POS', 'AMEF', 'BINE ATI VENIT',
'VA RUGAM', 'PASTRATI', 'VOCEA', 'TIPARIT',
'DETERGENT', 'PROSOP', 'HARTIE', 'SACI', 'SPRAY',
'BUC', 'ROLA', 'CUMPARATOR'
] ]
for i, line in enumerate(lines[:7]): # Check first 7 lines # Strategy 1: Look for lines with vendor indicators (S.R.L., S.A., HOLDING, etc.)
for i, line in enumerate(lines[:15]): # Check first 15 lines
line = line.strip()
if not line or len(line) < 3:
continue
line_upper = line.upper()
# Check for vendor indicators
for indicator in self.VENDOR_INDICATORS:
if re.search(indicator, line_upper):
# Found a company name indicator
vendor = self._clean_vendor_name(line)
if vendor and len(vendor) >= 3:
# High confidence for lines with company indicators
return vendor, 0.95
# Strategy 2: Look for lines right before or after CIF
for i, line in enumerate(lines[:15]):
line_upper = line.upper()
if 'CIF' in line_upper and 'CLIENT' not in line_upper:
# Check line before
if i > 0:
prev_line = lines[i-1].strip()
if prev_line and len(prev_line) >= 3:
if not any(kw in prev_line.upper() for kw in skip_keywords):
vendor = self._clean_vendor_name(prev_line)
if vendor:
return vendor, 0.85
# Strategy 3: First valid line as fallback
for i, line in enumerate(lines[:10]):
line = line.strip() line = line.strip()
# Skip empty lines # Skip empty lines
if not line: if not line or len(line) < 3:
continue continue
# Skip lines that are just numbers # Skip lines that are just numbers or codes
if re.match(r'^[\d.,\s]+$', line): if re.match(r'^[\d.,\s:]+$', line):
continue
# Skip lines with barcodes/product codes
if re.match(r'^[A-Z]*\d{6,}', line):
continue continue
# Skip lines with keywords # Skip lines with keywords
@@ -205,23 +442,68 @@ class ReceiptExtractor:
continue continue
# Clean the line # Clean the line
vendor = re.sub(r'[^\w\s.,&-]', '', line).strip() vendor = self._clean_vendor_name(line)
if len(vendor) >= 3: if vendor and len(vendor) >= 3:
# Confidence decreases for lines further down # Confidence decreases for lines further down
confidence = max(0.3, 0.8 - (i * 0.1)) confidence = max(0.3, 0.7 - (i * 0.05))
return vendor, confidence return vendor, confidence
return None, 0.0 return None, 0.0
def _extract_cui(self, text: str) -> Tuple[Optional[str], float]: def _clean_vendor_name(self, name: str) -> Optional[str]:
"""Extract CUI (fiscal identification code) from text.""" """Clean and normalize vendor name."""
if not name:
return None
# Remove common OCR artifacts
name = re.sub(r'[^\w\s.,&\-()]', ' ', name)
# Normalize whitespace
name = re.sub(r'\s+', ' ', name).strip()
# Skip if it looks like an address line only
if re.match(r'^(STR|JUD|MUN|NR|BL|SC|ET|AP)\.?\s', name.upper()):
return None
# Skip if too short after cleaning
if len(name) < 3:
return None
return name
def _extract_cui(self, text_upper: str, original_text: str) -> Tuple[Optional[str], float]:
"""
Extract vendor CUI (fiscal identification code) from text.
Excludes CLIENT CUI which appears as 'CLIENT C.U.I./C.I.F.:...'
"""
# First, try to find CIF on a line that doesn't contain CLIENT
lines = text_upper.split('\n')
for line in lines:
# Skip lines that contain CLIENT (these are buyer's CUI, not vendor's)
if 'CLIENT' in line or 'CUMPARATOR' in line or 'LIENT' in line:
continue
# Look for CIF in this line
for pattern, confidence in self.CUI_PATTERNS: for pattern, confidence in self.CUI_PATTERNS:
match = re.search(pattern, text, re.IGNORECASE) match = re.search(pattern, line, re.IGNORECASE | re.MULTILINE)
if match: if match:
cui = match.group(1) cui = match.group(1)
if 6 <= len(cui) <= 10: if 6 <= len(cui) <= 10:
return cui, confidence return cui, confidence
# Fallback: search entire text but exclude CLIENT patterns
for pattern, confidence in self.CUI_PATTERNS:
# Find all matches
for match in re.finditer(pattern, text_upper, re.IGNORECASE | re.MULTILINE):
cui = match.group(1)
if 6 <= len(cui) <= 10:
# Check if this match is preceded by CLIENT in the same line
start = match.start()
line_start = text_upper.rfind('\n', 0, start) + 1
line_text = text_upper[line_start:start]
if 'CLIENT' not in line_text and 'LIENT' not in line_text:
return cui, confidence
return None, 0.0 return None, 0.0
def _detect_receipt_type(self, text: str) -> str: def _detect_receipt_type(self, text: str) -> str:
@@ -229,3 +511,223 @@ class ReceiptExtractor:
if 'CHITANTA' in text or 'CHITANȚĂ' in text: if 'CHITANTA' in text or 'CHITANȚĂ' in text:
return 'chitanta' return 'chitanta'
return 'bon_fiscal' return 'bon_fiscal'
def _extract_tva_entries(self, text: str) -> Tuple[List[dict], Optional[Decimal]]:
"""
Extract multiple TVA (VAT) entries from text.
Romanian receipts can have multiple TVA rates (A=19%, B=9%, C=5%, D=0%).
Returns (tva_entries, tva_total) where tva_entries is a list of:
{'code': 'A', 'percent': 19, 'amount': Decimal('15.20')}
"""
tva_entries = []
seen_entries = set() # To avoid duplicates
# Normalize spaces in numbers first (OCR may produce "32. 31")
normalized_text = re.sub(r'(\d+)[.,]\s+(\d{2})', r'\1.\2', text)
# Pattern 1: "TVA A - 19%: 15.20" or "TVAA - 21% 32.31" (with code)
# OCR tolerant: TUA, TVR, etc.
pattern_with_code = r'T[VU][AR]\s*([A-D])\s*[-:]\s*(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)'
for match in re.finditer(pattern_with_code, normalized_text, re.IGNORECASE):
try:
code = match.group(1).upper()
percent = int(match.group(2))
amount_str = match.group(3).replace(' ', '')
amount_str = self._normalize_number(re.sub(r'[^\d.,]', '', amount_str))
amount = Decimal(amount_str)
if amount > 0:
entry_key = (code, percent)
if entry_key not in seen_entries:
tva_entries.append({
'code': code,
'percent': percent,
'amount': amount
})
seen_entries.add(entry_key)
except (ValueError, InvalidOperation):
continue
# Pattern 2: "TVA - 21%: 32.31" (without explicit code, assume 'A')
if not tva_entries:
pattern_no_code = r'T[VU][AR]\s*[-:]\s*(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)'
for match in re.finditer(pattern_no_code, normalized_text, re.IGNORECASE):
try:
percent = int(match.group(1))
amount_str = match.group(2).replace(' ', '')
amount_str = self._normalize_number(re.sub(r'[^\d.,]', '', amount_str))
amount = Decimal(amount_str)
if amount > 0:
# Determine code based on percent
code = self._get_tva_code_from_percent(percent)
entry_key = (code, percent)
if entry_key not in seen_entries:
tva_entries.append({
'code': code,
'percent': percent,
'amount': amount
})
seen_entries.add(entry_key)
except (ValueError, InvalidOperation):
continue
# Pattern 3: "TVAA - 21%" on one line, amount on next line
if not tva_entries:
tva_line_pattern = r'T[VU][AR]\s*([A-D])?\s*[-:]\s*(\d{1,2})\s*%'
for match in re.finditer(tva_line_pattern, normalized_text, re.IGNORECASE):
try:
code = (match.group(1) or 'A').upper()
percent = int(match.group(2))
# Look for amount on the next line or immediately after
after_tva = normalized_text[match.end():]
amount_match = re.search(r'^[\s\n]*([\d.,]+)', after_tva)
if amount_match:
amount_str = self._normalize_number(amount_match.group(1))
amount = Decimal(amount_str)
if amount > 0:
entry_key = (code, percent)
if entry_key not in seen_entries:
tva_entries.append({
'code': code,
'percent': percent,
'amount': amount
})
seen_entries.add(entry_key)
except (ValueError, InvalidOperation):
continue
# Pattern 4: Use TVA_PATTERNS for fallback
if not tva_entries:
for pattern, _ in self.TVA_PATTERNS:
match = re.search(pattern, normalized_text, re.IGNORECASE)
if match:
try:
# Some patterns have 2 groups (percent, amount), others just amount
if match.lastindex >= 2:
percent = int(match.group(1))
amount_str = match.group(2)
else:
amount_str = match.group(1)
# Try to detect percent from text
percent = self._detect_tva_percent(text)
amount_str = amount_str.replace(' ', '')
amount_str = self._normalize_number(re.sub(r'[^\d.,]', '', amount_str))
amount = Decimal(amount_str)
if amount > 0 and percent:
code = self._get_tva_code_from_percent(percent)
entry_key = (code, percent)
if entry_key not in seen_entries:
tva_entries.append({
'code': code,
'percent': percent,
'amount': amount
})
seen_entries.add(entry_key)
break # Only use first match from fallback
except (ValueError, InvalidOperation):
continue
# Calculate total
tva_total = None
if tva_entries:
tva_total = sum(entry['amount'] for entry in tva_entries)
# Sort by code (A, B, C, D)
tva_entries.sort(key=lambda x: x.get('code', 'Z'))
return tva_entries, tva_total
def _get_tva_code_from_percent(self, percent: int) -> str:
"""Map TVA percentage to standard Romanian code.
Romanian TVA rates changed in August 2025:
- Standard rate: 19% → 21%
- Reduced rate: 9% → 11%
- Other rates (5%, 0%) remain unchanged
Old rates (before Aug 2025): New rates (from Aug 2025):
- A = 19% (standard) - A = 21% (standard)
- B = 9% (reduced) - B = 11% (reduced)
- C = 5% (reduced) - C = 5% (reduced)
- D = 0% (exempt) - D = 0% (exempt)
Both old and new rates are supported for historical receipts.
"""
if percent in (19, 21):
return 'A' # Standard rate (19% old, 21% new from Aug 2025)
elif percent in (9, 11):
return 'B' # Reduced rate (9% old, 11% new from Aug 2025)
elif percent == 5:
return 'C' # Reduced rate (unchanged)
elif percent == 0:
return 'D' # Exempt (unchanged)
else:
return 'A' # Default to standard rate
def _detect_tva_percent(self, text: str) -> Optional[int]:
"""Detect TVA percentage from text content."""
# Look for common Romanian TVA percentages
if '19%' in text or '19 %' in text:
return 19
elif '21%' in text or '21 %' in text:
return 21
elif '11%' in text or '11 %' in text:
return 11
elif '9%' in text or '9 %' in text:
return 9
elif '5%' in text or '5 %' in text:
return 5
return None
def _extract_items_count(self, text: str) -> Optional[int]:
"""Extract number of items/articles from receipt."""
for pattern, _ in self.ITEMS_COUNT_PATTERNS:
match = re.search(pattern, text, re.IGNORECASE)
if match:
try:
count = int(match.group(1))
if 0 < count < 1000: # Reasonable range
return count
except ValueError:
continue
return None
def _extract_address(self, text: str) -> Optional[str]:
"""Extract vendor address from text."""
lines = text.split('\n')
address_parts = []
for line in lines[:15]: # Check first 15 lines
line = line.strip()
if not line:
continue
# Check for address patterns
line_upper = line.upper()
# JUD. (county) pattern
if re.search(r'\bJUD\.?\s+', line_upper):
address_parts.append(line)
continue
# STR. (street) pattern
if re.search(r'\bSTR\.?\s+', line_upper):
address_parts.append(line)
continue
# MUN./OR./COM. (city/town) pattern
if re.search(r'\b(MUN|OR|COM)\.?\s+', line_upper):
address_parts.append(line)
continue
if address_parts:
# Join and clean address parts
address = ', '.join(address_parts)
# Clean up
address = re.sub(r'\s+', ' ', address).strip()
address = re.sub(r',\s*,', ',', address)
return address if len(address) >= 5 else None
return None

View File

@@ -0,0 +1,37 @@
"""add_tva_breakdown_to_receipt
Revision ID: 1cfb423c6953
Revises: 001_initial
Create Date: 2025-12-12 14:04:22.464289+00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
# revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the migration it
# revises (see "Revises" in the module docstring).
revision: str = '1cfb423c6953'
down_revision: Union[str, None] = '001_initial'
# No branch label and no extra dependency are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable TVA/OCR detail columns to the ``receipts`` table."""
    new_columns = (
        sa.Column('tva_breakdown', sqlmodel.sql.sqltypes.AutoString(length=1000), nullable=True),
        sa.Column('tva_total', sa.Numeric(precision=15, scale=2), nullable=True),
        sa.Column('items_count', sa.Integer(), nullable=True),
        sa.Column('vendor_address', sqlmodel.sql.sqltypes.AutoString(length=500), nullable=True),
    )
    # All four columns are added inside a single batch operation
    with op.batch_alter_table('receipts', schema=None) as batch_op:
        for column in new_columns:
            batch_op.add_column(column)
def downgrade() -> None:
    """Drop the TVA/OCR detail columns from the ``receipts`` table."""
    # Dropped in the reverse of the order in which upgrade() adds them
    dropped_columns = ('vendor_address', 'items_count', 'tva_total', 'tva_breakdown')
    with op.batch_alter_table('receipts', schema=None) as batch_op:
        for column_name in dropped_columns:
            batch_op.drop_column(column_name)

View File

@@ -71,6 +71,37 @@
<span v-if="data.cui" class="cui-badge">CUI: {{ data.cui }}</span> <span v-if="data.cui" class="cui-badge">CUI: {{ data.cui }}</span>
</div> </div>
</div> </div>
<!-- TVA Entries (multiple rates) -->
<div class="preview-field full-width" v-if="data.tva_entries?.length > 0 || data.tva_total">
<label>TVA</label>
<div class="tva-breakdown">
<div v-for="(entry, idx) in data.tva_entries" :key="idx" class="tva-entry">
<span class="tva-code" v-if="entry.code">{{ entry.code }}</span>
<span class="tva-percent-badge">{{ entry.percent }}%</span>
<span class="tva-amount">{{ formatAmount(entry.amount) }} RON</span>
</div>
<div v-if="data.tva_total && data.tva_entries?.length > 1" class="tva-total">
<strong>Total TVA:</strong> {{ formatAmount(data.tva_total) }} RON
</div>
</div>
</div>
<!-- Items Count -->
<div class="preview-field" v-if="data.items_count">
<label>Nr. Articole</label>
<div class="field-value">
{{ data.items_count }} articole
</div>
</div>
<!-- Address -->
<div class="preview-field full-width" v-if="data.address">
<label>Adresa</label>
<div class="field-value address-text">
{{ data.address }}
</div>
</div>
</div> </div>
<!-- Raw Text Toggle --> <!-- Raw Text Toggle -->
@@ -224,6 +255,50 @@ const formatDate = (dateStr) => {
color: #475569; color: #475569;
} }
.tva-breakdown {
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.tva-entry {
display: flex;
align-items: center;
gap: 0.5rem;
}
.tva-code {
font-weight: 600;
color: #475569;
min-width: 1rem;
}
.tva-percent-badge {
display: inline-block;
padding: 0.15rem 0.5rem;
background: #dbeafe;
border-radius: 4px;
font-size: 0.8rem;
color: #1e40af;
min-width: 2.5rem;
text-align: center;
}
.tva-amount {
font-weight: 500;
}
.tva-total {
margin-top: 0.25rem;
padding-top: 0.25rem;
border-top: 1px dashed #cbd5e1;
}
.address-text {
font-size: 0.9rem;
color: #475569;
}
.raw-text-section { .raw-text-section {
margin-top: 1rem; margin-top: 1rem;
padding-top: 1rem; padding-top: 1rem;

View File

@@ -246,11 +246,59 @@
<Textarea <Textarea
v-model="form.description" v-model="form.description"
rows="3" rows="3"
placeholder="Detalii suplimentare..." placeholder="Descriere optionala..."
/> />
</div> </div>
</div> </div>
<!-- Detalii Suplimentare (populated from OCR) -->
<div v-if="form.tva_breakdown?.length > 0 || form.items_count || form.vendor_address" class="extra-details-section">
<h3>
<i class="pi pi-list"></i>
Detalii Suplimentare (din OCR)
</h3>
<!-- TVA Breakdown -->
<div class="form-field form-field-full" v-if="form.tva_breakdown?.length > 0">
<label>Defalcare TVA</label>
<div class="tva-table">
<div v-for="(entry, idx) in form.tva_breakdown" :key="idx" class="tva-row">
<span class="tva-label">TVA {{ entry.code }} ({{ entry.percent }}%):</span>
<InputNumber
v-model="form.tva_breakdown[idx].amount"
mode="currency"
currency="RON"
locale="ro-RO"
:minFractionDigits="2"
class="tva-input"
/>
</div>
<div class="tva-row total" v-if="form.tva_breakdown.length > 0">
<span class="tva-label"><strong>Total TVA:</strong></span>
<span class="tva-value">{{ formatTvaTotal() }} RON</span>
</div>
</div>
</div>
<div class="form-grid">
<div class="form-field" v-if="form.items_count">
<label>Nr. Articole</label>
<InputNumber
v-model="form.items_count"
:min="1"
placeholder="Ex: 17"
/>
</div>
<div class="form-field" v-if="form.vendor_address">
<label>Adresa Furnizor</label>
<InputText
v-model="form.vendor_address"
placeholder="Adresa din bon"
/>
</div>
</div>
</div>
<Divider /> <Divider />
<!-- Actions --> <!-- Actions -->
@@ -314,6 +362,11 @@ const form = ref({
receipt_number: '', receipt_number: '',
description: '', description: '',
company_id: 1, // Default company for Phase 1 company_id: 1, // Default company for Phase 1
// TVA info (multiple entries support)
tva_breakdown: [], // Array of {code, percent, amount}
tva_total: null,
items_count: null,
vendor_address: '',
}) })
const selectedFiles = ref([]) const selectedFiles = ref([])
@@ -435,6 +488,24 @@ const applyOCRData = (data) => {
} }
} }
// Apply TVA entries
if (data.tva_entries?.length > 0) {
form.value.tva_breakdown = data.tva_entries.map(e => ({
code: e.code,
percent: e.percent,
amount: parseFloat(e.amount)
}))
}
if (data.tva_total) {
form.value.tva_total = parseFloat(data.tva_total)
}
if (data.items_count) {
form.value.items_count = data.items_count
}
if (data.address) {
form.value.vendor_address = data.address
}
// Clear OCR preview // Clear OCR preview
ocrData.value = null ocrData.value = null
@@ -499,6 +570,12 @@ const formatFileSize = (bytes) => {
return (bytes / (1024 * 1024)).toFixed(1) + ' MB' return (bytes / (1024 * 1024)).toFixed(1) + ' MB'
} }
// Sum of the editable TVA amounts in the form, formatted for display.
const formatTvaTotal = () => {
  const entries = form.value.tva_breakdown ?? []
  // Number() keeps the sum numeric if an amount arrives as a string;
  // empty or non-numeric amounts count as 0.
  const total = entries.reduce((sum, entry) => sum + (Number(entry.amount) || 0), 0)
  // The empty case goes through the same ro-RO formatting, so the decimal
  // separator is consistent with every other value.
  return total.toLocaleString('ro-RO', { minimumFractionDigits: 2, maximumFractionDigits: 2 })
}
const validateForm = () => { const validateForm = () => {
// Check if we have at least one file (for new receipts) // Check if we have at least one file (for new receipts)
if (!isEditMode.value && selectedFiles.value.length === 0) { if (!isEditMode.value && selectedFiles.value.length === 0) {
@@ -725,4 +802,55 @@ const submitForReview = async () => {
font-size: 0.85rem; font-size: 0.85rem;
color: #64748b; color: #64748b;
} }
/* Extra details section (TVA, items, address) */
.extra-details-section {
margin-top: 1.5rem;
padding: 1rem;
background: #f0f9ff;
border: 1px solid #bae6fd;
border-radius: 8px;
}
.extra-details-section h3 {
display: flex;
align-items: center;
gap: 0.5rem;
margin-bottom: 1rem;
color: #0284c7;
}
.tva-table {
display: flex;
flex-direction: column;
gap: 0.5rem;
margin-bottom: 1rem;
}
.tva-row {
display: flex;
align-items: center;
gap: 1rem;
}
.tva-row.total {
margin-top: 0.5rem;
padding-top: 0.5rem;
border-top: 1px dashed #0284c7;
}
.tva-label {
min-width: 150px;
font-weight: 500;
color: #334155;
}
.tva-input {
max-width: 150px;
}
.tva-value {
font-weight: 600;
color: #0284c7;
}
</style> </style>

View File

@@ -112,6 +112,43 @@
</div> </div>
</div> </div>
<!-- Detalii Suplimentare (TVA, items, address from OCR) -->
<template v-if="hasTvaData || receipt.items_count || receipt.vendor_address">
<Divider />
<h4 style="margin-bottom: 0.75rem; color: #0284c7;">
<i class="pi pi-list"></i>
Detalii Suplimentare
</h4>
<div class="detail-list">
<!-- TVA Breakdown -->
<div v-if="parsedTvaBreakdown?.length > 0" class="detail-item tva-detail">
<span class="label">TVA</span>
<div class="tva-breakdown-display">
<div v-for="(entry, idx) in parsedTvaBreakdown" :key="idx" class="tva-line">
<span class="tva-code" v-if="entry.code">{{ entry.code }}:</span>
<span class="tva-percent">{{ entry.percent }}%</span>
<span class="tva-amount">= {{ formatAmount(entry.amount) }}</span>
</div>
<div v-if="receipt.tva_total && parsedTvaBreakdown.length > 1" class="tva-total-line">
<strong>Total TVA: {{ formatAmount(receipt.tva_total) }}</strong>
</div>
</div>
</div>
<div class="detail-item" v-if="receipt.items_count">
<span class="label">Nr. Articole</span>
<span class="value">{{ receipt.items_count }} articole</span>
</div>
<div class="detail-item" v-if="receipt.vendor_address">
<span class="label">Adresa Furnizor</span>
<span class="value">{{ receipt.vendor_address }}</span>
</div>
</div>
</template>
<Divider /> <Divider />
<div class="detail-list"> <div class="detail-list">
@@ -283,6 +320,22 @@ const isBalanced = computed(() => {
return Math.abs(totalDebit.value - totalCredit.value) < 0.01 return Math.abs(totalDebit.value - totalCredit.value) < 0.01
}) })
// TVA entries of the loaded receipt as an array; always returns an array.
const parsedTvaBreakdown = computed(() => {
  const raw = receipt.value?.tva_breakdown
  if (!raw) return []
  try {
    // Handle both string (JSON) and already-decoded formats
    const parsed = typeof raw === 'string' ? JSON.parse(raw) : raw
    // Valid JSON that is not an array (object, number, string) would break
    // the .length checks and the v-for that consume this value.
    return Array.isArray(parsed) ? parsed : []
  } catch {
    // Malformed JSON: show no breakdown rather than failing the view
    return []
  }
})
// Truthy when the receipt has at least one TVA entry or a TVA total.
const hasTvaData = computed(() => {
  const entries = parsedTvaBreakdown.value
  if (entries?.length > 0) {
    return true
  }
  return receipt.value?.tva_total
})
onMounted(async () => { onMounted(async () => {
await loadReceipt() await loadReceipt()
}) })
@@ -521,4 +574,56 @@ const resubmitReceipt = async () => {
border-radius: 8px; border-radius: 8px;
color: #f57c00; color: #f57c00;
} }
/* TVA Breakdown Display */
.detail-item.tva-detail {
flex-direction: column;
align-items: flex-start;
gap: 0.5rem;
}
.tva-breakdown-display {
display: flex;
flex-direction: column;
gap: 0.25rem;
padding: 0.5rem;
background: #f0f9ff;
border-radius: 6px;
width: 100%;
}
.tva-line {
display: flex;
align-items: center;
gap: 0.5rem;
}
.tva-code {
font-weight: 600;
color: #475569;
min-width: 1.5rem;
}
.tva-percent {
display: inline-block;
padding: 0.1rem 0.4rem;
background: #dbeafe;
border-radius: 4px;
font-size: 0.85rem;
color: #1e40af;
min-width: 2.5rem;
text-align: center;
}
.tva-amount {
font-weight: 500;
color: #334155;
}
.tva-total-line {
margin-top: 0.25rem;
padding-top: 0.25rem;
border-top: 1px dashed #0284c7;
color: #0284c7;
}
</style> </style>