feat: Add multiple TVA entries support for Romanian receipts
- Add TvaEntry schema supporting multiple TVA rates (A, B, C, D codes)
- Update OCR extractor to extract multiple TVA entries from receipts
- Support both old (19%, 9%, 5%) and new Romanian rates (21%, 11% from Aug 2025)
- Add tva_breakdown, tva_total, items_count, vendor_address to Receipt model
- Update OCRPreview.vue to display TVA entries with rate badges
- Add "Detalii Suplimentare" section in ReceiptCreateView with editable TVA table
- Add TVA breakdown display in ReceiptDetailView
- Create database migration for new TVA columns

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -51,6 +51,12 @@ class Receipt(SQLModel, table=True):
|
|||||||
amount: Decimal = Field(decimal_places=2, max_digits=15)
|
amount: Decimal = Field(decimal_places=2, max_digits=15)
|
||||||
description: Optional[str] = Field(default=None, max_length=500)
|
description: Optional[str] = Field(default=None, max_length=500)
|
||||||
|
|
||||||
|
# TVA info (extracted from OCR) - stored as JSON for multiple entries
|
||||||
|
tva_breakdown: Optional[str] = Field(default=None, max_length=1000) # JSON: [{"code":"A","percent":19,"amount":"15.20"}]
|
||||||
|
tva_total: Optional[Decimal] = Field(default=None, decimal_places=2, max_digits=15)
|
||||||
|
items_count: Optional[int] = Field(default=None)
|
||||||
|
vendor_address: Optional[str] = Field(default=None, max_length=500)
|
||||||
|
|
||||||
# Expense type (for auto-generating accounting entries)
|
# Expense type (for auto-generating accounting entries)
|
||||||
expense_type_code: Optional[str] = Field(default=None, max_length=20)
|
expense_type_code: Optional[str] = Field(default=None, max_length=20)
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from app.db.database import get_session
|
|||||||
from app.db.crud.attachment import AttachmentCRUD
|
from app.db.crud.attachment import AttachmentCRUD
|
||||||
from app.services.ocr_service import ocr_service
|
from app.services.ocr_service import ocr_service
|
||||||
from app.services.ocr_engine import OCREngine
|
from app.services.ocr_engine import OCREngine
|
||||||
from app.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData
|
from app.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -78,6 +78,12 @@ async def extract_from_image(file: UploadFile = File(...)):
|
|||||||
raise HTTPException(status_code=422, detail=message)
|
raise HTTPException(status_code=422, detail=message)
|
||||||
|
|
||||||
# Convert ExtractionResult to ExtractionData schema
|
# Convert ExtractionResult to ExtractionData schema
|
||||||
|
# Convert tva_entries from dict to TvaEntry objects
|
||||||
|
tva_entries_schema = [
|
||||||
|
TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
|
||||||
|
for e in result.tva_entries
|
||||||
|
] if result.tva_entries else []
|
||||||
|
|
||||||
data = ExtractionData(
|
data = ExtractionData(
|
||||||
receipt_type=result.receipt_type,
|
receipt_type=result.receipt_type,
|
||||||
receipt_number=result.receipt_number,
|
receipt_number=result.receipt_number,
|
||||||
@@ -87,6 +93,10 @@ async def extract_from_image(file: UploadFile = File(...)):
|
|||||||
partner_name=result.partner_name,
|
partner_name=result.partner_name,
|
||||||
cui=result.cui,
|
cui=result.cui,
|
||||||
description=result.description,
|
description=result.description,
|
||||||
|
tva_entries=tva_entries_schema,
|
||||||
|
tva_total=result.tva_total,
|
||||||
|
address=result.address,
|
||||||
|
items_count=result.items_count,
|
||||||
confidence_amount=result.confidence_amount,
|
confidence_amount=result.confidence_amount,
|
||||||
confidence_date=result.confidence_date,
|
confidence_date=result.confidence_date,
|
||||||
confidence_vendor=result.confidence_vendor,
|
confidence_vendor=result.confidence_vendor,
|
||||||
@@ -137,6 +147,12 @@ async def extract_from_attachment(
|
|||||||
raise HTTPException(status_code=422, detail=message)
|
raise HTTPException(status_code=422, detail=message)
|
||||||
|
|
||||||
# Convert ExtractionResult to ExtractionData schema
|
# Convert ExtractionResult to ExtractionData schema
|
||||||
|
# Convert tva_entries from dict to TvaEntry objects
|
||||||
|
tva_entries_schema = [
|
||||||
|
TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
|
||||||
|
for e in result.tva_entries
|
||||||
|
] if result.tva_entries else []
|
||||||
|
|
||||||
data = ExtractionData(
|
data = ExtractionData(
|
||||||
receipt_type=result.receipt_type,
|
receipt_type=result.receipt_type,
|
||||||
receipt_number=result.receipt_number,
|
receipt_number=result.receipt_number,
|
||||||
@@ -146,6 +162,10 @@ async def extract_from_attachment(
|
|||||||
partner_name=result.partner_name,
|
partner_name=result.partner_name,
|
||||||
cui=result.cui,
|
cui=result.cui,
|
||||||
description=result.description,
|
description=result.description,
|
||||||
|
tva_entries=tva_entries_schema,
|
||||||
|
tva_total=result.tva_total,
|
||||||
|
address=result.address,
|
||||||
|
items_count=result.items_count,
|
||||||
confidence_amount=result.confidence_amount,
|
confidence_amount=result.confidence_amount,
|
||||||
confidence_date=result.confidence_date,
|
confidence_date=result.confidence_date,
|
||||||
confidence_vendor=result.confidence_vendor,
|
confidence_vendor=result.confidence_vendor,
|
||||||
|
|||||||
@@ -2,11 +2,18 @@
|
|||||||
|
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from typing import Optional
|
from typing import Optional, List
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class TvaEntry(BaseModel):
|
||||||
|
"""Single TVA entry with code, percentage and amount."""
|
||||||
|
code: Optional[str] = Field(default=None, description="TVA code: A, B, C, D")
|
||||||
|
percent: int = Field(description="TVA percentage: 0, 5, 9, 19, 21")
|
||||||
|
amount: Decimal = Field(description="TVA amount for this rate")
|
||||||
|
|
||||||
|
|
||||||
class ExtractionData(BaseModel):
|
class ExtractionData(BaseModel):
|
||||||
"""Extracted receipt data from OCR."""
|
"""Extracted receipt data from OCR."""
|
||||||
|
|
||||||
@@ -19,6 +26,12 @@ class ExtractionData(BaseModel):
|
|||||||
cui: Optional[str] = Field(default=None, description="CUI (fiscal identification code)")
|
cui: Optional[str] = Field(default=None, description="CUI (fiscal identification code)")
|
||||||
description: Optional[str] = Field(default=None, description="Optional description")
|
description: Optional[str] = Field(default=None, description="Optional description")
|
||||||
|
|
||||||
|
# Additional extracted fields - Multiple TVA entries support
|
||||||
|
tva_entries: List[TvaEntry] = Field(default=[], description="List of TVA entries by rate (A, B, C, D)")
|
||||||
|
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
|
||||||
|
address: Optional[str] = Field(default=None, description="Vendor address")
|
||||||
|
items_count: Optional[int] = Field(default=None, description="Number of items/articles")
|
||||||
|
|
||||||
confidence_amount: float = Field(default=0.0, ge=0, le=1, description="Amount extraction confidence")
|
confidence_amount: float = Field(default=0.0, ge=0, le=1, description="Amount extraction confidence")
|
||||||
confidence_date: float = Field(default=0.0, ge=0, le=1, description="Date extraction confidence")
|
confidence_date: float = Field(default=0.0, ge=0, le=1, description="Date extraction confidence")
|
||||||
confidence_vendor: float = Field(default=0.0, ge=0, le=1, description="Vendor extraction confidence")
|
confidence_vendor: float = Field(default=0.0, ge=0, le=1, description="Vendor extraction confidence")
|
||||||
@@ -30,18 +43,25 @@ class ExtractionData(BaseModel):
|
|||||||
json_schema_extra = {
|
json_schema_extra = {
|
||||||
"example": {
|
"example": {
|
||||||
"receipt_type": "bon_fiscal",
|
"receipt_type": "bon_fiscal",
|
||||||
"receipt_number": "12345",
|
"receipt_number": "1360760",
|
||||||
"receipt_series": None,
|
"receipt_series": "0146",
|
||||||
"receipt_date": "2024-01-15",
|
"receipt_date": "2025-10-11",
|
||||||
"amount": 125.50,
|
"amount": 186.16,
|
||||||
"partner_name": "MEGA IMAGE SRL",
|
"partner_name": "FIVE-HOLDING S.A.",
|
||||||
"cui": "12345678",
|
"cui": "10562600",
|
||||||
"description": None,
|
"description": None,
|
||||||
"confidence_amount": 0.95,
|
"tva_entries": [
|
||||||
"confidence_date": 0.90,
|
{"code": "A", "percent": 19, "amount": 25.00},
|
||||||
"confidence_vendor": 0.75,
|
{"code": "B", "percent": 9, "amount": 7.31}
|
||||||
"overall_confidence": 0.87,
|
],
|
||||||
"raw_text": "BON FISCAL\nMEGA IMAGE SRL\n..."
|
"tva_total": 32.31,
|
||||||
|
"address": "JUD. CONSTANTA, MUN. CONSTANTA, STR. ION ROATA NR. 3",
|
||||||
|
"items_count": 17,
|
||||||
|
"confidence_amount": 0.98,
|
||||||
|
"confidence_date": 0.98,
|
||||||
|
"confidence_vendor": 0.95,
|
||||||
|
"overall_confidence": 0.97,
|
||||||
|
"raw_text": "FIVE-HOLDING S.A.\nCIF: RO10562600\n..."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -64,6 +64,15 @@ class AttachmentResponse(BaseModel):
|
|||||||
uploaded_at: datetime
|
uploaded_at: datetime
|
||||||
|
|
||||||
|
|
||||||
|
# ============ TVA Schema ============
|
||||||
|
|
||||||
|
class TvaEntrySchema(BaseModel):
|
||||||
|
"""Single TVA entry with code, percentage and amount."""
|
||||||
|
code: Optional[str] = Field(default=None, description="TVA code: A, B, C, D")
|
||||||
|
percent: int = Field(description="TVA percentage: 0, 5, 9, 19, 21")
|
||||||
|
amount: Decimal = Field(description="TVA amount for this rate")
|
||||||
|
|
||||||
|
|
||||||
# ============ Receipt Schemas ============
|
# ============ Receipt Schemas ============
|
||||||
|
|
||||||
class ReceiptBase(BaseModel):
|
class ReceiptBase(BaseModel):
|
||||||
@@ -75,6 +84,12 @@ class ReceiptBase(BaseModel):
|
|||||||
receipt_date: date
|
receipt_date: date
|
||||||
amount: Decimal = Field(gt=0)
|
amount: Decimal = Field(gt=0)
|
||||||
description: Optional[str] = Field(default=None, max_length=500)
|
description: Optional[str] = Field(default=None, max_length=500)
|
||||||
|
# TVA info (multiple entries support)
|
||||||
|
tva_breakdown: Optional[List[TvaEntrySchema]] = Field(default=None, description="List of TVA entries")
|
||||||
|
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
|
||||||
|
items_count: Optional[int] = Field(default=None, description="Number of items")
|
||||||
|
vendor_address: Optional[str] = Field(default=None, max_length=500, description="Vendor address")
|
||||||
|
# Other fields
|
||||||
expense_type_code: Optional[str] = Field(default=None, max_length=20)
|
expense_type_code: Optional[str] = Field(default=None, max_length=20)
|
||||||
company_id: int
|
company_id: int
|
||||||
partner_id: Optional[int] = None
|
partner_id: Optional[int] = None
|
||||||
@@ -98,6 +113,12 @@ class ReceiptUpdate(BaseModel):
|
|||||||
receipt_date: Optional[date] = None
|
receipt_date: Optional[date] = None
|
||||||
amount: Optional[Decimal] = Field(default=None, gt=0)
|
amount: Optional[Decimal] = Field(default=None, gt=0)
|
||||||
description: Optional[str] = Field(default=None, max_length=500)
|
description: Optional[str] = Field(default=None, max_length=500)
|
||||||
|
# TVA info (multiple entries support)
|
||||||
|
tva_breakdown: Optional[List[TvaEntrySchema]] = Field(default=None, description="List of TVA entries")
|
||||||
|
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
|
||||||
|
items_count: Optional[int] = Field(default=None, description="Number of items")
|
||||||
|
vendor_address: Optional[str] = Field(default=None, max_length=500, description="Vendor address")
|
||||||
|
# Other fields
|
||||||
expense_type_code: Optional[str] = Field(default=None, max_length=20)
|
expense_type_code: Optional[str] = Field(default=None, max_length=20)
|
||||||
partner_id: Optional[int] = None
|
partner_id: Optional[int] = None
|
||||||
partner_name: Optional[str] = Field(default=None, max_length=200)
|
partner_name: Optional[str] = Field(default=None, max_length=200)
|
||||||
|
|||||||
@@ -23,24 +23,37 @@ class ImagePreprocessor:
|
|||||||
raise ValueError(f"Could not load image: {path}")
|
raise ValueError(f"Could not load image: {path}")
|
||||||
return image
|
return image
|
||||||
|
|
||||||
def pdf_to_images(self, path: Path, dpi: int = 300) -> List[np.ndarray]:
|
def pdf_to_images(self, path: Path, dpi: int = 400) -> List[np.ndarray]:
|
||||||
"""Convert PDF to images."""
|
"""
|
||||||
|
Convert PDF to images with high DPI for better OCR.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: Path to PDF file
|
||||||
|
dpi: Resolution (400 recommended for receipts, higher = better quality but slower)
|
||||||
|
"""
|
||||||
if not PDF_AVAILABLE:
|
if not PDF_AVAILABLE:
|
||||||
raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")
|
raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")
|
||||||
|
# Render at the requested dpi (default 400) for better text recognition on thermal receipts
|
||||||
images = pdf2image.convert_from_path(str(path), dpi=dpi)
|
images = pdf2image.convert_from_path(str(path), dpi=dpi)
|
||||||
return [np.array(img) for img in images]
|
return [np.array(img) for img in images]
|
||||||
|
|
||||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
def preprocess(self, image: np.ndarray, high_quality: bool = True) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Apply preprocessing pipeline for thermal receipt images.
|
Apply preprocessing pipeline for thermal receipt images.
|
||||||
|
|
||||||
Pipeline:
|
Pipeline:
|
||||||
1. Convert to grayscale
|
1. Convert to grayscale
|
||||||
2. Resize if too small (min 1000px width)
|
2. Resize if too small (min 1500px width for high quality)
|
||||||
3. Deskew (straighten rotated text)
|
3. Deskew (straighten rotated text)
|
||||||
4. Denoise (Non-local means)
|
4. Contrast enhancement (CLAHE)
|
||||||
5. Adaptive thresholding (binarization)
|
5. Denoise (Non-local means)
|
||||||
6. Morphological close (connect broken chars)
|
6. Sharpening (for clearer text edges)
|
||||||
|
7. Adaptive thresholding (binarization)
|
||||||
|
8. Morphological operations (connect broken chars)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
image: Input image (BGR or grayscale)
|
||||||
|
high_quality: If True, apply more aggressive preprocessing
|
||||||
"""
|
"""
|
||||||
# 1. Grayscale
|
# 1. Grayscale
|
||||||
if len(image.shape) == 3:
|
if len(image.shape) == 3:
|
||||||
@@ -48,10 +61,11 @@ class ImagePreprocessor:
|
|||||||
else:
|
else:
|
||||||
gray = image.copy()
|
gray = image.copy()
|
||||||
|
|
||||||
# 2. Resize if too small
|
# 2. Resize if too small (larger = better OCR)
|
||||||
height, width = gray.shape
|
height, width = gray.shape
|
||||||
if width < 1000:
|
min_width = 1500 if high_quality else 1000
|
||||||
scale = 1000 / width
|
if width < min_width:
|
||||||
|
scale = min_width / width
|
||||||
gray = cv2.resize(
|
gray = cv2.resize(
|
||||||
gray, None, fx=scale, fy=scale,
|
gray, None, fx=scale, fy=scale,
|
||||||
interpolation=cv2.INTER_CUBIC
|
interpolation=cv2.INTER_CUBIC
|
||||||
@@ -60,24 +74,43 @@ class ImagePreprocessor:
|
|||||||
# 3. Deskew
|
# 3. Deskew
|
||||||
gray = self._deskew(gray)
|
gray = self._deskew(gray)
|
||||||
|
|
||||||
# 4. Denoise
|
# 4. Contrast enhancement with CLAHE (Contrast Limited Adaptive Histogram Equalization)
|
||||||
|
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
||||||
|
enhanced = clahe.apply(gray)
|
||||||
|
|
||||||
|
# 5. Denoise (slightly less aggressive to preserve text details)
|
||||||
denoised = cv2.fastNlMeansDenoising(
|
denoised = cv2.fastNlMeansDenoising(
|
||||||
gray, h=10,
|
enhanced, h=8, # Lower h = preserve more details
|
||||||
templateWindowSize=7,
|
templateWindowSize=7,
|
||||||
searchWindowSize=21
|
searchWindowSize=21
|
||||||
)
|
)
|
||||||
|
|
||||||
# 5. Adaptive thresholding
|
# 6. Sharpening to enhance text edges
|
||||||
|
if high_quality:
|
||||||
|
# Unsharp mask for better text clarity
|
||||||
|
gaussian = cv2.GaussianBlur(denoised, (0, 0), 2.0)
|
||||||
|
sharpened = cv2.addWeighted(denoised, 1.5, gaussian, -0.5, 0)
|
||||||
|
else:
|
||||||
|
sharpened = denoised
|
||||||
|
|
||||||
|
# 7. Adaptive thresholding with optimized parameters
|
||||||
binary = cv2.adaptiveThreshold(
|
binary = cv2.adaptiveThreshold(
|
||||||
denoised, 255,
|
sharpened, 255,
|
||||||
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||||
cv2.THRESH_BINARY,
|
cv2.THRESH_BINARY,
|
||||||
blockSize=15, C=8
|
blockSize=11, # Smaller block = better for small text
|
||||||
|
C=5 # Lower C = darker result, better for faded receipts
|
||||||
)
|
)
|
||||||
|
|
||||||
# 6. Morphological close
|
# 8. Morphological operations
|
||||||
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
|
# Close small gaps in characters
|
||||||
result = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
|
kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
|
||||||
|
result = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_close)
|
||||||
|
|
||||||
|
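# Optional: Remove small noise spots (NOTE: with the 1x1 kernel below this opening leaves the image unchanged; a larger kernel is needed for it to remove anything)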
|
||||||
|
if high_quality:
|
||||||
|
kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
|
||||||
|
result = cv2.morphologyEx(result, cv2.MORPH_OPEN, kernel_open)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
@@ -64,11 +64,17 @@ class OCREngine:
|
|||||||
PaddleOCR = _PaddleOCR
|
PaddleOCR = _PaddleOCR
|
||||||
|
|
||||||
print("Initializing PaddleOCR engine...")
|
print("Initializing PaddleOCR engine...")
|
||||||
# PaddleOCR 3.x API - simplified parameters
|
# PaddleOCR 3.x API - optimized for Romanian receipts
|
||||||
self._paddle = PaddleOCR(
|
self._paddle = PaddleOCR(
|
||||||
lang='en', # Better for mixed text with numbers
|
lang='en', # 'en' works better than 'ro' for mixed alphanumeric
|
||||||
|
# High quality settings for better accuracy
|
||||||
|
det_db_thresh=0.3, # Lower threshold = detect more text (default 0.3)
|
||||||
|
det_db_box_thresh=0.5, # Box confidence threshold (default 0.5)
|
||||||
|
det_db_unclip_ratio=1.8, # Expand detected boxes slightly (default 1.5)
|
||||||
|
rec_batch_num=6, # Batch size for recognition
|
||||||
|
use_angle_cls=True, # Enable text angle classification
|
||||||
)
|
)
|
||||||
print("PaddleOCR initialized successfully")
|
print("PaddleOCR initialized successfully with high-quality settings")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Failed to initialize PaddleOCR: {e}")
|
print(f"Warning: Failed to initialize PaddleOCR: {e}")
|
||||||
self._paddle = None
|
self._paddle = None
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
import re
|
import re
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
from decimal import Decimal, InvalidOperation
|
from decimal import Decimal, InvalidOperation
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple, List
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
||||||
@@ -18,6 +18,11 @@ class ExtractionResult:
|
|||||||
partner_name: Optional[str] = None
|
partner_name: Optional[str] = None
|
||||||
cui: Optional[str] = None
|
cui: Optional[str] = None
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
|
# Additional extracted fields - Multiple TVA entries support
|
||||||
|
tva_entries: List[dict] = field(default_factory=list) # [{code, percent, amount}]
|
||||||
|
tva_total: Optional[Decimal] = None
|
||||||
|
address: Optional[str] = None
|
||||||
|
items_count: Optional[int] = None
|
||||||
|
|
||||||
confidence_amount: float = 0.0
|
confidence_amount: float = 0.0
|
||||||
confidence_date: float = 0.0
|
confidence_date: float = 0.0
|
||||||
@@ -40,44 +45,158 @@ class ReceiptExtractor:
|
|||||||
"""Extract receipt fields using pattern matching for Romanian receipts."""
|
"""Extract receipt fields using pattern matching for Romanian receipts."""
|
||||||
|
|
||||||
# Total amount patterns (most specific first)
|
# Total amount patterns (most specific first)
|
||||||
|
# Romanian receipts use various formats: TOTAL LEI, TOTAL:, TOTAL RON, etc.
|
||||||
|
# OCR often produces errors, so patterns must be tolerant
|
||||||
TOTAL_PATTERNS = [
|
TOTAL_PATTERNS = [
|
||||||
|
# Most common: TOTAL LEI followed by amount
|
||||||
|
(r'TOTAL\s+LEI\s*([\d\s.,]+)', 0.98),
|
||||||
|
(r'[OT]?OTAL\s+LEI\s*([\d\s.,]+)', 0.95), # OCR may miss first letter
|
||||||
|
# Standard patterns
|
||||||
(r'TOTAL\s*:?\s*([\d\s.,]+)\s*(?:RON|LEI)?', 0.95),
|
(r'TOTAL\s*:?\s*([\d\s.,]+)\s*(?:RON|LEI)?', 0.95),
|
||||||
(r'TOTAL\s+(?:RON|LEI)\s*([\d\s.,]+)', 0.95),
|
(r'TOTAL\s+(?:RON|LEI)\s*([\d\s.,]+)', 0.95),
|
||||||
|
# SUBTOTAL when TOTAL not found
|
||||||
|
(r'SUBTOTAL\s*([\d\s.,]+)', 0.90),
|
||||||
|
(r'[SB]?UBTOTAL\s*([\d\s.,]+)', 0.88), # OCR variations
|
||||||
|
# Payment methods
|
||||||
(r'DE\s+PLATA\s*:?\s*([\d\s.,]+)', 0.90),
|
(r'DE\s+PLATA\s*:?\s*([\d\s.,]+)', 0.90),
|
||||||
(r'SUMA\s*:?\s*([\d\s.,]+)', 0.85),
|
(r'SUMA\s*:?\s*([\d\s.,]+)', 0.85),
|
||||||
(r'PLATA\s+CARD\s*:?\s*([\d\s.,]+)', 0.85),
|
(r'PLATA\s+CARD\s*:?\s*([\d\s.,]+)', 0.85),
|
||||||
(r'NUMERAR\s*:?\s*([\d\s.,]+)', 0.80),
|
(r'NUMERAR\s*:?\s*([\d\s.,]+)', 0.80),
|
||||||
|
(r'REST\s*:?\s*([\d\s.,]+)', 0.70), # Sometimes total is near REST
|
||||||
]
|
]
|
||||||
|
|
||||||
# Date patterns
|
# Fallback: Find the largest repeated amount (likely the total)
|
||||||
|
# This handles cases where OCR doesn't capture "TOTAL" keyword
|
||||||
|
|
||||||
|
# Date patterns - support dash, dot, and slash separators
|
||||||
|
# OCR may produce DRTA instead of DATA, DAIA, etc.
|
||||||
DATE_PATTERNS = [
|
DATE_PATTERNS = [
|
||||||
(r'DATA\s*:?\s*(\d{2}[./]\d{2}[./]\d{4})', 0.95),
|
# DATA/DRTA label followed by DD-MM-YYYY ("-", "." or "/" separators); other misreads such as DAIA are not matched by this pattern
|
||||||
(r'(\d{2}[./]\d{2}[./]\d{4})\s+\d{2}:\d{2}', 0.90),
|
(r'D[AR]TA\s*:?\s*(\d{2}[-./]\d{2}[-./]\d{4})', 0.98),
|
||||||
(r'(\d{2}[./]\d{2}[./]\d{4})', 0.80),
|
(r'DATA\s*:?\s*(\d{2}[-./]\d{2}[-./]\d{4})', 0.98),
|
||||||
(r'(\d{4}[./]\d{2}[./]\d{2})', 0.75), # YYYY.MM.DD format
|
# Date followed by ORA (time) - OCR may produce 0RA
|
||||||
|
(r'(\d{2}[-./]\d{2}[-./]\d{4})\s+[O0]RA\s*:?\s*\d{2}:\d{2}', 0.95),
|
||||||
|
# Date followed by time without ORA keyword
|
||||||
|
(r'(\d{2}[-./]\d{2}[-./]\d{4})\s+\d{2}:\d{2}', 0.90),
|
||||||
|
# Standalone date
|
||||||
|
(r'(\d{2}[-./]\d{2}[-./]\d{4})', 0.80),
|
||||||
|
# YYYY-MM-DD format (less common)
|
||||||
|
(r'(\d{4}[-./]\d{2}[-./]\d{2})', 0.75),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Receipt number patterns
|
# Receipt number patterns - Romanian fiscal receipt formats
|
||||||
|
# OCR may produce N instead of : or other errors
|
||||||
NUMBER_PATTERNS = [
|
NUMBER_PATTERNS = [
|
||||||
|
# NDS format (common in Romanian POS)
|
||||||
|
(r'NDS\s*:?\s*(\d+)', 0.98),
|
||||||
|
# C3POS terminal format - OCR may have N instead of : (C3POS-CT2N1360760)
|
||||||
|
(r'C3POS[-A-Z0-9]*[N:](\d{6,7})', 0.98), # CT2N1360760 format
|
||||||
|
(r'C3POS.*?(\d{6,7})\b', 0.95), # Any C3POS followed by 6-7 digit number
|
||||||
|
(r'CT2[N:]\s*(\d{6,})', 0.95), # CT2N prefix
|
||||||
|
# BF (Bon Fiscal) number
|
||||||
|
(r'BF\s*:?\s*(\d+)', 0.93),
|
||||||
|
# NIVS format
|
||||||
|
(r'NIVS\s*:?\s*(\d+)', 0.95),
|
||||||
|
# Standard NR BON formats
|
||||||
(r'NR\.?\s*BON\s*:?\s*(\d+)', 0.95),
|
(r'NR\.?\s*BON\s*:?\s*(\d+)', 0.95),
|
||||||
(r'BON\s+(?:FISCAL\s+)?NR\.?\s*:?\s*(\d+)', 0.95),
|
(r'BON\s+(?:FISCAL\s+)?NR\.?\s*:?\s*(\d+)', 0.95),
|
||||||
(r'CHITANTA\s+NR\.?\s*:?\s*(\d+)', 0.95),
|
(r'CHITANTA\s+NR\.?\s*:?\s*(\d+)', 0.95),
|
||||||
|
# Document number
|
||||||
(r'NR\.?\s+DOCUMENT\s*:?\s*(\d+)', 0.90),
|
(r'NR\.?\s+DOCUMENT\s*:?\s*(\d+)', 0.90),
|
||||||
(r'NR\.?\s*:?\s*(\d{4,})', 0.70),
|
# ID BF format
|
||||||
|
(r'ID\s*BF\s*:?\s*(\d+)', 0.90),
|
||||||
|
# TD format (transaction ID)
|
||||||
|
(r'TD\s*:?\s*(\d+)', 0.85),
|
||||||
|
# 6-8 digit number (typical receipt number length)
|
||||||
|
(r'\b(\d{6,8})\b', 0.70),
|
||||||
|
# Generic "NR" label followed by a number of 4+ digits (lowest-confidence fallback)
|
||||||
|
(r'NR\.?\s*:?\s*(\d{4,})', 0.65),
|
||||||
]
|
]
|
||||||
|
|
||||||
# CUI (fiscal code) patterns
|
# CUI (fiscal code) patterns - IMPORTANT: exclude CLIENT CUI
|
||||||
|
# CIF = Cod de Identificare Fiscală (vendor's tax ID)
|
||||||
|
# CLIENT C.U.I. = client's tax ID (should be ignored)
|
||||||
|
# OCR errors: R0 instead of RO, C1F instead of CIF
|
||||||
CUI_PATTERNS = [
|
CUI_PATTERNS = [
|
||||||
(r'C\.?U\.?I\.?\s*:?\s*(?:RO)?(\d{6,10})', 0.95),
|
# CIF at start of line (definitely vendor) - tolerant to OCR errors
|
||||||
(r'C\.?I\.?F\.?\s*:?\s*(?:RO)?(\d{6,10})', 0.95),
|
(r'^CIF\s*:?\s*(?:R[O0])?(\d{6,10})', 0.98),
|
||||||
(r'COD\s+FISCAL\s*:?\s*(?:RO)?(\d{6,10})', 0.90),
|
(r'^C[I1]F\s*:?\s*(?:R[O0])?(\d{6,10})', 0.95), # C1F OCR error
|
||||||
(r'(?:RO)?(\d{6,10})\s*-?\s*(?:J|CUI)', 0.80),
|
# CIF not preceded by CLIENT (negative lookbehind)
|
||||||
|
(r'(?<!CLIENT\s)(?<!LIENT\s)CIF\s*:?\s*(?:R[O0])?(\d{6,10})', 0.95),
|
||||||
|
# Standalone CIF: format with OCR tolerance
|
||||||
|
(r'\bC[I1]F\s*:?\s*(?:R[O0])?(\d{6,10})\b', 0.90),
|
||||||
|
# COD FISCAL (vendor)
|
||||||
|
(r'COD\s+FISCAL\s*:?\s*(?:R[O0])?(\d{6,10})', 0.90),
|
||||||
|
# C.I.F. format (with dots)
|
||||||
|
(r'(?<!CLIENT\s)C\.[I1]\.F\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.88),
|
||||||
|
# CUI format (less specific, use with caution)
|
||||||
|
(r'(?<!CLIENT\s)C\.?U\.?[I1]\.?\s*:?\s*(?:R[O0])?(\d{6,10})', 0.85),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Series patterns
|
# Series patterns - be strict to avoid false matches
|
||||||
SERIES_PATTERNS = [
|
SERIES_PATTERNS = [
|
||||||
(r'SERIE\s*:?\s*([A-Z]{1,4})', 0.90),
|
(r'SERIE\s*:?\s*([A-Z]{1,4})', 0.90),
|
||||||
(r'([A-Z]{2,4})\s+NR\.?\s*\d+', 0.80),
|
# Z: format from Romanian fiscal receipts (must be at start of line or after space)
|
||||||
|
(r'(?:^|\s)Z\s*:\s*(\d{4})', 0.85),
|
||||||
|
# BF series with explicit marker
|
||||||
|
(r'(?:^|\s)BF\s*:\s*(\d{4})', 0.85),
|
||||||
|
]
|
||||||
|
|
||||||
|
# TVA (VAT) patterns - OCR may produce TUA, TVR, etc.
|
||||||
|
TVA_PATTERNS = [
|
||||||
|
# TOTAL TVA BON format (OCR tolerant: TUA, TVR)
|
||||||
|
(r'TOTAL\s+T[VU][AR]\s+BON\s*:?\s*([\d\s.,]+)', 0.98),
|
||||||
|
(r'T[O0]TAL\s+T[VU][AR]\s*:?\s*([\d\s.,]+)', 0.95),
|
||||||
|
# TVA with percentage (OCR tolerant)
|
||||||
|
(r'T[VU][AR]\s+(?:A\s*[-:]?\s*)?(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)', 0.95),
|
||||||
|
(r'T[VU][AR]\s+[A-Z]\s*[-:]\s*(\d{1,2})\s*%\s*([\d\s.,]+)', 0.93),
|
||||||
|
# Simple TVA pattern
|
||||||
|
(r'T[VU][AR]\s*:?\s*([\d\s.,]+)', 0.85),
|
||||||
|
# Standalone percentage line near TVA
|
||||||
|
(r'(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)', 0.75),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Items count patterns - OCR may produce OZ instead of POZ, etc.
|
||||||
|
# Number may be on separate line before or after the label
|
||||||
|
ITEMS_COUNT_PATTERNS = [
|
||||||
|
# NR. POZ. ART. IN BON: 17 (Romanian format with dots and spaces)
|
||||||
|
# OCR tolerant: OZ instead of POZ, 0 instead of O (an ARI-for-ART misread is NOT matched by the pattern below)
|
||||||
|
(r'NR\.?\s*P?[O0]Z\.?\s*ART\.?\s*(?:IN\s+BON)?\s*:?\s*(\d+)', 0.98),
|
||||||
|
# Number on line BEFORE "OZ. ART. IN BON:" - OCR sometimes reorders
|
||||||
|
(r'(\d{1,2})\s*\n\s*[O0]Z\.?\s*ART', 0.95),
|
||||||
|
# Number may be on next line after label
|
||||||
|
(r'[O0]Z\.?\s*ART\.?\s*(?:IN\s+BON)?\s*:?\s*[\n\s]*(\d+)', 0.93),
|
||||||
|
(r'NR\.?\s*(?:P?[O0]Z\.?)?\s*ART(?:ICOLE)?\.?\s*(?:IN\s+BON)?\s*:?\s*[\n\s]*(\d+)', 0.90),
|
||||||
|
# Simpler patterns
|
||||||
|
(r'ARTIC[O0]LE\s*:?\s*(\d+)', 0.88),
|
||||||
|
(r'P?[O0]Z\s*:?\s*(\d+)', 0.85),
|
||||||
|
# X articole/pozitii
|
||||||
|
(r'(\d+)\s*(?:ARTIC[O0]LE|P[O0]ZITII|BUC)', 0.80),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Address patterns (Romanian format)
|
||||||
|
ADDRESS_PATTERNS = [
|
||||||
|
# Street patterns
|
||||||
|
(r'(STR\.?\s+[A-Z0-9\s.,]+(?:NR\.?\s*\d+)?)', 0.90),
|
||||||
|
# Full address with JUD (county)
|
||||||
|
(r'(JUD\.?\s+[A-Z]+,?\s*(?:MUN\.?|OR\.?|COM\.?)?\s*[A-Z]+)', 0.85),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Vendor name indicators (lines containing these are likely vendor names)
|
||||||
|
VENDOR_INDICATORS = [
|
||||||
|
r'\bS\.?R\.?L\.?\b', # S.R.L.
|
||||||
|
r'\bS\.?A\.?\b', # S.A.
|
||||||
|
r'\bS\.?N\.?C\.?\b', # S.N.C.
|
||||||
|
r'\bS\.?C\.?S\.?\b', # S.C.S.
|
||||||
|
r'\bI\.?I\.?\b', # I.I. (Individual)
|
||||||
|
r'\bP\.?F\.?A\.?\b', # P.F.A.
|
||||||
|
r'\bS\.?C\.?\b', # S.C.
|
||||||
|
r'HOLDING',
|
||||||
|
r'COMPANY',
|
||||||
|
r'GROUP',
|
||||||
|
r'MAGAZIN',
|
||||||
|
r'MARKET',
|
||||||
|
r'SHOP',
|
||||||
]
|
]
|
||||||
|
|
||||||
def extract(self, text: str) -> ExtractionResult:
|
def extract(self, text: str) -> ExtractionResult:
|
||||||
@@ -86,13 +205,18 @@ class ReceiptExtractor:
|
|||||||
result.raw_text = text
|
result.raw_text = text
|
||||||
text_upper = text.upper()
|
text_upper = text.upper()
|
||||||
|
|
||||||
# Extract fields
|
# Extract core fields
|
||||||
result.amount, result.confidence_amount = self._extract_amount(text_upper)
|
result.amount, result.confidence_amount = self._extract_amount(text_upper)
|
||||||
result.receipt_date, result.confidence_date = self._extract_date(text_upper)
|
result.receipt_date, result.confidence_date = self._extract_date(text_upper)
|
||||||
result.receipt_number, _ = self._extract_number(text_upper)
|
result.receipt_number, _ = self._extract_number(text_upper)
|
||||||
result.receipt_series, _ = self._extract_series(text_upper)
|
result.receipt_series, _ = self._extract_series(text_upper)
|
||||||
result.partner_name, result.confidence_vendor = self._extract_vendor(text)
|
result.partner_name, result.confidence_vendor = self._extract_vendor(text)
|
||||||
result.cui, _ = self._extract_cui(text_upper)
|
result.cui, _ = self._extract_cui(text_upper, text)
|
||||||
|
|
||||||
|
# Extract additional fields - Multiple TVA entries
|
||||||
|
result.tva_entries, result.tva_total = self._extract_tva_entries(text_upper)
|
||||||
|
result.items_count = self._extract_items_count(text_upper)
|
||||||
|
result.address = self._extract_address(text_upper)
|
||||||
|
|
||||||
# Detect receipt type
|
# Detect receipt type
|
||||||
result.receipt_type = self._detect_receipt_type(text_upper)
|
result.receipt_type = self._detect_receipt_type(text_upper)
|
||||||
@@ -101,18 +225,85 @@ class ReceiptExtractor:
|
|||||||
|
|
||||||
def _extract_amount(self, text: str) -> Tuple[Optional[Decimal], float]:
|
def _extract_amount(self, text: str) -> Tuple[Optional[Decimal], float]:
|
||||||
"""Extract total amount from text."""
|
"""Extract total amount from text."""
|
||||||
|
# First try standard patterns (TOTAL, SUBTOTAL, etc.)
|
||||||
for pattern, confidence in self.TOTAL_PATTERNS:
|
for pattern, confidence in self.TOTAL_PATTERNS:
|
||||||
match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
|
match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
|
||||||
if match:
|
if match:
|
||||||
try:
|
try:
|
||||||
amount_str = re.sub(r'[^\d.,]', '', match.group(1))
|
amount_str = re.sub(r'[^\d.,]', '', match.group(1))
|
||||||
# Handle Romanian number format (1.234,56)
|
|
||||||
amount_str = self._normalize_number(amount_str)
|
amount_str = self._normalize_number(amount_str)
|
||||||
amount = Decimal(amount_str)
|
amount = Decimal(amount_str)
|
||||||
if amount > 0:
|
if amount > 0:
|
||||||
return amount, confidence
|
return amount, confidence
|
||||||
except (InvalidOperation, ValueError):
|
except (InvalidOperation, ValueError):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Strategy 2: Find amounts AFTER product lines end
|
||||||
|
# Products have pattern: "X BUC/ROLA X price = price"
|
||||||
|
# Total appears after all products
|
||||||
|
product_pattern = r'\d\s+(?:BUC|ROLA|ROLN|ROL)\s+X'
|
||||||
|
product_matches = list(re.finditer(product_pattern, text, re.IGNORECASE))
|
||||||
|
if product_matches:
|
||||||
|
# Get text after the last product line
|
||||||
|
last_product_pos = product_matches[-1].end()
|
||||||
|
after_products = text[last_product_pos:]
|
||||||
|
|
||||||
|
# Find standalone amounts on their own line after products
|
||||||
|
line_amount_pattern = r'^[\s]*(\d{2,4}[.,]\s*\d{2})[\s]*$'
|
||||||
|
standalone_amounts = []
|
||||||
|
for match in re.finditer(line_amount_pattern, after_products, re.MULTILINE):
|
||||||
|
try:
|
||||||
|
amount_str = match.group(1).replace(' ', '')
|
||||||
|
amount_str = self._normalize_number(amount_str)
|
||||||
|
amount = Decimal(amount_str)
|
||||||
|
if amount > 10: # Filter out small values
|
||||||
|
standalone_amounts.append(amount)
|
||||||
|
except (InvalidOperation, ValueError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if standalone_amounts:
|
||||||
|
# The largest standalone amount after products is likely the total
|
||||||
|
max_amount = max(standalone_amounts)
|
||||||
|
# Higher confidence if it appears multiple times
|
||||||
|
count = standalone_amounts.count(max_amount)
|
||||||
|
confidence = 0.85 if count >= 2 else 0.75
|
||||||
|
return max_amount, confidence
|
||||||
|
|
||||||
|
# Strategy 3: Find the most repeated large amount
|
||||||
|
# Normalize spaces in numbers (OCR may produce "186. 16")
|
||||||
|
normalized_text = re.sub(r'(\d+)[.,]\s+(\d{2})', r'\1.\2', text)
|
||||||
|
amount_pattern = r'(\d{2,4}[.,]\d{2})\b'
|
||||||
|
amounts = re.findall(amount_pattern, normalized_text)
|
||||||
|
if amounts:
|
||||||
|
from collections import Counter
|
||||||
|
amount_counts = Counter(amounts)
|
||||||
|
# Filter amounts that appear 2+ times and are > 20
|
||||||
|
candidates = []
|
||||||
|
for amt_str, count in amount_counts.items():
|
||||||
|
try:
|
||||||
|
amt = Decimal(self._normalize_number(amt_str))
|
||||||
|
if count >= 2 and amt > 20:
|
||||||
|
candidates.append((amt, count))
|
||||||
|
except (InvalidOperation, ValueError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if candidates:
|
||||||
|
# Return the LARGEST amount that appears multiple times
|
||||||
|
candidates.sort(key=lambda x: x[0], reverse=True)
|
||||||
|
return candidates[0][0], 0.65
|
||||||
|
|
||||||
|
# Last resort: Find any standalone large amount
|
||||||
|
line_amount_pattern = r'^[\s]*(\d{2,4}[.,]\s*\d{2})[\s]*$'
|
||||||
|
for match in re.finditer(line_amount_pattern, text, re.MULTILINE):
|
||||||
|
try:
|
||||||
|
amount_str = match.group(1).replace(' ', '')
|
||||||
|
amount_str = self._normalize_number(amount_str)
|
||||||
|
amount = Decimal(amount_str)
|
||||||
|
if amount > 50: # Higher threshold for fallback
|
||||||
|
return amount, 0.50
|
||||||
|
except (InvalidOperation, ValueError):
|
||||||
|
continue
|
||||||
|
|
||||||
return None, 0.0
|
return None, 0.0
|
||||||
|
|
||||||
def _normalize_number(self, num_str: str) -> str:
|
def _normalize_number(self, num_str: str) -> str:
|
||||||
@@ -147,7 +338,8 @@ class ReceiptExtractor:
|
|||||||
match = re.search(pattern, text)
|
match = re.search(pattern, text)
|
||||||
if match:
|
if match:
|
||||||
try:
|
try:
|
||||||
date_str = match.group(1).replace('/', '.')
|
# Normalize separators to dots
|
||||||
|
date_str = match.group(1).replace('/', '.').replace('-', '.')
|
||||||
|
|
||||||
# Try DD.MM.YYYY format first
|
# Try DD.MM.YYYY format first
|
||||||
try:
|
try:
|
||||||
@@ -181,23 +373,68 @@ class ReceiptExtractor:
|
|||||||
return None, 0.0
|
return None, 0.0
|
||||||
|
|
||||||
def _extract_vendor(self, text: str) -> Tuple[Optional[str], float]:
|
def _extract_vendor(self, text: str) -> Tuple[Optional[str], float]:
|
||||||
"""Extract vendor/partner name from text."""
|
"""
|
||||||
|
Extract vendor/partner name from text.
|
||||||
|
Uses multiple strategies:
|
||||||
|
1. Look for lines with company type indicators (S.R.L., S.A., etc.)
|
||||||
|
2. Look for lines near CIF
|
||||||
|
3. Use first valid line as fallback
|
||||||
|
"""
|
||||||
lines = text.split('\n')
|
lines = text.split('\n')
|
||||||
skip_keywords = [
|
skip_keywords = [
|
||||||
'BON', 'FISCAL', 'TOTAL', 'DATA', 'NR', 'ORA',
|
'BON', 'FISCAL', 'TOTAL', 'DATA', 'NR', 'ORA',
|
||||||
'SUBTOTAL', 'TVA', 'PLATA', 'CARD', 'NUMERAR',
|
'SUBTOTAL', 'TVA', 'PLATA', 'CARD', 'NUMERAR',
|
||||||
'RON', 'LEI', 'CHITANTA', 'REST'
|
'RON', 'LEI', 'CHITANTA', 'REST', 'CLIENT',
|
||||||
|
'OPERATOR', 'CASIER', 'POS', 'AMEF', 'BINE ATI VENIT',
|
||||||
|
'VA RUGAM', 'PASTRATI', 'VOCEA', 'TIPARIT',
|
||||||
|
'DETERGENT', 'PROSOP', 'HARTIE', 'SACI', 'SPRAY',
|
||||||
|
'BUC', 'ROLA', 'CUMPARATOR'
|
||||||
]
|
]
|
||||||
|
|
||||||
for i, line in enumerate(lines[:7]): # Check first 7 lines
|
# Strategy 1: Look for lines with vendor indicators (S.R.L., S.A., HOLDING, etc.)
|
||||||
|
for i, line in enumerate(lines[:15]): # Check first 15 lines
|
||||||
|
line = line.strip()
|
||||||
|
if not line or len(line) < 3:
|
||||||
|
continue
|
||||||
|
|
||||||
|
line_upper = line.upper()
|
||||||
|
|
||||||
|
# Check for vendor indicators
|
||||||
|
for indicator in self.VENDOR_INDICATORS:
|
||||||
|
if re.search(indicator, line_upper):
|
||||||
|
# Found a company name indicator
|
||||||
|
vendor = self._clean_vendor_name(line)
|
||||||
|
if vendor and len(vendor) >= 3:
|
||||||
|
# High confidence for lines with company indicators
|
||||||
|
return vendor, 0.95
|
||||||
|
|
||||||
|
# Strategy 2: Look for lines right before or after CIF
|
||||||
|
for i, line in enumerate(lines[:15]):
|
||||||
|
line_upper = line.upper()
|
||||||
|
if 'CIF' in line_upper and 'CLIENT' not in line_upper:
|
||||||
|
# Check line before
|
||||||
|
if i > 0:
|
||||||
|
prev_line = lines[i-1].strip()
|
||||||
|
if prev_line and len(prev_line) >= 3:
|
||||||
|
if not any(kw in prev_line.upper() for kw in skip_keywords):
|
||||||
|
vendor = self._clean_vendor_name(prev_line)
|
||||||
|
if vendor:
|
||||||
|
return vendor, 0.85
|
||||||
|
|
||||||
|
# Strategy 3: First valid line as fallback
|
||||||
|
for i, line in enumerate(lines[:10]):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
||||||
# Skip empty lines
|
# Skip empty lines
|
||||||
if not line:
|
if not line or len(line) < 3:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip lines that are just numbers
|
# Skip lines that are just numbers or codes
|
||||||
if re.match(r'^[\d.,\s]+$', line):
|
if re.match(r'^[\d.,\s:]+$', line):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip lines with barcodes/product codes
|
||||||
|
if re.match(r'^[A-Z]*\d{6,}', line):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip lines with keywords
|
# Skip lines with keywords
|
||||||
@@ -205,23 +442,68 @@ class ReceiptExtractor:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Clean the line
|
# Clean the line
|
||||||
vendor = re.sub(r'[^\w\s.,&-]', '', line).strip()
|
vendor = self._clean_vendor_name(line)
|
||||||
|
|
||||||
if len(vendor) >= 3:
|
if vendor and len(vendor) >= 3:
|
||||||
# Confidence decreases for lines further down
|
# Confidence decreases for lines further down
|
||||||
confidence = max(0.3, 0.8 - (i * 0.1))
|
confidence = max(0.3, 0.7 - (i * 0.05))
|
||||||
return vendor, confidence
|
return vendor, confidence
|
||||||
|
|
||||||
return None, 0.0
|
return None, 0.0
|
||||||
|
|
||||||
def _extract_cui(self, text: str) -> Tuple[Optional[str], float]:
|
def _clean_vendor_name(self, name: str) -> Optional[str]:
|
||||||
"""Extract CUI (fiscal identification code) from text."""
|
"""Clean and normalize vendor name."""
|
||||||
|
if not name:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Remove common OCR artifacts
|
||||||
|
name = re.sub(r'[^\w\s.,&\-()]', ' ', name)
|
||||||
|
# Normalize whitespace
|
||||||
|
name = re.sub(r'\s+', ' ', name).strip()
|
||||||
|
|
||||||
|
# Skip if it looks like an address line only
|
||||||
|
if re.match(r'^(STR|JUD|MUN|NR|BL|SC|ET|AP)\.?\s', name.upper()):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Skip if too short after cleaning
|
||||||
|
if len(name) < 3:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return name
|
||||||
|
|
||||||
|
def _extract_cui(self, text_upper: str, original_text: str) -> Tuple[Optional[str], float]:
|
||||||
|
"""
|
||||||
|
Extract vendor CUI (fiscal identification code) from text.
|
||||||
|
Excludes CLIENT CUI which appears as 'CLIENT C.U.I./C.I.F.:...'
|
||||||
|
"""
|
||||||
|
# First, try to find CIF on a line that doesn't contain CLIENT
|
||||||
|
lines = text_upper.split('\n')
|
||||||
|
for line in lines:
|
||||||
|
# Skip lines that contain CLIENT (these are buyer's CUI, not vendor's)
|
||||||
|
if 'CLIENT' in line or 'CUMPARATOR' in line or 'LIENT' in line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Look for CIF in this line
|
||||||
for pattern, confidence in self.CUI_PATTERNS:
|
for pattern, confidence in self.CUI_PATTERNS:
|
||||||
match = re.search(pattern, text, re.IGNORECASE)
|
match = re.search(pattern, line, re.IGNORECASE | re.MULTILINE)
|
||||||
if match:
|
if match:
|
||||||
cui = match.group(1)
|
cui = match.group(1)
|
||||||
if 6 <= len(cui) <= 10:
|
if 6 <= len(cui) <= 10:
|
||||||
return cui, confidence
|
return cui, confidence
|
||||||
|
|
||||||
|
# Fallback: search entire text but exclude CLIENT patterns
|
||||||
|
for pattern, confidence in self.CUI_PATTERNS:
|
||||||
|
# Find all matches
|
||||||
|
for match in re.finditer(pattern, text_upper, re.IGNORECASE | re.MULTILINE):
|
||||||
|
cui = match.group(1)
|
||||||
|
if 6 <= len(cui) <= 10:
|
||||||
|
# Check if this match is preceded by CLIENT in the same line
|
||||||
|
start = match.start()
|
||||||
|
line_start = text_upper.rfind('\n', 0, start) + 1
|
||||||
|
line_text = text_upper[line_start:start]
|
||||||
|
if 'CLIENT' not in line_text and 'LIENT' not in line_text:
|
||||||
|
return cui, confidence
|
||||||
|
|
||||||
return None, 0.0
|
return None, 0.0
|
||||||
|
|
||||||
def _detect_receipt_type(self, text: str) -> str:
|
def _detect_receipt_type(self, text: str) -> str:
|
||||||
@@ -229,3 +511,223 @@ class ReceiptExtractor:
|
|||||||
if 'CHITANTA' in text or 'CHITANȚĂ' in text:
|
if 'CHITANTA' in text or 'CHITANȚĂ' in text:
|
||||||
return 'chitanta'
|
return 'chitanta'
|
||||||
return 'bon_fiscal'
|
return 'bon_fiscal'
|
||||||
|
|
||||||
|
def _extract_tva_entries(self, text: str) -> Tuple[List[dict], Optional[Decimal]]:
|
||||||
|
"""
|
||||||
|
Extract multiple TVA (VAT) entries from text.
|
||||||
|
Romanian receipts can have multiple TVA rates (A=19%, B=9%, C=5%, D=0%).
|
||||||
|
|
||||||
|
Returns (tva_entries, tva_total) where tva_entries is a list of:
|
||||||
|
{'code': 'A', 'percent': 19, 'amount': Decimal('15.20')}
|
||||||
|
"""
|
||||||
|
tva_entries = []
|
||||||
|
seen_entries = set() # To avoid duplicates
|
||||||
|
|
||||||
|
# Normalize spaces in numbers first (OCR may produce "32. 31")
|
||||||
|
normalized_text = re.sub(r'(\d+)[.,]\s+(\d{2})', r'\1.\2', text)
|
||||||
|
|
||||||
|
# Pattern 1: "TVA A - 19%: 15.20" or "TVAA - 21% 32.31" (with code)
|
||||||
|
# OCR tolerant: TUA, TVR, etc.
|
||||||
|
pattern_with_code = r'T[VU][AR]\s*([A-D])\s*[-:]\s*(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)'
|
||||||
|
for match in re.finditer(pattern_with_code, normalized_text, re.IGNORECASE):
|
||||||
|
try:
|
||||||
|
code = match.group(1).upper()
|
||||||
|
percent = int(match.group(2))
|
||||||
|
amount_str = match.group(3).replace(' ', '')
|
||||||
|
amount_str = self._normalize_number(re.sub(r'[^\d.,]', '', amount_str))
|
||||||
|
amount = Decimal(amount_str)
|
||||||
|
if amount > 0:
|
||||||
|
entry_key = (code, percent)
|
||||||
|
if entry_key not in seen_entries:
|
||||||
|
tva_entries.append({
|
||||||
|
'code': code,
|
||||||
|
'percent': percent,
|
||||||
|
'amount': amount
|
||||||
|
})
|
||||||
|
seen_entries.add(entry_key)
|
||||||
|
except (ValueError, InvalidOperation):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Pattern 2: "TVA - 21%: 32.31" (without explicit code, assume 'A')
|
||||||
|
if not tva_entries:
|
||||||
|
pattern_no_code = r'T[VU][AR]\s*[-:]\s*(\d{1,2})\s*%\s*:?\s*([\d\s.,]+)'
|
||||||
|
for match in re.finditer(pattern_no_code, normalized_text, re.IGNORECASE):
|
||||||
|
try:
|
||||||
|
percent = int(match.group(1))
|
||||||
|
amount_str = match.group(2).replace(' ', '')
|
||||||
|
amount_str = self._normalize_number(re.sub(r'[^\d.,]', '', amount_str))
|
||||||
|
amount = Decimal(amount_str)
|
||||||
|
if amount > 0:
|
||||||
|
# Determine code based on percent
|
||||||
|
code = self._get_tva_code_from_percent(percent)
|
||||||
|
entry_key = (code, percent)
|
||||||
|
if entry_key not in seen_entries:
|
||||||
|
tva_entries.append({
|
||||||
|
'code': code,
|
||||||
|
'percent': percent,
|
||||||
|
'amount': amount
|
||||||
|
})
|
||||||
|
seen_entries.add(entry_key)
|
||||||
|
except (ValueError, InvalidOperation):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Pattern 3: "TVAA - 21%" on one line, amount on next line
|
||||||
|
if not tva_entries:
|
||||||
|
tva_line_pattern = r'T[VU][AR]\s*([A-D])?\s*[-:]\s*(\d{1,2})\s*%'
|
||||||
|
for match in re.finditer(tva_line_pattern, normalized_text, re.IGNORECASE):
|
||||||
|
try:
|
||||||
|
code = (match.group(1) or 'A').upper()
|
||||||
|
percent = int(match.group(2))
|
||||||
|
|
||||||
|
# Look for amount on the next line or immediately after
|
||||||
|
after_tva = normalized_text[match.end():]
|
||||||
|
amount_match = re.search(r'^[\s\n]*([\d.,]+)', after_tva)
|
||||||
|
if amount_match:
|
||||||
|
amount_str = self._normalize_number(amount_match.group(1))
|
||||||
|
amount = Decimal(amount_str)
|
||||||
|
if amount > 0:
|
||||||
|
entry_key = (code, percent)
|
||||||
|
if entry_key not in seen_entries:
|
||||||
|
tva_entries.append({
|
||||||
|
'code': code,
|
||||||
|
'percent': percent,
|
||||||
|
'amount': amount
|
||||||
|
})
|
||||||
|
seen_entries.add(entry_key)
|
||||||
|
except (ValueError, InvalidOperation):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Pattern 4: Use TVA_PATTERNS for fallback
|
||||||
|
if not tva_entries:
|
||||||
|
for pattern, _ in self.TVA_PATTERNS:
|
||||||
|
match = re.search(pattern, normalized_text, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
try:
|
||||||
|
# Some patterns have 2 groups (percent, amount), others just amount
|
||||||
|
if match.lastindex >= 2:
|
||||||
|
percent = int(match.group(1))
|
||||||
|
amount_str = match.group(2)
|
||||||
|
else:
|
||||||
|
amount_str = match.group(1)
|
||||||
|
# Try to detect percent from text
|
||||||
|
percent = self._detect_tva_percent(text)
|
||||||
|
|
||||||
|
amount_str = amount_str.replace(' ', '')
|
||||||
|
amount_str = self._normalize_number(re.sub(r'[^\d.,]', '', amount_str))
|
||||||
|
amount = Decimal(amount_str)
|
||||||
|
if amount > 0 and percent:
|
||||||
|
code = self._get_tva_code_from_percent(percent)
|
||||||
|
entry_key = (code, percent)
|
||||||
|
if entry_key not in seen_entries:
|
||||||
|
tva_entries.append({
|
||||||
|
'code': code,
|
||||||
|
'percent': percent,
|
||||||
|
'amount': amount
|
||||||
|
})
|
||||||
|
seen_entries.add(entry_key)
|
||||||
|
break # Only use first match from fallback
|
||||||
|
except (ValueError, InvalidOperation):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Calculate total
|
||||||
|
tva_total = None
|
||||||
|
if tva_entries:
|
||||||
|
tva_total = sum(entry['amount'] for entry in tva_entries)
|
||||||
|
|
||||||
|
# Sort by code (A, B, C, D)
|
||||||
|
tva_entries.sort(key=lambda x: x.get('code', 'Z'))
|
||||||
|
|
||||||
|
return tva_entries, tva_total
|
||||||
|
|
||||||
|
def _get_tva_code_from_percent(self, percent: int) -> str:
|
||||||
|
"""Map TVA percentage to standard Romanian code.
|
||||||
|
|
||||||
|
Romanian TVA rates changed in August 2025:
|
||||||
|
- Standard rate: 19% → 21%
|
||||||
|
- Reduced rate: 9% → 11%
|
||||||
|
- Other rates (5%, 0%) remain unchanged
|
||||||
|
|
||||||
|
Old rates (before Aug 2025): New rates (from Aug 2025):
|
||||||
|
- A = 19% (standard) - A = 21% (standard)
|
||||||
|
- B = 9% (reduced) - B = 11% (reduced)
|
||||||
|
- C = 5% (reduced) - C = 5% (reduced)
|
||||||
|
- D = 0% (exempt) - D = 0% (exempt)
|
||||||
|
|
||||||
|
Both old and new rates are supported for historical receipts.
|
||||||
|
"""
|
||||||
|
if percent in (19, 21):
|
||||||
|
return 'A' # Standard rate (19% old, 21% new from Aug 2025)
|
||||||
|
elif percent in (9, 11):
|
||||||
|
return 'B' # Reduced rate (9% old, 11% new from Aug 2025)
|
||||||
|
elif percent == 5:
|
||||||
|
return 'C' # Reduced rate (unchanged)
|
||||||
|
elif percent == 0:
|
||||||
|
return 'D' # Exempt (unchanged)
|
||||||
|
else:
|
||||||
|
return 'A' # Default to standard rate
|
||||||
|
|
||||||
|
def _detect_tva_percent(self, text: str) -> Optional[int]:
|
||||||
|
"""Detect TVA percentage from text content."""
|
||||||
|
# Look for common Romanian TVA percentages
|
||||||
|
if '19%' in text or '19 %' in text:
|
||||||
|
return 19
|
||||||
|
elif '21%' in text or '21 %' in text:
|
||||||
|
return 21
|
||||||
|
elif '11%' in text or '11 %' in text:
|
||||||
|
return 11
|
||||||
|
elif '9%' in text or '9 %' in text:
|
||||||
|
return 9
|
||||||
|
elif '5%' in text or '5 %' in text:
|
||||||
|
return 5
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _extract_items_count(self, text: str) -> Optional[int]:
|
||||||
|
"""Extract number of items/articles from receipt."""
|
||||||
|
for pattern, _ in self.ITEMS_COUNT_PATTERNS:
|
||||||
|
match = re.search(pattern, text, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
try:
|
||||||
|
count = int(match.group(1))
|
||||||
|
if 0 < count < 1000: # Reasonable range
|
||||||
|
return count
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _extract_address(self, text: str) -> Optional[str]:
|
||||||
|
"""Extract vendor address from text."""
|
||||||
|
lines = text.split('\n')
|
||||||
|
address_parts = []
|
||||||
|
|
||||||
|
for line in lines[:15]: # Check first 15 lines
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check for address patterns
|
||||||
|
line_upper = line.upper()
|
||||||
|
|
||||||
|
# JUD. (county) pattern
|
||||||
|
if re.search(r'\bJUD\.?\s+', line_upper):
|
||||||
|
address_parts.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# STR. (street) pattern
|
||||||
|
if re.search(r'\bSTR\.?\s+', line_upper):
|
||||||
|
address_parts.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# MUN./OR./COM. (city/town) pattern
|
||||||
|
if re.search(r'\b(MUN|OR|COM)\.?\s+', line_upper):
|
||||||
|
address_parts.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if address_parts:
|
||||||
|
# Join and clean address parts
|
||||||
|
address = ', '.join(address_parts)
|
||||||
|
# Clean up
|
||||||
|
address = re.sub(r'\s+', ' ', address).strip()
|
||||||
|
address = re.sub(r',\s*,', ',', address)
|
||||||
|
return address if len(address) >= 5 else None
|
||||||
|
|
||||||
|
return None
|
||||||
|
|||||||
@@ -0,0 +1,37 @@
|
|||||||
|
"""add_tva_breakdown_to_receipt
|
||||||
|
|
||||||
|
Revision ID: 1cfb423c6953
|
||||||
|
Revises: 001_initial
|
||||||
|
Create Date: 2025-12-12 14:04:22.464289+00:00
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
import sqlmodel
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = '1cfb423c6953'
|
||||||
|
down_revision: Union[str, None] = '001_initial'
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the nullable TVA, item-count and vendor-address columns to ``receipts``."""
    string_type = sqlmodel.sql.sqltypes.AutoString
    new_columns = [
        sa.Column('tva_breakdown', string_type(length=1000), nullable=True),
        sa.Column('tva_total', sa.Numeric(precision=15, scale=2), nullable=True),
        sa.Column('items_count', sa.Integer(), nullable=True),
        sa.Column('vendor_address', string_type(length=500), nullable=True),
    ]
    with op.batch_alter_table('receipts', schema=None) as batch_op:
        for column in new_columns:
            batch_op.add_column(column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the columns added by ``upgrade``, in reverse order of creation."""
    dropped_columns = ('vendor_address', 'items_count', 'tva_total', 'tva_breakdown')
    with op.batch_alter_table('receipts', schema=None) as batch_op:
        for column_name in dropped_columns:
            batch_op.drop_column(column_name)
|
||||||
@@ -71,6 +71,37 @@
|
|||||||
<span v-if="data.cui" class="cui-badge">CUI: {{ data.cui }}</span>
|
<span v-if="data.cui" class="cui-badge">CUI: {{ data.cui }}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- TVA Entries (multiple rates) -->
|
||||||
|
<div class="preview-field full-width" v-if="data.tva_entries?.length > 0 || data.tva_total">
|
||||||
|
<label>TVA</label>
|
||||||
|
<div class="tva-breakdown">
|
||||||
|
<div v-for="(entry, idx) in data.tva_entries" :key="idx" class="tva-entry">
|
||||||
|
<span class="tva-code" v-if="entry.code">{{ entry.code }}</span>
|
||||||
|
<span class="tva-percent-badge">{{ entry.percent }}%</span>
|
||||||
|
<span class="tva-amount">{{ formatAmount(entry.amount) }} RON</span>
|
||||||
|
</div>
|
||||||
|
<div v-if="data.tva_total && data.tva_entries?.length > 1" class="tva-total">
|
||||||
|
<strong>Total TVA:</strong> {{ formatAmount(data.tva_total) }} RON
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Items Count -->
|
||||||
|
<div class="preview-field" v-if="data.items_count">
|
||||||
|
<label>Nr. Articole</label>
|
||||||
|
<div class="field-value">
|
||||||
|
{{ data.items_count }} articole
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Address -->
|
||||||
|
<div class="preview-field full-width" v-if="data.address">
|
||||||
|
<label>Adresa</label>
|
||||||
|
<div class="field-value address-text">
|
||||||
|
{{ data.address }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Raw Text Toggle -->
|
<!-- Raw Text Toggle -->
|
||||||
@@ -224,6 +255,50 @@ const formatDate = (dateStr) => {
|
|||||||
color: #475569;
|
color: #475569;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.tva-breakdown {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-entry {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-code {
|
||||||
|
font-weight: 600;
|
||||||
|
color: #475569;
|
||||||
|
min-width: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-percent-badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.15rem 0.5rem;
|
||||||
|
background: #dbeafe;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: #1e40af;
|
||||||
|
min-width: 2.5rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-amount {
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-total {
|
||||||
|
margin-top: 0.25rem;
|
||||||
|
padding-top: 0.25rem;
|
||||||
|
border-top: 1px dashed #cbd5e1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.address-text {
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: #475569;
|
||||||
|
}
|
||||||
|
|
||||||
.raw-text-section {
|
.raw-text-section {
|
||||||
margin-top: 1rem;
|
margin-top: 1rem;
|
||||||
padding-top: 1rem;
|
padding-top: 1rem;
|
||||||
|
|||||||
@@ -246,11 +246,59 @@
|
|||||||
<Textarea
|
<Textarea
|
||||||
v-model="form.description"
|
v-model="form.description"
|
||||||
rows="3"
|
rows="3"
|
||||||
placeholder="Detalii suplimentare..."
|
placeholder="Descriere optionala..."
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Detalii Suplimentare (populated from OCR) -->
|
||||||
|
<div v-if="form.tva_breakdown?.length > 0 || form.items_count || form.vendor_address" class="extra-details-section">
|
||||||
|
<h3>
|
||||||
|
<i class="pi pi-list"></i>
|
||||||
|
Detalii Suplimentare (din OCR)
|
||||||
|
</h3>
|
||||||
|
|
||||||
|
<!-- TVA Breakdown -->
|
||||||
|
<div class="form-field form-field-full" v-if="form.tva_breakdown?.length > 0">
|
||||||
|
<label>Defalcare TVA</label>
|
||||||
|
<div class="tva-table">
|
||||||
|
<div v-for="(entry, idx) in form.tva_breakdown" :key="idx" class="tva-row">
|
||||||
|
<span class="tva-label">TVA {{ entry.code }} ({{ entry.percent }}%):</span>
|
||||||
|
<InputNumber
|
||||||
|
v-model="form.tva_breakdown[idx].amount"
|
||||||
|
mode="currency"
|
||||||
|
currency="RON"
|
||||||
|
locale="ro-RO"
|
||||||
|
:minFractionDigits="2"
|
||||||
|
class="tva-input"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="tva-row total" v-if="form.tva_breakdown.length > 0">
|
||||||
|
<span class="tva-label"><strong>Total TVA:</strong></span>
|
||||||
|
<span class="tva-value">{{ formatTvaTotal() }} RON</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="form-grid">
|
||||||
|
<div class="form-field" v-if="form.items_count">
|
||||||
|
<label>Nr. Articole</label>
|
||||||
|
<InputNumber
|
||||||
|
v-model="form.items_count"
|
||||||
|
:min="1"
|
||||||
|
placeholder="Ex: 17"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="form-field" v-if="form.vendor_address">
|
||||||
|
<label>Adresa Furnizor</label>
|
||||||
|
<InputText
|
||||||
|
v-model="form.vendor_address"
|
||||||
|
placeholder="Adresa din bon"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<Divider />
|
<Divider />
|
||||||
|
|
||||||
<!-- Actions -->
|
<!-- Actions -->
|
||||||
@@ -314,6 +362,11 @@ const form = ref({
|
|||||||
receipt_number: '',
|
receipt_number: '',
|
||||||
description: '',
|
description: '',
|
||||||
company_id: 1, // Default company for Phase 1
|
company_id: 1, // Default company for Phase 1
|
||||||
|
// TVA info (multiple entries support)
|
||||||
|
tva_breakdown: [], // Array of {code, percent, amount}
|
||||||
|
tva_total: null,
|
||||||
|
items_count: null,
|
||||||
|
vendor_address: '',
|
||||||
})
|
})
|
||||||
|
|
||||||
const selectedFiles = ref([])
|
const selectedFiles = ref([])
|
||||||
@@ -435,6 +488,24 @@ const applyOCRData = (data) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply TVA entries
|
||||||
|
if (data.tva_entries?.length > 0) {
|
||||||
|
form.value.tva_breakdown = data.tva_entries.map(e => ({
|
||||||
|
code: e.code,
|
||||||
|
percent: e.percent,
|
||||||
|
amount: parseFloat(e.amount)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
if (data.tva_total) {
|
||||||
|
form.value.tva_total = parseFloat(data.tva_total)
|
||||||
|
}
|
||||||
|
if (data.items_count) {
|
||||||
|
form.value.items_count = data.items_count
|
||||||
|
}
|
||||||
|
if (data.address) {
|
||||||
|
form.value.vendor_address = data.address
|
||||||
|
}
|
||||||
|
|
||||||
// Clear OCR preview
|
// Clear OCR preview
|
||||||
ocrData.value = null
|
ocrData.value = null
|
||||||
|
|
||||||
@@ -499,6 +570,12 @@ const formatFileSize = (bytes) => {
|
|||||||
return (bytes / (1024 * 1024)).toFixed(1) + ' MB'
|
return (bytes / (1024 * 1024)).toFixed(1) + ' MB'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sum the amounts of the editable TVA rows and format the total for display
// (ro-RO locale, exactly two decimals). An empty or missing breakdown yields
// a zero formatted the same way as every other total.
const formatTvaTotal = () => {
  const rows = form.value.tva_breakdown ?? []
  // Coerce each amount to a number: `+` on a string would concatenate rather
  // than add, and a cleared input leaves null. Non-numeric values count as 0.
  const total = rows.reduce((sum, row) => sum + (Number(row.amount) || 0), 0)
  return total.toLocaleString('ro-RO', { minimumFractionDigits: 2, maximumFractionDigits: 2 })
}
|
||||||
|
|
||||||
const validateForm = () => {
|
const validateForm = () => {
|
||||||
// Check if we have at least one file (for new receipts)
|
// Check if we have at least one file (for new receipts)
|
||||||
if (!isEditMode.value && selectedFiles.value.length === 0) {
|
if (!isEditMode.value && selectedFiles.value.length === 0) {
|
||||||
@@ -725,4 +802,55 @@ const submitForReview = async () => {
|
|||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
color: #64748b;
|
color: #64748b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Extra details section (TVA, items, address) */
|
||||||
|
.extra-details-section {
|
||||||
|
margin-top: 1.5rem;
|
||||||
|
padding: 1rem;
|
||||||
|
background: #f0f9ff;
|
||||||
|
border: 1px solid #bae6fd;
|
||||||
|
border-radius: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.extra-details-section h3 {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
color: #0284c7;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-table {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-row {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-row.total {
|
||||||
|
margin-top: 0.5rem;
|
||||||
|
padding-top: 0.5rem;
|
||||||
|
border-top: 1px dashed #0284c7;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-label {
|
||||||
|
min-width: 150px;
|
||||||
|
font-weight: 500;
|
||||||
|
color: #334155;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-input {
|
||||||
|
max-width: 150px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-value {
|
||||||
|
font-weight: 600;
|
||||||
|
color: #0284c7;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
@@ -112,6 +112,43 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Detalii Suplimentare (TVA, items, address from OCR) -->
|
||||||
|
<template v-if="hasTvaData || receipt.items_count || receipt.vendor_address">
|
||||||
|
<Divider />
|
||||||
|
|
||||||
|
<h4 style="margin-bottom: 0.75rem; color: #0284c7;">
|
||||||
|
<i class="pi pi-list"></i>
|
||||||
|
Detalii Suplimentare
|
||||||
|
</h4>
|
||||||
|
|
||||||
|
<div class="detail-list">
|
||||||
|
<!-- TVA Breakdown -->
|
||||||
|
<div v-if="parsedTvaBreakdown?.length > 0" class="detail-item tva-detail">
|
||||||
|
<span class="label">TVA</span>
|
||||||
|
<div class="tva-breakdown-display">
|
||||||
|
<div v-for="(entry, idx) in parsedTvaBreakdown" :key="idx" class="tva-line">
|
||||||
|
<span class="tva-code" v-if="entry.code">{{ entry.code }}:</span>
|
||||||
|
<span class="tva-percent">{{ entry.percent }}%</span>
|
||||||
|
<span class="tva-amount">= {{ formatAmount(entry.amount) }}</span>
|
||||||
|
</div>
|
||||||
|
<div v-if="receipt.tva_total && parsedTvaBreakdown.length > 1" class="tva-total-line">
|
||||||
|
<strong>Total TVA: {{ formatAmount(receipt.tva_total) }}</strong>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="detail-item" v-if="receipt.items_count">
|
||||||
|
<span class="label">Nr. Articole</span>
|
||||||
|
<span class="value">{{ receipt.items_count }} articole</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="detail-item" v-if="receipt.vendor_address">
|
||||||
|
<span class="label">Adresa Furnizor</span>
|
||||||
|
<span class="value">{{ receipt.vendor_address }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
<Divider />
|
<Divider />
|
||||||
|
|
||||||
<div class="detail-list">
|
<div class="detail-list">
|
||||||
@@ -283,6 +320,22 @@ const isBalanced = computed(() => {
|
|||||||
return Math.abs(totalDebit.value - totalCredit.value) < 0.01
|
return Math.abs(totalDebit.value - totalCredit.value) < 0.01
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// List of TVA entries for the currently loaded receipt.
// `tva_breakdown` may be a JSON string or an already-decoded array; a missing,
// malformed, or non-array value yields an empty list.
const parsedTvaBreakdown = computed(() => {
  const raw = receipt.value?.tva_breakdown
  if (!raw) return []
  try {
    const entries = typeof raw === 'string' ? JSON.parse(raw) : raw
    // Valid JSON is not necessarily a list (e.g. "null", "{}", "42"). The
    // template iterates this value and reads .length, so only arrays pass.
    return Array.isArray(entries) ? entries : []
  } catch {
    // Unparseable JSON: treat as "no breakdown available".
    return []
  }
})
|
||||||
|
|
||||||
|
// Truthy when the receipt carries any TVA information: at least one parsed
// breakdown entry, or otherwise whatever `tva_total` holds.
const hasTvaData = computed(() => {
  const entries = parsedTvaBreakdown.value
  if (entries?.length > 0) return true
  return receipt.value?.tva_total
})
|
||||||
|
|
||||||
onMounted(async () => {
|
onMounted(async () => {
|
||||||
await loadReceipt()
|
await loadReceipt()
|
||||||
})
|
})
|
||||||
@@ -521,4 +574,56 @@ const resubmitReceipt = async () => {
|
|||||||
border-radius: 8px;
|
border-radius: 8px;
|
||||||
color: #f57c00;
|
color: #f57c00;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* TVA Breakdown Display */
|
||||||
|
.detail-item.tva-detail {
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: flex-start;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-breakdown-display {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.25rem;
|
||||||
|
padding: 0.5rem;
|
||||||
|
background: #f0f9ff;
|
||||||
|
border-radius: 6px;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-line {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-code {
|
||||||
|
font-weight: 600;
|
||||||
|
color: #475569;
|
||||||
|
min-width: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-percent {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.1rem 0.4rem;
|
||||||
|
background: #dbeafe;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: #1e40af;
|
||||||
|
min-width: 2.5rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-amount {
|
||||||
|
font-weight: 500;
|
||||||
|
color: #334155;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tva-total-line {
|
||||||
|
margin-top: 0.25rem;
|
||||||
|
padding-top: 0.25rem;
|
||||||
|
border-top: 1px dashed #0284c7;
|
||||||
|
color: #0284c7;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
Reference in New Issue
Block a user