386 lines
13 KiB
Python
386 lines
13 KiB
Python
"""
|
|
Auto-create Receipt from OCR results for bulk upload flow.
|
|
|
|
This service handles automatic creation of Receipt records from OCR extraction
|
|
results, enabling end-to-end processing without manual UI intervention.
|
|
|
|
The service:
|
|
1. Maps OCR ExtractionData fields to Receipt fields
|
|
2. Creates attachment from the original uploaded file
|
|
3. Generates accounting entries
|
|
4. Links the receipt back to the batch job for tracking
|
|
"""
|
|
|
|
import logging
import shutil
import uuid
from dataclasses import dataclass
from datetime import date, datetime, timezone
from decimal import Decimal
from pathlib import Path
from typing import Optional, List

from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession

from backend.modules.data_entry.db.models.receipt import (
    Receipt,
    ReceiptAttachment,
    ReceiptStatus,
    ReceiptType,
    ReceiptDirection,
)
from backend.modules.data_entry.db.models.batch import BatchJob
from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
from backend.modules.data_entry.schemas.receipt import ReceiptCreate, TvaEntrySchema, PaymentMethodSchema
from backend.modules.data_entry.schemas.ocr import ExtractionData
from backend.modules.data_entry.services.receipt_service import ReceiptService
from backend.modules.data_entry.services import sse_service
from backend.config import settings
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ReceiptCreateResult:
    """Result of auto-create operation.

    Returned by ``ReceiptAutoCreateService.create_from_ocr_result`` instead of
    raising, so callers can inspect the outcome of each job individually.
    """

    # True when the receipt was created and committed.
    success: bool
    # ID of the created receipt; left as None on failure.
    receipt_id: Optional[int] = None
    # Human-readable failure reason; left as None on success.
    error_message: Optional[str] = None
|
|
|
|
|
|
class ReceiptAutoCreateService:
    """
    Service for automatically creating receipts from OCR results.

    Used by the bulk upload flow to create receipts without user intervention.
    Created receipts are in DRAFT status and require review before approval.
    """

    @staticmethod
    def _validate_ocr_result(ocr_result: ExtractionData) -> tuple[bool, str]:
        """
        Perform minimal validation on OCR result.

        Validates:
        - amount > 0 (required for receipt)
        - date is valid and not in future

        Args:
            ocr_result: Extracted data from OCR

        Returns:
            Tuple of (is_valid, error_message); error_message is "" when valid
        """
        amount = ocr_result.amount
        if amount is None:
            return False, "Amount not extracted from receipt"
        if amount <= 0:
            return False, f"Invalid amount: {amount} (must be > 0)"

        receipt_date = ocr_result.receipt_date
        if receipt_date is None:
            return False, "Receipt date not extracted"

        # A datetime is a date subclass but cannot be ordered against a plain
        # date (TypeError), so reduce it to its calendar day before comparing.
        receipt_day = receipt_date.date() if isinstance(receipt_date, datetime) else receipt_date
        if receipt_day > date.today():
            return False, f"Receipt date {receipt_date} is in the future"

        return True, ""

    @staticmethod
    def _map_ocr_to_receipt(
        ocr_result: ExtractionData,
        company_id: int,
    ) -> ReceiptCreate:
        """
        Map OCR ExtractionData fields to ReceiptCreate schema.

        Args:
            ocr_result: Extracted data from OCR
            company_id: Company ID for the receipt

        Returns:
            ReceiptCreate schema ready for database insertion
        """
        # Anything other than "chitanta" is treated as a fiscal receipt.
        receipt_type = ReceiptType.BON_FISCAL
        if ocr_result.receipt_type == "chitanta":
            receipt_type = ReceiptType.CHITANTA

        # Map TVA breakdown from OCR TvaEntry to schema TvaEntrySchema
        tva_breakdown: Optional[List[TvaEntrySchema]] = None
        if ocr_result.tva_entries:
            tva_breakdown = [
                TvaEntrySchema(
                    code=entry.code,
                    percent=entry.percent,
                    amount=entry.amount,
                )
                for entry in ocr_result.tva_entries
            ]

        # Map payment methods
        payment_methods: Optional[List[PaymentMethodSchema]] = None
        if ocr_result.payment_methods:
            payment_methods = [
                PaymentMethodSchema(
                    method=pm.method,
                    amount=pm.amount,
                )
                for pm in ocr_result.payment_methods
            ]

        return ReceiptCreate(
            receipt_type=receipt_type,
            direction=ReceiptDirection.CHELTUIALA,  # Default to expense
            receipt_number=ocr_result.receipt_number,
            receipt_series=ocr_result.receipt_series,
            receipt_date=ocr_result.receipt_date,
            amount=ocr_result.amount,
            description=ocr_result.description,
            tva_breakdown=tva_breakdown,
            tva_total=ocr_result.tva_total,
            items_count=ocr_result.items_count,
            vendor_address=ocr_result.address,
            company_id=company_id,
            partner_name=ocr_result.partner_name,
            cui=ocr_result.cui,
            ocr_raw_text=ocr_result.raw_text[:5000] if ocr_result.raw_text else None,  # Limit size
            payment_methods=payment_methods,
            payment_mode=ocr_result.suggested_payment_mode,
        )

    @staticmethod
    def _remove_file_quietly(path: Path) -> None:
        """Delete ``path`` if it exists; log instead of raising when that fails."""
        try:
            path.unlink(missing_ok=True)
        except OSError as e:
            logger.warning(f"[ReceiptAutoCreate] Could not remove file {path}: {e}")

    @staticmethod
    async def _create_attachment_from_file(
        session: AsyncSession,
        receipt_id: int,
        source_file_path: str,
        original_filename: Optional[str] = None,
    ) -> Optional[ReceiptAttachment]:
        """
        Create attachment by copying file from OCR job location.

        The file is copied under ``settings.data_entry_upload_path_resolved`` in
        a ``<year>/<month>`` sub-directory with a UUID-based stored name, then a
        ReceiptAttachment row is added to the session and flushed.

        Args:
            session: Database session
            receipt_id: Receipt ID to attach to
            source_file_path: Path to the original file from OCR job
            original_filename: Original filename from upload (optional)

        Returns:
            Created ReceiptAttachment, or None if the source file is missing
            or the copy failed

        Raises:
            Exception: Whatever ``session.flush()`` raises; the copied file is
                removed before the error is re-raised.
        """
        source_path = Path(source_file_path)

        if not source_path.exists():
            logger.warning(f"[ReceiptAutoCreate] Source file not found: {source_path}")
            return None

        # Generate stored filename
        ext = source_path.suffix.lower()
        stored_filename = f"{uuid.uuid4()}{ext}"

        # Determine relative path (organized by year/month, in UTC)
        now = datetime.now(timezone.utc)
        relative_path = Path(str(now.year)) / f"{now.month:02d}"

        # Full destination path
        dest_dir = settings.data_entry_upload_path_resolved / relative_path
        dest_path = dest_dir / stored_filename

        # Every filesystem step is best-effort: a failure here means "no
        # attachment", not "no receipt".
        try:
            dest_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source_path, dest_path)
            file_size = dest_path.stat().st_size
        except Exception as e:
            logger.error(f"[ReceiptAutoCreate] Failed to copy file: {e}")
            # A failed copy can leave a partial file behind.
            ReceiptAutoCreateService._remove_file_quietly(dest_path)
            return None

        # Determine MIME type
        mime_map = {
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.pdf': 'application/pdf',
        }
        mime_type = mime_map.get(ext, 'application/octet-stream')

        # Use original filename if provided, otherwise use source filename
        display_filename = original_filename or source_path.name

        # Create attachment record
        attachment = ReceiptAttachment(
            receipt_id=receipt_id,
            filename=display_filename,
            stored_filename=stored_filename,
            file_path=str(relative_path / stored_filename),
            file_size=file_size,
            mime_type=mime_type,
        )

        session.add(attachment)
        try:
            await session.flush()
        except Exception:
            # The row was not persisted, so do not leave an orphaned copy.
            ReceiptAutoCreateService._remove_file_quietly(dest_path)
            raise

        return attachment

    @staticmethod
    async def _update_batch_job_receipt_id(
        session: AsyncSession,
        job_id: str,
        receipt_id: int,
    ) -> None:
        """
        Update batch_jobs table with the created receipt_id.

        Args:
            session: Database session
            job_id: OCR job UUID
            receipt_id: Created receipt ID
        """
        result = await session.execute(
            update(BatchJob)
            .where(BatchJob.job_id == job_id)
            .values(receipt_id=receipt_id)
        )
        # An UPDATE that matches nothing is otherwise silent; surface it so a
        # missing batch job row can be diagnosed.
        if getattr(result, "rowcount", None) == 0:
            logger.warning(
                f"[ReceiptAutoCreate] No batch job found for job {job_id}; "
                f"receipt {receipt_id} was not linked"
            )

    @staticmethod
    async def create_from_ocr_result(
        session: AsyncSession,
        job_id: str,
        ocr_result: ExtractionData,
        username: str,
        batch_id: int,
        company_id: int,
        file_path: Optional[str] = None,
        original_filename: Optional[str] = None,
        file_hash: Optional[str] = None,
    ) -> ReceiptCreateResult:
        """
        Create a receipt from OCR extraction result.

        This method:
        1. Validates the OCR result (amount > 0, date valid)
        2. Maps OCR fields to Receipt fields
        3. Creates the Receipt in DRAFT status
        4. Creates attachment from original file
        5. Generates accounting entries
        6. Updates batch_jobs with receipt_id

        The session is committed on success and rolled back on failure. Errors
        are reported through the returned result rather than raised.

        Args:
            session: Database session
            job_id: OCR job UUID for tracking
            ocr_result: Extracted data from OCR processing
            username: User who initiated the upload
            batch_id: Batch ID for grouping
            company_id: Company ID for the receipt
            file_path: Path to the original uploaded file
            original_filename: Original filename from upload
            file_hash: SHA-256 hash of the file for duplicate detection (US-007)

        Returns:
            ReceiptCreateResult with success status and receipt_id or error
        """
        # Attachment copy made during this call; removed again if the
        # transaction does not commit.
        copied_file: Optional[Path] = None

        try:
            # Step 1: Validate OCR result
            is_valid, error_msg = ReceiptAutoCreateService._validate_ocr_result(ocr_result)
            if not is_valid:
                logger.warning(f"[ReceiptAutoCreate] Validation failed for job {job_id}: {error_msg}")
                return ReceiptCreateResult(
                    success=False,
                    error_message=error_msg,
                )

            # Step 2: Map OCR to Receipt schema
            receipt_data = ReceiptAutoCreateService._map_ocr_to_receipt(
                ocr_result=ocr_result,
                company_id=company_id,
            )

            # Step 3: Create receipt in DRAFT status
            receipt = await ReceiptCRUD.create(session, receipt_data, created_by=username)

            # Set batch tracking fields (US-007, US-011)
            receipt.batch_id = str(batch_id)
            receipt.file_hash = file_hash
            receipt.processing_status = "completed"
            session.add(receipt)
            await session.flush()

            # Keep the primary key in a local: ORM attributes may be expired
            # by a later commit/rollback, and reloading them implicitly is not
            # possible on an AsyncSession.
            receipt_id = receipt.id

            logger.info(
                f"[ReceiptAutoCreate] Created receipt {receipt_id} for job {job_id}: "
                f"amount={receipt.amount}, vendor={receipt.partner_name}, file_hash={file_hash[:16] if file_hash else None}..."
            )

            # Step 4: Create attachment from original file (if path provided)
            if file_path:
                attachment = await ReceiptAutoCreateService._create_attachment_from_file(
                    session=session,
                    receipt_id=receipt_id,
                    source_file_path=file_path,
                    original_filename=original_filename,
                )
                if attachment:
                    copied_file = settings.data_entry_upload_path_resolved / attachment.file_path
                    logger.info(f"[ReceiptAutoCreate] Created attachment for receipt {receipt_id}")
                else:
                    logger.warning(f"[ReceiptAutoCreate] Failed to create attachment for receipt {receipt_id}")

            # Step 5: Generate accounting entries
            # Note: For DRAFT status, entries are generated but not required for validation
            try:
                entries = ReceiptService.generate_accounting_entries(receipt)
                if entries:
                    # SAVEPOINT: a database error while writing entries rolls
                    # back only the entries, leaving the receipt transaction
                    # usable for the remaining steps.
                    async with session.begin_nested():
                        await AccountingEntryCRUD.create_bulk(
                            session, receipt_id, entries, is_auto_generated=True
                        )
                    logger.info(
                        f"[ReceiptAutoCreate] Generated {len(entries)} accounting entries "
                        f"for receipt {receipt_id}"
                    )
            except Exception as e:
                # Don't fail the receipt creation if entry generation fails
                logger.warning(
                    f"[ReceiptAutoCreate] Failed to generate entries for receipt {receipt_id}: {e}"
                )

            # Step 6: Update batch_jobs with receipt_id
            await ReceiptAutoCreateService._update_batch_job_receipt_id(
                session=session,
                job_id=job_id,
                receipt_id=receipt_id,
            )

            # Capture the broadcast payload before committing, for the same
            # attribute-expiry reason as receipt_id above.
            try:
                sse_payload = {
                    "receipt_id": receipt_id,
                    "status": receipt.status.value,
                    "processing_status": receipt.processing_status,
                    "batch_id": receipt.batch_id,
                }
            except Exception as e:
                sse_payload = None
                logger.warning(f"[ReceiptAutoCreate] SSE broadcast failed for receipt {receipt_id}: {e}")

            # Commit all changes
            await session.commit()
            # The attachment now belongs to a committed receipt; never delete it.
            copied_file = None

            # Broadcast SSE event for real-time updates (US-030)
            if sse_payload is not None:
                try:
                    await sse_service.broadcast_status_change(**sse_payload)
                except Exception as e:
                    # Don't fail the receipt creation if SSE broadcast fails
                    logger.warning(f"[ReceiptAutoCreate] SSE broadcast failed for receipt {receipt_id}: {e}")

            return ReceiptCreateResult(
                success=True,
                receipt_id=receipt_id,
            )

        except Exception as e:
            # logger.exception keeps the traceback alongside the same message.
            logger.exception(f"[ReceiptAutoCreate] Failed to create receipt for job {job_id}: {e}")
            if copied_file is not None:
                # The attachment row is about to be rolled back; drop its file too.
                ReceiptAutoCreateService._remove_file_quietly(copied_file)
            await session.rollback()
            return ReceiptCreateResult(
                success=False,
                error_message=str(e),
            )
|