Files
roa2web-service-auto/deploy-package-20260223-151231/backend/modules/data_entry/services/receipt_auto_create.py
Claude Agent 8bc567a9c5 fix telegram
2026-02-23 15:12:33 +00:00

386 lines
13 KiB
Python

"""
Auto-create Receipt from OCR results for bulk upload flow.
This service handles automatic creation of Receipt records from OCR extraction
results, enabling end-to-end processing without manual UI intervention.
The service:
1. Maps OCR ExtractionData fields to Receipt fields
2. Creates attachment from the original uploaded file
3. Generates accounting entries
4. Links the receipt back to the batch job for tracking
"""
import logging
import shutil
import uuid
from dataclasses import dataclass
from datetime import date, datetime, timezone
from decimal import Decimal
from pathlib import Path
from typing import Optional, List

from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession

from backend.modules.data_entry.db.models.receipt import (
    Receipt,
    ReceiptAttachment,
    ReceiptStatus,
    ReceiptType,
    ReceiptDirection,
)
from backend.modules.data_entry.db.models.batch import BatchJob
from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
from backend.modules.data_entry.schemas.receipt import ReceiptCreate, TvaEntrySchema, PaymentMethodSchema
from backend.modules.data_entry.schemas.ocr import ExtractionData
from backend.modules.data_entry.services.receipt_service import ReceiptService
from backend.modules.data_entry.services import sse_service
from backend.config import settings
logger = logging.getLogger(__name__)
@dataclass
class ReceiptCreateResult:
    """Result of auto-create operation.

    Returned by ReceiptAutoCreateService.create_from_ocr_result instead of
    raising, so callers can inspect ``success`` and read either the new
    receipt id or the error text.
    """

    # True when the receipt was created and committed.
    success: bool
    # Primary key of the created receipt; None when creation failed.
    receipt_id: Optional[int] = None
    # Human-readable failure reason; None on success.
    error_message: Optional[str] = None
class ReceiptAutoCreateService:
    """
    Service for automatically creating receipts from OCR results.

    Used by the bulk upload flow to create receipts without user intervention.
    Created receipts are in DRAFT status and require review before approval.
    """

    # Upper bound on the OCR raw text copied onto the receipt (limits row size).
    _MAX_RAW_TEXT_CHARS = 5000

    # File extension -> MIME type stored on the attachment record.
    _MIME_TYPES = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.pdf': 'application/pdf',
    }
    # Used for any extension missing from _MIME_TYPES.
    _DEFAULT_MIME_TYPE = 'application/octet-stream'

    @staticmethod
    def _validate_ocr_result(ocr_result: ExtractionData) -> tuple[bool, str]:
        """
        Perform minimal validation on OCR result.

        Validates:
        - amount > 0 (required for receipt)
        - date is present and not in the future

        Args:
            ocr_result: Extracted data from OCR

        Returns:
            Tuple of (is_valid, error_message); error_message is "" when valid
        """
        amount = ocr_result.amount
        if amount is None:
            return False, "Amount not extracted from receipt"
        if amount <= 0:
            return False, f"Invalid amount: {amount} (must be > 0)"

        receipt_date = ocr_result.receipt_date
        if receipt_date is None:
            return False, "Receipt date not extracted"

        # datetime subclasses date but cannot be ordered against a plain date
        # (TypeError), so reduce it to its calendar day before comparing.
        receipt_day = receipt_date.date() if isinstance(receipt_date, datetime) else receipt_date
        if receipt_day > date.today():
            return False, f"Receipt date {receipt_date} is in the future"

        return True, ""

    @staticmethod
    def _map_ocr_to_receipt(
        ocr_result: ExtractionData,
        company_id: int,
    ) -> ReceiptCreate:
        """
        Map OCR ExtractionData fields to ReceiptCreate schema.

        Args:
            ocr_result: Extracted data from OCR
            company_id: Company ID for the receipt

        Returns:
            ReceiptCreate schema ready for database insertion
        """
        # Anything other than an explicit "chitanta" is treated as a fiscal receipt.
        receipt_type = ReceiptType.BON_FISCAL
        if ocr_result.receipt_type == "chitanta":
            receipt_type = ReceiptType.CHITANTA

        # Missing or empty OCR lists are passed on as None rather than [].
        tva_breakdown: Optional[List[TvaEntrySchema]] = None
        if ocr_result.tva_entries:
            tva_breakdown = [
                TvaEntrySchema(
                    code=entry.code,
                    percent=entry.percent,
                    amount=entry.amount,
                )
                for entry in ocr_result.tva_entries
            ]

        payment_methods: Optional[List[PaymentMethodSchema]] = None
        if ocr_result.payment_methods:
            payment_methods = [
                PaymentMethodSchema(
                    method=pm.method,
                    amount=pm.amount,
                )
                for pm in ocr_result.payment_methods
            ]

        raw_text = ocr_result.raw_text
        ocr_raw_text = (
            raw_text[:ReceiptAutoCreateService._MAX_RAW_TEXT_CHARS] if raw_text else None
        )

        return ReceiptCreate(
            receipt_type=receipt_type,
            direction=ReceiptDirection.CHELTUIALA,  # Default to expense
            receipt_number=ocr_result.receipt_number,
            receipt_series=ocr_result.receipt_series,
            receipt_date=ocr_result.receipt_date,
            amount=ocr_result.amount,
            description=ocr_result.description,
            tva_breakdown=tva_breakdown,
            tva_total=ocr_result.tva_total,
            items_count=ocr_result.items_count,
            vendor_address=ocr_result.address,
            company_id=company_id,
            partner_name=ocr_result.partner_name,
            cui=ocr_result.cui,
            ocr_raw_text=ocr_raw_text,
            payment_methods=payment_methods,
            payment_mode=ocr_result.suggested_payment_mode,
        )

    @staticmethod
    def _remove_stored_file(path: Path) -> None:
        """
        Best-effort removal of a copied attachment file.

        Used to avoid leaving files on disk that no attachment row refers to.
        A failure to delete is logged and never raised.

        Args:
            path: Absolute path of the file to remove
        """
        try:
            path.unlink(missing_ok=True)
        except OSError as e:
            logger.warning(f"[ReceiptAutoCreate] Could not remove orphaned file {path}: {e}")

    @staticmethod
    async def _create_attachment_from_file(
        session: AsyncSession,
        receipt_id: int,
        source_file_path: str,
        original_filename: Optional[str] = None,
    ) -> Optional[ReceiptAttachment]:
        """
        Create attachment by copying file from OCR job location.

        The file is copied to ``<upload root>/<year>/<month>/<uuid><ext>`` and
        an attachment row pointing at that relative path is flushed.

        Args:
            session: Database session
            receipt_id: Receipt ID to attach to
            source_file_path: Path to the original file from OCR job
            original_filename: Original filename from upload (optional)

        Returns:
            Created ReceiptAttachment, or None if the source file is missing
            or could not be copied

        Raises:
            Exception: whatever ``session.flush()`` raises; the copied file is
                removed before the error is re-raised.
        """
        source_path = Path(source_file_path)
        if not source_path.is_file():
            logger.warning(f"[ReceiptAutoCreate] Source file not found: {source_path}")
            return None

        # Stored name is a fresh UUID so uploads can never collide on disk.
        ext = source_path.suffix.lower()
        stored_filename = f"{uuid.uuid4()}{ext}"

        # Attachments are organized by year/month (UTC).
        now = datetime.now(timezone.utc)
        relative_path = Path(str(now.year)) / f"{now.month:02d}"
        dest_dir = settings.data_entry_upload_path_resolved / relative_path
        dest_path = dest_dir / stored_filename

        # Every filesystem step is best-effort: a failure here yields
        # "no attachment", it does not abort the receipt.
        try:
            dest_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source_path, dest_path)
            file_size = dest_path.stat().st_size
        except OSError as e:
            logger.error(f"[ReceiptAutoCreate] Failed to copy file: {e}")
            ReceiptAutoCreateService._remove_stored_file(dest_path)
            return None

        attachment = ReceiptAttachment(
            receipt_id=receipt_id,
            # Prefer the name the user uploaded; fall back to the source name.
            filename=original_filename or source_path.name,
            stored_filename=stored_filename,
            file_path=str(relative_path / stored_filename),
            file_size=file_size,
            mime_type=ReceiptAutoCreateService._MIME_TYPES.get(
                ext, ReceiptAutoCreateService._DEFAULT_MIME_TYPE
            ),
        )
        session.add(attachment)
        try:
            await session.flush()
        except Exception:
            # The row was not persisted, so the copy would be orphaned.
            ReceiptAutoCreateService._remove_stored_file(dest_path)
            raise
        return attachment

    @staticmethod
    async def _update_batch_job_receipt_id(
        session: AsyncSession,
        job_id: str,
        receipt_id: int,
    ) -> None:
        """
        Update batch_jobs table with the created receipt_id.

        Issues an UPDATE only; the caller is responsible for committing.

        Args:
            session: Database session
            job_id: OCR job UUID
            receipt_id: Created receipt ID
        """
        await session.execute(
            update(BatchJob)
            .where(BatchJob.job_id == job_id)
            .values(receipt_id=receipt_id)
        )

    @staticmethod
    async def create_from_ocr_result(
        session: AsyncSession,
        job_id: str,
        ocr_result: ExtractionData,
        username: str,
        batch_id: int,
        company_id: int,
        file_path: Optional[str] = None,
        original_filename: Optional[str] = None,
        file_hash: Optional[str] = None,
    ) -> ReceiptCreateResult:
        """
        Create a receipt from OCR extraction result.

        This method:
        1. Validates the OCR result (amount > 0, date valid)
        2. Maps OCR fields to Receipt fields
        3. Creates the Receipt in DRAFT status
        4. Creates attachment from original file
        5. Generates accounting entries
        6. Updates batch_jobs with receipt_id
        7. Commits, then broadcasts an SSE status event

        Steps 4, 5 and the SSE broadcast are best-effort: their failure is
        logged and does not fail the receipt. Any other error rolls the
        transaction back and is reported through the returned result.

        Args:
            session: Database session
            job_id: OCR job UUID for tracking
            ocr_result: Extracted data from OCR processing
            username: User who initiated the upload
            batch_id: Batch ID for grouping
            company_id: Company ID for the receipt
            file_path: Path to the original uploaded file
            original_filename: Original filename from upload
            file_hash: SHA-256 hash of the file for duplicate detection (US-007)

        Returns:
            ReceiptCreateResult with success status and receipt_id or error
        """
        # Absolute path of the copied attachment, kept so it can be removed
        # if the transaction is rolled back.
        stored_file: Optional[Path] = None
        batch_ref = str(batch_id)
        processing_status = "completed"

        try:
            # Step 1: Validate OCR result
            is_valid, error_msg = ReceiptAutoCreateService._validate_ocr_result(ocr_result)
            if not is_valid:
                logger.warning(f"[ReceiptAutoCreate] Validation failed for job {job_id}: {error_msg}")
                return ReceiptCreateResult(
                    success=False,
                    error_message=error_msg
                )

            # Step 2: Map OCR to Receipt schema
            receipt_data = ReceiptAutoCreateService._map_ocr_to_receipt(
                ocr_result=ocr_result,
                company_id=company_id,
            )

            # Step 3: Create receipt in DRAFT status
            receipt = await ReceiptCRUD.create(session, receipt_data, created_by=username)

            # Set batch tracking fields (US-007, US-011)
            receipt.batch_id = batch_ref
            receipt.file_hash = file_hash
            receipt.processing_status = processing_status
            session.add(receipt)
            await session.flush()

            # Snapshot the primary key now: after commit (or a savepoint
            # rollback) the ORM may expire the instance, and reading an expired
            # attribute on an AsyncSession triggers implicit IO that raises.
            receipt_id = receipt.id

            logger.info(
                f"[ReceiptAutoCreate] Created receipt {receipt_id} for job {job_id}: "
                f"amount={receipt.amount}, vendor={receipt.partner_name}, file_hash={file_hash[:16] if file_hash else None}..."
            )

            # Step 4: Create attachment from original file (if path provided)
            if file_path:
                attachment = await ReceiptAutoCreateService._create_attachment_from_file(
                    session=session,
                    receipt_id=receipt_id,
                    source_file_path=file_path,
                    original_filename=original_filename,
                )
                if attachment:
                    stored_file = settings.data_entry_upload_path_resolved / attachment.file_path
                    logger.info(f"[ReceiptAutoCreate] Created attachment for receipt {receipt_id}")
                else:
                    logger.warning(f"[ReceiptAutoCreate] Failed to create attachment for receipt {receipt_id}")

            # Step 5: Generate accounting entries
            # Note: For DRAFT status, entries are generated but not required for validation
            try:
                entries = ReceiptService.generate_accounting_entries(receipt)
                if entries:
                    # SAVEPOINT: a failed insert is rolled back on its own and
                    # leaves the outer transaction (the receipt) usable. Without
                    # it, swallowing the error would still doom the commit below.
                    # NOTE(review): assumes create_bulk does not commit itself.
                    async with session.begin_nested():
                        await AccountingEntryCRUD.create_bulk(
                            session, receipt_id, entries, is_auto_generated=True
                        )
                    logger.info(
                        f"[ReceiptAutoCreate] Generated {len(entries)} accounting entries "
                        f"for receipt {receipt_id}"
                    )
            except Exception as e:
                # Don't fail the receipt creation if entry generation fails
                logger.warning(
                    f"[ReceiptAutoCreate] Failed to generate entries for receipt {receipt_id}: {e}"
                )

            # Step 6: Update batch_jobs with receipt_id
            await ReceiptAutoCreateService._update_batch_job_receipt_id(
                session=session,
                job_id=job_id,
                receipt_id=receipt_id,
            )

            # Commit all changes
            await session.commit()
        except Exception as e:
            logger.exception(f"[ReceiptAutoCreate] Failed to create receipt for job {job_id}: {e}")
            try:
                await session.rollback()
            except Exception as rollback_error:
                # Still return a result; the caller expects no exception here.
                logger.error(
                    f"[ReceiptAutoCreate] Rollback failed for job {job_id}: {rollback_error}"
                )
            # The attachment row is gone with the rollback; drop its file too.
            if stored_file is not None:
                ReceiptAutoCreateService._remove_stored_file(stored_file)
            return ReceiptCreateResult(
                success=False,
                error_message=str(e)
            )

        # Everything below runs after a successful commit and must never turn
        # a committed receipt into a reported failure.

        # Broadcast SSE event for real-time updates (US-030)
        try:
            await sse_service.broadcast_status_change(
                receipt_id=receipt_id,
                # Read from the instance; if it was expired by the commit this
                # raises and is handled like any other broadcast failure.
                status=receipt.status.value,
                processing_status=processing_status,
                batch_id=batch_ref,
            )
        except Exception as e:
            # Don't fail the receipt creation if SSE broadcast fails
            logger.warning(f"[ReceiptAutoCreate] SSE broadcast failed for receipt {receipt_id}: {e}")

        return ReceiptCreateResult(
            success=True,
            receipt_id=receipt_id
        )