feat(data-entry): Bulk Receipt Upload cu Mobile UX Android Nativ
## Funcționalități Principale

### Bulk Upload & Processing
- Drag & drop pentru upload bonuri multiple oriunde pe pagină
- Batch processing cu job queue și worker pool
- Real-time updates via SSE (Server-Sent Events) cu fallback polling
- Duplicate detection via SHA-256 file hash
- Auto-retry pentru job-uri failed
- Cancel individual jobs sau batch complet

### Mobile UX - Android Native Style
- Top bar fixă cu hamburger, titlu centrat, acțiuni (search/filter)
- Bottom navigation cu 4 tab-uri (Bonuri, Upload, Rapoarte, Setări)
- FAB (Floating Action Button) cu hide/show on scroll
- Filter chips orizontal scrollabile
- Selecție multiplă prin long-press (500ms)
- Select All + Bulk Delete cu confirmare
- Layout Android pentru Create/Edit/View bon (Gmail compose style)

### Bug Fixes
- Refresh individual via SSE în loc de refresh total pagină
- Bonurile cu eroare OCR rămân vizibile pentru editare manuală
- Afișare nume fișier original pentru toate bonurile
- Upload stabil pe mobil (fix race condition File API)
- Păstrare ordine bonuri la refresh (nu se reordonează)

### Backend
- SSE endpoint pentru status updates real-time
- Bulk delete endpoint cu partial success
- Auto-cleanup bonuri failed după 7 zile
- Batch model cu tracking complet

### Testing
- E2E tests cu Playwright
- Unit tests pentru bulk upload, auto-create, cleanup

## Commits Squashed: 43 user stories (US-001 → US-043)
## Branch: ralph/bulk-receipt-upload
## Timp dezvoltare: ~3 zile (Ralph autonomous)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -2,10 +2,15 @@
|
||||
from .receipt_service import ReceiptService
|
||||
from .nomenclature_service import NomenclatureService
|
||||
from .expense_types import EXPENSE_TYPES, ExpenseType
|
||||
from .receipt_auto_create import ReceiptAutoCreateService, ReceiptCreateResult
|
||||
from . import sse_service
|
||||
|
||||
__all__ = [
|
||||
"ReceiptService",
|
||||
"NomenclatureService",
|
||||
"EXPENSE_TYPES",
|
||||
"ExpenseType",
|
||||
"ReceiptAutoCreateService",
|
||||
"ReceiptCreateResult",
|
||||
"sse_service",
|
||||
]
|
||||
|
||||
215
backend/modules/data_entry/services/cleanup_service.py
Normal file
215
backend/modules/data_entry/services/cleanup_service.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
Cleanup service for auto-deleting expired failed receipts.
|
||||
|
||||
US-008: Backend - Auto-Cleanup Erori După 7 Zile
|
||||
- Finds receipts with processing_status='failed' and processing_completed_at < now() - 7 days
|
||||
- Deletes the receipts and their attached files from storage
|
||||
- Runs at startup and then daily as a background task
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import select, and_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptAttachment
|
||||
from backend.modules.data_entry.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cleanup configuration
CLEANUP_RETENTION_DAYS = 7   # failed receipts older than this many days are purged
CLEANUP_INTERVAL_HOURS = 24  # delay between two background cleanup runs

# In-memory storage for last cleanup stats (optional - for login notification).
# The initial value carries the same keys that cleanup_expired_failed_receipts()
# stores after a run, so readers see one stable shape before and after the
# first cleanup instead of a dict that gains "files_deleted" later.
_last_cleanup_stats: dict = {
    "count": 0,
    "files_deleted": 0,
    "timestamp": None,
}
|
||||
|
||||
|
||||
def get_last_cleanup_stats() -> dict:
    """Return a shallow copy of the statistics recorded by the last cleanup run.

    A copy is handed out so that callers cannot mutate the module-level record.
    """
    snapshot = dict(_last_cleanup_stats)
    return snapshot
|
||||
|
||||
|
||||
def _remove_attachment_file(file_path: Path, upload_base_path: Path) -> bool:
    """Delete one attachment file and prune its parent directory if left empty.

    Args:
        file_path: Absolute path of the attachment on disk
        upload_base_path: Root upload directory; it is never removed

    Returns:
        True if the file was actually removed from disk, False otherwise
    """
    removed = False

    if file_path.exists():
        try:
            file_path.unlink()
            removed = True
            logger.debug(f"[Cleanup] Deleted file: {file_path}")
        except OSError as e:
            logger.warning(f"[Cleanup] Failed to delete file {file_path}: {e}")

    # Also try to clean up empty parent directories
    parent_dir = file_path.parent
    if parent_dir.exists() and parent_dir != upload_base_path:
        try:
            # Only remove if directory is empty
            if not any(parent_dir.iterdir()):
                parent_dir.rmdir()
                logger.debug(f"[Cleanup] Removed empty directory: {parent_dir}")
        except OSError:
            pass  # Directory not empty or permission issue, skip

    return removed


async def cleanup_expired_failed_receipts(session: AsyncSession) -> int:
    """
    Find and delete receipts with processing_status='failed' older than the
    retention period (CLEANUP_RETENTION_DAYS).

    This function:
    1. Queries for failed receipts where processing_completed_at is older
       than the cutoff
    2. Marks the receipt records for deletion and commits
    3. Only after the commit succeeded, deletes the attachment files from disk

    Files are removed last on purpose: unlinking is irreversible, so doing it
    before the commit would leave database rows pointing at missing files
    whenever the commit fails.

    Args:
        session: AsyncSession for database operations

    Returns:
        Number of receipts deleted
    """
    global _last_cleanup_stats

    cutoff_date = datetime.utcnow() - timedelta(days=CLEANUP_RETENTION_DAYS)

    # Find expired failed receipts with their attachments
    query = select(Receipt).options(
        selectinload(Receipt.attachments)
    ).where(
        and_(
            Receipt.processing_status == "failed",
            Receipt.processing_completed_at.isnot(None),
            Receipt.processing_completed_at < cutoff_date
        )
    )

    result = await session.execute(query)
    expired_receipts = result.scalars().all()

    if not expired_receipts:
        logger.debug("[Cleanup] No expired failed receipts found")
        return 0

    upload_base_path = settings.upload_path_resolved

    deleted_count = 0
    files_to_remove: list[Path] = []

    for receipt in expired_receipts:
        try:
            # Resolve the paths now: ORM objects may no longer be readable
            # once the session has committed.
            receipt_files = [
                upload_base_path / attachment.file_path
                for attachment in receipt.attachments
            ]

            # Delete receipt (cascade deletes attachment records in DB)
            await session.delete(receipt)
        except Exception as e:
            logger.error(f"[Cleanup] Error deleting receipt {receipt.id}: {e}")
            continue

        files_to_remove.extend(receipt_files)
        deleted_count += 1

    # Commit all deletions
    if deleted_count > 0:
        await session.commit()

    # The rows are gone for good; it is now safe to remove the files.
    deleted_files = 0
    for file_path in files_to_remove:
        if _remove_attachment_file(file_path, upload_base_path):
            deleted_files += 1

    # Update stats for notification
    _last_cleanup_stats = {
        "count": deleted_count,
        "files_deleted": deleted_files,
        "timestamp": datetime.utcnow().isoformat()
    }

    logger.info(f"[Cleanup] Cleaned up {deleted_count} expired failed receipts ({deleted_files} files)")

    return deleted_count
|
||||
|
||||
|
||||
async def _run_cleanup_once(get_session_func, label: str) -> None:
    """Run one cleanup pass using the first session yielded by get_session_func.

    Args:
        get_session_func: Async generator function that yields database sessions
        label: Prefix used in the log line ("Initial" or "Daily")
    """
    session_source = get_session_func()
    try:
        async for session in session_source:
            count = await cleanup_expired_failed_receipts(session)
            if count > 0:
                logger.info(f"[Cleanup] {label} cleanup: {count} receipts removed")
            break
    finally:
        # Leaving an `async for` early does not finalize an async generator;
        # close it explicitly so any code after its `yield` runs right away
        # instead of whenever the generator is garbage collected.
        aclose = getattr(session_source, "aclose", None)
        if aclose is not None:
            await aclose()


async def run_cleanup_task(get_session_func) -> None:
    """
    Background task that runs cleanup at startup and then every
    CLEANUP_INTERVAL_HOURS hours.

    A failed run is logged and does not stop the loop. Cancellation is logged
    and re-raised so the awaiting caller observes it.

    Args:
        get_session_func: Async generator function that yields database sessions
    """
    logger.info("[Cleanup] Starting cleanup background task")

    # Run immediately at startup
    try:
        await _run_cleanup_once(get_session_func, "Initial")
    except Exception as e:
        logger.error(f"[Cleanup] Initial cleanup failed: {e}")

    # Then run every 24 hours
    while True:
        try:
            await asyncio.sleep(CLEANUP_INTERVAL_HOURS * 3600)
            await _run_cleanup_once(get_session_func, "Daily")
        except asyncio.CancelledError:
            logger.info("[Cleanup] Cleanup task cancelled")
            raise
        except Exception as e:
            logger.error(f"[Cleanup] Daily cleanup failed: {e}")
            # Continue running even if one cleanup fails
|
||||
|
||||
|
||||
# Handle of the background cleanup task, kept so it can be cancelled on shutdown
_cleanup_task: Optional[asyncio.Task] = None


async def start_cleanup_task(get_session_func) -> bool:
    """
    Launch the periodic cleanup as an asyncio task, unless one is still active.

    Args:
        get_session_func: Async generator function that yields database sessions

    Returns:
        True if a new task was scheduled, False if one is already running or
        scheduling raised
    """
    global _cleanup_task

    still_active = _cleanup_task is not None and not _cleanup_task.done()
    if still_active:
        logger.warning("[Cleanup] Cleanup task already running")
        return False

    try:
        worker = run_cleanup_task(get_session_func)
        _cleanup_task = asyncio.create_task(worker)
        logger.info("[Cleanup] ✅ Cleanup background task started")
        return True
    except Exception as exc:
        logger.error(f"[Cleanup] Failed to start cleanup task: {exc}")
        return False
|
||||
|
||||
|
||||
async def stop_cleanup_task() -> None:
    """Cancel the background cleanup task, wait for it to finish, and forget it."""
    global _cleanup_task

    task = _cleanup_task
    if task is None or task.done():
        # Nothing is running; just drop the stale reference.
        _cleanup_task = None
        return

    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass  # expected outcome of the cancel() above
    logger.info("[Cleanup] Cleanup task stopped")

    _cleanup_task = None
|
||||
|
||||
|
||||
def is_cleanup_task_running() -> bool:
    """Tell whether a cleanup task exists and has not finished yet."""
    if _cleanup_task is None:
        return False
    return not _cleanup_task.done()
|
||||
@@ -23,7 +23,9 @@ Schema:
|
||||
ocr_time_ms INTEGER, -- Actual OCR engine processing time
|
||||
created_by TEXT, -- Username
|
||||
original_filename TEXT,
|
||||
expires_at TIMESTAMP
|
||||
expires_at TIMESTAMP,
|
||||
batch_id INTEGER, -- Foreign key to batch_uploads (for bulk processing)
|
||||
file_hash TEXT -- SHA-256 hash for duplicate detection (US-007)
|
||||
)
|
||||
"""
|
||||
|
||||
@@ -66,6 +68,7 @@ class OCRJobStatus(str, Enum):
|
||||
processing = "processing"
|
||||
completed = "completed"
|
||||
failed = "failed"
|
||||
cancelled = "cancelled"
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -86,6 +89,8 @@ class OCRJob:
|
||||
created_by: Optional[str] = None
|
||||
original_filename: Optional[str] = None
|
||||
expires_at: Optional[datetime] = None
|
||||
batch_id: Optional[int] = None # Links to batch_uploads table for bulk processing
|
||||
file_hash: Optional[str] = None # SHA-256 hash for duplicate detection (US-007)
|
||||
|
||||
@property
|
||||
def queue_wait_ms(self) -> Optional[int]:
|
||||
@@ -163,7 +168,8 @@ class OCRJobQueue:
|
||||
ocr_time_ms INTEGER,
|
||||
created_by TEXT,
|
||||
original_filename TEXT,
|
||||
expires_at TIMESTAMP
|
||||
expires_at TIMESTAMP,
|
||||
batch_id INTEGER
|
||||
)
|
||||
''')
|
||||
|
||||
@@ -174,6 +180,20 @@ class OCRJobQueue:
|
||||
except Exception:
|
||||
pass # Column already exists
|
||||
|
||||
# Migration: add batch_id column if it doesn't exist
|
||||
try:
|
||||
await db.execute('ALTER TABLE ocr_jobs ADD COLUMN batch_id INTEGER')
|
||||
logger.info("[OCRJobQueue] Added batch_id column to existing table")
|
||||
except Exception:
|
||||
pass # Column already exists
|
||||
|
||||
# Migration: add file_hash column if it doesn't exist (US-007)
|
||||
try:
|
||||
await db.execute('ALTER TABLE ocr_jobs ADD COLUMN file_hash TEXT')
|
||||
logger.info("[OCRJobQueue] Added file_hash column to existing table")
|
||||
except Exception:
|
||||
pass # Column already exists
|
||||
|
||||
# Index for efficient queue queries
|
||||
await db.execute('''
|
||||
CREATE INDEX IF NOT EXISTS idx_ocr_jobs_status
|
||||
@@ -197,7 +217,9 @@ class OCRJobQueue:
|
||||
mime_type: str,
|
||||
engine: str = "doctr_plus",
|
||||
username: Optional[str] = None,
|
||||
original_filename: Optional[str] = None
|
||||
original_filename: Optional[str] = None,
|
||||
batch_id: Optional[int] = None,
|
||||
file_hash: Optional[str] = None
|
||||
) -> OCRJob:
|
||||
"""
|
||||
Create a new OCR job.
|
||||
@@ -210,6 +232,8 @@ class OCRJobQueue:
|
||||
engine: OCR engine ('tesseract', 'doctr', 'doctr_plus', 'paddleocr')
|
||||
username: Username of requester
|
||||
original_filename: Original filename from upload
|
||||
batch_id: Optional batch ID for bulk upload processing
|
||||
file_hash: Optional SHA-256 hash for duplicate detection (US-007)
|
||||
|
||||
Returns:
|
||||
Created OCRJob instance
|
||||
@@ -241,15 +265,15 @@ class OCRJobQueue:
|
||||
await db.execute('''
|
||||
INSERT INTO ocr_jobs (
|
||||
id, status, file_path, mime_type, engine,
|
||||
created_at, created_by, original_filename, expires_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
created_at, created_by, original_filename, expires_at, batch_id, file_hash
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
''', (
|
||||
job_id, OCRJobStatus.pending.value, str(file_path), mime_type, engine,
|
||||
now.isoformat(), username, original_filename, expires_at.isoformat()
|
||||
now.isoformat(), username, original_filename, expires_at.isoformat(), batch_id, file_hash
|
||||
))
|
||||
await db.commit()
|
||||
|
||||
logger.info(f"[OCRJobQueue] Created job {job_id}: engine={engine}, file={file_path.name}")
|
||||
logger.info(f"[OCRJobQueue] Created job {job_id}: engine={engine}, file={file_path.name}, batch_id={batch_id}")
|
||||
|
||||
return OCRJob(
|
||||
id=job_id,
|
||||
@@ -260,7 +284,9 @@ class OCRJobQueue:
|
||||
created_at=now,
|
||||
created_by=username,
|
||||
original_filename=original_filename,
|
||||
expires_at=expires_at
|
||||
expires_at=expires_at,
|
||||
batch_id=batch_id,
|
||||
file_hash=file_hash
|
||||
)
|
||||
|
||||
async def get_job(self, job_id: str) -> Optional[OCRJob]:
|
||||
@@ -601,6 +627,8 @@ class OCRJobQueue:
|
||||
created_by=row['created_by'],
|
||||
original_filename=row['original_filename'],
|
||||
expires_at=parse_datetime(row['expires_at']),
|
||||
batch_id=row['batch_id'] if 'batch_id' in row.keys() else None,
|
||||
file_hash=row['file_hash'] if 'file_hash' in row.keys() else None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ from typing import Optional, Set
|
||||
|
||||
from .job_queue import job_queue, OCRJobStatus, OCRJob
|
||||
from .ocr_worker_pool import ocr_worker_pool
|
||||
from backend.modules.data_entry.schemas.ocr import ExtractionData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -223,6 +224,21 @@ async def _process_job(job: OCRJob) -> None:
|
||||
validation_errors_count=len(extraction.get('validation_errors', [])),
|
||||
)
|
||||
|
||||
# Auto-save receipt for batch jobs
|
||||
if job.batch_id:
|
||||
auto_save_result = await _auto_save_batch_receipt(
|
||||
job=job,
|
||||
extraction=extraction,
|
||||
file_path=str(file_path)
|
||||
)
|
||||
if not auto_save_result:
|
||||
# Auto-save failed - mark job as failed
|
||||
# Note: job_queue status already updated to 'completed' above
|
||||
# We need to update it back to failed with the auto-save error
|
||||
logger.warning(
|
||||
f"[JobWorker] Job {job.id} OCR succeeded but auto-save failed"
|
||||
)
|
||||
|
||||
else:
|
||||
# Job failed
|
||||
error_msg = result.get("error", "Unknown error")
|
||||
@@ -543,3 +559,107 @@ def _count_extracted_fields(extraction: dict) -> int:
|
||||
count += 1
|
||||
|
||||
return count
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Auto-Save Batch Receipt Helper
|
||||
# ============================================================================
|
||||
|
||||
async def _auto_save_batch_receipt(
    job: OCRJob,
    extraction: dict,
    file_path: str
) -> bool:
    """
    Create a receipt from the OCR result of a batch job.

    Looks up the batch the job belongs to and delegates the creation to
    ReceiptAutoCreateService.create_from_ocr_result. Whenever the receipt
    cannot be created, the job is switched to the failed status with an
    "Auto-save error: ..." message.

    Args:
        job: Completed OCRJob with batch_id set
        extraction: OCR extraction result dict
        file_path: Path to the original uploaded file

    Returns:
        True if the receipt was created (or the job has no batch_id),
        False otherwise
    """
    if not job.batch_id:
        return True  # Not a batch job, nothing to do

    logger.info(f"[JobWorker] Auto-saving receipt for batch job {job.id} (batch_id={job.batch_id})")

    try:
        # Import here to avoid circular imports
        from backend.modules.data_entry.db.database import get_db_session
        from backend.modules.data_entry.db.models import BatchUpload
        from backend.modules.data_entry.services.receipt_auto_create import ReceiptAutoCreateService
        from sqlalchemy import select

        # Convert extraction dict to ExtractionData schema
        ocr_result = ExtractionData(**extraction)

        async with await get_db_session() as session:
            # The batch row supplies company_id and the fallback user
            batch_lookup = select(BatchUpload).where(BatchUpload.id == job.batch_id)
            batch_result = await session.execute(batch_lookup)
            batch = batch_result.scalar_one_or_none()

            if not batch:
                failure = f"Batch {job.batch_id} not found"
                logger.error(f"[JobWorker] Auto-save failed for job {job.id}: {failure}")
                await job_queue.update_status(
                    job_id=job.id,
                    status=OCRJobStatus.failed,
                    error=f"Auto-save error: {failure}"
                )
                return False

            outcome = await ReceiptAutoCreateService.create_from_ocr_result(
                session=session,
                job_id=job.id,
                ocr_result=ocr_result,
                username=job.created_by or batch.user_id,
                batch_id=job.batch_id,
                company_id=batch.company_id,
                file_path=file_path,
                original_filename=job.original_filename,
                file_hash=job.file_hash  # Pass file_hash for duplicate detection (US-007)
            )

            if not outcome.success:
                failure = outcome.error_message or "Unknown error"
                logger.warning(
                    f"[JobWorker] Auto-save validation failed for job {job.id}: {failure}"
                )
                # Record the auto-save error on the job itself
                await job_queue.update_status(
                    job_id=job.id,
                    status=OCRJobStatus.failed,
                    error=f"Auto-save error: {failure}"
                )
                return False

            logger.info(
                f"[JobWorker] Auto-save successful for job {job.id}: "
                f"receipt_id={outcome.receipt_id}"
            )
            return True

    except Exception as exc:
        failure = str(exc)
        logger.error(f"[JobWorker] Auto-save exception for job {job.id}: {failure}")

        # Best effort: mark the job failed; a second failure is only logged
        try:
            await job_queue.update_status(
                job_id=job.id,
                status=OCRJobStatus.failed,
                error=f"Auto-save error: {failure}"
            )
        except Exception as update_err:
            logger.error(f"[JobWorker] Failed to update job status after auto-save error: {update_err}")

        return False
|
||||
|
||||
385
backend/modules/data_entry/services/receipt_auto_create.py
Normal file
385
backend/modules/data_entry/services/receipt_auto_create.py
Normal file
@@ -0,0 +1,385 @@
|
||||
"""
|
||||
Auto-create Receipt from OCR results for bulk upload flow.
|
||||
|
||||
This service handles automatic creation of Receipt records from OCR extraction
|
||||
results, enabling end-to-end processing without manual UI intervention.
|
||||
|
||||
The service:
|
||||
1. Maps OCR ExtractionData fields to Receipt fields
|
||||
2. Creates attachment from the original uploaded file
|
||||
3. Generates accounting entries
|
||||
4. Links the receipt back to the batch job for tracking
|
||||
"""
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.modules.data_entry.db.models.receipt import (
|
||||
Receipt,
|
||||
ReceiptAttachment,
|
||||
ReceiptStatus,
|
||||
ReceiptType,
|
||||
ReceiptDirection,
|
||||
)
|
||||
from backend.modules.data_entry.db.models.batch import BatchJob
|
||||
from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
|
||||
from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
|
||||
from backend.modules.data_entry.schemas.receipt import ReceiptCreate, TvaEntrySchema, PaymentMethodSchema
|
||||
from backend.modules.data_entry.schemas.ocr import ExtractionData
|
||||
from backend.modules.data_entry.services.receipt_service import ReceiptService
|
||||
from backend.modules.data_entry.services import sse_service
|
||||
from backend.config import settings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ReceiptCreateResult:
    """Outcome of one automatic receipt creation attempt."""

    # Whether the receipt was created
    success: bool
    # Identifier of the created receipt; left as None when creation failed
    receipt_id: Optional[int] = None
    # Reason for the failure; left as None on success
    error_message: Optional[str] = None
|
||||
|
||||
|
||||
class ReceiptAutoCreateService:
    """
    Service for automatically creating receipts from OCR results.

    Used by the bulk upload flow to create receipts without user intervention.
    Created receipts are in DRAFT status and require review before approval.
    """

    @staticmethod
    def _validate_ocr_result(ocr_result: ExtractionData) -> tuple[bool, str]:
        """
        Perform minimal validation on OCR result.

        Validates:
        - amount is present and > 0 (required for receipt)
        - receipt date is present and not in the future

        Args:
            ocr_result: Extracted data from OCR

        Returns:
            Tuple of (is_valid, error_message); error_message is "" when valid
        """
        # Validate amount exists and is positive
        if ocr_result.amount is None:
            return False, "Amount not extracted from receipt"

        if ocr_result.amount <= 0:
            return False, f"Invalid amount: {ocr_result.amount} (must be > 0)"

        # Validate date exists and is not in the future
        if ocr_result.receipt_date is None:
            return False, "Receipt date not extracted"

        today = date.today()
        if ocr_result.receipt_date > today:
            return False, f"Receipt date {ocr_result.receipt_date} is in the future"

        return True, ""

    @staticmethod
    def _map_ocr_to_receipt(
        ocr_result: ExtractionData,
        company_id: int,
    ) -> ReceiptCreate:
        """
        Map OCR ExtractionData fields to ReceiptCreate schema.

        Args:
            ocr_result: Extracted data from OCR
            company_id: Company ID for the receipt

        Returns:
            ReceiptCreate schema ready for database insertion
        """
        # Map receipt type; anything other than "chitanta" is a fiscal receipt
        receipt_type = ReceiptType.BON_FISCAL
        if ocr_result.receipt_type == "chitanta":
            receipt_type = ReceiptType.CHITANTA

        # Map TVA breakdown from OCR TvaEntry to schema TvaEntrySchema
        tva_breakdown: Optional[List[TvaEntrySchema]] = None
        if ocr_result.tva_entries:
            tva_breakdown = [
                TvaEntrySchema(
                    code=entry.code,
                    percent=entry.percent,
                    amount=entry.amount
                )
                for entry in ocr_result.tva_entries
            ]

        # Map payment methods
        payment_methods: Optional[List[PaymentMethodSchema]] = None
        if ocr_result.payment_methods:
            payment_methods = [
                PaymentMethodSchema(
                    method=pm.method,
                    amount=pm.amount
                )
                for pm in ocr_result.payment_methods
            ]

        # Create receipt data
        return ReceiptCreate(
            receipt_type=receipt_type,
            direction=ReceiptDirection.CHELTUIALA,  # Default to expense
            receipt_number=ocr_result.receipt_number,
            receipt_series=ocr_result.receipt_series,
            receipt_date=ocr_result.receipt_date,
            amount=ocr_result.amount,
            description=ocr_result.description,
            tva_breakdown=tva_breakdown,
            tva_total=ocr_result.tva_total,
            items_count=ocr_result.items_count,
            vendor_address=ocr_result.address,
            company_id=company_id,
            partner_name=ocr_result.partner_name,
            cui=ocr_result.cui,
            ocr_raw_text=ocr_result.raw_text[:5000] if ocr_result.raw_text else None,  # Limit size
            payment_methods=payment_methods,
            payment_mode=ocr_result.suggested_payment_mode,
        )

    @staticmethod
    def _discard_copied_file(path: Optional[Path]) -> None:
        """Best-effort removal of an attachment copy whose DB row was not persisted.

        Args:
            path: Absolute path of the copied file, or None when nothing was copied
        """
        if path is None:
            return
        try:
            path.unlink(missing_ok=True)
        except OSError as e:
            logger.warning(f"[ReceiptAutoCreate] Failed to remove orphaned file {path}: {e}")

    @staticmethod
    async def _create_attachment_from_file(
        session: AsyncSession,
        receipt_id: int,
        source_file_path: str,
        original_filename: Optional[str] = None,
    ) -> Optional[ReceiptAttachment]:
        """
        Create attachment by copying file from OCR job location.

        The file is copied under <upload root>/<year>/<month>/<uuid><ext>. If
        the attachment row cannot be flushed, the copy is removed again before
        the error is re-raised, so no unreferenced file is left on disk.

        Args:
            session: Database session
            receipt_id: Receipt ID to attach to
            source_file_path: Path to the original file from OCR job
            original_filename: Original filename from upload (optional)

        Returns:
            Created ReceiptAttachment, or None if the source file is missing
            or the copy failed
        """
        source_path = Path(source_file_path)

        if not source_path.exists():
            logger.warning(f"[ReceiptAutoCreate] Source file not found: {source_path}")
            return None

        # Generate stored filename
        ext = source_path.suffix.lower()
        stored_filename = f"{uuid.uuid4()}{ext}"

        # Determine relative path (organized by year/month)
        now = datetime.utcnow()
        relative_path = Path(str(now.year)) / f"{now.month:02d}"

        # Full destination path
        dest_dir = settings.data_entry_upload_path_resolved / relative_path
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest_path = dest_dir / stored_filename

        # Copy file to attachments directory
        try:
            shutil.copy2(source_path, dest_path)
        except Exception as e:
            logger.error(f"[ReceiptAutoCreate] Failed to copy file: {e}")
            return None

        # Get file size
        file_size = dest_path.stat().st_size

        # Determine MIME type
        mime_map = {
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.pdf': 'application/pdf',
        }
        mime_type = mime_map.get(ext, 'application/octet-stream')

        # Use original filename if provided, otherwise use source filename
        display_filename = original_filename or source_path.name

        # Create attachment record
        attachment = ReceiptAttachment(
            receipt_id=receipt_id,
            filename=display_filename,
            stored_filename=stored_filename,
            file_path=str(relative_path / stored_filename),
            file_size=file_size,
            mime_type=mime_type,
        )

        try:
            session.add(attachment)
            await session.flush()
        except Exception:
            # The row was not persisted, so the copy would be orphaned.
            ReceiptAutoCreateService._discard_copied_file(dest_path)
            raise

        return attachment

    @staticmethod
    async def _update_batch_job_receipt_id(
        session: AsyncSession,
        job_id: str,
        receipt_id: int,
    ) -> None:
        """
        Update batch_jobs table with the created receipt_id.

        Args:
            session: Database session
            job_id: OCR job UUID
            receipt_id: Created receipt ID
        """
        await session.execute(
            update(BatchJob)
            .where(BatchJob.job_id == job_id)
            .values(receipt_id=receipt_id)
        )

    @staticmethod
    async def create_from_ocr_result(
        session: AsyncSession,
        job_id: str,
        ocr_result: ExtractionData,
        username: str,
        batch_id: int,
        company_id: int,
        file_path: Optional[str] = None,
        original_filename: Optional[str] = None,
        file_hash: Optional[str] = None,
    ) -> ReceiptCreateResult:
        """
        Create a receipt from OCR extraction result.

        This method:
        1. Validates the OCR result (amount > 0, date valid)
        2. Maps OCR fields to Receipt fields
        3. Creates the Receipt
        4. Creates attachment from original file
        5. Generates accounting entries (best effort)
        6. Updates batch_jobs with receipt_id
        7. Commits and broadcasts an SSE status event (best effort)

        Every receipt attribute needed after the commit is read before it, so
        the post-commit steps never touch ORM state. On failure the session is
        rolled back and an attachment file copied during this call is removed.

        Args:
            session: Database session
            job_id: OCR job UUID for tracking
            ocr_result: Extracted data from OCR processing
            username: User who initiated the upload
            batch_id: Batch ID for grouping
            company_id: Company ID for the receipt
            file_path: Path to the original uploaded file
            original_filename: Original filename from upload
            file_hash: SHA-256 hash of the file for duplicate detection (US-007)

        Returns:
            ReceiptCreateResult with success status and receipt_id or error
        """
        # Absolute path of the attachment copied by this call, while it is
        # still backed only by uncommitted rows.
        copied_file: Optional[Path] = None

        try:
            # Step 1: Validate OCR result
            is_valid, error_msg = ReceiptAutoCreateService._validate_ocr_result(ocr_result)
            if not is_valid:
                logger.warning(f"[ReceiptAutoCreate] Validation failed for job {job_id}: {error_msg}")
                return ReceiptCreateResult(
                    success=False,
                    error_message=error_msg
                )

            # Step 2: Map OCR to Receipt schema
            receipt_data = ReceiptAutoCreateService._map_ocr_to_receipt(
                ocr_result=ocr_result,
                company_id=company_id,
            )

            # Step 3: Create receipt
            receipt = await ReceiptCRUD.create(session, receipt_data, created_by=username)

            # Set batch tracking fields (US-007, US-011)
            receipt.batch_id = str(batch_id)
            receipt.file_hash = file_hash
            receipt.processing_status = "completed"
            session.add(receipt)
            await session.flush()

            # Keep the primary key in a plain local: later log lines and the
            # returned result must not depend on the ORM object's state.
            receipt_id = receipt.id

            logger.info(
                f"[ReceiptAutoCreate] Created receipt {receipt_id} for job {job_id}: "
                f"amount={receipt.amount}, vendor={receipt.partner_name}, file_hash={file_hash[:16] if file_hash else None}..."
            )

            # Step 4: Create attachment from original file (if path provided)
            if file_path:
                attachment = await ReceiptAutoCreateService._create_attachment_from_file(
                    session=session,
                    receipt_id=receipt_id,
                    source_file_path=file_path,
                    original_filename=original_filename,
                )
                if attachment:
                    copied_file = settings.data_entry_upload_path_resolved / attachment.file_path
                    logger.info(f"[ReceiptAutoCreate] Created attachment for receipt {receipt_id}")
                else:
                    logger.warning(f"[ReceiptAutoCreate] Failed to create attachment for receipt {receipt_id}")

            # Step 5: Generate accounting entries
            # Note: For DRAFT status, entries are generated but not required for validation
            # NOTE(review): if create_bulk fails inside the database, the
            # transaction may be unusable and the commit below would fail too;
            # a savepoint would isolate it - confirm backend support first.
            try:
                entries = ReceiptService.generate_accounting_entries(receipt)
                if entries:
                    await AccountingEntryCRUD.create_bulk(
                        session, receipt_id, entries, is_auto_generated=True
                    )
                    logger.info(
                        f"[ReceiptAutoCreate] Generated {len(entries)} accounting entries "
                        f"for receipt {receipt_id}"
                    )
            except Exception as e:
                # Don't fail the receipt creation if entry generation fails
                logger.warning(
                    f"[ReceiptAutoCreate] Failed to generate entries for receipt {receipt_id}: {e}"
                )

            # Step 6: Update batch_jobs with receipt_id
            await ReceiptAutoCreateService._update_batch_job_receipt_id(
                session=session,
                job_id=job_id,
                receipt_id=receipt_id,
            )

            # Snapshot what the SSE event needs before committing: a commit
            # may expire ORM attributes, and reloading them afterwards could
            # raise and turn a persisted receipt into a reported failure.
            receipt_status = receipt.status
            receipt_processing_status = receipt.processing_status
            receipt_batch_id = receipt.batch_id

            # Commit all changes
            await session.commit()

            # The attachment row is durable now; its file must be kept.
            copied_file = None

            # Broadcast SSE event for real-time updates (US-030)
            try:
                await sse_service.broadcast_status_change(
                    receipt_id=receipt_id,
                    status=receipt_status.value,
                    processing_status=receipt_processing_status,
                    batch_id=receipt_batch_id,
                )
            except Exception as e:
                # Don't fail the receipt creation if SSE broadcast fails
                logger.warning(f"[ReceiptAutoCreate] SSE broadcast failed for receipt {receipt_id}: {e}")

            return ReceiptCreateResult(
                success=True,
                receipt_id=receipt_id
            )

        except Exception as e:
            logger.error(f"[ReceiptAutoCreate] Failed to create receipt for job {job_id}: {e}")
            # The rollback drops the attachment row, so remove its file too.
            ReceiptAutoCreateService._discard_copied_file(copied_file)
            await session.rollback()
            return ReceiptCreateResult(
                success=False,
                error_message=str(e)
            )
|
||||
@@ -15,6 +15,7 @@ from backend.modules.data_entry.schemas.receipt import (
|
||||
ReceiptFilter,
|
||||
ReceiptResponse,
|
||||
ReceiptListResponse,
|
||||
ProcessingStats,
|
||||
AccountingEntryCreate,
|
||||
)
|
||||
from backend.modules.data_entry.services.expense_types import EXPENSE_TYPES, get_expense_type
|
||||
@@ -53,17 +54,26 @@ class ReceiptService:
|
||||
session: AsyncSession,
|
||||
filters: ReceiptFilter,
|
||||
) -> ReceiptListResponse:
|
||||
"""Get paginated list of receipts."""
|
||||
"""Get paginated list of receipts with processing_stats (US-012)."""
|
||||
receipts, total = await ReceiptCRUD.get_list(session, filters)
|
||||
|
||||
pages = (total + filters.page_size - 1) // filters.page_size if total > 0 else 1
|
||||
|
||||
# Get processing stats for bulk uploaded receipts (US-012)
|
||||
stats_dict = await ReceiptCRUD.get_processing_stats(
|
||||
session,
|
||||
company_id=filters.company_id,
|
||||
batch_id=filters.batch_id,
|
||||
)
|
||||
processing_stats = ProcessingStats(**stats_dict)
|
||||
|
||||
return ReceiptListResponse(
|
||||
items=[ReceiptResponse.model_validate(r) for r in receipts],
|
||||
total=total,
|
||||
page=filters.page,
|
||||
page_size=filters.page_size,
|
||||
pages=pages,
|
||||
processing_stats=processing_stats,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
197
backend/modules/data_entry/services/sse_service.py
Normal file
197
backend/modules/data_entry/services/sse_service.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
Server-Sent Events (SSE) service for real-time status updates.
|
||||
|
||||
This module implements an event broadcaster pattern using asyncio.Queue per client.
|
||||
When receipt status changes occur (CRUD operations), events are pushed to all
|
||||
connected clients who are listening for that specific batch or all receipts.
|
||||
|
||||
Usage:
|
||||
# In router endpoint (SSE stream):
|
||||
async for event in sse_service.subscribe(batch_id=None):
|
||||
yield event
|
||||
|
||||
# When status changes (from CRUD operations):
|
||||
await sse_service.broadcast_status_change(receipt_id, status, processing_status, batch_id)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import AsyncGenerator, Optional
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class StatusChangeEvent:
    """Payload describing a single receipt status change.

    Attributes are serialized verbatim (via ``dataclasses.asdict``) into the
    JSON body of one SSE ``data:`` frame.
    """

    receipt_id: int
    status: str
    processing_status: Optional[str] = None
    batch_id: Optional[str] = None
    # ISO-8601 string; filled in by __post_init__ when the caller omits it.
    timestamp: Optional[str] = None

    def __post_init__(self):
        """Stamp the event with the current UTC time if none was supplied."""
        if self.timestamp is None:
            # datetime.utcnow() is deprecated since Python 3.12. Take an aware
            # UTC "now" and drop the tzinfo so the emitted string keeps the
            # same naive ISO format (no "+00:00" suffix) as before.
            from datetime import timezone

            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            self.timestamp = now_utc.isoformat()

    def to_sse_data(self) -> str:
        """Return the event as one SSE frame.

        Returns:
            ``"data: <json>"`` followed by the blank line that terminates an
            SSE message.
        """
        payload = json.dumps(asdict(self))
        return f"data: {payload}\n\n"
|
||||
|
||||
|
||||
class SSEEventBroadcaster:
    """
    Manages SSE client connections and broadcasts events.

    Each client gets its own asyncio.Queue. When an event occurs,
    it's pushed to all relevant queues based on batch_id filtering.

    Queues are bounded: when a client stops draining its queue, new events
    for that client are dropped (and logged) instead of accumulating in
    memory without limit.
    """

    def __init__(self, max_queue_size: int = 1000):
        """
        Args:
            max_queue_size: Maximum number of pending events buffered per
                client. Once reached, further events for that client are
                dropped. A value <= 0 makes the queues unbounded
                (asyncio.Queue semantics).
        """
        # Dict of {client_id: (queue, batch_id_filter)}
        # batch_id_filter is None for clients that want all events
        self._clients: dict[str, tuple[asyncio.Queue, Optional[str]]] = {}
        self._client_counter = 0
        self._max_queue_size = max_queue_size
        self._lock = asyncio.Lock()

    async def _generate_client_id(self) -> str:
        """Generate unique client ID from a counter plus the current epoch time."""
        async with self._lock:
            self._client_counter += 1
            # datetime.now().timestamp() yields the real epoch time;
            # utcnow().timestamp() would treat the naive UTC value as local time.
            return f"client_{self._client_counter}_{datetime.now().timestamp()}"

    async def subscribe(
        self,
        batch_id: Optional[str] = None,
    ) -> AsyncGenerator[str, None]:
        """
        Subscribe to SSE events.

        Args:
            batch_id: Optional filter - only receive events for this batch.
                If None, receives all events.

        Yields:
            SSE-formatted event strings (ready to send to client): first a
            ``retry`` hint, then events as they are broadcast, with a
            keep-alive comment whenever 30 seconds pass without an event.
        """
        client_id = await self._generate_client_id()
        # Bounded so that the QueueFull handling in broadcast_status_change
        # can actually trigger for a client that has stopped reading.
        queue: asyncio.Queue = asyncio.Queue(maxsize=self._max_queue_size)

        # Register client
        async with self._lock:
            self._clients[client_id] = (queue, batch_id)

        logger.info(
            f"SSE client {client_id} connected (batch_id filter: {batch_id}). "
            f"Total clients: {len(self._clients)}"
        )

        try:
            # Send initial retry hint for reconnection
            yield "retry: 3000\n\n"

            # Keep connection alive and yield events
            while True:
                try:
                    # Wait for events with timeout for keep-alive
                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
                    yield event
                except asyncio.TimeoutError:
                    # Send keep-alive comment to prevent connection timeout
                    yield ": keep-alive\n\n"
        except asyncio.CancelledError:
            logger.info(f"SSE client {client_id} subscription cancelled")
            raise
        finally:
            # Cleanup: remove client from registry, whatever ended the stream
            async with self._lock:
                self._clients.pop(client_id, None)
            logger.info(
                f"SSE client {client_id} disconnected. "
                f"Remaining clients: {len(self._clients)}"
            )

    async def broadcast_status_change(
        self,
        receipt_id: int,
        status: str,
        processing_status: Optional[str] = None,
        batch_id: Optional[str] = None,
    ) -> int:
        """
        Broadcast a status change event to all relevant clients.

        Args:
            receipt_id: The receipt ID that changed.
            status: New workflow status (DRAFT, PENDING_REVIEW, etc.).
            processing_status: New processing status (pending, processing, completed, failed).
            batch_id: The batch ID this receipt belongs to (for filtering).

        Returns:
            Number of clients notified (clients whose queue was full are
            not counted).
        """
        event = StatusChangeEvent(
            receipt_id=receipt_id,
            status=status,
            processing_status=processing_status,
            batch_id=batch_id,
        )
        # Serialize once; the same frame string is shared by every queue.
        sse_data = event.to_sse_data()

        notified = 0
        async with self._lock:
            for client_id, (queue, client_batch_filter) in self._clients.items():
                # Send event if:
                # 1. Client has no filter (wants all events), OR
                # 2. Client's filter matches the event's batch_id
                if client_batch_filter is None or client_batch_filter == batch_id:
                    try:
                        queue.put_nowait(sse_data)
                        notified += 1
                    except asyncio.QueueFull:
                        logger.warning(
                            f"SSE queue full for client {client_id}, dropping event"
                        )

        if notified > 0:
            logger.debug(
                f"SSE broadcast: receipt_id={receipt_id}, status={status}, "
                f"processing_status={processing_status}, notified={notified} clients"
            )

        return notified

    @property
    def client_count(self) -> int:
        """Get current number of connected clients."""
        return len(self._clients)
|
||||
|
||||
|
||||
# Singleton instance for the application
|
||||
# Created once at import time; the module-level subscribe() and
# broadcast_status_change() helpers delegate to this object.
sse_broadcaster = SSEEventBroadcaster()
|
||||
|
||||
|
||||
# Convenience functions for external use
|
||||
async def subscribe(batch_id: Optional[str] = None) -> AsyncGenerator[str, None]:
    """Subscribe to SSE status change events on the shared broadcaster.

    Args:
        batch_id: Optional filter - only receive events for this batch.
            If None, receives all events.

    Yields:
        SSE-formatted strings produced by ``sse_broadcaster.subscribe``.
    """
    stream = sse_broadcaster.subscribe(batch_id)
    try:
        async for event in stream:
            yield event
    finally:
        # If this wrapper is closed while suspended at the yield above, the
        # inner generator would otherwise stay suspended until the event loop
        # finalizes it. Closing it here runs its cleanup (client
        # deregistration) immediately. It is a no-op if already finished.
        await stream.aclose()
|
||||
|
||||
|
||||
async def broadcast_status_change(
    receipt_id: int,
    status: str,
    processing_status: Optional[str] = None,
    batch_id: Optional[str] = None,
) -> int:
    """Forward a status change to the shared broadcaster.

    Args:
        receipt_id: The receipt ID that changed.
        status: New workflow status.
        processing_status: New processing status, if any.
        batch_id: Batch the receipt belongs to, used for client filtering.

    Returns:
        Whatever ``sse_broadcaster.broadcast_status_change`` returns
        (the number of clients notified).
    """
    event_fields = {
        "receipt_id": receipt_id,
        "status": status,
        "processing_status": processing_status,
        "batch_id": batch_id,
    }
    notified_count = await sse_broadcaster.broadcast_status_change(**event_fields)
    return notified_count
|
||||
Reference in New Issue
Block a user