## Funcționalități Principale

### Bulk Upload & Processing
- Drag & drop pentru upload bonuri multiple oriunde pe pagină
- Batch processing cu job queue și worker pool
- Real-time updates via SSE (Server-Sent Events) cu fallback polling
- Duplicate detection via SHA-256 file hash
- Auto-retry pentru job-uri failed
- Cancel individual jobs sau batch complet

### Mobile UX - Android Native Style
- Top bar fixă cu hamburger, titlu centrat, acțiuni (search/filter)
- Bottom navigation cu 4 tab-uri (Bonuri, Upload, Rapoarte, Setări)
- FAB (Floating Action Button) cu hide/show on scroll
- Filter chips orizontal scrollabile
- Selecție multiplă prin long-press (500ms)
- Select All + Bulk Delete cu confirmare
- Layout Android pentru Create/Edit/View bon (Gmail compose style)

### Bug Fixes
- Refresh individual via SSE în loc de refresh total pagină
- Bonurile cu eroare OCR rămân vizibile pentru editare manuală
- Afișare nume fișier original pentru toate bonurile
- Upload stabil pe mobil (fix race condition File API)
- Păstrare ordine bonuri la refresh (nu se reordonează)

### Backend
- SSE endpoint pentru status updates real-time
- Bulk delete endpoint cu partial success
- Auto-cleanup bonuri failed după 7 zile
- Batch model cu tracking complet

### Testing
- E2E tests cu Playwright
- Unit tests pentru bulk upload, auto-create, cleanup

## Commits Squashed: 43 user stories (US-001 → US-043)

## Branch: ralph/bulk-receipt-upload

## Timp dezvoltare: ~3 zile (Ralph autonomous)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
998 lines
31 KiB
Python
998 lines
31 KiB
Python
"""
|
|
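Bulk upload API endpoints for batch receipt processing.

Endpoints:
- POST /upload - Submit multiple files for OCR processing in a single batch
- GET /batches/{batch_id}/status - Get batch status with optional long-polling
- POST /retry/{receipt_id} - Retry OCR processing for a single failed receipt
- POST /retry-batch/{batch_id} - Retry all failed receipts in a batch
- POST /cancel/{job_id} - Cancel a single pending/processing OCR job
- POST /cancel-batch/{batch_id} - Cancel all pending/processing jobs in a batch

Validation:
- Max 100 files per batch
- Max 10MB per file
- Allowed types: PDF, PNG, JPG

Duplicate Detection (US-007):
- SHA-256 hash calculated for each file
- Duplicate files (same hash + company_id) are skipped and reported in the
  response together with the ID of the existing receipt
- If every file in the upload is a duplicate, the request is rejected with
  409 Conflict
- Non-duplicate files are processed normally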
|
|
"""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import logging
|
|
from datetime import datetime
|
|
from decimal import Decimal
|
|
from pathlib import Path
|
|
from typing import Annotated, List, Optional, Union
|
|
|
|
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Header
|
|
from sqlalchemy import select, func, and_
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from backend.modules.data_entry.db.database import get_session
|
|
from backend.modules.data_entry.db.models import BatchUpload, BatchJob, BatchStatus, Receipt, ReceiptAttachment
|
|
from backend.modules.data_entry.schemas.bulk import (
|
|
BulkUploadResponse,
|
|
BulkUploadResponseWithDuplicates,
|
|
BatchStatusResponse,
|
|
BatchJobInfo,
|
|
DuplicateFileInfo,
|
|
RetryResponse,
|
|
BatchRetryResponse,
|
|
CancelJobResponse,
|
|
CancelBatchResponse
|
|
)
|
|
from backend.modules.data_entry.services.ocr.job_queue import job_queue, OCRJobStatus
|
|
from backend.config import settings
|
|
|
|
# Auth integration
|
|
from shared.auth.dependencies import get_current_user
|
|
from shared.auth.models import CurrentUser
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Router on which the endpoints in this module register themselves via the
# @router.get / @router.post decorators below.
router = APIRouter()
|
|
|
|
|
|
# ============ Helper for selected company from header ============
|
|
|
|
async def get_selected_company(
    current_user: CurrentUser = Depends(get_current_user),
    x_selected_company: Annotated[Optional[str], Header()] = None
) -> int:
    """
    Resolve the company the current request should operate on.

    When the X-Selected-Company header is present it must parse as an integer
    and its string form must appear in ``current_user.companies``. When the
    header is absent (or empty), the first entry of ``current_user.companies``
    is used instead.

    Returns:
        The selected company ID as an int.

    Raises:
        HTTPException 400: Header value is not an integer, or no usable
            company could be determined for the user.
        HTTPException 403: Header names a company the user is not assigned to.
    """
    if not x_selected_company:
        # No header supplied: fall back to the user's first company, if any.
        if current_user.companies:
            try:
                return int(current_user.companies[0])
            except (ValueError, IndexError):
                # Unusable first entry - fall through to the 400 below.
                pass

        raise HTTPException(
            status_code=400,
            detail="Nu aveți nicio firmă asignată"
        )

    try:
        requested_id = int(x_selected_company)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid company ID format: {x_selected_company}"
        )

    # The membership test compares against the string form of the ID.
    if str(requested_id) not in current_user.companies:
        raise HTTPException(
            status_code=403,
            detail=f"Nu aveți acces la firma {requested_id}"
        )

    return requested_id
|
|
|
|
# Validation constants
# Upper bound on the number of files accepted by a single bulk upload request.
MAX_FILES_PER_BATCH = 100
# Upper bound on the size of each uploaded file, in bytes.
MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024  # 10MB
# Upload content types accepted for OCR processing (JPEG, PNG, PDF).
ALLOWED_MIME_TYPES = {"image/jpeg", "image/png", "application/pdf"}
|
|
|
|
|
|
def compute_file_hash(content: bytes) -> str:
    """
    Return the SHA-256 digest of ``content`` as a hex string.

    Identical byte content always yields the identical digest, which is what
    duplicate detection relies on.

    Args:
        content: Raw file bytes

    Returns:
        64-character lowercase hexadecimal SHA-256 digest
    """
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
|
|
|
|
|
|
async def check_duplicate_hashes(
    session: AsyncSession,
    file_hashes: List[str],
    company_id: int
) -> dict[str, int]:
    """
    Look up which of the given file hashes already belong to a receipt of
    this company.

    Args:
        session: Database session
        file_hashes: SHA-256 hashes to look up
        company_id: Company whose receipts are searched

    Returns:
        Dict mapping each already-known hash to the ID of a receipt that
        carries it. Empty when ``file_hashes`` is empty or nothing matches.
    """
    # Nothing to look up - skip the query entirely.
    if not file_hashes:
        return {}

    lookup_stmt = select(Receipt.file_hash, Receipt.id).where(
        and_(
            Receipt.file_hash.in_(file_hashes),
            Receipt.company_id == company_id
        )
    )
    lookup_result = await session.execute(lookup_stmt)

    # Each row is (file_hash, receipt_id); a later row for the same hash
    # overwrites an earlier one.
    return {row[0]: row[1] for row in lookup_result.all()}
|
|
|
|
|
|
@router.post("/upload", response_model=Union[BulkUploadResponse, BulkUploadResponseWithDuplicates])
async def bulk_upload(
    files: List[UploadFile] = File(..., description="Multiple files to upload"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Upload multiple files for batch OCR processing.

    Creates a batch record and queues all files as OCR jobs.
    Invalid files cause entire batch rejection (validation errors).
    Duplicate files are reported separately and skipped - non-duplicates are processed.

    Duplicate Detection (US-007):
    - SHA-256 hash calculated for each file before processing
    - Files whose hash already exists on a receipt of the same company are skipped
    - Response includes duplicate details with existing_receipt_id

    If creating the OCR jobs or committing the batch fails, the database
    transaction is rolled back and every OCR job that was already created for
    this request is cancelled (best effort), so no job keeps running for a
    batch that does not exist.

    Args:
        files: List of image/PDF files (max 100 files, max 10MB each)

    Returns:
        BulkUploadResponse with batch_id and list of job_ids
        BulkUploadResponseWithDuplicates if some files were duplicates

    Raises:
        400: If validation fails (no files, too many files, file too large, invalid type)
        409: If ALL files are duplicates
        500: If job creation fails
    """
    # Validate file count
    if not files:
        raise HTTPException(
            status_code=400,
            detail="No files provided"
        )

    if len(files) > MAX_FILES_PER_BATCH:
        raise HTTPException(
            status_code=400,
            detail=f"Too many files. Maximum {MAX_FILES_PER_BATCH} files per batch."
        )

    # Pre-validate all files before creating any jobs (atomic check)
    invalid_files = []
    file_contents = []

    for file in files:
        # Check MIME type
        if file.content_type not in ALLOWED_MIME_TYPES:
            invalid_files.append(f"{file.filename}: Invalid type ({file.content_type})")
            continue

        # Read content and check size
        content = await file.read()
        if len(content) > MAX_FILE_SIZE_BYTES:
            invalid_files.append(f"{file.filename}: File too large ({len(content) // (1024*1024)}MB > 10MB)")
            continue

        # Store for later processing, together with the SHA-256 hash used
        # for duplicate detection (US-007)
        file_contents.append({
            "filename": file.filename,
            "content": content,
            "mime_type": file.content_type,
            "file_hash": compute_file_hash(content)
        })

    # If any files are invalid, reject the entire batch
    if invalid_files:
        raise HTTPException(
            status_code=400,
            detail={
                "message": f"Validation failed for {len(invalid_files)} file(s)",
                "invalid_files": invalid_files
            }
        )

    # Check for duplicates BEFORE creating batch (US-007)
    # NOTE(review): only hashes already stored on receipts are checked; two
    # identical files inside the same upload are both queued - confirm whether
    # that is intended.
    all_hashes = [f["file_hash"] for f in file_contents]
    existing_duplicates = await check_duplicate_hashes(session, all_hashes, selected_company)

    # Separate duplicate files from processable files
    duplicate_files: List[DuplicateFileInfo] = []
    processable_files = []

    for file_data in file_contents:
        if file_data["file_hash"] in existing_duplicates:
            existing_receipt_id = existing_duplicates[file_data["file_hash"]]
            duplicate_files.append(DuplicateFileInfo(
                filename=file_data["filename"],
                error="duplicate",
                existing_receipt_id=existing_receipt_id,
                message=f"Fișier duplicat - există deja ca bon #{existing_receipt_id}"
            ))
            logger.info(
                f"[BulkUpload] Duplicate detected: {file_data['filename']} "
                f"(hash={file_data['file_hash'][:16]}...) matches receipt #{existing_receipt_id}"
            )
        else:
            processable_files.append(file_data)

    # If ALL files are duplicates, return 409 Conflict
    if len(duplicate_files) == len(file_contents):
        raise HTTPException(
            status_code=409,
            detail={
                "error": "all_duplicates",
                "message": f"Toate cele {len(duplicate_files)} fișiere sunt duplicate",
                "duplicates": [d.model_dump() for d in duplicate_files]
            }
        )

    # If no processable files remain after filtering (shouldn't happen but be safe)
    if not processable_files:
        raise HTTPException(
            status_code=409,
            detail={
                "error": "no_files_to_process",
                "message": "Nu există fișiere de procesat",
                "duplicates": [d.model_dump() for d in duplicate_files]
            }
        )

    # Create batch record with company_id for auto-save
    batch = BatchUpload(
        user_id=current_user.username,
        company_id=selected_company,
        status=BatchStatus.PENDING,
        total_files=len(processable_files)  # Only count processable files
    )
    session.add(batch)
    await session.flush()  # Get batch.id before creating jobs

    # Capture the ID now so it is not read from the ORM object after commit.
    batch_id = batch.id

    # Create OCR jobs for processable files only
    job_ids = []
    batch_jobs = []

    try:
        for file_data in processable_files:
            # Create OCR job using existing job_queue
            # Pass batch_id and file_hash for tracking
            job = await job_queue.create_job(
                file_bytes=file_data["content"],
                mime_type=file_data["mime_type"],
                engine="doctr_plus",  # Default engine for bulk
                username=current_user.username,
                original_filename=file_data["filename"],
                batch_id=batch_id,  # Link job to batch for auto-save integration
                file_hash=file_data["file_hash"]  # Pass hash for storage in receipt
            )

            job_ids.append(job.id)

            # Create batch_job link
            batch_jobs.append(BatchJob(
                batch_id=batch_id,
                job_id=job.id,
                filename=file_data["filename"]
            ))

        # Add all batch_job records and commit everything atomically
        session.add_all(batch_jobs)
        await session.commit()

    except Exception as e:
        # Rollback on any error
        await session.rollback()
        logger.exception(f"[BulkUpload] Failed to create batch: {e}")

        # The rollback does not touch the OCR job queue, so jobs created
        # before the failure would otherwise keep running for a batch that
        # no longer exists. Cancel them; never let cleanup mask the error.
        for orphan_job_id in job_ids:
            try:
                await job_queue.update_status(
                    job_id=orphan_job_id,
                    status=OCRJobStatus.cancelled,
                    error="Batch creation failed"
                )
            except Exception:
                logger.warning(
                    f"[BulkUpload] Could not cancel orphaned job {orphan_job_id}"
                )

        raise HTTPException(
            status_code=500,
            detail=f"Failed to create batch: {str(e)}"
        ) from e

    logger.info(
        f"[BulkUpload] Created batch {batch_id} with {len(job_ids)} jobs "
        f"for user {current_user.username}"
        f"{f', {len(duplicate_files)} duplicates skipped' if duplicate_files else ''}"
    )

    # Return response with duplicate info if any duplicates were found
    if duplicate_files:
        return BulkUploadResponseWithDuplicates(
            batch_id=batch_id,
            job_ids=job_ids,
            total_files=len(file_contents),
            processed_files=len(job_ids),
            duplicate_files=len(duplicate_files),
            duplicates=duplicate_files,
            message=f"{len(job_ids)} fișier(e) în procesare, {len(duplicate_files)} duplicate ignorate"
        )

    return BulkUploadResponse(
        batch_id=batch_id,
        job_ids=job_ids,
        total_files=len(job_ids),
        message=f"{len(job_ids)} files queued for processing"
    )
|
|
|
|
|
|
# Long-polling constants
# Upper bound accepted for the `wait` query parameter of the status endpoint.
MAX_WAIT_SECONDS = 30
# Pause between two consecutive status snapshots while long-polling.
POLL_INTERVAL_SECONDS = 0.5
|
|
|
|
|
|
async def _get_batch_status_snapshot(
    batch_id: int,
    session: AsyncSession
) -> Optional[dict]:
    """
    Get current batch status snapshot.

    Loads the batch and its batch_job rows from the database, asks the OCR
    job queue for the live status of every job, and sums the amounts of the
    receipts already linked to the batch.

    The ORM queries use ``populate_existing`` because this function is called
    repeatedly on the same session while long-polling: without it, rows that
    are still present in the session's identity map would keep the attribute
    values from the first load (e.g. a ``receipt_id`` that was still NULL).

    Args:
        batch_id: Batch to inspect
        session: Database session

    Returns:
        Dict with keys ``batch``, ``pending_count``, ``processing_count``,
        ``completed_count``, ``failed_count``, ``jobs`` and ``total_amount``,
        or None if the batch does not exist. Jobs whose status is none of
        pending/processing/completed/failed are listed in ``jobs`` but not
        included in any counter.
    """
    # Get batch record
    batch_result = await session.execute(
        select(BatchUpload)
        .where(BatchUpload.id == batch_id)
        .execution_options(populate_existing=True)
    )
    batch = batch_result.scalar_one_or_none()

    if not batch:
        return None

    # Get all batch_jobs for this batch
    batch_jobs_result = await session.execute(
        select(BatchJob)
        .where(BatchJob.batch_id == batch_id)
        .execution_options(populate_existing=True)
    )
    batch_jobs = batch_jobs_result.scalars().all()

    if not batch_jobs:
        return {
            "batch": batch,
            "pending_count": 0,
            "processing_count": 0,
            "completed_count": 0,
            "failed_count": 0,
            "jobs": [],
            "total_amount": None
        }

    # Get job statuses and error_messages from OCR job queue (SQLite)
    job_statuses = {}
    job_errors = {}
    for bj in batch_jobs:
        job = await job_queue.get_job(bj.job_id)
        if job:
            job_statuses[bj.job_id] = job.status.value
            job_errors[bj.job_id] = job.error_message
        else:
            # Job not found in queue - treat as failed
            job_statuses[bj.job_id] = "failed"
            job_errors[bj.job_id] = "Job not found in queue"

    # Count by status in a single pass; other statuses are not counted
    counts = {"pending": 0, "processing": 0, "completed": 0, "failed": 0}
    for status in job_statuses.values():
        if status in counts:
            counts[status] += 1

    # Build jobs list with status info
    jobs_info = [
        {
            "job_id": bj.job_id,
            "filename": bj.filename,
            "status": job_statuses.get(bj.job_id, "failed"),
            "receipt_id": bj.receipt_id,
            "error_message": job_errors.get(bj.job_id)
        }
        for bj in batch_jobs
    ]

    # Calculate total_amount from the receipts already linked to batch jobs
    total_amount = None
    receipt_ids = [bj.receipt_id for bj in batch_jobs if bj.receipt_id is not None]
    if receipt_ids:
        amount_result = await session.execute(
            select(func.sum(Receipt.amount)).where(Receipt.id.in_(receipt_ids))
        )
        total_sum = amount_result.scalar()
        if total_sum is not None:
            total_amount = float(total_sum)

    return {
        "batch": batch,
        "pending_count": counts["pending"],
        "processing_count": counts["processing"],
        "completed_count": counts["completed"],
        "failed_count": counts["failed"],
        "jobs": jobs_info,
        "total_amount": total_amount
    }
|
|
|
|
|
|
def _compute_batch_overall_status(pending: int, processing: int, completed: int, failed: int, total: int) -> str:
    """
    Derive the overall batch status string from per-status job counts.

    - Nothing pending or processing: FAILED when ``failed`` equals ``total``,
      otherwise COMPLETED.
    - Otherwise PROCESSING as soon as any job is processing, completed or
      failed.
    - Otherwise PENDING.
    """
    still_running = pending + processing

    if still_running == 0:
        # All jobs finished
        final_state = BatchStatus.FAILED if failed == total else BatchStatus.COMPLETED
        return final_state.value

    if processing > 0 or completed > 0 or failed > 0:
        return BatchStatus.PROCESSING.value

    return BatchStatus.PENDING.value
|
|
|
|
|
|
@router.get("/batches/{batch_id}/status", response_model=BatchStatusResponse)
async def get_batch_status(
    batch_id: int,
    wait: Optional[int] = Query(
        default=None,
        ge=0,
        le=MAX_WAIT_SECONDS,
        description="Long-polling wait time in seconds (max 30)"
    ),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Get batch processing status with optional long-polling.

    Returns aggregated status counts and individual job statuses.
    When `wait` parameter is provided, the endpoint will poll until:
    - Status changes from initial snapshot
    - All jobs complete (pending + processing = 0)
    - Timeout is reached

    The timeout is enforced against the event loop's monotonic clock, so time
    spent taking each snapshot counts towards `wait`.

    Access is limited to the user who uploaded the batch and to users
    assigned to the batch's company. Anyone else gets the same 404 as for a
    missing batch, so batch IDs cannot be probed.

    Args:
        batch_id: Batch ID to query
        wait: Optional wait time in seconds for long-polling (0-30)

    Returns:
        BatchStatusResponse with status counts and job details

    Raises:
        404: If batch not found or not accessible to the current user
    """
    count_keys = ("pending_count", "processing_count", "completed_count", "failed_count")

    # Get initial snapshot
    snapshot = await _get_batch_status_snapshot(batch_id, session)

    if snapshot is None:
        raise HTTPException(
            status_code=404,
            detail=f"Batch {batch_id} not found"
        )

    # Authorization: uploader, or member of the batch's company
    # (company IDs are held as strings on the user).
    batch = snapshot["batch"]
    is_uploader = batch.user_id == current_user.username
    in_company = str(batch.company_id) in current_user.companies
    if not (is_uploader or in_company):
        raise HTTPException(
            status_code=404,
            detail=f"Batch {batch_id} not found"
        )

    # Long-poll only if requested and there are still jobs in progress
    if wait and snapshot["pending_count"] + snapshot["processing_count"] > 0:
        initial_counts = tuple(snapshot[key] for key in count_keys)

        loop = asyncio.get_running_loop()
        deadline = loop.time() + wait

        while loop.time() < deadline:
            await asyncio.sleep(POLL_INTERVAL_SECONDS)

            # Refresh snapshot
            snapshot = await _get_batch_status_snapshot(batch_id, session)
            if snapshot is None:
                # Batch deleted during polling (edge case)
                raise HTTPException(status_code=404, detail=f"Batch {batch_id} not found")

            # Status changed, return immediately
            if tuple(snapshot[key] for key in count_keys) != initial_counts:
                break

            # All jobs finished
            if snapshot["pending_count"] + snapshot["processing_count"] == 0:
                break

    # Build response
    batch = snapshot["batch"]
    total_files = batch.total_files

    overall_status = _compute_batch_overall_status(
        snapshot["pending_count"],
        snapshot["processing_count"],
        snapshot["completed_count"],
        snapshot["failed_count"],
        total_files
    )

    jobs = [
        BatchJobInfo(
            job_id=j["job_id"],
            filename=j["filename"],
            status=j["status"],
            receipt_id=j["receipt_id"],
            error_message=j.get("error_message")
        )
        for j in snapshot["jobs"]
    ]

    return BatchStatusResponse(
        batch_id=batch.id,
        status=overall_status,
        total_files=total_files,
        pending_count=snapshot["pending_count"],
        processing_count=snapshot["processing_count"],
        completed_count=snapshot["completed_count"],
        failed_count=snapshot["failed_count"],
        jobs=jobs,
        total_amount=snapshot["total_amount"],
        created_at=batch.created_at
    )
|
|
|
|
|
|
# ============ Retry Endpoints (US-006) ============
|
|
|
|
|
|
async def _retry_single_receipt(
    session: AsyncSession,
    receipt: Receipt,
    username: str
) -> tuple[bool, Optional[str], Optional[str]]:
    """
    Retry processing for a single receipt.

    Loads one attachment of the receipt, reads its file from the upload
    directory, creates a new OCR job from it and resets the receipt's
    processing fields to 'pending'. The change is flushed but NOT committed;
    committing is left to the caller.

    The file is read in a worker thread so that reading a large attachment
    does not block the event loop.

    Args:
        session: Database session
        receipt: Receipt to retry
        username: Username for the new OCR job

    Returns:
        Tuple of (success, job_id, error_message). On success error_message
        is None; on failure job_id is None and error_message is a
        user-facing description. This function does not raise for missing
        files, read errors or job-creation errors - they are reported through
        the return value.
    """
    # Get one attachment to find the source file
    # NOTE(review): no ORDER BY, so which attachment is picked is up to the
    # database when a receipt has several.
    attachments_result = await session.execute(
        select(ReceiptAttachment)
        .where(ReceiptAttachment.receipt_id == receipt.id)
        .limit(1)
    )
    attachment = attachments_result.scalar_one_or_none()

    if not attachment:
        return False, None, "Bonul nu are fișier atașat"

    # Construct full path to attachment file
    file_path = settings.data_entry_upload_path_resolved / attachment.file_path

    # Read file content off the event loop; a missing file is detected from
    # the read itself rather than a separate exists() check, which avoids the
    # check-then-open race.
    try:
        file_bytes = await asyncio.to_thread(file_path.read_bytes)
    except (FileNotFoundError, NotADirectoryError):
        return False, None, "Fișierul original nu mai este disponibil"
    except Exception as e:
        logger.exception(f"[Retry] Failed to read file {file_path}: {e}")
        return False, None, f"Eroare la citirea fișierului: {str(e)}"

    # Create new OCR job
    try:
        job = await job_queue.create_job(
            file_bytes=file_bytes,
            mime_type=attachment.mime_type,
            engine="doctr_plus",
            username=username,
            original_filename=attachment.filename,
            batch_id=None,  # No batch for retry - direct processing
            file_hash=receipt.file_hash
        )

        # Reset receipt processing status
        receipt.processing_status = "pending"
        receipt.processing_error = None
        receipt.processing_started_at = datetime.utcnow()
        receipt.processing_completed_at = None

        await session.flush()

        logger.info(f"[Retry] Receipt {receipt.id} requeued as job {job.id}")
        return True, job.id, None

    except Exception as e:
        logger.exception(f"[Retry] Failed to create job for receipt {receipt.id}: {e}")
        return False, None, f"Eroare la crearea job-ului OCR: {str(e)}"
|
|
|
|
|
|
@router.post("/retry/{receipt_id}", response_model=RetryResponse)
async def retry_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Re-run OCR for one receipt whose processing previously failed.

    The receipt is looked up within the selected company, must currently be
    in 'failed' status, and is requeued from its stored attachment. The
    change is committed only when requeueing succeeded.

    Args:
        receipt_id: ID of the receipt to retry

    Returns:
        RetryResponse with success status and new job ID

    Raises:
        404: If the receipt does not exist for the selected company
        400: If the receipt is not in 'failed' status
        400: If requeueing failed (e.g. original file not available)
    """
    # Scope the lookup to the selected company
    match_receipt = and_(
        Receipt.id == receipt_id,
        Receipt.company_id == selected_company
    )
    lookup = await session.execute(select(Receipt).where(match_receipt))
    target = lookup.scalar_one_or_none()

    if not target:
        raise HTTPException(
            status_code=404,
            detail=f"Bonul #{receipt_id} nu a fost găsit"
        )

    # Only failed receipts may be retried
    if target.processing_status != "failed":
        raise HTTPException(
            status_code=400,
            detail=f"Bonul nu este în stare de eroare (status actual: {target.processing_status})"
        )

    # Attempt retry
    requeued, new_job_id, failure_reason = await _retry_single_receipt(
        session, target, current_user.username
    )

    if not requeued:
        raise HTTPException(
            status_code=400,
            detail=failure_reason or "Eroare necunoscută la reîncărcare"
        )

    await session.commit()

    return RetryResponse(
        success=True,
        receipt_id=receipt_id,
        job_id=new_job_id,
        message="Bon reîncarcat în procesare"
    )
|
|
|
|
|
|
@router.post("/retry-batch/{batch_id}", response_model=BatchRetryResponse)
async def retry_batch_failed(
    batch_id: str,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Re-run OCR for every failed receipt of a batch.

    Selects the receipts of the selected company whose batch_id matches and
    whose processing_status is 'failed', retries each one in turn, then
    commits once at the end.

    Args:
        batch_id: Batch ID (UUID string from receipt.batch_id)

    Returns:
        BatchRetryResponse with counts of successful and failed retries and
        one error line per receipt that could not be requeued

    Raises:
        404: If no failed receipts found for batch
    """
    # Find all failed receipts in this batch
    failed_in_batch = and_(
        Receipt.batch_id == batch_id,
        Receipt.company_id == selected_company,
        Receipt.processing_status == "failed"
    )
    query_result = await session.execute(select(Receipt).where(failed_in_batch))
    failed_receipts = query_result.scalars().all()

    if not failed_receipts:
        raise HTTPException(
            status_code=404,
            detail=f"Nu există bonuri cu erori în batch-ul {batch_id}"
        )

    # Retry each receipt, collecting one message per failure
    errors = []
    retried_count = 0

    for receipt in failed_receipts:
        requeued, _new_job_id, reason = await _retry_single_receipt(
            session, receipt, current_user.username
        )

        if not requeued:
            errors.append(f"Bon #{receipt.id}: {reason}")
            continue

        retried_count += 1

    # Every failed retry contributed exactly one error line
    failed_count = len(errors)

    await session.commit()

    summary = f"{retried_count} bonuri reîncarcate în procesare"
    if failed_count > 0:
        summary += f", {failed_count} erori"

    return BatchRetryResponse(
        success=retried_count > 0,
        batch_id=batch_id,
        retried_count=retried_count,
        failed_count=failed_count,
        errors=errors,
        message=summary
    )
|
|
|
|
|
|
# ============ Cancel Endpoints (US-014) ============
|
|
|
|
|
|
@router.post("/cancel/{job_id}", response_model=CancelJobResponse)
async def cancel_job(
    job_id: str,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Cancel a single OCR processing job.

    Jobs whose status is 'completed', 'failed' or 'cancelled' are refused
    with a 400; any other status is switched to 'cancelled'.

    Only the user who uploaded the job's batch, or a user assigned to that
    batch's company, may cancel it. Anyone else gets the same 404 as for an
    unknown job.

    This endpoint only updates the job's status in the OCR job queue; it does
    not touch receipts.

    Args:
        job_id: The UUID of the OCR job to cancel

    Returns:
        CancelJobResponse with cancellation details

    Raises:
        404: If job not found in batch_jobs table, not accessible to the
             current user, or missing from the OCR job queue
        400: If job has already completed, failed or been cancelled
        500: If the job queue reports that the status update failed
    """
    # Find the job in batch_jobs table
    batch_job_result = await session.execute(
        select(BatchJob).where(BatchJob.job_id == job_id)
    )
    batch_job = batch_job_result.scalar_one_or_none()

    if not batch_job:
        raise HTTPException(
            status_code=404,
            detail=f"Job {job_id} nu a fost găsit"
        )

    # Authorization: resolve the owning batch and require the caller to be
    # its uploader or a member of its company (company IDs are held as
    # strings on the user). A job without a batch row is treated as not found.
    owning_batch_result = await session.execute(
        select(BatchUpload).where(BatchUpload.id == batch_job.batch_id)
    )
    owning_batch = owning_batch_result.scalar_one_or_none()

    if owning_batch is None or not (
        owning_batch.user_id == current_user.username
        or str(owning_batch.company_id) in current_user.companies
    ):
        raise HTTPException(
            status_code=404,
            detail=f"Job {job_id} nu a fost găsit"
        )

    # Get the OCR job from job_queue to check current status
    ocr_job = await job_queue.get_job(job_id)

    if not ocr_job:
        raise HTTPException(
            status_code=404,
            detail=f"Job {job_id} nu există în coada de procesare"
        )

    # Check if job can be cancelled: terminal statuses map to the reason
    # shown to the user.
    current_status = ocr_job.status.value
    refusal_by_status = {
        OCRJobStatus.completed.value: "Job-ul a fost deja procesat cu succes. Nu poate fi anulat.",
        OCRJobStatus.failed.value: "Job-ul a eșuat deja. Folosiți opțiunea de reîncercare în loc de anulare.",
        OCRJobStatus.cancelled.value: "Job-ul a fost deja anulat.",
    }
    refusal = refusal_by_status.get(current_status)
    if refusal is not None:
        raise HTTPException(
            status_code=400,
            detail=refusal
        )

    # Update job status to cancelled in job_queue (SQLite)
    cancelled_at = datetime.utcnow()
    success = await job_queue.update_status(
        job_id=job_id,
        status=OCRJobStatus.cancelled,
        error="Cancelled by user"
    )

    if not success:
        raise HTTPException(
            status_code=500,
            detail="Eroare la anularea job-ului"
        )

    logger.info(
        f"[CancelJob] Job {job_id} cancelled by {current_user.username} "
        f"(previous status: {current_status})"
    )

    return CancelJobResponse(
        success=True,
        job_id=job_id,
        cancelled_at=cancelled_at,
        message="Job anulat cu succes"
    )
|
|
|
|
|
|
@router.post("/cancel-batch/{batch_id}", response_model=CancelBatchResponse)
async def cancel_batch(
    batch_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Cancel all pending/processing jobs in a batch.

    Every job of the batch whose status in the OCR job queue is 'pending' or
    'processing' is switched to 'cancelled'. Jobs in any other status, jobs
    missing from the queue, and jobs whose status update fails are counted as
    skipped.

    Only the user who uploaded the batch, or a user assigned to the batch's
    company, may cancel it. Anyone else gets the same 404 as for an unknown
    batch.

    This endpoint only updates job statuses in the OCR job queue; it does not
    touch receipts.

    Args:
        batch_id: The batch ID to cancel

    Returns:
        CancelBatchResponse with counts of cancelled and skipped jobs;
        ``success`` is True only when at least one job was cancelled

    Raises:
        404: If batch not found, not accessible to the current user, or no
             jobs exist for batch
    """
    # Verify batch exists
    batch_result = await session.execute(
        select(BatchUpload).where(BatchUpload.id == batch_id)
    )
    batch = batch_result.scalar_one_or_none()

    # Authorization: uploader, or member of the batch's company (company IDs
    # are held as strings on the user). Denied access is reported exactly
    # like a missing batch.
    if not batch or not (
        batch.user_id == current_user.username
        or str(batch.company_id) in current_user.companies
    ):
        raise HTTPException(
            status_code=404,
            detail=f"Batch {batch_id} nu a fost găsit"
        )

    # Get all batch_jobs for this batch
    batch_jobs_result = await session.execute(
        select(BatchJob).where(BatchJob.batch_id == batch_id)
    )
    batch_jobs = batch_jobs_result.scalars().all()

    if not batch_jobs:
        raise HTTPException(
            status_code=404,
            detail=f"Nu există job-uri în batch-ul {batch_id}"
        )

    # Process each job - cancel pending/processing, skip everything else
    cancellable_statuses = (OCRJobStatus.pending.value, OCRJobStatus.processing.value)
    cancelled_count = 0
    skipped_count = 0

    for batch_job in batch_jobs:
        # Get current job status from OCR job queue
        ocr_job = await job_queue.get_job(batch_job.job_id)

        # Job not found in queue, or already completed/failed/cancelled - skip
        if not ocr_job or ocr_job.status.value not in cancellable_statuses:
            skipped_count += 1
            continue

        success = await job_queue.update_status(
            job_id=batch_job.job_id,
            status=OCRJobStatus.cancelled,
            error="Cancelled by user (batch cancel)"
        )

        if success:
            cancelled_count += 1
            logger.debug(f"[CancelBatch] Cancelled job {batch_job.job_id}")
        else:
            # Failed to cancel - count as skipped
            skipped_count += 1
            logger.warning(
                f"[CancelBatch] Failed to cancel job {batch_job.job_id}"
            )

    logger.info(
        f"[CancelBatch] Batch {batch_id} cancelled by {current_user.username}: "
        f"{cancelled_count} cancelled, {skipped_count} skipped"
    )

    # Build message
    if cancelled_count == 0:
        message = f"Nu există job-uri de anulat în batch-ul {batch_id}"
    elif skipped_count == 0:
        message = f"{cancelled_count} job-uri anulate"
    else:
        message = f"{cancelled_count} job-uri anulate, {skipped_count} ignorate (deja procesate)"

    return CancelBatchResponse(
        success=cancelled_count > 0,
        batch_id=batch_id,
        cancelled_count=cancelled_count,
        skipped_count=skipped_count,
        message=message
    )
|