Files
roa2web-service-auto/backend/modules/data_entry/routers/bulk.py
Claude Agent 7b3541403f feat(data-entry): Bulk Receipt Upload cu Mobile UX Android Nativ
## Funcționalități Principale

### Bulk Upload & Processing
- Drag & drop pentru upload bonuri multiple oriunde pe pagină
- Batch processing cu job queue și worker pool
- Real-time updates via SSE (Server-Sent Events) cu fallback polling
- Duplicate detection via SHA-256 file hash
- Auto-retry pentru job-uri failed
- Cancel individual jobs sau batch complet

### Mobile UX - Android Native Style
- Top bar fixă cu hamburger, titlu centrat, acțiuni (search/filter)
- Bottom navigation cu 4 tab-uri (Bonuri, Upload, Rapoarte, Setări)
- FAB (Floating Action Button) cu hide/show on scroll
- Filter chips orizontal scrollabile
- Selecție multiplă prin long-press (500ms)
- Select All + Bulk Delete cu confirmare
- Layout Android pentru Create/Edit/View bon (Gmail compose style)

### Bug Fixes
- Refresh individual via SSE în loc de refresh total pagină
- Bonurile cu eroare OCR rămân vizibile pentru editare manuală
- Afișare nume fișier original pentru toate bonurile
- Upload stabil pe mobil (fix race condition File API)
- Păstrare ordine bonuri la refresh (nu se reordonează)

### Backend
- SSE endpoint pentru status updates real-time
- Bulk delete endpoint cu partial success
- Auto-cleanup bonuri failed după 7 zile
- Batch model cu tracking complet

### Testing
- E2E tests cu Playwright
- Unit tests pentru bulk upload, auto-create, cleanup

## Commits Squashed: 43 user stories (US-001 → US-043)
## Branch: ralph/bulk-receipt-upload
## Timp dezvoltare: ~3 zile (Ralph autonomous)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:33:17 +00:00

998 lines
31 KiB
Python

"""
Bulk upload API endpoints for batch receipt processing.
Endpoints:
- POST /upload - Submit multiple files for OCR processing in a single batch
- GET /batches/{batch_id}/status - Get batch status with optional long-polling
Validation:
- Max 100 files per batch
- Max 10MB per file
- Allowed types: PDF, PNG, JPG
Duplicate Detection (US-007):
- SHA-256 hash calculated for each file
- Duplicate files (same hash + company_id) are rejected with 409 Conflict info
- Duplicates reported in error list, non-duplicates processed normally
"""
import asyncio
import hashlib
import logging
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from typing import Annotated, List, Optional, Union
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Header
from sqlalchemy import select, func, and_
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.models import BatchUpload, BatchJob, BatchStatus, Receipt, ReceiptAttachment
from backend.modules.data_entry.schemas.bulk import (
BulkUploadResponse,
BulkUploadResponseWithDuplicates,
BatchStatusResponse,
BatchJobInfo,
DuplicateFileInfo,
RetryResponse,
BatchRetryResponse,
CancelJobResponse,
CancelBatchResponse
)
from backend.modules.data_entry.services.ocr.job_queue import job_queue, OCRJobStatus
from backend.config import settings
# Auth integration
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router on which the upload, status, retry and cancel endpoints of this module are registered.
router = APIRouter()
# ============ Helper for selected company from header ============
async def get_selected_company(
    current_user: CurrentUser = Depends(get_current_user),
    x_selected_company: Annotated[Optional[str], Header()] = None
) -> int:
    """
    Resolve the company the current request operates on.

    The company is taken from the ``X-Selected-Company`` header when present;
    otherwise the first entry of the user's company list is used.

    Args:
        current_user: Authenticated user, injected by ``get_current_user``.
        x_selected_company: Raw value of the ``X-Selected-Company`` header.

    Returns:
        The selected company ID as an integer.

    Raises:
        HTTPException 400: Header value is not an integer, or the user has no
            usable company to fall back to.
        HTTPException 403: The user's company list does not contain the
            requested company.
    """
    if not x_selected_company:
        # No header supplied: fall back to the first company of the user.
        user_companies = current_user.companies
        if user_companies:
            try:
                return int(user_companies[0])
            except (ValueError, IndexError):
                # Unusable first entry - fall through to the 400 below.
                pass
        raise HTTPException(
            status_code=400,
            detail="Nu aveți nicio firmă asignată"
        )

    try:
        requested_id = int(x_selected_company)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid company ID format: {x_selected_company}"
        )

    # The user's company list is compared against the string form of the ID.
    if str(requested_id) not in current_user.companies:
        raise HTTPException(
            status_code=403,
            detail=f"Nu aveți acces la firma {requested_id}"
        )
    return requested_id
# Validation constants
# Upper bound on the number of files accepted in a single bulk upload request.
MAX_FILES_PER_BATCH = 100
# Upper bound on the size of one uploaded file, in bytes.
MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024 # 10MB
# Content types accepted for upload (JPEG, PNG, PDF), compared against the upload's declared content type.
ALLOWED_MIME_TYPES = {"image/jpeg", "image/png", "application/pdf"}
def compute_file_hash(content: bytes) -> str:
    """
    Return the SHA-256 digest of ``content`` as a hex string.

    Identical byte content always yields the same digest, which is what the
    duplicate detection relies on.

    Args:
        content: Raw file bytes.

    Returns:
        64-character lowercase hexadecimal SHA-256 digest.
    """
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
async def check_duplicate_hashes(
    session: AsyncSession,
    file_hashes: List[str],
    company_id: int
) -> dict[str, int]:
    """
    Look up which of the given file hashes already belong to a receipt of the company.

    Args:
        session: Database session used for the lookup.
        file_hashes: SHA-256 hex digests to check.
        company_id: Company the lookup is restricted to.

    Returns:
        Mapping of hash -> ID of a receipt that already has this hash.
        Hashes without a matching receipt are absent from the mapping.
    """
    if not file_hashes:
        # Nothing to look up - skip the query entirely.
        return {}

    lookup = select(Receipt.file_hash, Receipt.id).where(
        and_(
            Receipt.file_hash.in_(file_hashes),
            Receipt.company_id == company_id
        )
    )
    found = await session.execute(lookup)

    # Each row is a (file_hash, receipt_id) pair; when several receipts share
    # a hash, the last row returned wins.
    return {known_hash: receipt_id for known_hash, receipt_id in found.all()}
@router.post("/upload", response_model=Union[BulkUploadResponse, BulkUploadResponseWithDuplicates])
async def bulk_upload(
    files: List[UploadFile] = File(..., description="Multiple files to upload"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Upload multiple files for batch OCR processing.

    Creates a batch record and queues every accepted file as an OCR job.
    Any invalid file (type or size) rejects the entire request.
    Files whose hash already belongs to a receipt of the selected company are
    skipped and reported; the remaining files are processed.

    Duplicate Detection (US-007):
    - SHA-256 hash calculated for each file before processing
    - Files with an existing hash for the same company are skipped
    - Response includes duplicate details with existing_receipt_id

    Args:
        files: List of image/PDF files (max MAX_FILES_PER_BATCH files,
            max MAX_FILE_SIZE_BYTES each)
        session: Database session.
        current_user: Authenticated user; their username is stored on the
            batch and on each OCR job.
        selected_company: Company the batch belongs to and against which
            duplicates are checked.

    Returns:
        BulkUploadResponse with batch_id and list of job_ids
        BulkUploadResponseWithDuplicates if some files were duplicates

    Raises:
        400: If validation fails (no files, too many files, file too large, invalid type)
        409: If ALL files are duplicates
        500: If job creation or the database commit fails
    """
    # Validate file count
    if len(files) == 0:
        raise HTTPException(
            status_code=400,
            detail="No files provided"
        )
    if len(files) > MAX_FILES_PER_BATCH:
        raise HTTPException(
            status_code=400,
            detail=f"Too many files. Maximum {MAX_FILES_PER_BATCH} files per batch."
        )

    # Pre-validate all files before creating any jobs (all-or-nothing check)
    bytes_per_mb = 1024 * 1024
    limit_mb = MAX_FILE_SIZE_BYTES // bytes_per_mb
    invalid_files = []
    file_contents = []
    for file in files:
        # Check the declared content type
        if file.content_type not in ALLOWED_MIME_TYPES:
            invalid_files.append(f"{file.filename}: Invalid type ({file.content_type})")
            continue

        # Read content and check size
        content = await file.read()
        if len(content) > MAX_FILE_SIZE_BYTES:
            # One decimal place so that e.g. a 10.5MB file is not reported as
            # "10MB > 10MB"; the limit is derived from the constant.
            size_mb = len(content) / bytes_per_mb
            invalid_files.append(
                f"{file.filename}: File too large ({size_mb:.1f}MB > {limit_mb}MB)"
            )
            continue

        # Store for later processing, together with the SHA-256 hash used for
        # duplicate detection (US-007)
        file_contents.append({
            "filename": file.filename,
            "content": content,
            "mime_type": file.content_type,
            "file_hash": compute_file_hash(content)
        })

    # If any files are invalid, reject the entire batch
    if invalid_files:
        raise HTTPException(
            status_code=400,
            detail={
                "message": f"Validation failed for {len(invalid_files)} file(s)",
                "invalid_files": invalid_files
            }
        )

    # Check for duplicates BEFORE creating batch (US-007).
    # NOTE(review): hashes are only compared against stored receipts; two
    # identical files inside the same request are both queued.
    all_hashes = [f["file_hash"] for f in file_contents]
    existing_duplicates = await check_duplicate_hashes(session, all_hashes, selected_company)

    # Separate duplicate files from processable files
    duplicate_files: List[DuplicateFileInfo] = []
    processable_files = []
    for file_data in file_contents:
        if file_data["file_hash"] in existing_duplicates:
            existing_receipt_id = existing_duplicates[file_data["file_hash"]]
            duplicate_files.append(DuplicateFileInfo(
                filename=file_data["filename"],
                error="duplicate",
                existing_receipt_id=existing_receipt_id,
                message=f"Fișier duplicat - există deja ca bon #{existing_receipt_id}"
            ))
            logger.info(
                f"[BulkUpload] Duplicate detected: {file_data['filename']} "
                f"(hash={file_data['file_hash'][:16]}...) matches receipt #{existing_receipt_id}"
            )
        else:
            processable_files.append(file_data)

    # If ALL files are duplicates, return 409 Conflict
    if len(duplicate_files) == len(file_contents):
        raise HTTPException(
            status_code=409,
            detail={
                "error": "all_duplicates",
                "message": f"Toate cele {len(duplicate_files)} fișiere sunt duplicate",
                "duplicates": [d.model_dump() for d in duplicate_files]
            }
        )

    # If no processable files remain after filtering (shouldn't happen but be safe)
    if not processable_files:
        raise HTTPException(
            status_code=409,
            detail={
                "error": "no_files_to_process",
                "message": "Nu există fișiere de procesat",
                "duplicates": [d.model_dump() for d in duplicate_files]
            }
        )

    # Create batch record with company_id for auto-save
    batch = BatchUpload(
        user_id=current_user.username,
        company_id=selected_company,
        status=BatchStatus.PENDING,
        total_files=len(processable_files)  # Only count processable files
    )
    session.add(batch)
    await session.flush()  # Get batch.id before creating jobs
    # Capture the ID once so nothing below needs to touch the ORM object
    # after commit/rollback.
    batch_id = batch.id

    # Create OCR jobs for processable files only
    job_ids = []
    try:
        batch_jobs = []
        for file_data in processable_files:
            # Create OCR job using the existing job_queue;
            # batch_id and file_hash are passed along for tracking
            job = await job_queue.create_job(
                file_bytes=file_data["content"],
                mime_type=file_data["mime_type"],
                engine="doctr_plus",  # Default engine for bulk
                username=current_user.username,
                original_filename=file_data["filename"],
                batch_id=batch_id,  # Link job to batch for auto-save integration
                file_hash=file_data["file_hash"]  # Pass hash for storage in receipt
            )
            job_ids.append(job.id)
            # Link row between the batch and the queued job
            batch_jobs.append(BatchJob(
                batch_id=batch_id,
                job_id=job.id,
                filename=file_data["filename"]
            ))

        session.add_all(batch_jobs)
        # Commit the batch and all link rows in one transaction
        await session.commit()
    except Exception as e:
        # The job queue is not covered by the DB transaction: roll back the
        # DB side, then cancel the jobs that were already queued so they do
        # not run for a batch that no longer exists.
        await session.rollback()
        logger.exception(f"[BulkUpload] Failed to create batch: {e}")
        for orphan_job_id in job_ids:
            try:
                await job_queue.update_status(
                    job_id=orphan_job_id,
                    status=OCRJobStatus.cancelled,
                    error="Batch creation failed"
                )
            except Exception as cleanup_error:
                # Best effort only - the original failure is what gets reported.
                logger.warning(
                    f"[BulkUpload] Could not cancel orphaned job {orphan_job_id}: {cleanup_error}"
                )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to create batch: {str(e)}"
        ) from e

    logger.info(
        f"[BulkUpload] Created batch {batch_id} with {len(job_ids)} jobs "
        f"for user {current_user.username}"
        f"{f', {len(duplicate_files)} duplicates skipped' if duplicate_files else ''}"
    )

    # Return response with duplicate info if any duplicates were found
    if duplicate_files:
        return BulkUploadResponseWithDuplicates(
            batch_id=batch_id,
            job_ids=job_ids,
            total_files=len(file_contents),
            processed_files=len(job_ids),
            duplicate_files=len(duplicate_files),
            duplicates=duplicate_files,
            message=f"{len(job_ids)} fișier(e) în procesare, {len(duplicate_files)} duplicate ignorate"
        )
    return BulkUploadResponse(
        batch_id=batch_id,
        job_ids=job_ids,
        total_files=len(job_ids),
        message=f"{len(job_ids)} files queued for processing"
    )
# Long-polling constants
# Largest value accepted for the `wait` query parameter of the batch status endpoint, in seconds.
MAX_WAIT_SECONDS = 30
# Sleep between two status re-checks while long-polling, in seconds.
POLL_INTERVAL_SECONDS = 0.5
async def _get_batch_status_snapshot(
    batch_id: int,
    session: AsyncSession
) -> Optional[dict]:
    """
    Collect the current state of a batch and its jobs.

    Args:
        batch_id: ID of the batch to inspect.
        session: Database session used to load the batch, its job links and
            the receipt amounts.

    Returns:
        None when the batch does not exist; otherwise a dict with keys
        "batch", "pending_count", "processing_count", "completed_count",
        "failed_count", "jobs" (list of per-job dicts) and "total_amount"
        (float sum of linked receipt amounts, or None).
    """
    batch_row = await session.execute(
        select(BatchUpload).where(BatchUpload.id == batch_id)
    )
    batch = batch_row.scalar_one_or_none()
    if not batch:
        return None

    link_rows = await session.execute(
        select(BatchJob).where(BatchJob.batch_id == batch_id)
    )
    linked_jobs = link_rows.scalars().all()
    if not linked_jobs:
        # Batch without any job links: report an all-zero snapshot.
        return {
            "batch": batch,
            "pending_count": 0,
            "processing_count": 0,
            "completed_count": 0,
            "failed_count": 0,
            "jobs": [],
            "total_amount": None
        }

    # Status and error text come from the OCR job queue, keyed by job ID.
    status_by_job = {}
    error_by_job = {}
    for link in linked_jobs:
        queued = await job_queue.get_job(link.job_id)
        if not queued:
            # A job missing from the queue is reported as failed.
            status_by_job[link.job_id] = "failed"
            error_by_job[link.job_id] = "Job not found in queue"
            continue
        status_by_job[link.job_id] = queued.status.value
        error_by_job[link.job_id] = queued.error_message

    # Only these four states are tallied; any other state is not counted.
    observed_states = list(status_by_job.values())
    pending_count = observed_states.count("pending")
    processing_count = observed_states.count("processing")
    completed_count = observed_states.count("completed")
    failed_count = observed_states.count("failed")

    jobs_info = [
        {
            "job_id": link.job_id,
            "filename": link.filename,
            "status": status_by_job.get(link.job_id, "failed"),
            "receipt_id": link.receipt_id,
            "error_message": error_by_job.get(link.job_id)
        }
        for link in linked_jobs
    ]

    # Sum the amounts of the receipts already linked to jobs of this batch.
    total_amount = None
    receipt_ids = [link.receipt_id for link in linked_jobs if link.receipt_id is not None]
    if receipt_ids:
        sum_row = await session.execute(
            select(func.sum(Receipt.amount)).where(Receipt.id.in_(receipt_ids))
        )
        amount_sum = sum_row.scalar()
        if amount_sum is not None:
            total_amount = float(amount_sum)

    return {
        "batch": batch,
        "pending_count": pending_count,
        "processing_count": processing_count,
        "completed_count": completed_count,
        "failed_count": failed_count,
        "jobs": jobs_info,
        "total_amount": total_amount
    }
def _compute_batch_overall_status(pending: int, processing: int, completed: int, failed: int, total: int) -> str:
    """
    Derive the batch-level status value from per-state job counts.

    Args:
        pending: Number of jobs still waiting.
        processing: Number of jobs currently running.
        completed: Number of jobs that finished successfully.
        failed: Number of jobs that failed.
        total: Total number of files in the batch.

    Returns:
        The ``.value`` of the matching ``BatchStatus`` member:
        FAILED when nothing is in progress and every file failed,
        COMPLETED when nothing is in progress otherwise,
        PROCESSING when work remains and at least one job has started or finished,
        PENDING when all remaining work is still waiting.
    """
    in_progress = pending + processing
    if in_progress == 0:
        # Nothing left to run: the batch is done one way or the other.
        outcome = BatchStatus.FAILED if failed == total else BatchStatus.COMPLETED
        return outcome.value

    any_activity = any(count > 0 for count in (processing, completed, failed))
    if any_activity:
        return BatchStatus.PROCESSING.value
    return BatchStatus.PENDING.value
@router.get("/batches/{batch_id}/status", response_model=BatchStatusResponse)
async def get_batch_status(
    batch_id: int,
    wait: Optional[int] = Query(
        default=None,
        ge=0,
        le=MAX_WAIT_SECONDS,
        description="Long-polling wait time in seconds (max 30)"
    ),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Get batch processing status with optional long-polling.

    Returns aggregated status counts and individual job statuses.
    When `wait` is provided and jobs are still in progress, the endpoint
    re-checks the batch every POLL_INTERVAL_SECONDS until:
    - any of the status counts differs from the initial snapshot, or
    - the `wait` timeout is reached.

    Args:
        batch_id: Batch ID to query
        wait: Optional wait time in seconds for long-polling (0-30)
        session: Database session.
        current_user: Authenticated user; must be the uploader of the batch
            or have access to the batch's company.

    Returns:
        BatchStatusResponse with status counts and job details

    Raises:
        404: If the batch does not exist, is not accessible to the caller,
            or disappears while polling
    """
    not_found = HTTPException(
        status_code=404,
        detail=f"Batch {batch_id} not found"
    )

    # Get initial snapshot
    snapshot = await _get_batch_status_snapshot(batch_id, session)
    if snapshot is None:
        raise not_found

    # Only the uploader, or a user with access to the batch's company, may
    # see the batch. Answer 404 (not 403) so batch IDs cannot be probed.
    owner_batch = snapshot["batch"]
    may_view = (
        owner_batch.user_id == current_user.username
        or str(owner_batch.company_id) in current_user.companies
    )
    if not may_view:
        raise not_found

    count_keys = ("pending_count", "processing_count", "completed_count", "failed_count")

    # Long-poll only when requested and there are still jobs in progress
    if wait and wait > 0 and snapshot["pending_count"] + snapshot["processing_count"] > 0:
        initial_counts = tuple(snapshot[key] for key in count_keys)
        # Use a wall-clock deadline so time spent querying counts towards
        # the timeout, not just the time spent sleeping.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + wait
        while True:
            remaining = deadline - loop.time()
            if remaining <= 0:
                break
            await asyncio.sleep(min(POLL_INTERVAL_SECONDS, remaining))

            # End the current read transaction and expire loaded objects so
            # the next snapshot reloads the rows (e.g. receipt_id set on a
            # BatchJob by a worker) instead of reusing cached attributes.
            await session.rollback()
            snapshot = await _get_batch_status_snapshot(batch_id, session)
            if snapshot is None:
                # Batch deleted during polling (edge case)
                raise not_found

            # Any change in the counts (including "all jobs finished")
            # ends the wait immediately.
            if tuple(snapshot[key] for key in count_keys) != initial_counts:
                break

    # Build response from the most recent snapshot
    batch = snapshot["batch"]
    total_files = batch.total_files
    overall_status = _compute_batch_overall_status(
        snapshot["pending_count"],
        snapshot["processing_count"],
        snapshot["completed_count"],
        snapshot["failed_count"],
        total_files
    )
    jobs = [
        BatchJobInfo(
            job_id=j["job_id"],
            filename=j["filename"],
            status=j["status"],
            receipt_id=j["receipt_id"],
            error_message=j.get("error_message")
        )
        for j in snapshot["jobs"]
    ]
    return BatchStatusResponse(
        batch_id=batch.id,
        status=overall_status,
        total_files=total_files,
        pending_count=snapshot["pending_count"],
        processing_count=snapshot["processing_count"],
        completed_count=snapshot["completed_count"],
        failed_count=snapshot["failed_count"],
        jobs=jobs,
        total_amount=snapshot["total_amount"],
        created_at=batch.created_at
    )
# ============ Retry Endpoints (US-006) ============
async def _retry_single_receipt(
    session: AsyncSession,
    receipt: Receipt,
    username: str
) -> tuple[bool, Optional[str], Optional[str]]:
    """
    Retry processing for a single receipt.

    Loads one attachment of the receipt, reads its file from the upload
    directory, queues a new OCR job for it and resets the receipt's
    processing fields. The session is flushed but NOT committed; committing
    is left to the caller.

    Args:
        session: Database session
        receipt: Receipt to retry
        username: Username for the new OCR job

    Returns:
        Tuple of (success, job_id, error_message). On success error_message
        is None; on failure job_id is None and error_message is a
        user-facing explanation.
    """
    # Take one attachment of the receipt as the source file.
    # NOTE(review): no ORDER BY, so with several attachments which one is
    # picked is up to the database - confirm this is acceptable.
    attachments_result = await session.execute(
        select(ReceiptAttachment)
        .where(ReceiptAttachment.receipt_id == receipt.id)
        .limit(1)
    )
    attachment = attachments_result.scalar_one_or_none()
    if not attachment:
        return False, None, "Bonul nu are fișier atașat"

    # Construct full path to attachment file
    file_path = settings.data_entry_upload_path_resolved / attachment.file_path

    # Read the file in a worker thread so the disk access does not block the
    # event loop, and let the read itself report a missing file (no separate
    # exists() check that could race with a deletion).
    try:
        file_bytes = await asyncio.to_thread(file_path.read_bytes)
    except (FileNotFoundError, NotADirectoryError):
        return False, None, "Fișierul original nu mai este disponibil"
    except OSError as e:
        logger.exception(f"[Retry] Failed to read file {file_path}: {e}")
        return False, None, f"Eroare la citirea fișierului: {str(e)}"

    # Create new OCR job and reset the receipt's processing state
    try:
        job = await job_queue.create_job(
            file_bytes=file_bytes,
            mime_type=attachment.mime_type,
            engine="doctr_plus",
            username=username,
            original_filename=attachment.filename,
            batch_id=None,  # No batch for retry - direct processing
            file_hash=receipt.file_hash
        )
        receipt.processing_status = "pending"
        receipt.processing_error = None
        receipt.processing_started_at = datetime.utcnow()
        receipt.processing_completed_at = None
        await session.flush()
        logger.info(f"[Retry] Receipt {receipt.id} requeued as job {job.id}")
        return True, job.id, None
    except Exception as e:
        # Boundary handler: any failure is reported to the caller as a
        # failed retry instead of aborting a whole batch retry.
        logger.exception(f"[Retry] Failed to create job for receipt {receipt.id}: {e}")
        return False, None, f"Eroare la crearea job-ului OCR: {str(e)}"
@router.post("/retry/{receipt_id}", response_model=RetryResponse)
async def retry_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Re-queue OCR processing for one failed receipt.

    The receipt is looked up within the selected company, must currently be
    in 'failed' state, and is handed to ``_retry_single_receipt``; on success
    the session is committed.

    Args:
        receipt_id: ID of the receipt to retry
        session: Database session.
        current_user: Authenticated user; their username is used for the new job.
        selected_company: Company the receipt must belong to.

    Returns:
        RetryResponse with success status and new job ID

    Raises:
        404: If the receipt is not found for the selected company
        400: If the receipt is not in 'failed' status
        400: If the retry itself fails (e.g. original file not available)
    """
    lookup = select(Receipt).where(
        and_(
            Receipt.id == receipt_id,
            Receipt.company_id == selected_company
        )
    )
    receipt = (await session.execute(lookup)).scalar_one_or_none()

    if not receipt:
        raise HTTPException(
            status_code=404,
            detail=f"Bonul #{receipt_id} nu a fost găsit"
        )

    # Only receipts whose processing failed may be retried.
    if receipt.processing_status != "failed":
        raise HTTPException(
            status_code=400,
            detail=f"Bonul nu este în stare de eroare (status actual: {receipt.processing_status})"
        )

    requeued, new_job_id, failure_reason = await _retry_single_receipt(
        session, receipt, current_user.username
    )
    if not requeued:
        raise HTTPException(
            status_code=400,
            detail=failure_reason or "Eroare necunoscută la reîncărcare"
        )

    # Persist the reset processing fields of the receipt.
    await session.commit()
    return RetryResponse(
        success=True,
        receipt_id=receipt_id,
        job_id=new_job_id,
        message="Bon reîncarcat în procesare"
    )
@router.post("/retry-batch/{batch_id}", response_model=BatchRetryResponse)
async def retry_batch_failed(
    batch_id: str,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Re-queue OCR processing for every failed receipt of a batch.

    Selects the receipts of the selected company whose batch_id matches and
    whose processing_status is 'failed', retries each one via
    ``_retry_single_receipt`` and commits once at the end.

    Args:
        batch_id: Batch ID as stored on receipt.batch_id
        session: Database session.
        current_user: Authenticated user; their username is used for the new jobs.
        selected_company: Company the receipts must belong to.

    Returns:
        BatchRetryResponse with counts of successful and failed retries

    Raises:
        404: If no failed receipts are found for the batch
    """
    failed_lookup = select(Receipt).where(
        and_(
            Receipt.batch_id == batch_id,
            Receipt.company_id == selected_company,
            Receipt.processing_status == "failed"
        )
    )
    failed_receipts = (await session.execute(failed_lookup)).scalars().all()

    if not failed_receipts:
        raise HTTPException(
            status_code=404,
            detail=f"Nu există bonuri cu erori în batch-ul {batch_id}"
        )

    # Retry one receipt at a time; a failure is recorded and does not stop
    # the remaining retries.
    retried_count = 0
    errors = []
    for receipt in failed_receipts:
        requeued, _new_job_id, failure_reason = await _retry_single_receipt(
            session, receipt, current_user.username
        )
        if requeued:
            retried_count += 1
            continue
        errors.append(f"Bon #{receipt.id}: {failure_reason}")
    failed_count = len(errors)

    await session.commit()

    message = f"{retried_count} bonuri reîncarcate în procesare"
    if failed_count > 0:
        message += f", {failed_count} erori"

    return BatchRetryResponse(
        success=retried_count > 0,
        batch_id=batch_id,
        retried_count=retried_count,
        failed_count=failed_count,
        errors=errors,
        message=message
    )
# ============ Cancel Endpoints (US-014) ============
@router.post("/cancel/{job_id}", response_model=CancelJobResponse)
async def cancel_job(
    job_id: str,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Cancel a single OCR processing job.

    Only jobs that are not yet completed, failed or cancelled can be
    cancelled. This endpoint only changes the job's status in the job queue;
    it does not delete any receipt.

    Args:
        job_id: The ID of the OCR job to cancel
        session: Database session.
        current_user: Authenticated user; must be the uploader of the job's
            batch or have access to the batch's company.

    Returns:
        CancelJobResponse with cancellation details

    Raises:
        404: If the job is not found in the batch_jobs table, is not
            accessible to the caller, or is missing from the job queue
        400: If the job has already completed, failed or been cancelled
        500: If the job queue refuses the status update
    """
    job_not_found = HTTPException(
        status_code=404,
        detail=f"Job {job_id} nu a fost găsit"
    )

    # Find the job in batch_jobs table
    batch_job_result = await session.execute(
        select(BatchJob).where(BatchJob.job_id == job_id)
    )
    batch_job = batch_job_result.scalar_one_or_none()
    if not batch_job:
        raise job_not_found

    # Only the uploader of the owning batch, or a user with access to the
    # batch's company, may cancel the job. Answer 404 (not 403) so job IDs
    # cannot be probed.
    owner_result = await session.execute(
        select(BatchUpload).where(BatchUpload.id == batch_job.batch_id)
    )
    owner_batch = owner_result.scalar_one_or_none()
    may_cancel = owner_batch is not None and (
        owner_batch.user_id == current_user.username
        or str(owner_batch.company_id) in current_user.companies
    )
    if not may_cancel:
        raise job_not_found

    # Get the OCR job from job_queue to check current status
    ocr_job = await job_queue.get_job(job_id)
    if not ocr_job:
        raise HTTPException(
            status_code=404,
            detail=f"Job {job_id} nu există în coada de procesare"
        )

    # Jobs in a terminal state cannot be cancelled; each state has its own
    # user-facing explanation.
    current_status = ocr_job.status.value
    terminal_messages = {
        OCRJobStatus.completed.value: "Job-ul a fost deja procesat cu succes. Nu poate fi anulat.",
        OCRJobStatus.failed.value: "Job-ul a eșuat deja. Folosiți opțiunea de reîncercare în loc de anulare.",
        OCRJobStatus.cancelled.value: "Job-ul a fost deja anulat.",
    }
    if current_status in terminal_messages:
        raise HTTPException(
            status_code=400,
            detail=terminal_messages[current_status]
        )

    # Update job status to cancelled in job_queue
    cancelled_at = datetime.utcnow()
    success = await job_queue.update_status(
        job_id=job_id,
        status=OCRJobStatus.cancelled,
        error="Cancelled by user"
    )
    if not success:
        raise HTTPException(
            status_code=500,
            detail="Eroare la anularea job-ului"
        )

    logger.info(
        f"[CancelJob] Job {job_id} cancelled by {current_user.username} "
        f"(previous status: {current_status})"
    )
    return CancelJobResponse(
        success=True,
        job_id=job_id,
        cancelled_at=cancelled_at,
        message="Job anulat cu succes"
    )
@router.post("/cancel-batch/{batch_id}", response_model=CancelBatchResponse)
async def cancel_batch(
    batch_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Cancel all pending/processing jobs in a batch.

    Every job of the batch whose queue status is 'pending' or 'processing'
    is marked 'cancelled'. Jobs in any other state, jobs missing from the
    queue and jobs whose status update is refused are counted as skipped.
    This endpoint only changes job statuses in the job queue; it does not
    delete any receipt.

    Args:
        batch_id: The batch ID to cancel
        session: Database session.
        current_user: Authenticated user; must be the uploader of the batch
            or have access to the batch's company.

    Returns:
        CancelBatchResponse with counts of cancelled and skipped jobs

    Raises:
        404: If the batch is not found, is not accessible to the caller,
            or has no jobs
    """
    batch_not_found = HTTPException(
        status_code=404,
        detail=f"Batch {batch_id} nu a fost găsit"
    )

    # Verify batch exists
    batch_result = await session.execute(
        select(BatchUpload).where(BatchUpload.id == batch_id)
    )
    batch = batch_result.scalar_one_or_none()
    if not batch:
        raise batch_not_found

    # Only the uploader, or a user with access to the batch's company, may
    # cancel it. Answer 404 (not 403) so batch IDs cannot be probed.
    may_cancel = (
        batch.user_id == current_user.username
        or str(batch.company_id) in current_user.companies
    )
    if not may_cancel:
        raise batch_not_found

    # Get all batch_jobs for this batch
    batch_jobs_result = await session.execute(
        select(BatchJob).where(BatchJob.batch_id == batch_id)
    )
    batch_jobs = batch_jobs_result.scalars().all()
    if not batch_jobs:
        raise HTTPException(
            status_code=404,
            detail=f"Nu există job-uri în batch-ul {batch_id}"
        )

    # Process each job - cancel pending/processing, skip everything else
    cancellable_states = (OCRJobStatus.pending.value, OCRJobStatus.processing.value)
    cancelled_count = 0
    skipped_count = 0
    for batch_job in batch_jobs:
        # Get current job status from OCR job queue
        ocr_job = await job_queue.get_job(batch_job.job_id)
        if not ocr_job:
            # Job not found in queue - treat as skipped
            skipped_count += 1
            continue

        if ocr_job.status.value not in cancellable_states:
            # Job is completed, failed, or already cancelled - skip it
            skipped_count += 1
            continue

        success = await job_queue.update_status(
            job_id=batch_job.job_id,
            status=OCRJobStatus.cancelled,
            error="Cancelled by user (batch cancel)"
        )
        if success:
            cancelled_count += 1
            logger.debug(f"[CancelBatch] Cancelled job {batch_job.job_id}")
        else:
            # Failed to cancel - count as skipped
            skipped_count += 1
            logger.warning(
                f"[CancelBatch] Failed to cancel job {batch_job.job_id}"
            )

    logger.info(
        f"[CancelBatch] Batch {batch_id} cancelled by {current_user.username}: "
        f"{cancelled_count} cancelled, {skipped_count} skipped"
    )

    # Build message
    if cancelled_count == 0:
        message = f"Nu există job-uri de anulat în batch-ul {batch_id}"
    elif skipped_count == 0:
        message = f"{cancelled_count} job-uri anulate"
    else:
        message = f"{cancelled_count} job-uri anulate, {skipped_count} ignorate (deja procesate)"

    return CancelBatchResponse(
        success=cancelled_count > 0,
        batch_id=batch_id,
        cancelled_count=cancelled_count,
        skipped_count=skipped_count,
        message=message
    )