fix telegram

2026-02-23 15:12:33 +00:00
parent 6c78fec8a7
commit 8bc567a9c5
426 changed files with 112478 additions and 1 deletions
--- a/deploy-package-20260223-151231/backend/modules/data_entry/routers/init.py
+++ b/deploy-package-20260223-151231/backend/modules/data_entry/routers/init.py
@@ -0,0 +1,39 @@
+"""Data Entry module router factory."""
+
+from fastapi import APIRouter
+
+
+def create_data_entry_router() -> APIRouter:
+    """
+    Build the aggregate router for the Data Entry module.
+
+    The returned router has no prefix of its own; per the inline note in the
+    original code it is mounted by main.py under /api/data-entry.
+
+    Sub-routers, in registration order:
+    - /receipts - Receipt CRUD and workflow
+    - /ocr - OCR processing for receipts
+    - /nomenclature - Nomenclature syncing from Oracle
+    - (no prefix) - OCR settings and metrics, endpoints at /settings/* and /metrics/*
+    - /bulk - Bulk upload for batch processing
+
+    Returns:
+        APIRouter: Configured router for data entry module
+    """
+    # Sub-routers are imported inside the factory to avoid circular imports.
+    from .receipts import router as receipts_router
+    from .ocr import router as ocr_router
+    from .nomenclature import router as nomenclature_router
+    from .ocr_settings import router as ocr_settings_router
+    from .bulk import router as bulk_router
+
+    # (sub-router, prefix, OpenAPI tag). An empty prefix is include_router's
+    # default, so the settings router is mounted exactly as if no prefix
+    # argument had been passed.
+    mounts = (
+        (receipts_router, "/receipts", "data-entry-receipts"),
+        (ocr_router, "/ocr", "data-entry-ocr"),
+        (nomenclature_router, "/nomenclature", "data-entry-nomenclature"),
+        (ocr_settings_router, "", "data-entry-settings"),
+        (bulk_router, "/bulk", "data-entry-bulk"),
+    )
+
+    router = APIRouter()
+    for sub_router, prefix, tag in mounts:
+        router.include_router(sub_router, prefix=prefix, tags=[tag])
+
+    return router
--- a/deploy-package-20260223-151231/backend/modules/data_entry/routers/bulk.py
+++ b/deploy-package-20260223-151231/backend/modules/data_entry/routers/bulk.py
@@ -0,0 +1,997 @@
+"""
+Bulk upload API endpoints for batch receipt processing.
+
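+Endpoints:
+- POST /upload - Submit multiple files for OCR processing in a single batch
+- GET /batches/{batch_id}/status - Get batch status with optional long-polling
+- POST /retry/{receipt_id} - Retry OCR processing for a single failed receipt
+- POST /retry-batch/{batch_id} - Retry all failed receipts in a batch
+- POST /cancel/{job_id} - Cancel a single pending/processing OCR job
+- POST /cancel-batch/{batch_id} - Cancel all pending/processing jobs in a batch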
+
+Validation:
+- Max 100 files per batch
+- Max 10MB per file
+- Allowed types: PDF, PNG, JPG
+
+Duplicate Detection (US-007):
+- SHA-256 hash calculated for each file
+- Duplicate files (same hash + company_id) are rejected with 409 Conflict info
+- Duplicates reported in error list, non-duplicates processed normally
+"""
+
+import asyncio
+import hashlib
+import logging
+from datetime import datetime
+from decimal import Decimal
+from pathlib import Path
+from typing import Annotated, List, Optional, Union
+
+from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Header
+from sqlalchemy import select, func, and_
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from backend.modules.data_entry.db.database import get_session
+from backend.modules.data_entry.db.models import BatchUpload, BatchJob, BatchStatus, Receipt, ReceiptAttachment
+from backend.modules.data_entry.schemas.bulk import (
+    BulkUploadResponse,
+    BulkUploadResponseWithDuplicates,
+    BatchStatusResponse,
+    BatchJobInfo,
+    DuplicateFileInfo,
+    RetryResponse,
+    BatchRetryResponse,
+    CancelJobResponse,
+    CancelBatchResponse
+)
+from backend.modules.data_entry.services.ocr.job_queue import job_queue, OCRJobStatus
+from backend.config import settings
+
+# Auth integration
+from shared.auth.dependencies import get_current_user
+from shared.auth.models import CurrentUser
+
+# Module-level logger; messages in this file are tagged with a bracketed
+# prefix such as [BulkUpload], [Retry], [CancelJob] or [CancelBatch].
+logger = logging.getLogger(__name__)
+
+# Router holding every bulk endpoint defined in this module.
+router = APIRouter()
+
+
+# ============ Helper for selected company from header ============
+
+async def get_selected_company(
+    current_user: CurrentUser = Depends(get_current_user),
+    x_selected_company: Annotated[Optional[str], Header()] = None
+) -> int:
+    """
+    Resolve the company the current request operates on.
+
+    When the X-Selected-Company header is present it must parse as an integer
+    and appear (in string form) in ``current_user.companies``. When the header
+    is absent or empty, the first entry of ``current_user.companies`` is used.
+
+    Returns:
+        The selected company ID as an int.
+
+    Raises:
+        400: Header is not an integer, or the user has no usable company
+        403: User has no access to the requested company
+    """
+    if not x_selected_company:
+        # No header: fall back to the first company in the user's list.
+        if current_user.companies:
+            try:
+                return int(current_user.companies[0])
+            except (ValueError, IndexError):
+                # An unparseable first entry is treated like "no company".
+                pass
+
+        raise HTTPException(
+            status_code=400,
+            detail="Nu aveți nicio firmă asignată"
+        )
+
+    try:
+        requested_id = int(x_selected_company)
+    except ValueError:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid company ID format: {x_selected_company}"
+        )
+
+    # Membership is tested against the string form of the parsed ID.
+    if str(requested_id) not in current_user.companies:
+        raise HTTPException(
+            status_code=403,
+            detail=f"Nu aveți acces la firma {requested_id}"
+        )
+
+    return requested_id
+
+# Validation constants checked by bulk_upload before any OCR job is created
+MAX_FILES_PER_BATCH = 100  # Upper bound on files accepted in one upload request
+MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024  # 10MB
+ALLOWED_MIME_TYPES = {"image/jpeg", "image/png", "application/pdf"}  # Compared with UploadFile.content_type
+
+
+def compute_file_hash(content: bytes) -> str:
+    """
+    Return the SHA-256 digest of a file's bytes as a hex string.
+
+    The digest is the key used for duplicate detection: identical content
+    always yields the identical 64-character hexadecimal string.
+
+    Args:
+        content: Raw file bytes
+
+    Returns:
+        Lowercase hexadecimal SHA-256 digest (64 characters)
+    """
+    hasher = hashlib.sha256()
+    hasher.update(content)
+    return hasher.hexdigest()
+
+
+async def check_duplicate_hashes(
+    session: AsyncSession,
+    file_hashes: List[str],
+    company_id: int
+) -> dict[str, int]:
+    """
+    Look up which of the given file hashes already belong to a receipt of this company.
+
+    Args:
+        session: Database session
+        file_hashes: SHA-256 hashes to look up
+        company_id: Company whose receipts are searched
+
+    Returns:
+        Dict mapping each already-known hash to the ID of a receipt that has it.
+        Empty when nothing matches or when ``file_hashes`` is empty.
+    """
+    # Nothing to look up - skip the query entirely.
+    if not file_hashes:
+        return {}
+
+    lookup = select(Receipt.file_hash, Receipt.id).where(
+        and_(
+            Receipt.file_hash.in_(file_hashes),
+            Receipt.company_id == company_id
+        )
+    )
+    # .all() on an executed result is a plain (non-awaitable) call returning
+    # tuple-like rows of (file_hash, receipt_id).
+    matches = (await session.execute(lookup)).all()
+
+    # If several receipts share a hash, the row seen last wins.
+    return {known_hash: receipt_id for known_hash, receipt_id in matches}
+
+
+@router.post("/upload", response_model=Union[BulkUploadResponse, BulkUploadResponseWithDuplicates])
+async def bulk_upload(
+    files: List[UploadFile] = File(..., description="Multiple files to upload"),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+    selected_company: int = Depends(get_selected_company)
+):
+    """
+    Upload multiple files for batch OCR processing.
+
+    Creates a batch record and queues all files as OCR jobs.
+    Invalid files cause entire batch rejection (validation errors).
+    Duplicate files are reported separately and skipped - non-duplicates are processed.
+
+    Duplicate Detection (US-007):
+    - SHA-256 hash calculated for each file before processing
+    - Files whose hash already exists for the selected company are skipped
+    - Response includes duplicate details with existing_receipt_id
+
+    Failure handling:
+    - If queuing a job or committing the batch fails, the database transaction
+      is rolled back and any OCR jobs already queued for this request are
+      cancelled (best effort), so no job is left pointing at a batch that
+      was never stored.
+
+    Args:
+        files: List of image/PDF files (max 100 files, max 10MB each)
+
+    Returns:
+        BulkUploadResponse with batch_id and list of job_ids
+        BulkUploadResponseWithDuplicates if some files were duplicates
+
+    Raises:
+        400: If validation fails (too many files, file too large, invalid type)
+        409: If ALL files are duplicates
+        500: If job creation fails
+    """
+    # Validate file count
+    if not files:
+        raise HTTPException(
+            status_code=400,
+            detail="No files provided"
+        )
+
+    if len(files) > MAX_FILES_PER_BATCH:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Too many files. Maximum {MAX_FILES_PER_BATCH} files per batch."
+        )
+
+    # Pre-validate all files before creating any jobs (all-or-nothing check)
+    invalid_files = []
+    file_contents = []
+
+    for file in files:
+        # Check MIME type
+        if file.content_type not in ALLOWED_MIME_TYPES:
+            invalid_files.append(f"{file.filename}: Invalid type ({file.content_type})")
+            continue
+
+        # Read content and check size
+        content = await file.read()
+        if len(content) > MAX_FILE_SIZE_BYTES:
+            invalid_files.append(f"{file.filename}: File too large ({len(content) // (1024*1024)}MB > 10MB)")
+            continue
+
+        # Keep bytes and SHA-256 hash together for duplicate detection (US-007)
+        file_contents.append({
+            "filename": file.filename,
+            "content": content,
+            "mime_type": file.content_type,
+            "file_hash": compute_file_hash(content)
+        })
+
+    # If any files are invalid, reject the entire batch
+    if invalid_files:
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "message": f"Validation failed for {len(invalid_files)} file(s)",
+                "invalid_files": invalid_files
+            }
+        )
+
+    # Check for duplicates BEFORE creating batch (US-007)
+    all_hashes = [f["file_hash"] for f in file_contents]
+    existing_duplicates = await check_duplicate_hashes(session, all_hashes, selected_company)
+
+    # Separate duplicate files from processable files
+    duplicate_files: List[DuplicateFileInfo] = []
+    processable_files = []
+
+    for file_data in file_contents:
+        if file_data["file_hash"] in existing_duplicates:
+            existing_receipt_id = existing_duplicates[file_data["file_hash"]]
+            duplicate_files.append(DuplicateFileInfo(
+                filename=file_data["filename"],
+                error="duplicate",
+                existing_receipt_id=existing_receipt_id,
+                message=f"Fișier duplicat - există deja ca bon #{existing_receipt_id}"
+            ))
+            logger.info(
+                f"[BulkUpload] Duplicate detected: {file_data['filename']} "
+                f"(hash={file_data['file_hash'][:16]}...) matches receipt #{existing_receipt_id}"
+            )
+        else:
+            processable_files.append(file_data)
+
+    # If ALL files are duplicates, return 409 Conflict
+    if len(duplicate_files) == len(file_contents):
+        raise HTTPException(
+            status_code=409,
+            detail={
+                "error": "all_duplicates",
+                "message": f"Toate cele {len(duplicate_files)} fișiere sunt duplicate",
+                "duplicates": [d.model_dump() for d in duplicate_files]
+            }
+        )
+
+    # Defensive guard: with the checks above this should not be reachable
+    if not processable_files:
+        raise HTTPException(
+            status_code=409,
+            detail={
+                "error": "no_files_to_process",
+                "message": "Nu există fișiere de procesat",
+                "duplicates": [d.model_dump() for d in duplicate_files]
+            }
+        )
+
+    # Create batch record with company_id for auto-save
+    batch = BatchUpload(
+        user_id=current_user.username,
+        company_id=selected_company,
+        status=BatchStatus.PENDING,
+        total_files=len(processable_files)  # Only count processable files
+    )
+    session.add(batch)
+    await session.flush()  # Get batch.id before creating jobs
+
+    # Capture the primary key now so nothing below has to read an ORM
+    # attribute after commit/rollback.
+    batch_id = batch.id
+
+    # Create OCR jobs for processable files only
+    job_ids = []
+
+    try:
+        batch_jobs = []
+        for file_data in processable_files:
+            # Create OCR job using existing job_queue
+            job = await job_queue.create_job(
+                file_bytes=file_data["content"],
+                mime_type=file_data["mime_type"],
+                engine="doctr_plus",  # Default engine for bulk
+                username=current_user.username,
+                original_filename=file_data["filename"],
+                batch_id=batch_id,  # Link job to batch for auto-save integration
+                file_hash=file_data["file_hash"]  # Pass hash for storage in receipt
+            )
+            job_ids.append(job.id)
+
+            # Link row between the batch and the queued job
+            batch_jobs.append(BatchJob(
+                batch_id=batch_id,
+                job_id=job.id,
+                filename=file_data["filename"]
+            ))
+
+        session.add_all(batch_jobs)
+
+        # Commit batch + link rows in one transaction
+        await session.commit()
+
+    except Exception as e:
+        # Undo the database side of the batch
+        await session.rollback()
+        logger.exception(f"[BulkUpload] Failed to create batch: {e}")
+
+        # The OCR queue is not covered by the database rollback, so jobs that
+        # were already queued would otherwise run without a batch. Cancel
+        # them; a failure to cancel is logged but must not mask the error.
+        for orphan_job_id in job_ids:
+            try:
+                await job_queue.update_status(
+                    job_id=orphan_job_id,
+                    status=OCRJobStatus.cancelled,
+                    error="Batch creation failed"
+                )
+            except Exception:
+                logger.warning(
+                    f"[BulkUpload] Could not cancel orphaned job {orphan_job_id}",
+                    exc_info=True
+                )
+
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to create batch: {str(e)}"
+        ) from e
+
+    logger.info(
+        f"[BulkUpload] Created batch {batch_id} with {len(job_ids)} jobs "
+        f"for user {current_user.username}"
+        f"{f', {len(duplicate_files)} duplicates skipped' if duplicate_files else ''}"
+    )
+
+    # Return response with duplicate info if any duplicates were found
+    if duplicate_files:
+        return BulkUploadResponseWithDuplicates(
+            batch_id=batch_id,
+            job_ids=job_ids,
+            total_files=len(file_contents),
+            processed_files=len(job_ids),
+            duplicate_files=len(duplicate_files),
+            duplicates=duplicate_files,
+            message=f"{len(job_ids)} fișier(e) în procesare, {len(duplicate_files)} duplicate ignorate"
+        )
+
+    return BulkUploadResponse(
+        batch_id=batch_id,
+        job_ids=job_ids,
+        total_files=len(job_ids),
+        message=f"{len(job_ids)} files queued for processing"
+    )
+
+
+# Long-polling constants
+MAX_WAIT_SECONDS = 30  # Upper bound (le=) of the `wait` query parameter on the status endpoint
+POLL_INTERVAL_SECONDS = 0.5  # Sleep between status snapshots while long-polling
+
+
+async def _get_batch_status_snapshot(
+    batch_id: int,
+    session: AsyncSession
+) -> Optional[dict]:
+    """
+    Get current batch status snapshot.
+
+    Loads the batch and its batch_jobs from the database, asks the OCR job
+    queue for each job's current status, and sums the amounts of receipts
+    already linked to the batch's jobs.
+
+    The ORM queries use ``populate_existing`` so that calling this function
+    repeatedly on the same session (as the long-polling status endpoint does)
+    refreshes already-loaded rows instead of returning their stale attribute
+    values (e.g. ``receipt_id``).
+
+    Args:
+        batch_id: Batch ID to inspect
+        session: Database session
+
+    Returns:
+        Dict with keys ``batch``, ``pending_count``, ``processing_count``,
+        ``completed_count``, ``failed_count``, ``jobs`` and ``total_amount``,
+        or None if the batch does not exist.
+    """
+    # Get batch record (refreshing any instance already held by the session)
+    batch_result = await session.execute(
+        select(BatchUpload)
+        .where(BatchUpload.id == batch_id)
+        .execution_options(populate_existing=True)
+    )
+    batch = batch_result.scalar_one_or_none()
+
+    if not batch:
+        return None
+
+    # Get all batch_jobs for this batch, again overwriting cached state
+    batch_jobs_result = await session.execute(
+        select(BatchJob)
+        .where(BatchJob.batch_id == batch_id)
+        .execution_options(populate_existing=True)
+    )
+    batch_jobs = batch_jobs_result.scalars().all()
+
+    if not batch_jobs:
+        return {
+            "batch": batch,
+            "pending_count": 0,
+            "processing_count": 0,
+            "completed_count": 0,
+            "failed_count": 0,
+            "jobs": [],
+            "total_amount": None
+        }
+
+    # Get job statuses and error messages from the OCR job queue
+    job_statuses = {}
+    job_errors = {}
+    for bj in batch_jobs:
+        job = await job_queue.get_job(bj.job_id)
+        if job:
+            job_statuses[bj.job_id] = job.status.value
+            job_errors[bj.job_id] = job.error_message
+        else:
+            # Job not found in queue - treat as failed
+            job_statuses[bj.job_id] = "failed"
+            job_errors[bj.job_id] = "Job not found in queue"
+
+    # Count by status.
+    # NOTE(review): a job whose status is none of these four values (the
+    # cancel endpoints set OCRJobStatus.cancelled) is not counted in any
+    # bucket - confirm that is the intended reporting.
+    observed = list(job_statuses.values())
+    pending_count = observed.count("pending")
+    processing_count = observed.count("processing")
+    completed_count = observed.count("completed")
+    failed_count = observed.count("failed")
+
+    # Build jobs list with status info
+    jobs_info = [
+        {
+            "job_id": bj.job_id,
+            "filename": bj.filename,
+            "status": job_statuses.get(bj.job_id, "failed"),
+            "receipt_id": bj.receipt_id,
+            "error_message": job_errors.get(bj.job_id)
+        }
+        for bj in batch_jobs
+    ]
+
+    # Calculate total_amount from receipts already linked to jobs
+    total_amount = None
+    receipt_ids = [bj.receipt_id for bj in batch_jobs if bj.receipt_id is not None]
+    if receipt_ids:
+        amount_result = await session.execute(
+            select(func.sum(Receipt.amount)).where(Receipt.id.in_(receipt_ids))
+        )
+        total_sum = amount_result.scalar()
+        if total_sum is not None:
+            total_amount = float(total_sum)
+
+    return {
+        "batch": batch,
+        "pending_count": pending_count,
+        "processing_count": processing_count,
+        "completed_count": completed_count,
+        "failed_count": failed_count,
+        "jobs": jobs_info,
+        "total_amount": total_amount
+    }
+
+
+def _compute_batch_overall_status(pending: int, processing: int, completed: int, failed: int, total: int) -> str:
+    """
+    Derive the overall batch status value from per-status job counts.
+
+    - Nothing pending or processing: FAILED when ``failed == total``,
+      otherwise COMPLETED.
+    - Otherwise PROCESSING as soon as any job is processing, completed or failed.
+    - Otherwise PENDING.
+    """
+    still_open = pending + processing
+
+    if still_open == 0:
+        # All jobs finished
+        return BatchStatus.FAILED.value if failed == total else BatchStatus.COMPLETED.value
+
+    work_has_started = processing > 0 or completed > 0 or failed > 0
+    if work_has_started:
+        return BatchStatus.PROCESSING.value
+
+    return BatchStatus.PENDING.value
+
+
+@router.get("/batches/{batch_id}/status", response_model=BatchStatusResponse)
+async def get_batch_status(
+    batch_id: int,
+    wait: Optional[int] = Query(
+        default=None,
+        ge=0,
+        le=MAX_WAIT_SECONDS,
+        description="Long-polling wait time in seconds (max 30)"
+    ),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+    selected_company: int = Depends(get_selected_company)
+):
+    """
+    Get batch processing status with optional long-polling.
+
+    Returns aggregated status counts and individual job statuses.
+    When `wait` parameter is provided, the endpoint will poll until:
+    - Status changes from initial snapshot
+    - All jobs complete (pending + processing = 0)
+    - Timeout is reached (measured on a monotonic clock, so time spent
+      querying counts toward the budget)
+
+    Access is scoped to the selected company, like the retry endpoints: a
+    batch recorded for a different company is reported as not found. Batches
+    with no company_id stored are not restricted.
+
+    Args:
+        batch_id: Batch ID to query
+        wait: Optional wait time in seconds for long-polling (0-30)
+
+    Returns:
+        BatchStatusResponse with status counts and job details
+
+    Raises:
+        404: If batch not found (or it belongs to another company)
+    """
+    # Get initial snapshot
+    snapshot = await _get_batch_status_snapshot(batch_id, session)
+
+    if snapshot is None:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Batch {batch_id} not found"
+        )
+
+    # Tenant check. Compared as strings so the check does not depend on how
+    # the company_id column is typed. Answer 404 rather than 403 so the
+    # existence of other companies' batches is not disclosed.
+    owner_company = snapshot["batch"].company_id
+    if owner_company is not None and str(owner_company) != str(selected_company):
+        raise HTTPException(
+            status_code=404,
+            detail=f"Batch {batch_id} not found"
+        )
+
+    count_keys = ("pending_count", "processing_count", "completed_count", "failed_count")
+
+    # Long-poll only when requested and there are still jobs in progress
+    if wait and snapshot["pending_count"] + snapshot["processing_count"] > 0:
+        initial_counts = tuple(snapshot[key] for key in count_keys)
+
+        clock = asyncio.get_running_loop()
+        deadline = clock.time() + wait
+
+        while True:
+            remaining = deadline - clock.time()
+            if remaining <= 0:
+                break
+
+            # Never sleep past the deadline
+            await asyncio.sleep(min(POLL_INTERVAL_SECONDS, remaining))
+
+            # Refresh snapshot
+            snapshot = await _get_batch_status_snapshot(batch_id, session)
+            if snapshot is None:
+                # Batch deleted during polling (edge case)
+                raise HTTPException(status_code=404, detail=f"Batch {batch_id} not found")
+
+            # Status changed, return immediately
+            if tuple(snapshot[key] for key in count_keys) != initial_counts:
+                break
+
+            # All jobs finished
+            if snapshot["pending_count"] + snapshot["processing_count"] == 0:
+                break
+
+    # Build response
+    batch = snapshot["batch"]
+    total_files = batch.total_files
+
+    overall_status = _compute_batch_overall_status(
+        snapshot["pending_count"],
+        snapshot["processing_count"],
+        snapshot["completed_count"],
+        snapshot["failed_count"],
+        total_files
+    )
+
+    jobs = [
+        BatchJobInfo(
+            job_id=j["job_id"],
+            filename=j["filename"],
+            status=j["status"],
+            receipt_id=j["receipt_id"],
+            error_message=j.get("error_message")
+        )
+        for j in snapshot["jobs"]
+    ]
+
+    return BatchStatusResponse(
+        batch_id=batch.id,
+        status=overall_status,
+        total_files=total_files,
+        pending_count=snapshot["pending_count"],
+        processing_count=snapshot["processing_count"],
+        completed_count=snapshot["completed_count"],
+        failed_count=snapshot["failed_count"],
+        jobs=jobs,
+        total_amount=snapshot["total_amount"],
+        created_at=batch.created_at
+    )
+
+
+# ============ Retry Endpoints (US-006) ============
+
+
+async def _retry_single_receipt(
+    session: AsyncSession,
+    receipt: Receipt,
+    username: str
+) -> tuple[bool, Optional[str], Optional[str]]:
+    """
+    Retry processing for a single receipt.
+
+    Looks up one attachment of the receipt, reads its file from the upload
+    directory, queues a new OCR job for it and resets the receipt's
+    processing fields. Changes are flushed but not committed - the caller
+    owns the transaction.
+
+    The file is read in a worker thread so a large or slow read does not
+    block the event loop.
+
+    Args:
+        session: Database session
+        receipt: Receipt to retry
+        username: Username for the new OCR job
+
+    Returns:
+        Tuple of (success, job_id, error_message). On success error_message
+        is None; on failure job_id is None and error_message is user-facing.
+    """
+    # Get one attachment to find the source file
+    attachments_result = await session.execute(
+        select(ReceiptAttachment)
+        .where(ReceiptAttachment.receipt_id == receipt.id)
+        .limit(1)
+    )
+    attachment = attachments_result.scalar_one_or_none()
+
+    if not attachment:
+        return False, None, "Bonul nu are fișier atașat"
+
+    # Construct full path to attachment file
+    file_path = settings.data_entry_upload_path_resolved / attachment.file_path
+
+    # Read file content off the event loop; a missing file is reported by
+    # the read itself instead of a separate exists() check.
+    try:
+        file_bytes = await asyncio.to_thread(file_path.read_bytes)
+    except (FileNotFoundError, NotADirectoryError):
+        return False, None, "Fișierul original nu mai este disponibil"
+    except Exception as e:
+        logger.exception(f"[Retry] Failed to read file {file_path}: {e}")
+        return False, None, f"Eroare la citirea fișierului: {str(e)}"
+
+    # Create new OCR job
+    try:
+        job = await job_queue.create_job(
+            file_bytes=file_bytes,
+            mime_type=attachment.mime_type,
+            engine="doctr_plus",
+            username=username,
+            original_filename=attachment.filename,
+            batch_id=None,  # No batch for retry - direct processing
+            file_hash=receipt.file_hash
+        )
+
+        # Reset receipt processing status
+        receipt.processing_status = "pending"
+        receipt.processing_error = None
+        receipt.processing_started_at = datetime.utcnow()
+        receipt.processing_completed_at = None
+
+        await session.flush()
+
+        logger.info(f"[Retry] Receipt {receipt.id} requeued as job {job.id}")
+        return True, job.id, None
+
+    except Exception as e:
+        logger.exception(f"[Retry] Failed to create job for receipt {receipt.id}: {e}")
+        return False, None, f"Eroare la crearea job-ului OCR: {str(e)}"
+
+
+@router.post("/retry/{receipt_id}", response_model=RetryResponse)
+async def retry_receipt(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+    selected_company: int = Depends(get_selected_company)
+):
+    """
+    Re-queue OCR processing for one failed receipt.
+
+    The receipt is looked up within the selected company, must currently be
+    in 'failed' status, and is handed to the retry helper, which queues a new
+    OCR job from the stored attachment and resets the processing fields.
+
+    Args:
+        receipt_id: ID of the receipt to retry
+
+    Returns:
+        RetryResponse with success status and new job ID
+
+    Raises:
+        404: If receipt not found for the selected company
+        400: If receipt is not in 'failed' status
+        400: If the retry helper reports an error (e.g. file unavailable)
+    """
+    # Look the receipt up scoped to the caller's company
+    lookup = select(Receipt).where(
+        and_(
+            Receipt.id == receipt_id,
+            Receipt.company_id == selected_company
+        )
+    )
+    receipt = (await session.execute(lookup)).scalar_one_or_none()
+
+    if not receipt:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Bonul #{receipt_id} nu a fost găsit"
+        )
+
+    # Only receipts that actually failed may be retried
+    if receipt.processing_status != "failed":
+        raise HTTPException(
+            status_code=400,
+            detail=f"Bonul nu este în stare de eroare (status actual: {receipt.processing_status})"
+        )
+
+    requeued, new_job_id, failure_reason = await _retry_single_receipt(
+        session, receipt, current_user.username
+    )
+
+    if not requeued:
+        raise HTTPException(
+            status_code=400,
+            detail=failure_reason or "Eroare necunoscută la reîncărcare"
+        )
+
+    # Persist the reset processing fields flushed by the helper
+    await session.commit()
+
+    return RetryResponse(
+        success=True,
+        receipt_id=receipt_id,
+        job_id=new_job_id,
+        message="Bon reîncarcat în procesare"
+    )
+
+
+@router.post("/retry-batch/{batch_id}", response_model=BatchRetryResponse)
+async def retry_batch_failed(
+    batch_id: str,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+    selected_company: int = Depends(get_selected_company)
+):
+    """
+    Re-queue every failed receipt of a batch.
+
+    Selects receipts of the selected company whose batch_id matches and whose
+    processing_status is 'failed', retries each one independently, and
+    commits once at the end.
+
+    Args:
+        batch_id: Batch ID (UUID string from receipt.batch_id)
+
+    Returns:
+        BatchRetryResponse with counts of successful and failed retries
+
+    Raises:
+        404: If no failed receipts found for batch
+    """
+    lookup = select(Receipt).where(
+        and_(
+            Receipt.batch_id == batch_id,
+            Receipt.company_id == selected_company,
+            Receipt.processing_status == "failed"
+        )
+    )
+    failed_receipts = (await session.execute(lookup)).scalars().all()
+
+    if not failed_receipts:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Nu există bonuri cu erori în batch-ul {batch_id}"
+        )
+
+    # One retry attempt per receipt; a failure does not stop the others
+    retried_count = 0
+    errors = []
+
+    for receipt in failed_receipts:
+        requeued, _, failure_reason = await _retry_single_receipt(
+            session, receipt, current_user.username
+        )
+
+        if requeued:
+            retried_count += 1
+            continue
+
+        errors.append(f"Bon #{receipt.id}: {failure_reason}")
+
+    # Exactly one error line is recorded per failed retry
+    failed_count = len(errors)
+
+    await session.commit()
+
+    summary = f"{retried_count} bonuri reîncarcate în procesare"
+    if failed_count > 0:
+        summary += f", {failed_count} erori"
+
+    return BatchRetryResponse(
+        success=retried_count > 0,
+        batch_id=batch_id,
+        retried_count=retried_count,
+        failed_count=failed_count,
+        errors=errors,
+        message=summary
+    )
+
+
+# ============ Cancel Endpoints (US-014) ============
+
+
+@router.post("/cancel/{job_id}", response_model=CancelJobResponse)
+async def cancel_job(
+    job_id: str,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+    selected_company: int = Depends(get_selected_company)
+):
+    """
+    Cancel a single OCR processing job.
+
+    Only jobs with status 'pending' or 'processing' can be cancelled.
+    Jobs with status 'completed', 'failed' or 'cancelled' are rejected.
+
+    Access is scoped to the selected company, like the retry endpoints: a job
+    whose batch is recorded for a different company is reported as not
+    found. Batches with no company_id stored are not restricted.
+
+    Important: If a receipt has already been created from this job,
+    it will NOT be deleted - receipts are preserved for audit purposes.
+
+    Args:
+        job_id: The UUID of the OCR job to cancel
+
+    Returns:
+        CancelJobResponse with cancellation details
+
+    Raises:
+        404: If job not found in batch_jobs table or in the OCR queue
+        400: If job has already completed, failed or been cancelled
+        500: If the queue refuses the status update
+    """
+    # Find the job in batch_jobs table
+    batch_job_result = await session.execute(
+        select(BatchJob).where(BatchJob.job_id == job_id)
+    )
+    batch_job = batch_job_result.scalar_one_or_none()
+
+    if not batch_job:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Job {job_id} nu a fost găsit"
+        )
+
+    # Tenant check through the owning batch. Compared as strings so the check
+    # does not depend on how the company_id column is typed; 404 (not 403) so
+    # the existence of other companies' jobs is not disclosed.
+    owning_batch_result = await session.execute(
+        select(BatchUpload).where(BatchUpload.id == batch_job.batch_id)
+    )
+    owning_batch = owning_batch_result.scalar_one_or_none()
+
+    if (
+        owning_batch is not None
+        and owning_batch.company_id is not None
+        and str(owning_batch.company_id) != str(selected_company)
+    ):
+        raise HTTPException(
+            status_code=404,
+            detail=f"Job {job_id} nu a fost găsit"
+        )
+
+    # Get the OCR job from job_queue to check current status
+    ocr_job = await job_queue.get_job(job_id)
+
+    if not ocr_job:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Job {job_id} nu există în coada de procesare"
+        )
+
+    # Check if job can be cancelled
+    current_status = ocr_job.status.value
+
+    if current_status == OCRJobStatus.completed.value:
+        raise HTTPException(
+            status_code=400,
+            detail="Job-ul a fost deja procesat cu succes. Nu poate fi anulat."
+        )
+
+    if current_status == OCRJobStatus.failed.value:
+        raise HTTPException(
+            status_code=400,
+            detail="Job-ul a eșuat deja. Folosiți opțiunea de reîncercare în loc de anulare."
+        )
+
+    if current_status == OCRJobStatus.cancelled.value:
+        raise HTTPException(
+            status_code=400,
+            detail="Job-ul a fost deja anulat."
+        )
+
+    # Update job status to cancelled in job_queue
+    cancelled_at = datetime.utcnow()
+    success = await job_queue.update_status(
+        job_id=job_id,
+        status=OCRJobStatus.cancelled,
+        error="Cancelled by user"
+    )
+
+    if not success:
+        raise HTTPException(
+            status_code=500,
+            detail="Eroare la anularea job-ului"
+        )
+
+    logger.info(
+        f"[CancelJob] Job {job_id} cancelled by {current_user.username} "
+        f"(previous status: {current_status})"
+    )
+
+    return CancelJobResponse(
+        success=True,
+        job_id=job_id,
+        cancelled_at=cancelled_at,
+        message="Job anulat cu succes"
+    )
+
+
+@router.post("/cancel-batch/{batch_id}", response_model=CancelBatchResponse)
+async def cancel_batch(
+    batch_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+    selected_company: int = Depends(get_selected_company)
+):
+    """
+    Cancel all pending/processing jobs in a batch.
+
+    Finds all jobs with status 'pending' or 'processing' in the specified batch
+    and marks them as 'cancelled'. Jobs in any other status, jobs missing
+    from the queue and jobs whose status update fails are counted as skipped.
+
+    Access is scoped to the selected company, like the retry endpoints: a
+    batch recorded for a different company is reported as not found. Batches
+    with no company_id stored are not restricted.
+
+    Important: Receipts that have already been created from completed jobs
+    will NOT be deleted - they are preserved for audit purposes.
+
+    Args:
+        batch_id: The batch ID to cancel
+
+    Returns:
+        CancelBatchResponse with counts of cancelled and skipped jobs
+
+    Raises:
+        404: If batch not found (or it belongs to another company), or no
+             jobs exist for batch
+    """
+    # Verify batch exists
+    batch_result = await session.execute(
+        select(BatchUpload).where(BatchUpload.id == batch_id)
+    )
+    batch = batch_result.scalar_one_or_none()
+
+    if not batch:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Batch {batch_id} nu a fost găsit"
+        )
+
+    # Tenant check. Compared as strings so the check does not depend on how
+    # the company_id column is typed; 404 (not 403) so the existence of
+    # other companies' batches is not disclosed.
+    if batch.company_id is not None and str(batch.company_id) != str(selected_company):
+        raise HTTPException(
+            status_code=404,
+            detail=f"Batch {batch_id} nu a fost găsit"
+        )
+
+    # Get all batch_jobs for this batch
+    batch_jobs_result = await session.execute(
+        select(BatchJob).where(BatchJob.batch_id == batch_id)
+    )
+    batch_jobs = batch_jobs_result.scalars().all()
+
+    if not batch_jobs:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Nu există job-uri în batch-ul {batch_id}"
+        )
+
+    # Process each job - cancel pending/processing, skip everything else
+    cancellable_statuses = (OCRJobStatus.pending.value, OCRJobStatus.processing.value)
+    cancelled_count = 0
+    skipped_count = 0
+
+    for batch_job in batch_jobs:
+        # Get current job status from OCR job queue
+        ocr_job = await job_queue.get_job(batch_job.job_id)
+
+        # Missing from the queue, or already completed/failed/cancelled
+        if not ocr_job or ocr_job.status.value not in cancellable_statuses:
+            skipped_count += 1
+            continue
+
+        success = await job_queue.update_status(
+            job_id=batch_job.job_id,
+            status=OCRJobStatus.cancelled,
+            error="Cancelled by user (batch cancel)"
+        )
+
+        if success:
+            cancelled_count += 1
+            logger.debug(f"[CancelBatch] Cancelled job {batch_job.job_id}")
+        else:
+            # Failed to cancel - count as skipped
+            skipped_count += 1
+            logger.warning(
+                f"[CancelBatch] Failed to cancel job {batch_job.job_id}"
+            )
+
+    logger.info(
+        f"[CancelBatch] Batch {batch_id} cancelled by {current_user.username}: "
+        f"{cancelled_count} cancelled, {skipped_count} skipped"
+    )
+
+    # Build message
+    if cancelled_count == 0:
+        message = f"Nu există job-uri de anulat în batch-ul {batch_id}"
+    elif skipped_count == 0:
+        message = f"{cancelled_count} job-uri anulate"
+    else:
+        message = f"{cancelled_count} job-uri anulate, {skipped_count} ignorate (deja procesate)"
+
+    return CancelBatchResponse(
+        success=cancelled_count > 0,
+        batch_id=batch_id,
+        cancelled_count=cancelled_count,
+        skipped_count=skipped_count,
+        message=message
+    )
--- a/deploy-package-20260223-151231/backend/modules/data_entry/routers/nomenclature.py
+++ b/deploy-package-20260223-151231/backend/modules/data_entry/routers/nomenclature.py
@@ -0,0 +1,260 @@
+"""Nomenclature API endpoints."""
+
+from typing import Optional, List, Annotated
+from fastapi import APIRouter, Depends, HTTPException, Header, Request
+from sqlalchemy.ext.asyncio import AsyncSession
+from pydantic import BaseModel
+
+from backend.modules.data_entry.db.database import get_session
+from backend.modules.data_entry.services.sync_service import SyncService
+
+# Import auth dependencies
+import sys
+from pathlib import Path
+# sys.path setup is done by main.py, so the manual setup below is redundant and left disabled
+# project_root = Path(__file__).parent.parent.parent.parent.parent
+# sys.path.insert(0, str(project_root / "shared"))
+
+from shared.auth.dependencies import get_current_user
+from shared.auth.models import CurrentUser
+
+router = APIRouter()
+
+
+# ============ Selected Company Dependency ============
+
+async def get_selected_company(
+    current_user: CurrentUser = Depends(get_current_user),
+    x_selected_company: Annotated[Optional[str], Header()] = None
+) -> int:
+    """
+    Get selected company from X-Selected-Company header.
+    Validates user access. Falls back to first company if no header.
+    """
+    if x_selected_company:
+        try:
+            company_id = int(x_selected_company)
+        except ValueError:
+            raise HTTPException(400, f"Invalid company ID: {x_selected_company}")
+
+        if str(company_id) in current_user.companies:
+            return company_id
+        raise HTTPException(403, f"Nu aveți acces la firma {company_id}")
+
+    if current_user.companies:
+        try:
+            return int(current_user.companies[0])
+        except (ValueError, IndexError):
+            pass
+
+    raise HTTPException(400, "Nu aveți nicio firmă asignată")
+
+
+# Annotated alias: a parameter declared as SelectedCompany is resolved by
+# FastAPI through the get_selected_company dependency.
+SelectedCompany = Annotated[int, Depends(get_selected_company)]
+
+
+# Request/Response Models
+class SupplierSearchResult(BaseModel):
+    # Response body of GET /suppliers/search; all three values come straight
+    # from SyncService.search_supplier.
+    found: bool
+    supplier: Optional[dict] = None
+    source: str  # 'synced', 'local', 'not_found'
+
+
+class LocalSupplierCreate(BaseModel):
+    # Request body of POST /suppliers/local; the fields are forwarded to
+    # SyncService.create_local_supplier.
+    name: str
+    fiscal_code: Optional[str] = None
+    address: Optional[str] = None
+
+
+class LocalSupplierResponse(BaseModel):
+    # Response body of POST /suppliers/local, filled from the created supplier.
+    id: int
+    name: str
+    fiscal_code: Optional[str]
+    address: Optional[str]
+    # Not set by the endpoint, so responses always carry the default True.
+    is_local: bool = True
+
+
+class SyncResult(BaseModel):
+    # Response body of POST /sync/suppliers and POST /sync/cash-registers.
+    synced: int  # first value returned by the SyncService sync call
+    errors: int  # second value returned by the SyncService sync call
+    message: str  # human-readable summary built by the endpoint
+
+
+class SupplierOption(BaseModel):
+    # One entry of the GET /suppliers list (dropdown/autocomplete option).
+    id: int
+    oracle_id: Optional[int] = None
+    name: str
+    fiscal_code: Optional[str]
+    source: str  # 'synced' or 'local'
+
+
+class CashRegisterOption(BaseModel):
+    # One entry of the GET /cash-registers list; every field is copied from
+    # the dicts returned by SyncService.get_all_cash_registers.
+    id: int
+    oracle_id: int
+    name: str
+    account_code: str
+    register_type: str
+
+
+# Endpoints
+@router.get("/suppliers/search", response_model=SupplierSearchResult)
+async def search_supplier(
+    fiscal_code: Optional[str] = None,
+    name: Optional[str] = None,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Search for supplier by fiscal code or name."""
+    if not fiscal_code and not name:
+        raise HTTPException(status_code=400, detail="Provide fiscal_code or name")
+
+    cid = company_id or selected_company
+
+    found, supplier, source = await SyncService.search_supplier(
+        session, cid, fiscal_code, name
+    )
+
+    return SupplierSearchResult(found=found, supplier=supplier, source=source)
+
+
+@router.get("/suppliers", response_model=List[SupplierOption])
+async def get_suppliers(
+    search: Optional[str] = None,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Get all suppliers (synced + local) for dropdown/autocomplete."""
+    cid = company_id or selected_company
+
+    suppliers = await SyncService.get_all_suppliers(session, cid, search)
+
+    return [
+        SupplierOption(
+            id=s["id"],
+            oracle_id=s.get("oracle_id"),
+            name=s["name"],
+            fiscal_code=s.get("fiscal_code"),
+            source=s["source"]
+        )
+        for s in suppliers
+    ]
+
+
+@router.post("/suppliers/local", response_model=LocalSupplierResponse)
+async def create_local_supplier(
+    data: LocalSupplierCreate,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Create a local supplier from OCR data."""
+    cid = company_id or selected_company
+
+    supplier = await SyncService.create_local_supplier(
+        session, cid, data.name, data.fiscal_code, data.address, current_user.username
+    )
+
+    return LocalSupplierResponse(
+        id=supplier.id,
+        name=supplier.name,
+        fiscal_code=supplier.fiscal_code,
+        address=supplier.address,
+    )
+
+
+@router.get("/cash-registers", response_model=List[CashRegisterOption])
+async def get_cash_registers(
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Get all cash registers for a company."""
+    cid = company_id or selected_company
+
+    registers = await SyncService.get_all_cash_registers(session, cid)
+
+    return [
+        CashRegisterOption(
+            id=r["id"],
+            oracle_id=r["oracle_id"],
+            name=r["name"],
+            account_code=r["account_code"],
+            register_type=r["register_type"]
+        )
+        for r in registers
+    ]
+
+
+@router.post("/sync/suppliers", response_model=SyncResult)
+async def sync_suppliers(
+    request: Request,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Manually trigger supplier sync from Oracle."""
+    cid = company_id or selected_company
+    server_id = getattr(request.state, 'server_id', None)
+
+    synced, errors = await SyncService.sync_suppliers(session, cid, server_id=server_id)
+
+    return SyncResult(
+        synced=synced,
+        errors=errors,
+        message=f"Synced {synced} suppliers with {errors} errors"
+    )
+
+
+@router.post("/sync/cash-registers", response_model=SyncResult)
+async def sync_cash_registers(
+    request: Request,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Manually trigger cash register sync from Oracle."""
+    cid = company_id or selected_company
+    server_id = getattr(request.state, 'server_id', None)
+
+    synced, errors = await SyncService.sync_cash_registers(session, cid, server_id=server_id)
+
+    return SyncResult(
+        synced=synced,
+        errors=errors,
+        message=f"Synced {synced} cash registers with {errors} errors"
+    )
+
+
+@router.post("/sync/all", response_model=dict)
+async def sync_all_nomenclatures(
+    request: Request,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Sync all nomenclatures (suppliers + cash registers) from Oracle."""
+    cid = company_id or selected_company
+    server_id = getattr(request.state, 'server_id', None)
+
+    # Sync suppliers
+    suppliers_synced, suppliers_errors = await SyncService.sync_suppliers(session, cid, server_id=server_id)
+
+    # Sync cash registers
+    registers_synced, registers_errors = await SyncService.sync_cash_registers(session, cid, server_id=server_id)
+
+    return {
+        "suppliers": {
+            "synced": suppliers_synced,
+            "errors": suppliers_errors
+        },
+        "cash_registers": {
+            "synced": registers_synced,
+            "errors": registers_errors
+        },
+        "total_synced": suppliers_synced + registers_synced,
+        "total_errors": suppliers_errors + registers_errors,
+        "message": f"Synced {suppliers_synced} suppliers and {registers_synced} cash registers"
+    }
--- a/deploy-package-20260223-151231/backend/modules/data_entry/routers/ocr.py
+++ b/deploy-package-20260223-151231/backend/modules/data_entry/routers/ocr.py
@@ -0,0 +1,715 @@
+"""
+OCR API endpoints with async job queue support.
+
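+Endpoints:
+- POST /extract - Submit OCR job (returns job_id immediately)
+- GET /jobs/{job_id} - Get job status and result
+- GET /jobs/{job_id}/wait - Long-poll for a job status change
+- GET /queue/status - Get queue statistics
+- GET /status - Check OCR service availability
+- GET /engines - List enabled OCR engines
+- POST /extract-attachment/{attachment_id} - Run OCR on an existing attachment
+- POST /profiles/reload, GET /profiles, GET /profiles/{cui} - Store profile management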
+
+For backwards compatibility, we also support sync mode via query param:
+- POST /extract?sync=true - Process synchronously (blocks until complete)
+"""
+
+import os
+import tempfile
+from datetime import datetime
+from decimal import Decimal
+from pathlib import Path
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Response
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from backend.modules.data_entry.db.database import get_session
+from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
+from backend.modules.data_entry.services.ocr_service import ocr_service
+from backend.modules.data_entry.services.ocr_engine import OCREngine
+from backend.modules.data_entry.services.ocr.job_queue import job_queue, OCRJobStatus as JobStatus
+from backend.modules.data_entry.services.ocr.job_worker import estimate_wait_time
+from backend.modules.data_entry.services.ocr.validation import OCRValidationEngine
+from backend.modules.data_entry.schemas.ocr import (
+    OCRResponse,
+    OCRStatusResponse,
+    ExtractionData,
+    TvaEntry,
+    PaymentMethod,
+    # New job queue schemas
+    OCREngineChoice,
+    OCRJobStatus,
+    OCRJobSubmitResponse,
+    OCRJobResponse,
+    OCRQueueStatusResponse,
+)
+
+# Auth integration
+from shared.auth.dependencies import get_current_user
+from shared.auth.models import CurrentUser
+
+router = APIRouter()
+
+
+# ============================================================================
+# OCR Job Queue Endpoints (NEW)
+# ============================================================================
+
+@router.post("/extract", response_model=OCRJobSubmitResponse)
+async def submit_ocr_job(
+    file: UploadFile = File(...),
+    engine: OCREngineChoice = Query(default=OCREngineChoice.doctr_plus, description="OCR engine to use"),
+    sync: bool = Query(default=False, description="If true, process synchronously (blocks)"),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Submit an OCR job for processing.
+
+    By default, returns immediately with a job_id. Poll GET /jobs/{job_id} for result.
+
+    Use ?sync=true for synchronous processing (blocks until complete).
+    This is for backwards compatibility but not recommended for production.
+
+    Args:
+        file: Image or PDF file (max 10MB)
+        engine: OCR engine choice (tesseract, doctr, doctr_plus, paddleocr)
+        sync: If true, process synchronously (legacy mode)
+
+    Returns:
+        OCRJobSubmitResponse with job_id, queue_position, estimated_wait
+    """
+    allowed_types = ['image/jpeg', 'image/png', 'application/pdf']
+
+    if file.content_type not in allowed_types:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
+        )
+
+    # Read file content
+    content = await file.read()
+
+    # Check file size (10MB limit)
+    if len(content) > 10 * 1024 * 1024:
+        raise HTTPException(
+            status_code=400,
+            detail="File too large. Maximum size is 10MB."
+        )
+
+    # Sync mode - use legacy processing (blocks)
+    if sync:
+        return await _process_sync(content, file, engine, current_user)
+
+    # Async mode - create job and return immediately
+    try:
+        job = await job_queue.create_job(
+            file_bytes=content,
+            mime_type=file.content_type,
+            engine=engine.value,
+            username=current_user.username,
+            original_filename=file.filename
+        )
+
+        # Get queue position
+        queue_position = await job_queue.get_queue_position(job.id)
+        estimated_wait = estimate_wait_time(queue_position or 1)
+
+        return OCRJobSubmitResponse(
+            job_id=job.id,
+            status=OCRJobStatus.pending,
+            queue_position=queue_position or 1,
+            estimated_wait_seconds=estimated_wait,
+            created_at=job.created_at or datetime.utcnow()
+        )
+
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to create OCR job: {str(e)}"
+        )
+
+
+@router.get("/jobs/{job_id}", response_model=OCRJobResponse)
+async def get_job_status(
+    job_id: str,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Get OCR job status and result (instant response).
+
+    For efficient polling, use GET /jobs/{job_id}/wait instead (long-polling).
+
+    Args:
+        job_id: Job UUID from POST /extract response
+
+    Returns:
+        OCRJobResponse with status, queue_position, and result (if completed)
+    """
+    job = await job_queue.get_job(job_id)
+
+    if not job:
+        raise HTTPException(status_code=404, detail="Job not found")
+
+    # Get queue position for pending jobs
+    queue_position = None
+    estimated_wait = None
+
+    if job.status == JobStatus.pending:
+        queue_position = await job_queue.get_queue_position(job_id)
+        estimated_wait = estimate_wait_time(queue_position or 1)
+    elif job.status == JobStatus.processing:
+        queue_position = 0
+        # Estimate remaining time based on average
+        avg_time = await job_queue.get_average_processing_time()
+        estimated_wait = int(avg_time * 0.5)  # Rough estimate: half remaining
+
+    # Convert result to ExtractionData if available
+    result_data = None
+    if job.status == JobStatus.completed and job.result:
+        result_data = _dict_to_extraction_data(job.result)
+        # Apply fuzzy CUI matching
+        result_data = await _apply_fuzzy_cui_matching(result_data, session)
+        # Debug: log suggested_payment_mode being returned
+        print(f"[OCR Router] Returning job {job_id} with suggested_payment_mode={result_data.suggested_payment_mode}", flush=True)
+
+    return OCRJobResponse(
+        job_id=job.id,
+        status=OCRJobStatus(job.status.value),
+        queue_position=queue_position,
+        estimated_wait_seconds=estimated_wait,
+        created_at=job.created_at or datetime.utcnow(),
+        started_at=job.started_at,
+        completed_at=job.completed_at,
+        queue_wait_ms=job.queue_wait_ms,
+        ocr_time_ms=job.ocr_time_ms,
+        processing_time_ms=job.processing_time_ms,
+        result=result_data,
+        error=job.error_message
+    )
+
+
+@router.get("/jobs/{job_id}/wait", response_model=OCRJobResponse)
+async def wait_for_job_status(
+    job_id: str,
+    response: Response,
+    timeout: int = Query(default=30, ge=1, le=60, description="Max wait time in seconds"),
+    wait_for_terminal: bool = Query(default=False, description="If true, only return on completed/failed"),
+    _t: int = Query(default=None, description="Cache-busting timestamp (ignored)"),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Long-poll for OCR job status change.
+
+    Waits until:
+    - Job status changes (default behavior - returns on any status change)
+    - Job reaches terminal state (if wait_for_terminal=true)
+    - Timeout expires (returns current status)
+
+    Recommended client timeout: timeout + 5 seconds
+
+    Args:
+        job_id: Job UUID from POST /extract response
+        timeout: Max wait time in seconds (1-60, default 30)
+        wait_for_terminal: If true, wait until completed/failed only
+
+    Returns:
+        OCRJobResponse with status, queue_position, and result (if completed)
+    """
+    # Prevent caching - critical for long-polling
+    response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0"
+    response.headers["Pragma"] = "no-cache"
+    response.headers["Expires"] = "0"
+    import asyncio
+    import time
+
+    start_time = time.time()
+    end_time = start_time + timeout
+    last_status = None
+    iteration = 0
+
+    print(f"[OCR Wait] Starting long-poll for job {job_id}, timeout={timeout}s, wait_for_terminal={wait_for_terminal}", flush=True)
+
+    while time.time() < end_time:
+        iteration += 1
+        job = await job_queue.get_job(job_id)
+
+        if not job:
+            print(f"[OCR Wait] Job {job_id} not found after {iteration} iterations", flush=True)
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        # Return immediately if job completed or failed (terminal states)
+        if job.status in [JobStatus.completed, JobStatus.failed]:
+            elapsed = time.time() - start_time
+            print(f"[OCR Wait] Job {job_id} {job.status.value} after {elapsed:.1f}s ({iteration} iterations)", flush=True)
+            return await get_job_status(job_id, session, current_user)
+
+        # Return on status change (unless wait_for_terminal is set)
+        if not wait_for_terminal and last_status is not None and job.status != last_status:
+            elapsed = time.time() - start_time
+            print(f"[OCR Wait] Job {job_id} status changed {last_status.value}->{job.status.value} after {elapsed:.1f}s", flush=True)
+            return await get_job_status(job_id, session, current_user)
+
+        last_status = job.status
+
+        # Wait 500ms before next internal check (faster polling for better responsiveness)
+        await asyncio.sleep(0.5)
+
+    # Timeout - return current status
+    elapsed = time.time() - start_time
+    print(f"[OCR Wait] Job {job_id} timeout after {elapsed:.1f}s ({iteration} iterations), status={last_status.value if last_status else 'unknown'}", flush=True)
+    return await get_job_status(job_id, session, current_user)
+
+
+@router.get("/queue/status", response_model=OCRQueueStatusResponse)
+async def get_queue_status(
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Get OCR queue statistics.
+
+    Returns:
+        Queue status with pending/processing counts and average time
+    """
+    stats = await job_queue.get_queue_stats()
+
+    return OCRQueueStatusResponse(
+        pending_jobs=stats["pending"],
+        processing_jobs=stats["processing"],
+        average_time_seconds=stats["average_time_seconds"]
+    )
+
+
+# ============================================================================
+# Legacy Endpoints (backwards compatibility)
+# ============================================================================
+
+@router.get("/status", response_model=OCRStatusResponse)
+async def get_ocr_status():
+    """Check OCR service status and available engines."""
+    engines = OCREngine.get_available_engines()
+    available = len(engines) > 0
+
+    if available:
+        message = f"OCR service ready with engines: {', '.join(engines)}"
+    else:
+        message = "No OCR engines available. Install PaddleOCR or Tesseract."
+
+    return OCRStatusResponse(
+        available=available,
+        engines=engines,
+        message=message
+    )
+
+
+@router.get("/engines")
+async def get_available_engines():
+    """
+    Get list of enabled OCR engines based on .env configuration.
+
+    Returns engines availability and available processing modes.
+    Frontend should use this to filter engine selection dropdown.
+
+    Available engines: tesseract, doctr, doctr_plus, paddleocr
+    """
+    # Check which engines are enabled via .env
+    paddle_enabled = os.getenv("OCR_ENABLE_PADDLEOCR", "true").lower() == "true"
+    tesseract_enabled = os.getenv("OCR_ENABLE_TESSERACT", "true").lower() == "true"
+    default_engine = os.getenv("OCR_DEFAULT_ENGINE", "doctr_plus")
+
+    # Build engines dict
+    engines = {
+        "tesseract": tesseract_enabled,
+        "doctr": True,  # Always available (primary engine)
+        "doctr_plus": True,  # Always available (recommended)
+        "paddleocr": paddle_enabled,
+    }
+
+    # Build available modes based on enabled engines
+    modes = []
+
+    if tesseract_enabled:
+        modes.append("tesseract")
+
+    modes.append("doctr")
+    modes.append("doctr_plus")
+
+    if paddle_enabled:
+        modes.append("paddleocr")
+
+    return {
+        "engines": engines,
+        "available_modes": modes,
+        "default_mode": default_engine,
+        "memory_estimate_mb": {
+            "tesseract": 50,
+            "doctr": 600,
+            "doctr_plus": 600,
+            "paddleocr": 800,
+        }
+    }
+
+
+@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
+async def extract_from_attachment(
+    attachment_id: int,
+    engine: OCREngineChoice = Query(default=OCREngineChoice.doctr_plus),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Extract receipt data from an existing attachment.
+
+    Re-processes an already uploaded file with OCR.
+    This endpoint always processes synchronously.
+    """
+    attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
+
+    if not attachment:
+        raise HTTPException(status_code=404, detail="Attachment not found")
+
+    file_path = AttachmentCRUD.get_file_path(attachment)
+
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="File not found on disk")
+
+    # Check if file type is supported
+    if attachment.mime_type not in ['image/jpeg', 'image/png', 'application/pdf']:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File type not supported for OCR: {attachment.mime_type}"
+        )
+
+    # TODO: Could use job queue here too, but keeping sync for now
+    success, message, result = await ocr_service.process_image(
+        file_path, attachment.mime_type
+    )
+
+    if not success:
+        raise HTTPException(status_code=422, detail=message)
+
+    data = _result_to_extraction_data(result)
+    # Apply fuzzy CUI matching
+    data = await _apply_fuzzy_cui_matching(data, session)
+    return OCRResponse(success=True, message=message, data=data)
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+async def _apply_fuzzy_cui_matching(
+    extraction_data: ExtractionData,
+    session: AsyncSession
+) -> ExtractionData:
+    """
+    Apply fuzzy CUI matching to extraction data.
+
+    ONLY applies fuzzy matching if CUI is missing OR has invalid checksum.
+    If CUI has valid checksum, we trust the OCR and skip fuzzy matching.
+
+    Args:
+        extraction_data: ExtractionData with CUI to potentially correct
+        session: AsyncSession for database lookups
+
+    Returns:
+        ExtractionData with CUI corrected if a match was found
+    """
+    from backend.modules.data_entry.services.ocr.validation import CUIChecksumRule
+
+    # Skip if no CUI and no vendor name (nothing to match)
+    if not extraction_data.cui and not extraction_data.partner_name:
+        return extraction_data
+
+    # Check if CUI has valid checksum - if valid, skip fuzzy matching
+    if extraction_data.cui:
+        cui_digits = CUIChecksumRule.extract_digits(extraction_data.cui)
+        if len(cui_digits) >= 6 and CUIChecksumRule.validate_checksum(cui_digits):
+            print(f"[Fuzzy Match] CUI {extraction_data.cui} has valid checksum, skipping fuzzy match", flush=True)
+            return extraction_data
+
+    # CUI missing or invalid checksum - try fuzzy matching
+    try:
+        match = await OCRValidationEngine.fuzzy_match_supplier(
+            cui=extraction_data.cui,
+            vendor_name=extraction_data.partner_name,
+            db_session=session
+        )
+
+        if match:
+            corrected_cui, supplier_name = match
+            if corrected_cui != extraction_data.cui:
+                print(f"[Fuzzy Match] Corrected: {extraction_data.cui} -> {corrected_cui} ({supplier_name})", flush=True)
+                extraction_data.cui = corrected_cui
+                # Also set partner_name if not already set
+                if not extraction_data.partner_name:
+                    extraction_data.partner_name = supplier_name
+    except Exception as e:
+        print(f"[Fuzzy Match] Error: {e}", flush=True)
+
+    return extraction_data
+
+
+async def _process_sync(
+    content: bytes,
+    file: UploadFile,
+    engine: OCREngineChoice,
+    current_user: CurrentUser
+) -> OCRJobSubmitResponse:
+    """
+    Process OCR synchronously (legacy mode).
+
+    Creates a job, processes it immediately, and returns the result
+    wrapped in a JobSubmitResponse for API consistency.
+    """
+    # Get file extension
+    suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
+    if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
+        suffix = '.jpg'
+
+    # Save to temp file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+        tmp.write(content)
+        tmp_path = Path(tmp.name)
+
+    try:
+        success, message, result = await ocr_service.process_image(
+            tmp_path, file.content_type
+        )
+
+        if not success:
+            raise HTTPException(status_code=422, detail=message)
+
+        # Create a fake job response with the result embedded
+        # This maintains API compatibility
+        now = datetime.utcnow()
+
+        # For sync mode, we return a special response that includes
+        # the result directly. Clients should check if result is present.
+        return OCRJobSubmitResponse(
+            job_id="sync-" + str(hash(content))[:16],
+            status=OCRJobStatus.completed,
+            queue_position=0,
+            estimated_wait_seconds=0,
+            created_at=now
+        )
+
+    finally:
+        # Clean up temp file
+        if tmp_path.exists():
+            os.unlink(tmp_path)
+
+
+def _result_to_extraction_data(result) -> ExtractionData:
+    """Convert ExtractionResult to ExtractionData schema."""
+    # Convert tva_entries from dict to TvaEntry objects
+    tva_entries_schema = [
+        TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
+        for e in result.tva_entries
+    ] if result.tva_entries else []
+
+    # Convert payment_methods from dict to PaymentMethod objects
+    payment_methods_list = [
+        PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount'])))
+        for pm in result.payment_methods
+    ] if result.payment_methods else []
+
+    # Auto-suggest payment_mode based on detected methods
+    suggested_payment_mode = None
+    if payment_methods_list:
+        has_card = any(pm.method == 'CARD' for pm in payment_methods_list)
+        if has_card:
+            suggested_payment_mode = 'banca'
+
+    return ExtractionData(
+        receipt_type=result.receipt_type,
+        receipt_number=result.receipt_number,
+        receipt_series=result.receipt_series,
+        receipt_date=result.receipt_date,
+        amount=result.amount,
+        partner_name=result.partner_name,
+        cui=result.cui,
+        description=result.description,
+        tva_entries=tva_entries_schema,
+        tva_total=result.tva_total,
+        address=result.address,
+        items_count=result.items_count,
+        payment_methods=payment_methods_list,
+        suggested_payment_mode=suggested_payment_mode,
+        client_name=result.client_name,
+        client_cui=result.client_cui,
+        client_address=result.client_address,
+        confidence_amount=result.confidence_amount,
+        confidence_date=result.confidence_date,
+        confidence_vendor=result.confidence_vendor,
+        confidence_client=getattr(result, 'confidence_client', 0.0),
+        overall_confidence=result.overall_confidence,
+        raw_text=result.raw_text,
+        raw_texts=getattr(result, 'raw_texts', []),
+        ocr_engine=result.ocr_engine,
+        processing_time_ms=result.processing_time_ms,
+        needs_manual_review=result.needs_manual_review,
+        validation_warnings=result.validation_warnings,
+        validation_errors=result.validation_errors,
+        inter_ocr_ratios=result.inter_ocr_ratios,
+    )
+
+
+def _dict_to_extraction_data(data: dict) -> ExtractionData:
+    """Convert result dict (from job queue) to ExtractionData schema."""
+    from datetime import date
+
+    # Parse date if string
+    receipt_date = data.get('receipt_date')
+    if isinstance(receipt_date, str):
+        try:
+            receipt_date = date.fromisoformat(receipt_date)
+        except (ValueError, TypeError):
+            receipt_date = None
+
+    # Convert tva_entries
+    tva_entries = data.get('tva_entries', []) or []
+    tva_entries_schema = []
+    for e in tva_entries:
+        if isinstance(e, dict):
+            tva_entries_schema.append(TvaEntry(
+                code=e.get('code'),
+                percent=e.get('percent', 0),
+                amount=Decimal(str(e.get('amount', 0)))
+            ))
+
+    # Convert payment_methods
+    payment_methods = data.get('payment_methods', []) or []
+    payment_methods_list = []
+    for pm in payment_methods:
+        if isinstance(pm, dict):
+            payment_methods_list.append(PaymentMethod(
+                method=pm.get('method', 'NUMERAR'),
+                amount=Decimal(str(pm.get('amount', 0)))
+            ))
+
+    # Convert amount and tva_total to Decimal
+    amount = data.get('amount')
+    if amount is not None:
+        amount = Decimal(str(amount))
+
+    tva_total = data.get('tva_total')
+    if tva_total is not None:
+        tva_total = Decimal(str(tva_total))
+
+    return ExtractionData(
+        receipt_type=data.get('receipt_type', 'bon_fiscal'),
+        receipt_number=data.get('receipt_number'),
+        receipt_series=data.get('receipt_series'),
+        receipt_date=receipt_date,
+        amount=amount,
+        partner_name=data.get('partner_name'),
+        cui=data.get('cui'),
+        description=data.get('description'),
+        tva_entries=tva_entries_schema,
+        tva_total=tva_total,
+        address=data.get('address'),
+        items_count=data.get('items_count'),
+        payment_methods=payment_methods_list,
+        suggested_payment_mode=data.get('suggested_payment_mode'),
+        client_name=data.get('client_name'),
+        client_cui=data.get('client_cui'),
+        client_address=data.get('client_address'),
+        confidence_amount=data.get('confidence_amount', 0.0),
+        confidence_date=data.get('confidence_date', 0.0),
+        confidence_vendor=data.get('confidence_vendor', 0.0),
+        confidence_client=data.get('confidence_client', 0.0),
+        confidence_tva=data.get('confidence_tva', 0.0),
+        confidence_payment=data.get('confidence_payment', 0.0),
+        overall_confidence=data.get('overall_confidence', 0.0),
+        raw_text=data.get('raw_text', ''),
+        raw_texts=data.get('raw_texts', []),
+        ocr_engine=data.get('ocr_engine', ''),
+        processing_time_ms=data.get('processing_time_ms', 0),
+        needs_manual_review=data.get('needs_manual_review'),
+        validation_warnings=data.get('validation_warnings', []),
+        validation_errors=data.get('validation_errors', []),
+        inter_ocr_ratios=data.get('inter_ocr_ratios', {}),
+    )
+
+
+# ============================================================================
+# Store Profiles Management Endpoints
+# ============================================================================
+
+@router.post("/profiles/reload")
+async def reload_store_profiles(
+    current_user: CurrentUser = Depends(get_current_user)
+) -> dict:
+    """
+    Hot-reload all store profiles.
+
+    Reloads profile Python modules without server restart.
+    Use after adding/modifying profile files.
+
+    Returns:
+        Dict with reloaded count and profile list
+    """
+    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry
+
+    count = ProfileRegistry.reload_all()
+    status = ProfileRegistry.get_reload_status()
+
+    return {
+        "success": True,
+        "reloaded_modules": count,
+        "profiles_count": status["profiles_count"],
+        "registered_cuis": status["registered_cuis"],
+        "last_reload": status["last_reload"],
+    }
+
+
+@router.get("/profiles")
+async def list_store_profiles(
+    current_user: CurrentUser = Depends(get_current_user)
+) -> dict:
+    """
+    List all registered store profiles.
+
+    Returns:
+        Dict with profiles list and status
+    """
+    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry
+
+    profiles = ProfileRegistry.list_profiles()
+    status = ProfileRegistry.get_reload_status()
+
+    return {
+        "profiles": profiles,
+        "count": len(profiles),
+        "last_reload": status["last_reload"],
+    }
+
+
+@router.get("/profiles/{cui}")
+async def get_store_profile(
+    cui: str,
+    current_user: CurrentUser = Depends(get_current_user)
+) -> dict:
+    """
+    Get details for a specific store profile.
+
+    Args:
+        cui: Store CUI (with or without RO prefix)
+
+    Returns:
+        Profile details including validation hints
+
+    Raises:
+        404: If no profile exists for this CUI
+    """
+    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry
+
+    info = ProfileRegistry.get_profile_info(cui)
+
+    if not info:
+        raise HTTPException(
+            status_code=404,
+            detail=f"No profile registered for CUI: {cui}"
+        )
+
+    return info
--- a/deploy-package-20260223-151231/backend/modules/data_entry/routers/ocr_settings.py
+++ b/deploy-package-20260223-151231/backend/modules/data_entry/routers/ocr_settings.py
@@ -0,0 +1,268 @@
+"""
+OCR Settings and Metrics API endpoints.
+
+Endpoints:
+- GET /settings/ocr-engines - List the active OCR engines from configuration
+- GET/POST /settings/ocr-preference - Get or set user's preferred OCR engine
+- GET /metrics/ocr/summary - Get OCR metrics summary by engine
+- GET /metrics/ocr/history - Get user's OCR job history
+- GET /metrics/ocr/stats - Get overall OCR statistics
+"""
+
+from typing import List, Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query
+from pydantic import BaseModel, Field
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from backend.modules.data_entry.db.database import get_session
+from backend.modules.data_entry.db.crud.ocr_settings import OCRPreferenceCRUD, OCRMetricsCRUD
+from backend.modules.data_entry.db.models.ocr_settings import OCREngine, OCRMetricsSummary
+
+# Auth integration
+from shared.auth.dependencies import get_current_user
+from shared.auth.models import CurrentUser
+
+router = APIRouter()  # Included without a prefix by create_data_entry_router(); routes declare /settings/... and /metrics/... themselves
+
+
+# ============================================================================
+# Schemas
+# ============================================================================
+
+class OCRPreferenceResponse(BaseModel):
+    """Response body returned by the GET and POST /settings/ocr-preference endpoints."""
+    username: str  # Filled with the authenticated caller's username by both endpoints
+    preferred_engine: str  # Engine name as a plain string (the stored enum's .value, or "doctr_plus")
+    available_engines: List[str] = Field(
+        default=["tesseract", "doctr", "doctr_plus", "paddleocr"],
+        description="Available OCR engines"
+    )
+
+
+class OCRPreferenceRequest(BaseModel):
+    """Request body for POST /settings/ocr-preference."""
+    preferred_engine: str = Field(  # Plain string here; the endpoint checks it against the available engines
+        default="doctr_plus",
+        description="Preferred OCR engine: tesseract, doctr, doctr_plus, paddleocr"
+    )
+
+
+class OCRMetricsHistoryItem(BaseModel):
+    """One OCR job and its metrics, as serialized by GET /metrics/ocr/history."""
+    job_id: str
+    engine_requested: str  # Engine that was asked for - presumably by the user; confirm against the metrics writer
+    engine_used: str  # Engine recorded as actually used for the job
+    processing_time_ms: int  # Duration in milliseconds, per the field name
+    success: bool
+    overall_confidence: float  # Scale is not visible in this file (0-1 vs 0-100) - TODO confirm
+    fields_extracted: int
+    created_at: str  # The history endpoint fills this with created_at.isoformat()
+    original_filename: Optional[str] = None
+
+
+class OCRMetricsHistoryResponse(BaseModel):
+    """Envelope returned by GET /metrics/ocr/history."""
+    items: List[OCRMetricsHistoryItem]
+    total: int  # The endpoint sets this to len(items) for the returned page, not a grand total
+
+
+class OCRStatsResponse(BaseModel):
+    """Aggregate OCR statistics returned by GET /metrics/ocr/stats."""
+    total_jobs: int
+    successful_jobs: int
+    failed_jobs: int
+    success_rate: float  # Ratio vs percentage is decided by OCRMetricsCRUD.get_overall_stats - TODO confirm
+    avg_processing_time_ms: float
+    avg_confidence: float
+    period_days: int  # presumably echoes the requested `days` window; verify in the CRUD layer
+
+
+class OCRActiveEnginesResponse(BaseModel):
+    """Response body for GET /settings/ocr-engines."""
+    engines: List[str] = Field(description="List of active OCR engines from .env config")
+    recommended: str = Field(default="doctr_plus", description="Recommended engine")
+
+
+# ============================================================================
+# OCR Engines Configuration Endpoint
+# ============================================================================
+
+@router.get("/settings/ocr-engines", response_model=OCRActiveEnginesResponse)
+async def get_active_ocr_engines():
+    """
+    Report which OCR engines the frontend dropdown should offer.
+
+    The list is read from settings.ocr_active_engines_list.
+    Configured via the OCR_ACTIVE_ENGINES environment variable.
+
+    Default: doctr,doctr_plus
+    Available: tesseract, paddleocr, doctr, doctr_plus
+    """
+    from backend.modules.data_entry.config import settings
+
+    active_engines = settings.ocr_active_engines_list
+
+    # The recommended engine is a fixed literal here, not derived from configuration
+    return OCRActiveEnginesResponse(engines=active_engines, recommended="doctr_plus")
+
+
+# ============================================================================
+# OCR Preference Endpoints
+# ============================================================================
+
+@router.get("/settings/ocr-preference", response_model=OCRPreferenceResponse)
+async def get_ocr_preference(
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Fetch the calling user's saved OCR engine preference.
+
+    Falls back to 'doctr_plus' when the lookup returns nothing for the user.
+    The response also carries the engines reported by the OCR service.
+    """
+    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass
+
+    username = current_user.username
+    stored = await OCRPreferenceCRUD.get_by_username(session, username)
+
+    # Unwrap the enum to its string value, or use the default engine name
+    engine_name = stored.preferred_engine.value if stored else "doctr_plus"
+
+    return OCRPreferenceResponse(
+        username=username, preferred_engine=engine_name,
+        available_engines=OCREngineClass.get_available_engines(),
+    )
+
+
+@router.post("/settings/ocr-preference", response_model=OCRPreferenceResponse)
+async def set_ocr_preference(
+    request: OCRPreferenceRequest,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Set user's preferred OCR engine.
+
+    Valid engines: tesseract, doctr, doctr_plus, paddleocr
+    Note: Available engines depend on .env configuration (OCR_ENABLE_PADDLEOCR, OCR_ENABLE_TESSERACT)
+
+    Raises:
+        HTTPException 400: If the requested engine is not currently available,
+            or is available but has no matching OCREngine enum member.
+    """
+    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass
+
+    # Get dynamically available engines (queried once and reused for the response)
+    available = OCREngineClass.get_available_engines()
+
+    if request.preferred_engine not in available:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid engine. Must be one of: {', '.join(available)}"
+        )
+
+    # Map string to enum
+    engine_map = {
+        "tesseract": OCREngine.TESSERACT,
+        "doctr": OCREngine.DOCTR,
+        "doctr_plus": OCREngine.DOCTR_PLUS,
+        "paddleocr": OCREngine.PADDLEOCR,
+    }
+    engine_enum = engine_map.get(request.preferred_engine)
+    if engine_enum is None:
+        # Never store a different engine than the one requested (this used to fall back to doctr_plus)
+        raise HTTPException(status_code=400, detail=f"Unsupported engine: {request.preferred_engine}")
+
+    # Save preference
+    preference = await OCRPreferenceCRUD.create_or_update(session, current_user.username, engine_enum)
+
+    return OCRPreferenceResponse(
+        username=current_user.username,
+        preferred_engine=preference.preferred_engine.value,
+        available_engines=available
+    )
+
+
+# ============================================================================
+# OCR Metrics Endpoints
+# ============================================================================
+
+@router.get("/metrics/ocr/summary", response_model=List[OCRMetricsSummary])
+async def get_ocr_metrics_summary(
+    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Return per-engine OCR metrics for the calling user.
+
+    Args:
+        days: Look-back window in days (1-365, default 30), forwarded to the CRUD layer
+
+    Returns:
+        The summaries produced by OCRMetricsCRUD.get_summary_by_engine()
+    """
+    query_args = {"days": days, "username": current_user.username}
+    return await OCRMetricsCRUD.get_summary_by_engine(session, **query_args)
+
+
+@router.get("/metrics/ocr/history", response_model=OCRMetricsHistoryResponse)
+async def get_ocr_metrics_history(
+    limit: int = Query(default=50, ge=1, le=200, description="Max items to return"),
+    offset: int = Query(default=0, ge=0, description="Items to skip"),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Return one page of the calling user's OCR job history.
+
+    Args:
+        limit: Maximum number of jobs to return (1-200, default 50)
+        offset: Number of jobs to skip before the page starts
+
+    Returns:
+        OCRMetricsHistoryResponse whose `total` is the number of items in this
+        page (len of the returned list), not the size of the full history.
+
+    Ordering is decided by OCRMetricsCRUD.get_user_history() (documented as most recent first).
+    """
+    def _to_item(row) -> OCRMetricsHistoryItem:
+        # Copy fields one-to-one; created_at is converted with isoformat()
+        return OCRMetricsHistoryItem(
+            job_id=row.job_id,
+            engine_requested=row.engine_requested,
+            engine_used=row.engine_used,
+            processing_time_ms=row.processing_time_ms,
+            success=row.success,
+            overall_confidence=row.overall_confidence,
+            fields_extracted=row.fields_extracted,
+            created_at=row.created_at.isoformat(),
+            original_filename=row.original_filename,
+        )
+
+    rows = await OCRMetricsCRUD.get_user_history(
+        session, username=current_user.username, limit=limit, offset=offset)
+    page = list(map(_to_item, rows))
+    return OCRMetricsHistoryResponse(items=page, total=len(page))
+
+
+@router.get("/metrics/ocr/stats", response_model=OCRStatsResponse)
+async def get_ocr_stats(
+    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Return aggregate OCR statistics for the calling user.
+
+    Args:
+        days: Look-back window in days (1-365, default 30)
+
+    Returns:
+        OCRStatsResponse built by unpacking the mapping returned by the CRUD layer
+    """
+    username = current_user.username
+    aggregate = await OCRMetricsCRUD.get_overall_stats(session, days=days, username=username)
+    return OCRStatsResponse(**aggregate)
--- a/deploy-package-20260223-151231/backend/modules/data_entry/routers/receipts.py
+++ b/deploy-package-20260223-151231/backend/modules/data_entry/routers/receipts.py
@@ -0,0 +1,705 @@
+"""API endpoints for receipts."""
+
+from typing import List, Optional, Annotated
+from pathlib import Path
+
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, Header, Response
+from fastapi.responses import FileResponse, StreamingResponse
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from backend.modules.data_entry.db.database import get_session
+from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
+from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
+from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
+from backend.modules.data_entry.services.receipt_service import ReceiptService
+from backend.modules.data_entry.services.nomenclature_service import NomenclatureService
+from backend.modules.data_entry.schemas.receipt import (
+    ReceiptCreate,
+    ReceiptUpdate,
+    ReceiptResponse,
+    ReceiptListResponse,
+    ReceiptFilter,
+    ProcessingStats,
+    AttachmentResponse,
+    AccountingEntryResponse,
+    WorkflowAction,
+    RejectRequest,
+    EntriesUpdateRequest,
+    PartnerOption,
+    AccountOption,
+    CashRegisterOption,
+    ExpenseTypeOption,
+    BulkDeleteRequest,
+    BulkDeleteResponse,
+    BulkDeleteFailure,
+)
+from backend.modules.data_entry.db.models.receipt import ReceiptStatus, ReceiptDirection
+from backend.modules.data_entry.services import sse_service
+
+# Auth integration
+from shared.auth.dependencies import get_current_user
+from shared.auth.models import CurrentUser
+
+
+router = APIRouter()  # Included under the "/receipts" prefix by create_data_entry_router()
+
+
+# ============ Helper for selected company from header ============
+
+async def get_selected_company(
+    current_user: CurrentUser = Depends(get_current_user),
+    x_selected_company: Annotated[Optional[str], Header()] = None
+) -> int:
+    """
+    Get selected company from X-Selected-Company header.
+
+    Validates that the user has access to the specified company.
+    Falls back to user's first company if no header is provided.
+
+    Returns:
+        The selected company ID as an int
+
+    Raises:
+        HTTPException 403: If user doesn't have access to specified company
+        HTTPException 400: If the header is not an integer, or the user has no
+            company (or a non-integer first company) assigned
+    """
+    if x_selected_company:
+        try:
+            company_id = int(x_selected_company)
+        except ValueError:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid company ID format: {x_selected_company}"
+            ) from None  # the int() traceback adds nothing to this client error
+
+        # Validate user has access to this company
+        # Auth stores companies as strings
+        if str(company_id) not in current_user.companies:
+            raise HTTPException(
+                status_code=403,
+                detail=f"Nu aveți acces la firma {company_id}"
+            )
+        return company_id
+
+    # No header - use first company from user's list
+    if current_user.companies:
+        try:
+            return int(current_user.companies[0])
+        except ValueError:
+            pass  # Non-integer first entry: fall through to the 400 below
+
+    raise HTTPException(status_code=400, detail="Nu aveți nicio firmă asignată")
+
+
+# Annotated alias: a parameter typed SelectedCompany receives the int returned by get_selected_company()
+SelectedCompany = Annotated[int, Depends(get_selected_company)]
+
+
+# Legacy function for backwards compatibility (deprecated)
+def get_current_user_company(current_user: CurrentUser) -> int:
+    """
+    DEPRECATED: prefer the get_selected_company() dependency.
+    Returns the user's first company as an int and never looks at the
+    X-Selected-Company header; yields 1 when no usable company exists.
+    """
+    fallback_company = 1  # Returned for an empty list or a non-integer first entry
+    try:
+        return int(current_user.companies[0]) if current_user.companies else fallback_company
+    except (ValueError, IndexError):
+        return fallback_company
+
+
+# ============ SSE Endpoint for Real-time Status Updates ============
+
+@router.get("/sse/status")
+async def sse_status_stream(
+    batch_id: Optional[str] = Query(
+        default=None,
+        description="Optional batch_id to filter events for a specific batch"
+    ),
+):
+    """
+    Open a Server-Sent Events stream of receipt status changes.
+
+    The response body is whatever sse_service.subscribe() returns; the
+    connection stays open so clients get status updates pushed to them instead
+    of polling. No authentication dependency is declared on this route.
+
+    Query Parameters:
+        batch_id: When given, it is forwarded to the subscription so only events
+            for that batch upload are delivered.
+
+    Event Format:
+        data: {"receipt_id": 123, "status": "DRAFT", "processing_status": "completed", ...}
+
+    Response headers set here:
+        - Content-Type: text/event-stream (via media_type)
+        - Cache-Control: no-cache
+        - Connection: keep-alive
+        - X-Accel-Buffering: no (turns off nginx response buffering)
+
+    Reconnection:
+        Per the original notes, a `retry: 3000` hint asks clients to reconnect
+        after 3 seconds; it is not set in this function (presumably by sse_service).
+
+    Example:
+        curl -N http://localhost:8000/api/data-entry/receipts/sse/status?batch_id=abc-123
+    """
+    stream_headers = {
+        "Cache-Control": "no-cache",
+        "Connection": "keep-alive",
+        "X-Accel-Buffering": "no",  # Disable nginx buffering
+    }
+    event_stream = sse_service.subscribe(batch_id=batch_id)
+    return StreamingResponse(event_stream, media_type="text/event-stream", headers=stream_headers)
+
+
+# ============ Receipt CRUD Endpoints ============
+
+@router.post("/", response_model=ReceiptResponse)
+async def create_receipt(
+    data: ReceiptCreate,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Create a receipt on behalf of the calling user (new receipts start in DRAFT status)."""
+    created = await ReceiptService.create_receipt(session, data, current_user.username)
+    return ReceiptResponse.model_validate(created)
+
+
+@router.get("/", response_model=ReceiptListResponse)
+async def list_receipts(
+    response: Response,
+    status: Optional[ReceiptStatus] = None,
+    direction: Optional[ReceiptDirection] = None,
+    company_id: Optional[int] = None,
+    created_by: Optional[str] = None,
+    date_from: Optional[str] = None,
+    date_to: Optional[str] = None,
+    search: Optional[str] = None,
+    # Bulk upload filters (US-012)
+    processing_status: Optional[str] = Query(default=None, description="Filter by processing status: pending, processing, completed, failed"),
+    batch_id: Optional[str] = Query(default=None, description="Filter by batch_id UUID"),
+    sort_by: Optional[str] = Query(default=None, description="Sort field: processing_started_at, processing_started_at_asc"),
+    # Pagination
+    page: int = Query(default=1, ge=1),
+    page_size: int = Query(default=20, ge=1, le=100),
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Get paginated list of receipts with filters.
+
+    US-012: Extended with batch_id, processing_status filters and processing_stats.
+    date_from / date_to must be ISO dates (e.g. YYYY-MM-DD); anything else yields HTTP 400.
+    NOTE(review): a truthy company_id overrides the header-validated company and is
+    not checked against the user's companies here - confirm this is intended.
+    """
+    from datetime import date as date_type
+
+    # Disable browser caching to always get fresh data
+    response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0"
+    response.headers["Pragma"] = "no-cache"
+
+    try:
+        parsed_from = date_type.fromisoformat(date_from) if date_from else None
+        parsed_to = date_type.fromisoformat(date_to) if date_to else None
+    except ValueError as exc:  # a malformed date used to escape as an unhandled 500
+        raise HTTPException(status_code=400, detail=f"Invalid date filter: {exc}") from exc
+
+    filters = ReceiptFilter(
+        status=status, direction=direction, company_id=company_id or selected_company,
+        created_by=created_by, date_from=parsed_from, date_to=parsed_to, search=search,
+        processing_status=processing_status, batch_id=batch_id, sort_by=sort_by,
+        page=page, page_size=page_size,
+    )
+    return await ReceiptService.get_receipts(session, filters)
+
+
+@router.get("/pending", response_model=List[ReceiptResponse])
+async def list_pending_receipts(
+    response: Response,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """List receipts awaiting review (accountant view); a truthy company_id overrides the header company."""
+    # Ask browsers and proxies never to cache or reuse this response
+    no_cache = {"Cache-Control": "no-store, no-cache, must-revalidate, max-age=0", "Pragma": "no-cache"}
+    for header_name, header_value in no_cache.items():
+        response.headers[header_name] = header_value
+
+    effective_company = company_id or selected_company
+    pending = await ReceiptCRUD.get_pending_review(session, effective_company)
+    return list(map(ReceiptResponse.model_validate, pending))
+
+
+@router.get("/stats")
+async def get_receipt_stats(
+    response: Response,
+    company_id: Optional[int] = None,
+    my_receipts: bool = False,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Return receipt statistics for a company, optionally limited to the caller's own receipts."""
+    # Ask browsers and proxies never to cache or reuse this response
+    response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0"
+    response.headers["Pragma"] = "no-cache"
+
+    # my_receipts=True passes the caller's username as the created_by filter
+    owner_filter = current_user.username if my_receipts else None
+    # A truthy company_id query value takes precedence over the header-derived company
+    target_company = company_id or selected_company
+    return await ReceiptCRUD.get_stats(session, target_company, created_by=owner_filter)
+
+
+@router.get("/{receipt_id}", response_model=ReceiptResponse)
+async def get_receipt(
+    receipt_id: int,
+    response: Response,
+    session: AsyncSession = Depends(get_session),
+):
+    """Fetch a single receipt by id; responds with 404 when the service finds nothing."""
+    # NOTE(review): no get_current_user dependency is declared here - confirm auth is enforced elsewhere
+    no_cache = {"Cache-Control": "no-store, no-cache, must-revalidate, max-age=0", "Pragma": "no-cache"}
+    for header_name, header_value in no_cache.items():
+        response.headers[header_name] = header_value
+
+    found = await ReceiptService.get_receipt(session, receipt_id)
+    if found:
+        return ReceiptResponse.model_validate(found)
+
+    raise HTTPException(status_code=404, detail="Receipt not found")
+
+
+@router.put("/{receipt_id}", response_model=ReceiptResponse)
+async def update_receipt(
+    receipt_id: int,
+    data: ReceiptUpdate,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Apply an update to a receipt; the service decides eligibility (DRAFT only, creator only)."""
+    editor = current_user.username
+    outcome = await ReceiptService.update_receipt(session, receipt_id, data, editor)
+    ok, service_message, updated = outcome
+
+    # Every refusal reported by the service becomes HTTP 400 with the service's message
+    if ok:
+        return ReceiptResponse.model_validate(updated)
+    raise HTTPException(status_code=400, detail=service_message)
+
+
+@router.delete("/bulk", response_model=BulkDeleteResponse)
+async def bulk_delete_receipts(
+    data: BulkDeleteRequest,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """
+    Bulk delete receipts (US-024).
+
+    Deletes multiple receipts in a single request with partial success support.
+    Repeated IDs in the request are processed once, in first-seen order.
+
+    Validation rules:
+    - Each receipt must be in DRAFT status
+    - Each receipt must be created by the current user
+    - Receipts with processing_status 'pending' or 'processing' cannot be deleted
+
+    Returns:
+        BulkDeleteResponse with deleted IDs and failed items with error messages
+    """
+    deleted: List[int] = []
+    failed: List[BulkDeleteFailure] = []
+
+    # dict.fromkeys de-duplicates while keeping order, so a repeated ID is not
+    # reported as deleted and then again as "not found" on its second pass.
+    for receipt_id in dict.fromkeys(data.ids):
+        # Get receipt with relationships for deletion
+        receipt = await ReceiptCRUD.get_by_id(session, receipt_id, include_relations=True)
+
+        if not receipt:
+            failed.append(BulkDeleteFailure(id=receipt_id, error="Bonul nu a fost găsit"))
+            continue
+
+        # Check if receipt is being processed (bulk upload in progress)
+        if receipt.processing_status in ("pending", "processing"):
+            failed.append(BulkDeleteFailure(
+                id=receipt_id,
+                error="Bonul este în curs de procesare și nu poate fi șters"
+            ))
+            continue
+
+        # Check status - only DRAFT can be deleted
+        if receipt.status != ReceiptStatus.DRAFT:
+            failed.append(BulkDeleteFailure(
+                id=receipt_id,
+                error=f"Doar bonurile în status DRAFT pot fi șterse (status curent: {receipt.status.value})"
+            ))
+            continue
+
+        # Check ownership
+        if receipt.created_by != current_user.username:
+            failed.append(BulkDeleteFailure(id=receipt_id, error="Doar creatorul bonului poate să-l șteargă"))
+            continue
+
+        # All validations passed - delete the receipt
+        # Note: Cascade delete handles attachments and accounting entries
+        await ReceiptCRUD.delete(session, receipt)
+        deleted.append(receipt_id)
+
+    return BulkDeleteResponse(deleted=deleted, failed=failed)
+
+
+@router.delete("/{receipt_id}")
+async def delete_receipt(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Delete one receipt via the service layer (documented as DRAFT only, creator only)."""
+    requester = current_user.username
+    ok, service_message = await ReceiptService.delete_receipt(session, receipt_id, requester)
+
+    # Any refusal from the service becomes HTTP 400 carrying the service's message
+    if ok:
+        return {"success": True, "message": service_message}
+
+    raise HTTPException(status_code=400, detail=service_message)
+
+
+# ============ Workflow Endpoints ============
+
+@router.post("/{receipt_id}/submit", response_model=WorkflowAction)
+async def submit_receipt(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Submit a receipt for review (DRAFT → PENDING_REVIEW) as the calling user.
+
+    The service outcome always comes back as a WorkflowAction; this function
+    raises no HTTP error when the service reports a failed transition.
+    """
+    actor = current_user.username
+    ok, note, updated = await ReceiptService.submit_for_review(session, receipt_id, actor)
+
+    if not (ok and updated):
+        # Failed transition (or no receipt came back): skip the SSE broadcast
+        payload = ReceiptResponse.model_validate(updated) if updated else None
+        return WorkflowAction(success=ok, message=note, receipt=payload)
+
+    # US-030: push the new state to SSE subscribers before answering
+    await sse_service.broadcast_status_change(
+        receipt_id=updated.id, status=updated.status.value,
+        processing_status=updated.processing_status, batch_id=updated.batch_id,
+    )
+    return WorkflowAction(success=ok, message=note, receipt=ReceiptResponse.model_validate(updated))
+
+
+@router.post("/{receipt_id}/approve", response_model=WorkflowAction)
+async def approve_receipt(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Approve a receipt (PENDING_REVIEW → APPROVED); documented as an accountant action.
+
+    The service outcome always comes back as a WorkflowAction; this function
+    raises no HTTP error when the service reports a failed transition.
+    """
+    actor = current_user.username
+    ok, note, updated = await ReceiptService.approve_receipt(session, receipt_id, actor)
+
+    if not (ok and updated):
+        # Failed transition (or no receipt came back): skip the SSE broadcast
+        payload = ReceiptResponse.model_validate(updated) if updated else None
+        return WorkflowAction(success=ok, message=note, receipt=payload)
+
+    # US-030: push the new state to SSE subscribers before answering
+    await sse_service.broadcast_status_change(
+        receipt_id=updated.id, status=updated.status.value,
+        processing_status=updated.processing_status, batch_id=updated.batch_id,
+    )
+    return WorkflowAction(success=ok, message=note, receipt=ReceiptResponse.model_validate(updated))
+
+
+@router.post("/{receipt_id}/reject", response_model=WorkflowAction)
+async def reject_receipt(
+    receipt_id: int,
+    data: RejectRequest,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Reject a receipt (PENDING_REVIEW → REJECTED) with the reason from the request body.
+
+    Documented as an accountant action. The service outcome always comes back as a
+    WorkflowAction; no HTTP error is raised here for a failed transition.
+    """
+    actor = current_user.username
+    ok, note, updated = await ReceiptService.reject_receipt(session, receipt_id, actor, data.reason)
+
+    if not (ok and updated):
+        # Failed transition (or no receipt came back): skip the SSE broadcast
+        payload = ReceiptResponse.model_validate(updated) if updated else None
+        return WorkflowAction(success=ok, message=note, receipt=payload)
+
+    # US-030: push the new state to SSE subscribers before answering
+    await sse_service.broadcast_status_change(
+        receipt_id=updated.id, status=updated.status.value,
+        processing_status=updated.processing_status, batch_id=updated.batch_id,
+    )
+    return WorkflowAction(success=ok, message=note, receipt=ReceiptResponse.model_validate(updated))
+
+
+@router.post("/{receipt_id}/resubmit", response_model=WorkflowAction)
+async def resubmit_receipt(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Send a rejected receipt back for review after corrections (REJECTED → PENDING_REVIEW).
+
+    The service outcome always comes back as a WorkflowAction; this function
+    raises no HTTP error when the service reports a failed transition.
+    """
+    actor = current_user.username
+    ok, note, updated = await ReceiptService.resubmit_receipt(session, receipt_id, actor)
+
+    if not (ok and updated):
+        # Failed transition (or no receipt came back): skip the SSE broadcast
+        payload = ReceiptResponse.model_validate(updated) if updated else None
+        return WorkflowAction(success=ok, message=note, receipt=payload)
+
+    # US-030: push the new state to SSE subscribers before answering
+    await sse_service.broadcast_status_change(
+        receipt_id=updated.id, status=updated.status.value,
+        processing_status=updated.processing_status, batch_id=updated.batch_id,
+    )
+    return WorkflowAction(success=ok, message=note, receipt=ReceiptResponse.model_validate(updated))
+
+
+@router.post("/{receipt_id}/unapprove", response_model=WorkflowAction)
+async def unapprove_receipt(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Return an approved receipt to pending for corrections (APPROVED → PENDING_REVIEW).
+
+    The service outcome always comes back as a WorkflowAction; this function
+    raises no HTTP error when the service reports a failed transition.
+    """
+    actor = current_user.username
+    ok, note, updated = await ReceiptService.unapprove_receipt(session, receipt_id, actor)
+
+    if not (ok and updated):
+        # Failed transition (or no receipt came back): skip the SSE broadcast
+        payload = ReceiptResponse.model_validate(updated) if updated else None
+        return WorkflowAction(success=ok, message=note, receipt=payload)
+
+    # US-030: push the new state to SSE subscribers before answering
+    await sse_service.broadcast_status_change(
+        receipt_id=updated.id, status=updated.status.value,
+        processing_status=updated.processing_status, batch_id=updated.batch_id,
+    )
+    return WorkflowAction(success=ok, message=note, receipt=ReceiptResponse.model_validate(updated))
+
+
+# ============ Accounting Entries Endpoints ============
+
+@router.get("/{receipt_id}/entries", response_model=List[AccountingEntryResponse])
+async def get_receipt_entries(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+):
+    """List a receipt's accounting entries (no get_current_user dependency is declared on this route)."""
+    rows = await AccountingEntryCRUD.get_by_receipt_id(session, receipt_id)
+    return list(map(AccountingEntryResponse.model_validate, rows))
+
+
+@router.put("/{receipt_id}/entries", response_model=List[AccountingEntryResponse])
+async def update_receipt_entries(
+    receipt_id: int,
+    data: EntriesUpdateRequest,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Update a receipt's accounting entries through the service (documented as an accountant action)."""
+    editor = current_user.username
+    outcome = await ReceiptService.update_entries(session, receipt_id, data.entries, editor)
+    ok, service_message, saved_entries = outcome
+
+    # A refusal from the service is surfaced as HTTP 400 with its message
+    if not ok:
+        raise HTTPException(status_code=400, detail=service_message)
+    return [AccountingEntryResponse.model_validate(entry) for entry in saved_entries]
+
+
+@router.post("/{receipt_id}/entries/regenerate", response_model=List[AccountingEntryResponse])
+async def regenerate_entries(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Have the service rebuild a receipt's accounting entries, then return the stored entries."""
+    actor = current_user.username
+    ok, service_message, _unused = await ReceiptService.regenerate_entries(session, receipt_id, actor)
+
+    if not ok:
+        raise HTTPException(status_code=400, detail=service_message)
+
+    # The service's third return value is ignored; entries are read again through AccountingEntryCRUD
+    fresh_rows = await AccountingEntryCRUD.get_by_receipt_id(session, receipt_id)
+    return list(map(AccountingEntryResponse.model_validate, fresh_rows))
+
+
+# ============ Attachment Endpoints ============
+
+@router.post("/{receipt_id}/attachments", response_model=AttachmentResponse)
+async def upload_attachment(
+    receipt_id: int,
+    file: UploadFile = File(...),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Attach an uploaded file to a receipt.
+
+    Checks run in this order, each with its own error:
+        404 - the receipt does not exist
+        400 - the receipt is neither DRAFT nor REJECTED
+        403 - the caller is not the receipt's creator
+        400 - a ValueError was raised while storing or serializing the attachment
+    """
+    target = await ReceiptCRUD.get_by_id(session, receipt_id, include_relations=False)
+    if not target:
+        raise HTTPException(status_code=404, detail="Receipt not found")
+
+    uploadable_statuses = [ReceiptStatus.DRAFT, ReceiptStatus.REJECTED]
+    if target.status not in uploadable_statuses:
+        raise HTTPException(status_code=400, detail="Cannot upload attachments for this receipt status")
+
+    is_creator = target.created_by == current_user.username
+    if not is_creator:
+        raise HTTPException(status_code=403, detail="Only the creator can upload attachments")
+
+    try:
+        stored = await AttachmentCRUD.create(session, receipt_id, file)
+        return AttachmentResponse.model_validate(stored)
+    except ValueError as e:
+        # The ValueError's message is passed through to the client as the detail
+        raise HTTPException(status_code=400, detail=str(e))
+
+
+@router.get("/{receipt_id}/attachments", response_model=List[AttachmentResponse])
+async def list_attachments(
+    receipt_id: int,
+    session: AsyncSession = Depends(get_session),
+):
+    """List the attachments stored for a receipt (no get_current_user dependency is declared here)."""
+    stored_files = await AttachmentCRUD.get_by_receipt_id(session, receipt_id)
+    return list(map(AttachmentResponse.model_validate, stored_files))
+
+
+@router.get("/attachments/{attachment_id}/download")
+async def download_attachment(
+    attachment_id: int,
+    session: AsyncSession = Depends(get_session),
+):
+    """Send an attachment's file back to the client.
+
+    Responds with 404 both when the attachment record is missing and when its
+    file is no longer on disk. No get_current_user dependency is declared here.
+    """
+    record = await AttachmentCRUD.get_by_id(session, attachment_id)
+    if not record:
+        raise HTTPException(status_code=404, detail="Attachment not found")
+
+    disk_path = AttachmentCRUD.get_file_path(record)
+    if not disk_path.exists():
+        raise HTTPException(status_code=404, detail="File not found on disk")
+
+    # The stored filename and MIME type are handed to FileResponse unchanged
+    download_name, content_type = record.filename, record.mime_type
+    return FileResponse(path=str(disk_path), filename=download_name, media_type=content_type)
+
+
+@router.delete("/attachments/{attachment_id}")
+async def delete_attachment(
+    attachment_id: int,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user),
+):
+    """Delete an attachment that belongs to one of the caller's DRAFT receipts.
+
+    Checks run in this order, each with its own error:
+        404 - the attachment does not exist
+        404 - its parent receipt does not exist
+        400 - the parent receipt is not in DRAFT status
+        403 - the caller is not the receipt's creator
+    """
+    doomed = await AttachmentCRUD.get_by_id(session, attachment_id)
+    if not doomed:
+        raise HTTPException(status_code=404, detail="Attachment not found")
+
+    # Permissions hang off the parent receipt, so load it (without relations)
+    parent = await ReceiptCRUD.get_by_id(session, doomed.receipt_id, include_relations=False)
+    if not parent:
+        raise HTTPException(status_code=404, detail="Receipt not found")
+
+    is_draft = parent.status == ReceiptStatus.DRAFT
+    if not is_draft:
+        raise HTTPException(status_code=400, detail="Cannot delete attachments for this receipt status")
+
+    is_creator = parent.created_by == current_user.username
+    if not is_creator:
+        raise HTTPException(status_code=403, detail="Only the creator can delete attachments")
+
+    await AttachmentCRUD.delete(session, doomed)
+    return {"success": True, "message": "Attachment deleted"}
+
+
+# ============ Nomenclature Endpoints ============
+
+@router.get("/nomenclature/partners", response_model=List[PartnerOption])
+async def get_partners(
+    search: Optional[str] = None,
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Get partners (suppliers/customers) for dropdown."""
+    return await NomenclatureService.get_partners(
+        company_id or selected_company, search, session
+    )
+
+
+@router.get("/nomenclature/accounts", response_model=List[AccountOption])
+async def get_accounts(
+    prefix: Optional[str] = None,
+    company_id: Optional[int] = None,
+    selected_company: SelectedCompany = None,
+):
+    """Get chart of accounts for dropdown."""
+    return await NomenclatureService.get_accounts(
+        company_id or selected_company, prefix
+    )
+
+
+@router.get("/nomenclature/cash-registers", response_model=List[CashRegisterOption])
+async def get_cash_registers(
+    company_id: Optional[int] = None,
+    session: AsyncSession = Depends(get_session),
+    selected_company: SelectedCompany = None,
+):
+    """Get cash registers and bank accounts for dropdown."""
+    return await NomenclatureService.get_cash_registers(company_id or selected_company, session)
+
+
+@router.get("/nomenclature/expense-types", response_model=List[ExpenseTypeOption])
+async def get_expense_types():
+    """Get predefined expense types for dropdown."""
+    return await NomenclatureService.get_expense_types()