fix telegram

This commit is contained in:
Claude Agent
2026-02-23 15:12:33 +00:00
parent 6c78fec8a7
commit 8bc567a9c5
426 changed files with 112478 additions and 1 deletions

View File

@@ -0,0 +1,39 @@
"""Data Entry module router factory."""
from fastapi import APIRouter
def create_data_entry_router() -> APIRouter:
    """
    Build the aggregate router for the Data Entry module.

    Sub-routers are mounted in this order:
    - receipts      -> /receipts      (receipt CRUD and workflow)
    - ocr           -> /ocr           (OCR processing for receipts)
    - nomenclature  -> /nomenclature  (nomenclature syncing from Oracle)
    - ocr_settings  -> no extra prefix (its routes live at /settings/* and /metrics/*)
    - bulk          -> /bulk          (bulk upload for batch processing)

    Returns:
        APIRouter: Configured router for the data entry module.
    """
    # Deferred imports: loading the sub-routers at call time avoids circular imports.
    from .receipts import router as receipts_router
    from .ocr import router as ocr_router
    from .nomenclature import router as nomenclature_router
    from .ocr_settings import router as ocr_settings_router
    from .bulk import router as bulk_router

    # (sub-router, mount prefix or None, OpenAPI tag).
    # The module-wide /api/data-entry prefix is not applied here; per the original
    # note it is added when this router is included in main.py.
    mounts = (
        (receipts_router, "/receipts", "data-entry-receipts"),
        (ocr_router, "/ocr", "data-entry-ocr"),
        (nomenclature_router, "/nomenclature", "data-entry-nomenclature"),
        (ocr_settings_router, None, "data-entry-settings"),
        (bulk_router, "/bulk", "data-entry-bulk"),
    )

    router = APIRouter()
    for sub_router, prefix, tag in mounts:
        if prefix is None:
            router.include_router(sub_router, tags=[tag])
        else:
            router.include_router(sub_router, prefix=prefix, tags=[tag])
    return router

View File

@@ -0,0 +1,997 @@
"""
Bulk upload API endpoints for batch receipt processing.
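Endpoints:
- POST /upload - Submit multiple files for OCR processing in a single batch
- GET /batches/{batch_id}/status - Get batch status with optional long-polling
- POST /retry/{receipt_id} - Retry OCR processing for a single failed receipt
- POST /retry-batch/{batch_id} - Retry all failed receipts in a batch
- POST /cancel/{job_id} - Cancel a single pending/processing OCR job
- POST /cancel-batch/{batch_id} - Cancel all pending/processing jobs in a batch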
Validation:
- Max 100 files per batch
- Max 10MB per file
- Allowed types: PDF, PNG, JPG
Duplicate Detection (US-007):
- SHA-256 hash calculated for each file
- Duplicate files (same hash + company_id) are rejected with 409 Conflict info
- Duplicates reported in error list, non-duplicates processed normally
"""
import asyncio
import hashlib
import logging
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from typing import Annotated, List, Optional, Union
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Header
from sqlalchemy import select, func, and_
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.models import BatchUpload, BatchJob, BatchStatus, Receipt, ReceiptAttachment
from backend.modules.data_entry.schemas.bulk import (
BulkUploadResponse,
BulkUploadResponseWithDuplicates,
BatchStatusResponse,
BatchJobInfo,
DuplicateFileInfo,
RetryResponse,
BatchRetryResponse,
CancelJobResponse,
CancelBatchResponse
)
from backend.modules.data_entry.services.ocr.job_queue import job_queue, OCRJobStatus
from backend.config import settings
# Auth integration
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that collects the bulk upload/status/retry/cancel endpoints defined below.
router = APIRouter()
# ============ Helper for selected company from header ============
async def get_selected_company(
    current_user: CurrentUser = Depends(get_current_user),
    x_selected_company: Annotated[Optional[str], Header()] = None
) -> int:
    """
    Resolve the company the request operates on.

    The X-Selected-Company header wins when present: it must parse as an
    integer and its string form must appear in ``current_user.companies``.
    Without the header, the first entry of ``current_user.companies`` is used.

    Returns:
        The selected company ID as an int.

    Raises:
        HTTPException 400: Header is not an integer, or the user has no usable company.
        HTTPException 403: Header names a company the user is not assigned to.
    """
    if not x_selected_company:
        # No header - fall back to the first company in the user's list.
        if current_user.companies:
            try:
                return int(current_user.companies[0])
            except (ValueError, IndexError):
                # Unusable first entry: handled as "no company assigned" below.
                pass
        raise HTTPException(
            status_code=400,
            detail="Nu aveți nicio firmă asignată"
        )

    try:
        company_id = int(x_selected_company)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid company ID format: {x_selected_company}"
        )

    # Membership is checked on the string form of the parsed ID.
    if str(company_id) not in current_user.companies:
        raise HTTPException(
            status_code=403,
            detail=f"Nu aveți acces la firma {company_id}"
        )
    return company_id
# Validation constants
# Upper bound on the number of files accepted by a single bulk upload request.
MAX_FILES_PER_BATCH = 100
# Per-file size limit in bytes, compared against the length of the fully read upload.
MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024 # 10MB
# Content types accepted for upload (JPEG, PNG, PDF); compared against UploadFile.content_type.
ALLOWED_MIME_TYPES = {"image/jpeg", "image/png", "application/pdf"}
def compute_file_hash(content: bytes) -> str:
    """
    Return the SHA-256 digest of ``content`` as a hex string.

    Identical bytes always give the identical digest, which is what the
    duplicate detection relies on.

    Args:
        content: Raw file bytes.

    Returns:
        64-character lowercase hexadecimal SHA-256 digest.
    """
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
async def check_duplicate_hashes(
    session: AsyncSession,
    file_hashes: List[str],
    company_id: int
) -> dict[str, int]:
    """
    Find which of the given file hashes already belong to a receipt of this company.

    Args:
        session: Database session.
        file_hashes: SHA-256 hashes to look up.
        company_id: Company ID that scopes the duplicate check.

    Returns:
        Mapping of hash -> existing receipt ID, containing only the hashes that
        were found. If several receipts share a hash, the last row returned wins.
    """
    if not file_hashes:
        # Nothing to look up - skip the query entirely.
        return {}

    lookup = select(Receipt.file_hash, Receipt.id).where(
        and_(
            Receipt.file_hash.in_(file_hashes),
            Receipt.company_id == company_id
        )
    )
    # Only execute() is awaited; .all() on the result is a plain synchronous call.
    rows = (await session.execute(lookup)).all()
    return {row[0]: row[1] for row in rows}
@router.post("/upload", response_model=Union[BulkUploadResponse, BulkUploadResponseWithDuplicates])
async def bulk_upload(
    files: List[UploadFile] = File(..., description="Multiple files to upload"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Upload multiple files for batch OCR processing.

    Creates a batch record and queues every non-duplicate file as an OCR job.
    Any invalid file (type or size) rejects the whole request. Duplicate files
    are reported separately and skipped; non-duplicates are processed.

    Duplicate Detection (US-007):
    - SHA-256 hash calculated for each file before processing
    - Files whose hash already exists for the selected company are skipped
    - Response includes duplicate details with existing_receipt_id

    Failure handling: if creating the jobs or committing the batch fails, the
    database transaction is rolled back and OCR jobs that were already queued
    for this batch are cancelled on a best-effort basis, so they do not keep
    running against a batch that no longer exists.

    Args:
        files: List of image/PDF files (max 100 files, max 10MB each)

    Returns:
        BulkUploadResponse with batch_id and list of job_ids
        BulkUploadResponseWithDuplicates if some files were duplicates

    Raises:
        400: If validation fails (no files, too many files, file too large, invalid type)
        409: If ALL files are duplicates
        500: If job creation or the final commit fails
    """
    # Validate file count
    if len(files) == 0:
        raise HTTPException(
            status_code=400,
            detail="No files provided"
        )
    if len(files) > MAX_FILES_PER_BATCH:
        raise HTTPException(
            status_code=400,
            detail=f"Too many files. Maximum {MAX_FILES_PER_BATCH} files per batch."
        )

    # Pre-validate all files before creating any jobs (atomic check)
    invalid_files = []
    file_contents = []
    for file in files:
        # Check MIME type (as declared by the client)
        if file.content_type not in ALLOWED_MIME_TYPES:
            invalid_files.append(f"{file.filename}: Invalid type ({file.content_type})")
            continue

        # Read content and check size
        content = await file.read()
        if len(content) > MAX_FILE_SIZE_BYTES:
            invalid_files.append(f"{file.filename}: File too large ({len(content) // (1024*1024)}MB > 10MB)")
            continue

        # Compute SHA-256 hash for duplicate detection (US-007) and keep for later processing
        file_contents.append({
            "filename": file.filename,
            "content": content,
            "mime_type": file.content_type,
            "file_hash": compute_file_hash(content)
        })

    # If any files are invalid, reject the entire batch
    if invalid_files:
        raise HTTPException(
            status_code=400,
            detail={
                "message": f"Validation failed for {len(invalid_files)} file(s)",
                "invalid_files": invalid_files
            }
        )

    # Check for duplicates BEFORE creating batch (US-007)
    all_hashes = [f["file_hash"] for f in file_contents]
    existing_duplicates = await check_duplicate_hashes(session, all_hashes, selected_company)

    # Separate duplicate files from processable files
    duplicate_files: List[DuplicateFileInfo] = []
    processable_files = []
    for file_data in file_contents:
        if file_data["file_hash"] in existing_duplicates:
            existing_receipt_id = existing_duplicates[file_data["file_hash"]]
            duplicate_files.append(DuplicateFileInfo(
                filename=file_data["filename"],
                error="duplicate",
                existing_receipt_id=existing_receipt_id,
                message=f"Fișier duplicat - există deja ca bon #{existing_receipt_id}"
            ))
            logger.info(
                f"[BulkUpload] Duplicate detected: {file_data['filename']} "
                f"(hash={file_data['file_hash'][:16]}...) matches receipt #{existing_receipt_id}"
            )
        else:
            processable_files.append(file_data)

    # If ALL files are duplicates, return 409 Conflict
    if len(duplicate_files) == len(file_contents):
        raise HTTPException(
            status_code=409,
            detail={
                "error": "all_duplicates",
                "message": f"Toate cele {len(duplicate_files)} fișiere sunt duplicate",
                "duplicates": [d.model_dump() for d in duplicate_files]
            }
        )

    # Defensive guard: should be unreachable after the check above
    if not processable_files:
        raise HTTPException(
            status_code=409,
            detail={
                "error": "no_files_to_process",
                "message": "Nu există fișiere de procesat",
                "duplicates": [d.model_dump() for d in duplicate_files]
            }
        )

    # Create batch record with company_id for auto-save
    batch = BatchUpload(
        user_id=current_user.username,
        company_id=selected_company,
        status=BatchStatus.PENDING,
        total_files=len(processable_files)  # Only count processable files
    )
    session.add(batch)
    await session.flush()  # Get batch.id before creating jobs

    # Capture the ID now so nothing below has to touch the ORM object after commit.
    batch_id = batch.id

    # Create OCR jobs for processable files only
    job_ids = []
    batch_jobs = []
    try:
        for file_data in processable_files:
            # Create OCR job using existing job_queue; pass batch_id and file_hash for tracking
            job = await job_queue.create_job(
                file_bytes=file_data["content"],
                mime_type=file_data["mime_type"],
                engine="doctr_plus",  # Default engine for bulk
                username=current_user.username,
                original_filename=file_data["filename"],
                batch_id=batch_id,  # Link job to batch for auto-save integration
                file_hash=file_data["file_hash"]  # Pass hash for storage in receipt
            )
            job_ids.append(job.id)

            # Create batch_job link
            batch_jobs.append(BatchJob(
                batch_id=batch_id,
                job_id=job.id,
                filename=file_data["filename"]
            ))

        # Add all batch_job records and commit batch + links together
        session.add_all(batch_jobs)
        await session.commit()
    except Exception as e:
        # Roll back the DB side ...
        await session.rollback()
        logger.exception(f"[BulkUpload] Failed to create batch: {e}")

        # ... and best-effort cancel the jobs already handed to the OCR queue,
        # which the DB rollback does not undo.
        for orphan_job_id in job_ids:
            try:
                await job_queue.update_status(
                    job_id=orphan_job_id,
                    status=OCRJobStatus.cancelled,
                    error="Batch creation failed"
                )
            except Exception:
                logger.warning(
                    f"[BulkUpload] Could not cancel orphaned job {orphan_job_id}",
                    exc_info=True
                )

        raise HTTPException(
            status_code=500,
            detail=f"Failed to create batch: {str(e)}"
        ) from e

    # From here on the batch is committed; response building is deliberately
    # outside the try block so a late error cannot trigger the cleanup above.
    logger.info(
        f"[BulkUpload] Created batch {batch_id} with {len(job_ids)} jobs "
        f"for user {current_user.username}"
        f"{f', {len(duplicate_files)} duplicates skipped' if duplicate_files else ''}"
    )

    # Return response with duplicate info if any duplicates were found
    if duplicate_files:
        return BulkUploadResponseWithDuplicates(
            batch_id=batch_id,
            job_ids=job_ids,
            total_files=len(file_contents),
            processed_files=len(job_ids),
            duplicate_files=len(duplicate_files),
            duplicates=duplicate_files,
            message=f"{len(job_ids)} fișier(e) în procesare, {len(duplicate_files)} duplicate ignorate"
        )

    return BulkUploadResponse(
        batch_id=batch_id,
        job_ids=job_ids,
        total_files=len(job_ids),
        message=f"{len(job_ids)} files queued for processing"
    )
# Long-polling constants
# Upper bound accepted for the `wait` query parameter of the batch status endpoint.
MAX_WAIT_SECONDS = 30
# Pause between two status snapshots while long-polling.
POLL_INTERVAL_SECONDS = 0.5
async def _get_batch_status_snapshot(
    batch_id: int,
    session: AsyncSession
) -> Optional[dict]:
    """
    Get the current status snapshot of a batch.

    Job statuses and error messages are read from the OCR job queue; the
    batch, its job links and the receipt amounts are read from the database.

    The ORM selects use ``populate_existing`` so that repeated calls on the
    same session (long-polling) overwrite already-loaded BatchUpload/BatchJob
    objects with current column values. Without it the session's identity map
    would keep returning the attribute values from the first load (for
    example a stale ``receipt_id``).

    Args:
        batch_id: ID of the batch to inspect.
        session: Database session.

    Returns:
        None if the batch does not exist, otherwise a dict with keys
        ``batch``, ``pending_count``, ``processing_count``, ``completed_count``,
        ``failed_count``, ``jobs`` (list of dicts) and ``total_amount``
        (float, or None when no linked receipt has an amount).
        Statuses other than pending/processing/completed/failed are not
        counted in any of the four counters.
    """
    # Get batch record (refreshing any instance already held by the session)
    batch_result = await session.execute(
        select(BatchUpload)
        .where(BatchUpload.id == batch_id)
        .execution_options(populate_existing=True)
    )
    batch = batch_result.scalar_one_or_none()
    if not batch:
        return None

    # Get all batch_jobs for this batch (also refreshed on every call)
    batch_jobs_result = await session.execute(
        select(BatchJob)
        .where(BatchJob.batch_id == batch_id)
        .execution_options(populate_existing=True)
    )
    batch_jobs = batch_jobs_result.scalars().all()
    if not batch_jobs:
        return {
            "batch": batch,
            "pending_count": 0,
            "processing_count": 0,
            "completed_count": 0,
            "failed_count": 0,
            "jobs": [],
            "total_amount": None
        }

    # Get job statuses and error_messages from OCR job queue
    job_statuses = {}
    job_errors = {}
    for bj in batch_jobs:
        job = await job_queue.get_job(bj.job_id)
        if job:
            job_statuses[bj.job_id] = job.status.value
            job_errors[bj.job_id] = job.error_message
        else:
            # Job not found in queue - treat as failed
            job_statuses[bj.job_id] = "failed"
            job_errors[bj.job_id] = "Job not found in queue"

    # Count by status
    statuses = list(job_statuses.values())
    pending_count = statuses.count("pending")
    processing_count = statuses.count("processing")
    completed_count = statuses.count("completed")
    failed_count = statuses.count("failed")

    # Build jobs list with status info
    jobs_info = [
        {
            "job_id": bj.job_id,
            "filename": bj.filename,
            "status": job_statuses.get(bj.job_id, "failed"),
            "receipt_id": bj.receipt_id,
            "error_message": job_errors.get(bj.job_id)
        }
        for bj in batch_jobs
    ]

    # Calculate total_amount from the receipts already linked to jobs
    total_amount = None
    receipt_ids = [bj.receipt_id for bj in batch_jobs if bj.receipt_id is not None]
    if receipt_ids:
        amount_result = await session.execute(
            select(func.sum(Receipt.amount)).where(Receipt.id.in_(receipt_ids))
        )
        total_sum = amount_result.scalar()
        if total_sum is not None:
            total_amount = float(total_sum)

    return {
        "batch": batch,
        "pending_count": pending_count,
        "processing_count": processing_count,
        "completed_count": completed_count,
        "failed_count": failed_count,
        "jobs": jobs_info,
        "total_amount": total_amount
    }
def _compute_batch_overall_status(pending: int, processing: int, completed: int, failed: int, total: int) -> str:
    """
    Derive the overall batch status string from per-status job counts.

    - Nothing pending or processing: FAILED when ``failed == total``, else COMPLETED.
    - Otherwise PROCESSING as soon as any job is processing, completed or failed.
    - Otherwise PENDING.

    Returns:
        The ``.value`` of the matching BatchStatus member.
    """
    in_progress = pending + processing
    if in_progress == 0:
        # All jobs finished
        outcome = BatchStatus.FAILED if failed == total else BatchStatus.COMPLETED
        return outcome.value

    if any(count > 0 for count in (processing, completed, failed)):
        return BatchStatus.PROCESSING.value

    return BatchStatus.PENDING.value
@router.get("/batches/{batch_id}/status", response_model=BatchStatusResponse)
async def get_batch_status(
    batch_id: int,
    wait: Optional[int] = Query(
        default=None,
        ge=0,
        le=MAX_WAIT_SECONDS,
        description="Long-polling wait time in seconds (max 30)"
    ),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Get batch processing status with optional long-polling.

    Returns aggregated status counts and individual job statuses.
    When `wait` is provided (and jobs are still pending/processing), the
    endpoint re-reads the status every POLL_INTERVAL_SECONDS until:
    - any status count differs from the initial snapshot
    - all jobs are finished (pending + processing = 0)
    - `wait` seconds have elapsed

    The timeout is measured against the event loop's monotonic clock, so time
    spent inside the status queries counts towards `wait` as well.

    Args:
        batch_id: Batch ID to query
        wait: Optional wait time in seconds for long-polling (0-30)

    Returns:
        BatchStatusResponse with status counts and job details

    Raises:
        404: If batch not found (initially or while polling)
    """
    # NOTE(review): the batch is looked up by ID only; current_user is not used to
    # verify that the batch belongs to the caller's user/company - confirm intended.

    count_keys = ("pending_count", "processing_count", "completed_count", "failed_count")

    # Get initial snapshot
    snapshot = await _get_batch_status_snapshot(batch_id, session)
    if snapshot is None:
        raise HTTPException(
            status_code=404,
            detail=f"Batch {batch_id} not found"
        )

    # Long-poll only if requested and there are still jobs in progress
    if wait and snapshot["pending_count"] + snapshot["processing_count"] > 0:
        initial_counts = tuple(snapshot[key] for key in count_keys)

        loop = asyncio.get_running_loop()
        deadline = loop.time() + wait
        while True:
            remaining = deadline - loop.time()
            if remaining <= 0:
                # Timeout reached - answer with the latest snapshot we have
                break

            # Never sleep past the deadline
            await asyncio.sleep(min(POLL_INTERVAL_SECONDS, remaining))

            # Refresh snapshot
            snapshot = await _get_batch_status_snapshot(batch_id, session)
            if snapshot is None:
                # Batch deleted during polling (edge case)
                raise HTTPException(status_code=404, detail=f"Batch {batch_id} not found")

            # Status changed, return immediately
            if tuple(snapshot[key] for key in count_keys) != initial_counts:
                break

            # All jobs finished
            if snapshot["pending_count"] + snapshot["processing_count"] == 0:
                break

    # Build response
    batch = snapshot["batch"]
    total_files = batch.total_files
    overall_status = _compute_batch_overall_status(
        snapshot["pending_count"],
        snapshot["processing_count"],
        snapshot["completed_count"],
        snapshot["failed_count"],
        total_files
    )
    jobs = [
        BatchJobInfo(
            job_id=j["job_id"],
            filename=j["filename"],
            status=j["status"],
            receipt_id=j["receipt_id"],
            error_message=j.get("error_message")
        )
        for j in snapshot["jobs"]
    ]
    return BatchStatusResponse(
        batch_id=batch.id,
        status=overall_status,
        total_files=total_files,
        pending_count=snapshot["pending_count"],
        processing_count=snapshot["processing_count"],
        completed_count=snapshot["completed_count"],
        failed_count=snapshot["failed_count"],
        jobs=jobs,
        total_amount=snapshot["total_amount"],
        created_at=batch.created_at
    )
# ============ Retry Endpoints (US-006) ============
async def _retry_single_receipt(
    session: AsyncSession,
    receipt: Receipt,
    username: str
) -> tuple[bool, Optional[str], Optional[str]]:
    """
    Retry processing for a single receipt.

    Looks up one attachment of the receipt, reads its file from the upload
    directory, queues a new OCR job for it and resets the receipt's
    processing fields. The session is flushed but NOT committed; committing
    is left to the caller.

    File system access runs in a worker thread (``asyncio.to_thread``) so that
    reading a large attachment does not block the event loop.

    Args:
        session: Database session
        receipt: Receipt to retry
        username: Username for the new OCR job

    Returns:
        Tuple of (success, job_id, error_message). On success error_message is
        None; on failure job_id is None and error_message describes the cause.
    """
    # Get one attachment to find the source file
    # (no ORDER BY is applied, so which one is picked is up to the database)
    attachments_result = await session.execute(
        select(ReceiptAttachment)
        .where(ReceiptAttachment.receipt_id == receipt.id)
        .limit(1)
    )
    attachment = attachments_result.scalar_one_or_none()
    if not attachment:
        return False, None, "Bonul nu are fișier atașat"

    # Construct full path to attachment file
    file_path = settings.data_entry_upload_path_resolved / attachment.file_path

    def _read_file() -> bytes:
        """Read the whole attachment file; executed in a worker thread."""
        with open(file_path, 'rb') as f:
            return f.read()

    if not await asyncio.to_thread(file_path.exists):
        return False, None, "Fișierul original nu mai este disponibil"

    # Read file content off the event loop
    try:
        file_bytes = await asyncio.to_thread(_read_file)
    except Exception as e:
        logger.exception(f"[Retry] Failed to read file {file_path}: {e}")
        return False, None, f"Eroare la citirea fișierului: {str(e)}"

    # Create new OCR job
    try:
        job = await job_queue.create_job(
            file_bytes=file_bytes,
            mime_type=attachment.mime_type,
            engine="doctr_plus",
            username=username,
            original_filename=attachment.filename,
            batch_id=None,  # No batch for retry - direct processing
            file_hash=receipt.file_hash
        )

        # Reset receipt processing status
        receipt.processing_status = "pending"
        receipt.processing_error = None
        receipt.processing_started_at = datetime.utcnow()
        receipt.processing_completed_at = None
        await session.flush()

        logger.info(f"[Retry] Receipt {receipt.id} requeued as job {job.id}")
        return True, job.id, None
    except Exception as e:
        logger.exception(f"[Retry] Failed to create job for receipt {receipt.id}: {e}")
        return False, None, f"Eroare la crearea job-ului OCR: {str(e)}"
@router.post("/retry/{receipt_id}", response_model=RetryResponse)
async def retry_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Retry OCR processing for a single failed receipt.

    The receipt is looked up within the selected company, must currently have
    processing_status 'failed', and is then requeued through
    _retry_single_receipt. The change is committed only when requeueing succeeds.

    Args:
        receipt_id: ID of the receipt to retry

    Returns:
        RetryResponse with success status and new job ID

    Raises:
        404: If the receipt is not found for the selected company
        400: If the receipt is not in 'failed' status
        400: If requeueing fails (e.g. original file not available)
    """
    receipt_query = select(Receipt).where(
        and_(
            Receipt.id == receipt_id,
            Receipt.company_id == selected_company
        )
    )
    receipt = (await session.execute(receipt_query)).scalar_one_or_none()

    if not receipt:
        raise HTTPException(
            status_code=404,
            detail=f"Bonul #{receipt_id} nu a fost găsit"
        )

    # Only receipts that actually failed may be retried
    if receipt.processing_status != "failed":
        raise HTTPException(
            status_code=400,
            detail=f"Bonul nu este în stare de eroare (status actual: {receipt.processing_status})"
        )

    requeued, new_job_id, failure_reason = await _retry_single_receipt(
        session, receipt, current_user.username
    )
    if not requeued:
        raise HTTPException(
            status_code=400,
            detail=failure_reason or "Eroare necunoscută la reîncărcare"
        )

    await session.commit()
    return RetryResponse(
        success=True,
        receipt_id=receipt_id,
        job_id=new_job_id,
        message="Bon reîncarcat în procesare"
    )
@router.post("/retry-batch/{batch_id}", response_model=BatchRetryResponse)
async def retry_batch_failed(
    batch_id: str,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
    selected_company: int = Depends(get_selected_company)
):
    """
    Retry all failed receipts in a batch.

    Selects the receipts of the selected company whose batch_id matches and
    whose processing_status is 'failed', and tries to requeue each of them.
    One commit is issued after all attempts, regardless of individual failures.

    Args:
        batch_id: Batch ID (UUID string from receipt.batch_id)

    Returns:
        BatchRetryResponse with counts of successful and failed retries;
        `success` is True when at least one receipt was requeued.

    Raises:
        404: If no failed receipts found for batch
    """
    failed_query = select(Receipt).where(
        and_(
            Receipt.batch_id == batch_id,
            Receipt.company_id == selected_company,
            Receipt.processing_status == "failed"
        )
    )
    failed_receipts = (await session.execute(failed_query)).scalars().all()

    if not failed_receipts:
        raise HTTPException(
            status_code=404,
            detail=f"Nu există bonuri cu erori în batch-ul {batch_id}"
        )

    # Attempt each receipt in turn; one error entry is recorded per receipt that could not be requeued
    errors = []
    for receipt in failed_receipts:
        requeued, _new_job_id, failure_reason = await _retry_single_receipt(
            session, receipt, current_user.username
        )
        if not requeued:
            errors.append(f"Bon #{receipt.id}: {failure_reason}")

    failed_count = len(errors)
    retried_count = len(failed_receipts) - failed_count

    await session.commit()

    message = f"{retried_count} bonuri reîncarcate în procesare"
    if failed_count > 0:
        message += f", {failed_count} erori"

    return BatchRetryResponse(
        success=retried_count > 0,
        batch_id=batch_id,
        retried_count=retried_count,
        failed_count=failed_count,
        errors=errors,
        message=message
    )
# ============ Cancel Endpoints (US-014) ============
@router.post("/cancel/{job_id}", response_model=CancelJobResponse)
async def cancel_job(
    job_id: str,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Cancel a single OCR processing job.

    The job must be linked to a batch (batch_jobs table) and exist in the OCR
    job queue. Jobs whose status is completed, failed or already cancelled are
    refused; any other status is switched to 'cancelled' in the job queue.

    This endpoint only updates the job status; it does not delete any receipt
    (per the original note, receipts are preserved for audit purposes).

    Args:
        job_id: The UUID of the OCR job to cancel

    Returns:
        CancelJobResponse with cancellation details

    Raises:
        404: If job not found in batch_jobs table or in the job queue
        400: If job has already completed, failed or been cancelled
        500: If the job queue reports that the status update failed
    """
    # The job must be known as part of a batch
    link_result = await session.execute(
        select(BatchJob).where(BatchJob.job_id == job_id)
    )
    if not link_result.scalar_one_or_none():
        raise HTTPException(
            status_code=404,
            detail=f"Job {job_id} nu a fost găsit"
        )

    # Current status comes from the OCR job queue
    ocr_job = await job_queue.get_job(job_id)
    if not ocr_job:
        raise HTTPException(
            status_code=404,
            detail=f"Job {job_id} nu există în coada de procesare"
        )
    current_status = ocr_job.status.value

    # Statuses that can no longer be cancelled, each with its own refusal message
    refusals = {
        OCRJobStatus.completed.value: "Job-ul a fost deja procesat cu succes. Nu poate fi anulat.",
        OCRJobStatus.failed.value: "Job-ul a eșuat deja. Folosiți opțiunea de reîncercare în loc de anulare.",
        OCRJobStatus.cancelled.value: "Job-ul a fost deja anulat.",
    }
    refusal = refusals.get(current_status)
    if refusal is not None:
        raise HTTPException(status_code=400, detail=refusal)

    # Timestamp is taken just before the queue update and reported back to the caller
    cancelled_at = datetime.utcnow()
    updated = await job_queue.update_status(
        job_id=job_id,
        status=OCRJobStatus.cancelled,
        error="Cancelled by user"
    )
    if not updated:
        raise HTTPException(
            status_code=500,
            detail="Eroare la anularea job-ului"
        )

    logger.info(
        f"[CancelJob] Job {job_id} cancelled by {current_user.username} "
        f"(previous status: {current_status})"
    )
    return CancelJobResponse(
        success=True,
        job_id=job_id,
        cancelled_at=cancelled_at,
        message="Job anulat cu succes"
    )
@router.post("/cancel-batch/{batch_id}", response_model=CancelBatchResponse)
async def cancel_batch(
    batch_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Cancel all pending/processing jobs in a batch.

    Every job linked to the batch is checked in the OCR job queue. Jobs that
    are 'pending' or 'processing' are marked 'cancelled'; everything else
    (finished jobs, jobs missing from the queue, jobs whose update fails) is
    counted as skipped.

    This endpoint only updates job statuses; it does not delete any receipt
    (per the original note, receipts are preserved for audit purposes).

    Args:
        batch_id: The batch ID to cancel

    Returns:
        CancelBatchResponse with counts of cancelled and skipped jobs;
        `success` is True when at least one job was cancelled.

    Raises:
        404: If batch not found or no jobs exist for batch
    """
    # Verify batch exists
    batch_lookup = await session.execute(
        select(BatchUpload).where(BatchUpload.id == batch_id)
    )
    if not batch_lookup.scalar_one_or_none():
        raise HTTPException(
            status_code=404,
            detail=f"Batch {batch_id} nu a fost găsit"
        )

    # Collect the job links of this batch
    links_lookup = await session.execute(
        select(BatchJob).where(BatchJob.batch_id == batch_id)
    )
    batch_jobs = links_lookup.scalars().all()
    if not batch_jobs:
        raise HTTPException(
            status_code=404,
            detail=f"Nu există job-uri în batch-ul {batch_id}"
        )

    cancellable_states = (OCRJobStatus.pending.value, OCRJobStatus.processing.value)
    cancelled_count = 0
    skipped_count = 0
    for link in batch_jobs:
        ocr_job = await job_queue.get_job(link.job_id)

        # Missing from the queue, or already completed/failed/cancelled: nothing to cancel
        if not ocr_job or ocr_job.status.value not in cancellable_states:
            skipped_count += 1
            continue

        updated = await job_queue.update_status(
            job_id=link.job_id,
            status=OCRJobStatus.cancelled,
            error="Cancelled by user (batch cancel)"
        )
        if updated:
            cancelled_count += 1
            logger.debug(f"[CancelBatch] Cancelled job {link.job_id}")
        else:
            # Failed to cancel - count as skipped
            skipped_count += 1
            logger.warning(
                f"[CancelBatch] Failed to cancel job {link.job_id}"
            )

    logger.info(
        f"[CancelBatch] Batch {batch_id} cancelled by {current_user.username}: "
        f"{cancelled_count} cancelled, {skipped_count} skipped"
    )

    # Build the user-facing summary
    if cancelled_count == 0:
        message = f"Nu există job-uri de anulat în batch-ul {batch_id}"
    else:
        message = f"{cancelled_count} job-uri anulate"
        if skipped_count != 0:
            message = f"{cancelled_count} job-uri anulate, {skipped_count} ignorate (deja procesate)"

    return CancelBatchResponse(
        success=cancelled_count > 0,
        batch_id=batch_id,
        cancelled_count=cancelled_count,
        skipped_count=skipped_count,
        message=message
    )

View File

@@ -0,0 +1,260 @@
"""Nomenclature API endpoints."""
from typing import Optional, List, Annotated
from fastapi import APIRouter, Depends, HTTPException, Header, Request
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.services.sync_service import SyncService
# Import auth dependencies
import sys
from pathlib import Path
# Path setup handled by main.py - this is redundant
# project_root = Path(__file__).parent.parent.parent.parent.parent
# sys.path.insert(0, str(project_root / "shared"))
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
# Router that collects the nomenclature endpoints defined below.
router = APIRouter()
# ============ Selected Company Dependency ============
async def get_selected_company(
    current_user: CurrentUser = Depends(get_current_user),
    x_selected_company: Annotated[Optional[str], Header()] = None
) -> int:
    """
    Resolve the company the request operates on.

    Uses the X-Selected-Company header when present (it must be an integer
    whose string form is in ``current_user.companies``); otherwise falls back
    to the first entry of ``current_user.companies``.

    Raises:
        HTTPException 400: Header is not an integer, or the user has no usable company.
        HTTPException 403: Header names a company the user is not assigned to.
    """
    if not x_selected_company:
        # No header - fall back to the user's first company.
        if current_user.companies:
            try:
                return int(current_user.companies[0])
            except (ValueError, IndexError):
                # Unusable first entry: handled as "no company assigned" below.
                pass
        raise HTTPException(status_code=400, detail="Nu aveți nicio firmă asignată")

    try:
        company_id = int(x_selected_company)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid company ID: {x_selected_company}")

    if str(company_id) not in current_user.companies:
        raise HTTPException(status_code=403, detail=f"Nu aveți acces la firma {company_id}")
    return company_id
# Annotated alias: an int parameter whose value is supplied by the get_selected_company dependency.
SelectedCompany = Annotated[int, Depends(get_selected_company)]
# Request/Response Models
class SupplierSearchResult(BaseModel):
    """Response body of the supplier search endpoint."""

    # Whether a matching supplier was found.
    found: bool
    # Supplier data as a plain dict; defaults to None.
    supplier: Optional[dict] = None
    # Origin of the result: 'synced', 'local', 'not_found'
    source: str
class LocalSupplierCreate(BaseModel):
    """Request body for creating a local supplier."""

    # Supplier name (required).
    name: str
    # Optional fiscal code.
    fiscal_code: Optional[str] = None
    # Optional postal address.
    address: Optional[str] = None
class LocalSupplierResponse(BaseModel):
    """Response body describing a newly created local supplier."""

    # Identifier of the created supplier record.
    id: int
    name: str
    # May be None; no default is declared on the field.
    fiscal_code: Optional[str]
    # May be None; no default is declared on the field.
    address: Optional[str]
    # Marks the supplier as local; defaults to True.
    is_local: bool = True
class SyncResult(BaseModel):
    """Outcome of a manual nomenclature sync run."""

    # Number of records synced.
    synced: int
    # Number of errors reported by the sync.
    errors: int
    # Human-readable summary.
    message: str
class SupplierOption(BaseModel):
    """Supplier entry for dropdown/autocomplete lists."""

    id: int
    # Oracle identifier when available; defaults to None.
    oracle_id: Optional[int] = None
    name: str
    # May be None; no default is declared on the field.
    fiscal_code: Optional[str]
    # Origin of the entry: 'synced' or 'local'
    source: str
class CashRegisterOption(BaseModel):
    """Cash register entry returned by the cash-registers listing."""

    id: int
    # Oracle identifier of the register (required).
    oracle_id: int
    name: str
    account_code: str
    register_type: str
# Endpoints
@router.get("/suppliers/search", response_model=SupplierSearchResult)
async def search_supplier(
    fiscal_code: Optional[str] = None,
    name: Optional[str] = None,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Search for a supplier by fiscal code or name.

    The company searched is the `company_id` query parameter when given,
    otherwise the selected company resolved from the request. An explicit
    `company_id` is checked against the caller's companies, because unlike the
    selected company it has not been validated by a dependency.

    Raises:
        400: If neither fiscal_code nor name is provided
        403: If `company_id` names a company the user has no access to
    """
    if not fiscal_code and not name:
        raise HTTPException(status_code=400, detail="Provide fiscal_code or name")

    # The query parameter overrides the validated selection, so validate it as well.
    if company_id and str(company_id) not in current_user.companies:
        raise HTTPException(status_code=403, detail=f"Nu aveți acces la firma {company_id}")

    cid = company_id or selected_company
    found, supplier, source = await SyncService.search_supplier(
        session, cid, fiscal_code, name
    )
    return SupplierSearchResult(found=found, supplier=supplier, source=source)
@router.get("/suppliers", response_model=List[SupplierOption])
async def get_suppliers(
    search: Optional[str] = None,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Get all suppliers (synced + local) for dropdown/autocomplete.

    The company used is the `company_id` query parameter when given, otherwise
    the selected company resolved from the request. An explicit `company_id`
    is checked against the caller's companies, because unlike the selected
    company it has not been validated by a dependency.

    Raises:
        403: If `company_id` names a company the user has no access to
    """
    # The query parameter overrides the validated selection, so validate it as well.
    if company_id and str(company_id) not in current_user.companies:
        raise HTTPException(status_code=403, detail=f"Nu aveți acces la firma {company_id}")

    cid = company_id or selected_company
    suppliers = await SyncService.get_all_suppliers(session, cid, search)
    return [
        SupplierOption(
            id=s["id"],
            oracle_id=s.get("oracle_id"),
            name=s["name"],
            fiscal_code=s.get("fiscal_code"),
            source=s["source"]
        )
        for s in suppliers
    ]
@router.post("/suppliers/local", response_model=LocalSupplierResponse)
async def create_local_supplier(
    data: LocalSupplierCreate,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Create a local supplier from OCR data.

    The supplier is created for the `company_id` query parameter when given,
    otherwise for the selected company resolved from the request. An explicit
    `company_id` is checked against the caller's companies, because unlike the
    selected company it has not been validated by a dependency.

    Raises:
        403: If `company_id` names a company the user has no access to
    """
    # The query parameter overrides the validated selection, so validate it as well.
    if company_id and str(company_id) not in current_user.companies:
        raise HTTPException(status_code=403, detail=f"Nu aveți acces la firma {company_id}")

    cid = company_id or selected_company
    supplier = await SyncService.create_local_supplier(
        session, cid, data.name, data.fiscal_code, data.address, current_user.username
    )
    return LocalSupplierResponse(
        id=supplier.id,
        name=supplier.name,
        fiscal_code=supplier.fiscal_code,
        address=supplier.address,
    )
@router.get("/cash-registers", response_model=List[CashRegisterOption])
async def get_cash_registers(
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Get all cash registers for a company.

    The company used is the `company_id` query parameter when given, otherwise
    the selected company resolved from the request. An explicit `company_id`
    is checked against the caller's companies, because unlike the selected
    company it has not been validated by a dependency.

    Raises:
        403: If `company_id` names a company the user has no access to
    """
    # The query parameter overrides the validated selection, so validate it as well.
    if company_id and str(company_id) not in current_user.companies:
        raise HTTPException(status_code=403, detail=f"Nu aveți acces la firma {company_id}")

    cid = company_id or selected_company
    registers = await SyncService.get_all_cash_registers(session, cid)
    return [
        CashRegisterOption(
            id=r["id"],
            oracle_id=r["oracle_id"],
            name=r["name"],
            account_code=r["account_code"],
            register_type=r["register_type"]
        )
        for r in registers
    ]
@router.post("/sync/suppliers", response_model=SyncResult)
async def sync_suppliers(
    request: Request,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
):
    """
    Manually trigger a supplier sync from Oracle.

    Args:
        request: Incoming request; ``request.state.server_id`` is read when
            present and forwarded to the sync service (``None`` otherwise).
        company_id: Explicit company id; when truthy it takes precedence
            over ``selected_company``.
        session: Database session injected by FastAPI.
        selected_company: Company used when ``company_id`` is not supplied.

    Returns:
        SyncResult with the synced/error counts and a summary message.
    """
    target_company = company_id or selected_company
    server_id = getattr(request.state, 'server_id', None)

    outcome = await SyncService.sync_suppliers(
        session, target_company, server_id=server_id
    )
    synced, errors = outcome
    summary = f"Synced {synced} suppliers with {errors} errors"
    return SyncResult(synced=synced, errors=errors, message=summary)
@router.post("/sync/cash-registers", response_model=SyncResult)
async def sync_cash_registers(
    request: Request,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
):
    """
    Manually trigger a cash register sync from Oracle.

    Args:
        request: Incoming request; ``request.state.server_id`` is read when
            present and forwarded to the sync service (``None`` otherwise).
        company_id: Explicit company id; when truthy it takes precedence
            over ``selected_company``.
        session: Database session injected by FastAPI.
        selected_company: Company used when ``company_id`` is not supplied.

    Returns:
        SyncResult with the synced/error counts and a summary message.
    """
    target_company = company_id or selected_company
    server_id = getattr(request.state, 'server_id', None)

    outcome = await SyncService.sync_cash_registers(
        session, target_company, server_id=server_id
    )
    synced, errors = outcome
    summary = f"Synced {synced} cash registers with {errors} errors"
    return SyncResult(synced=synced, errors=errors, message=summary)
@router.post("/sync/all", response_model=dict)
async def sync_all_nomenclatures(
    request: Request,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
):
    """
    Sync all nomenclatures (suppliers, then cash registers) from Oracle.

    Args:
        request: Incoming request; ``request.state.server_id`` is read when
            present and forwarded to both sync calls (``None`` otherwise).
        company_id: Explicit company id; when truthy it takes precedence
            over ``selected_company``.
        session: Database session injected by FastAPI.
        selected_company: Company used when ``company_id`` is not supplied.

    Returns:
        Dict with per-nomenclature synced/error counts, the combined totals
        and a summary message.
    """
    target_company = company_id or selected_company
    server_id = getattr(request.state, 'server_id', None)

    # The two syncs run one after the other: suppliers first.
    supplier_outcome = await SyncService.sync_suppliers(
        session, target_company, server_id=server_id
    )
    suppliers_synced, suppliers_errors = supplier_outcome

    register_outcome = await SyncService.sync_cash_registers(
        session, target_company, server_id=server_id
    )
    registers_synced, registers_errors = register_outcome

    suppliers_section = {"synced": suppliers_synced, "errors": suppliers_errors}
    registers_section = {"synced": registers_synced, "errors": registers_errors}
    summary = f"Synced {suppliers_synced} suppliers and {registers_synced} cash registers"

    return {
        "suppliers": suppliers_section,
        "cash_registers": registers_section,
        "total_synced": suppliers_synced + registers_synced,
        "total_errors": suppliers_errors + registers_errors,
        "message": summary,
    }

View File

@@ -0,0 +1,715 @@
"""
OCR API endpoints with async job queue support.
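Endpoints:
- POST /extract - Submit OCR job (returns job_id immediately)
- GET /jobs/{job_id} - Get job status and result
- GET /jobs/{job_id}/wait - Long-poll for a job status change
- GET /queue/status - Get queue statistics
- GET /status - Check OCR service availability
- GET /engines - List enabled OCR engines and processing modes
- POST /extract-attachment/{attachment_id} - Run OCR on an existing attachment
- POST /profiles/reload - Hot-reload store profiles
- GET /profiles - List registered store profiles
- GET /profiles/{cui} - Get details for one store profile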
For backwards compatibility, we also support sync mode via query param:
- POST /extract?sync=true - Process synchronously (blocks until complete)
"""
import os
import tempfile
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Response
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
from backend.modules.data_entry.services.ocr_service import ocr_service
from backend.modules.data_entry.services.ocr_engine import OCREngine
from backend.modules.data_entry.services.ocr.job_queue import job_queue, OCRJobStatus as JobStatus
from backend.modules.data_entry.services.ocr.job_worker import estimate_wait_time
from backend.modules.data_entry.services.ocr.validation import OCRValidationEngine
from backend.modules.data_entry.schemas.ocr import (
OCRResponse,
OCRStatusResponse,
ExtractionData,
TvaEntry,
PaymentMethod,
# New job queue schemas
OCREngineChoice,
OCRJobStatus,
OCRJobSubmitResponse,
OCRJobResponse,
OCRQueueStatusResponse,
)
# Auth integration
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
router = APIRouter()
# ============================================================================
# OCR Job Queue Endpoints (NEW)
# ============================================================================
@router.post("/extract", response_model=OCRJobSubmitResponse)
async def submit_ocr_job(
    file: UploadFile = File(...),
    engine: OCREngineChoice = Query(default=OCREngineChoice.doctr_plus, description="OCR engine to use"),
    sync: bool = Query(default=False, description="If true, process synchronously (blocks)"),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Accept an uploaded receipt and queue it for OCR.

    The default (async) mode returns right away with a job_id that the
    client polls via GET /jobs/{job_id}. With ?sync=true the request is
    handed to the legacy synchronous path, which blocks until OCR finishes;
    it exists for backwards compatibility only.

    Args:
        file: Image or PDF file (JPG, PNG or PDF, max 10MB)
        engine: OCR engine choice (tesseract, doctr, doctr_plus, paddleocr)
        sync: If true, process synchronously (legacy mode)

    Returns:
        OCRJobSubmitResponse with job_id, queue_position, estimated_wait

    Raises:
        HTTPException: 400 for an unsupported content type or an oversized
            file, 500 when the job cannot be created.
    """
    if file.content_type not in ('image/jpeg', 'image/png', 'application/pdf'):
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
        )

    # The size limit can only be checked once the whole upload is in memory.
    content = await file.read()
    max_bytes = 10 * 1024 * 1024
    if len(content) > max_bytes:
        raise HTTPException(
            status_code=400,
            detail="File too large. Maximum size is 10MB."
        )

    if sync:
        # Legacy blocking path.
        return await _process_sync(content, file, engine, current_user)

    try:
        job = await job_queue.create_job(
            file_bytes=content,
            mime_type=file.content_type,
            engine=engine.value,
            username=current_user.username,
            original_filename=file.filename
        )

        # A falsy position (None or 0) is reported as position 1.
        position = (await job_queue.get_queue_position(job.id)) or 1
        wait_seconds = estimate_wait_time(position)

        return OCRJobSubmitResponse(
            job_id=job.id,
            status=OCRJobStatus.pending,
            queue_position=position,
            estimated_wait_seconds=wait_seconds,
            created_at=job.created_at or datetime.utcnow()
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to create OCR job: {str(e)}"
        )
@router.get("/jobs/{job_id}", response_model=OCRJobResponse)
async def get_job_status(
    job_id: str,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Return the current state of an OCR job without waiting.

    Clients that poll should prefer GET /jobs/{job_id}/wait (long-polling).

    Args:
        job_id: Job UUID from POST /extract response
        session: Database session used for fuzzy CUI matching of the result
        current_user: Authenticated user (injected)

    Returns:
        OCRJobResponse with status, queue_position, and result (if completed)

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    job = await job_queue.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Position/ETA are only reported for jobs that are not finished yet.
    position = None
    eta_seconds = None
    if job.status == JobStatus.pending:
        position = await job_queue.get_queue_position(job_id)
        eta_seconds = estimate_wait_time(position or 1)
    elif job.status == JobStatus.processing:
        position = 0
        # Rough estimate: assume half of the average processing time remains.
        average = await job_queue.get_average_processing_time()
        eta_seconds = int(average * 0.5)

    extraction = None
    if job.status == JobStatus.completed and job.result:
        extraction = _dict_to_extraction_data(job.result)
        extraction = await _apply_fuzzy_cui_matching(extraction, session)
        # Debug: log suggested_payment_mode being returned
        print(f"[OCR Router] Returning job {job_id} with suggested_payment_mode={extraction.suggested_payment_mode}", flush=True)

    return OCRJobResponse(
        job_id=job.id,
        status=OCRJobStatus(job.status.value),
        queue_position=position,
        estimated_wait_seconds=eta_seconds,
        created_at=job.created_at or datetime.utcnow(),
        started_at=job.started_at,
        completed_at=job.completed_at,
        queue_wait_ms=job.queue_wait_ms,
        ocr_time_ms=job.ocr_time_ms,
        processing_time_ms=job.processing_time_ms,
        result=extraction,
        error=job.error_message
    )
@router.get("/jobs/{job_id}/wait", response_model=OCRJobResponse)
async def wait_for_job_status(
    job_id: str,
    response: Response,
    timeout: int = Query(default=30, ge=1, le=60, description="Max wait time in seconds"),
    wait_for_terminal: bool = Query(default=False, description="If true, only return on completed/failed"),
    _t: Optional[int] = Query(default=None, description="Cache-busting timestamp (ignored)"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Long-poll for OCR job status change.

    Waits until:
    - Job status changes (default behavior - returns on any status change)
    - Job reaches terminal state (if wait_for_terminal=true)
    - Timeout expires (returns current status)

    Recommended client timeout: timeout + 5 seconds

    Args:
        job_id: Job UUID from POST /extract response
        response: Outgoing response; no-cache headers are set on it
        timeout: Max wait time in seconds (1-60, default 30)
        wait_for_terminal: If true, wait until completed/failed only
        _t: Cache-busting value sent by clients; never read
        session: Database session passed through to get_job_status
        current_user: Authenticated user (injected)

    Returns:
        OCRJobResponse with status, queue_position, and result (if completed)

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    # Prevent caching - critical for long-polling
    response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0"
    response.headers["Pragma"] = "no-cache"
    response.headers["Expires"] = "0"

    import asyncio
    import time

    poll_interval = 0.5  # seconds between internal status checks

    # Use the monotonic clock for the deadline: wall-clock adjustments
    # (NTP, manual changes) must not shorten or stretch the wait.
    start_time = time.monotonic()
    deadline = start_time + timeout
    last_status = None
    iteration = 0

    print(f"[OCR Wait] Starting long-poll for job {job_id}, timeout={timeout}s, wait_for_terminal={wait_for_terminal}", flush=True)

    while time.monotonic() < deadline:
        iteration += 1
        job = await job_queue.get_job(job_id)
        if not job:
            print(f"[OCR Wait] Job {job_id} not found after {iteration} iterations", flush=True)
            raise HTTPException(status_code=404, detail="Job not found")

        # Return immediately if job completed or failed (terminal states)
        if job.status in [JobStatus.completed, JobStatus.failed]:
            elapsed = time.monotonic() - start_time
            print(f"[OCR Wait] Job {job_id} {job.status.value} after {elapsed:.1f}s ({iteration} iterations)", flush=True)
            return await get_job_status(job_id, session, current_user)

        # Return on status change (unless wait_for_terminal is set)
        if not wait_for_terminal and last_status is not None and job.status != last_status:
            elapsed = time.monotonic() - start_time
            print(f"[OCR Wait] Job {job_id} status changed {last_status.value}->{job.status.value} after {elapsed:.1f}s", flush=True)
            return await get_job_status(job_id, session, current_user)

        last_status = job.status

        # Sleep until the next check, but never past the deadline so the
        # endpoint does not overshoot the requested timeout.
        remaining = deadline - time.monotonic()
        await asyncio.sleep(min(poll_interval, max(remaining, 0.0)))

    # Timeout - return current status
    elapsed = time.monotonic() - start_time
    print(f"[OCR Wait] Job {job_id} timeout after {elapsed:.1f}s ({iteration} iterations), status={last_status.value if last_status else 'unknown'}", flush=True)
    return await get_job_status(job_id, session, current_user)
@router.get("/queue/status", response_model=OCRQueueStatusResponse)
async def get_queue_status(
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Report OCR queue statistics.

    Returns:
        OCRQueueStatusResponse with the pending and processing job counts
        and the average time in seconds, as provided by the job queue.
    """
    queue_stats = await job_queue.get_queue_stats()
    pending = queue_stats["pending"]
    processing = queue_stats["processing"]
    average = queue_stats["average_time_seconds"]
    return OCRQueueStatusResponse(
        pending_jobs=pending,
        processing_jobs=processing,
        average_time_seconds=average,
    )
# ============================================================================
# Legacy Endpoints (backwards compatibility)
# ============================================================================
@router.get("/status", response_model=OCRStatusResponse)
async def get_ocr_status():
    """
    Report whether the OCR service has any engine available.

    Returns:
        OCRStatusResponse with the availability flag, the engine list
        and a human-readable message.
    """
    engines = OCREngine.get_available_engines()
    available = len(engines) > 0
    message = (
        f"OCR service ready with engines: {', '.join(engines)}"
        if available
        else "No OCR engines available. Install PaddleOCR or Tesseract."
    )
    return OCRStatusResponse(available=available, engines=engines, message=message)
@router.get("/engines")
async def get_available_engines():
    """
    Describe which OCR engines are enabled through environment variables.

    ``OCR_ENABLE_PADDLEOCR`` and ``OCR_ENABLE_TESSERACT`` (both default to
    "true") toggle the optional engines; ``doctr`` and ``doctr_plus`` are
    always reported as enabled. ``OCR_DEFAULT_ENGINE`` (default
    "doctr_plus") is returned as the default mode.

    Returns:
        Dict with the per-engine flags, the list of enabled modes, the
        default mode and a memory estimate (MB) per engine.
    """
    def _env_enabled(var_name: str) -> bool:
        # Only the literal "true" (any letter case) counts as enabled.
        return os.getenv(var_name, "true").lower() == "true"

    paddle_enabled = _env_enabled("OCR_ENABLE_PADDLEOCR")
    tesseract_enabled = _env_enabled("OCR_ENABLE_TESSERACT")
    default_engine = os.getenv("OCR_DEFAULT_ENGINE", "doctr_plus")

    engines = {
        "tesseract": tesseract_enabled,
        "doctr": True,       # Always available (primary engine)
        "doctr_plus": True,  # Always available (recommended)
        "paddleocr": paddle_enabled,
    }

    # Dict insertion order gives: tesseract, doctr, doctr_plus, paddleocr.
    modes = [engine_name for engine_name, enabled in engines.items() if enabled]

    memory_estimate_mb = {
        "tesseract": 50,
        "doctr": 600,
        "doctr_plus": 600,
        "paddleocr": 800,
    }
    return {
        "engines": engines,
        "available_modes": modes,
        "default_mode": default_engine,
        "memory_estimate_mb": memory_estimate_mb,
    }
@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
async def extract_from_attachment(
    attachment_id: int,
    engine: OCREngineChoice = Query(default=OCREngineChoice.doctr_plus),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Run OCR again on a file that was already uploaded as an attachment.

    This endpoint always processes synchronously.

    Args:
        attachment_id: Id of the stored attachment to process
        engine: Requested OCR engine
        session: Database session (attachment lookup and fuzzy CUI matching)
        current_user: Authenticated user (injected)

    Returns:
        OCRResponse carrying the extracted data

    Raises:
        HTTPException: 404 when the attachment or its file is missing,
            400 for an unsupported MIME type, 422 when OCR fails.
    """
    attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
    if not attachment:
        raise HTTPException(status_code=404, detail="Attachment not found")

    file_path = AttachmentCRUD.get_file_path(attachment)
    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found on disk")

    ocr_mime_types = ('image/jpeg', 'image/png', 'application/pdf')
    if attachment.mime_type not in ocr_mime_types:
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported for OCR: {attachment.mime_type}"
        )

    # TODO: Could use job queue here too, but keeping sync for now
    # NOTE(review): `engine` is accepted but not forwarded to process_image.
    outcome = await ocr_service.process_image(file_path, attachment.mime_type)
    success, message, result = outcome
    if not success:
        raise HTTPException(status_code=422, detail=message)

    extraction = _result_to_extraction_data(result)
    extraction = await _apply_fuzzy_cui_matching(extraction, session)
    return OCRResponse(success=True, message=message, data=extraction)
# ============================================================================
# Helper Functions
# ============================================================================
async def _apply_fuzzy_cui_matching(
    extraction_data: ExtractionData,
    session: AsyncSession
) -> ExtractionData:
    """
    Try to correct the extracted CUI via a fuzzy supplier lookup.

    The lookup runs only when the CUI is missing or fails its checksum;
    a CUI with a valid checksum is trusted as-is. Lookup errors are
    printed and otherwise ignored (best effort). The passed object is
    modified in place and also returned.

    Args:
        extraction_data: ExtractionData with CUI to potentially correct
        session: AsyncSession for database lookups

    Returns:
        ExtractionData with CUI corrected if a match was found
    """
    from backend.modules.data_entry.services.ocr.validation import CUIChecksumRule

    current_cui = extraction_data.cui

    # Nothing to match on: neither a CUI nor a vendor name.
    if not (current_cui or extraction_data.partner_name):
        return extraction_data

    if current_cui:
        digits = CUIChecksumRule.extract_digits(current_cui)
        checksum_ok = len(digits) >= 6 and CUIChecksumRule.validate_checksum(digits)
        if checksum_ok:
            print(f"[Fuzzy Match] CUI {extraction_data.cui} has valid checksum, skipping fuzzy match", flush=True)
            return extraction_data

    # CUI missing or invalid checksum - try fuzzy matching
    try:
        found = await OCRValidationEngine.fuzzy_match_supplier(
            cui=extraction_data.cui,
            vendor_name=extraction_data.partner_name,
            db_session=session
        )
        if found:
            corrected_cui, supplier_name = found
            if corrected_cui != extraction_data.cui:
                print(f"[Fuzzy Match] Corrected: {extraction_data.cui} -> {corrected_cui} ({supplier_name})", flush=True)
                extraction_data.cui = corrected_cui
                # Fill in the vendor name only when OCR did not provide one.
                if not extraction_data.partner_name:
                    extraction_data.partner_name = supplier_name
    except Exception as e:
        print(f"[Fuzzy Match] Error: {e}", flush=True)

    return extraction_data
async def _process_sync(
    content: bytes,
    file: UploadFile,
    engine: OCREngineChoice,
    current_user: CurrentUser
) -> OCRJobSubmitResponse:
    """
    Process OCR synchronously (legacy mode).

    Writes the upload to a temporary file, runs OCR on it and returns a
    job-style response marked as completed. The temporary file is removed
    in all cases.

    Args:
        content: Raw bytes of the uploaded file
        file: The upload (filename and content type are read from it)
        engine: Requested OCR engine (currently not used here)
        current_user: Authenticated user (currently not used here)

    Returns:
        OCRJobSubmitResponse with a synthetic "sync-..." job id

    Raises:
        HTTPException: 422 when OCR processing reports failure.
    """
    import hashlib

    # Keep the original extension when it is a supported one
    suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
    if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
        suffix = '.jpg'

    tmp_path = None
    try:
        # Created inside the try block so a failed write cannot leave the
        # (delete=False) temp file behind.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(content)

        # NOTE(review): the extraction result is discarded - the response
        # built below carries no result data. Confirm this is intended.
        success, message, _result = await ocr_service.process_image(
            tmp_path, file.content_type
        )
        if not success:
            raise HTTPException(status_code=422, detail=message)

        # Synthetic id derived from a content digest. The built-in hash()
        # of bytes is salted per process and can be negative, so it does
        # not give a stable identifier.
        content_digest = hashlib.sha256(content).hexdigest()[:16]

        return OCRJobSubmitResponse(
            job_id="sync-" + content_digest,
            status=OCRJobStatus.completed,
            queue_position=0,
            estimated_wait_seconds=0,
            created_at=datetime.utcnow()
        )
    finally:
        # Clean up temp file
        if tmp_path is not None and tmp_path.exists():
            os.unlink(tmp_path)
def _result_to_extraction_data(result) -> ExtractionData:
    """
    Build the ExtractionData API schema from an OCR result object.

    TVA entries and payment methods (lists of dicts on the result) are
    converted to their schema classes; when a CARD payment is detected
    the suggested payment mode is 'banca', otherwise it is left unset.

    Args:
        result: OCR result object whose attributes are copied over

    Returns:
        ExtractionData populated from ``result``
    """
    tva_entries_schema = []
    if result.tva_entries:
        for entry in result.tva_entries:
            tva_entries_schema.append(
                TvaEntry(code=entry.get('code'), percent=entry['percent'], amount=entry['amount'])
            )

    payment_methods_list = []
    if result.payment_methods:
        for payment in result.payment_methods:
            payment_methods_list.append(
                PaymentMethod(method=payment['method'], amount=Decimal(str(payment['amount'])))
            )

    # Auto-suggest payment_mode based on detected methods
    paid_by_card = any(pm.method == 'CARD' for pm in payment_methods_list)
    suggested_payment_mode = 'banca' if paid_by_card else None

    # Attributes copied 1:1 from the result object.
    passthrough = (
        'receipt_type', 'receipt_number', 'receipt_series', 'receipt_date',
        'amount', 'partner_name', 'cui', 'description', 'tva_total',
        'address', 'items_count', 'client_name', 'client_cui',
        'client_address', 'confidence_amount', 'confidence_date',
        'confidence_vendor', 'overall_confidence', 'raw_text', 'ocr_engine',
        'processing_time_ms', 'needs_manual_review', 'validation_warnings',
        'validation_errors', 'inter_ocr_ratios',
    )
    copied = {attr: getattr(result, attr) for attr in passthrough}

    return ExtractionData(
        tva_entries=tva_entries_schema,
        payment_methods=payment_methods_list,
        suggested_payment_mode=suggested_payment_mode,
        # These two attributes may be absent on the result object.
        confidence_client=getattr(result, 'confidence_client', 0.0),
        raw_texts=getattr(result, 'raw_texts', []),
        **copied,
    )
def _dict_to_extraction_data(data: dict) -> ExtractionData:
    """
    Convert result dict (from job queue) to ExtractionData schema.

    Args:
        data: Result dict stored on a completed job. ``receipt_date`` may be
            an ISO date string; amounts may be any value accepted by
            ``Decimal(str(...))``.

    Returns:
        ExtractionData populated from ``data``. Missing keys fall back to
        the defaults given below; an unparseable ``receipt_date`` string
        becomes None.
    """
    from datetime import date

    def _amount_or_zero(value) -> Decimal:
        # A key that is present with a None value would otherwise turn into
        # Decimal('None') and raise InvalidOperation; treat it like a
        # missing key (0).
        return Decimal(str(0 if value is None else value))

    # Parse date if string
    receipt_date = data.get('receipt_date')
    if isinstance(receipt_date, str):
        try:
            receipt_date = date.fromisoformat(receipt_date)
        except (ValueError, TypeError):
            receipt_date = None

    # Convert tva_entries (non-dict items are skipped)
    tva_entries_schema = [
        TvaEntry(
            code=e.get('code'),
            percent=e.get('percent', 0),
            amount=_amount_or_zero(e.get('amount')),
        )
        for e in (data.get('tva_entries', []) or [])
        if isinstance(e, dict)
    ]

    # Convert payment_methods (non-dict items are skipped)
    payment_methods_list = [
        PaymentMethod(
            method=pm.get('method', 'NUMERAR'),
            amount=_amount_or_zero(pm.get('amount')),
        )
        for pm in (data.get('payment_methods', []) or [])
        if isinstance(pm, dict)
    ]

    # Convert amount and tva_total to Decimal; None stays None
    amount = data.get('amount')
    if amount is not None:
        amount = Decimal(str(amount))
    tva_total = data.get('tva_total')
    if tva_total is not None:
        tva_total = Decimal(str(tva_total))

    return ExtractionData(
        receipt_type=data.get('receipt_type', 'bon_fiscal'),
        receipt_number=data.get('receipt_number'),
        receipt_series=data.get('receipt_series'),
        receipt_date=receipt_date,
        amount=amount,
        partner_name=data.get('partner_name'),
        cui=data.get('cui'),
        description=data.get('description'),
        tva_entries=tva_entries_schema,
        tva_total=tva_total,
        address=data.get('address'),
        items_count=data.get('items_count'),
        payment_methods=payment_methods_list,
        suggested_payment_mode=data.get('suggested_payment_mode'),
        client_name=data.get('client_name'),
        client_cui=data.get('client_cui'),
        client_address=data.get('client_address'),
        confidence_amount=data.get('confidence_amount', 0.0),
        confidence_date=data.get('confidence_date', 0.0),
        confidence_vendor=data.get('confidence_vendor', 0.0),
        confidence_client=data.get('confidence_client', 0.0),
        confidence_tva=data.get('confidence_tva', 0.0),
        confidence_payment=data.get('confidence_payment', 0.0),
        overall_confidence=data.get('overall_confidence', 0.0),
        raw_text=data.get('raw_text', ''),
        raw_texts=data.get('raw_texts', []),
        ocr_engine=data.get('ocr_engine', ''),
        processing_time_ms=data.get('processing_time_ms', 0),
        needs_manual_review=data.get('needs_manual_review'),
        validation_warnings=data.get('validation_warnings', []),
        validation_errors=data.get('validation_errors', []),
        inter_ocr_ratios=data.get('inter_ocr_ratios', {}),
    )
# ============================================================================
# Store Profiles Management Endpoints
# ============================================================================
@router.post("/profiles/reload")
async def reload_store_profiles(
    current_user: CurrentUser = Depends(get_current_user)
) -> dict:
    """
    Hot-reload every store profile without restarting the server.

    Call this after adding or modifying profile files.

    Returns:
        Dict with the number of reloaded modules plus the profile count,
        registered CUIs and last reload time reported by the registry.
    """
    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry

    reloaded = ProfileRegistry.reload_all()
    # Status is read after the reload so it reflects the fresh state.
    registry_status = ProfileRegistry.get_reload_status()

    payload = {"success": True, "reloaded_modules": reloaded}
    for key in ("profiles_count", "registered_cuis", "last_reload"):
        payload[key] = registry_status[key]
    return payload
@router.get("/profiles")
async def list_store_profiles(
    current_user: CurrentUser = Depends(get_current_user)
) -> dict:
    """
    List every store profile currently registered.

    Returns:
        Dict with the profiles, their count and the last reload time.
    """
    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry

    registered = ProfileRegistry.list_profiles()
    last_reload = ProfileRegistry.get_reload_status()["last_reload"]
    return {
        "profiles": registered,
        "count": len(registered),
        "last_reload": last_reload,
    }
@router.get("/profiles/{cui}")
async def get_store_profile(
    cui: str,
    current_user: CurrentUser = Depends(get_current_user)
) -> dict:
    """
    Return the details of one store profile.

    Args:
        cui: Store CUI (with or without RO prefix)

    Returns:
        Profile details including validation hints

    Raises:
        HTTPException: 404 if no profile exists for this CUI
    """
    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry

    profile_info = ProfileRegistry.get_profile_info(cui)
    if profile_info:
        return profile_info

    raise HTTPException(
        status_code=404,
        detail=f"No profile registered for CUI: {cui}"
    )

View File

@@ -0,0 +1,268 @@
"""
OCR Settings and Metrics API endpoints.
Endpoints:
- GET /settings/ocr-engines - Get the active OCR engines from configuration
- GET /settings/ocr-preference - Get user's preferred OCR engine
- POST /settings/ocr-preference - Set user's preferred OCR engine
- GET /metrics/ocr/summary - Get OCR metrics summary by engine
- GET /metrics/ocr/history - Get user's OCR job history
- GET /metrics/ocr/stats - Get overall OCR statistics
"""
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.crud.ocr_settings import OCRPreferenceCRUD, OCRMetricsCRUD
from backend.modules.data_entry.db.models.ocr_settings import OCREngine, OCRMetricsSummary
# Auth integration
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
router = APIRouter()
# ============================================================================
# Schemas
# ============================================================================
class OCRPreferenceResponse(BaseModel):
    """Payload returned by the OCR preference endpoints."""
    # User the preference belongs to.
    username: str
    # Name of the user's preferred OCR engine.
    preferred_engine: str
    # Engine names offered to the client.
    available_engines: List[str] = Field(
        description="Available OCR engines",
        default=["tesseract", "doctr", "doctr_plus", "paddleocr"],
    )
class OCRPreferenceRequest(BaseModel):
    """Request body used to store a user's preferred OCR engine."""
    # Engine name; defaults to "doctr_plus" when omitted.
    preferred_engine: str = Field(
        description="Preferred OCR engine: tesseract, doctr, doctr_plus, paddleocr",
        default="doctr_plus",
    )
class OCRMetricsHistoryItem(BaseModel):
    """Metrics recorded for one OCR job, as listed in the history."""
    job_id: str
    # Engine the caller asked for and the engine recorded as used.
    engine_requested: str
    engine_used: str
    processing_time_ms: int
    success: bool
    overall_confidence: float
    fields_extracted: int
    # Creation timestamp serialized as a string.
    created_at: str
    original_filename: Optional[str] = None
class OCRMetricsHistoryResponse(BaseModel):
    """Envelope returned by the OCR history endpoint."""
    # History entries included in this response.
    items: List[OCRMetricsHistoryItem]
    # Count reported alongside the items.
    total: int
class OCRStatsResponse(BaseModel):
    """Aggregated OCR statistics returned by the stats endpoint."""
    # Job counters.
    total_jobs: int
    successful_jobs: int
    failed_jobs: int
    # Derived figures.
    success_rate: float
    avg_processing_time_ms: float
    avg_confidence: float
    # Length of the reporting window, in days.
    period_days: int
class OCRActiveEnginesResponse(BaseModel):
    """Payload returned by the active OCR engines endpoint."""
    # Required: the engines enabled through configuration.
    engines: List[str] = Field(
        description="List of active OCR engines from .env config"
    )
    # Engine suggested to clients; "doctr_plus" unless overridden.
    recommended: str = Field(
        description="Recommended engine",
        default="doctr_plus",
    )
# ============================================================================
# OCR Engines Configuration Endpoint
# ============================================================================
@router.get("/settings/ocr-engines", response_model=OCRActiveEnginesResponse)
async def get_active_ocr_engines():
    """
    Return the OCR engines the frontend dropdown should offer.

    The list comes from ``settings.ocr_active_engines_list``, which is
    configured via the OCR_ACTIVE_ENGINES environment variable.

    Default: doctr,doctr_plus
    Available: tesseract, paddleocr, doctr, doctr_plus

    Returns:
        OCRActiveEnginesResponse with the active engines and the
        recommended engine ("doctr_plus").
    """
    from backend.modules.data_entry.config import settings

    active_engines = settings.ocr_active_engines_list
    return OCRActiveEnginesResponse(engines=active_engines, recommended="doctr_plus")
# ============================================================================
# OCR Preference Endpoints
# ============================================================================
@router.get("/settings/ocr-preference", response_model=OCRPreferenceResponse)
async def get_ocr_preference(
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Return the current user's preferred OCR engine.

    Falls back to 'doctr_plus' when the user has no saved preference.
    The response also lists the engines reported as available.
    """
    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass

    username = current_user.username
    stored = await OCRPreferenceCRUD.get_by_username(session, username)
    available = OCREngineClass.get_available_engines()

    if stored:
        preferred = stored.preferred_engine.value
    else:
        preferred = "doctr_plus"

    return OCRPreferenceResponse(
        username=current_user.username,
        preferred_engine=preferred,
        available_engines=available,
    )
@router.post("/settings/ocr-preference", response_model=OCRPreferenceResponse)
async def set_ocr_preference(
    request: OCRPreferenceRequest,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Set user's preferred OCR engine.

    Valid engines: tesseract, doctr, doctr_plus, paddleocr
    Note: Available engines depend on .env configuration (OCR_ENABLE_PADDLEOCR, OCR_ENABLE_TESSERACT)

    Args:
        request: Body carrying the requested engine name
        session: Database session used to store the preference
        current_user: Authenticated user whose preference is saved

    Returns:
        OCRPreferenceResponse with the stored preference and the
        available engines

    Raises:
        HTTPException: 400 when the requested engine is not available or
            cannot be mapped to a storable engine value.
    """
    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass

    # Engine name -> stored enum value
    engine_map = {
        "tesseract": OCREngine.TESSERACT,
        "doctr": OCREngine.DOCTR,
        "doctr_plus": OCREngine.DOCTR_PLUS,
        "paddleocr": OCREngine.PADDLEOCR,
    }

    # Looked up once; the same list is reused for the response.
    available = OCREngineClass.get_available_engines()
    requested = request.preferred_engine

    # Reject engines that are unavailable or have no enum mapping, instead
    # of silently saving a different engine than the one asked for.
    if requested not in available or requested not in engine_map:
        selectable = [name for name in available if name in engine_map]
        raise HTTPException(
            status_code=400,
            detail=f"Invalid engine. Must be one of: {', '.join(selectable)}"
        )

    preference = await OCRPreferenceCRUD.create_or_update(
        session,
        current_user.username,
        engine_map[requested]
    )

    return OCRPreferenceResponse(
        username=current_user.username,
        preferred_engine=preference.preferred_engine.value,
        available_engines=available
    )
# ============================================================================
# OCR Metrics Endpoints
# ============================================================================
@router.get("/metrics/ocr/summary", response_model=List[OCRMetricsSummary])
async def get_ocr_metrics_summary(
    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Return OCR metrics aggregated per engine for the current user.

    Args:
        days: Size of the reporting window in days (1-365, default 30)
        session: Database session injected by FastAPI
        current_user: Authenticated user whose metrics are summarized

    Returns:
        The per-engine summaries produced by
        ``OCRMetricsCRUD.get_summary_by_engine``.
    """
    return await OCRMetricsCRUD.get_summary_by_engine(
        session,
        days=days,
        username=current_user.username,
    )
@router.get("/metrics/ocr/history", response_model=OCRMetricsHistoryResponse)
async def get_ocr_metrics_history(
    limit: int = Query(default=50, ge=1, le=200, description="Max items to return"),
    offset: int = Query(default=0, ge=0, description="Items to skip"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Return one page of the current user's OCR job history.

    Args:
        limit: Max items to return (1-200, default 50)
        offset: Items to skip
        session: Database session injected by FastAPI
        current_user: Authenticated user whose history is listed

    Returns:
        OCRMetricsHistoryResponse; ``total`` is the number of items in this
        response (after limit/offset), not a count of all stored rows.
    """
    records = await OCRMetricsCRUD.get_user_history(
        session,
        username=current_user.username,
        limit=limit,
        offset=offset
    )

    history_items = []
    for record in records:
        history_items.append(
            OCRMetricsHistoryItem(
                job_id=record.job_id,
                engine_requested=record.engine_requested,
                engine_used=record.engine_used,
                processing_time_ms=record.processing_time_ms,
                success=record.success,
                overall_confidence=record.overall_confidence,
                fields_extracted=record.fields_extracted,
                # Timestamps are serialized as ISO-8601 strings.
                created_at=record.created_at.isoformat(),
                original_filename=record.original_filename,
            )
        )

    return OCRMetricsHistoryResponse(items=history_items, total=len(history_items))
@router.get("/metrics/ocr/stats", response_model=OCRStatsResponse)
async def get_ocr_stats(
    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user)
):
    """
    Return overall OCR statistics for the current user.

    Args:
        days: Size of the reporting window in days (1-365, default 30)
        session: Database session injected by FastAPI
        current_user: Authenticated user whose statistics are computed

    Returns:
        OCRStatsResponse built from the mapping returned by
        ``OCRMetricsCRUD.get_overall_stats`` (its keys become the fields).
    """
    username = current_user.username
    overall = await OCRMetricsCRUD.get_overall_stats(
        session, days=days, username=username
    )
    return OCRStatsResponse(**overall)

View File

@@ -0,0 +1,705 @@
"""API endpoints for receipts."""
from typing import List, Optional, Annotated
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, Header, Response
from fastapi.responses import FileResponse, StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
from backend.modules.data_entry.services.receipt_service import ReceiptService
from backend.modules.data_entry.services.nomenclature_service import NomenclatureService
from backend.modules.data_entry.schemas.receipt import (
ReceiptCreate,
ReceiptUpdate,
ReceiptResponse,
ReceiptListResponse,
ReceiptFilter,
ProcessingStats,
AttachmentResponse,
AccountingEntryResponse,
WorkflowAction,
RejectRequest,
EntriesUpdateRequest,
PartnerOption,
AccountOption,
CashRegisterOption,
ExpenseTypeOption,
BulkDeleteRequest,
BulkDeleteResponse,
BulkDeleteFailure,
)
from backend.modules.data_entry.db.models.receipt import ReceiptStatus, ReceiptDirection
from backend.modules.data_entry.services import sse_service
# Auth integration
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
# Module-level router; the endpoint decorators below register their routes on it.
router = APIRouter()
# ============ Helper for selected company from header ============
async def get_selected_company(
    current_user: CurrentUser = Depends(get_current_user),
    x_selected_company: Annotated[Optional[str], Header()] = None
) -> int:
    """
    Resolve the company the request should operate on.

    When the ``X-Selected-Company`` header is present it must be an integer
    and must appear (as a string) in ``current_user.companies``. Without the
    header, the first entry of ``current_user.companies`` is used.

    Returns:
        The selected company ID as an integer.

    Raises:
        HTTPException 400: The header is not a valid integer, or no usable
            company could be derived from the user's company list.
        HTTPException 403: The user has no access to the requested company.
    """
    if not x_selected_company:
        # No header: fall back to the first company assigned to the user.
        if current_user.companies:
            try:
                return int(current_user.companies[0])
            except (ValueError, IndexError):
                # A non-numeric first entry is treated like "no company".
                pass
        raise HTTPException(
            status_code=400,
            detail="Nu aveți nicio firmă asignată"
        )

    try:
        company_id = int(x_selected_company)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid company ID format: {x_selected_company}"
        )

    # The auth layer stores company IDs as strings, so compare in that form.
    if str(company_id) not in current_user.companies:
        raise HTTPException(
            status_code=403,
            detail=f"Nu aveți acces la firma {company_id}"
        )
    return company_id
# Annotated dependency alias: a parameter typed `SelectedCompany` receives the
# integer company ID resolved by get_selected_company().
SelectedCompany = Annotated[int, Depends(get_selected_company)]
# Legacy function for backwards compatibility (deprecated)
def get_current_user_company(current_user: CurrentUser) -> int:
    """
    DEPRECATED: prefer the get_selected_company() dependency.

    Returns the first entry of ``current_user.companies`` converted to an
    integer and ignores the X-Selected-Company header entirely. Falls back to
    ``1`` when the list is empty or its first entry cannot be converted.
    """
    companies = current_user.companies
    if not companies:
        return 1
    try:
        return int(companies[0])
    except (ValueError, IndexError):
        # Unparseable first entry: keep the legacy default company.
        return 1
# ============ SSE Endpoint for Real-time Status Updates ============
@router.get("/sse/status")
async def sse_status_stream(
    batch_id: Optional[str] = Query(
        default=None,
        description="Optional batch_id to filter events for a specific batch"
    ),
):
    """
    Server-Sent Events endpoint streaming receipt status updates.

    Opens a long-lived ``text/event-stream`` response fed by
    ``sse_service.subscribe`` so clients do not have to poll.

    Query Parameters:
        batch_id: Optional filter passed to the subscription so that only
            events for one batch upload are delivered.

    Response headers set here:
        - Cache-Control: no-cache
        - Connection: keep-alive
        - X-Accel-Buffering: no (turns off nginx response buffering)

    The event payload and any ``retry:`` reconnection hint are produced inside
    ``sse_service`` and are not visible in this function. The upstream
    documentation describes events shaped like
    ``data: {"receipt_id": 123, "status": "DRAFT", "processing_status": "completed", ...}``
    and a 3000 ms retry hint.

    NOTE(review): this handler declares no authentication dependency - confirm
    that access control is applied at router or application level.

    Example:
        curl -N http://localhost:8000/api/data-entry/receipts/sse/status
        curl -N http://localhost:8000/api/data-entry/receipts/sse/status?batch_id=abc-123
    """
    event_stream = sse_service.subscribe(batch_id=batch_id)
    stream_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",  # Disable nginx buffering
    }
    return StreamingResponse(
        event_stream,
        media_type="text/event-stream",
        headers=stream_headers,
    )
# ============ Receipt CRUD Endpoints ============
@router.post("/", response_model=ReceiptResponse)
async def create_receipt(
    data: ReceiptCreate,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Create a new receipt on behalf of the calling user.

    Creation is delegated to ``ReceiptService.create_receipt`` (documented
    upstream as producing a receipt in DRAFT status); the stored object is
    returned as a ``ReceiptResponse``.
    """
    author = current_user.username
    created = await ReceiptService.create_receipt(session, data, author)
    return ReceiptResponse.model_validate(created)
@router.get("/", response_model=ReceiptListResponse)
async def list_receipts(
    response: Response,
    status: Optional[ReceiptStatus] = None,
    direction: Optional[ReceiptDirection] = None,
    company_id: Optional[int] = None,
    created_by: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    search: Optional[str] = None,
    # Bulk upload filters (US-012)
    processing_status: Optional[str] = Query(default=None, description="Filter by processing status: pending, processing, completed, failed"),
    batch_id: Optional[str] = Query(default=None, description="Filter by batch_id UUID"),
    sort_by: Optional[str] = Query(default=None, description="Sort field: processing_started_at, processing_started_at_asc"),
    # Pagination
    page: int = Query(default=1, ge=1),
    page_size: int = Query(default=20, ge=1, le=100),
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
):
    """Get a paginated list of receipts with filters.

    US-012: extended with batch_id, processing_status filters and
    processing_stats.

    Args:
        response: Outgoing response, used only to set no-cache headers.
        status, direction, created_by, search: Optional receipt filters passed
            through to ``ReceiptFilter``.
        company_id: Optional explicit company; when falsy, the company resolved
            from the X-Selected-Company header is used.
        date_from, date_to: Optional ISO-format dates (e.g. ``2024-01-31``).
        processing_status, batch_id, sort_by: Bulk-upload filters and sorting.
        page, page_size: Pagination (page >= 1, 1 <= page_size <= 100).
        session: Database session injected by FastAPI.
        selected_company: Company ID resolved by ``get_selected_company``.

    Returns:
        The result of ``ReceiptService.get_receipts`` for the built filter.

    Raises:
        HTTPException 400: ``date_from`` or ``date_to`` is not a valid ISO date.
    """
    from datetime import date as date_type

    def _parse_date(raw: Optional[str], field_name: str):
        """Parse an optional ISO date string, mapping bad input to HTTP 400."""
        if not raw:
            return None
        try:
            return date_type.fromisoformat(raw)
        except ValueError:
            # Without this, a malformed date surfaces as an unhandled
            # ValueError and the client gets a 500 for its own bad input.
            raise HTTPException(
                status_code=400,
                detail=f"Invalid date format for {field_name}: {raw}"
            ) from None

    # Disable browser caching to always get fresh data
    response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0"
    response.headers["Pragma"] = "no-cache"

    # NOTE(review): an explicit company_id query parameter takes precedence
    # over the header-validated company and is not checked against the user's
    # company list in this function - confirm it is validated elsewhere.
    filters = ReceiptFilter(
        status=status,
        direction=direction,
        company_id=company_id or selected_company,
        created_by=created_by,
        date_from=_parse_date(date_from, "date_from"),
        date_to=_parse_date(date_to, "date_to"),
        search=search,
        processing_status=processing_status,
        batch_id=batch_id,
        sort_by=sort_by,
        page=page,
        page_size=page_size,
    )
    return await ReceiptService.get_receipts(session, filters)
@router.get("/pending", response_model=List[ReceiptResponse])
async def list_pending_receipts(
    response: Response,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
):
    """Return all receipts awaiting review (accountant view).

    The company is the explicit ``company_id`` query parameter when truthy,
    otherwise the company resolved from the X-Selected-Company header.
    """
    # Force clients to refetch instead of serving a cached list.
    for header_name, header_value in (
        ("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"),
        ("Pragma", "no-cache"),
    ):
        response.headers[header_name] = header_value

    target_company = company_id or selected_company
    pending = await ReceiptCRUD.get_pending_review(session, target_company)

    serialized = []
    for receipt in pending:
        serialized.append(ReceiptResponse.model_validate(receipt))
    return serialized
@router.get("/stats")
async def get_receipt_stats(
    response: Response,
    company_id: Optional[int] = None,
    my_receipts: bool = False,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
    current_user: CurrentUser = Depends(get_current_user),
):
    """Return receipt statistics for a company.

    Args:
        response: Outgoing response, used only to set no-cache headers.
        company_id: Optional explicit company; when falsy the header-selected
            company is used.
        my_receipts: When true, restrict the statistics to receipts created by
            the calling user.
        session: Database session injected by FastAPI.
        selected_company: Company ID resolved by ``get_selected_company``.
        current_user: Authenticated user.
    """
    # Force clients to refetch instead of serving cached numbers.
    for header_name, header_value in (
        ("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"),
        ("Pragma", "no-cache"),
    ):
        response.headers[header_name] = header_value

    target_company = company_id or selected_company
    if my_receipts:
        author_filter = current_user.username
    else:
        author_filter = None

    return await ReceiptCRUD.get_stats(
        session,
        target_company,
        created_by=author_filter,
    )
@router.get("/{receipt_id}", response_model=ReceiptResponse)
async def get_receipt(
    receipt_id: int,
    response: Response,
    session: AsyncSession = Depends(get_session),
):
    """Return a single receipt by ID.

    The receipt is loaded through ``ReceiptService.get_receipt`` (documented
    upstream as including attachments and accounting entries).

    Raises:
        HTTPException 404: No receipt exists for ``receipt_id``.
    """
    # Force clients to refetch instead of serving a cached receipt.
    for header_name, header_value in (
        ("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"),
        ("Pragma", "no-cache"),
    ):
        response.headers[header_name] = header_value

    found = await ReceiptService.get_receipt(session, receipt_id)
    if found:
        return ReceiptResponse.model_validate(found)
    raise HTTPException(status_code=404, detail="Receipt not found")
@router.put("/{receipt_id}", response_model=ReceiptResponse)
async def update_receipt(
    receipt_id: int,
    data: ReceiptUpdate,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Update a receipt on behalf of the calling user.

    The rules (documented upstream as: DRAFT status only, creator only) are
    applied inside ``ReceiptService.update_receipt``; this endpoint only
    translates a refusal into an HTTP error.

    Raises:
        HTTPException 400: The service refused the update; ``detail`` carries
            the service's message.
    """
    ok, detail, updated = await ReceiptService.update_receipt(
        session, receipt_id, data, current_user.username
    )
    if ok:
        return ReceiptResponse.model_validate(updated)
    raise HTTPException(status_code=400, detail=detail)
@router.delete("/bulk", response_model=BulkDeleteResponse)
async def bulk_delete_receipts(
    data: BulkDeleteRequest,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Bulk delete receipts (US-024).

    Processes every ID in ``data.ids`` independently, so one request can
    partly succeed. A receipt is deleted only when all of these hold, checked
    in this order:
    - the receipt exists
    - its processing_status is neither 'pending' nor 'processing'
    - its status is DRAFT
    - it was created by the calling user

    Returns:
        BulkDeleteResponse listing the deleted IDs and, for each rejected ID,
        the reason it was not deleted.
    """
    deleted: List[int] = []
    failed: List[BulkDeleteFailure] = []

    for receipt_id in data.ids:
        # Relations are loaded because the receipt may be deleted below.
        receipt = await ReceiptCRUD.get_by_id(session, receipt_id, include_relations=True)

        # Work out the first rule this receipt violates, if any.
        if not receipt:
            rejection = "Bonul nu a fost găsit"
        elif receipt.processing_status in ["pending", "processing"]:
            # A bulk upload is still working on this receipt.
            rejection = "Bonul este în curs de procesare și nu poate fi șters"
        elif receipt.status != ReceiptStatus.DRAFT:
            rejection = f"Doar bonurile în status DRAFT pot fi șterse (status curent: {receipt.status.value})"
        elif receipt.created_by != current_user.username:
            rejection = "Doar creatorul bonului poate să-l șteargă"
        else:
            rejection = None

        if rejection is not None:
            failed.append(BulkDeleteFailure(id=receipt_id, error=rejection))
            continue

        # Attachments and accounting entries are expected to be removed by
        # cascade delete (per the original note in this endpoint).
        await ReceiptCRUD.delete(session, receipt)
        deleted.append(receipt_id)

    return BulkDeleteResponse(deleted=deleted, failed=failed)
@router.delete("/{receipt_id}")
async def delete_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Delete a single receipt on behalf of the calling user.

    The rules (documented upstream as: DRAFT status only, creator only) are
    applied inside ``ReceiptService.delete_receipt``.

    Returns:
        ``{"success": True, "message": <service message>}`` on success.

    Raises:
        HTTPException 400: The service refused the deletion.
    """
    ok, detail = await ReceiptService.delete_receipt(
        session, receipt_id, current_user.username
    )
    if ok:
        return {"success": True, "message": detail}
    raise HTTPException(status_code=400, detail=detail)
# ============ Workflow Endpoints ============
@router.post("/{receipt_id}/submit", response_model=WorkflowAction)
async def submit_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Submit a receipt for review (intended transition: DRAFT → PENDING_REVIEW).

    The transition is performed by ``ReceiptService.submit_for_review``. A
    failure is reported in the ``WorkflowAction`` body, not as an HTTP error.
    """
    ok, note, updated = await ReceiptService.submit_for_review(
        session, receipt_id, current_user.username
    )

    # Notify SSE subscribers only when the transition actually happened (US-030).
    if ok and updated:
        await sse_service.broadcast_status_change(
            receipt_id=updated.id,
            status=updated.status.value,
            processing_status=updated.processing_status,
            batch_id=updated.batch_id,
        )

    payload = ReceiptResponse.model_validate(updated) if updated else None
    return WorkflowAction(success=ok, message=note, receipt=payload)
@router.post("/{receipt_id}/approve", response_model=WorkflowAction)
async def approve_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Approve a receipt (intended transition: PENDING_REVIEW → APPROVED).

    Accountant action performed by ``ReceiptService.approve_receipt``. A
    failure is reported in the ``WorkflowAction`` body, not as an HTTP error.
    """
    ok, note, updated = await ReceiptService.approve_receipt(
        session, receipt_id, current_user.username
    )

    # Notify SSE subscribers only when the transition actually happened (US-030).
    if ok and updated:
        await sse_service.broadcast_status_change(
            receipt_id=updated.id,
            status=updated.status.value,
            processing_status=updated.processing_status,
            batch_id=updated.batch_id,
        )

    payload = ReceiptResponse.model_validate(updated) if updated else None
    return WorkflowAction(success=ok, message=note, receipt=payload)
@router.post("/{receipt_id}/reject", response_model=WorkflowAction)
async def reject_receipt(
    receipt_id: int,
    data: RejectRequest,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Reject a receipt (intended transition: PENDING_REVIEW → REJECTED).

    Accountant action performed by ``ReceiptService.reject_receipt``, which
    receives the rejection reason from the request body. A failure is reported
    in the ``WorkflowAction`` body, not as an HTTP error.
    """
    ok, note, updated = await ReceiptService.reject_receipt(
        session, receipt_id, current_user.username, data.reason
    )

    # Notify SSE subscribers only when the transition actually happened (US-030).
    if ok and updated:
        await sse_service.broadcast_status_change(
            receipt_id=updated.id,
            status=updated.status.value,
            processing_status=updated.processing_status,
            batch_id=updated.batch_id,
        )

    payload = ReceiptResponse.model_validate(updated) if updated else None
    return WorkflowAction(success=ok, message=note, receipt=payload)
@router.post("/{receipt_id}/resubmit", response_model=WorkflowAction)
async def resubmit_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Resubmit a rejected receipt (intended transition: REJECTED → PENDING_REVIEW).

    Performed by ``ReceiptService.resubmit_receipt`` after corrections. A
    failure is reported in the ``WorkflowAction`` body, not as an HTTP error.
    """
    ok, note, updated = await ReceiptService.resubmit_receipt(
        session, receipt_id, current_user.username
    )

    # Notify SSE subscribers only when the transition actually happened (US-030).
    if ok and updated:
        await sse_service.broadcast_status_change(
            receipt_id=updated.id,
            status=updated.status.value,
            processing_status=updated.processing_status,
            batch_id=updated.batch_id,
        )

    payload = ReceiptResponse.model_validate(updated) if updated else None
    return WorkflowAction(success=ok, message=note, receipt=payload)
@router.post("/{receipt_id}/unapprove", response_model=WorkflowAction)
async def unapprove_receipt(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Unapprove a receipt (intended transition: APPROVED → PENDING_REVIEW).

    Performed by ``ReceiptService.unapprove_receipt`` so the receipt can be
    corrected. A failure is reported in the ``WorkflowAction`` body, not as an
    HTTP error.
    """
    ok, note, updated = await ReceiptService.unapprove_receipt(
        session, receipt_id, current_user.username
    )

    # Notify SSE subscribers only when the transition actually happened (US-030).
    if ok and updated:
        await sse_service.broadcast_status_change(
            receipt_id=updated.id,
            status=updated.status.value,
            processing_status=updated.processing_status,
            batch_id=updated.batch_id,
        )

    payload = ReceiptResponse.model_validate(updated) if updated else None
    return WorkflowAction(success=ok, message=note, receipt=payload)
# ============ Accounting Entries Endpoints ============
@router.get("/{receipt_id}/entries", response_model=List[AccountingEntryResponse])
async def get_receipt_entries(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Return the accounting entries stored for a receipt.

    The receipt's existence is not checked here; the result is whatever
    ``AccountingEntryCRUD.get_by_receipt_id`` returns for the ID.
    """
    stored = await AccountingEntryCRUD.get_by_receipt_id(session, receipt_id)
    serialized = []
    for entry in stored:
        serialized.append(AccountingEntryResponse.model_validate(entry))
    return serialized
@router.put("/{receipt_id}/entries", response_model=List[AccountingEntryResponse])
async def update_receipt_entries(
    receipt_id: int,
    data: EntriesUpdateRequest,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Replace or update the accounting entries of a receipt (accountant action).

    The work is done by ``ReceiptService.update_entries``.

    Raises:
        HTTPException 400: The service refused the update; ``detail`` carries
            the service's message.
    """
    ok, detail, updated_entries = await ReceiptService.update_entries(
        session, receipt_id, data.entries, current_user.username
    )
    if not ok:
        raise HTTPException(status_code=400, detail=detail)

    serialized = []
    for entry in updated_entries:
        serialized.append(AccountingEntryResponse.model_validate(entry))
    return serialized
@router.post("/{receipt_id}/entries/regenerate", response_model=List[AccountingEntryResponse])
async def regenerate_entries(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Regenerate a receipt's accounting entries from its data.

    After ``ReceiptService.regenerate_entries`` succeeds, the entries are
    re-read through ``AccountingEntryCRUD`` and returned.

    Raises:
        HTTPException 400: The service refused the regeneration.
    """
    outcome = await ReceiptService.regenerate_entries(
        session, receipt_id, current_user.username
    )
    # The third element of the service result is not used here.
    ok, detail = outcome[0], outcome[1]
    if not ok:
        raise HTTPException(status_code=400, detail=detail)

    # Read back from storage rather than trusting the service's return value.
    fresh = await AccountingEntryCRUD.get_by_receipt_id(session, receipt_id)
    serialized = []
    for entry in fresh:
        serialized.append(AccountingEntryResponse.model_validate(entry))
    return serialized
# ============ Attachment Endpoints ============
@router.post("/{receipt_id}/attachments", response_model=AttachmentResponse)
async def upload_attachment(
    receipt_id: int,
    file: UploadFile = File(...),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Attach an uploaded file to a receipt.

    Uploads are accepted only for receipts in DRAFT or REJECTED status and
    only from the receipt's creator.

    Raises:
        HTTPException 404: The receipt does not exist.
        HTTPException 400: The receipt status does not allow uploads, or
            creating the attachment raised ``ValueError``.
        HTTPException 403: The caller is not the receipt's creator.
    """
    # Relations are not needed for the permission checks.
    receipt = await ReceiptCRUD.get_by_id(session, receipt_id, include_relations=False)
    if not receipt:
        raise HTTPException(status_code=404, detail="Receipt not found")

    uploadable_statuses = [ReceiptStatus.DRAFT, ReceiptStatus.REJECTED]
    if receipt.status not in uploadable_statuses:
        raise HTTPException(
            status_code=400,
            detail="Cannot upload attachments for this receipt status"
        )

    is_creator = receipt.created_by == current_user.username
    if not is_creator:
        raise HTTPException(
            status_code=403,
            detail="Only the creator can upload attachments"
        )

    try:
        stored = await AttachmentCRUD.create(session, receipt_id, file)
        # Serialization stays inside the try so a ValueError raised by either
        # step is reported as a 400.
        return AttachmentResponse.model_validate(stored)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
@router.get("/{receipt_id}/attachments", response_model=List[AttachmentResponse])
async def list_attachments(
    receipt_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Return every attachment stored for a receipt.

    The receipt's existence is not checked here; the result is whatever
    ``AttachmentCRUD.get_by_receipt_id`` returns for the ID.
    """
    stored = await AttachmentCRUD.get_by_receipt_id(session, receipt_id)
    serialized = []
    for attachment in stored:
        serialized.append(AttachmentResponse.model_validate(attachment))
    return serialized
@router.get("/attachments/{attachment_id}/download")
async def download_attachment(
    attachment_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Send an attachment's file to the client.

    Raises:
        HTTPException 404: The attachment record does not exist, or its file
            is missing on disk.

    NOTE(review): this handler declares no authentication dependency - confirm
    that access control is applied at router or application level.
    """
    attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
    if not attachment:
        raise HTTPException(status_code=404, detail="Attachment not found")

    # The database row can outlive the file, so verify it before streaming.
    disk_path = AttachmentCRUD.get_file_path(attachment)
    if not disk_path.exists():
        raise HTTPException(status_code=404, detail="File not found on disk")

    return FileResponse(
        path=str(disk_path),
        filename=attachment.filename,
        media_type=attachment.mime_type,
    )
@router.delete("/attachments/{attachment_id}")
async def delete_attachment(
    attachment_id: int,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """Delete an attachment from a receipt.

    Deletion is allowed only while the owning receipt is in DRAFT status and
    only for the receipt's creator.

    Raises:
        HTTPException 404: The attachment or its receipt does not exist.
        HTTPException 400: The receipt is not in DRAFT status.
        HTTPException 403: The caller is not the receipt's creator.
    """
    attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
    if not attachment:
        raise HTTPException(status_code=404, detail="Attachment not found")

    # Permissions are defined on the owning receipt, not on the attachment.
    owner_receipt = await ReceiptCRUD.get_by_id(
        session, attachment.receipt_id, include_relations=False
    )
    if not owner_receipt:
        raise HTTPException(status_code=404, detail="Receipt not found")

    if owner_receipt.status != ReceiptStatus.DRAFT:
        raise HTTPException(
            status_code=400,
            detail="Cannot delete attachments for this receipt status"
        )

    is_creator = owner_receipt.created_by == current_user.username
    if not is_creator:
        raise HTTPException(
            status_code=403,
            detail="Only the creator can delete attachments"
        )

    await AttachmentCRUD.delete(session, attachment)
    return {"success": True, "message": "Attachment deleted"}
# ============ Nomenclature Endpoints ============
@router.get("/nomenclature/partners", response_model=List[PartnerOption])
async def get_partners(
    search: Optional[str] = None,
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
):
    """Return partner options (suppliers/customers) for a dropdown.

    The company is the explicit ``company_id`` query parameter when truthy,
    otherwise the company resolved from the X-Selected-Company header.
    """
    target_company = company_id or selected_company
    return await NomenclatureService.get_partners(target_company, search, session)
@router.get("/nomenclature/accounts", response_model=List[AccountOption])
async def get_accounts(
    prefix: Optional[str] = None,
    company_id: Optional[int] = None,
    selected_company: SelectedCompany = None,
):
    """Return chart-of-accounts options for a dropdown.

    The company is the explicit ``company_id`` query parameter when truthy,
    otherwise the company resolved from the X-Selected-Company header.
    ``prefix`` is passed through to the nomenclature service unchanged.
    """
    target_company = company_id or selected_company
    return await NomenclatureService.get_accounts(target_company, prefix)
@router.get("/nomenclature/cash-registers", response_model=List[CashRegisterOption])
async def get_cash_registers(
    company_id: Optional[int] = None,
    session: AsyncSession = Depends(get_session),
    selected_company: SelectedCompany = None,
):
    """Return cash register and bank account options for a dropdown.

    The company is the explicit ``company_id`` query parameter when truthy,
    otherwise the company resolved from the X-Selected-Company header.
    """
    target_company = company_id or selected_company
    return await NomenclatureService.get_cash_registers(target_company, session)
@router.get("/nomenclature/expense-types", response_model=List[ExpenseTypeOption])
async def get_expense_types():
    """Return the predefined expense type options for a dropdown."""
    expense_types = await NomenclatureService.get_expense_types()
    return expense_types