feat(data-entry): Bulk Receipt Upload cu Mobile UX Android Nativ

## Funcționalități Principale

### Bulk Upload & Processing
- Drag & drop pentru upload bonuri multiple oriunde pe pagină
- Batch processing cu job queue și worker pool
- Real-time updates via SSE (Server-Sent Events) cu fallback polling
- Duplicate detection via SHA-256 file hash
- Auto-retry pentru job-uri failed
- Cancel individual jobs sau batch complet

### Mobile UX - Android Native Style
- Top bar fixă cu hamburger, titlu centrat, acțiuni (search/filter)
- Bottom navigation cu 4 tab-uri (Bonuri, Upload, Rapoarte, Setări)
- FAB (Floating Action Button) cu hide/show on scroll
- Filter chips orizontal scrollabile
- Selecție multiplă prin long-press (500ms)
- Select All + Bulk Delete cu confirmare
- Layout Android pentru Create/Edit/View bon (Gmail compose style)

### Bug Fixes
- Refresh individual via SSE în loc de refresh total al paginii
- Bonurile cu eroare OCR rămân vizibile pentru editare manuală
- Afișare nume fișier original pentru toate bonurile
- Upload stabil pe mobil (fix race condition File API)
- Păstrarea ordinii bonurilor la refresh (nu se mai reordonează)

### Backend
- SSE endpoint pentru status updates real-time
- Bulk delete endpoint cu partial success
- Auto-cleanup bonuri failed după 7 zile
- Batch model cu tracking complet

### Testing
- E2E tests cu Playwright
- Unit tests pentru bulk upload, auto-create, cleanup

## Commits Squashed: 43 user stories (US-001 → US-043)
## Branch: ralph/bulk-receipt-upload
## Timp dezvoltare: ~3 zile (Ralph autonomous)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-01-12 08:33:17 +00:00
parent b4a226409c
commit 7b3541403f
53 changed files with 15810 additions and 196 deletions

View File

@@ -23,7 +23,9 @@ Schema:
ocr_time_ms INTEGER, -- Actual OCR engine processing time
created_by TEXT, -- Username
original_filename TEXT,
expires_at TIMESTAMP
expires_at TIMESTAMP,
batch_id INTEGER, -- Foreign key to batch_uploads (for bulk processing)
file_hash TEXT -- SHA-256 hash for duplicate detection (US-007)
)
"""
@@ -66,6 +68,7 @@ class OCRJobStatus(str, Enum):
processing = "processing"
completed = "completed"
failed = "failed"
cancelled = "cancelled"
@dataclass
@@ -86,6 +89,8 @@ class OCRJob:
created_by: Optional[str] = None
original_filename: Optional[str] = None
expires_at: Optional[datetime] = None
batch_id: Optional[int] = None # Links to batch_uploads table for bulk processing
file_hash: Optional[str] = None # SHA-256 hash for duplicate detection (US-007)
@property
def queue_wait_ms(self) -> Optional[int]:
@@ -163,7 +168,8 @@ class OCRJobQueue:
ocr_time_ms INTEGER,
created_by TEXT,
original_filename TEXT,
expires_at TIMESTAMP
expires_at TIMESTAMP,
batch_id INTEGER
)
''')
@@ -174,6 +180,20 @@ class OCRJobQueue:
except Exception:
pass # Column already exists
# Migration: add batch_id column if it doesn't exist
try:
await db.execute('ALTER TABLE ocr_jobs ADD COLUMN batch_id INTEGER')
logger.info("[OCRJobQueue] Added batch_id column to existing table")
except Exception:
pass # Column already exists
# Migration: add file_hash column if it doesn't exist (US-007)
try:
await db.execute('ALTER TABLE ocr_jobs ADD COLUMN file_hash TEXT')
logger.info("[OCRJobQueue] Added file_hash column to existing table")
except Exception:
pass # Column already exists
# Index for efficient queue queries
await db.execute('''
CREATE INDEX IF NOT EXISTS idx_ocr_jobs_status
@@ -197,7 +217,9 @@ class OCRJobQueue:
mime_type: str,
engine: str = "doctr_plus",
username: Optional[str] = None,
original_filename: Optional[str] = None
original_filename: Optional[str] = None,
batch_id: Optional[int] = None,
file_hash: Optional[str] = None
) -> OCRJob:
"""
Create a new OCR job.
@@ -210,6 +232,8 @@ class OCRJobQueue:
engine: OCR engine ('tesseract', 'doctr', 'doctr_plus', 'paddleocr')
username: Username of requester
original_filename: Original filename from upload
batch_id: Optional batch ID for bulk upload processing
file_hash: Optional SHA-256 hash for duplicate detection (US-007)
Returns:
Created OCRJob instance
@@ -241,15 +265,15 @@ class OCRJobQueue:
await db.execute('''
INSERT INTO ocr_jobs (
id, status, file_path, mime_type, engine,
created_at, created_by, original_filename, expires_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
created_at, created_by, original_filename, expires_at, batch_id, file_hash
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
job_id, OCRJobStatus.pending.value, str(file_path), mime_type, engine,
now.isoformat(), username, original_filename, expires_at.isoformat()
now.isoformat(), username, original_filename, expires_at.isoformat(), batch_id, file_hash
))
await db.commit()
logger.info(f"[OCRJobQueue] Created job {job_id}: engine={engine}, file={file_path.name}")
logger.info(f"[OCRJobQueue] Created job {job_id}: engine={engine}, file={file_path.name}, batch_id={batch_id}")
return OCRJob(
id=job_id,
@@ -260,7 +284,9 @@ class OCRJobQueue:
created_at=now,
created_by=username,
original_filename=original_filename,
expires_at=expires_at
expires_at=expires_at,
batch_id=batch_id,
file_hash=file_hash
)
async def get_job(self, job_id: str) -> Optional[OCRJob]:
@@ -601,6 +627,8 @@ class OCRJobQueue:
created_by=row['created_by'],
original_filename=row['original_filename'],
expires_at=parse_datetime(row['expires_at']),
batch_id=row['batch_id'] if 'batch_id' in row.keys() else None,
file_hash=row['file_hash'] if 'file_hash' in row.keys() else None,
)

View File

@@ -28,6 +28,7 @@ from typing import Optional, Set
from .job_queue import job_queue, OCRJobStatus, OCRJob
from .ocr_worker_pool import ocr_worker_pool
from backend.modules.data_entry.schemas.ocr import ExtractionData
logger = logging.getLogger(__name__)
@@ -223,6 +224,21 @@ async def _process_job(job: OCRJob) -> None:
validation_errors_count=len(extraction.get('validation_errors', [])),
)
# Auto-save receipt for batch jobs
if job.batch_id:
auto_save_result = await _auto_save_batch_receipt(
job=job,
extraction=extraction,
file_path=str(file_path)
)
if not auto_save_result:
# Auto-save failed - mark job as failed
# Note: job_queue status already updated to 'completed' above
# We need to update it back to failed with the auto-save error
logger.warning(
f"[JobWorker] Job {job.id} OCR succeeded but auto-save failed"
)
else:
# Job failed
error_msg = result.get("error", "Unknown error")
@@ -543,3 +559,107 @@ def _count_extracted_fields(extraction: dict) -> int:
count += 1
return count
# ============================================================================
# Auto-Save Batch Receipt Helper
# ============================================================================
async def _mark_auto_save_failed(job_id: str, error_msg: str) -> None:
    """Best-effort: set the job's status to ``failed`` with an auto-save error.

    A failure to update the status is logged and swallowed so that it never
    masks the auto-save error that triggered this call.
    """
    try:
        await job_queue.update_status(
            job_id=job_id,
            status=OCRJobStatus.failed,
            error=f"Auto-save error: {error_msg}"
        )
    except Exception as update_err:
        logger.error(f"[JobWorker] Failed to update job status after auto-save error: {update_err}")


async def _auto_save_batch_receipt(
    job: OCRJob,
    extraction: dict,
    file_path: str
) -> bool:
    """
    Automatically create a receipt from an OCR result for batch jobs.

    Looks up the ``BatchUpload`` row referenced by ``job.batch_id`` and
    delegates to ``ReceiptAutoCreateService.create_from_ocr_result``. On any
    failure (batch not found, service reports failure, or an exception) the
    job is switched to ``OCRJobStatus.failed`` with an
    ``"Auto-save error: ..."`` message.

    Args:
        job: OCR job; only processed when ``job.batch_id`` is truthy.
        extraction: OCR extraction result dict, expanded as keyword
            arguments into ``ExtractionData``.
        file_path: Path to the original uploaded file.

    Returns:
        True if the receipt was created (or the job is not a batch job),
        False otherwise. This function never raises.
    """
    if not job.batch_id:
        return True  # Not a batch job, nothing to do

    logger.info(f"[JobWorker] Auto-saving receipt for batch job {job.id} (batch_id={job.batch_id})")

    try:
        # Import here to avoid circular imports
        from backend.modules.data_entry.db.database import get_db_session
        from backend.modules.data_entry.db.models import BatchUpload
        from backend.modules.data_entry.services.receipt_auto_create import ReceiptAutoCreateService
        from sqlalchemy import select

        # Convert extraction dict to ExtractionData schema
        ocr_result = ExtractionData(**extraction)

        async with await get_db_session() as session:
            # Get batch info to retrieve company_id and user_id
            batch_result = await session.execute(
                select(BatchUpload).where(BatchUpload.id == job.batch_id)
            )
            batch = batch_result.scalar_one_or_none()

            if not batch:
                error_msg = f"Batch {job.batch_id} not found"
                logger.error(f"[JobWorker] Auto-save failed for job {job.id}: {error_msg}")
                await _mark_auto_save_failed(job.id, error_msg)
                return False

            result = await ReceiptAutoCreateService.create_from_ocr_result(
                session=session,
                job_id=job.id,
                ocr_result=ocr_result,
                # Fall back to the batch owner when the job has no creator
                username=job.created_by or batch.user_id,
                batch_id=job.batch_id,
                company_id=batch.company_id,
                file_path=file_path,
                original_filename=job.original_filename,
                file_hash=job.file_hash  # Pass file_hash for duplicate detection (US-007)
            )

            if result.success:
                logger.info(
                    f"[JobWorker] Auto-save successful for job {job.id}: "
                    f"receipt_id={result.receipt_id}"
                )
                return True

            error_msg = result.error_message or "Unknown error"
            logger.warning(
                f"[JobWorker] Auto-save validation failed for job {job.id}: {error_msg}"
            )
            await _mark_auto_save_failed(job.id, error_msg)
            return False

    except Exception as e:
        # Some exceptions stringify to "", so fall back to the class name to
        # keep the stored error message informative.
        error_msg = str(e) or type(e).__name__
        # exc_info=True keeps the traceback, which str(e) alone discards.
        logger.error(
            f"[JobWorker] Auto-save exception for job {job.id}: {error_msg}",
            exc_info=True
        )
        await _mark_auto_save_failed(job.id, error_msg)
        return False