Files
roa2web-service-auto/tests/backend/test_bulk_upload.py
Claude Agent 7b3541403f feat(data-entry): Bulk Receipt Upload cu Mobile UX Android Nativ
## Funcționalități Principale

### Bulk Upload & Processing
- Drag & drop pentru upload bonuri multiple oriunde pe pagină
- Batch processing cu job queue și worker pool
- Real-time updates via SSE (Server-Sent Events) cu fallback polling
- Duplicate detection via SHA-256 file hash
- Auto-retry pentru job-uri failed
- Cancel individual jobs sau batch complet

### Mobile UX - Android Native Style
- Top bar fixă cu hamburger, titlu centrat, acțiuni (search/filter)
- Bottom navigation cu 4 tab-uri (Bonuri, Upload, Rapoarte, Setări)
- FAB (Floating Action Button) cu hide/show on scroll
- Filter chips orizontal scrollabile
- Selecție multiplă prin long-press (500ms)
- Select All + Bulk Delete cu confirmare
- Layout Android pentru Create/Edit/View bon (Gmail compose style)

### Bug Fixes
- Refresh individual via SSE în loc de refresh total pagină
- Bonurile cu eroare OCR rămân vizibile pentru editare manuală
- Afișare nume fișier original pentru toate bonurile
- Upload stabil pe mobil (fix race condition File API)
- Păstrare ordine bonuri la refresh (nu se reordonează)

### Backend
- SSE endpoint pentru status updates real-time
- Bulk delete endpoint cu partial success
- Auto-cleanup bonuri failed după 7 zile
- Batch model cu tracking complet

### Testing
- E2E tests cu Playwright
- Unit tests pentru bulk upload, auto-create, cleanup

## Commits Squashed: 43 user stories (US-001 → US-043)
## Branch: ralph/bulk-receipt-upload
## Timp dezvoltare: ~3 zile (Ralph autonomous)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:33:17 +00:00

1206 lines
48 KiB
Python

"""
Tests for bulk upload endpoints (US-008, US-010).
US-008 Acceptance Criteria:
- POST `/api/data-entry/bulk/upload` accepts multipart with multiple files
- Returns list of job_ids for tracking
- Validation: max 100 files per batch, max 10MB per file
- Jobs are created atomically (all or nothing)
- Returns batch_id for grouping
- Creates BatchUpload model in DB with fields: id, user_id, created_at, status, total_files
- Creates batch_jobs table for batch_id ↔ job_id relationship
- pytest tests pass
- API returns correct response schema with batch_id and job_ids list
US-010 Acceptance Criteria:
- GET `/api/data-entry/bulk/batches/{batch_id}/status` endpoint is functional
- Returns aggregated status: pending_count, processing_count, completed_count, failed_count
- Includes the list of jobs with: job_id, status, filename, receipt_id (if completed)
- Includes receipt_id for successfully completed jobs
- Supports a `wait` parameter for long-polling (max 30 seconds)
- Returns total_amount, the sum of all created receipts
- pytest tests pass
"""
import io
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime
from decimal import Decimal
from fastapi import FastAPI
from fastapi.testclient import TestClient
from httpx import AsyncClient, ASGITransport
# Import the router and models we're testing
from backend.modules.data_entry.routers.bulk import (
router, MAX_FILES_PER_BATCH, MAX_FILE_SIZE_BYTES, ALLOWED_MIME_TYPES,
MAX_WAIT_SECONDS, _compute_batch_overall_status
)
from backend.modules.data_entry.db.models import BatchUpload, BatchJob, BatchStatus
from backend.modules.data_entry.schemas.bulk import BulkUploadResponse, BatchStatusResponse, BatchJobInfo
from backend.modules.data_entry.services.ocr.job_queue import OCRJobStatus
from shared.auth.dependencies import get_current_user
from backend.modules.data_entry.db.database import get_session
# Mock user for authentication
class MockCurrentUser:
    """Minimal stand-in for the user object returned by ``get_current_user``.

    The attributes stay available at class level, exactly as before. In
    addition, every instance receives its own copy of the list-valued
    attributes, so a test that mutates ``companies`` or ``permissions`` on
    one instance cannot leak that change into other instances or tests.
    """

    username = "test_user"
    user_id = 1
    # companies is List[str] of company IDs (per CurrentUser model in shared/auth/models.py)
    companies = ["1"]
    permissions = ["data_entry"]

    def __init__(self):
        defaults = type(self)
        # Shadow the shared class-level lists with per-instance copies.
        self.companies = list(defaults.companies)
        self.permissions = list(defaults.permissions)
# Create test app with dependency overrides
def create_test_app(mock_session=None, mock_user=None):
    """Build a FastAPI app that mounts the bulk router under ``/api/data-entry/bulk``.

    Args:
        mock_session: When not ``None``, the ``get_session`` dependency is
            overridden with an async generator that yields this object.
        mock_user: When not ``None``, the ``get_current_user`` dependency is
            overridden to return this object.

    Returns:
        The configured ``FastAPI`` application.
    """
    application = FastAPI()
    application.include_router(router, prefix="/api/data-entry/bulk")
    overrides = application.dependency_overrides

    # Authentication: hand back the supplied user instead of running real auth.
    if mock_user is not None:
        def provide_user():
            return mock_user

        overrides[get_current_user] = provide_user

    # Database: yield the supplied session instead of opening a real one.
    if mock_session is not None:
        async def provide_session():
            yield mock_session

        overrides[get_session] = provide_session

    return application
# Fixtures
@pytest.fixture
def mock_current_user():
    """Provide a fresh ``MockCurrentUser`` instance for each test."""
    user = MockCurrentUser()
    return user
@pytest.fixture
def mock_session():
    """Provide a mocked async database session.

    ``add`` is a plain ``MagicMock`` (called without ``await``), while
    ``flush``, ``commit`` and ``rollback`` are ``AsyncMock`` objects.
    """
    db = AsyncMock()
    db.add = MagicMock()
    for awaited_method in ("flush", "commit", "rollback"):
        setattr(db, awaited_method, AsyncMock())
    return db
@pytest.fixture
def mock_ocr_job():
    """Provide a factory for fake OCR jobs.

    The fixture returns the ``MockJob`` class itself; calling it with a job
    id yields an object whose only attribute is ``id``.
    """

    class MockJob:
        """Bare job object exposing just the ``id`` attribute."""

        def __init__(self, job_id):
            self.id = job_id

    job_factory = MockJob
    return job_factory
def create_test_file(filename: str, content: bytes, content_type: str):
    """Create a test file tuple suitable for a multipart upload.

    Args:
        filename: Name to report for the uploaded file.
        content: Raw bytes of the file.
        content_type: MIME type to report for the file.

    Returns:
        A ``(filename, stream, content_type)`` tuple where ``stream`` is an
        ``io.BytesIO`` positioned at the start of ``content``.
    """
    stream = io.BytesIO(content)
    return (filename, stream, content_type)
# ============================================================================
# Unit Tests for Validation Constants
# ============================================================================
class TestValidationConstants:
    """Pin the upload limits and accepted MIME types exported by the bulk router."""

    def test_max_files_per_batch(self):
        """The per-batch file limit is 100."""
        expected_limit = 100
        assert MAX_FILES_PER_BATCH == expected_limit

    def test_max_file_size(self):
        """The per-file size limit is 10MB, expressed in bytes."""
        ten_megabytes = 10 * 1024 * 1024
        assert MAX_FILE_SIZE_BYTES == ten_megabytes

    def test_allowed_mime_types(self):
        """JPEG, PNG and PDF are all among the allowed MIME types."""
        for mime_type in ("image/jpeg", "image/png", "application/pdf"):
            assert mime_type in ALLOWED_MIME_TYPES
# ============================================================================
# Unit Tests for Models
# ============================================================================
class TestBatchUploadModel:
    """Checks on the ``BatchUpload`` model and its ``BatchStatus`` enum."""

    def test_batch_upload_has_required_fields(self):
        """A constructed BatchUpload exposes the fields it was given plus created_at."""
        field_values = {
            "user_id": "test_user",
            "status": BatchStatus.PENDING,
            "total_files": 5,
        }
        # Instantiating the model is what proves the fields exist.
        upload = BatchUpload(**field_values)

        assert upload.user_id == "test_user"
        assert upload.status == BatchStatus.PENDING
        assert upload.total_files == 5
        assert upload.created_at is not None

    def test_batch_status_enum(self):
        """Each BatchStatus member compares equal to its lowercase string value."""
        expected_values = (
            (BatchStatus.PENDING, "pending"),
            (BatchStatus.PROCESSING, "processing"),
            (BatchStatus.COMPLETED, "completed"),
            (BatchStatus.FAILED, "failed"),
        )
        for member, value in expected_values:
            assert member == value
class TestBatchJobModel:
    """Checks on the ``BatchJob`` model."""

    def test_batch_job_has_required_fields(self):
        """A constructed BatchJob exposes its fields; receipt_id starts as None."""
        link = BatchJob(batch_id=1, job_id="test-job-uuid", filename="test.pdf")

        assert link.batch_id == 1
        assert link.job_id == "test-job-uuid"
        assert link.filename == "test.pdf"
        # receipt_id is optional and was not supplied above.
        assert link.receipt_id is None
        assert link.created_at is not None
# ============================================================================
# Unit Tests for Response Schema
# ============================================================================
class TestBulkUploadResponseSchema:
    """Checks on the ``BulkUploadResponse`` schema."""

    def test_response_schema_structure(self):
        """The schema stores batch_id, job_ids, total_files and message as given."""
        queued_jobs = ["job-1", "job-2"]
        payload = BulkUploadResponse(
            batch_id=1,
            job_ids=queued_jobs,
            total_files=2,
            message="2 files queued for processing",
        )

        assert payload.batch_id == 1
        assert len(payload.job_ids) == 2
        assert payload.total_files == 2
        assert "queued" in payload.message
# ============================================================================
# Integration Tests with Mocked Dependencies
# ============================================================================
class TestBulkUploadEndpoint:
    """Test POST /api/data-entry/bulk/upload endpoint.

    Shared setup lives in the private helpers below; pytest does not collect
    them because their names do not start with ``test``.
    """

    @staticmethod
    def _assign_ids_on_add(session):
        """Wrap ``session.add`` so added objects with ``id is None`` get sequential ids.

        The mocked session never talks to a database, so nothing else would
        populate ``id`` on the objects the endpoint adds.
        """
        original_add = session.add
        assigned = 0

        def tracking_add(obj):
            nonlocal assigned
            if hasattr(obj, 'id') and obj.id is None:
                assigned += 1
                obj.id = assigned
            return original_add(obj)

        session.add = tracking_add

    @staticmethod
    async def _post_files(app, files):
        """POST ``files`` as multipart data to the upload endpoint and return the response."""
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            return await client.post("/api/data-entry/bulk/upload", files=files)

    @pytest.mark.asyncio
    async def test_upload_single_file_success(self, mock_session, mock_current_user, mock_ocr_job):
        """Test successful upload of a single file."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \
                patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check:
            mock_job = mock_ocr_job("test-job-id-1")
            mock_job_queue.create_job = AsyncMock(return_value=mock_job)
            # Mock duplicate check to return no duplicates
            mock_dup_check.return_value = {}
            self._assign_ids_on_add(mock_session)

            files = [("files", ("test.pdf", b"fake pdf content", "application/pdf"))]
            response = await self._post_files(app, files)

        assert response.status_code == 200
        data = response.json()
        assert "batch_id" in data
        assert "job_ids" in data
        assert "total_files" in data
        assert data["total_files"] == 1

    @pytest.mark.asyncio
    async def test_upload_multiple_files_success(self, mock_session, mock_current_user, mock_ocr_job):
        """Test successful upload of multiple files."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \
                patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check:
            job_counter = [0]

            def create_mock_job(*args, **kwargs):
                # Every call yields a job with a new, sequential id.
                job_counter[0] += 1
                return mock_ocr_job(f"test-job-id-{job_counter[0]}")

            mock_job_queue.create_job = AsyncMock(side_effect=create_mock_job)
            # Mock duplicate check to return no duplicates
            mock_dup_check.return_value = {}
            self._assign_ids_on_add(mock_session)

            files = [
                ("files", ("bon1.pdf", b"pdf content 1", "application/pdf")),
                ("files", ("bon2.jpg", b"jpeg content 2", "image/jpeg")),
                ("files", ("bon3.png", b"png content 3", "image/png")),
            ]
            response = await self._post_files(app, files)

        assert response.status_code == 200
        data = response.json()
        assert data["total_files"] == 3
        assert len(data["job_ids"]) == 3

    @pytest.mark.asyncio
    async def test_upload_no_files_returns_400(self, mock_session, mock_current_user):
        """Test that an empty file list is rejected with 422.

        The test name is historical: the request fails FastAPI's validation
        of the required field, which yields 422 rather than 400.
        """
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        response = await self._post_files(app, [])
        # FastAPI returns 422 for missing required field
        assert response.status_code == 422

    @pytest.mark.asyncio
    async def test_upload_invalid_mime_type_returns_400(self, mock_session, mock_current_user):
        """Test that invalid MIME type returns 400 error."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        files = [("files", ("test.txt", b"text content", "text/plain"))]
        response = await self._post_files(app, files)

        assert response.status_code == 400
        data = response.json()
        assert "invalid_files" in str(data) or "Validation failed" in str(data)

    @pytest.mark.asyncio
    async def test_upload_file_too_large_returns_400(self, mock_session, mock_current_user):
        """Test that file larger than 10MB returns 400 error."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        large_content = b"x" * (11 * 1024 * 1024)
        files = [("files", ("large.pdf", large_content, "application/pdf"))]
        response = await self._post_files(app, files)

        assert response.status_code == 400
        data = response.json()
        assert "too large" in str(data).lower() or "10mb" in str(data).lower()

    @pytest.mark.asyncio
    async def test_upload_too_many_files_returns_400(self, mock_session, mock_current_user):
        """Test that more than 100 files returns 400 error."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        files = [
            ("files", (f"file{i}.pdf", b"content", "application/pdf"))
            for i in range(101)
        ]
        response = await self._post_files(app, files)

        assert response.status_code == 400
        data = response.json()
        assert "100" in str(data) or "Too many" in str(data)

    @pytest.mark.asyncio
    async def test_upload_atomic_rollback_on_failure(self, mock_session, mock_current_user, mock_ocr_job):
        """Test that all jobs are rolled back if any job creation fails."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \
                patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check:
            call_count = [0]

            async def failing_create_job(*args, **kwargs):
                # The first call succeeds; the second one simulates a failure.
                call_count[0] += 1
                if call_count[0] == 2:
                    raise Exception("Simulated job creation failure")
                return mock_ocr_job(f"test-job-{call_count[0]}")

            mock_job_queue.create_job = failing_create_job
            # Mock duplicate check to return no duplicates
            mock_dup_check.return_value = {}

            files = [
                ("files", ("bon1.pdf", b"content1", "application/pdf")),
                ("files", ("bon2.pdf", b"content2", "application/pdf")),
            ]
            response = await self._post_files(app, files)

        assert response.status_code == 500
        mock_session.rollback.assert_called_once()

    @pytest.mark.asyncio
    async def test_upload_mixed_valid_invalid_files_returns_400(self, mock_session, mock_current_user):
        """Test that batch with mix of valid and invalid files returns 400 (all or nothing)."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        files = [
            ("files", ("valid.pdf", b"pdf content", "application/pdf")),
            ("files", ("invalid.txt", b"text content", "text/plain")),
            ("files", ("valid.jpg", b"jpeg content", "image/jpeg")),
        ]
        response = await self._post_files(app, files)
        assert response.status_code == 400
# ============================================================================
# Response Schema Validation Tests
# ============================================================================
class TestResponseSchema:
    """Test response schema compliance of the bulk upload endpoint."""

    @staticmethod
    def _assign_ids_on_add(session):
        """Wrap ``session.add`` so added objects with ``id is None`` get sequential ids.

        The mocked session never talks to a database, so nothing else would
        populate ``id`` on the objects the endpoint adds.
        """
        original_add = session.add
        assigned = 0

        def tracking_add(obj):
            nonlocal assigned
            if hasattr(obj, 'id') and obj.id is None:
                assigned += 1
                obj.id = assigned
            return original_add(obj)

        session.add = tracking_add

    async def _upload(self, mock_session, mock_current_user, create_job, files):
        """Upload ``files`` with the job queue and duplicate check patched.

        Args:
            mock_session: Mocked DB session injected into the app.
            mock_current_user: Mocked user injected into the app.
            create_job: Object installed as ``job_queue.create_job``.
            files: Multipart ``files`` payload passed to the HTTP client.

        Returns:
            The HTTP response of the upload request.
        """
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \
                patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check:
            mock_job_queue.create_job = create_job
            # Mock duplicate check to return no duplicates
            mock_dup_check.return_value = {}
            self._assign_ids_on_add(mock_session)

            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                return await client.post("/api/data-entry/bulk/upload", files=files)

    @pytest.mark.asyncio
    async def test_response_contains_batch_id(self, mock_session, mock_current_user, mock_ocr_job):
        """Verify response contains batch_id field."""
        create_job = AsyncMock(return_value=mock_ocr_job("test-id"))
        files = [("files", ("test.pdf", b"content", "application/pdf"))]

        response = await self._upload(mock_session, mock_current_user, create_job, files)

        # Fail with a clear status mismatch instead of a missing-key error.
        assert response.status_code == 200
        data = response.json()
        assert "batch_id" in data
        assert isinstance(data["batch_id"], int)

    @pytest.mark.asyncio
    async def test_response_contains_job_ids_list(self, mock_session, mock_current_user, mock_ocr_job):
        """Verify response contains job_ids as a list."""
        job_counter = [0]

        def create_mock_job(*args, **kwargs):
            # Every call yields a job with a new, sequential id.
            job_counter[0] += 1
            return mock_ocr_job(f"job-{job_counter[0]}")

        create_job = AsyncMock(side_effect=create_mock_job)
        files = [
            ("files", ("file1.pdf", b"c1", "application/pdf")),
            ("files", ("file2.pdf", b"c2", "application/pdf")),
        ]

        response = await self._upload(mock_session, mock_current_user, create_job, files)

        # Fail with a clear status mismatch instead of a missing-key error.
        assert response.status_code == 200
        data = response.json()
        assert "job_ids" in data
        assert isinstance(data["job_ids"], list)
        assert len(data["job_ids"]) == 2
# ============================================================================
# US-010: Batch Status Endpoint Tests
# ============================================================================
class TestBatchStatusEndpointConstants:
    """Pin the constants used by the batch status endpoint."""

    def test_max_wait_seconds(self):
        """The long-polling wait is capped at 30 seconds."""
        expected_cap = 30
        assert MAX_WAIT_SECONDS == expected_cap
class TestComputeBatchOverallStatus:
    """Exercise ``_compute_batch_overall_status`` over representative job-count mixes."""

    def test_all_pending_returns_pending(self):
        """Only pending jobs -> "pending"."""
        counts = dict(pending=5, processing=0, completed=0, failed=0, total=5)
        assert _compute_batch_overall_status(**counts) == "pending"

    def test_some_processing_returns_processing(self):
        """At least one job processing -> "processing"."""
        counts = dict(pending=2, processing=1, completed=2, failed=0, total=5)
        assert _compute_batch_overall_status(**counts) == "processing"

    def test_all_completed_returns_completed(self):
        """Every job completed -> "completed"."""
        counts = dict(pending=0, processing=0, completed=5, failed=0, total=5)
        assert _compute_batch_overall_status(**counts) == "completed"

    def test_all_failed_returns_failed(self):
        """Every job failed -> "failed"."""
        counts = dict(pending=0, processing=0, completed=0, failed=5, total=5)
        assert _compute_batch_overall_status(**counts) == "failed"

    def test_mixed_completed_failed_returns_completed(self):
        """A finished batch with both completed and failed jobs -> "completed"."""
        counts = dict(pending=0, processing=0, completed=3, failed=2, total=5)
        assert _compute_batch_overall_status(**counts) == "completed"

    def test_some_completed_some_pending_returns_processing(self):
        """Some jobs completed while others are still pending -> "processing"."""
        counts = dict(pending=2, processing=0, completed=3, failed=0, total=5)
        assert _compute_batch_overall_status(**counts) == "processing"
class TestBatchStatusResponseSchema:
    """Checks on the ``BatchStatusResponse`` and ``BatchJobInfo`` schemas."""

    def test_response_schema_structure(self):
        """A fully populated BatchStatusResponse exposes the values it was given."""
        finished_job = BatchJobInfo(
            job_id="job-1",
            filename="bon1.pdf",
            status="completed",
            receipt_id=10,
        )
        running_job = BatchJobInfo(
            job_id="job-2",
            filename="bon2.pdf",
            status="processing",
            receipt_id=None,
        )
        payload = BatchStatusResponse(
            batch_id=1,
            status="processing",
            total_files=5,
            pending_count=2,
            processing_count=1,
            completed_count=2,
            failed_count=0,
            jobs=[finished_job, running_job],
            total_amount=150.50,
            created_at=datetime.utcnow(),
        )

        assert payload.batch_id == 1
        assert payload.status == "processing"
        assert payload.pending_count == 2
        assert payload.processing_count == 1
        assert payload.completed_count == 2
        assert payload.failed_count == 0
        assert len(payload.jobs) == 2
        assert payload.total_amount == 150.50

    def test_batch_job_info_with_receipt_id(self):
        """BatchJobInfo keeps the receipt_id supplied for a completed job."""
        info = BatchJobInfo(
            job_id="test-job",
            filename="test.pdf",
            status="completed",
            receipt_id=42,
        )
        assert info.receipt_id == 42

    def test_batch_job_info_without_receipt_id(self):
        """BatchJobInfo accepts None as receipt_id."""
        info = BatchJobInfo(
            job_id="test-job",
            filename="test.pdf",
            status="pending",
            receipt_id=None,
        )
        assert info.receipt_id is None
class TestBatchStatusEndpoint:
    """Test GET /api/data-entry/bulk/batches/{batch_id}/status endpoint.

    Shared wiring lives in the private helpers below; pytest does not collect
    them because their names do not start with ``test``.
    """

    @pytest.fixture
    def mock_batch(self):
        """Create a mock batch."""
        batch = MagicMock()
        batch.id = 1
        batch.user_id = "test_user"
        batch.status = BatchStatus.PROCESSING
        batch.total_files = 3
        batch.created_at = datetime.utcnow()
        return batch

    @pytest.fixture
    def mock_batch_jobs(self):
        """Create three mock batch_jobs; only the first one carries a receipt_id (10)."""
        jobs = []
        for i in range(3):
            job = MagicMock()
            job.batch_id = 1
            job.job_id = f"job-{i+1}"
            job.filename = f"bon{i+1}.pdf"
            job.receipt_id = (i + 10) if i == 0 else None  # First job has receipt
            jobs.append(job)
        return jobs

    @pytest.fixture
    def mock_ocr_jobs(self):
        """Create mock OCR jobs keyed by job id: one completed, one processing, one pending."""
        def create_ocr_job(job_id, status):
            job = MagicMock()
            job.id = job_id
            job.status = OCRJobStatus(status)
            return job

        return {
            "job-1": create_ocr_job("job-1", "completed"),
            "job-2": create_ocr_job("job-2", "processing"),
            "job-3": create_ocr_job("job-3", "pending"),
        }

    @staticmethod
    def _wire_session(session, batch, batch_jobs, total_amount):
        """Replace ``session.execute`` with a stub that answers queries by call order.

        The first call returns a result whose ``scalar_one_or_none()`` is
        ``batch``, the second one a result whose ``scalars().all()`` is
        ``batch_jobs``, and every later call a result whose ``scalar()`` is
        ``total_amount``.
        """
        batch_result = MagicMock()
        batch_result.scalar_one_or_none = MagicMock(return_value=batch)

        jobs_scalars = MagicMock()
        jobs_scalars.all = MagicMock(return_value=batch_jobs)
        jobs_result = MagicMock()
        jobs_result.scalars = MagicMock(return_value=jobs_scalars)

        amount_result = MagicMock()
        amount_result.scalar = MagicMock(return_value=total_amount)

        calls = 0

        async def mock_execute(query):
            nonlocal calls
            calls += 1
            if calls == 1:
                return batch_result
            if calls == 2:
                return jobs_result
            return amount_result

        session.execute = mock_execute

    @staticmethod
    async def _get(app, url):
        """Issue a GET request against ``app`` and return the response."""
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            return await client.get(url)

    async def _fetch_status(self, mock_session, mock_current_user, batch, batch_jobs, ocr_jobs, total_amount):
        """Request the status of batch 1 with the session and job queue fully mocked.

        Args:
            mock_session: Mocked DB session injected into the app.
            mock_current_user: Mocked user injected into the app.
            batch: Object returned for the batch query.
            batch_jobs: List returned for the batch_jobs query.
            ocr_jobs: Mapping of job id to OCR job used by ``job_queue.get_job``.
            total_amount: Value returned for the amount query.

        Returns:
            The HTTP response of the status request.
        """
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue:
            self._wire_session(mock_session, batch, batch_jobs, total_amount)

            async def get_job(job_id):
                return ocr_jobs.get(job_id)

            mock_job_queue.get_job = get_job
            return await self._get(app, "/api/data-entry/bulk/batches/1/status")

    @pytest.mark.asyncio
    async def test_get_batch_status_success(
        self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs
    ):
        """Test successful batch status retrieval."""
        response = await self._fetch_status(
            mock_session, mock_current_user, mock_batch, mock_batch_jobs,
            mock_ocr_jobs, Decimal("125.50"),
        )

        assert response.status_code == 200
        data = response.json()
        # Verify response structure
        assert data["batch_id"] == 1
        assert data["total_files"] == 3
        assert "pending_count" in data
        assert "processing_count" in data
        assert "completed_count" in data
        assert "failed_count" in data
        assert "jobs" in data
        assert "total_amount" in data
        assert "created_at" in data

    @pytest.mark.asyncio
    async def test_batch_not_found_returns_404(self, mock_session, mock_current_user):
        """Test that non-existent batch returns 404."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        # Mock batch query returning None
        batch_result = MagicMock()
        batch_result.scalar_one_or_none = MagicMock(return_value=None)

        async def mock_execute(query):
            return batch_result

        mock_session.execute = mock_execute

        response = await self._get(app, "/api/data-entry/bulk/batches/99999/status")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()

    @pytest.mark.asyncio
    async def test_wait_parameter_validation(self, mock_session, mock_current_user, mock_batch):
        """Test that wait parameter is validated (0-30 seconds)."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        # Wait > 30 should fail validation
        response = await self._get(app, "/api/data-entry/bulk/batches/1/status?wait=31")
        # FastAPI returns 422 for validation errors
        assert response.status_code == 422

    @pytest.mark.asyncio
    async def test_status_counts_are_correct(
        self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs
    ):
        """Test that status counts match job statuses."""
        response = await self._fetch_status(
            mock_session, mock_current_user, mock_batch, mock_batch_jobs,
            mock_ocr_jobs, None,
        )

        # Fail with a clear status mismatch instead of a missing-key error.
        assert response.status_code == 200
        data = response.json()
        # mock_ocr_jobs has 1 completed, 1 processing, 1 pending
        assert data["completed_count"] == 1
        assert data["processing_count"] == 1
        assert data["pending_count"] == 1
        assert data["failed_count"] == 0

    @pytest.mark.asyncio
    async def test_jobs_list_includes_receipt_id(
        self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs
    ):
        """Test that jobs list includes receipt_id for completed jobs."""
        response = await self._fetch_status(
            mock_session, mock_current_user, mock_batch, mock_batch_jobs,
            mock_ocr_jobs, Decimal("100.00"),
        )

        # Fail with a clear status mismatch instead of a missing-key error.
        assert response.status_code == 200
        data = response.json()
        jobs = data["jobs"]
        assert len(jobs) == 3
        # Find the completed job (job-1 has receipt_id=10)
        completed_job = next((j for j in jobs if j["job_id"] == "job-1"), None)
        assert completed_job is not None
        assert completed_job["receipt_id"] == 10
        # Other jobs should have None receipt_id
        pending_job = next((j for j in jobs if j["job_id"] == "job-3"), None)
        assert pending_job is not None
        assert pending_job["receipt_id"] is None

    @pytest.mark.asyncio
    async def test_total_amount_calculation(
        self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs
    ):
        """Test that total_amount reflects the value returned by the amount query."""
        # Total amount should be 250.75
        response = await self._fetch_status(
            mock_session, mock_current_user, mock_batch, mock_batch_jobs,
            mock_ocr_jobs, Decimal("250.75"),
        )

        # Fail with a clear status mismatch instead of a missing-key error.
        assert response.status_code == 200
        data = response.json()
        assert data["total_amount"] == 250.75
class TestBatchStatusLongPolling:
    """Test long-polling behavior of batch status endpoint."""

    async def _timed_status_request(self, mock_session, mock_current_user, *, url, job_count):
        """Request ``url`` for a batch of pending jobs and measure how long it takes.

        Args:
            mock_session: Mocked DB session injected into the app.
            mock_current_user: Mocked user injected into the app.
            url: Status URL to request, including any query string.
            job_count: Number of pending jobs the mocked batch contains.

        Returns:
            A ``(response, elapsed_seconds)`` tuple.
        """
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments the way time.time() can.
        import time

        mock_batch = MagicMock()
        mock_batch.id = 1
        mock_batch.total_files = job_count
        mock_batch.created_at = datetime.utcnow()
        mock_batch_jobs = [
            MagicMock(batch_id=1, job_id=f"job-{n}", filename=f"bon{n}.pdf", receipt_id=None)
            for n in range(1, job_count + 1)
        ]
        mock_ocr_job = MagicMock()
        mock_ocr_job.status = OCRJobStatus.pending

        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)
        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue:
            batch_result = MagicMock()
            batch_result.scalar_one_or_none = MagicMock(return_value=mock_batch)
            jobs_scalars = MagicMock()
            jobs_scalars.all = MagicMock(return_value=mock_batch_jobs)
            jobs_result = MagicMock()
            jobs_result.scalars = MagicMock(return_value=jobs_scalars)

            calls = 0

            async def mock_execute(query):
                # First query gets the batch; every later query gets the jobs result.
                nonlocal calls
                calls += 1
                if calls == 1:
                    return batch_result
                return jobs_result

            mock_session.execute = mock_execute

            async def get_job(job_id):
                return mock_ocr_job

            mock_job_queue.get_job = get_job

            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                started = time.perf_counter()
                response = await client.get(url)
                elapsed = time.perf_counter() - started

        return response, elapsed

    @pytest.mark.asyncio
    async def test_wait_zero_returns_immediately(self, mock_session, mock_current_user):
        """Test that wait=0 returns immediately without waiting."""
        response, elapsed = await self._timed_status_request(
            mock_session,
            mock_current_user,
            url="/api/data-entry/bulk/batches/1/status?wait=0",
            job_count=2,
        )
        assert response.status_code == 200
        # Should return almost immediately (under 0.5 seconds)
        assert elapsed < 0.5

    @pytest.mark.asyncio
    async def test_no_wait_param_returns_immediately(self, mock_session, mock_current_user):
        """Test that no wait param returns immediately."""
        response, elapsed = await self._timed_status_request(
            mock_session,
            mock_current_user,
            url="/api/data-entry/bulk/batches/1/status",
            job_count=1,
        )
        assert response.status_code == 200
        assert elapsed < 0.5
# ============================================================================
# US-007: Duplicate Detection Tests
# ============================================================================
class TestDuplicateDetectionHelpers:
    """Test helper functions for duplicate detection."""

    def test_compute_file_hash_produces_sha256(self):
        """Verify compute_file_hash returns the SHA-256 hex digest of its input."""
        import hashlib

        from backend.modules.data_entry.routers.bulk import compute_file_hash

        content = b"test content for hashing"
        hash_result = compute_file_hash(content)

        # SHA-256 produces 64 hex characters
        assert len(hash_result) == 64
        assert all(c in "0123456789abcdef" for c in hash_result)
        # The shape checks above would also pass for any other 256-bit digest,
        # so pin the algorithm against the standard-library implementation.
        assert hash_result == hashlib.sha256(content).hexdigest()

    def test_compute_file_hash_deterministic(self):
        """Verify same content produces same hash."""
        from backend.modules.data_entry.routers.bulk import compute_file_hash

        content = b"test content"

        assert compute_file_hash(content) == compute_file_hash(content)

    def test_compute_file_hash_different_content(self):
        """Verify different content produces different hash."""
        from backend.modules.data_entry.routers.bulk import compute_file_hash

        hash1 = compute_file_hash(b"content A")
        hash2 = compute_file_hash(b"content B")

        assert hash1 != hash2
class TestDuplicateDetectionInBulkUpload:
    """Test duplicate detection in bulk upload endpoint (US-007)."""

    @staticmethod
    def _stub_duplicate_query(mock_session, rows):
        """Replace ``mock_session.execute`` with a coroutine whose result's ``.all()`` returns ``rows``.

        The tests in this class pass ``rows`` as a list of
        ``(file_hash, receipt_id)`` tuples (empty when nothing is a duplicate).
        """
        mock_result = MagicMock()
        mock_result.all = MagicMock(return_value=rows)

        async def mock_execute(query):
            return mock_result

        mock_session.execute = mock_execute

    @staticmethod
    def _assign_ids_on_add(mock_session):
        """Wrap ``mock_session.add`` so objects added with ``id is None`` get sequential ids (1, 2, ...)."""
        next_id = 0
        original_add = mock_session.add

        def tracking_add(obj):
            nonlocal next_id
            if hasattr(obj, "id") and obj.id is None:
                next_id += 1
                obj.id = next_id
            return original_add(obj)

        mock_session.add = tracking_add

    @pytest.mark.asyncio
    async def test_all_duplicates_returns_409(self, mock_session, mock_current_user, mock_ocr_job):
        """Test that uploading all duplicate files returns 409 Conflict."""
        from backend.modules.data_entry.routers.bulk import compute_file_hash

        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)

        # Calculate hash for test content
        test_content = b"duplicate pdf content"
        test_hash = compute_file_hash(test_content)

        # Mock the duplicate check query: the uploaded hash already belongs to receipt 123
        self._stub_duplicate_query(mock_session, [(test_hash, 123)])

        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            files = [("files", ("duplicate.pdf", test_content, "application/pdf"))]
            response = await client.post("/api/data-entry/bulk/upload", files=files)

            assert response.status_code == 409
            data = response.json()
            # A case-insensitive "duplicate" match also covers "all_duplicates".
            assert "duplicate" in str(data).lower()

    @pytest.mark.asyncio
    async def test_partial_duplicates_processes_non_duplicates(self, mock_session, mock_current_user, mock_ocr_job):
        """Test that with some duplicates, non-duplicates are processed normally."""
        from backend.modules.data_entry.routers.bulk import compute_file_hash

        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)

        duplicate_content = b"duplicate content"
        new_content = b"new unique content"
        duplicate_hash = compute_file_hash(duplicate_content)

        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue:
            mock_job = mock_ocr_job("test-job-id")
            mock_job_queue.create_job = AsyncMock(return_value=mock_job)

            # Track batch creation
            self._assign_ids_on_add(mock_session)

            # Mock the duplicate check query - only duplicate_hash exists
            self._stub_duplicate_query(mock_session, [(duplicate_hash, 123)])

            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                files = [
                    ("files", ("duplicate.pdf", duplicate_content, "application/pdf")),
                    ("files", ("new.pdf", new_content, "application/pdf")),
                ]
                response = await client.post("/api/data-entry/bulk/upload", files=files)

                # Should succeed (200) because one file can be processed
                assert response.status_code == 200
                data = response.json()
                # Response should contain duplicate info
                assert "duplicates" in data or "duplicate_files" in data

    @pytest.mark.asyncio
    async def test_no_duplicates_normal_processing(self, mock_session, mock_current_user, mock_ocr_job):
        """Test that files without duplicates are processed normally."""
        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)

        with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue:
            mock_job = mock_ocr_job("test-job-id")
            mock_job_queue.create_job = AsyncMock(return_value=mock_job)

            # Track batch creation
            self._assign_ids_on_add(mock_session)

            # Mock the duplicate check query - no duplicates found
            self._stub_duplicate_query(mock_session, [])

            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                files = [("files", ("new.pdf", b"brand new content", "application/pdf"))]
                response = await client.post("/api/data-entry/bulk/upload", files=files)

                assert response.status_code == 200
                data = response.json()
                assert "batch_id" in data
                assert "job_ids" in data

    @pytest.mark.asyncio
    async def test_duplicate_response_includes_existing_receipt_id(self, mock_session, mock_current_user):
        """Test that duplicate error includes existing_receipt_id (US-007 AC3)."""
        from backend.modules.data_entry.routers.bulk import compute_file_hash

        app = create_test_app(mock_session=mock_session, mock_user=mock_current_user)

        test_content = b"duplicate content"
        test_hash = compute_file_hash(test_content)
        existing_receipt_id = 456

        # Mock the duplicate check query
        self._stub_duplicate_query(mock_session, [(test_hash, existing_receipt_id)])

        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            files = [("files", ("test.pdf", test_content, "application/pdf"))]
            response = await client.post("/api/data-entry/bulk/upload", files=files)

            assert response.status_code == 409
            data = response.json()

            # Check that the response contains duplicate info with receipt ID
            detail = data.get("detail", {})
            duplicates = detail.get("duplicates", [])
            assert len(duplicates) > 0
            assert duplicates[0]["existing_receipt_id"] == existing_receipt_id
            assert duplicates[0]["error"] == "duplicate"
class TestDuplicateResponseSchema:
    """Schema-level checks for the duplicate detection response models."""

    def test_duplicate_file_info_schema(self):
        """DuplicateFileInfo exposes the values it was constructed with."""
        from backend.modules.data_entry.schemas.bulk import DuplicateFileInfo

        payload = {
            "filename": "test.pdf",
            "error": "duplicate",
            "existing_receipt_id": 123,
            "message": "Fișier duplicat - există deja ca bon #123",
        }
        info = DuplicateFileInfo(**payload)

        # These three fields must round-trip exactly.
        for field_name in ("filename", "error", "existing_receipt_id"):
            assert getattr(info, field_name) == payload[field_name]
        # The message only needs to mention the existing receipt number.
        assert "123" in info.message

    def test_bulk_upload_response_with_duplicates_schema(self):
        """BulkUploadResponseWithDuplicates keeps its counters and nested duplicate entries."""
        from backend.modules.data_entry.schemas.bulk import (
            BulkUploadResponseWithDuplicates,
            DuplicateFileInfo,
        )

        first_duplicate = DuplicateFileInfo(
            filename="dup1.pdf",
            error="duplicate",
            existing_receipt_id=10,
            message="Fișier duplicat - există deja ca bon #10",
        )
        second_duplicate = DuplicateFileInfo(
            filename="dup2.pdf",
            error="duplicate",
            existing_receipt_id=11,
            message="Fișier duplicat - există deja ca bon #11",
        )

        response = BulkUploadResponseWithDuplicates(
            batch_id=1,
            job_ids=["job-1", "job-2"],
            total_files=4,
            processed_files=2,
            duplicate_files=2,
            duplicates=[first_duplicate, second_duplicate],
            message="2 fișier(e) în procesare, 2 duplicate ignorate",
        )

        expected_scalars = {
            "batch_id": 1,
            "total_files": 4,
            "processed_files": 2,
            "duplicate_files": 2,
        }
        for field_name, expected_value in expected_scalars.items():
            assert getattr(response, field_name) == expected_value
        assert len(response.duplicates) == 2
if __name__ == "__main__":
    # pytest.main() returns the run's exit code instead of exiting; propagate
    # it so that running this file directly exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))