"""
Tests for bulk upload endpoints (US-008, US-010).

US-008 Acceptance Criteria:
- POST `/api/data-entry/bulk/upload` accepts multipart with multiple files
- Returns list of job_ids for tracking
- Validation: max 100 files per batch, max 10MB per file
- Jobs are created atomically (all or nothing)
- Returns batch_id for grouping
- Creates BatchUpload model in DB with fields: id, user_id, created_at, status, total_files
- Creates batch_jobs table for batch_id ↔ job_id relationship
- pytest tests pass
- API returns correct response schema with batch_id and job_ids list

US-010 Acceptance Criteria:
- GET `/api/data-entry/bulk/batches/{batch_id}/status` endpoint is functional
- Returns aggregate status: pending_count, processing_count, completed_count, failed_count
- Includes the list of jobs with: job_id, status, filename, receipt_id (if completed)
- Includes receipt_id for successfully completed jobs
- Supports a `wait` parameter for long-polling (max 30 seconds)
- Returns total_amount, the sum of all receipts created
- pytest tests pass
"""

import io
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime
from decimal import Decimal
from fastapi import FastAPI
from fastapi.testclient import TestClient
from httpx import AsyncClient, ASGITransport

# Import the router and models we're testing
from backend.modules.data_entry.routers.bulk import (
    router,
    MAX_FILES_PER_BATCH,
    MAX_FILE_SIZE_BYTES,
    ALLOWED_MIME_TYPES,
    MAX_WAIT_SECONDS,
    _compute_batch_overall_status
)
from backend.modules.data_entry.db.models import BatchUpload, BatchJob, BatchStatus
from backend.modules.data_entry.schemas.bulk import BulkUploadResponse, BatchStatusResponse, BatchJobInfo
from backend.modules.data_entry.services.ocr.job_queue import OCRJobStatus
from shared.auth.dependencies import get_current_user
from backend.modules.data_entry.db.database import get_session


class MockCurrentUser:
    """Stand-in for the authenticated user injected through ``get_current_user``."""

    username = "test_user"
    user_id = 1
    # companies is List[str] of company IDs (per CurrentUser model in shared/auth/models.py)
    companies = ["1"]
    permissions = ["data_entry"]


def create_test_app(mock_session=None, mock_user=None):
    """Build a FastAPI app that mounts the bulk router under ``/api/data-entry/bulk``.

    Args:
        mock_session: When not ``None``, yielded in place of the ``get_session`` dependency.
        mock_user: When not ``None``, returned in place of the ``get_current_user`` dependency.

    Returns:
        The configured ``FastAPI`` instance.
    """
    replacements = {}

    if mock_user is not None:
        def provide_user():
            return mock_user

        replacements[get_current_user] = provide_user

    if mock_session is not None:
        async def provide_session():
            yield mock_session

        replacements[get_session] = provide_session

    test_app = FastAPI()
    test_app.include_router(router, prefix="/api/data-entry/bulk")
    test_app.dependency_overrides.update(replacements)
    return test_app


# Fixtures
@pytest.fixture
def mock_current_user():
    """Provide a fresh ``MockCurrentUser`` instance."""
    return MockCurrentUser()


@pytest.fixture
def mock_session():
    """Provide an ``AsyncMock`` session with a synchronous ``add`` and awaitable flush/commit/rollback."""
    fake_session = AsyncMock()
    # ``add`` is replaced with a plain MagicMock so calling it does not produce a coroutine.
    fake_session.add = MagicMock()
    for awaitable_name in ("flush", "commit", "rollback"):
        setattr(fake_session, awaitable_name, AsyncMock())
    return fake_session


@pytest.fixture
def mock_ocr_job():
    """Provide a factory class; calling it with a job id yields an object exposing ``.id``."""

    class MockJob:
        def __init__(self, job_id):
            self.id = job_id

    return MockJob


def create_test_file(filename: str, content: bytes, content_type: str):
    """Return a ``(filename, BytesIO(content), content_type)`` tuple for use as an upload entry."""
    stream = io.BytesIO(content)
    return filename, stream, content_type


# ============================================================================
# Unit Tests for Validation Constants
# ============================================================================

class TestValidationConstants:
    """Check the values of the validation constants exported by the bulk router."""

    def test_max_files_per_batch(self):
        """The batch limit is 100 files."""
        assert MAX_FILES_PER_BATCH == 100

    def test_max_file_size(self):
        """The per-file limit is 10MB expressed in bytes."""
        ten_megabytes = 10 * 1024 * 1024
        assert MAX_FILE_SIZE_BYTES == ten_megabytes

    def test_allowed_mime_types(self):
        """JPEG, PNG and PDF MIME types are all allowed."""
        for mime_type in ("image/jpeg", "image/png", "application/pdf"):
            assert mime_type in ALLOWED_MIME_TYPES


# ============================================================================
# Unit Tests for Models
# ============================================================================

class TestBatchUploadModel:
    """Check the BatchUpload model definition."""

    def test_batch_upload_has_required_fields(self):
        """A BatchUpload built from keyword arguments exposes them and has a created_at value."""
        supplied = {
            "user_id": "test_user",
            "status": BatchStatus.PENDING,
            "total_files": 5,
        }
        batch = BatchUpload(**supplied)

        for field_name, expected in supplied.items():
            assert getattr(batch, field_name) == expected
        assert batch.created_at is not None

    def test_batch_status_enum(self):
        """Each BatchStatus member compares equal to its lowercase string value."""
        expectations = [
            (BatchStatus.PENDING, "pending"),
            (BatchStatus.PROCESSING, "processing"),
            (BatchStatus.COMPLETED, "completed"),
            (BatchStatus.FAILED, "failed"),
        ]
        for member, text in expectations:
            assert member == text


class TestBatchJobModel:
    """Check the BatchJob model definition."""

    def test_batch_job_has_required_fields(self):
        """A BatchJob exposes its constructor values; receipt_id is None when not supplied."""
        supplied = {
            "batch_id": 1,
            "job_id": "test-job-uuid",
            "filename": "test.pdf",
        }
        batch_job = BatchJob(**supplied)

        for field_name, expected in supplied.items():
            assert getattr(batch_job, field_name) == expected
        assert batch_job.receipt_id is None  # Optional field
        assert batch_job.created_at is not None


# ============================================================================
# Unit Tests for Response Schema
# ============================================================================

class TestBulkUploadResponseSchema:
    """Check the BulkUploadResponse schema."""

    def test_response_schema_structure(self):
        """The schema keeps batch_id, job_ids, total_files and message as given."""
        queued_job_ids = ["job-1", "job-2"]
        response = BulkUploadResponse(
            batch_id=1,
            job_ids=queued_job_ids,
            total_files=2,
            message="2 files queued for processing",
        )

        assert response.batch_id == 1
        assert len(response.job_ids) == 2
        assert response.total_files == 2
        assert "queued" in response.message
============================================================================ # Integration Tests with Mocked Dependencies # ============================================================================ class TestBulkUploadEndpoint: """Test POST /api/data-entry/bulk/upload endpoint.""" @pytest.mark.asyncio async def test_upload_single_file_success(self, mock_session, mock_current_user, mock_ocr_job): """Test successful upload of a single file.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \ patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check: mock_job = mock_ocr_job("test-job-id-1") mock_job_queue.create_job = AsyncMock(return_value=mock_job) # Mock duplicate check to return no duplicates mock_dup_check.return_value = {} # Make flush assign batch.id batch_id_counter = [0] original_add = mock_session.add def tracking_add(obj): if hasattr(obj, 'id') and obj.id is None: batch_id_counter[0] += 1 obj.id = batch_id_counter[0] return original_add(obj) mock_session.add = tracking_add transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [("files", ("test.pdf", b"fake pdf content", "application/pdf"))] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 200 data = response.json() assert "batch_id" in data assert "job_ids" in data assert "total_files" in data assert data["total_files"] == 1 @pytest.mark.asyncio async def test_upload_multiple_files_success(self, mock_session, mock_current_user, mock_ocr_job): """Test successful upload of multiple files.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \ patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check: 
job_counter = [0] def create_mock_job(*args, **kwargs): job_counter[0] += 1 return mock_ocr_job(f"test-job-id-{job_counter[0]}") mock_job_queue.create_job = AsyncMock(side_effect=create_mock_job) # Mock duplicate check to return no duplicates mock_dup_check.return_value = {} # Make flush assign batch.id batch_id_counter = [0] original_add = mock_session.add def tracking_add(obj): if hasattr(obj, 'id') and obj.id is None: batch_id_counter[0] += 1 obj.id = batch_id_counter[0] return original_add(obj) mock_session.add = tracking_add transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [ ("files", ("bon1.pdf", b"pdf content 1", "application/pdf")), ("files", ("bon2.jpg", b"jpeg content 2", "image/jpeg")), ("files", ("bon3.png", b"png content 3", "image/png")), ] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 200 data = response.json() assert data["total_files"] == 3 assert len(data["job_ids"]) == 3 @pytest.mark.asyncio async def test_upload_no_files_returns_400(self, mock_session, mock_current_user): """Test that empty file list returns 400 error.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.post("/api/data-entry/bulk/upload", files=[]) # FastAPI returns 422 for missing required field assert response.status_code == 422 @pytest.mark.asyncio async def test_upload_invalid_mime_type_returns_400(self, mock_session, mock_current_user): """Test that invalid MIME type returns 400 error.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [("files", ("test.txt", b"text content", "text/plain"))] response = await 
client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 400 data = response.json() assert "invalid_files" in str(data) or "Validation failed" in str(data) @pytest.mark.asyncio async def test_upload_file_too_large_returns_400(self, mock_session, mock_current_user): """Test that file larger than 10MB returns 400 error.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: large_content = b"x" * (11 * 1024 * 1024) files = [("files", ("large.pdf", large_content, "application/pdf"))] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 400 data = response.json() assert "too large" in str(data).lower() or "10mb" in str(data).lower() @pytest.mark.asyncio async def test_upload_too_many_files_returns_400(self, mock_session, mock_current_user): """Test that more than 100 files returns 400 error.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [ ("files", (f"file{i}.pdf", b"content", "application/pdf")) for i in range(101) ] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 400 data = response.json() assert "100" in str(data) or "Too many" in str(data) @pytest.mark.asyncio async def test_upload_atomic_rollback_on_failure(self, mock_session, mock_current_user, mock_ocr_job): """Test that all jobs are rolled back if any job creation fails.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \ patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check: call_count = [0] async def failing_create_job(*args, **kwargs): 
call_count[0] += 1 if call_count[0] == 2: raise Exception("Simulated job creation failure") return mock_ocr_job(f"test-job-{call_count[0]}") mock_job_queue.create_job = failing_create_job # Mock duplicate check to return no duplicates mock_dup_check.return_value = {} transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [ ("files", ("bon1.pdf", b"content1", "application/pdf")), ("files", ("bon2.pdf", b"content2", "application/pdf")), ] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 500 mock_session.rollback.assert_called_once() @pytest.mark.asyncio async def test_upload_mixed_valid_invalid_files_returns_400(self, mock_session, mock_current_user): """Test that batch with mix of valid and invalid files returns 400 (all or nothing).""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [ ("files", ("valid.pdf", b"pdf content", "application/pdf")), ("files", ("invalid.txt", b"text content", "text/plain")), ("files", ("valid.jpg", b"jpeg content", "image/jpeg")), ] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 400 # ============================================================================ # Response Schema Validation Tests # ============================================================================ class TestResponseSchema: """Test response schema compliance.""" @pytest.mark.asyncio async def test_response_contains_batch_id(self, mock_session, mock_current_user, mock_ocr_job): """Verify response contains batch_id field.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \ 
patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check: mock_job_queue.create_job = AsyncMock(return_value=mock_ocr_job("test-id")) # Mock duplicate check to return no duplicates mock_dup_check.return_value = {} # Make flush assign batch.id batch_id_counter = [0] original_add = mock_session.add def tracking_add(obj): if hasattr(obj, 'id') and obj.id is None: batch_id_counter[0] += 1 obj.id = batch_id_counter[0] return original_add(obj) mock_session.add = tracking_add transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [("files", ("test.pdf", b"content", "application/pdf"))] response = await client.post("/api/data-entry/bulk/upload", files=files) data = response.json() assert "batch_id" in data assert isinstance(data["batch_id"], int) @pytest.mark.asyncio async def test_response_contains_job_ids_list(self, mock_session, mock_current_user, mock_ocr_job): """Verify response contains job_ids as a list.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue, \ patch("backend.modules.data_entry.routers.bulk.check_duplicate_hashes") as mock_dup_check: job_counter = [0] def create_mock_job(*args, **kwargs): job_counter[0] += 1 return mock_ocr_job(f"job-{job_counter[0]}") mock_job_queue.create_job = AsyncMock(side_effect=create_mock_job) # Mock duplicate check to return no duplicates mock_dup_check.return_value = {} # Make flush assign batch.id batch_id_counter = [0] original_add = mock_session.add def tracking_add(obj): if hasattr(obj, 'id') and obj.id is None: batch_id_counter[0] += 1 obj.id = batch_id_counter[0] return original_add(obj) mock_session.add = tracking_add transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [ ("files", ("file1.pdf", b"c1", "application/pdf")), ("files", 
("file2.pdf", b"c2", "application/pdf")), ] response = await client.post("/api/data-entry/bulk/upload", files=files) data = response.json() assert "job_ids" in data assert isinstance(data["job_ids"], list) assert len(data["job_ids"]) == 2 # ============================================================================ # US-010: Batch Status Endpoint Tests # ============================================================================ class TestBatchStatusEndpointConstants: """Test batch status endpoint constants.""" def test_max_wait_seconds(self): """Verify max wait time is 30 seconds.""" assert MAX_WAIT_SECONDS == 30 class TestComputeBatchOverallStatus: """Test _compute_batch_overall_status helper function.""" def test_all_pending_returns_pending(self): """When all jobs are pending, status should be pending.""" status = _compute_batch_overall_status( pending=5, processing=0, completed=0, failed=0, total=5 ) assert status == "pending" def test_some_processing_returns_processing(self): """When some jobs are processing, status should be processing.""" status = _compute_batch_overall_status( pending=2, processing=1, completed=2, failed=0, total=5 ) assert status == "processing" def test_all_completed_returns_completed(self): """When all jobs are completed, status should be completed.""" status = _compute_batch_overall_status( pending=0, processing=0, completed=5, failed=0, total=5 ) assert status == "completed" def test_all_failed_returns_failed(self): """When all jobs failed, status should be failed.""" status = _compute_batch_overall_status( pending=0, processing=0, completed=0, failed=5, total=5 ) assert status == "failed" def test_mixed_completed_failed_returns_completed(self): """When some completed and some failed, status should be completed.""" status = _compute_batch_overall_status( pending=0, processing=0, completed=3, failed=2, total=5 ) assert status == "completed" def test_some_completed_some_pending_returns_processing(self): """When some completed but 
others pending, status should be processing.""" status = _compute_batch_overall_status( pending=2, processing=0, completed=3, failed=0, total=5 ) assert status == "processing" class TestBatchStatusResponseSchema: """Test BatchStatusResponse schema.""" def test_response_schema_structure(self): """Verify response schema has correct structure.""" response = BatchStatusResponse( batch_id=1, status="processing", total_files=5, pending_count=2, processing_count=1, completed_count=2, failed_count=0, jobs=[ BatchJobInfo( job_id="job-1", filename="bon1.pdf", status="completed", receipt_id=10 ), BatchJobInfo( job_id="job-2", filename="bon2.pdf", status="processing", receipt_id=None ) ], total_amount=150.50, created_at=datetime.utcnow() ) assert response.batch_id == 1 assert response.status == "processing" assert response.pending_count == 2 assert response.processing_count == 1 assert response.completed_count == 2 assert response.failed_count == 0 assert len(response.jobs) == 2 assert response.total_amount == 150.50 def test_batch_job_info_with_receipt_id(self): """Verify BatchJobInfo includes receipt_id for completed jobs.""" job = BatchJobInfo( job_id="test-job", filename="test.pdf", status="completed", receipt_id=42 ) assert job.receipt_id == 42 def test_batch_job_info_without_receipt_id(self): """Verify BatchJobInfo handles None receipt_id.""" job = BatchJobInfo( job_id="test-job", filename="test.pdf", status="pending", receipt_id=None ) assert job.receipt_id is None class TestBatchStatusEndpoint: """Test GET /api/data-entry/bulk/batches/{batch_id}/status endpoint.""" @pytest.fixture def mock_batch(self): """Create a mock batch.""" batch = MagicMock() batch.id = 1 batch.user_id = "test_user" batch.status = BatchStatus.PROCESSING batch.total_files = 3 batch.created_at = datetime.utcnow() return batch @pytest.fixture def mock_batch_jobs(self): """Create mock batch_jobs.""" jobs = [] for i in range(3): job = MagicMock() job.batch_id = 1 job.job_id = f"job-{i+1}" job.filename 
= f"bon{i+1}.pdf" job.receipt_id = (i + 10) if i == 0 else None # First job has receipt jobs.append(job) return jobs @pytest.fixture def mock_ocr_jobs(self): """Create mock OCR jobs with different statuses.""" def create_ocr_job(job_id, status): job = MagicMock() job.id = job_id job.status = OCRJobStatus(status) return job return { "job-1": create_ocr_job("job-1", "completed"), "job-2": create_ocr_job("job-2", "processing"), "job-3": create_ocr_job("job-3", "pending"), } @pytest.mark.asyncio async def test_get_batch_status_success( self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs ): """Test successful batch status retrieval.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: # Mock batch query batch_result = MagicMock() batch_result.scalar_one_or_none = MagicMock(return_value=mock_batch) # Mock batch_jobs query jobs_result = MagicMock() jobs_scalars = MagicMock() jobs_scalars.all = MagicMock(return_value=mock_batch_jobs) jobs_result.scalars = MagicMock(return_value=jobs_scalars) # Mock amount query (return sum of receipts) amount_result = MagicMock() amount_result.scalar = MagicMock(return_value=Decimal("125.50")) # Setup session.execute to return different results for different queries call_count = [0] async def mock_execute(query): call_count[0] += 1 if call_count[0] == 1: return batch_result elif call_count[0] == 2: return jobs_result else: return amount_result mock_session.execute = mock_execute # Mock job_queue.get_job async def get_job(job_id): return mock_ocr_jobs.get(job_id) mock_job_queue.get_job = get_job transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.get("/api/data-entry/bulk/batches/1/status") assert response.status_code == 200 data = response.json() # Verify response structure assert data["batch_id"] == 1 assert 
data["total_files"] == 3 assert "pending_count" in data assert "processing_count" in data assert "completed_count" in data assert "failed_count" in data assert "jobs" in data assert "total_amount" in data assert "created_at" in data @pytest.mark.asyncio async def test_batch_not_found_returns_404(self, mock_session, mock_current_user): """Test that non-existent batch returns 404.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) # Mock batch query returning None batch_result = MagicMock() batch_result.scalar_one_or_none = MagicMock(return_value=None) async def mock_execute(query): return batch_result mock_session.execute = mock_execute transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.get("/api/data-entry/bulk/batches/99999/status") assert response.status_code == 404 assert "not found" in response.json()["detail"].lower() @pytest.mark.asyncio async def test_wait_parameter_validation(self, mock_session, mock_current_user, mock_batch): """Test that wait parameter is validated (0-30 seconds).""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: # Wait > 30 should fail validation response = await client.get("/api/data-entry/bulk/batches/1/status?wait=31") # FastAPI returns 422 for validation errors assert response.status_code == 422 @pytest.mark.asyncio async def test_status_counts_are_correct( self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs ): """Test that status counts match job statuses.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: batch_result = MagicMock() batch_result.scalar_one_or_none = MagicMock(return_value=mock_batch) jobs_result = MagicMock() 
jobs_scalars = MagicMock() jobs_scalars.all = MagicMock(return_value=mock_batch_jobs) jobs_result.scalars = MagicMock(return_value=jobs_scalars) amount_result = MagicMock() amount_result.scalar = MagicMock(return_value=None) call_count = [0] async def mock_execute(query): call_count[0] += 1 if call_count[0] == 1: return batch_result elif call_count[0] == 2: return jobs_result else: return amount_result mock_session.execute = mock_execute async def get_job(job_id): return mock_ocr_jobs.get(job_id) mock_job_queue.get_job = get_job transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.get("/api/data-entry/bulk/batches/1/status") data = response.json() # mock_ocr_jobs has 1 completed, 1 processing, 1 pending assert data["completed_count"] == 1 assert data["processing_count"] == 1 assert data["pending_count"] == 1 assert data["failed_count"] == 0 @pytest.mark.asyncio async def test_jobs_list_includes_receipt_id( self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs ): """Test that jobs list includes receipt_id for completed jobs.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: batch_result = MagicMock() batch_result.scalar_one_or_none = MagicMock(return_value=mock_batch) jobs_result = MagicMock() jobs_scalars = MagicMock() jobs_scalars.all = MagicMock(return_value=mock_batch_jobs) jobs_result.scalars = MagicMock(return_value=jobs_scalars) amount_result = MagicMock() amount_result.scalar = MagicMock(return_value=Decimal("100.00")) call_count = [0] async def mock_execute(query): call_count[0] += 1 if call_count[0] == 1: return batch_result elif call_count[0] == 2: return jobs_result else: return amount_result mock_session.execute = mock_execute async def get_job(job_id): return mock_ocr_jobs.get(job_id) mock_job_queue.get_job = get_job transport = 
ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.get("/api/data-entry/bulk/batches/1/status") data = response.json() jobs = data["jobs"] assert len(jobs) == 3 # Find the completed job (job-1 has receipt_id=10) completed_job = next((j for j in jobs if j["job_id"] == "job-1"), None) assert completed_job is not None assert completed_job["receipt_id"] == 10 # Other jobs should have None receipt_id pending_job = next((j for j in jobs if j["job_id"] == "job-3"), None) assert pending_job is not None assert pending_job["receipt_id"] is None @pytest.mark.asyncio async def test_total_amount_calculation( self, mock_session, mock_current_user, mock_batch, mock_batch_jobs, mock_ocr_jobs ): """Test that total_amount sums receipt amounts correctly.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: batch_result = MagicMock() batch_result.scalar_one_or_none = MagicMock(return_value=mock_batch) jobs_result = MagicMock() jobs_scalars = MagicMock() jobs_scalars.all = MagicMock(return_value=mock_batch_jobs) jobs_result.scalars = MagicMock(return_value=jobs_scalars) # Total amount should be 250.75 amount_result = MagicMock() amount_result.scalar = MagicMock(return_value=Decimal("250.75")) call_count = [0] async def mock_execute(query): call_count[0] += 1 if call_count[0] == 1: return batch_result elif call_count[0] == 2: return jobs_result else: return amount_result mock_session.execute = mock_execute async def get_job(job_id): return mock_ocr_jobs.get(job_id) mock_job_queue.get_job = get_job transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.get("/api/data-entry/bulk/batches/1/status") data = response.json() assert data["total_amount"] == 250.75 class TestBatchStatusLongPolling: """Test long-polling behavior of 
batch status endpoint.""" @pytest.mark.asyncio async def test_wait_zero_returns_immediately(self, mock_session, mock_current_user): """Test that wait=0 returns immediately without waiting.""" mock_batch = MagicMock() mock_batch.id = 1 mock_batch.total_files = 2 mock_batch.created_at = datetime.utcnow() mock_batch_jobs = [ MagicMock(batch_id=1, job_id="job-1", filename="bon1.pdf", receipt_id=None), MagicMock(batch_id=1, job_id="job-2", filename="bon2.pdf", receipt_id=None), ] mock_ocr_job = MagicMock() mock_ocr_job.status = OCRJobStatus.pending app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: batch_result = MagicMock() batch_result.scalar_one_or_none = MagicMock(return_value=mock_batch) jobs_result = MagicMock() jobs_scalars = MagicMock() jobs_scalars.all = MagicMock(return_value=mock_batch_jobs) jobs_result.scalars = MagicMock(return_value=jobs_scalars) call_count = [0] async def mock_execute(query): call_count[0] += 1 if call_count[0] == 1: return batch_result else: return jobs_result mock_session.execute = mock_execute async def get_job(job_id): return mock_ocr_job mock_job_queue.get_job = get_job transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: import time start = time.time() response = await client.get("/api/data-entry/bulk/batches/1/status?wait=0") elapsed = time.time() - start assert response.status_code == 200 # Should return almost immediately (under 0.5 seconds) assert elapsed < 0.5 @pytest.mark.asyncio async def test_no_wait_param_returns_immediately(self, mock_session, mock_current_user): """Test that no wait param returns immediately.""" mock_batch = MagicMock() mock_batch.id = 1 mock_batch.total_files = 1 mock_batch.created_at = datetime.utcnow() mock_batch_jobs = [ MagicMock(batch_id=1, job_id="job-1", filename="bon1.pdf", receipt_id=None), ] mock_ocr_job = MagicMock() 
mock_ocr_job.status = OCRJobStatus.pending app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: batch_result = MagicMock() batch_result.scalar_one_or_none = MagicMock(return_value=mock_batch) jobs_result = MagicMock() jobs_scalars = MagicMock() jobs_scalars.all = MagicMock(return_value=mock_batch_jobs) jobs_result.scalars = MagicMock(return_value=jobs_scalars) call_count = [0] async def mock_execute(query): call_count[0] += 1 if call_count[0] == 1: return batch_result else: return jobs_result mock_session.execute = mock_execute async def get_job(job_id): return mock_ocr_job mock_job_queue.get_job = get_job transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: import time start = time.time() response = await client.get("/api/data-entry/bulk/batches/1/status") elapsed = time.time() - start assert response.status_code == 200 assert elapsed < 0.5 # ============================================================================ # US-007: Duplicate Detection Tests # ============================================================================ class TestDuplicateDetectionHelpers: """Test helper functions for duplicate detection.""" def test_compute_file_hash_produces_sha256(self): """Verify compute_file_hash produces valid SHA-256 hash.""" from backend.modules.data_entry.routers.bulk import compute_file_hash content = b"test content for hashing" hash_result = compute_file_hash(content) # SHA-256 produces 64 hex characters assert len(hash_result) == 64 assert all(c in "0123456789abcdef" for c in hash_result) def test_compute_file_hash_deterministic(self): """Verify same content produces same hash.""" from backend.modules.data_entry.routers.bulk import compute_file_hash content = b"test content" hash1 = compute_file_hash(content) hash2 = compute_file_hash(content) assert hash1 == hash2 def 
test_compute_file_hash_different_content(self): """Verify different content produces different hash.""" from backend.modules.data_entry.routers.bulk import compute_file_hash hash1 = compute_file_hash(b"content A") hash2 = compute_file_hash(b"content B") assert hash1 != hash2 class TestDuplicateDetectionInBulkUpload: """Test duplicate detection in bulk upload endpoint (US-007).""" @pytest.mark.asyncio async def test_all_duplicates_returns_409(self, mock_session, mock_current_user, mock_ocr_job): """Test that uploading all duplicate files returns 409 Conflict.""" from backend.modules.data_entry.routers.bulk import compute_file_hash from backend.modules.data_entry.db.models import Receipt app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) # Calculate hash for test content test_content = b"duplicate pdf content" test_hash = compute_file_hash(test_content) # Mock database query to return existing receipt with this hash existing_receipt = MagicMock() existing_receipt.id = 123 existing_receipt.file_hash = test_hash # Mock the duplicate check query mock_result = MagicMock() mock_result.all = MagicMock(return_value=[(test_hash, 123)]) async def mock_execute(query): return mock_result mock_session.execute = mock_execute transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [("files", ("duplicate.pdf", test_content, "application/pdf"))] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 409 data = response.json() assert "all_duplicates" in str(data) or "duplicate" in str(data).lower() @pytest.mark.asyncio async def test_partial_duplicates_processes_non_duplicates(self, mock_session, mock_current_user, mock_ocr_job): """Test that with some duplicates, non-duplicates are processed normally.""" from backend.modules.data_entry.routers.bulk import compute_file_hash app = create_test_app(mock_session=mock_session, 
mock_user=mock_current_user) duplicate_content = b"duplicate content" new_content = b"new unique content" duplicate_hash = compute_file_hash(duplicate_content) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: mock_job = mock_ocr_job("test-job-id") mock_job_queue.create_job = AsyncMock(return_value=mock_job) # Track batch creation batch_id_counter = [0] original_add = mock_session.add def tracking_add(obj): if hasattr(obj, 'id') and obj.id is None: batch_id_counter[0] += 1 obj.id = batch_id_counter[0] return original_add(obj) mock_session.add = tracking_add # Mock the duplicate check query - only duplicate_hash exists mock_result = MagicMock() mock_result.all = MagicMock(return_value=[(duplicate_hash, 123)]) async def mock_execute(query): return mock_result mock_session.execute = mock_execute transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [ ("files", ("duplicate.pdf", duplicate_content, "application/pdf")), ("files", ("new.pdf", new_content, "application/pdf")), ] response = await client.post("/api/data-entry/bulk/upload", files=files) # Should succeed (200) because one file can be processed assert response.status_code == 200 data = response.json() # Response should contain duplicate info assert "duplicates" in data or "duplicate_files" in data @pytest.mark.asyncio async def test_no_duplicates_normal_processing(self, mock_session, mock_current_user, mock_ocr_job): """Test that files without duplicates are processed normally.""" app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) with patch("backend.modules.data_entry.routers.bulk.job_queue") as mock_job_queue: mock_job = mock_ocr_job("test-job-id") mock_job_queue.create_job = AsyncMock(return_value=mock_job) # Track batch creation batch_id_counter = [0] original_add = mock_session.add def tracking_add(obj): if hasattr(obj, 'id') and obj.id is None: batch_id_counter[0] += 1 obj.id = 
batch_id_counter[0] return original_add(obj) mock_session.add = tracking_add # Mock the duplicate check query - no duplicates found mock_result = MagicMock() mock_result.all = MagicMock(return_value=[]) async def mock_execute(query): return mock_result mock_session.execute = mock_execute transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [("files", ("new.pdf", b"brand new content", "application/pdf"))] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 200 data = response.json() assert "batch_id" in data assert "job_ids" in data @pytest.mark.asyncio async def test_duplicate_response_includes_existing_receipt_id(self, mock_session, mock_current_user): """Test that duplicate error includes existing_receipt_id (US-007 AC3).""" from backend.modules.data_entry.routers.bulk import compute_file_hash app = create_test_app(mock_session=mock_session, mock_user=mock_current_user) test_content = b"duplicate content" test_hash = compute_file_hash(test_content) existing_receipt_id = 456 # Mock the duplicate check query mock_result = MagicMock() mock_result.all = MagicMock(return_value=[(test_hash, existing_receipt_id)]) async def mock_execute(query): return mock_result mock_session.execute = mock_execute transport = ASGITransport(app=app) async with AsyncClient(transport=transport, base_url="http://test") as client: files = [("files", ("test.pdf", test_content, "application/pdf"))] response = await client.post("/api/data-entry/bulk/upload", files=files) assert response.status_code == 409 data = response.json() # Check that the response contains duplicate info with receipt ID detail = data.get("detail", {}) duplicates = detail.get("duplicates", []) assert len(duplicates) > 0 assert duplicates[0]["existing_receipt_id"] == existing_receipt_id assert duplicates[0]["error"] == "duplicate" class TestDuplicateResponseSchema: """Test response schemas for duplicate 
detection.""" def test_duplicate_file_info_schema(self): """Verify DuplicateFileInfo schema has correct structure.""" from backend.modules.data_entry.schemas.bulk import DuplicateFileInfo info = DuplicateFileInfo( filename="test.pdf", error="duplicate", existing_receipt_id=123, message="Fișier duplicat - există deja ca bon #123" ) assert info.filename == "test.pdf" assert info.error == "duplicate" assert info.existing_receipt_id == 123 assert "123" in info.message def test_bulk_upload_response_with_duplicates_schema(self): """Verify BulkUploadResponseWithDuplicates schema has correct structure.""" from backend.modules.data_entry.schemas.bulk import ( BulkUploadResponseWithDuplicates, DuplicateFileInfo ) response = BulkUploadResponseWithDuplicates( batch_id=1, job_ids=["job-1", "job-2"], total_files=4, processed_files=2, duplicate_files=2, duplicates=[ DuplicateFileInfo( filename="dup1.pdf", error="duplicate", existing_receipt_id=10, message="Fișier duplicat - există deja ca bon #10" ), DuplicateFileInfo( filename="dup2.pdf", error="duplicate", existing_receipt_id=11, message="Fișier duplicat - există deja ca bon #11" ), ], message="2 fișier(e) în procesare, 2 duplicate ignorate" ) assert response.batch_id == 1 assert response.total_files == 4 assert response.processed_files == 2 assert response.duplicate_files == 2 assert len(response.duplicates) == 2 if __name__ == "__main__": pytest.main([__file__, "-v"])