# roa2web-service-auto/backend/modules/data_entry/tests/test_ocr_validation_integration.py

"""
Integration tests for OCR validation system.

These tests verify the end-to-end validation flow with real OCR processing.

IMPORTANT: These tests require:
1. PaddleOCR models downloaded
2. Tesseract installed
3. Test receipt files in docs/data-entry/

Run with: pytest backend/modules/data_entry/tests/test_ocr_validation_integration.py -v
"""

import pytest
from pathlib import Path
from decimal import Decimal


# Mark all tests in this module as integration tests (slower, require OCR
# models). pytest reads the module-level name `pytestmark` and applies the
# mark to every test collected from this file.
# NOTE(review): this also marks the mock-based tests in
# TestValidationIntegration, which are described as runnable without OCR --
# confirm that is intended.
pytestmark = pytest.mark.integration


@pytest.fixture
def five_holding_receipt_path():
    """Return the location of the Five-Holding production receipt PDF.

    This is the receipt behind the 85.99 LEI test case. The path is relative,
    so it is interpreted against the current working directory.
    """
    receipt_file = "docs/data-entry/igiena 14 decembrie five-holding.pdf"
    return Path(receipt_file)


class TestProductionCaseFiveHolding:
    """Guard the critical Five-Holding receipt case (85.99, not 859,762.16).

    All tests in this class are placeholders: each one skips itself because
    the checks need a running OCR service and are carried out manually.
    """

    # Single skip message shared by every placeholder test below.
    _SKIP_REASON = "Requires running OCR service - manual test"

    def test_correct_amount_extracted(self, five_holding_receipt_path):
        """The Five-Holding receipt should yield 85.99 LEI, not 859,762.16."""
        # TODO: Implement when OCR service is running. Planned steps:
        #   from backend.modules.data_entry.services.ocr_service import OCRService
        #   service = OCRService()
        #   success, message, extraction = service.process_receipt(five_holding_receipt_path)
        #
        #   assert success is True
        #   assert extraction.amount == Decimal('85.99'), f"Expected 85.99, got {extraction.amount}"
        #   assert extraction.tva_total == Decimal('14.92'), f"Expected 14.92, got {extraction.tva_total}"
        pytest.skip(self._SKIP_REASON)

    def test_no_magnitude_errors(self, five_holding_receipt_path):
        """The extracted amount must not be off by a 10,000x magnitude."""
        # TODO: Verify extraction.amount < 1000 (not 859,762.16)
        pytest.skip(self._SKIP_REASON)

    def test_validation_warnings_if_any(self, five_holding_receipt_path):
        """Inspect the validation warnings raised for the Five-Holding receipt."""
        # TODO: extraction.validation_warnings should be empty or minimal
        pytest.skip(self._SKIP_REASON)


class TestValidationIntegration:
    """Exercise the validation engine as used by the OCR pipeline."""

    @staticmethod
    def _validate(extracted):
        """Validate *extracted* with a fresh ``OCRValidationEngine``.

        Returns whatever ``validate_extraction`` returns for the given dict.
        """
        # Imported inside the function rather than at module level, exactly
        # as each individual test did before this helper existed.
        from backend.modules.data_entry.services.ocr.validation import OCRValidationEngine

        engine = OCRValidationEngine()
        return engine.validate_extraction(extracted)

    def test_payment_sum_validation_mock(self):
        """A card + cash total that differs from the amount must be flagged."""
        # Runs without OCR: only the validation logic is exercised.
        # Case: card (50) + cash (40) = 90, which is 10 short of the amount.
        outcome = self._validate({
            'amount': 100.0,
            'card_amount': 50.0,
            'cash_amount': 40.0,
        })

        assert outcome.needs_manual_review is True
        warnings = outcome.validation_warnings
        assert len(warnings) > 0
        assert any('Payment sum' in text for text in warnings)

    def test_tva_ratio_validation_mock(self):
        """A TVA value that is too large relative to the amount must be flagged."""
        # Case: TVA of 30 on an amount of 100 (30%) -- invalid; this case
        # targets a 24% ceiling (the threshold itself lives in the engine).
        outcome = self._validate({
            'amount': 100.0,
            'tva': 30.0,
        })

        assert outcome.needs_manual_review is True
        assert any('TVA ratio' in text for text in outcome.validation_warnings)

    def test_amount_range_validation_mock(self):
        """An implausibly large amount must be reported as a validation error."""
        # Case: the production error value, far above the 100,000 maximum
        # this case targets (the limit itself lives in the engine).
        outcome = self._validate({
            'amount': 859_762.16,
        })

        assert outcome.needs_manual_review is True
        errors = outcome.validation_errors
        assert len(errors) > 0
        assert any('exceeds maximum' in text for text in errors)

    def test_medium_ocr_preprocessing(self):
        """Placeholder: check that Medium OCR preprocessing works."""
        # TODO:
        #   from backend.modules.data_entry.services.image_preprocessor import ImagePreprocessor
        #   preprocessor = ImagePreprocessor()
        #   # Load test image
        #   # Apply preprocess_medium()
        #   # Verify output shape and values
        pytest.skip("Requires OCR models - manual test")


class TestDatabaseIntegration:
    """Database-side checks for the needs_manual_review field.

    Both tests are placeholders that skip themselves, since they need a
    database connection.
    """

    # Single skip message shared by both placeholder tests.
    _SKIP_REASON = "Requires database connection"

    def test_receipt_model_has_validation_field(self):
        """The Receipt model should expose a needs_manual_review field."""
        # TODO: Check Receipt model
        pytest.skip(self._SKIP_REASON)

    def test_migration_adds_column(self):
        """The migration should add the needs_manual_review column."""
        # TODO: Run migration and check column exists
        pytest.skip(self._SKIP_REASON)


# =============================================================================
# MANUAL TESTING CHECKLIST
# =============================================================================
"""
MANUAL TESTS TO PERFORM:

1. Five-Holding Receipt Test (Production Case)
   □ Upload: docs/data-entry/igiena 14 decembrie five-holding.pdf
   □ Verify TOTAL: 85.99 LEI (not 859,762.16)
   □ Verify TVA: 14.92 LEI (not 149,214.92)
   □ Verify CUI: RO10562600
   □ Verify no validation warnings (or only minor ones)

2. Database Migration Test
   □ Run: alembic upgrade head
   □ Check: receipts table has needs_manual_review column
   □ Verify: Existing receipts have NULL value
   □ Verify: New receipts get TRUE/FALSE values

3. API Response Test
   □ POST /api/ocr/extract with test receipt
   □ Verify response includes: needs_manual_review, validation_warnings
   □ Verify Save button works even with warnings

4. Validation Rules Test
   □ Test with receipt having wrong amounts (should flag)
   □ Test with receipt having correct amounts (should pass)
   □ Test payment sum mismatch detection
   □ Test TVA ratio validation

5. Medium OCR vs Heavy OCR
   □ Compare results on clear PDFs
   □ Verify no digit concatenation errors
   □ Check processing time is similar

6. Unit Tests
   □ Run: pytest backend/modules/data_entry/tests/test_ocr_validation.py -v
   □ Verify: All tests pass
   □ Check: Coverage > 90%
"""


if __name__ == "__main__":
    # Allow running this file directly as a script. pytest.main() returns the
    # exit code of the test run; propagate it so that a failing run produces a
    # non-zero process exit status instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))