feat: Migrate to ultrathin monolith architecture

Consolidate 3 separate applications (reports-app, data-entry-app, telegram-bot) into a unified architecture with a single backend and frontend.

Backend Changes:
- Unified FastAPI backend at backend/ with modular structure
- Modules: reports, data_entry, telegram in backend/modules/
- Centralized config.py and main.py with all routers registered
- Single worker mode (--workers 1) for Telegram bot compatibility
- Shared Oracle connection pool and JWT authentication
- Unified requirements.txt and environment configuration

Frontend Changes:
- Single Vue.js SPA with module-based routing
- Unified frontend at src/ with modules in src/modules/{reports,data-entry}/
- Shared components and stores in src/shared/
- Error boundaries for module isolation
- Dual API proxy in Vite for module communication

Infrastructure:
- New unified startup scripts: start-prod.sh, start-test.sh, start-backend.sh
- Environment templates: .env.dev.example, .env.test.example, .env.prod.example
- Updated deployment scripts for Windows IIS
- Simplified SSH tunnel management

Documentation:
- Comprehensive CLAUDE.md with architecture overview
- Module-specific docs in docs/{data-entry,telegram}/
- Architecture decision records in docs/ARCHITECTURE-DECISIONS.md
- Deployment guides consolidated in deployment/windows/docs/

This migration reduces complexity, improves maintainability, and enables easier deployment while maintaining all existing functionality.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-29 23:48:14 +02:00
parent 2a101f1ef5
commit c5e051ad80
378 changed files with 7566 additions and 73730 deletions
--- a/backend/modules/data_entry/routers/ocr.py
+++ b/backend/modules/data_entry/routers/ocr.py
@@ -0,0 +1,218 @@
+"""OCR API endpoints."""
+
+import os
+import tempfile
+from pathlib import Path
+
+from fastapi import APIRouter, HTTPException, UploadFile, File, Depends
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from backend.modules.data_entry.db.database import get_session
+from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
+from backend.modules.data_entry.services.ocr_service import ocr_service
+from backend.modules.data_entry.services.ocr_engine import OCREngine
+from backend.modules.data_entry.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry, PaymentMethod
+
+# Auth integration (will be protected by middleware)
+from shared.auth.dependencies import get_current_user
+from shared.auth.models import CurrentUser
+
+router = APIRouter()
+
+
+@router.get("/status", response_model=OCRStatusResponse)
+async def get_ocr_status():
+    """Report whether any OCR engine is usable and list the engines found.
+
+    Returns an ``OCRStatusResponse`` carrying the availability flag, the
+    engines reported by ``OCREngine.get_available_engines()`` and a message.
+    """
+    detected = OCREngine.get_available_engines()
+    has_engine = len(detected) > 0
+    if not has_engine:
+        text = "No OCR engines available. Install PaddleOCR or Tesseract."
+    else:
+        text = f"OCR service ready with engines: {', '.join(detected)}"
+    return OCRStatusResponse(
+        available=has_engine, engines=detected, message=text
+    )
+
+
+@router.post("/extract", response_model=OCRResponse)
+async def extract_from_image(file: UploadFile = File(...)):
+    """
+    Extract receipt data from uploaded image.
+
+    Accepts JPG, PNG, or PDF files (max 10MB).
+    Returns extracted fields with confidence scores.
+
+    Raises HTTPException 400 for an unsupported type or an oversized file,
+    and 422 when the OCR service reports failure.
+    """
+    from decimal import Decimal
+
+    allowed_types = ['image/jpeg', 'image/png', 'application/pdf']
+
+    if file.content_type not in allowed_types:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
+        )
+
+    # Size check (10MB) runs before the temp file exists, so a reject leaves no file
+    content = await file.read()
+    if len(content) > 10 * 1024 * 1024:
+        raise HTTPException(
+            status_code=400,
+            detail="File too large. Maximum size is 10MB."
+        )
+
+    # Get file extension; anything unexpected falls back to .jpg
+    suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
+    if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
+        suffix = '.jpg'
+
+    # delete=False: the path is handed to the OCR service after the handle is closed
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+    tmp_path = Path(tmp.name)
+    try:
+        # Written inside the try so a failed write is cleaned up as well
+        with tmp:
+            tmp.write(content)
+
+        success, message, result = await ocr_service.process_image(
+            tmp_path, file.content_type
+        )
+
+        if not success:
+            raise HTTPException(status_code=422, detail=message)
+
+        # Convert tva_entries from dict to TvaEntry objects
+        tva_entries_schema = [
+            TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
+            for e in result.tva_entries or []
+        ]
+
+        # Convert payment_methods from dict to PaymentMethod objects
+        payment_methods_list = [
+            PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount'])))
+            for pm in result.payment_methods or []
+        ]
+
+        # Auto-suggest payment_mode: a detected CARD payment maps to 'banca'.
+        # NUMERAR -> no auto-suggestion, user chooses between casa/avans
+        has_card = any(pm.method == 'CARD' for pm in payment_methods_list)
+        suggested_payment_mode = 'banca' if has_card else None
+
+        data = ExtractionData(
+            receipt_type=result.receipt_type,
+            receipt_number=result.receipt_number,
+            receipt_series=result.receipt_series,
+            receipt_date=result.receipt_date,
+            amount=result.amount,
+            partner_name=result.partner_name,
+            cui=result.cui,
+            description=result.description,
+            tva_entries=tva_entries_schema,
+            tva_total=result.tva_total,
+            address=result.address,
+            items_count=result.items_count,
+            payment_methods=payment_methods_list,
+            suggested_payment_mode=suggested_payment_mode,
+            confidence_amount=result.confidence_amount,
+            confidence_date=result.confidence_date,
+            confidence_vendor=result.confidence_vendor,
+            overall_confidence=result.overall_confidence,
+            raw_text=result.raw_text,
+            ocr_engine=result.ocr_engine,
+            processing_time_ms=result.processing_time_ms,
+        )
+
+        return OCRResponse(success=True, message=message, data=data)
+    finally:
+        # Clean up temp file
+        if tmp_path.exists():
+            os.unlink(tmp_path)
+
+
+@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
+async def extract_from_attachment(
+    attachment_id: int,
+    session: AsyncSession = Depends(get_session),
+):
+    """
+    Run OCR on a file that was already uploaded as an attachment.
+
+    The file is read from the path given by ``AttachmentCRUD.get_file_path``.
+    Responds 404 when the attachment or its file is missing, 400 when the
+    stored MIME type is not JPG/PNG/PDF, and 422 when OCR fails.
+    """
+    from decimal import Decimal
+
+    record = await AttachmentCRUD.get_by_id(session, attachment_id)
+    if not record:
+        raise HTTPException(status_code=404, detail="Attachment not found")
+
+    stored_file = AttachmentCRUD.get_file_path(record)
+    if not stored_file.exists():
+        raise HTTPException(status_code=404, detail="File not found on disk")
+
+    # Only these MIME types can be sent through OCR
+    ocr_mime_types = ('image/jpeg', 'image/png', 'application/pdf')
+    if record.mime_type not in ocr_mime_types:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File type not supported for OCR: {record.mime_type}"
+        )
+
+    ok, message, result = await ocr_service.process_image(
+        stored_file, record.mime_type
+    )
+
+    if not ok:
+        raise HTTPException(status_code=422, detail=message)
+
+    # Map the raw tva dicts onto TvaEntry schema objects
+    vat_rows = []
+    for row in result.tva_entries or []:
+        vat_rows.append(
+            TvaEntry(code=row.get('code'), percent=row['percent'], amount=row['amount'])
+        )
+
+    # Map the raw payment dicts onto PaymentMethod schema objects
+    payments = []
+    for item in result.payment_methods or []:
+        payments.append(
+            PaymentMethod(method=item['method'], amount=Decimal(str(item['amount'])))
+        )
+
+    # A CARD payment suggests 'banca'; for NUMERAR the user picks casa/avans,
+    # so no suggestion is made.
+    paid_by_card = any(p.method == 'CARD' for p in payments)
+    mode_hint = 'banca' if paid_by_card else None
+
+    data = ExtractionData(
+        receipt_type=result.receipt_type,
+        receipt_number=result.receipt_number,
+        receipt_series=result.receipt_series,
+        receipt_date=result.receipt_date,
+        amount=result.amount,
+        partner_name=result.partner_name,
+        cui=result.cui,
+        description=result.description,
+        tva_entries=vat_rows,
+        tva_total=result.tva_total,
+        address=result.address,
+        items_count=result.items_count,
+        payment_methods=payments,
+        suggested_payment_mode=mode_hint,
+        confidence_amount=result.confidence_amount,
+        confidence_date=result.confidence_date,
+        confidence_vendor=result.confidence_vendor,
+        overall_confidence=result.overall_confidence,
+        raw_text=result.raw_text,
+        ocr_engine=result.ocr_engine,
+        processing_time_ms=result.processing_time_ms,
+    )
+
+    return OCRResponse(success=True, message=message, data=data)