feat: Migrate to ultrathin monolith architecture
Consolidate 3 separate applications (reports-app, data-entry-app, telegram-bot) into a unified
architecture with single backend and frontend:
Backend Changes:
- Unified FastAPI backend at backend/ with modular structure
- Modules: reports, data_entry, telegram in backend/modules/
- Centralized config.py and main.py with all routers registered
- Single worker mode (--workers 1) for Telegram bot compatibility
- Shared Oracle connection pool and JWT authentication
- Unified requirements.txt and environment configuration
Frontend Changes:
- Single Vue.js SPA with module-based routing
- Unified frontend at src/ with modules in src/modules/{reports,data-entry}/
- Shared components and stores in src/shared/
- Error boundaries for module isolation
- Dual API proxy in Vite for module communication
Infrastructure:
- New unified startup scripts: start-prod.sh, start-test.sh, start-backend.sh
- Environment templates: .env.dev.example, .env.test.example, .env.prod.example
- Updated deployment scripts for Windows IIS
- Simplified SSH tunnel management
Documentation:
- Comprehensive CLAUDE.md with architecture overview
- Module-specific docs in docs/{data-entry,telegram}/
- Architecture decision records in docs/ARCHITECTURE-DECISIONS.md
- Deployment guides consolidated in deployment/windows/docs/
This migration reduces complexity, improves maintainability, and enables easier
deployment while maintaining all existing functionality.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
218
backend/modules/data_entry/routers/ocr.py
Normal file
218
backend/modules/data_entry/routers/ocr.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""OCR API endpoints."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.modules.data_entry.db.database import get_session
|
||||
from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
|
||||
from backend.modules.data_entry.services.ocr_service import ocr_service
|
||||
from backend.modules.data_entry.services.ocr_engine import OCREngine
|
||||
from backend.modules.data_entry.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry, PaymentMethod
|
||||
|
||||
# Auth integration: imported but not referenced by the endpoints in this file; protection is expected to come from middleware.
|
||||
from shared.auth.dependencies import get_current_user
|
||||
from shared.auth.models import CurrentUser
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/status", response_model=OCRStatusResponse)
async def get_ocr_status():
    """Report whether any OCR engine is usable and list the ones found."""
    detected = OCREngine.get_available_engines()
    has_engine = len(detected) > 0

    # Human-readable summary returned next to the machine-readable fields.
    status_text = (
        f"OCR service ready with engines: {', '.join(detected)}"
        if has_engine
        else "No OCR engines available. Install PaddleOCR or Tesseract."
    )

    return OCRStatusResponse(
        available=has_engine,
        engines=detected,
        message=status_text,
    )
|
||||
|
||||
|
||||
@router.post("/extract", response_model=OCRResponse)
async def extract_from_image(file: UploadFile = File(...)):
    """
    Extract receipt data from uploaded image.

    Accepts JPG, PNG, or PDF files (max 10MB).
    Returns extracted fields with confidence scores.

    Raises:
        HTTPException: 400 when the content type is not supported or the
            upload exceeds 10MB; 422 when OCR processing reports failure.
    """
    from decimal import Decimal

    allowed_types = ['image/jpeg', 'image/png', 'application/pdf']
    max_size = 10 * 1024 * 1024  # 10MB upload limit

    if file.content_type not in allowed_types:
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
        )

    # Validate the size BEFORE any temp file exists, so a rejected upload
    # leaves nothing behind on disk. Reading one byte past the limit is
    # enough to detect an oversized file without buffering all of it.
    content = await file.read(max_size + 1)
    if len(content) > max_size:
        raise HTTPException(
            status_code=400,
            detail="File too large. Maximum size is 10MB."
        )

    # Keep the original extension when it is a known one; default to .jpg.
    suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
    if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
        suffix = '.jpg'

    tmp_path = None
    try:
        # delete=False: the handle is closed on leaving the `with`, and the
        # OCR service receives only the path. Cleanup happens in `finally`.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(content)

        success, message, result = await ocr_service.process_image(
            tmp_path, file.content_type
        )

        if not success:
            raise HTTPException(status_code=422, detail=message)

        # Convert tva_entries from dicts to TvaEntry objects
        tva_entries_schema = [
            TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
            for e in result.tva_entries
        ] if result.tva_entries else []

        # Convert payment_methods from dicts to PaymentMethod objects;
        # amounts go through str() before Decimal().
        payment_methods_list = [
            PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount'])))
            for pm in result.payment_methods
        ] if result.payment_methods else []

        # Auto-suggest payment_mode based on detected methods.
        # CARD -> 'banca'; NUMERAR -> no auto-suggestion, user chooses
        # between casa/avans.
        suggested_payment_mode = None
        if any(pm.method == 'CARD' for pm in payment_methods_list):
            suggested_payment_mode = 'banca'

        data = ExtractionData(
            receipt_type=result.receipt_type,
            receipt_number=result.receipt_number,
            receipt_series=result.receipt_series,
            receipt_date=result.receipt_date,
            amount=result.amount,
            partner_name=result.partner_name,
            cui=result.cui,
            description=result.description,
            tva_entries=tva_entries_schema,
            tva_total=result.tva_total,
            address=result.address,
            items_count=result.items_count,
            payment_methods=payment_methods_list,
            suggested_payment_mode=suggested_payment_mode,
            confidence_amount=result.confidence_amount,
            confidence_date=result.confidence_date,
            confidence_vendor=result.confidence_vendor,
            overall_confidence=result.overall_confidence,
            raw_text=result.raw_text,
            ocr_engine=result.ocr_engine,
            processing_time_ms=result.processing_time_ms,
        )

        return OCRResponse(success=True, message=message, data=data)

    finally:
        # Always remove the temp file, including when writing it or the
        # OCR call raised.
        if tmp_path is not None and tmp_path.exists():
            os.unlink(tmp_path)
|
||||
|
||||
|
||||
@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
async def extract_from_attachment(
    attachment_id: int,
    session: AsyncSession = Depends(get_session),
):
    """
    Run OCR on a file that is already stored as an attachment.

    Looks the attachment up by id, checks that its file exists and that
    its MIME type is JPG, PNG or PDF, then returns the extracted fields.
    """
    from decimal import Decimal

    attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
    if not attachment:
        raise HTTPException(status_code=404, detail="Attachment not found")

    stored_file = AttachmentCRUD.get_file_path(attachment)
    if not stored_file.exists():
        raise HTTPException(status_code=404, detail="File not found on disk")

    # Only these MIME types are handed to the OCR service.
    ocr_mime_types = ['image/jpeg', 'image/png', 'application/pdf']
    if attachment.mime_type not in ocr_mime_types:
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported for OCR: {attachment.mime_type}"
        )

    success, message, result = await ocr_service.process_image(
        stored_file, attachment.mime_type
    )
    if not success:
        raise HTTPException(status_code=422, detail=message)

    # VAT entries: dicts -> TvaEntry objects (empty when nothing detected).
    vat_lines = []
    for entry in result.tva_entries or []:
        vat_lines.append(
            TvaEntry(
                code=entry.get('code'),
                percent=entry['percent'],
                amount=entry['amount'],
            )
        )

    # Payment methods: dicts -> PaymentMethod objects with Decimal amounts.
    payments = []
    for raw_payment in result.payment_methods or []:
        payments.append(
            PaymentMethod(
                method=raw_payment['method'],
                amount=Decimal(str(raw_payment['amount'])),
            )
        )

    # A detected CARD payment suggests 'banca'; for NUMERAR nothing is
    # suggested and the user chooses between casa/avans.
    card_detected = any(p.method == 'CARD' for p in payments)
    payment_mode_hint = 'banca' if card_detected else None

    extraction = ExtractionData(
        receipt_type=result.receipt_type,
        receipt_number=result.receipt_number,
        receipt_series=result.receipt_series,
        receipt_date=result.receipt_date,
        amount=result.amount,
        partner_name=result.partner_name,
        cui=result.cui,
        description=result.description,
        tva_entries=vat_lines,
        tva_total=result.tva_total,
        address=result.address,
        items_count=result.items_count,
        payment_methods=payments,
        suggested_payment_mode=payment_mode_hint,
        confidence_amount=result.confidence_amount,
        confidence_date=result.confidence_date,
        confidence_vendor=result.confidence_vendor,
        overall_confidence=result.overall_confidence,
        raw_text=result.raw_text,
        ocr_engine=result.ocr_engine,
        processing_time_ms=result.processing_time_ms,
    )

    return OCRResponse(success=True, message=message, data=extraction)
|
||||
Reference in New Issue
Block a user