Consolidate 3 separate applications (reports-app, data-entry-app, telegram-bot) into a unified
architecture with a single backend and a single frontend:
Backend Changes:
- Unified FastAPI backend at backend/ with modular structure
- Modules: reports, data_entry, telegram in backend/modules/
- Centralized config.py and main.py with all routers registered
- Single worker mode (--workers 1) for Telegram bot compatibility
- Shared Oracle connection pool and JWT authentication
- Unified requirements.txt and environment configuration
Frontend Changes:
- Single Vue.js SPA with module-based routing
- Unified frontend at src/ with modules in src/modules/{reports,data-entry}/
- Shared components and stores in src/shared/
- Error boundaries for module isolation
- Dual API proxy in Vite for module communication
Infrastructure:
- New unified startup scripts: start-prod.sh, start-test.sh, start-backend.sh
- Environment templates: .env.dev.example, .env.test.example, .env.prod.example
- Updated deployment scripts for Windows IIS
- Simplified SSH tunnel management
Documentation:
- Comprehensive CLAUDE.md with architecture overview
- Module-specific docs in docs/{data-entry,telegram}/
- Architecture decision records in docs/ARCHITECTURE-DECISIONS.md
- Deployment guides consolidated in deployment/windows/docs/
This migration reduces complexity, improves maintainability, and enables easier
deployment while maintaining all existing functionality.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
219 lines
7.7 KiB
Python
219 lines
7.7 KiB
Python
"""OCR API endpoints."""
|
|
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from backend.modules.data_entry.db.database import get_session
|
|
from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
|
|
from backend.modules.data_entry.services.ocr_service import ocr_service
|
|
from backend.modules.data_entry.services.ocr_engine import OCREngine
|
|
from backend.modules.data_entry.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry, PaymentMethod
|
|
|
|
# Auth integration (will be protected by middleware)
|
|
from shared.auth.dependencies import get_current_user
|
|
from shared.auth.models import CurrentUser
|
|
|
|
# Router that the OCR endpoints in this module register themselves on.
router = APIRouter()
|
|
|
|
|
|
@router.get("/status", response_model=OCRStatusResponse)
async def get_ocr_status():
    """Report whether any OCR engine is available and list the ones found."""
    detected = OCREngine.get_available_engines()
    is_ready = len(detected) > 0

    # One human-readable line: either the engine list or an install hint.
    status_text = (
        f"OCR service ready with engines: {', '.join(detected)}"
        if is_ready
        else "No OCR engines available. Install PaddleOCR or Tesseract."
    )

    return OCRStatusResponse(available=is_ready, engines=detected, message=status_text)
|
|
|
|
|
|
@router.post("/extract", response_model=OCRResponse)
async def extract_from_image(file: UploadFile = File(...)):
    """
    Extract receipt data from uploaded image.

    Accepts JPG, PNG, or PDF files (max 10MB).
    Returns extracted fields with confidence scores.

    The upload is written to a temporary file that is handed to the OCR
    service; that file is removed before the request finishes, whether OCR
    succeeds or fails.

    Raises:
        HTTPException: 400 when the content type is not allowed or the
            upload exceeds 10MB; 422 when the OCR service reports failure.
    """
    from decimal import Decimal

    allowed_types = ['image/jpeg', 'image/png', 'application/pdf']
    max_bytes = 10 * 1024 * 1024

    if file.content_type not in allowed_types:
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
        )

    # Get file extension; fall back to .jpg when it is missing or unexpected
    suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
    if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
        suffix = '.jpg'

    # Read and size-check BEFORE creating the temp file, so a rejected upload
    # never leaves an orphaned file on disk. Reading one byte past the limit
    # is enough to detect an oversize upload without loading all of it.
    content = await file.read(max_bytes + 1)

    # Check file size (10MB limit)
    if len(content) > max_bytes:
        raise HTTPException(
            status_code=400,
            detail="File too large. Maximum size is 10MB."
        )

    tmp_path = None
    try:
        # Save to temp file. Created inside the try so the finally clause
        # also cleans up if the write itself fails. The file is closed when
        # the with-block exits, before the OCR service opens it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(content)

        success, message, result = await ocr_service.process_image(
            tmp_path, file.content_type
        )

        if not success:
            raise HTTPException(status_code=422, detail=message)

        # Convert ExtractionResult to ExtractionData schema
        # Convert tva_entries from dict to TvaEntry objects
        tva_entries_schema = [
            TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
            for e in result.tva_entries
        ] if result.tva_entries else []

        # Convert payment_methods from dict to PaymentMethod objects
        # (amount goes through str() before Decimal, as the original did)
        payment_methods_list = [
            PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount'])))
            for pm in result.payment_methods
        ] if result.payment_methods else []

        # Auto-suggest payment_mode based on detected methods
        suggested_payment_mode = None
        if payment_methods_list:
            has_card = any(pm.method == 'CARD' for pm in payment_methods_list)
            if has_card:
                suggested_payment_mode = 'banca'
            # NUMERAR -> no auto-suggestion, user chooses between casa/avans

        data = ExtractionData(
            receipt_type=result.receipt_type,
            receipt_number=result.receipt_number,
            receipt_series=result.receipt_series,
            receipt_date=result.receipt_date,
            amount=result.amount,
            partner_name=result.partner_name,
            cui=result.cui,
            description=result.description,
            tva_entries=tva_entries_schema,
            tva_total=result.tva_total,
            address=result.address,
            items_count=result.items_count,
            payment_methods=payment_methods_list,
            suggested_payment_mode=suggested_payment_mode,
            confidence_amount=result.confidence_amount,
            confidence_date=result.confidence_date,
            confidence_vendor=result.confidence_vendor,
            overall_confidence=result.overall_confidence,
            raw_text=result.raw_text,
            ocr_engine=result.ocr_engine,
            processing_time_ms=result.processing_time_ms,
        )

        return OCRResponse(success=True, message=message, data=data)

    finally:
        # Clean up temp file (tmp_path is None only if creation itself failed)
        if tmp_path is not None and tmp_path.exists():
            os.unlink(tmp_path)
|
|
|
|
|
|
@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
async def extract_from_attachment(
    attachment_id: int,
    session: AsyncSession = Depends(get_session),
):
    """
    Run OCR on a file that was already uploaded as an attachment.

    Looks the attachment up by id, resolves its file path, and passes that
    file to the OCR service, then maps the result onto the response schema.

    Raises:
        HTTPException: 404 when the attachment row or its file is missing;
            400 when the stored MIME type is not JPEG, PNG, or PDF;
            422 when the OCR service reports failure.
    """
    record = await AttachmentCRUD.get_by_id(session, attachment_id)
    if not record:
        raise HTTPException(status_code=404, detail="Attachment not found")

    stored_file = AttachmentCRUD.get_file_path(record)
    if not stored_file.exists():
        raise HTTPException(status_code=404, detail="File not found on disk")

    # Only these stored MIME types are sent to OCR
    mime = record.mime_type
    if mime not in ('image/jpeg', 'image/png', 'application/pdf'):
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported for OCR: {mime}"
        )

    ok, message, result = await ocr_service.process_image(stored_file, mime)
    if not ok:
        raise HTTPException(status_code=422, detail=message)

    from decimal import Decimal

    # Dict entries from the OCR result -> schema objects
    vat_rows = []
    for entry in result.tva_entries or []:
        vat_rows.append(
            TvaEntry(code=entry.get('code'), percent=entry['percent'], amount=entry['amount'])
        )

    payments = []
    for item in result.payment_methods or []:
        payments.append(
            PaymentMethod(method=item['method'], amount=Decimal(str(item['amount'])))
        )

    # A detected CARD payment suggests 'banca'. NUMERAR gets no
    # auto-suggestion: the user chooses between casa/avans.
    if any(p.method == 'CARD' for p in payments):
        payment_mode_hint = 'banca'
    else:
        payment_mode_hint = None

    # Fields copied from the OCR result to the schema under the same name
    copied_fields = (
        'receipt_type',
        'receipt_number',
        'receipt_series',
        'receipt_date',
        'amount',
        'partner_name',
        'cui',
        'description',
        'tva_total',
        'address',
        'items_count',
        'confidence_amount',
        'confidence_date',
        'confidence_vendor',
        'overall_confidence',
        'raw_text',
        'ocr_engine',
        'processing_time_ms',
    )
    payload = {name: getattr(result, name) for name in copied_fields}
    payload['tva_entries'] = vat_rows
    payload['payment_methods'] = payments
    payload['suggested_payment_mode'] = payment_mode_hint

    return OCRResponse(success=True, message=message, data=ExtractionData(**payload))
|