Files
roa2web-service-auto/data-entry-app/backend/app/main.py
Marius Mutu 9f06482681 feat: Improve OCR adaptive pipeline with early exit and better pattern matching
- Add adaptive 3-step OCR pipeline with early exit when all 5 fields found
- Add pattern for "C. I. F." with spaces (OCR artifact from PaddleOCR)
- Add pattern for YYYY. MM. DD date format with spaces (OMV/Petrom receipts)
- Add pattern for "OTAL TAXE" with T cut off and reversed amount position
- Make TVA rate pattern more flexible (code letter optional, handle "-21%")
- Replace logger.info with print(flush=True) for better debugging visibility
- Improve OCRPreview.vue to show extraction progress and raw OCR text

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:54:52 +02:00

111 lines
2.9 KiB
Python

"""FastAPI application entry point for Data Entry App."""
import sys
import logging
import threading
from pathlib import Path
from contextlib import asynccontextmanager
from fastapi import FastAPI
# Root logger setup: emit INFO and above, prefixed with a wall-clock time
# (hours:minutes:seconds), the logger name and the level.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
# Make the "shared" directory, located four levels above this file,
# importable by putting it at the front of the module search path.
project_root = Path(__file__).parent.parent.parent.parent
sys.path.insert(0, str(project_root.joinpath("shared")))
from app.config import settings
from app.db.database import init_db
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run application startup and shutdown logic around the serving phase.

    Startup prints a banner, awaits ``init_db()``, reads the resolved upload
    path from settings, and starts a daemon thread that pre-loads the OCR
    engine; the thread is not joined, so startup does not wait for it.

    Shutdown prints a message. It is emitted from a ``finally`` clause so it
    also runs when an exception is thrown into the generator at ``yield``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. Control returns to the framework while the app is running.
    """
    # Startup
    print(f"Starting {settings.app_name} v{settings.app_version}")

    # Initialize database
    await init_db()
    print("Database initialized")

    # Read the resolved upload path once and reuse it for the message,
    # instead of evaluating it as a bare expression whose result is dropped.
    # NOTE(review): presumably this attribute also creates the directory
    # (the original comment said "Ensure upload directory exists") - confirm
    # against app.config.
    upload_dir = settings.upload_path_resolved
    print(f"Upload path: {upload_dir}")

    # Pre-initialize OCR engine in background (PaddleOCR takes 15-20s)
    def init_ocr_background():
        """Import the OCR service and trigger initialization of its engine."""
        try:
            # Imported here so the import itself also happens on this thread.
            from app.services.ocr_service import ocr_service
            ocr_service.ocr_engine._init_paddle_lazy()
            print("OCR engine ready")
        except Exception as e:
            # Best effort: a failed pre-load is reported but never fatal.
            print(f"Warning: OCR engine pre-load failed: {e}")

    print("Starting OCR engine pre-load (background)...")
    threading.Thread(target=init_ocr_background, daemon=True).start()

    try:
        yield
    finally:
        # Shutdown
        print("Shutting down...")
# Application instance; title and version come from settings, and the
# startup/shutdown handling is delegated to the lifespan context manager.
app = FastAPI(
    lifespan=lifespan,
    title=settings.app_name,
    version=settings.app_version,
    description="API pentru introducere bonuri fiscale cu workflow de aprobare",
)
# Cross-origin requests: origins are taken from settings; credentials are
# allowed, and every HTTP method and request header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=settings.cors_origins_list,
)
# Serve uploaded files under /uploads straight from the app (optional - in
# production they can be served through nginx instead).
# NOTE(review): the existence check runs at import time, before lifespan()
# touches settings.upload_path_resolved; if the directory is only created
# there, the mount is skipped on a first run - confirm this is intended.
uploads_path = Path(settings.upload_path)
if uploads_path.exists():
    app.mount(
        path="/uploads",
        app=StaticFiles(directory=str(uploads_path)),
        name="uploads",
    )
# Liveness probe
@app.get("/health")
async def health_check():
    """Report a fixed "healthy" status with the app name and version."""
    payload = {"status": "healthy"}
    payload["app"] = settings.app_name
    payload["version"] = settings.app_version
    return payload
# Register the API routers; they are imported here, after `app` exists.
from app.routers import receipts, ocr

app.include_router(
    receipts.router,
    prefix="/api/receipts",
    tags=["receipts"],
)
app.include_router(
    ocr.router,
    prefix="/api/ocr",
    tags=["ocr"],
)
# Landing route
@app.get("/")
async def root():
    """Return the app name and version plus the docs and health paths."""
    return dict(
        name=settings.app_name,
        version=settings.app_version,
        docs="/docs",
        health="/health",
    )