feat: Improve OCR adaptive pipeline with early exit and better pattern matching
- Add adaptive 3-step OCR pipeline with early exit when all 5 fields are found
- Add pattern for "C. I. F." with spaces (OCR artifact from PaddleOCR)
- Add pattern for the "YYYY. MM. DD" date format with spaces (OMV/Petrom receipts)
- Add pattern for "OTAL TAXE" (leading "T" cut off) with the amount in reversed position
- Make the TVA rate pattern more flexible (code letter optional, handle "-21%")
- Replace logger.info with print(flush=True) for better debugging visibility
- Improve OCRPreview.vue to show extraction progress and raw OCR text

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,10 +1,19 @@
|
||||
"""FastAPI application entry point for Data Entry App."""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
# Root logging setup: emit records at INFO level and above, formatted as
# "HH:MM:SS - <logger name> - <level> - <message>".
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
@@ -30,6 +39,18 @@ async def lifespan(app: FastAPI):
|
||||
settings.upload_path_resolved
|
||||
print(f"Upload path: {settings.upload_path_resolved}")
|
||||
|
||||
# Pre-initialize OCR engine in background (PaddleOCR takes 15-20s)
|
||||
def init_ocr_background():
    """Pre-initialize the OCR engine; meant to be run as a thread target.

    Imports the shared ``ocr_service`` object and calls the lazy Paddle
    initializer on its ``ocr_engine``. Progress is reported on stdout.

    The function never raises: any exception from the import or from the
    initialization is caught and reported as a warning, so a failed
    pre-load does not take the caller down.
    """
    try:
        # Imported inside the ``try`` so that an import failure is reported
        # the same way as an initialization failure.
        from app.services.ocr_service import ocr_service

        ocr_service.ocr_engine._init_paddle_lazy()
        # flush=True: this runs on a background thread; without an explicit
        # flush a block-buffered stdout (pipe, container log) may show the
        # message late or not at all.
        print("OCR engine ready", flush=True)
    except Exception as e:
        # Best-effort warm-up: report the failure and return normally.
        print(f"Warning: OCR engine pre-load failed: {e}", flush=True)
|
||||
|
||||
print("Starting OCR engine pre-load (background)...")
|
||||
threading.Thread(target=init_ocr_background, daemon=True).start()
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
|
||||
Reference in New Issue
Block a user