Files
roa2web-service-auto/backend/modules/data_entry/routers/ocr_settings.py
Marius Mutu 495790411f feat(ocr): Add docTR OCR engine with metrics infrastructure
Add docTR as primary OCR engine with 2-tier sequential processing,
OCR metrics tracking, and simplified engine selection.

Features:
- docTR OCR engine with light+medium preprocessing tiers
- doctr_plus mode with early exit optimization (~65% fast path)
- OCR metrics dashboard with per-engine statistics
- User OCR preference persistence
- Parallel worker pool for OCR processing
- Cross-validation for extraction quality

Engine options: tesseract, doctr, doctr_plus (recommended), paddleocr

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-02 05:37:16 +02:00

269 lines
8.5 KiB
Python

"""
OCR Settings and Metrics API endpoints.
Endpoints:
- GET /settings/ocr-engines - Get list of active OCR engines configured in .env
- GET /settings/ocr-preference - Get user's preferred OCR engine
- POST /settings/ocr-preference - Set user's preferred OCR engine
- GET /metrics/ocr/summary - Get OCR metrics summary by engine
- GET /metrics/ocr/history - Get user's OCR job history
- GET /metrics/ocr/stats - Get overall OCR statistics
"""
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.crud.ocr_settings import OCRPreferenceCRUD, OCRMetricsCRUD
from backend.modules.data_entry.db.models.ocr_settings import OCREngine, OCRMetricsSummary
# Auth integration
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
router = APIRouter()
# ============================================================================
# Schemas
# ============================================================================
class OCRPreferenceResponse(BaseModel):
    """Payload returned by the OCR preference endpoints.

    Holds the username the preference belongs to, the selected engine name
    and the engine names offered for selection.
    """

    # Username the preference belongs to.
    username: str
    # Name of the selected OCR engine.
    preferred_engine: str
    # Engine names offered for selection; falls back to the four names below
    # when no list is supplied.
    available_engines: List[str] = Field(
        description="Available OCR engines",
        default=["tesseract", "doctr", "doctr_plus", "paddleocr"],
    )
class OCRPreferenceRequest(BaseModel):
    """Request body used to store a user's OCR engine preference."""

    # Engine name to store; "doctr_plus" is used when the field is omitted.
    preferred_engine: str = Field(
        description="Preferred OCR engine: tesseract, doctr, doctr_plus, paddleocr",
        default="doctr_plus",
    )
class OCRMetricsHistoryItem(BaseModel):
    """Metrics describing one OCR job, as listed by the history endpoint."""

    # Identifier of the OCR job.
    job_id: str

    # Engine that was asked for versus the engine that was used.
    engine_requested: str
    engine_used: str

    # Outcome figures recorded for the job.
    processing_time_ms: int
    success: bool
    overall_confidence: float
    fields_extracted: int

    # Creation timestamp as a string (the history endpoint fills it from
    # ``created_at.isoformat()``).
    created_at: str

    # Original file name; optional, ``None`` when absent.
    original_filename: Optional[str] = None
class OCRMetricsHistoryResponse(BaseModel):
    """Envelope returned by the OCR history endpoint."""

    # History entries included in this response.
    items: List[OCRMetricsHistoryItem]
    # Item count reported alongside the entries.
    total: int
class OCRStatsResponse(BaseModel):
    """Aggregated OCR statistics returned by the stats endpoint."""

    # Job counts.
    total_jobs: int
    successful_jobs: int
    failed_jobs: int

    # Derived figures.
    success_rate: float
    avg_processing_time_ms: float
    avg_confidence: float

    # Number of days the statistics cover.
    period_days: int
class OCRActiveEnginesResponse(BaseModel):
    """Payload returned by the active OCR engines endpoint."""

    # Engine names that are currently active; required.
    engines: List[str] = Field(
        description="List of active OCR engines from .env config",
    )
    # Engine suggested to clients; "doctr_plus" unless overridden.
    recommended: str = Field(
        description="Recommended engine",
        default="doctr_plus",
    )
# ============================================================================
# OCR Engines Configuration Endpoint
# ============================================================================
@router.get("/settings/ocr-engines", response_model=OCRActiveEnginesResponse)
async def get_active_ocr_engines():
    """
    List the OCR engines that are active in the .env configuration.

    These are the engines the frontend dropdown should offer. They are
    configured through the OCR_ACTIVE_ENGINES environment variable.

    Default: doctr,doctr_plus
    Available: tesseract, paddleocr, doctr, doctr_plus
    """
    # Settings are imported inside the handler rather than at module level
    # (reason not visible here; possibly to avoid an import cycle).
    from backend.modules.data_entry.config import settings

    active_engines = settings.ocr_active_engines_list
    return OCRActiveEnginesResponse(
        recommended="doctr_plus",
        engines=active_engines,
    )
# ============================================================================
# OCR Preference Endpoints
# ============================================================================
@router.get("/settings/ocr-preference", response_model=OCRPreferenceResponse)
async def get_ocr_preference(
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return the authenticated user's preferred OCR engine.

    Falls back to 'doctr_plus' when the user has no stored preference.
    The response also lists the engines reported as available by the
    OCR service.
    """
    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass

    username = current_user.username
    stored = await OCRPreferenceCRUD.get_by_username(session, username)

    # Engines the OCR service currently reports as usable.
    engines = OCREngineClass.get_available_engines()

    # No stored row means the user never chose an engine: use the default.
    if stored:
        engine_name = stored.preferred_engine.value
    else:
        engine_name = "doctr_plus"

    return OCRPreferenceResponse(
        username=username,
        preferred_engine=engine_name,
        available_engines=engines,
    )
@router.post("/settings/ocr-preference", response_model=OCRPreferenceResponse)
async def set_ocr_preference(
    request: OCRPreferenceRequest,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Set user's preferred OCR engine.

    Valid engines: tesseract, doctr, doctr_plus, paddleocr
    Note: Available engines depend on .env configuration (OCR_ENABLE_PADDLEOCR, OCR_ENABLE_TESSERACT)

    Raises:
        HTTPException: 400 when the requested engine is not currently
            available, or has no corresponding OCREngine enum member.
    """
    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass

    # Map engine name to the persistence enum.
    engine_map = {
        "tesseract": OCREngine.TESSERACT,
        "doctr": OCREngine.DOCTR,
        "doctr_plus": OCREngine.DOCTR_PLUS,
        "paddleocr": OCREngine.PADDLEOCR,
    }

    # Fetched once and reused for both validation and the response.
    available = OCREngineClass.get_available_engines()

    # An engine can only be stored when it is both available and mapped to an
    # enum member. Rejecting unmapped names avoids silently saving
    # 'doctr_plus' in place of what the user actually asked for.
    selectable = [name for name in available if name in engine_map]
    if request.preferred_engine not in selectable:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid engine. Must be one of: {', '.join(selectable)}"
        )

    # Save preference
    preference = await OCRPreferenceCRUD.create_or_update(
        session,
        current_user.username,
        engine_map[request.preferred_engine],
    )

    return OCRPreferenceResponse(
        username=current_user.username,
        preferred_engine=preference.preferred_engine.value,
        available_engines=available,
    )
# ============================================================================
# OCR Metrics Endpoints
# ============================================================================
@router.get("/metrics/ocr/summary", response_model=List[OCRMetricsSummary])
async def get_ocr_metrics_summary(
    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return per-engine OCR metrics for the authenticated user.

    The summary is fetched for the last ``days`` days (1-365, default 30)
    and is passed through unchanged from the metrics CRUD layer.
    """
    return await OCRMetricsCRUD.get_summary_by_engine(
        session,
        username=current_user.username,
        days=days,
    )
@router.get("/metrics/ocr/history", response_model=OCRMetricsHistoryResponse)
async def get_ocr_metrics_history(
    limit: int = Query(default=50, ge=1, le=200, description="Max items to return"),
    offset: int = Query(default=0, ge=0, description="Items to skip"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return one page of the authenticated user's OCR job history.

    ``limit`` and ``offset`` are forwarded to the metrics CRUD layer; each
    returned record is converted to an ``OCRMetricsHistoryItem``.
    """
    records = await OCRMetricsCRUD.get_user_history(
        session,
        username=current_user.username,
        limit=limit,
        offset=offset,
    )

    history_items = []
    for record in records:
        entry = OCRMetricsHistoryItem(
            job_id=record.job_id,
            engine_requested=record.engine_requested,
            engine_used=record.engine_used,
            processing_time_ms=record.processing_time_ms,
            success=record.success,
            overall_confidence=record.overall_confidence,
            fields_extracted=record.fields_extracted,
            # Timestamps are serialised as ISO-8601 strings.
            created_at=record.created_at.isoformat(),
            original_filename=record.original_filename,
        )
        history_items.append(entry)

    # NOTE(review): ``total`` is the number of items in this page, not the
    # overall number of jobs for the user - confirm that clients expect this.
    page_size = len(history_items)
    return OCRMetricsHistoryResponse(items=history_items, total=page_size)
@router.get("/metrics/ocr/stats", response_model=OCRStatsResponse)
async def get_ocr_stats(
    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return overall OCR statistics for the authenticated user.

    The statistics for the last ``days`` days (1-365, default 30) are fetched
    from the metrics CRUD layer and unpacked into an ``OCRStatsResponse``.
    """
    aggregated = await OCRMetricsCRUD.get_overall_stats(
        session,
        username=current_user.username,
        days=days,
    )
    # The CRUD result is expanded as keyword arguments, so its keys must match
    # the response model's field names.
    response = OCRStatsResponse(**aggregated)
    return response