Add docTR as primary OCR engine with 2-tier sequential processing, OCR metrics tracking, and simplified engine selection. Features: - docTR OCR engine with light+medium preprocessing tiers - doctr_plus mode with early exit optimization (~65% fast path) - OCR metrics dashboard with per-engine statistics - User OCR preference persistence - Parallel worker pool for OCR processing - Cross-validation for extraction quality Engine options: tesseract, doctr, doctr_plus (recommended), paddleocr 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
269 lines
8.5 KiB
Python
269 lines
8.5 KiB
Python
"""
|
|
OCR Settings and Metrics API endpoints.

Endpoints:
- GET /settings/ocr-engines - Get active OCR engines and the recommended engine
- GET /settings/ocr-preference - Get user's preferred OCR engine
- POST /settings/ocr-preference - Set user's preferred OCR engine
- GET /metrics/ocr/summary - Get OCR metrics summary by engine
- GET /metrics/ocr/history - Get user's OCR job history
- GET /metrics/ocr/stats - Get overall OCR statistics
|
|
"""
|
|
|
|
from typing import List, Optional
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
from pydantic import BaseModel, Field
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from backend.modules.data_entry.db.database import get_session
|
|
from backend.modules.data_entry.db.crud.ocr_settings import OCRPreferenceCRUD, OCRMetricsCRUD
|
|
from backend.modules.data_entry.db.models.ocr_settings import OCREngine, OCRMetricsSummary
|
|
|
|
# Auth integration
|
|
from shared.auth.dependencies import get_current_user
|
|
from shared.auth.models import CurrentUser
|
|
|
|
# Router on which every endpoint in this module is registered via the
# @router.get / @router.post decorators below. It is created without
# arguments, so no prefix or tags are set here.
router = APIRouter()
|
|
|
|
|
|
# ============================================================================
|
|
# Schemas
|
|
# ============================================================================
|
|
|
|
class OCRPreferenceResponse(BaseModel):
    """Payload returned by the OCR preference endpoints."""

    # Name of the user the preference belongs to.
    username: str

    # Engine name reported as the user's current choice.
    preferred_engine: str

    # Engine names the client may pick from; when the caller supplies no
    # list, the four engine names below are used.
    available_engines: List[str] = Field(
        description="Available OCR engines",
        default=["tesseract", "doctr", "doctr_plus", "paddleocr"],
    )
|
|
|
|
|
|
class OCRPreferenceRequest(BaseModel):
    """Request body used to store a user's OCR engine preference."""

    # Engine name to store; "doctr_plus" is used when the field is omitted.
    preferred_engine: str = Field(
        "doctr_plus",
        description="Preferred OCR engine: tesseract, doctr, doctr_plus, paddleocr",
    )
|
|
|
|
|
|
class OCRMetricsHistoryItem(BaseModel):
    """Metrics recorded for one OCR job, as listed in the history response."""

    # --- Job identity -------------------------------------------------------
    job_id: str
    original_filename: Optional[str] = None  # optional; None when absent

    # --- Engine selection ---------------------------------------------------
    engine_requested: str
    engine_used: str

    # --- Outcome ------------------------------------------------------------
    success: bool
    processing_time_ms: int
    overall_confidence: float
    fields_extracted: int

    # Creation timestamp as a string; the history endpoint fills it with
    # datetime.isoformat().
    created_at: str
|
|
|
|
|
|
class OCRMetricsHistoryResponse(BaseModel):
    """Envelope returned by the OCR history endpoint."""

    # Job entries included in this response.
    items: List[OCRMetricsHistoryItem]

    # Item count reported alongside the entries.
    total: int
|
|
|
|
|
|
class OCRStatsResponse(BaseModel):
    """Aggregated OCR statistics returned by the stats endpoint."""

    # --- Job counts ---------------------------------------------------------
    total_jobs: int
    successful_jobs: int
    failed_jobs: int

    # --- Derived figures ----------------------------------------------------
    success_rate: float
    avg_processing_time_ms: float
    avg_confidence: float

    # Length, in days, of the period the figures refer to.
    period_days: int
|
|
|
|
|
|
class OCRActiveEnginesResponse(BaseModel):
    """Payload returned by the active OCR engines endpoint."""

    # Required: there is no default, the endpoint must always supply the list.
    engines: List[str] = Field(
        ...,
        description="List of active OCR engines from .env config",
    )

    # Engine suggested to clients; "doctr_plus" unless overridden.
    recommended: str = Field("doctr_plus", description="Recommended engine")
|
|
|
|
|
|
# ============================================================================
|
|
# OCR Engines Configuration Endpoint
|
|
# ============================================================================
|
|
|
|
@router.get("/settings/ocr-engines", response_model=OCRActiveEnginesResponse)
async def get_active_ocr_engines():
    """
    List the OCR engines that are active in the current configuration.

    These are the engines the frontend should offer in its dropdown.
    They are configured via the OCR_ACTIVE_ENGINES environment variable.

    Default: doctr,doctr_plus
    Available: tesseract, paddleocr, doctr, doctr_plus
    """
    # Settings are imported when the endpoint runs, not at module import time.
    from backend.modules.data_entry.config import settings

    active_engines = settings.ocr_active_engines_list
    return OCRActiveEnginesResponse(engines=active_engines, recommended="doctr_plus")
|
|
|
|
|
|
# ============================================================================
|
|
# OCR Preference Endpoints
|
|
# ============================================================================
|
|
|
|
@router.get("/settings/ocr-preference", response_model=OCRPreferenceResponse)
async def get_ocr_preference(
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return the authenticated user's preferred OCR engine.

    If the user has no saved preference, 'doctr_plus' is reported.
    The response also carries the list of available engines.
    """
    # Aliased so the service class does not shadow the OCREngine model enum
    # imported at module level.
    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass

    username = current_user.username
    stored = await OCRPreferenceCRUD.get_by_username(session, username)

    # The engine list is provided by the OCR service class.
    engines = OCREngineClass.get_available_engines()

    if stored:
        engine_name = stored.preferred_engine.value
    else:
        # No usable stored preference: report the default engine.
        engine_name = "doctr_plus"

    return OCRPreferenceResponse(
        username=username,
        preferred_engine=engine_name,
        available_engines=engines,
    )
|
|
|
|
|
|
@router.post("/settings/ocr-preference", response_model=OCRPreferenceResponse)
async def set_ocr_preference(
    request: OCRPreferenceRequest,
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Set user's preferred OCR engine.

    Valid engines: tesseract, doctr, doctr_plus, paddleocr
    Note: Available engines depend on .env configuration (OCR_ENABLE_PADDLEOCR, OCR_ENABLE_TESSERACT)

    Responds with 400 if the requested engine is not currently available
    or cannot be stored. On success, returns the saved preference together
    with the list of available engines.
    """
    # Aliased so the service class does not shadow the OCREngine model enum
    # imported at module level.
    from backend.modules.data_entry.services.ocr_engine import OCREngine as OCREngineClass

    # Engine name -> enum member that gets persisted.
    engine_map = {
        "tesseract": OCREngine.TESSERACT,
        "doctr": OCREngine.DOCTR,
        "doctr_plus": OCREngine.DOCTR_PLUS,
        "paddleocr": OCREngine.PADDLEOCR,
    }

    # Queried once and reused for both validation and the response.
    available = OCREngineClass.get_available_engines()

    # Only engines that are both available and storable may be selected.
    # Previously an available engine without an enum mapping was silently
    # saved as doctr_plus, replacing the user's choice without any error.
    selectable = [name for name in available if name in engine_map]

    if request.preferred_engine not in selectable:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid engine. Must be one of: {', '.join(selectable)}",
        )

    preference = await OCRPreferenceCRUD.create_or_update(
        session,
        current_user.username,
        engine_map[request.preferred_engine],
    )

    return OCRPreferenceResponse(
        username=current_user.username,
        preferred_engine=preference.preferred_engine.value,
        available_engines=available,
    )
|
|
|
|
|
|
# ============================================================================
|
|
# OCR Metrics Endpoints
|
|
# ============================================================================
|
|
|
|
@router.get("/metrics/ocr/summary", response_model=List[OCRMetricsSummary])
async def get_ocr_metrics_summary(
    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return OCR metrics aggregated per engine.

    Covers the engines used by the authenticated user within the
    requested number of days.
    """
    # Aggregation happens in the CRUD layer; its result is returned unchanged.
    return await OCRMetricsCRUD.get_summary_by_engine(
        session,
        days=days,
        username=current_user.username,
    )
|
|
|
|
|
|
@router.get("/metrics/ocr/history", response_model=OCRMetricsHistoryResponse)
async def get_ocr_metrics_history(
    limit: int = Query(default=50, ge=1, le=200, description="Max items to return"),
    offset: int = Query(default=0, ge=0, description="Items to skip"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return the authenticated user's OCR job history.

    Each entry describes one OCR job and its metrics, ordered by most
    recent first. Use `limit` and `offset` to page through the results.
    """
    # Ordering and paging are delegated to the CRUD layer; this function
    # only converts the returned rows into response items.
    rows = await OCRMetricsCRUD.get_user_history(
        session,
        username=current_user.username,
        limit=limit,
        offset=offset,
    )

    history_items = []
    for row in rows:
        history_items.append(
            OCRMetricsHistoryItem(
                job_id=row.job_id,
                engine_requested=row.engine_requested,
                engine_used=row.engine_used,
                processing_time_ms=row.processing_time_ms,
                success=row.success,
                overall_confidence=row.overall_confidence,
                fields_extracted=row.fields_extracted,
                # Timestamps are exposed as ISO 8601 strings.
                created_at=row.created_at.isoformat(),
                original_filename=row.original_filename,
            )
        )

    # NOTE(review): `total` is the number of items in this page, not the
    # number of jobs the user has overall - confirm this is what clients expect.
    return OCRMetricsHistoryResponse(items=history_items, total=len(history_items))
|
|
|
|
|
|
@router.get("/metrics/ocr/stats", response_model=OCRStatsResponse)
async def get_ocr_stats(
    days: int = Query(default=30, ge=1, le=365, description="Number of days to include"),
    session: AsyncSession = Depends(get_session),
    current_user: CurrentUser = Depends(get_current_user),
):
    """
    Return overall OCR statistics for the authenticated user.

    Includes aggregate figures such as success rate and average
    processing time for the requested number of days.
    """
    overall = await OCRMetricsCRUD.get_overall_stats(
        session,
        days=days,
        username=current_user.username,
    )

    # The CRUD result is unpacked as keyword arguments, so it must be a
    # mapping whose keys match the OCRStatsResponse fields.
    return OCRStatsResponse(**overall)
|