"""OCR settings and metrics SQLModel models.""" from datetime import datetime from decimal import Decimal from enum import Enum from typing import Optional from sqlmodel import SQLModel, Field class OCREngine(str, Enum): """Available OCR engines.""" TESSERACT = "tesseract" DOCTR = "doctr" DOCTR_PLUS = "doctr_plus" # docTR with 2-tier sequential processing + early exit (optimized, recommended) PADDLEOCR = "paddleocr" class UserOCRPreference(SQLModel, table=True): """ User's preferred OCR engine setting. Each user can have one preferred OCR engine that will be auto-selected when they upload new receipts for processing. """ __tablename__ = "user_ocr_preferences" id: Optional[int] = Field(default=None, primary_key=True) # User identification username: str = Field(max_length=100, unique=True, index=True) # Preference settings preferred_engine: OCREngine = Field(default=OCREngine.DOCTR_PLUS) # Timestamps created_at: datetime = Field(default_factory=datetime.utcnow) updated_at: datetime = Field(default_factory=datetime.utcnow) class OCRJobMetrics(SQLModel, table=True): """ OCR job processing metrics for analytics. Stores metrics for each OCR job to enable: - Performance tracking by engine - Success rate analysis - Processing time trends - User-specific analytics """ __tablename__ = "ocr_job_metrics" id: Optional[int] = Field(default=None, primary_key=True) # Job identification job_id: str = Field(max_length=50, unique=True, index=True) # User and company context username: str = Field(max_length=100, index=True) company_id: Optional[int] = Field(default=None, index=True) # Engine used engine_requested: str = Field(max_length=20) # What user/auto requested engine_used: str = Field(max_length=50) # What was actually used (e.g., "doctr-light") # Processing metrics processing_time_ms: int = Field(default=0) file_size_bytes: int = Field(default=0) file_type: str = Field(max_length=50, default="image/jpeg") # MIME type original_filename: Optional[str] = Field(default=None, max_length=255) # Original uploaded filename # Success metrics success: bool = Field(default=True) error_message: Optional[str] = Field(default=None, max_length=500) # Extraction quality metrics overall_confidence: float = Field(default=0.0) fields_extracted: int = Field(default=0) # Number of fields successfully extracted needs_manual_review: Optional[bool] = Field(default=None) validation_warnings_count: int = Field(default=0) validation_errors_count: int = Field(default=0) # Timestamps created_at: datetime = Field(default_factory=datetime.utcnow) class OCRMetricsSummary(SQLModel): """ Summary metrics for OCR analytics. Not a database table - used for API responses. """ engine: str total_jobs: int successful_jobs: int failed_jobs: int success_rate: float # Computed: successful_jobs / total_jobs avg_processing_time_ms: float avg_confidence: float avg_fields_extracted: float