Files
roa2web-service-auto/backend/modules/data_entry/config.py
Marius Mutu 495790411f feat(ocr): Add docTR OCR engine with metrics infrastructure
Add docTR as primary OCR engine with 2-tier sequential processing,
OCR metrics tracking, and simplified engine selection.

Features:
- docTR OCR engine with light+medium preprocessing tiers
- doctr_plus mode with early exit optimization (~65% fast path)
- OCR metrics dashboard with per-engine statistics
- User OCR preference persistence
- Parallel worker pool for OCR processing
- Cross-validation for extraction quality

Engine options: tesseract, doctr, doctr_plus (recommended), paddleocr

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-02 05:37:16 +02:00

107 lines
2.9 KiB
Python

"""Application configuration using pydantic-settings."""
import os
from pathlib import Path
from typing import List
from pydantic_settings import BaseSettings
from functools import lru_cache
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Each attribute below is a setting with its default value. The nested
    ``Config`` points pydantic-settings at a ``.env`` file (UTF-8) and tells
    it to ignore unknown keys. The properties expose values derived from the
    raw settings (database URLs, parsed lists, byte sizes, ...).
    """

    # App info
    app_name: str = "Data Entry API"
    app_version: str = "1.0.0"
    debug: bool = False

    # API
    api_host: str = "0.0.0.0"
    api_port: int = 8003

    # SQLite Database
    sqlite_database_path: str = "data/receipts/receipts.db"

    # File uploads
    upload_path: str = "data/uploads"
    max_upload_size_mb: int = 10
    allowed_mime_types: List[str] = [
        "image/jpeg",
        "image/png",
        "image/gif",
        "image/webp",
        "application/pdf",
    ]

    # Oracle Database (for nomenclatures)
    oracle_user: str = ""
    oracle_password: str = ""
    oracle_host: str = "localhost"
    oracle_port: int = 1526
    oracle_sid: str = "ROA"

    # JWT Authentication
    # NOTE: the default secret is a placeholder and must be overridden.
    jwt_secret_key: str = "change-me-in-production"
    jwt_algorithm: str = "HS256"
    jwt_expire_minutes: int = 480

    # CORS (comma-separated list of allowed origins)
    cors_origins: str = "http://localhost:3010,http://localhost:3000"

    # OCR Engines (comma-separated list of active engines shown in UI)
    # Available: tesseract, paddleocr, doctr, doctr_plus
    # doctr_plus is recommended (2-tier sequential with early exit)
    ocr_active_engines: str = "doctr,doctr_plus"

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        extra = "ignore"

    @property
    def database_url(self) -> str:
        """Get SQLite database URL for async."""
        return f"sqlite+aiosqlite:///{self.sqlite_database_path}"

    @property
    def sync_database_url(self) -> str:
        """Get SQLite database URL for sync operations (Alembic)."""
        return f"sqlite:///{self.sqlite_database_path}"

    @property
    def upload_path_resolved(self) -> Path:
        """Get the upload directory as a ``Path``.

        Side effect: the directory (and any missing parents) is created on
        access if it does not exist yet.
        """
        path = Path(self.upload_path)
        path.mkdir(parents=True, exist_ok=True)
        return path

    @property
    def max_upload_size_bytes(self) -> int:
        """Get max upload size in bytes."""
        return self.max_upload_size_mb * 1024 * 1024

    @property
    def cors_origins_list(self) -> List[str]:
        """Get CORS origins as a list.

        Entries are whitespace-trimmed; blank entries (from an empty value,
        a trailing comma or a doubled comma) are dropped so that no empty
        origin string is ever returned.
        """
        trimmed = (origin.strip() for origin in self.cors_origins.split(","))
        return [origin for origin in trimmed if origin]

    @property
    def ocr_active_engines_list(self) -> List[str]:
        """Get OCR active engines as a list.

        Entries are whitespace-trimmed; blank entries (from an empty value,
        a trailing comma or a doubled comma) are dropped so that no empty
        engine name is ever returned.
        """
        trimmed = (engine.strip() for engine in self.ocr_active_engines.split(","))
        return [engine for engine in trimmed if engine]

    @property
    def oracle_dsn(self) -> str:
        """Get Oracle DSN string in ``host:port/sid`` form."""
        return f"{self.oracle_host}:{self.oracle_port}/{self.oracle_sid}"
@lru_cache()
def get_settings() -> Settings:
    """Return the shared ``Settings`` object, building it on first call.

    The call is memoized by ``lru_cache``, so later calls return the same
    instance instead of constructing a new one.
    """
    loaded_settings = Settings()
    return loaded_settings
# Convenience instance: module-level handle to the cached settings object.
# It is created when this module is first imported.
settings = get_settings()