Files
roa2web-service-auto/backend/.env.example
Marius Mutu 495790411f feat(ocr): Add docTR OCR engine with metrics infrastructure
Add docTR as primary OCR engine with 2-tier sequential processing,
OCR metrics tracking, and simplified engine selection.

Features:
- docTR OCR engine with light+medium preprocessing tiers
- doctr_plus mode with early exit optimization (~65% fast path)
- OCR metrics dashboard with per-engine statistics
- User OCR preference persistence
- Parallel worker pool for OCR processing
- Cross-validation for extraction quality

Engine options: tesseract, doctr, doctr_plus (recommended), paddleocr

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-02 05:37:16 +02:00

181 lines
7.0 KiB
Plaintext

# ============================================================================
# ROA2WEB Unified Backend - Environment Configuration Template
# ============================================================================
# Single backend process serving Reports, Data Entry, and Telegram modules
#
# SETUP INSTRUCTIONS:
# 1. Copy this template: cp .env.example .env.dev
# 2. Fill in your actual values in .env.dev
# 3. Run: ./start-dev.sh (auto-copies .env.dev to .env)
#
# ENVIRONMENT FILES:
# - .env.dev → Development config (committed to git with real values)
# - .env.test → Test config (committed to git)
# - .env.prod → Production config template (committed, use placeholders!)
# - .env → Active config (auto-generated, NOT committed)
#
# IMPORTANT: Never manually edit .env - edit .env.dev instead!
# ============================================================================
# ORACLE DATABASE CONFIGURATION (REQUIRED - Shared by all modules)
# ============================================================================
# Connection to CONTAFIN_ORACLE schema for authentication and user management
# Each company is a separate schema in Oracle Database
# Development: Through SSH tunnel (localhost:1526)
# Windows Production: Direct connection to Oracle server
ORACLE_USER=CONTAFIN_ORACLE
ORACLE_PASSWORD=SET_IN_PRODUCTION_ENV
ORACLE_HOST=localhost
ORACLE_PORT=1526
ORACLE_SID=ROA
# Development Only: Start SSH tunnel before running backend
# ./ssh_tunnel.sh start
# ./ssh_tunnel.sh status
# ============================================================================
# JWT AUTHENTICATION (REQUIRED - Shared by all modules)
# ============================================================================
# Used for JWT token generation and validation (shared/auth/jwt_handler.py)
# Generate strong secret: python3 -c "import secrets; print(secrets.token_urlsafe(32))"
JWT_SECRET_KEY=GENERATE_STRONG_SECRET_IN_PRODUCTION
JWT_ALGORITHM=HS256
# Token expiration settings (used by shared/auth/jwt_handler.py)
ACCESS_TOKEN_EXPIRE_MINUTES=30
REFRESH_TOKEN_EXPIRE_DAYS=7
# ============================================================================
# SESSION SECURITY - EMAIL 2FA (REQUIRED for Telegram email login)
# ============================================================================
# Used by Telegram module for session token validation
# Generate with: python3 -c "import secrets; print(secrets.token_urlsafe(32))"
AUTH_SESSION_SECRET=your-secure-random-secret-here-min-32-chars
# ============================================================================
# SERVER CONFIGURATION
# ============================================================================
# Unified backend server settings
API_HOST=0.0.0.0
API_PORT=8000
DEBUG=false
# CORS Origins (comma-separated)
CORS_ORIGINS=http://localhost:3000,http://localhost:5173
# ============================================================================
# REPORTS MODULE - CACHE CONFIGURATION (OPTIONAL - defaults provided)
# ============================================================================
# Two-tier hybrid cache system (L1: in-memory LRU, L2: SQLite persistent)
# Used by backend/modules/reports/cache/config.py
# Core Settings
REPORTS_CACHE_ENABLED=True
REPORTS_CACHE_TYPE=hybrid
REPORTS_CACHE_SQLITE_PATH=./data/cache/roa2web_cache.db
REPORTS_CACHE_MEMORY_MAX_SIZE=1000
REPORTS_CACHE_DEFAULT_TTL=900
# TTL per Cache Type (seconds)
REPORTS_CACHE_TTL_SCHEMA=86400
REPORTS_CACHE_TTL_COMPANIES=1800
REPORTS_CACHE_TTL_DASHBOARD_SUMMARY=1800
REPORTS_CACHE_TTL_DASHBOARD_TRENDS=1800
REPORTS_CACHE_TTL_INVOICES=600
REPORTS_CACHE_TTL_INVOICES_SUMMARY=900
REPORTS_CACHE_TTL_TREASURY=600
# Maintenance
REPORTS_CACHE_CLEANUP_INTERVAL=3600
# Event-Based Invalidation (experimental)
REPORTS_CACHE_AUTO_INVALIDATE=False
REPORTS_CACHE_CHECK_INTERVAL=300
# Performance Tracking
REPORTS_CACHE_TRACK_PERFORMANCE=True
REPORTS_CACHE_BENCHMARK_ON_STARTUP=False
# ============================================================================
# DATA ENTRY MODULE - CONFIGURATION
# ============================================================================
# Data Entry module settings (receipts, OCR, etc.)
# SQLite Database
DATA_ENTRY_SQLITE_DATABASE_PATH=data/receipts/receipts.db
# File uploads
DATA_ENTRY_UPLOAD_PATH=data/receipts/uploads
DATA_ENTRY_MAX_UPLOAD_SIZE_MB=10
# ============================================================================
# OCR ENGINE CONFIGURATION
# ============================================================================
# Control which OCR engines are loaded at startup.
# Disabling engines saves memory but limits available OCR modes.
# Enable/disable PaddleOCR (set to 'false' to save ~800MB RAM)
# When disabled: 'paddleocr' engine unavailable
OCR_ENABLE_PADDLEOCR=true
# Enable/disable Tesseract (set to 'false' to save ~50MB RAM)
# When disabled: 'tesseract' engine unavailable
OCR_ENABLE_TESSERACT=true
# Default OCR engine when not specified in request
# Options: tesseract, doctr, doctr_plus, paddleocr
# Recommended: doctr_plus (2-tier sequential with early exit, ~7.5s avg)
OCR_DEFAULT_ENGINE=doctr_plus
# Active OCR engines shown in frontend dropdown (comma-separated)
# Options: tesseract, doctr, doctr_plus, paddleocr
# doctr_plus: 73.3% perfect, 7.5s avg, 65% fast path (recommended)
# doctr: 63.3% perfect, less accurate than doctr_plus but simpler and faster
OCR_ACTIVE_ENGINES=tesseract,doctr,doctr_plus,paddleocr
# OCR Worker Pool Configuration
# Number of parallel OCR workers (each loads ~1GB for docTR)
# Recommended: 2 for 8GB RAM, 3 for 16GB RAM
OCR_WORKERS=2
# Max tasks a worker processes before it is restarted (0 = never restart, which avoids the 40-60s warmup after each restart)
# Set to 0 for testing; use 10-20 in production (periodic restarts limit memory growth from leaks)
OCR_MAX_TASKS_PER_CHILD=0
# ============================================================================
# TELEGRAM MODULE - BOT CONFIGURATION (REQUIRED for Telegram features)
# ============================================================================
# Obtain bot token from @BotFather on Telegram
TELEGRAM_BOT_TOKEN=your_bot_token_here
# ============================================================================
# TELEGRAM MODULE - EMAIL AUTHENTICATION (SMTP) (REQUIRED for email 2FA)
# ============================================================================
# Required for email-based 2FA authentication flow
# Users can log in with email + password instead of linking through the web app
# SMTP Server Configuration
TELEGRAM_SMTP_HOST=mail.romfast.ro
TELEGRAM_SMTP_PORT=587
TELEGRAM_SMTP_USER=ups@romfast.ro
TELEGRAM_SMTP_PASSWORD=your_smtp_password_here
TELEGRAM_SMTP_FROM_EMAIL=ups@romfast.ro
TELEGRAM_SMTP_FROM_NAME=ROA2WEB
TELEGRAM_SMTP_USE_TLS=true
# Email Retry Settings
TELEGRAM_EMAIL_MAX_RETRIES=3
TELEGRAM_EMAIL_RETRY_DELAY=2.0
# ============================================================================
# TELEGRAM MODULE - DATABASE (SQLite for bot data)
# ============================================================================
# Separate SQLite database for Telegram bot auth codes and sessions
TELEGRAM_SQLITE_DATABASE_PATH=data/telegram/telegram.db