""" SQLite persistent cache (L2 cache) Persistent, survives restarts, unlimited size Uses singleton connection pattern with asyncio.Lock for write serialization to prevent "database is locked" errors under concurrent access. """ import time import json import logging import asyncio import aiosqlite from typing import Any, Optional, List, Dict from pathlib import Path from decimal import Decimal from datetime import datetime, date # SQLite busy timeout in milliseconds (wait for lock instead of failing immediately) SQLITE_BUSY_TIMEOUT_MS = 5000 logger = logging.getLogger(__name__) class CustomJSONEncoder(json.JSONEncoder): """Custom JSON encoder that handles Pydantic models, Decimal, datetime, etc.""" def default(self, obj): # Handle Pydantic models if hasattr(obj, 'dict'): return obj.dict() if hasattr(obj, 'model_dump'): # Pydantic v2 return obj.model_dump() # Handle Decimal if isinstance(obj, Decimal): return float(obj) # Handle datetime/date if isinstance(obj, (datetime, date)): return obj.isoformat() return super().default(obj) class SQLiteConnectionManager: """ Singleton connection manager with write serialization. Solves "database is locked" errors by: 1. Maintaining a single persistent connection (instead of N connections per request) 2. Serializing all write operations through an asyncio.Lock 3. Using WAL mode for better concurrent read performance Architecture: ┌─────────────────────────────────────┐ │ SQLiteConnectionManager │ │ (SINGLETON) │ │ │ │ _connection: aiosqlite.Connection │ │ _write_lock: asyncio.Lock │ └─────────────────────────────────────┘ │ ┌───────────────┼───────────────┐ ▼ ▼ ▼ Task 1 Task 2 Task N cache.get() cache.set() cache.get() │ │ │ └───────────────┴───────────────┘ │ async with _write_lock: (serialized writes) """ _instance: Optional['SQLiteConnectionManager'] = None _instance_lock: asyncio.Lock = None # Will be created on first use def __init__(self, db_path: str): """ Initialize connection manager (called only by get_instance). Args: db_path: Path to SQLite database file """ self.db_path = db_path self._connection: Optional[aiosqlite.Connection] = None self._write_lock: Optional[asyncio.Lock] = None self._initialized = False @classmethod async def get_instance(cls, db_path: str) -> 'SQLiteConnectionManager': """ Get or create singleton instance. Thread-safe singleton pattern using asyncio.Lock. Args: db_path: Path to SQLite database file Returns: SQLiteConnectionManager singleton instance """ # Create instance lock on first call (must be done in async context) if cls._instance_lock is None: cls._instance_lock = asyncio.Lock() async with cls._instance_lock: if cls._instance is None or cls._instance.db_path != db_path: cls._instance = cls(db_path) return cls._instance async def initialize(self): """ Create connection with WAL mode and busy timeout. Sets up: - Busy timeout (5 seconds) - wait for locks instead of failing - WAL journal mode - allows concurrent reads while writing - Write lock for serializing write operations """ if self._initialized: return # Create write lock in async context self._write_lock = asyncio.Lock() # Create persistent connection self._connection = await aiosqlite.connect(self.db_path) await self._connection.execute(f"PRAGMA busy_timeout={SQLITE_BUSY_TIMEOUT_MS}") await self._connection.execute("PRAGMA journal_mode=WAL") await self._connection.commit() self._initialized = True logger.info(f"SQLite connection manager initialized: {self.db_path}") async def get_connection(self) -> aiosqlite.Connection: """ Get the persistent connection, with health check. If connection is unhealthy (closed or stale), reconnects automatically. Returns: Active aiosqlite connection """ if self._connection is None or not await self._is_healthy(): await self._reconnect() return self._connection async def _is_healthy(self) -> bool: """ Check if connection is valid. Returns: True if connection can execute queries, False otherwise """ try: async with self._connection.execute("SELECT 1") as cursor: await cursor.fetchone() return True except Exception: return False async def _reconnect(self): """Reconnect if connection was lost.""" logger.warning("SQLite connection unhealthy, reconnecting...") # Close old connection if exists if self._connection: try: await self._connection.close() except Exception: pass # Create new connection self._connection = await aiosqlite.connect(self.db_path) await self._connection.execute(f"PRAGMA busy_timeout={SQLITE_BUSY_TIMEOUT_MS}") await self._connection.execute("PRAGMA journal_mode=WAL") await self._connection.commit() logger.info("SQLite connection re-established") @property def write_lock(self) -> asyncio.Lock: """Get the write lock for serializing write operations.""" return self._write_lock async def close(self): """Close the connection and reset singleton.""" if self._connection: try: await self._connection.close() except Exception as e: logger.warning(f"Error closing SQLite connection: {e}") self._connection = None self._initialized = False # Reset singleton SQLiteConnectionManager._instance = None logger.info("SQLite connection manager closed") class SQLiteCache: """ SQLite-based persistent cache Features: - Persistent storage (survives restarts) - JSON serialization for complex objects - Schema mappings (permanent cache for company->schema) - Watermarks for event-based invalidation - Performance tracking and benchmarks - Singleton connection with write serialization (prevents "database is locked") """ def __init__(self, db_path: str): """ Initialize SQLite cache Args: db_path: Path to SQLite database file """ self.db_path = db_path self._conn_manager: Optional[SQLiteConnectionManager] = None self._ensure_db_dir() def _ensure_db_dir(self): """Ensure database directory exists""" db_dir = Path(self.db_path).parent db_dir.mkdir(parents=True, exist_ok=True) async def init_db(self): """Initialize database schema with WAL mode enabled""" # Get or create singleton connection manager self._conn_manager = await SQLiteConnectionManager.get_instance(self.db_path) await self._conn_manager.initialize() # Create tables using the persistent connection async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() # Table: cache_entries await conn.execute(""" CREATE TABLE IF NOT EXISTS cache_entries ( cache_key TEXT PRIMARY KEY, cache_type TEXT NOT NULL, company_id INTEGER, data_json TEXT NOT NULL, created_at REAL NOT NULL, expires_at REAL NOT NULL, hit_count INTEGER DEFAULT 0, last_accessed REAL ) """) await conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_type ON cache_entries(cache_type)") await conn.execute("CREATE INDEX IF NOT EXISTS idx_company_id ON cache_entries(company_id)") await conn.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON cache_entries(expires_at)") # Table: schema_mappings (PERMANENT) await conn.execute(""" CREATE TABLE IF NOT EXISTS schema_mappings ( id_firma INTEGER PRIMARY KEY, schema TEXT NOT NULL, created_at REAL NOT NULL, last_verified REAL ) """) # Table: query_benchmarks await conn.execute(""" CREATE TABLE IF NOT EXISTS query_benchmarks ( cache_type TEXT PRIMARY KEY, avg_time_ms REAL NOT NULL, sample_count INTEGER DEFAULT 0, last_updated REAL ) """) # Table: performance_log await conn.execute(""" CREATE TABLE IF NOT EXISTS performance_log ( id INTEGER PRIMARY KEY AUTOINCREMENT, cache_type TEXT NOT NULL, company_id INTEGER, cache_hit BOOLEAN NOT NULL, response_time_ms REAL NOT NULL, estimated_oracle_time_ms REAL, time_saved_ms REAL, username TEXT, timestamp REAL NOT NULL ) """) await conn.execute("CREATE INDEX IF NOT EXISTS idx_perf_timestamp ON performance_log(timestamp)") await conn.execute("CREATE INDEX IF NOT EXISTS idx_perf_cache_type ON performance_log(cache_type)") # Table: user_cache_settings await conn.execute(""" CREATE TABLE IF NOT EXISTS user_cache_settings ( username TEXT PRIMARY KEY, cache_enabled BOOLEAN DEFAULT TRUE, created_at REAL, updated_at REAL ) """) # Table: cache_config await conn.execute(""" CREATE TABLE IF NOT EXISTS cache_config ( key TEXT PRIMARY KEY, value TEXT NOT NULL, updated_at REAL ) """) # Table: cache_watermarks await conn.execute(""" CREATE TABLE IF NOT EXISTS cache_watermarks ( company_id INTEGER PRIMARY KEY, schema TEXT NOT NULL, max_id_act INTEGER NOT NULL, checked_at REAL NOT NULL ) """) await conn.commit() logger.info("SQLite cache database initialized") async def get(self, key: str) -> Optional[Any]: """ Get value from cache Args: key: Cache key Returns: Cached value or None if not found/expired """ # Use write lock because we may update hit_count or delete expired entries async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() async with conn.execute(""" SELECT data_json, expires_at FROM cache_entries WHERE cache_key = ? """, (key,)) as cursor: result = await cursor.fetchone() if not result: return None data_json, expires_at = result # Check TTL expiration if expires_at < time.time(): # Expired - delete and return None await conn.execute("DELETE FROM cache_entries WHERE cache_key = ?", (key,)) await conn.commit() logger.debug(f"SQLite cache expired: {key}") return None # Update hit_count and last_accessed await conn.execute(""" UPDATE cache_entries SET hit_count = hit_count + 1, last_accessed = ? WHERE cache_key = ? """, (time.time(), key)) await conn.commit() logger.debug(f"SQLite cache HIT: {key}") return json.loads(data_json) async def set(self, key: str, value: Any, cache_type: str, company_id: Optional[int], ttl: int): """ Set value in cache Args: key: Cache key value: Value to cache cache_type: Type of cache entry company_id: Company ID (None for global caches) ttl: Time to live in seconds """ # Use custom encoder to handle Pydantic models, Decimal, datetime, etc. data_json = json.dumps(value, cls=CustomJSONEncoder) now = time.time() expires_at = now + ttl async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() await conn.execute(""" INSERT OR REPLACE INTO cache_entries (cache_key, cache_type, company_id, data_json, created_at, expires_at, hit_count, last_accessed) VALUES (?, ?, ?, ?, ?, ?, 0, ?) """, (key, cache_type, company_id, data_json, now, expires_at, now)) await conn.commit() logger.debug(f"SQLite cache SET: {key} (TTL: {ttl}s)") async def delete(self, key: str) -> bool: """Delete entry from cache""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() cursor = await conn.execute("DELETE FROM cache_entries WHERE cache_key = ?", (key,)) await conn.commit() deleted = cursor.rowcount > 0 if deleted: logger.debug(f"SQLite cache deleted: {key}") return deleted async def clear(self): """Clear all cache entries""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() cursor = await conn.execute("DELETE FROM cache_entries") await conn.commit() count = cursor.rowcount logger.info(f"SQLite cache cleared: {count} entries removed") async def clear_by_company(self, company_id: int): """Clear all entries for specific company""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() cursor = await conn.execute("DELETE FROM cache_entries WHERE company_id = ?", (company_id,)) await conn.commit() count = cursor.rowcount logger.info(f"SQLite cache cleared for company {company_id}: {count} entries") async def clear_by_type(self, cache_type: str): """Clear all entries of specific type""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() cursor = await conn.execute("DELETE FROM cache_entries WHERE cache_type = ?", (cache_type,)) await conn.commit() count = cursor.rowcount logger.info(f"SQLite cache cleared for type '{cache_type}': {count} entries") async def cleanup_expired(self): """Remove all expired entries""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() cursor = await conn.execute("DELETE FROM cache_entries WHERE expires_at < ?", (time.time(),)) await conn.commit() count = cursor.rowcount if count > 0: logger.info(f"SQLite cache cleanup: {count} expired entries removed") # Schema Mappings (PERMANENT) async def get_schema_mapping(self, company_id: int) -> Optional[str]: """Get permanent cached schema for company (READ-ONLY, no lock needed)""" conn = await self._conn_manager.get_connection() async with conn.execute(""" SELECT schema FROM schema_mappings WHERE id_firma = ? """, (company_id,)) as cursor: result = await cursor.fetchone() return result[0] if result else None async def set_schema_mapping(self, company_id: int, schema: str): """Set permanent schema mapping (never expires)""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() await conn.execute(""" INSERT OR REPLACE INTO schema_mappings (id_firma, schema, created_at, last_verified) VALUES (?, ?, ?, ?) """, (company_id, schema, time.time(), time.time())) await conn.commit() # Benchmarks async def get_benchmark(self, cache_type: str) -> Optional[float]: """Get average benchmark time for cache type (READ-ONLY, no lock needed)""" conn = await self._conn_manager.get_connection() async with conn.execute(""" SELECT avg_time_ms FROM query_benchmarks WHERE cache_type = ? """, (cache_type,)) as cursor: result = await cursor.fetchone() return result[0] if result else None async def set_benchmark(self, cache_type: str, avg_time_ms: float, sample_count: int): """Set/update benchmark""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() await conn.execute(""" INSERT OR REPLACE INTO query_benchmarks (cache_type, avg_time_ms, sample_count, last_updated) VALUES (?, ?, ?, ?) """, (cache_type, avg_time_ms, sample_count, time.time())) await conn.commit() # Performance Tracking async def log_performance(self, cache_type: str, company_id: Optional[int], cache_hit: bool, response_time_ms: float, estimated_oracle_time_ms: Optional[float], time_saved_ms: Optional[float], username: Optional[str]): """Log performance metric""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() await conn.execute(""" INSERT INTO performance_log (cache_type, company_id, cache_hit, response_time_ms, estimated_oracle_time_ms, time_saved_ms, username, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, (cache_type, company_id, cache_hit, response_time_ms, estimated_oracle_time_ms, time_saved_ms, username, time.time())) await conn.commit() # User Settings async def get_user_cache_enabled(self, username: str) -> bool: """Get user cache setting (default True) - READ-ONLY, no lock needed""" conn = await self._conn_manager.get_connection() async with conn.execute(""" SELECT cache_enabled FROM user_cache_settings WHERE username = ? """, (username,)) as cursor: result = await cursor.fetchone() return bool(result[0]) if result else True # Default enabled, explicit bool conversion async def set_user_cache_enabled(self, username: str, enabled: bool): """Set user cache setting""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() await conn.execute(""" INSERT OR REPLACE INTO user_cache_settings (username, cache_enabled, created_at, updated_at) VALUES (?, ?, ?, ?) """, (username, enabled, time.time(), time.time())) await conn.commit() # Watermarks async def get_watermark(self, company_id: int) -> Optional[int]: """Get cached watermark (max_id_act) for company - READ-ONLY, no lock needed""" conn = await self._conn_manager.get_connection() async with conn.execute(""" SELECT max_id_act FROM cache_watermarks WHERE company_id = ? """, (company_id,)) as cursor: result = await cursor.fetchone() return result[0] if result else None async def set_watermark(self, company_id: int, schema: str, max_id_act: int): """Set/update watermark for company""" async with self._conn_manager.write_lock: conn = await self._conn_manager.get_connection() await conn.execute(""" INSERT OR REPLACE INTO cache_watermarks (company_id, schema, max_id_act, checked_at) VALUES (?, ?, ?, ?) """, (company_id, schema, max_id_act, time.time())) await conn.commit() async def get_cached_company_ids(self) -> List[int]: """Get list of company_ids with active cache entries - READ-ONLY, no lock needed""" conn = await self._conn_manager.get_connection() async with conn.execute(""" SELECT DISTINCT company_id FROM cache_entries WHERE company_id IS NOT NULL AND expires_at > ? """, (time.time(),)) as cursor: results = await cursor.fetchall() return [row[0] for row in results] # Statistics async def get_stats(self) -> Dict[str, Any]: """Get cache statistics - READ-ONLY, no lock needed""" conn = await self._conn_manager.get_connection() # Total entries async with conn.execute("SELECT COUNT(*) FROM cache_entries") as cursor: total_entries = (await cursor.fetchone())[0] # Active entries (not expired) async with conn.execute(""" SELECT COUNT(*) FROM cache_entries WHERE expires_at > ? """, (time.time(),)) as cursor: active_entries = (await cursor.fetchone())[0] return { 'total_entries': total_entries, 'active_entries': active_entries, 'expired_entries': total_entries - active_entries } async def close(self): """Close the connection manager""" if self._conn_manager: await self._conn_manager.close() self._conn_manager = None