feat(oracle): auto-recover Oracle pool + surface status, stop silent import failures

After a power loss the app started before Oracle was ready; init_oracle() failed
once, the pool stayed None forever (no retry), and every sync silently failed
("Oracle pool not initialized") while still hammering the GoMag API each minute,
and order-detail 500'd.

- database.ensure_oracle_pool(force): thread-safe (re)create of the pool, called
  at the start of every sync cycle → self-heals within one cycle once Oracle is
  back (incl. after an Oracle service restart). init_oracle_client made idempotent
  so re-init can't fall back to thin mode.
- database.oracle_status() exposed; main.py startup now goes through
  ensure_oracle_pool(), so an unavailable Oracle no longer aborts startup.
- run_sync ensures the pool before the GoMag download; on failure it records a
  clear run status instead of crashing and skips the wasted API calls.
- /api/sync/health reports oracle_ready/last_error; dashboard health pill shows
  "Oracle indisponibil" (top priority). Recovery via the existing Start Sync button.
- order_detail degrades gracefully (200 without CODMAT + notice) instead of 500.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-06-26 07:33:01 +00:00
parent 395e2b997a
commit cd7eb628dd
9 changed files with 140 additions and 24 deletions

View File

@@ -3,39 +3,59 @@ import aiosqlite
import sqlite3
import logging
import os
import threading
from datetime import datetime
from .config import settings
logger = logging.getLogger(__name__)
# ---- Oracle Pool ----
# Module-level state shared by init_oracle(), ensure_oracle_pool() and
# oracle_status().

# The live connection pool, or None when no pool is available. Assigned by
# init_oracle() and reset to None by ensure_oracle_pool() on a forced
# re-create or when (re)initialisation fails.
pool = None
# Held by ensure_oracle_pool() for the whole check / close / re-create
# sequence so concurrent callers do not build the pool twice.
_pool_lock = threading.Lock()
# str — reason the last (re)init failed, or None after a successful one
_pool_last_error = None
# ISO-8601 str (datetime.now().isoformat()) — when we last tried to (re)init
_pool_last_attempt = None
# Set to True by _init_oracle_client_once() after its first run so the client
# mode selection is performed at most once per process.
_client_initialized = False
def init_oracle():
"""Initialize Oracle client mode and create connection pool."""
global pool
def _init_oracle_client_once():
    """Select and load the Oracle client mode at most once per process.

    Reads FORCE_THIN_MODE, INSTANTCLIENTPATH, ORACLE_DSN and TNS_ADMIN from
    ``settings`` and then:

    * exports TNS_ADMIN to the process environment when it is configured;
    * logs the effective configuration once;
    * stays in thin mode when FORCE_THIN_MODE is set or no Instant Client
      path is configured;
    * otherwise calls ``oracledb.init_oracle_client(lib_dir=...)`` to enable
      thick mode, logging the error and continuing in thin mode if that fails.

    ``init_oracle_client()`` only loads the client library; it does not
    connect to the database, so this step works while Oracle is down. A
    repeated call during a later pool re-init may raise, which would be
    reported here as a thin-mode fallback, hence the run-once guard.

    The guard flag is set after the first run regardless of the outcome, so a
    failed thick-mode load is not retried within the same process.
    """
    global _client_initialized
    if _client_initialized:
        return

    force_thin = settings.FORCE_THIN_MODE
    instantclient_path = settings.INSTANTCLIENTPATH
    dsn = settings.ORACLE_DSN

    # Ensure TNS_ADMIN is set as OS env var so oracledb can find tnsnames.ora
    if settings.TNS_ADMIN:
        os.environ['TNS_ADMIN'] = settings.TNS_ADMIN

    # Each message below is emitted exactly once per process.
    logger.info(f"Oracle config: DSN={dsn}, TNS_ADMIN={settings.TNS_ADMIN or os.environ.get('TNS_ADMIN', '(not set)')}, INSTANTCLIENTPATH={instantclient_path or '(not set)'}")

    if force_thin:
        logger.info(f"FORCE_THIN_MODE=true: thin mode for {dsn}")
    elif instantclient_path:
        try:
            oracledb.init_oracle_client(lib_dir=instantclient_path)
        except Exception as e:
            # Best effort: a missing or broken client library must not stop
            # the app, so log it and carry on in thin mode.
            logger.error(f"Thick mode error: {e}")
            logger.info("Fallback to thin mode")
        else:
            logger.info(f"Thick mode activated for {dsn}")
    else:
        logger.info(f"Thin mode (default) for {dsn}")

    _client_initialized = True
def init_oracle():
"""Initialize Oracle client mode and create the connection pool. Raises on failure."""
global pool
_init_oracle_client_once()
pool = oracledb.create_pool(
user=settings.ORACLE_USER,
password=settings.ORACLE_PASSWORD,
@@ -44,9 +64,49 @@ def init_oracle():
max=4,
increment=1
)
logger.info(f"Oracle pool created for {dsn}")
logger.info(f"Oracle pool created for {settings.ORACLE_DSN}")
return pool
def ensure_oracle_pool(force: bool = False) -> bool:
    """Ensure the Oracle pool exists, (re)creating it if needed.

    Safe to call at the start of every sync cycle: when a pool already exists
    and ``force`` is false this is a cheap no-op, otherwise it attempts to
    build a new pool via ``init_oracle()``. The whole sequence runs under
    ``_pool_lock``.

    Args:
        force: When True, close any existing pool and create a fresh one even
            if a pool is already present.

    Returns:
        True if a pool is available when the call returns, False if
        (re)creation failed. This function does not raise on init failure;
        instead it leaves ``pool`` as None and stores the reason in
        ``_pool_last_error`` so callers can surface a clear status.
    """
    global pool, _pool_last_error, _pool_last_attempt
    with _pool_lock:
        if pool is not None and not force:
            return True

        if pool is not None:
            # Forced re-create: closing the old pool is best effort, but the
            # failure is logged rather than silently discarded.
            try:
                pool.close()
            except Exception as close_err:
                logger.warning(f"Closing previous Oracle pool failed (ignored): {close_err}")
            finally:
                pool = None

        _pool_last_attempt = datetime.now().isoformat()
        try:
            init_oracle()
        except Exception as e:
            pool = None
            # Some exceptions stringify to ''; fall back to repr() so the
            # recorded error is never an empty (falsy) string.
            _pool_last_error = str(e) or repr(e)
            logger.error(f"Oracle pool init failed: {e}")
            return False

        _pool_last_error = None
        return True
def oracle_status() -> dict:
    """Return a point-in-time view of the Oracle pool state for health checks.

    Returns:
        A dict with three keys: ``ready`` (True when a pool object currently
        exists), ``last_error`` (reason recorded for the last failed
        (re)init, or None) and ``last_attempt_at`` (ISO timestamp string of
        the last (re)init attempt, or None).
    """
    is_ready = pool is not None
    snapshot = dict(
        ready=is_ready,
        last_error=_pool_last_error,
        last_attempt_at=_pool_last_attempt,
    )
    return snapshot
def get_oracle_connection():
"""Get a connection from the Oracle pool."""
if pool is None: