#!/usr/bin/env python3 """ Flux complet WhatsApp: PDF → OCR (via queue) → Oracle Usage: python roa2web_whatsapp.py [--save] """ import asyncio import json import shutil import sys import uuid from datetime import datetime, timedelta from pathlib import Path from backend.modules.data_entry.services.oracle_receipt_writer import write_receipt # OCR Queue paths QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue") DB_PATH = QUEUE_DIR / "ocr_jobs.db" FILES_DIR = QUEUE_DIR / "files" # Oracle config ORACLE_CONFIG = { "user": "MARIUSM_AUTO", "password": "ROMFASTSOFT", "dsn": "10.0.20.121:1521/ROA" } async def submit_ocr_job(file_path: Path) -> str: import aiosqlite job_id = str(uuid.uuid4()) FILES_DIR.mkdir(parents=True, exist_ok=True) dest_path = FILES_DIR / f"{job_id}_{file_path.name}" shutil.copy(file_path, dest_path) mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg" now = datetime.now() async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db: await db.execute(""" INSERT INTO ocr_jobs (id, status, file_path, mime_type, engine, created_at, original_filename, expires_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, (job_id, "pending", str(dest_path), mime_type, "doctr_plus", now.isoformat(), file_path.name, (now + timedelta(hours=24)).isoformat())) await db.commit() return job_id async def wait_for_result(job_id: str, timeout: int = 120) -> dict: import aiosqlite start = datetime.now() while (datetime.now() - start).seconds < timeout: async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db: db.row_factory = aiosqlite.Row async with db.execute( "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?", (job_id,) ) as cursor: row = await cursor.fetchone() if row and row["status"] == "completed": return {"success": True, "result": json.loads(row["result_json"]), "time_ms": row["processing_time_ms"]} elif row and row["status"] == "failed": return {"success": False, "error": row["error_message"]} await asyncio.sleep(0.3) return {"success": False, "error": "Timeout"} def save_to_oracle(ocr_result: dict, do_commit: bool = False) -> dict: try: date_str = ocr_result.get("receipt_date") receipt_date = ( datetime.strptime(date_str[:10], "%Y-%m-%d").date() if date_str else None ) effective_date = receipt_date or datetime.now().date() an, luna = effective_date.year, effective_date.month receipt_dict = {**ocr_result, "receipt_date": receipt_date} cod, _mesaj = write_receipt(receipt_dict, ORACLE_CONFIG, commit=do_commit) return {"success": True, "cod": cod, "luna": luna, "an": an, "saved": do_commit} except Exception as e: return {"success": False, "error": str(e)} async def process_whatsapp_file(file_path: Path, do_save: bool = False): print(f"📄 Procesez: {file_path.name}") # OCR print("🔍 OCR...") job_id = await submit_ocr_job(file_path) result = await wait_for_result(job_id) if not result["success"]: print(f"❌ OCR Error: {result.get('error')}") return None ocr = result["result"] ocr_time = result["time_ms"] print(f"✅ OCR OK ({ocr_time}ms)") print(f" CUI: {ocr.get('cui')}, Data: {ocr.get('receipt_date')}") print(f" Total: {ocr.get('amount')} RON, TVA: {ocr.get('tva_total')}") # Oracle print("💾 Oracle...") oracle_result = save_to_oracle(ocr, do_commit=do_save) if oracle_result["success"]: if oracle_result["saved"]: print(f"✅ SALVAT: COD={oracle_result['cod']}, {oracle_result['luna']:02d}/{oracle_result['an']}") else: print(f"⚠️ DRY RUN: ar fi COD={oracle_result['cod']}") else: print(f"❌ Oracle Error: {oracle_result.get('error')}") return { "ocr": ocr, "ocr_time_ms": ocr_time, "oracle": oracle_result } if __name__ == "__main__": if len(sys.argv) < 2: print("Usage: python roa2web_whatsapp.py [--save]") sys.exit(1) file_path = Path(sys.argv[1]) do_save = "--save" in sys.argv if not file_path.exists(): print(f"File not found: {file_path}") sys.exit(1) asyncio.run(process_whatsapp_file(file_path, do_save))