Files
roa2web-service-auto/backend/scripts/whatsapp_import/whatsapp_flow.py
Claude Agent 1366dbc11c feat: Add WhatsApp import scripts for receipt processing
- OCR client for SQLite queue
- WhatsApp flow: PDF -> OCR -> SQLite -> Oracle
- PACK_CONTAFIN integration for Oracle save
- README with flux documentation
2026-02-03 15:33:22 +00:00

190 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""
Full WhatsApp flow: PDF → OCR (via queue) → Oracle
Usage: python roa2web_whatsapp.py <file_path> [--save]
"""
import asyncio
import json
import os
import shutil
import sys
import uuid
from datetime import datetime, timedelta
from decimal import Decimal
from pathlib import Path

import oracledb

# OCR queue paths.
# QUEUE_DIR is the root directory of the OCR queue.
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
# DB_PATH is the SQLite database holding the `ocr_jobs` table that this script
# inserts jobs into and polls for results.
DB_PATH = QUEUE_DIR / "ocr_jobs.db"
# FILES_DIR is where submitted files are copied before their job row is queued.
FILES_DIR = QUEUE_DIR / "files"
# Oracle connection settings, passed as keyword arguments to oracledb.connect().
# Each value can be overridden through the environment (ORACLE_USER,
# ORACLE_PASSWORD, ORACLE_DSN) so credentials do not have to be edited in
# source; the literals remain only as backward-compatible defaults.
# NOTE(review): a password committed to the repository should be rotated and
# this default removed once every deployment sets the environment variables.
ORACLE_CONFIG = {
    "user": os.environ.get("ORACLE_USER", "MARIUSM_AUTO"),
    "password": os.environ.get("ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ORACLE_DSN", "10.0.20.121:1521/ROA"),
}
# Account code per fiscal code (CUI). Keys are stored without the "RO" prefix;
# get_cont() normalizes its argument the same way before looking it up.
CUI_TO_CONT = {
    "11201891": "6022",
    "1590082": "6022",
    "14991381": "6022",
    "10562600": "6021",
}


def get_cont(cui: str) -> str:
    """Return the account code mapped to *cui*, or "6028" if it is not listed.

    The CUI is upper-cased, every "RO" occurrence is removed and surrounding
    whitespace is stripped before the lookup.
    """
    normalized = cui.upper()
    normalized = normalized.replace("RO", "")
    normalized = normalized.strip()
    if normalized in CUI_TO_CONT:
        return CUI_TO_CONT[normalized]
    return "6028"
async def submit_ocr_job(file_path: Path) -> str:
    """Queue *file_path* for OCR and return the new job id.

    The file is copied into FILES_DIR under a name prefixed with a fresh UUID,
    then a ``pending`` row using the ``doctr_plus`` engine is inserted into the
    ``ocr_jobs`` table of the SQLite database at DB_PATH. The row's
    ``expires_at`` is exactly 24 hours after its ``created_at``.

    Args:
        file_path: File to process. A ``.pdf`` suffix (case-insensitive) is
            queued as ``application/pdf``; anything else as ``image/jpeg``.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        OSError: If the file cannot be copied into the queue directory.
        Exception: Any error raised by aiosqlite while inserting the row is
            propagated after the copied file has been removed again.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Sample the clock once so created_at and expires_at are exactly 24h apart.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    inserted = False
    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (id, status, file_path, mime_type, engine, created_at, original_filename, expires_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id,
                    "pending",
                    str(dest_path),
                    mime_type,
                    "doctr_plus",
                    created_at.isoformat(),
                    file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
            inserted = True
    finally:
        if not inserted:
            # No job row references the copy, so nothing would ever process or
            # expire it; remove it instead of leaving an orphan in the queue.
            try:
                dest_path.unlink()
            except OSError:
                # Best-effort cleanup: the original error is the one to report.
                pass
    return job_id
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the ``ocr_jobs`` table until job *job_id* finishes or *timeout* elapses.

    The SQLite database at DB_PATH is queried roughly every 0.3 seconds, with a
    fresh connection per poll.

    Args:
        job_id: Id returned by ``submit_ocr_job``.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": <decoded result_json>, "time_ms": ...}``
        when the job status is ``completed``;
        ``{"success": False, "error": <error_message>}`` when it is ``failed``;
        ``{"success": False, "error": "Timeout"}`` when the deadline passes.
    """
    import aiosqlite

    # Use the event loop's monotonic clock and a real deadline. The previous
    # `(now - start).seconds` check only looked at the seconds *component* of
    # the timedelta, so it wrapped at one day and misbehaved whenever the wall
    # clock was adjusted while waiting.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    query = "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?"

    while loop.time() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(query, (job_id,)) as cursor:
                row = await cursor.fetchone()

        if row is not None:
            status = row["status"]
            if status == "completed":
                return {
                    "success": True,
                    "result": json.loads(row["result_json"]),
                    "time_ms": row["processing_time_ms"],
                }
            if status == "failed":
                return {"success": False, "error": row["error_message"]}

        await asyncio.sleep(0.3)

    return {"success": False, "error": "Timeout"}
def _to_decimal(value) -> Decimal:
    """Convert an OCR amount (number, numeric string or None) to Decimal; empty values become 0."""
    return Decimal(str(value or 0))


def _parse_receipt_number(value) -> int:
    """Return *value* as an int when it consists only of decimal digits, otherwise 0."""
    text = "" if value is None else str(value)
    # isdecimal() only accepts characters that int() can parse, unlike isdigit().
    return int(text) if text.isdecimal() else 0


def save_to_oracle(ocr_result: dict, do_commit: bool = False) -> dict:
    """Write the accounting lines for one OCR'd receipt through PACK_CONTAFIN.

    Steps: call ``INITIALIZEAZA_SCRIERE_ACT_RUL``, pick the next ``COD`` for the
    receipt's year/month, look up the partner by fiscal code, insert the lines
    into ``ACT_TEMP`` and call ``FINALIZEAZA_SCRIERE_ACT_RUL``. The inserted
    lines (SCD -> SCC) are:

    * ``<get_cont(cui)> -> 401`` for the amount without VAT,
    * ``4426 -> 401`` for the VAT, only when the VAT is positive,
    * ``401 -> 5311`` for the total.

    Amounts are handled as ``Decimal`` so that ``total - tva`` is exact.

    Args:
        ocr_result: OCR fields. The keys read are ``receipt_date``
            (``YYYY-MM-DD...``; today is used when missing), ``cui``,
            ``amount``, ``tva_total``, ``receipt_number`` and ``partner_name``.
        do_commit: Commit when true; otherwise roll everything back (dry run).

    Returns:
        ``{"success": True, "cod", "luna", "an", "saved"}`` on success, where
        ``saved`` tells whether the transaction was committed, or
        ``{"success": False, "error": <message>}`` on any failure, including a
        failure to connect or to parse the OCR fields.
    """
    conn = None
    cursor = None
    try:
        # Parse every OCR field before touching the database, so malformed
        # input fails fast without opening a connection.
        date_str = ocr_result.get("receipt_date")
        if date_str:
            receipt_date = datetime.strptime(date_str[:10], "%Y-%m-%d").date()
        else:
            receipt_date = datetime.now().date()
        an, luna = receipt_date.year, receipt_date.month

        raw_cui = ocr_result.get("cui") or ""
        cui_clean = raw_cui.upper().replace("RO", "").strip()

        total = _to_decimal(ocr_result.get("amount"))
        tva = _to_decimal(ocr_result.get("tva_total"))
        fara_tva = total - tva
        nract = _parse_receipt_number(ocr_result.get("receipt_number"))
        cont = get_cont(raw_cui)
        expl = f"OCR: {ocr_result.get('partner_name') or 'N/A'}"

        # Connecting inside the try block makes a connection failure come back
        # as an error dict, like every other failure of this function.
        conn = oracledb.connect(**ORACLE_CONFIG)
        cursor = conn.cursor()

        # Init
        cursor.callproc('PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL', [0, datetime.now(), an, luna, 0, 0, 0, 0])

        # Next COD for this year/month.
        # NOTE(review): MAX(COD) + 1 is not safe against concurrent writers
        # unless the package serialises them - confirm.
        cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
        cod = cursor.fetchone()[0]

        # Partner: match the fiscal code with or without the "RO" prefix;
        # 0 is used when no partner is found.
        cursor.execute(
            "SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
            cui=cui_clean,
            cui2="RO" + cui_clean,
        )
        row = cursor.fetchone()
        id_part = row[0] if row else 0

        # Lines as (SCD, SCC, SUMA, EXPLICATIA, ID_PARTC, ID_PARTD).
        lines = [
            (cont, "401", fara_tva, expl, id_part, 0),
            ("401", "5311", total, f"Plata {expl}", 0, id_part),
        ]
        if tva > 0:
            lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))

        for scd, scc, suma, line_expl, id_partc, id_partd in lines:
            cursor.execute(
                """
                INSERT INTO ACT_TEMP (LUNA, AN, COD, DATAIREG, DATAACT, NRACT, EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_PARTD, ID_UTIL, DATAORA)
                VALUES (:luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract, :expl, :scd, :scc, :suma, :id_partc, :id_partd, 0, SYSDATE)
                """,
                luna=luna,
                an=an,
                cod=cod,
                dataact=receipt_date,
                nract=nract,
                expl=line_expl,
                scd=scd,
                scc=scc,
                suma=suma,
                id_partc=id_partc,
                id_partd=id_partd,
            )

        # Finalize
        mesaj = cursor.var(oracledb.STRING, 4000)
        cursor.callproc('PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL', [0, cod, 0, 0, 0, mesaj])

        if do_commit:
            conn.commit()
        else:
            # Dry run: everything above was only executed to validate it.
            conn.rollback()
        return {"success": True, "cod": cod, "luna": luna, "an": an, "saved": bool(do_commit)}
    except Exception as e:
        if conn is not None:
            conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
async def process_whatsapp_file(file_path: Path, do_save: bool = False):
    """Run the whole flow for one file: queue it for OCR, wait, then write to Oracle.

    Progress is printed to stdout at each step.

    Args:
        file_path: File to process.
        do_save: Passed to ``save_to_oracle`` as ``do_commit``; when false the
            Oracle step is a dry run.

    Returns:
        ``None`` when OCR fails; otherwise a dict with the keys ``ocr``,
        ``ocr_time_ms`` and ``oracle`` (the ``save_to_oracle`` result, which
        may itself report a failure).
    """
    print(f"📄 Procesez: {file_path.name}")

    # Step 1: OCR through the queue.
    print("🔍 OCR...")
    result = await wait_for_result(await submit_ocr_job(file_path))
    if not result["success"]:
        print(f"❌ OCR Error: {result.get('error')}")
        return None

    ocr, ocr_time = result["result"], result["time_ms"]
    print(f"✅ OCR OK ({ocr_time}ms)")
    print(f" CUI: {ocr.get('cui')}, Data: {ocr.get('receipt_date')}")
    print(f" Total: {ocr.get('amount')} RON, TVA: {ocr.get('tva_total')}")

    # Step 2: Oracle (committed only when do_save is set).
    print("💾 Oracle...")
    oracle_result = save_to_oracle(ocr, do_commit=do_save)
    if not oracle_result["success"]:
        print(f"❌ Oracle Error: {oracle_result.get('error')}")
    elif oracle_result["saved"]:
        print(f"✅ SALVAT: COD={oracle_result['cod']}, {oracle_result['luna']:02d}/{oracle_result['an']}")
    else:
        print(f"⚠️ DRY RUN: ar fi COD={oracle_result['cod']}")

    return {"ocr": ocr, "ocr_time_ms": ocr_time, "oracle": oracle_result}
def main(argv: list) -> int:
    """Command-line entry point; return the process exit status.

    Args:
        argv: Full argument vector (``argv[0]`` is the program name). The first
            argument is the file to process; ``--save`` anywhere in the list
            makes the Oracle step commit instead of running dry.

    Returns:
        0 when processing succeeded (including a successful dry run); 1 on a
        usage error, a missing file, an OCR failure or an Oracle failure.
    """
    if len(argv) < 2:
        print("Usage: python roa2web_whatsapp.py <file_path> [--save]")
        return 1

    file_path = Path(argv[1])
    do_save = "--save" in argv
    if not file_path.exists():
        print(f"File not found: {file_path}")
        return 1

    outcome = asyncio.run(process_whatsapp_file(file_path, do_save))
    # Surface failures through the exit status so calling scripts can detect
    # them; previously the process exited 0 even when OCR or Oracle failed.
    if outcome is None or not outcome["oracle"]["success"]:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))