- OCR client for SQLite queue - WhatsApp flow: PDF -> OCR -> SQLite -> Oracle - PACK_CONTAFIN integration for Oracle save - README with flux documentation
190 lines
6.9 KiB
Python
190 lines
6.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Flux complet WhatsApp: PDF → OCR (via queue) → Oracle
|
|
Usage: python roa2web_whatsapp.py <file_path> [--save]
|
|
"""
|
|
import asyncio
|
|
import json
|
|
import shutil
|
|
import sys
|
|
import uuid
|
|
import oracledb
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
from decimal import Decimal
|
|
|
|
# Locations used by the OCR queue: the SQLite job database and the directory
# into which submitted documents are copied.
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
DB_PATH = QUEUE_DIR.joinpath("ocr_jobs.db")
FILES_DIR = QUEUE_DIR.joinpath("files")

# Keyword arguments passed verbatim to oracledb.connect().
# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration or the environment instead.
ORACLE_CONFIG = dict(
    user="MARIUSM_AUTO",
    password="ROMFASTSOFT",
    dsn="10.0.20.121:1521/ROA",
)

# Fiscal code (digits only, without the "RO" prefix) -> account code returned
# by get_cont(). Codes not listed here fall back to get_cont()'s default.
CUI_TO_CONT = {
    **dict.fromkeys(("11201891", "1590082", "14991381"), "6022"),
    "10562600": "6021",
}
|
|
|
|
def get_cont(cui: str) -> str:
    """Return the account code mapped to a fiscal code, or "6028" if unmapped.

    Before the lookup in ``CUI_TO_CONT`` the fiscal code is upper-cased, every
    occurrence of "RO" is removed and surrounding whitespace is stripped.
    """
    normalized = cui.upper().replace("RO", "").strip()
    if normalized in CUI_TO_CONT:
        return CUI_TO_CONT[normalized]
    return "6028"
|
|
|
|
async def submit_ocr_job(file_path: Path) -> str:
    """Copy a document into the OCR queue and register a pending job for it.

    The file is copied to ``FILES_DIR`` under a name prefixed with a fresh
    UUID, then a row with status ``"pending"`` and engine ``"doctr_plus"`` is
    inserted into the ``ocr_jobs`` table of the SQLite database at ``DB_PATH``.
    The job is stamped to expire 24 hours after its creation time.

    Args:
        file_path: Path of the document (PDF or image) to submit.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        OSError: If the file cannot be copied into the queue directory.
        Exception: Any database error from the insert is re-raised after the
            copied file has been removed again.
    """
    import mimetypes

    import aiosqlite

    job_id = str(uuid.uuid4())
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    # PDFs keep their explicit type. Other files get their real image type
    # when it can be determined, so a PNG is no longer labelled as JPEG;
    # anything unrecognised keeps the historical "image/jpeg" default.
    suffix = file_path.suffix.lower()
    if suffix == ".pdf":
        mime_type = "application/pdf"
    else:
        guessed, _ = mimetypes.guess_type(f"file{suffix}")
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            mime_type = "image/jpeg"

    # Take a single timestamp so expires_at is exactly created_at + 24h.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (id, status, file_path, mime_type, engine, created_at, original_filename, expires_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id,
                    "pending",
                    str(dest_path),
                    mime_type,
                    "doctr_plus",
                    created_at.isoformat(),
                    file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
    except Exception:
        # The job row was not registered: do not leave an orphaned copy behind.
        try:
            dest_path.unlink()
        except OSError:
            pass
        raise

    return job_id
|
|
|
|
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the OCR queue database until a job finishes or the timeout elapses.

    The ``ocr_jobs`` row for ``job_id`` is read roughly every 0.3 seconds,
    using a new SQLite connection for each poll.

    Args:
        job_id: Id of the job, as returned by ``submit_ocr_job``.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": <parsed result_json>, "time_ms": ...}``
        when the job status is ``"completed"``;
        ``{"success": False, "error": ...}`` when the status is ``"failed"``,
        when the stored result is missing or not valid JSON, or when the
        timeout elapses (error ``"Timeout"``).
    """
    import aiosqlite

    # Use the event loop's monotonic clock: timedelta.seconds is only the
    # seconds *component* (it wraps at one day) and wall-clock time can jump.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while loop.time() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,),
            ) as cursor:
                row = await cursor.fetchone()

        status = row["status"] if row else None
        if status == "completed":
            try:
                parsed = json.loads(row["result_json"])
            except (TypeError, ValueError) as exc:
                # NULL or malformed result_json: report it instead of crashing.
                return {"success": False, "error": f"Invalid OCR result: {exc}"}
            return {"success": True, "result": parsed, "time_ms": row["processing_time_ms"]}
        if status == "failed":
            return {"success": False, "error": row["error_message"]}

        await asyncio.sleep(0.3)

    return {"success": False, "error": "Timeout"}
|
|
|
|
def save_to_oracle(ocr_result: dict, do_commit: bool = False) -> dict:
    """Write the accounting lines for one OCR'd receipt through PACK_CONTAFIN.

    Steps, all on a single Oracle connection opened from ``ORACLE_CONFIG``:

    1. Determine the receipt date (``receipt_date`` parsed as ``YYYY-MM-DD``,
       or today when it is missing) and take its year/month.
    2. Call ``PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL``.
    3. Compute the next ``COD`` as ``MAX(COD) + 1`` from ``ACT`` for that
       year/month.
    4. Look up the partner id in ``NOM_PARTENERI`` by fiscal code, with and
       without the "RO" prefix (0 when not found).
    5. Insert two or three rows into ``ACT_TEMP``; the TVA row is added only
       when ``tva_total`` is greater than zero.
    6. Call ``PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL``.
    7. Commit when ``do_commit`` is true, otherwise roll back (dry run).

    Args:
        ocr_result: Parsed OCR output. The keys read are ``receipt_date``,
            ``cui``, ``amount``, ``tva_total``, ``receipt_number`` and
            ``partner_name``; all are optional.
        do_commit: Commit the transaction when true; roll it back when false.

    Returns:
        On success ``{"success": True, "cod", "luna", "an", "saved", "mesaj"}``
        where ``saved`` mirrors ``do_commit`` and ``mesaj`` is the value of the
        output parameter of the finalize procedure.
        On any error (including a failed connection) the transaction is rolled
        back and ``{"success": False, "error": <message>}`` is returned.
    """
    conn = None
    cursor = None

    try:
        # Connect inside the try so connection failures are reported through
        # the same error dict as every other failure.
        conn = oracledb.connect(**ORACLE_CONFIG)
        cursor = conn.cursor()

        # Receipt date: only the first 10 characters (YYYY-MM-DD) are parsed.
        date_str = ocr_result.get("receipt_date")
        if date_str:
            receipt_date = datetime.strptime(date_str[:10], "%Y-%m-%d").date()
        else:
            receipt_date = datetime.now().date()
        an, luna = receipt_date.year, receipt_date.month

        cursor.callproc(
            "PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL",
            [0, datetime.now(), an, luna, 0, 0, 0, 0],
        )

        # Next document code for the accounting month.
        cursor.execute(
            "SELECT NVL(MAX(COD), 0) + 1 FROM ACT WHERE AN = :an AND LUNA = :luna",
            an=an,
            luna=luna,
        )
        cod = cursor.fetchone()[0]

        # Partner lookup, matching the fiscal code with or without "RO".
        raw_cui = ocr_result.get("cui") or ""
        cui_clean = raw_cui.upper().replace("RO", "").strip()
        cursor.execute(
            "SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
            cui=cui_clean,
            cui2="RO" + cui_clean,
        )
        partner_row = cursor.fetchone()
        id_part = partner_row[0] if partner_row else 0

        # Amounts are kept as Decimal: subtracting binary floats can yield
        # values such as 80.99999999999999 for what should be 81.00.
        total = Decimal(str(ocr_result.get("amount") or 0))
        tva = Decimal(str(ocr_result.get("tva_total") or 0))
        fara_tva = total - tva

        # Non-numeric receipt numbers are stored as 0. isdecimal() accepts
        # exactly the digit characters that int() can parse.
        receipt_number = str(ocr_result.get("receipt_number") or "")
        nract = int(receipt_number) if receipt_number.isdecimal() else 0

        cont = get_cont(raw_cui)
        expl = f"OCR: {ocr_result.get('partner_name') or 'N/A'}"

        # Tuple layout: (SCD, SCC, SUMA, EXPLICATIA, ID_PARTC, ID_PARTD).
        lines = [(cont, "401", fara_tva, expl, id_part, 0)]
        if tva > 0:
            lines.append(("4426", "401", tva, f"TVA {expl}", id_part, 0))
        lines.append(("401", "5311", total, f"Plata {expl}", 0, id_part))

        for scd, scc, suma, line_expl, id_partc, id_partd in lines:
            cursor.execute(
                """
                INSERT INTO ACT_TEMP (LUNA, AN, COD, DATAIREG, DATAACT, NRACT, EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_PARTD, ID_UTIL, DATAORA)
                VALUES (:luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract, :expl, :scd, :scc, :suma, :id_partc, :id_partd, 0, SYSDATE)
                """,
                luna=luna,
                an=an,
                cod=cod,
                dataact=receipt_date,
                nract=nract,
                expl=line_expl,
                scd=scd,
                scc=scc,
                suma=suma,
                id_partc=id_partc,
                id_partd=id_partd,
            )

        # The last argument is an output variable filled by the procedure.
        mesaj = cursor.var(oracledb.STRING, 4000)
        cursor.callproc("PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL", [0, cod, 0, 0, 0, mesaj])

        if do_commit:
            conn.commit()
        else:
            conn.rollback()

        return {
            "success": True,
            "cod": cod,
            "luna": luna,
            "an": an,
            "saved": bool(do_commit),
            "mesaj": mesaj.getvalue(),
        }
    except Exception as e:
        if conn is not None:
            try:
                conn.rollback()
            except Exception:
                # Best effort: a failed rollback (e.g. on a dead connection)
                # must not hide the original error reported below.
                pass
        return {"success": False, "error": str(e)}
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
|
|
|
async def process_whatsapp_file(file_path: Path, do_save: bool = False):
    """Run one file through OCR and then through the Oracle save step.

    Progress is printed to stdout. Returns ``None`` when the OCR step reports
    failure; otherwise returns a dict with the OCR payload (``"ocr"``), the OCR
    processing time (``"ocr_time_ms"``) and the dict returned by
    ``save_to_oracle`` (``"oracle"``). ``do_save`` is forwarded to
    ``save_to_oracle`` as ``do_commit``.
    """
    print(f"📄 Procesez: {file_path.name}")

    # Step 1: queue the document and wait for the OCR result.
    print("🔍 OCR...")
    outcome = await wait_for_result(await submit_ocr_job(file_path))
    if not outcome["success"]:
        print(f"❌ OCR Error: {outcome.get('error')}")
        return None

    ocr, ocr_time = outcome["result"], outcome["time_ms"]
    print(f"✅ OCR OK ({ocr_time}ms)")
    print(f" CUI: {ocr.get('cui')}, Data: {ocr.get('receipt_date')}")
    print(f" Total: {ocr.get('amount')} RON, TVA: {ocr.get('tva_total')}")

    # Step 2: write to Oracle (rolled back unless do_save is set).
    print("💾 Oracle...")
    oracle_result = save_to_oracle(ocr, do_commit=do_save)

    if not oracle_result["success"]:
        print(f"❌ Oracle Error: {oracle_result.get('error')}")
    elif oracle_result["saved"]:
        print(f"✅ SALVAT: COD={oracle_result['cod']}, {oracle_result['luna']:02d}/{oracle_result['an']}")
    else:
        print(f"⚠️ DRY RUN: ar fi COD={oracle_result['cod']}")

    return {"ocr": ocr, "ocr_time_ms": ocr_time, "oracle": oracle_result}
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: the first argument is the file to process; the
    # optional --save flag (anywhere on the command line) enables the commit.
    if not sys.argv[1:]:
        print("Usage: python roa2web_whatsapp.py <file_path> [--save]")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    do_save = "--save" in sys.argv

    if file_path.exists():
        asyncio.run(process_whatsapp_file(file_path, do_save))
    else:
        print(f"File not found: {file_path}")
        sys.exit(1)
|