feat: Add WhatsApp import scripts for receipt processing
- OCR client for SQLite queue - WhatsApp flow: PDF -> OCR -> SQLite -> Oracle - PACK_CONTAFIN integration for Oracle save - README with flux documentation
This commit is contained in:
189
backend/scripts/whatsapp_import/whatsapp_flow.py
Normal file
189
backend/scripts/whatsapp_import/whatsapp_flow.py
Normal file
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Flux complet WhatsApp: PDF → OCR (via queue) → Oracle
|
||||
Usage: python roa2web_whatsapp.py <file_path> [--save]
|
||||
"""
|
||||
import asyncio
import json
import os
import shutil
import sys
import uuid
from datetime import datetime, timedelta
from decimal import Decimal
from pathlib import Path

import oracledb
|
||||
|
||||
# OCR queue locations: the queue root, the SQLite database that holds the
# jobs, and the directory that receives the files submitted for OCR.
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
DB_PATH = QUEUE_DIR.joinpath("ocr_jobs.db")
FILES_DIR = QUEUE_DIR.joinpath("files")
|
||||
|
||||
# Oracle connection settings, passed as keyword arguments to oracledb.connect().
# Each value can be overridden through the environment (ROA2WEB_ORACLE_USER,
# ROA2WEB_ORACLE_PASSWORD, ROA2WEB_ORACLE_DSN) so that credentials do not have
# to live in source control.
# NOTE(review): the fallback values are the credentials that used to be
# hard-coded here. They are kept only so existing deployments keep working;
# rotate them and drop the fallbacks once the environment variables are set
# everywhere.
ORACLE_CONFIG = {
    "user": os.environ.get("ROA2WEB_ORACLE_USER", "MARIUSM_AUTO"),
    "password": os.environ.get("ROA2WEB_ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ROA2WEB_ORACLE_DSN", "10.0.20.121:1521/ROA"),
}
|
||||
|
||||
# Fiscal code (CUI, without the "RO" prefix) -> account code.
# save_to_oracle() stores the looked-up value as SCD of the first ACT_TEMP row.
CUI_TO_CONT = {
    "11201891": "6022",
    "1590082": "6022",
    "14991381": "6022",
    "10562600": "6021",
}


def get_cont(cui: str) -> str:
    """Return the account code mapped to *cui*, or ``"6028"`` if unmapped.

    Before the lookup the code is upper-cased, every ``"RO"`` substring is
    removed and surrounding whitespace is trimmed.
    """
    normalized = cui.upper().replace("RO", "").strip()
    if normalized in CUI_TO_CONT:
        return CUI_TO_CONT[normalized]
    return "6028"
|
||||
|
||||
async def submit_ocr_job(file_path: Path) -> str:
    """Queue *file_path* for OCR and return the id of the created job.

    The file is copied into ``FILES_DIR`` under a name prefixed with a fresh
    UUID, then a ``pending`` row describing it is inserted into the
    ``ocr_jobs`` table of the SQLite database at ``DB_PATH``.

    Args:
        file_path: Document to process. A ``.pdf`` suffix (case-insensitive)
            is registered as ``application/pdf``; anything else is registered
            as ``image/jpeg``.

    Returns:
        The UUID string that identifies the job.

    Raises:
        Exception: Whatever the file copy or the database insert raises. When
            the insert fails, the copied file is deleted before re-raising so
            that no orphan is left behind in ``FILES_DIR``.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)
    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Take one timestamp so expires_at is exactly 24 hours after created_at.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (id, status, file_path, mime_type, engine, created_at, original_filename, expires_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id,
                    "pending",
                    str(dest_path),
                    mime_type,
                    "doctr_plus",
                    created_at.isoformat(),
                    file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
    except BaseException:
        # No job row references the copy, so nothing would ever clean it up.
        # BaseException also covers task cancellation.
        dest_path.unlink(missing_ok=True)
        raise

    return job_id
|
||||
|
||||
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the OCR queue until job *job_id* finishes or *timeout* elapses.

    The ``ocr_jobs`` row is re-read from the SQLite database at ``DB_PATH``
    every 0.3 seconds.

    Args:
        job_id: Id of the job, as returned by ``submit_ocr_job``.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": <decoded result_json>, "time_ms": ...}``
        when the job status is ``completed``;
        ``{"success": False, "error": ...}`` when the status is ``failed``,
        when a completed job carries no result, or when the timeout expires
        (error ``"Timeout"``).
    """
    import aiosqlite

    # Use the event loop's monotonic clock: timedelta.seconds is only the
    # seconds *component* (it wraps at one day) and the wall clock can jump.
    clock = asyncio.get_running_loop().time
    deadline = clock() + timeout

    while clock() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,),
            ) as cursor:
                row = await cursor.fetchone()

        status = row["status"] if row else None
        if status == "completed":
            if row["result_json"] is None:
                # json.loads(None) would raise TypeError; report it instead.
                return {"success": False, "error": "OCR job completed without a result"}
            return {
                "success": True,
                "result": json.loads(row["result_json"]),
                "time_ms": row["processing_time_ms"],
            }
        if status == "failed":
            return {"success": False, "error": row["error_message"]}

        await asyncio.sleep(0.3)

    return {"success": False, "error": "Timeout"}
|
||||
|
||||
def save_to_oracle(ocr_result: dict, do_commit: bool = False) -> dict:
    """Write the accounting rows for one OCR'd receipt through PACK_CONTAFIN.

    Steps, all inside a single transaction:

    1. call ``PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL``;
    2. take ``MAX(COD) + 1`` from ``ACT`` for the receipt's year and month;
    3. look the partner up in ``NOM_PARTENERI`` by fiscal code, with and
       without the ``RO`` prefix (``ID_PART`` 0 when not found);
    4. insert two rows into ``ACT_TEMP`` (three when the VAT amount is > 0);
    5. call ``PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL``.

    Args:
        ocr_result: OCR output. Keys read: ``receipt_date`` (first 10
            characters parsed as ``YYYY-MM-DD``; today's date when missing or
            empty), ``cui``, ``amount``, ``tva_total``, ``receipt_number``
            and ``partner_name``.
        do_commit: Commit the transaction when true; otherwise roll it back
            (dry run).

    Returns:
        On success ``{"success": True, "cod", "luna", "an", "saved",
        "mesaj"}``, where ``saved`` mirrors *do_commit* and ``mesaj`` is the
        output value of the finalize procedure. On any error after the
        cursor was created, ``{"success": False, "error": <message>}`` (the
        transaction is rolled back first).

    Raises:
        Exception: Whatever ``oracledb.connect`` or ``Connection.cursor``
            raises; these errors are not converted into a result dict.
    """
    conn = oracledb.connect(**ORACLE_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            # Receipt date -> accounting year / month.
            date_str = ocr_result.get("receipt_date")
            if date_str:
                receipt_date = datetime.strptime(date_str[:10], "%Y-%m-%d").date()
            else:
                receipt_date = datetime.now().date()
            an, luna = receipt_date.year, receipt_date.month

            # Init
            cursor.callproc(
                'PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL',
                [0, datetime.now(), an, luna, 0, 0, 0, 0],
            )

            # Next document code for this period.
            cursor.execute(
                "SELECT NVL(MAX(COD), 0) + 1 FROM ACT WHERE AN = :an AND LUNA = :luna",
                an=an, luna=luna,
            )
            cod = cursor.fetchone()[0]

            # Partner lookup by fiscal code, with and without the RO prefix.
            cui_clean = (ocr_result.get("cui") or "").upper().replace("RO", "").strip()
            cursor.execute(
                "SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                cui=cui_clean, cui2="RO" + cui_clean,
            )
            row = cursor.fetchone()
            id_part = row[0] if row else 0

            # Amounts: Decimal keeps the subtraction exact (binary floats
            # yield values such as 8.489999999999998 for 10.10 - 1.61).
            total = Decimal(str(ocr_result.get("amount") or 0))
            tva = Decimal(str(ocr_result.get("tva_total") or 0))
            fara_tva = total - tva

            # Receipt number: used only when it is purely decimal digits.
            # isdecimal() (unlike isdigit()) never accepts text int() rejects.
            nract_text = str(ocr_result.get("receipt_number") or "")
            nract = int(nract_text) if nract_text.isdecimal() else 0

            cont = get_cont(ocr_result.get("cui") or "")
            expl = f"OCR: {ocr_result.get('partner_name') or 'N/A'}"

            # Rows as (SCD, SCC, SUMA, EXPLICATIA, ID_PARTC, ID_PARTD).
            lines = [
                (cont, "401", fara_tva, expl, id_part, 0),
                ("401", "5311", total, f"Plata {expl}", 0, id_part),
            ]
            if tva > 0:
                lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))

            for scd, scc, suma, line_expl, id_partc, id_partd in lines:
                cursor.execute(
                    """
                    INSERT INTO ACT_TEMP (LUNA, AN, COD, DATAIREG, DATAACT, NRACT, EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_PARTD, ID_UTIL, DATAORA)
                    VALUES (:luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract, :expl, :scd, :scc, :suma, :id_partc, :id_partd, 0, SYSDATE)
                    """,
                    luna=luna, an=an, cod=cod, dataact=receipt_date, nract=nract,
                    expl=line_expl, scd=scd, scc=scc, suma=suma,
                    id_partc=id_partc, id_partd=id_partd,
                )

            # Finalize
            mesaj = cursor.var(oracledb.STRING, 4000)
            cursor.callproc('PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL', [0, cod, 0, 0, 0, mesaj])

            if do_commit:
                conn.commit()
            else:
                conn.rollback()

            # NOTE(review): the meaning of the procedure's output message is
            # not visible from this script; it is passed through unchanged so
            # callers can inspect it - confirm whether a non-empty value
            # signals an error.
            return {
                "success": True,
                "cod": cod,
                "luna": luna,
                "an": an,
                "saved": bool(do_commit),
                "mesaj": mesaj.getvalue(),
            }
        except Exception as e:
            conn.rollback()
            return {"success": False, "error": str(e)}
        finally:
            cursor.close()
    finally:
        # Runs even when cursor creation fails, so the connection never leaks.
        conn.close()
|
||||
|
||||
async def process_whatsapp_file(file_path: Path, do_save: bool = False):
    """Run one file through OCR and then through the Oracle write step.

    Progress is printed to stdout at every stage.

    Args:
        file_path: Document to submit to the OCR queue.
        do_save: Forwarded to ``save_to_oracle`` as ``do_commit``.

    Returns:
        ``None`` when OCR did not succeed; otherwise a dict with the OCR
        result (``"ocr"``), its processing time (``"ocr_time_ms"``) and the
        dict returned by ``save_to_oracle`` (``"oracle"``).
    """
    print(f"📄 Procesez: {file_path.name}")

    # Stage 1: OCR through the queue.
    print("🔍 OCR...")
    queued_id = await submit_ocr_job(file_path)
    outcome = await wait_for_result(queued_id)
    if not outcome["success"]:
        print(f"❌ OCR Error: {outcome.get('error')}")
        return None

    ocr, ocr_time = outcome["result"], outcome["time_ms"]
    print(f"✅ OCR OK ({ocr_time}ms)")
    print(f" CUI: {ocr.get('cui')}, Data: {ocr.get('receipt_date')}")
    print(f" Total: {ocr.get('amount')} RON, TVA: {ocr.get('tva_total')}")

    # Stage 2: Oracle (committed only when do_save is set).
    print("💾 Oracle...")
    oracle_result = save_to_oracle(ocr, do_commit=do_save)

    if not oracle_result["success"]:
        print(f"❌ Oracle Error: {oracle_result.get('error')}")
    elif oracle_result["saved"]:
        print(f"✅ SALVAT: COD={oracle_result['cod']}, {oracle_result['luna']:02d}/{oracle_result['an']}")
    else:
        print(f"⚠️ DRY RUN: ar fi COD={oracle_result['cod']}")

    summary = {"ocr": ocr, "ocr_time_ms": ocr_time, "oracle": oracle_result}
    return summary
|
||||
|
||||
if __name__ == "__main__":
    # Command line: a required file path and an optional --save flag
    # (without the flag the Oracle step is a dry run).
    if not sys.argv[1:]:
        print("Usage: python roa2web_whatsapp.py <file_path> [--save]")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    do_save = "--save" in sys.argv

    if file_path.exists():
        asyncio.run(process_whatsapp_file(file_path, do_save))
    else:
        print(f"File not found: {file_path}")
        sys.exit(1)
|
||||
Reference in New Issue
Block a user