#!/usr/bin/env python3
"""
Complete script: PDF -> OCR -> Oracle.

Usage:
    python process_and_save.py <pdf_file> [--save]

Without --save: only shows what would be saved (dry run).
With --save:    actually saves to Oracle.

The Oracle connection settings can be overridden with the environment
variables ROA_ORACLE_USER, ROA_ORACLE_PASSWORD and ROA_ORACLE_DSN.
"""
import sys
sys.path.insert(0, "/workspace/roa2web")

import asyncio
import os
import oracledb
from datetime import datetime
from decimal import Decimal
from pathlib import Path

# NOTE(review): the literal values below are kept only as backward-compatible
# defaults; credentials should come from the environment, not from source.
ORACLE_CONFIG = {
    "user": os.environ.get("ROA_ORACLE_USER", "CONTAFIN_ORACLE"),
    "password": os.environ.get("ROA_ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ROA_ORACLE_DSN", "10.0.20.121:1521/ROA"),
}

# Supplier fiscal code (without the "RO" prefix) -> account code.
CUI_TO_CONT = {
    "11201891": "6022",  # MOL
    "1590082": "6022",   # OMV Petrom
    "10562600": "6021",  # Dedeman
}

# Account code used when the supplier is not listed in CUI_TO_CONT.
DEFAULT_CONT = "6028"

# Directory searched when the given file name does not exist as a path.
FIXTURES_DIR = Path("/workspace/roa2web/tests/fixtures/ocr-samples")


def _normalize_cui(cui) -> str:
    """Return *cui* upper-cased, trimmed and without a leading "RO" prefix.

    Only a leading "RO" is removed; ``None`` or an empty value yields "".
    """
    text = (cui or "").strip().upper()
    if text.startswith("RO"):
        text = text[2:].strip()
    return text


def _to_decimal(value) -> Decimal:
    """Convert an OCR amount to ``Decimal``; a missing/zero value becomes 0.

    Going through ``str`` avoids importing binary float noise when the OCR
    layer hands back a ``float``.
    """
    if not value:
        return Decimal("0")
    return value if isinstance(value, Decimal) else Decimal(str(value))


def _parse_receipt_number(raw) -> int:
    """Return the receipt number as an int, or 0 when it is not purely numeric.

    Only ASCII digits are accepted: ``str.isdigit`` alone is also true for
    characters such as superscripts, which ``int()`` cannot parse.
    """
    text = "" if raw is None else str(raw).strip()
    if text.isascii() and text.isdigit():
        return int(text)
    return 0


def get_cont_cheltuiala(cui: str) -> str:
    """Return the account code mapped to the fiscal code *cui*.

    The fiscal code is normalised (case, whitespace, leading "RO") before the
    lookup in ``CUI_TO_CONT``; unknown codes map to "6028".
    """
    return CUI_TO_CONT.get(_normalize_cui(cui), DEFAULT_CONT)


async def process_pdf(pdf_path: Path):
    """Run OCR on *pdf_path*, print the extracted fields and return the result.

    Returns ``None`` (after printing the error message) when the OCR service
    reports failure.
    """
    # Imported lazily: the package is only resolvable after the sys.path
    # tweak at the top of the file.
    from backend.modules.data_entry.services.ocr_service import ocr_service

    # Anything that is not a .pdf is submitted as a JPEG image.
    mime_type = "application/pdf" if pdf_path.suffix.lower() == ".pdf" else "image/jpeg"

    print(f"\n[OCR] Processing: {pdf_path.name}")
    print("-" * 50)

    success, message, result = await ocr_service.process_image(pdf_path, mime_type)
    if not success:
        print(f"ERROR: {message}")
        return None

    print(f"Partner: {result.partner_name}")
    print(f"CUI: {result.cui}")
    print(f"Data: {result.receipt_date}")
    print(f"Numar: {result.receipt_number}")
    print(f"Total: {result.amount}")
    print(f"TVA: {result.tva_total}")
    print(f"Confidence: {result.overall_confidence:.0%}")
    return result


def save_to_oracle(result, do_commit: bool = False):
    """Insert the rows derived from an OCR *result* into ACT_TEMP.

    Args:
        result: OCR result exposing ``receipt_date``, ``cui``, ``amount``,
            ``tva_total``, ``receipt_number`` and ``partner_name``.
        do_commit: commit the inserts when true; otherwise roll them back
            (dry run).

    Returns:
        The COD value used for the inserted rows.

    Raises:
        Exception: any database error is printed, the transaction is rolled
            back and the error is re-raised.
    """
    mode = "SAVE" if do_commit else "DRY RUN"
    print(f"\n[Oracle] {mode}")
    print("-" * 50)

    # Derive year and month from one date so they can never disagree with
    # the DATAACT value (separate now() calls could straddle a month change).
    receipt_date = result.receipt_date or datetime.now().date()
    an = receipt_date.year
    luna = receipt_date.month

    cui_clean = _normalize_cui(result.cui)
    total = _to_decimal(result.amount)
    tva = _to_decimal(result.tva_total)
    fara_tva = total - tva  # exact: Decimal arithmetic, no float rounding
    nract = _parse_receipt_number(result.receipt_number)
    cont = get_cont_cheltuiala(result.cui or "")
    expl = f"OCR: {result.partner_name or 'N/A'}"

    # Both context managers guarantee the cursor and the connection are
    # closed even when cursor creation or a statement fails.
    with oracledb.connect(**ORACLE_CONFIG) as conn, conn.cursor() as cursor:
        try:
            # NOTE(review): MAX(COD) + 1 is not safe against concurrent
            # writers for the same AN/LUNA.
            cursor.execute(
                "SELECT NVL(MAX(COD), 0) + 1 FROM MARIUSM_AUTO.ACT "
                "WHERE AN = :an AND LUNA = :luna",
                an=an, luna=luna,
            )
            cod = cursor.fetchone()[0]

            # The partner may be stored with or without the "RO" prefix.
            cursor.execute(
                "SELECT ID_PART FROM MARIUSM_AUTO.NOM_PARTENERI "
                "WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                cui=cui_clean, cui2="RO" + cui_clean,
            )
            row = cursor.fetchone()
            id_part = row[0] if row else 0  # 0 when the partner is unknown

            print(f"COD: {cod}")
            print(f"Partner ID: {id_part} (CUI: {cui_clean})")
            print(f"Cont: {cont}")

            # (SCD, SCC, SUMA, EXPLICATIA) rows; presumably SCD/SCC are the
            # debit/credit accounts - confirm against the schema.
            lines = [
                (cont, "401", fara_tva, expl),
                ("401", "5311", total, f"Plata {expl}"),
            ]
            if tva > 0:
                lines.insert(1, ("4426", "401", tva, f"TVA {expl}"))

            for scd, scc, suma, e in lines:
                cursor.execute(
                    """
                    INSERT INTO MARIUSM_AUTO.ACT_TEMP (
                        LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                        EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_UTIL, DATAORA
                    ) VALUES (
                        :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                        :expl, :scd, :scc, :suma, :id_part, 0, SYSDATE
                    )
                    """,
                    luna=luna, an=an, cod=cod, dataact=receipt_date,
                    nract=nract, expl=e, scd=scd, scc=scc, suma=suma,
                    id_part=id_part,
                )
                print(f" {scd} = {scc}: {suma:.2f}")

            if do_commit:
                conn.commit()
                print(f"\nSAVED to Oracle (COD={cod})")
            else:
                conn.rollback()
                print(f"\nDRY RUN - not saved (would be COD={cod})")

            return cod
        except Exception as e:
            print(f"ERROR: {e}")
            conn.rollback()
            raise


async def main():
    """Command-line entry point: OCR the given file, then save or dry-run."""
    args = sys.argv[1:]
    # The first non-flag argument is the file, so "--save" may appear
    # before or after it.
    positional = [arg for arg in args if not arg.startswith("--")]
    if not positional:
        print("Usage: python process_and_save.py <pdf_file> [--save]")
        return

    name = positional[0]
    pdf_path = Path(name)
    if not pdf_path.exists():
        # Fall back to the bundled OCR sample directory.
        pdf_path = FIXTURES_DIR / name
        if not pdf_path.exists():
            print(f"File not found: {name}")
            return

    do_save = "--save" in args

    print("=" * 50)
    print("PDF -> OCR -> Oracle")
    print("=" * 50)

    result = await process_pdf(pdf_path)
    if result:
        save_to_oracle(result, do_commit=do_save)

    print("\n" + "=" * 50)


if __name__ == "__main__":
    asyncio.run(main())