feat: Add WhatsApp import scripts for receipt processing

- OCR client for SQLite queue
- WhatsApp flow: PDF -> OCR -> SQLite -> Oracle
- PACK_CONTAFIN integration for Oracle save
- README with flow documentation
This commit is contained in:
Claude Agent
2026-02-03 15:33:22 +00:00
parent 2e1ead69e1
commit 1366dbc11c
8 changed files with 975 additions and 0 deletions

View File

@@ -0,0 +1,92 @@
# WhatsApp Import - Flux Bonuri Fiscale
## Descriere
Scripturi pentru importul automat al bonurilor fiscale primite pe WhatsApp.
## Flux Complet
```
PDF (WhatsApp) --> OCR API (doctr-plus, ~4sec) --> SQLite (receipts) --> Oracle (ACT)
```
## Pași
### 1. Recepție PDF (Clawdbot pe moltbot)
- PDF primit pe WhatsApp ajunge în `~/.clawdbot/media/inbound/`
- Clawdbot detectează fișierul și pornește procesarea
### 2. OCR prin API (claude-agent:8000)
```bash
# Login
TOKEN=$(curl -s -X POST "http://localhost:8000/api/auth/login" \
-H "Content-Type: application/json" \
-d '{"username":"USER","password":"PASS","server_id":"central"}' \
| jq -r .access_token)
# Submit OCR job
JOB=$(curl -s -X POST "http://localhost:8000/api/data-entry/ocr/extract" \
-H "Authorization: Bearer $TOKEN" \
-F "file=@bon.pdf" | jq -r .job_id)
# Wait for result (~4 sec)
sleep 5
curl -s "http://localhost:8000/api/data-entry/ocr/jobs/$JOB" \
-H "Authorization: Bearer $TOKEN"
```
### 3. Creare Receipt în SQLite
```bash
curl -s -X POST "http://localhost:8000/api/data-entry/receipts/" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{
"receipt_type": "bon_fiscal",
"receipt_number": "NR_BON",
"receipt_date": "YYYY-MM-DD",
"amount": 123.45,
"partner_name": "FURNIZOR",
"cui": "RO12345678",
"tva_total": 23.45,
"payment_mode": "banca",
"company_id": 110
}'
```
### 4. Aprobare în Frontend
- User verifică în http://claude-agent:3000/data-entry
- Editează dacă e necesar
- Aprobă bonul
### 5. Salvare Oracle (după aprobare)
- API-ul face automat salvarea în Oracle via PACK_CONTAFIN
- Sau manual cu `save_to_oracle.py`
## Scripturi
| Script | Descriere |
|--------|-----------|
| `ocr_client.py` | Client OCR - submit job în SQLite queue |
| `whatsapp_flow.py` | Flux complet: OCR + SQLite + Oracle |
| `process_v2.py` | OCR + PACK_CONTAFIN (direct, fără API) |
| `save_to_oracle.py` | Doar salvare Oracle cu PACK_CONTAFIN |
## Configurare
### Server OCR (claude-agent)
- Backend: http://localhost:8000
- Frontend: http://localhost:3000
- Start: `./start.sh test` sau `./start.sh central`
### Credențiale
- User: din CONTAFIN_ORACLE.NOM_UTILIZATORI
- Server: `central` (sau ce e configurat în .env)
- Company ID: din token după login
## Note
- OCR folosește doctr-plus (~4 sec per bon)
- Bonurile apar în frontend cu status "draft"
- Salvarea Oracle se face după aprobare
- Fluxul e asincron - OCR rulează în background
---
*Creat: 2026-02-03 de Echo*

View File

@@ -0,0 +1,13 @@
"""
Root pytest configuration for ROA2WEB.
Ensures proper Python path setup for all test imports.
"""
import sys
from pathlib import Path
# Make the directory that holds this file importable as soon as the module is
# loaded, so test modules can use imports rooted there.
project_root = Path(__file__).parent
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))

View File

@@ -0,0 +1,108 @@
#!/usr/bin/env python3
"""
Client pentru OCR API roa2web - adaugă job direct în SQLite queue.
Folosește aceeași coadă ca backend-ul, fără HTTP auth.
"""
import asyncio
import json
import shutil
import sys
import uuid
from datetime import datetime, timedelta
from pathlib import Path
# Paths
# Location of the OCR job queue: a SQLite database plus a directory that
# receives copies of the submitted files.
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
DB_PATH = QUEUE_DIR / "ocr_jobs.db"  # database holding the ocr_jobs table
FILES_DIR = QUEUE_DIR / "files"  # submitted files are copied here
async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Queue a file for OCR and return the id of the new job.

    The file is copied into ``FILES_DIR`` under a name prefixed with the job
    id, then a ``pending`` row describing it is inserted into ``ocr_jobs``.

    Args:
        file_path: File to submit.
        engine: Value stored in the row's ``engine`` column.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        Exception: Whatever the file copy or the database insert raises. If
            the row was not committed, the copied file is removed first so
            no unreferenced file is left in the queue directory.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())

    # Copy file to queue
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    # Anything that is not a PDF is declared as JPEG.
    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Take a single timestamp so expires_at is exactly 24 hours after created_at.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    committed = False
    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute("""
                INSERT INTO ocr_jobs (
                    id, status, file_path, mime_type, engine,
                    created_at, original_filename, expires_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                job_id, "pending", str(dest_path), mime_type, engine,
                created_at.isoformat(), file_path.name,
                expires_at.isoformat()
            ))
            await db.commit()
            committed = True
    finally:
        if not committed:
            # No row references the copy, so remove it instead of leaking it.
            dest_path.unlink(missing_ok=True)

    return job_id
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or the timeout elapses.

    The job row is re-read every 0.5 seconds.

    Args:
        job_id: Id of the row to watch in ``ocr_jobs``.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": ..., "time_ms": ...}`` when the job
        status is ``completed`` (``result`` is the decoded ``result_json``,
        or ``None`` when that column is empty);
        ``{"success": False, "error": ...}`` when the status is ``failed`` or
        the timeout elapses (error ``"Timeout"``).
    """
    import aiosqlite

    # Use the event loop's monotonic clock: it is unaffected by wall-clock
    # adjustments, and unlike ``timedelta.seconds`` it does not wrap around
    # after 24 hours.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while loop.time() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,)
            ) as cursor:
                row = await cursor.fetchone()

        if row:
            if row["status"] == "completed":
                return {
                    "success": True,
                    "result": json.loads(row["result_json"]) if row["result_json"] else None,
                    "time_ms": row["processing_time_ms"]
                }
            if row["status"] == "failed":
                return {
                    "success": False,
                    "error": row["error_message"]
                }

        await asyncio.sleep(0.5)

    return {"success": False, "error": "Timeout"}
async def process_file(file_path: Path):
    """Submit a file to the OCR queue, wait for the outcome and print a summary.

    Args:
        file_path: File to send through the queue.

    Returns:
        The decoded OCR result on success; ``None`` when the job failed,
        timed out, or completed without any result payload.
    """
    print(f"[OCR Queue] Submitting: {file_path.name}")
    job_id = await submit_ocr_job(file_path)
    print(f"[OCR Queue] Job ID: {job_id}")

    print(f"[OCR Queue] Waiting for result...")
    result = await wait_for_result(job_id)

    if not result["success"]:
        print(f"\n❌ Error: {result['error']}")
        return None

    r = result["result"]
    if r is None:
        # A completed job with an empty result_json yields None; report it
        # instead of failing on r.get(...) below.
        print("\n❌ Error: OCR job completed without a result")
        return None

    print(f"\n✅ OCR Complete ({result['time_ms']}ms)")
    print(f" CUI: {r.get('cui')}")
    print(f" Data: {r.get('receipt_date')}")
    print(f" Total: {r.get('amount')}")
    print(f" TVA: {r.get('tva_total')}")
    return r
if __name__ == "__main__":
    # Command-line entry point: the first positional argument is the file to OCR.
    if not sys.argv[1:]:
        print("Usage: python roa2web_api_client.py <file_path>")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    if file_path.exists():
        asyncio.run(process_file(file_path))
    else:
        print(f"File not found: {file_path}")
        sys.exit(1)

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""
Script complet: PDF → OCR → Oracle
Usage: python process_and_save.py <path_to_pdf> [--save]
Fără --save: doar arată ce ar salva (dry run)
Cu --save: salvează efectiv în Oracle
"""
import sys
sys.path.insert(0, "/workspace/roa2web")
import asyncio
import oracledb
from datetime import datetime
from decimal import Decimal
from pathlib import Path
# Connection parameters, expanded as keyword arguments to oracledb.connect().
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a secrets store instead.
ORACLE_CONFIG = {
    "user": "CONTAFIN_ORACLE",
    "password": "ROMFASTSOFT",
    "dsn": "10.0.20.121:1521/ROA"
}
# Expense account per supplier, keyed by CUI without the "RO" prefix.
CUI_TO_CONT = {
    "11201891": "6022",  # MOL
    "1590082": "6022",  # OMV Petrom
    "10562600": "6021",  # Dedeman
}


def get_cont_cheltuiala(cui: str) -> str:
    """Return the expense account mapped to a supplier CUI.

    The CUI is upper-cased, every "RO" occurrence is removed and surrounding
    whitespace is stripped before the lookup in ``CUI_TO_CONT``.

    Args:
        cui: Supplier fiscal code, with or without the "RO" prefix. ``None``
            and the empty string are accepted.

    Returns:
        The mapped account, or "6028" when the CUI is unknown, empty or None.
    """
    cui_clean = (cui or "").upper().replace("RO", "").strip()
    return CUI_TO_CONT.get(cui_clean, "6028")
async def process_pdf(pdf_path: Path):
    """Run the backend OCR service on a file and print the extracted fields.

    Args:
        pdf_path: File to process; anything without a ``.pdf`` suffix is
            submitted as ``image/jpeg``.

    Returns:
        The result object produced by ``ocr_service.process_image``, or
        ``None`` when the service reports failure.
    """
    from backend.modules.data_entry.services.ocr_service import ocr_service

    if pdf_path.suffix.lower() == ".pdf":
        mime_type = "application/pdf"
    else:
        mime_type = "image/jpeg"

    print(f"\n[OCR] Processing: {pdf_path.name}")
    print("-" * 50)

    ok, message, result = await ocr_service.process_image(pdf_path, mime_type)
    if ok:
        print(f"Partner: {result.partner_name}")
        print(f"CUI: {result.cui}")
        print(f"Data: {result.receipt_date}")
        print(f"Numar: {result.receipt_number}")
        print(f"Total: {result.amount}")
        print(f"TVA: {result.tva_total}")
        print(f"Confidence: {result.overall_confidence:.0%}")
        return result

    print(f"ERROR: {message}")
    return None
def save_to_oracle(result, do_commit: bool = False):
    """Insert the accounting lines for an OCR'd receipt into MARIUSM_AUTO.ACT_TEMP.

    Two or three lines are written under a single COD:
    expense account = 401 (amount without VAT), 4426 = 401 (VAT, only when
    VAT > 0) and 401 = 5311 (total).

    Args:
        result: OCR result object. The attributes read are ``receipt_date``,
            ``cui``, ``amount``, ``tva_total``, ``receipt_number`` and
            ``partner_name``.
        do_commit: Commit when True; otherwise everything is rolled back
            (dry run).

    Returns:
        The COD used for the lines: ``MAX(COD) + 1`` from MARIUSM_AUTO.ACT
        for the receipt's year and month.

    Raises:
        Exception: Any failure is printed, the transaction is rolled back
            and the exception is re-raised.
    """
    mode = "SAVE" if do_commit else "DRY RUN"
    print(f"\n[Oracle] {mode}")
    print("-" * 50)

    conn = oracledb.connect(**ORACLE_CONFIG)
    cursor = conn.cursor()

    try:
        # Fall back to today when OCR found no date. Resolving the date once
        # guarantees year, month and date all come from the same instant.
        receipt_date = result.receipt_date or datetime.now().date()
        an = receipt_date.year
        luna = receipt_date.month

        cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM MARIUSM_AUTO.ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
        cod = cursor.fetchone()[0]

        # Match the partner by fiscal code, stored with or without the RO prefix.
        cui_clean = (result.cui or "").upper().replace("RO", "").strip()
        cursor.execute("SELECT ID_PART FROM MARIUSM_AUTO.NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                       cui=cui_clean, cui2="RO"+cui_clean)
        row = cursor.fetchone()
        id_part = row[0] if row else 0  # 0 when the partner is not found

        total = float(result.amount or 0)
        tva = float(result.tva_total or 0)
        # Round away binary float artefacts (e.g. 221.23999999999998).
        fara_tva = round(total - tva, 2)

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²", which int() rejects with ValueError.
        receipt_number = result.receipt_number
        nract = int(receipt_number) if receipt_number and receipt_number.isdecimal() else 0
        cont = get_cont_cheltuiala(result.cui or "")
        expl = f"OCR: {result.partner_name or 'N/A'}"

        print(f"COD: {cod}")
        print(f"Partner ID: {id_part} (CUI: {cui_clean})")
        print(f"Cont: {cont}")

        # (debit account, credit account, amount, explanation)
        lines = [
            (cont, "401", fara_tva, expl),
            ("401", "5311", total, f"Plata {expl}"),
        ]
        if tva > 0:
            lines.insert(1, ("4426", "401", tva, f"TVA {expl}"))

        for scd, scc, suma, e in lines:
            cursor.execute("""
                INSERT INTO MARIUSM_AUTO.ACT_TEMP (
                    LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                    EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_UTIL, DATAORA
                ) VALUES (
                    :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                    :expl, :scd, :scc, :suma, :id_part, 0, SYSDATE
                )
            """, luna=luna, an=an, cod=cod, dataact=receipt_date, nract=nract,
                           expl=e, scd=scd, scc=scc, suma=suma, id_part=id_part)
            print(f" {scd} = {scc}: {suma:.2f}")

        if do_commit:
            conn.commit()
            print(f"\nSAVED to Oracle (COD={cod})")
        else:
            conn.rollback()
            print(f"\nDRY RUN - not saved (would be COD={cod})")

        return cod
    except Exception as e:
        print(f"ERROR: {e}")
        conn.rollback()
        raise
    finally:
        cursor.close()
        conn.close()
async def main():
    """Command-line driver: OCR the given file, then save (or dry-run) to Oracle.

    The first argument is the file path; if it does not exist it is retried
    relative to the OCR sample fixtures directory. ``--save`` anywhere on the
    command line turns the dry run into a real commit.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python process_and_save.py <pdf_path> [--save]")
        return

    requested = argv[1]
    pdf_path = Path(requested)
    if not pdf_path.exists():
        # Retry the same name inside the OCR sample fixtures directory.
        pdf_path = Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{requested}")
        if not pdf_path.exists():
            print(f"File not found: {requested}")
            return

    do_save = "--save" in argv
    banner = "=" * 50

    print(banner)
    print("PDF -> OCR -> Oracle")
    print(banner)

    result = await process_pdf(pdf_path)
    if result:
        save_to_oracle(result, do_commit=do_save)

    print("\n" + banner)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,190 @@
#!/usr/bin/env python3
"""
Script complet: PDF → OCR → Oracle (cu PACK_CONTAFIN)
Usage: python process_and_save.py <path_to_pdf> [--save]
"""
import sys
sys.path.insert(0, "/workspace/roa2web")
import asyncio
import oracledb
from datetime import datetime
from decimal import Decimal
from pathlib import Path
# Connection parameters, expanded as keyword arguments to oracledb.connect().
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a secrets store instead.
ORACLE_CONFIG = {
    "user": "MARIUSM_AUTO",
    "password": "ROMFASTSOFT",
    "dsn": "10.0.20.121:1521/ROA"
}
# Expense account per supplier, keyed by CUI without the "RO" prefix.
CUI_TO_CONT = {
    "11201891": "6022",  # MOL
    "1590082": "6022",  # OMV Petrom
    "14991381": "6022",  # MOL Romania
    "10562600": "6021",  # Dedeman
}


def get_cont_cheltuiala(cui: str) -> str:
    """Return the expense account mapped to a supplier CUI.

    The CUI is upper-cased, every "RO" occurrence is removed and surrounding
    whitespace is stripped before the lookup in ``CUI_TO_CONT``.

    Args:
        cui: Supplier fiscal code, with or without the "RO" prefix. ``None``
            and the empty string are accepted.

    Returns:
        The mapped account, or "6028" when the CUI is unknown, empty or None.
    """
    cui_clean = (cui or "").upper().replace("RO", "").strip()
    return CUI_TO_CONT.get(cui_clean, "6028")
async def process_pdf(pdf_path: Path):
    """Run the backend OCR service on a file and print the extracted fields.

    Args:
        pdf_path: File to process; anything without a ``.pdf`` suffix is
            submitted as ``image/jpeg``.

    Returns:
        The result object produced by ``ocr_service.process_image``, or
        ``None`` when the service reports failure.
    """
    from backend.modules.data_entry.services.ocr_service import ocr_service

    if pdf_path.suffix.lower() == ".pdf":
        mime_type = "application/pdf"
    else:
        mime_type = "image/jpeg"

    print(f"\n[OCR] Processing: {pdf_path.name}")
    print("-" * 50)

    ok, message, result = await ocr_service.process_image(pdf_path, mime_type)
    if ok:
        print(f"Partner: {result.partner_name}")
        print(f"CUI: {result.cui}")
        print(f"Data: {result.receipt_date}")
        print(f"Numar: {result.receipt_number}")
        print(f"Total: {result.amount}")
        print(f"TVA: {result.tva_total}")
        print(f"Confidence: {result.overall_confidence:.0%}")
        return result

    print(f"ERROR: {message}")
    return None
def save_to_oracle_with_pack(result, do_commit: bool = False):
    """Write the accounting lines for an OCR'd receipt through PACK_CONTAFIN.

    Sequence: call ``PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL``, insert two
    or three lines into ACT_TEMP (expense = 401, optional 4426 = 401 for VAT,
    401 = 5311 for the total), then call
    ``PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL``.

    Args:
        result: OCR result object. The attributes read are ``receipt_date``,
            ``cui``, ``amount``, ``tva_total``, ``receipt_number`` and
            ``partner_name``.
        do_commit: Commit when True; otherwise everything is rolled back
            (dry run).

    Returns:
        ``(cod, result_msg)``: the COD used (``MAX(COD) + 1`` from ACT for the
        receipt's year and month) and the message returned by the finalize
        procedure through its output argument.

    Raises:
        Exception: Any failure is printed, the transaction is rolled back
            and the exception is re-raised.
    """
    mode = "SAVE" if do_commit else "DRY RUN"
    print(f"\n[Oracle + PACK_CONTAFIN] {mode}")
    print("-" * 50)

    conn = oracledb.connect(**ORACLE_CONFIG)
    cursor = conn.cursor()

    try:
        # Fall back to today when OCR found no date. Resolving the date once
        # guarantees year, month and date all come from the same instant.
        receipt_date = result.receipt_date or datetime.now().date()
        an = receipt_date.year
        luna = receipt_date.month

        # Parameters
        id_util = 0  # default user id
        id_sucursala = 0

        # 1. Initialize
        print("[1] INITIALIZEAZA_SCRIERE_ACT_RUL...")
        cursor.callproc('PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL', [
            id_util,
            datetime.now(),
            an,
            luna,
            0,  # suprascriere_cod
            0,  # suprascriere_anluna
            0,  # scrie_sterge (0 = write)
            id_sucursala
        ])
        print(" OK")

        # Compute the next COD for this year/month.
        cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
        cod = cursor.fetchone()[0]

        # Partner: match by fiscal code, stored with or without the RO prefix.
        cui_clean = (result.cui or "").upper().replace("RO", "").strip()
        cursor.execute("SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                       cui=cui_clean, cui2="RO"+cui_clean)
        row = cursor.fetchone()
        id_part = row[0] if row else 0  # 0 when the partner is not found

        total = float(result.amount or 0)
        tva = float(result.tva_total or 0)
        # Round away binary float artefacts (e.g. 221.23999999999998).
        fara_tva = round(total - tva, 2)

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²", which int() rejects with ValueError.
        receipt_number = result.receipt_number
        nract = int(receipt_number) if receipt_number and receipt_number.isdecimal() else 0
        cont = get_cont_cheltuiala(result.cui or "")
        expl = f"OCR: {result.partner_name or 'N/A'}"

        print(f" COD: {cod}, Partner ID: {id_part}, Cont: {cont}")

        # 2. INSERT into ACT_TEMP
        print("[2] INSERT ACT_TEMP...")

        # (debit account, credit account, amount, explanation, ID_PARTC, ID_PARTD)
        lines = [
            (cont, "401", fara_tva, expl, id_part, 0),  # expense - partner on credit
            ("401", "5311", total, f"Plata {expl}", 0, id_part),  # payment - partner on debit
        ]
        if tva > 0:
            lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))  # VAT - partner on credit

        for scd, scc, suma, e, id_partc, id_partd in lines:
            cursor.execute("""
                INSERT INTO ACT_TEMP (
                    LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                    EXPLICATIA, SCD, SCC, SUMA,
                    ID_PARTC, ID_PARTD, ID_UTIL, DATAORA
                ) VALUES (
                    :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                    :expl, :scd, :scc, :suma,
                    :id_partc, :id_partd, :id_util, SYSDATE
                )
            """, luna=luna, an=an, cod=cod, dataact=receipt_date, nract=nract,
                           expl=e, scd=scd, scc=scc, suma=suma,
                           id_partc=id_partc, id_partd=id_partd, id_util=id_util)
            print(f" {scd} = {scc}: {suma:.2f}")

        # 3. Finalize
        print("[3] FINALIZEAZA_SCRIERE_ACT_RUL...")
        mesaj = cursor.var(oracledb.STRING, 4000)  # receives the procedure's output message
        cursor.callproc('PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL', [
            id_util,
            cod,
            0,  # scrie_sterge
            0,  # modificare_nota
            0,  # scrie_cump_vanz
            mesaj
        ])
        result_msg = mesaj.getvalue()
        print(f" Mesaj: {result_msg}")

        if do_commit:
            conn.commit()
            print(f"\n✅ SALVAT în Oracle (COD={cod})")
        else:
            conn.rollback()
            print(f"\n⚠️ DRY RUN - rollback (COD ar fi fost {cod})")

        return cod, result_msg

    except Exception as e:
        print(f"❌ Eroare: {e}")
        conn.rollback()
        raise
    finally:
        cursor.close()
        conn.close()
async def main():
    """Command-line driver: OCR the given file, then save via PACK_CONTAFIN.

    The first argument is the file path; if it does not exist it is retried
    relative to the OCR sample fixtures directory. ``--save`` anywhere on the
    command line turns the dry run into a real commit.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python process_and_save.py <pdf_path> [--save]")
        return

    requested = argv[1]
    pdf_path = Path(requested)
    if not pdf_path.exists():
        # Retry the same name inside the OCR sample fixtures directory.
        pdf_path = Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{requested}")
        if not pdf_path.exists():
            print(f"File not found: {requested}")
            return

    do_save = "--save" in argv
    banner = "=" * 50

    print(banner)
    print("PDF -> OCR -> Oracle (PACK_CONTAFIN)")
    print(banner)

    result = await process_pdf(pdf_path)
    if result:
        save_to_oracle_with_pack(result, do_commit=do_save)

    print("\n" + banner)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python3
"""
Script pentru salvare note contabile în Oracle din date OCR.
"""
import sys
sys.path.insert(0, "/workspace/roa2web")
import oracledb
from datetime import datetime
from decimal import Decimal
# Connection parameters, expanded as keyword arguments to oracledb.connect().
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a secrets store instead.
ORACLE_CONFIG = {
    "user": "CONTAFIN_ORACLE",
    "password": "ROMFASTSOFT",
    "dsn": "10.0.20.121:1521/ROA"
}
def get_partner_id(cursor, cui: str) -> int:
    """Look up a partner's ID_PART in MARIUSM_AUTO.NOM_PARTENERI by fiscal code.

    The CUI is normalized (upper-cased, "RO" removed, whitespace stripped)
    and tried first as-is, then with an "RO" prefix.

    Args:
        cursor: Open database cursor used to run the lookups.
        cui: Partner fiscal code, with or without the "RO" prefix.

    Returns:
        The ID_PART of the first match, or 0 when neither form is found.
    """
    cui_clean = cui.upper().replace("RO", "").strip()
    lookup_sql = "SELECT ID_PART FROM MARIUSM_AUTO.NOM_PARTENERI WHERE COD_FISCAL = :cui"

    # Bare number first, then the RO-prefixed form.
    for candidate in (cui_clean, "RO" + cui_clean):
        cursor.execute(lookup_sql, cui=candidate)
        match = cursor.fetchone()
        if match:
            print(f" Partner găsit: ID={match[0]} pentru CUI={candidate}")
            return match[0]

    print(f" ⚠️ Partner NU găsit pentru CUI {cui_clean}, folosim ID=0")
    return 0
def save_bon_achizitie(
    cursor,
    receipt_date: datetime,
    receipt_number: str,
    amount: Decimal,
    tva_amount: Decimal,
    partner_cui: str,
    partner_name: str,
    cont_cheltuiala: str = "6028",
    explicatie: str = "Import OCR",
    proc_tva: float = 1.19
):
    """Insert the accounting lines for a fiscal receipt into MARIUSM_AUTO.ACT_TEMP.

    Note: this does NOT call PACK_CONTAFIN - it only inserts into staging.
    Nothing is committed here; the caller owns the transaction.

    Lines written under one COD:
      1. ``cont_cheltuiala`` = 401 for the amount without VAT,
      2. 4426 = 401 for the VAT (only when VAT > 0),
      3. 401 = 5311 for the total.

    Args:
        cursor: Open database cursor.
        receipt_date: Receipt date; its year and month select the COD range.
        receipt_number: Receipt number; stored as 0 unless it consists only
            of decimal digits.
        amount: Total including VAT.
        tva_amount: VAT amount.
        partner_cui: Supplier fiscal code, resolved through ``get_partner_id``.
        partner_name: Supplier name, appended to the first line's explanation.
        cont_cheltuiala: Debit account of the expense line.
        explicatie: Explanation text used on all lines.
        proc_tva: Value stored in PROC_TVA on the VAT line. The default 1.19
            is the previously hard-coded value (presumably 1 + a 19% VAT
            rate - confirm against the schema).

    Returns:
        The COD used: ``MAX(COD) + 1`` from MARIUSM_AUTO.ACT for that
        year and month.
    """
    an = receipt_date.year
    luna = receipt_date.month

    # Generate the next code for this year/month
    cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM MARIUSM_AUTO.ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
    cod = cursor.fetchone()[0]

    id_part = get_partner_id(cursor, partner_cui)

    total_cu_tva = float(amount)
    total_fara_tva = float(amount - tva_amount)
    tva = float(tva_amount)
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as "²", which int() rejects with ValueError.
    nract = int(receipt_number) if receipt_number and receipt_number.isdecimal() else 0

    print(f"\nNote contabile (COD={cod}):")
    print(f" Data: {receipt_date.date()}, Nr: {nract}")
    print(f" Fără TVA: {total_fara_tva:.2f}, TVA: {tva:.2f}, Total: {total_cu_tva:.2f}")

    # LINE 1: Expense = Supplier
    cursor.execute("""
        INSERT INTO MARIUSM_AUTO.ACT_TEMP (
            LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
            EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_UTIL, DATAORA
        ) VALUES (
            :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
            :expl, :scd, :scc, :suma, :id_part, 0, SYSDATE
        )
    """,
        luna=luna, an=an, cod=cod,
        dataact=receipt_date.date(), nract=nract,
        expl=f"{explicatie} - {partner_name}",
        scd=cont_cheltuiala, scc="401", suma=total_fara_tva, id_part=id_part
    )
    print(f"{cont_cheltuiala} = 401: {total_fara_tva:.2f}")

    # LINE 2: VAT = Supplier
    if tva > 0:
        cursor.execute("""
            INSERT INTO MARIUSM_AUTO.ACT_TEMP (
                LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                EXPLICATIA, SCD, SCC, SUMA, PROC_TVA, ID_PARTC, ID_UTIL, DATAORA
            ) VALUES (
                :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                :expl, :scd, :scc, :suma, :proc_tva, :id_part, 0, SYSDATE
            )
        """,
            luna=luna, an=an, cod=cod,
            dataact=receipt_date.date(), nract=nract,
            expl=f"TVA {explicatie}",
            scd="4426", scc="401", suma=tva, proc_tva=proc_tva, id_part=id_part
        )
        print(f" ✓ 4426 = 401: {tva:.2f}")

    # LINE 3: Supplier = Cash (the partner goes on the debit side here)
    cursor.execute("""
        INSERT INTO MARIUSM_AUTO.ACT_TEMP (
            LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
            EXPLICATIA, SCD, SCC, SUMA, ID_PARTD, ID_UTIL, DATAORA
        ) VALUES (
            :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
            :expl, :scd, :scc, :suma, :id_part, 0, SYSDATE
        )
    """,
        luna=luna, an=an, cod=cod,
        dataact=receipt_date.date(), nract=nract,
        expl=f"Plata {explicatie}",
        scd="401", scc="5311", suma=total_cu_tva, id_part=id_part
    )
    print(f" ✓ 401 = 5311: {total_cu_tva:.2f}")

    return cod
def test_insert():
    """Smoke test: stage a sample receipt in ACT_TEMP, read it back, roll back.

    Connects with ``ORACLE_CONFIG``, writes one hard-coded receipt through
    ``save_bon_achizitie``, prints the staged lines and always rolls back, so
    nothing is persisted. Any exception is printed and re-raised.
    """
    rule = "=" * 50
    print(rule)
    print("TEST: Salvare note contabile în Oracle")
    print(rule)

    print("\nConectare la Oracle TEST...")
    conn = oracledb.connect(**ORACLE_CONFIG)
    cursor = conn.cursor()
    print("Conexiune OK!")

    # Sample data, as it would come out of OCR
    receipt_number = "1200302"
    sample = {
        "receipt_date": datetime(2024, 8, 1),
        "receipt_number": receipt_number,
        "amount": Decimal("263.28"),
        "tva_amount": Decimal("42.04"),
        "partner_cui": "RO11201891",
        "partner_name": "Benzinărie Test",
        "cont_cheltuiala": "6022",
        "explicatie": f"Bon benzină {receipt_number}",
    }

    try:
        cod = save_bon_achizitie(cursor, **sample)

        # Read back what was staged
        cursor.execute("SELECT SCD, SCC, SUMA FROM MARIUSM_AUTO.ACT_TEMP WHERE COD = :cod ORDER BY ROWID", cod=cod)
        print(f"\nVerificare ACT_TEMP (COD={cod}):")
        for scd, scc, suma in cursor:
            print(f" {scd} = {scc}: {suma:.2f}")

        # Test only: never persist
        print("\n⚠️ ROLLBACK (test only)")
        conn.rollback()

        print("\n" + rule)
        print("✅ TEST REUȘIT!")
        print(rule)
    except Exception as e:
        print(f"\n❌ EROARE: {e}")
        conn.rollback()
        raise
    finally:
        cursor.close()
        conn.close()


if __name__ == "__main__":
    test_insert()

View File

@@ -0,0 +1,43 @@
#!/usr/bin/env python3
import sys
import asyncio
from pathlib import Path
# Put the backend directory on the import path before the OCR service import.
# NOTE(review): main() imports `backend.modules...`, which resolves from the
# parent directory (/workspace/roa2web), not from .../backend itself; this
# presumably only works when run from /workspace/roa2web - confirm.
sys.path.insert(0, "/workspace/roa2web/backend")
async def main():
    """Command-line driver: run OCR on one file and print the extracted fields.

    The first argument is the file path; if it does not exist it is retried
    relative to the OCR sample fixtures directory.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python test_ocr_simple.py <path_to_pdf>")
        return

    requested = argv[1]
    pdf_path = Path(requested)
    if not pdf_path.exists():
        # Retry the same name inside the OCR sample fixtures directory.
        pdf_path = Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{requested}")
        if not pdf_path.exists():
            print(f"Error: File not found: {requested}")
            return

    print(f"Processing: {pdf_path}")

    from backend.modules.data_entry.services.ocr_service import ocr_service

    if pdf_path.suffix.lower() == ".pdf":
        mime_type = "application/pdf"
    else:
        mime_type = "image/jpeg"

    print("Running OCR...")
    ok, message, result = await ocr_service.process_image(pdf_path, mime_type)
    if not ok:
        print(f"Error: {message}")
        return

    print(f"Partner: {result.partner_name}")
    print(f"CUI: {result.cui}")
    print(f"Date: {result.receipt_date}")
    print(f"Amount: {result.amount}")
    print(f"TVA: {result.tva_total}")
    print(f"Confidence: {result.overall_confidence:.2%}")
    print("SUCCESS!")


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
Flux complet WhatsApp: PDF → OCR (via queue) → Oracle
Usage: python roa2web_whatsapp.py <file_path> [--save]
"""
import asyncio
import json
import shutil
import sys
import uuid
import oracledb
from datetime import datetime, timedelta
from pathlib import Path
from decimal import Decimal
# OCR Queue paths
# A SQLite database plus a directory that receives copies of submitted files.
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
DB_PATH = QUEUE_DIR / "ocr_jobs.db"  # database holding the ocr_jobs table
FILES_DIR = QUEUE_DIR / "files"  # submitted files are copied here

# Oracle config
# Expanded as keyword arguments to oracledb.connect().
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a secrets store instead.
ORACLE_CONFIG = {
    "user": "MARIUSM_AUTO",
    "password": "ROMFASTSOFT",
    "dsn": "10.0.20.121:1521/ROA"
}
# Expense account per supplier, keyed by CUI without the "RO" prefix.
CUI_TO_CONT = {
    "11201891": "6022",
    "1590082": "6022",
    "14991381": "6022",
    "10562600": "6021",
}


def get_cont(cui: str) -> str:
    """Return the expense account mapped to a supplier CUI.

    The CUI is upper-cased, every "RO" occurrence is removed and surrounding
    whitespace is stripped before the lookup in ``CUI_TO_CONT``.

    Args:
        cui: Supplier fiscal code, with or without the "RO" prefix. ``None``
            and the empty string are accepted.

    Returns:
        The mapped account, or "6028" when the CUI is unknown, empty or None.
    """
    return CUI_TO_CONT.get((cui or "").upper().replace("RO", "").strip(), "6028")
async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Queue a file for OCR and return the id of the new job.

    The file is copied into ``FILES_DIR`` under a name prefixed with the job
    id, then a ``pending`` row describing it is inserted into ``ocr_jobs``.

    Args:
        file_path: File to submit.
        engine: Value stored in the row's ``engine`` column.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        Exception: Whatever the file copy or the database insert raises. If
            the row was not committed, the copied file is removed first so
            no unreferenced file is left in the queue directory.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    # Anything that is not a PDF is declared as JPEG.
    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Take a single timestamp so expires_at is exactly 24 hours after created_at.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    committed = False
    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute("""
                INSERT INTO ocr_jobs (id, status, file_path, mime_type, engine, created_at, original_filename, expires_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (job_id, "pending", str(dest_path), mime_type, engine,
                  created_at.isoformat(), file_path.name,
                  expires_at.isoformat()))
            await db.commit()
            committed = True
    finally:
        if not committed:
            # No row references the copy, so remove it instead of leaking it.
            dest_path.unlink(missing_ok=True)

    return job_id
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or the timeout elapses.

    The job row is re-read every 0.3 seconds.

    Args:
        job_id: Id of the row to watch in ``ocr_jobs``.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": ..., "time_ms": ...}`` when the job
        status is ``completed`` (``result`` is the decoded ``result_json``,
        or ``None`` when that column is empty);
        ``{"success": False, "error": ...}`` when the status is ``failed`` or
        the timeout elapses (error ``"Timeout"``).
    """
    import aiosqlite

    # Use the event loop's monotonic clock: it is unaffected by wall-clock
    # adjustments, and unlike ``timedelta.seconds`` it does not wrap around
    # after 24 hours.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while loop.time() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,)
            ) as cursor:
                row = await cursor.fetchone()

        if row and row["status"] == "completed":
            # result_json may be NULL; json.loads(None) would raise TypeError.
            payload = row["result_json"]
            return {
                "success": True,
                "result": json.loads(payload) if payload else None,
                "time_ms": row["processing_time_ms"],
            }
        elif row and row["status"] == "failed":
            return {"success": False, "error": row["error_message"]}

        await asyncio.sleep(0.3)

    return {"success": False, "error": "Timeout"}
def save_to_oracle(ocr_result: dict, do_commit: bool = False) -> dict:
    """Write the accounting lines for an OCR result through PACK_CONTAFIN.

    Sequence: call ``PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL``, insert two
    or three lines into ACT_TEMP (expense = 401, optional 4426 = 401 for VAT,
    401 = 5311 for the total), then call
    ``PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL``.

    Args:
        ocr_result: Decoded OCR result. Keys read: ``receipt_date``
            (first 10 characters parsed as ``YYYY-MM-DD``), ``cui``,
            ``amount``, ``tva_total``, ``receipt_number``, ``partner_name``.
        do_commit: Commit when True; otherwise everything is rolled back
            (dry run).

    Returns:
        On success ``{"success": True, "cod", "luna", "an", "saved",
        "message"}``, where ``saved`` mirrors ``do_commit`` and ``message``
        is the finalize procedure's output message. On any failure,
        including a failed connection, ``{"success": False, "error": str}``.
    """
    # Connection errors are reported through the result dict like every
    # other failure, instead of escaping to the caller.
    try:
        conn = oracledb.connect(**ORACLE_CONFIG)
    except Exception as e:
        return {"success": False, "error": str(e)}

    cursor = conn.cursor()

    try:
        # Parse date; fall back to today when OCR found none
        date_str = ocr_result.get("receipt_date")
        if date_str:
            receipt_date = datetime.strptime(date_str[:10], "%Y-%m-%d").date()
        else:
            receipt_date = datetime.now().date()

        an, luna = receipt_date.year, receipt_date.month

        # Init
        cursor.callproc('PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL', [0, datetime.now(), an, luna, 0, 0, 0, 0])

        # Get COD
        cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
        cod = cursor.fetchone()[0]

        # Partner: match by fiscal code, stored with or without the RO prefix
        cui_clean = (ocr_result.get("cui") or "").upper().replace("RO", "").strip()
        cursor.execute("SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                       cui=cui_clean, cui2="RO"+cui_clean)
        row = cursor.fetchone()
        id_part = row[0] if row else 0  # 0 when the partner is not found

        # Amounts
        total = float(ocr_result.get("amount") or 0)
        tva = float(ocr_result.get("tva_total") or 0)
        # Round away binary float artefacts (e.g. 221.23999999999998).
        fara_tva = round(total - tva, 2)

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²", which int() rejects with ValueError.
        raw_number = ocr_result.get("receipt_number")
        number_text = "" if raw_number is None else str(raw_number)
        nract = int(number_text) if number_text.isdecimal() else 0
        cont = get_cont(ocr_result.get("cui") or "")
        expl = f"OCR: {ocr_result.get('partner_name') or 'N/A'}"

        # Insert lines: (debit account, credit account, amount, explanation, ID_PARTC, ID_PARTD)
        lines = [
            (cont, "401", fara_tva, expl, id_part, 0),
            ("401", "5311", total, f"Plata {expl}", 0, id_part),
        ]
        if tva > 0:
            lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))

        for scd, scc, suma, e, id_partc, id_partd in lines:
            cursor.execute("""
                INSERT INTO ACT_TEMP (LUNA, AN, COD, DATAIREG, DATAACT, NRACT, EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_PARTD, ID_UTIL, DATAORA)
                VALUES (:luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract, :expl, :scd, :scc, :suma, :id_partc, :id_partd, 0, SYSDATE)
            """, luna=luna, an=an, cod=cod, dataact=receipt_date, nract=nract, expl=e, scd=scd, scc=scc, suma=suma, id_partc=id_partc, id_partd=id_partd)

        # Finalize
        mesaj = cursor.var(oracledb.STRING, 4000)  # receives the procedure's output message
        cursor.callproc('PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL', [0, cod, 0, 0, 0, mesaj])
        message = mesaj.getvalue()

        if do_commit:
            conn.commit()
        else:
            conn.rollback()
        # Surface the procedure's message instead of discarding it.
        return {"success": True, "cod": cod, "luna": luna, "an": an, "saved": bool(do_commit), "message": message}

    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        cursor.close()
        conn.close()
async def process_whatsapp_file(file_path: Path, do_save: bool = False):
    """Run the full flow for one file: OCR through the queue, then Oracle.

    Args:
        file_path: File to process.
        do_save: Passed to ``save_to_oracle`` as ``do_commit``; False means
            dry run.

    Returns:
        ``{"ocr": ..., "ocr_time_ms": ..., "oracle": ...}`` once OCR produced
        a result (the ``oracle`` entry carries its own ``success`` flag);
        ``None`` when OCR failed, timed out or completed without a result.
    """
    print(f"📄 Procesez: {file_path.name}")

    # OCR
    print("🔍 OCR...")
    job_id = await submit_ocr_job(file_path)
    result = await wait_for_result(job_id)

    if not result["success"]:
        print(f"❌ OCR Error: {result.get('error')}")
        return None

    ocr = result["result"]
    if ocr is None:
        # A completed job can carry no payload; report it instead of failing
        # on ocr.get(...) below.
        print("❌ OCR Error: job completed without a result")
        return None

    ocr_time = result["time_ms"]
    print(f"✅ OCR OK ({ocr_time}ms)")
    print(f" CUI: {ocr.get('cui')}, Data: {ocr.get('receipt_date')}")
    print(f" Total: {ocr.get('amount')} RON, TVA: {ocr.get('tva_total')}")

    # Oracle
    print("💾 Oracle...")
    oracle_result = save_to_oracle(ocr, do_commit=do_save)

    if oracle_result["success"]:
        if oracle_result["saved"]:
            print(f"✅ SALVAT: COD={oracle_result['cod']}, {oracle_result['luna']:02d}/{oracle_result['an']}")
        else:
            print(f"⚠️ DRY RUN: ar fi COD={oracle_result['cod']}")
    else:
        print(f"❌ Oracle Error: {oracle_result.get('error')}")

    return {
        "ocr": ocr,
        "ocr_time_ms": ocr_time,
        "oracle": oracle_result
    }
if __name__ == "__main__":
    # Command-line entry point: <file_path> plus an optional --save flag.
    if not sys.argv[1:]:
        print("Usage: python roa2web_whatsapp.py <file_path> [--save]")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    do_save = "--save" in sys.argv

    if file_path.exists():
        asyncio.run(process_whatsapp_file(file_path, do_save))
    else:
        print(f"File not found: {file_path}")
        sys.exit(1)