feat: Add WhatsApp import scripts for receipt processing
- OCR client for SQLite queue - WhatsApp flow: PDF -> OCR -> SQLite -> Oracle - PACK_CONTAFIN integration for Oracle save - README with flux documentation
This commit is contained in:
92
backend/scripts/whatsapp_import/README.md
Normal file
92
backend/scripts/whatsapp_import/README.md
Normal file
@@ -0,0 +1,92 @@
|
||||
# WhatsApp Import - Flux Bonuri Fiscale
|
||||
|
||||
## Descriere
|
||||
Scripturi pentru importul automat al bonurilor fiscale primite pe WhatsApp.
|
||||
|
||||
## Flux Complet
|
||||
|
||||
```
|
||||
PDF (WhatsApp) --> OCR API (doctr+, ~4sec) --> SQLite (receipts) --> Oracle (ACT)
|
||||
```
|
||||
|
||||
## Pași
|
||||
|
||||
### 1. Recepție PDF (Clawdbot pe moltbot)
|
||||
- PDF primit pe WhatsApp ajunge în `~/.clawdbot/media/inbound/`
|
||||
- Clawdbot detectează fișierul și pornește procesarea
|
||||
|
||||
### 2. OCR prin API (claude-agent:8000)
|
||||
```bash
|
||||
# Login
|
||||
TOKEN=$(curl -s -X POST "http://localhost:8000/api/auth/login" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username":"USER","password":"PASS","server_id":"central"}' \
|
||||
| jq -r .access_token)
|
||||
|
||||
# Submit OCR job
|
||||
JOB=$(curl -s -X POST "http://localhost:8000/api/data-entry/ocr/extract" \
|
||||
-H "Authorization: Bearer $TOKEN" \
|
||||
-F "file=@bon.pdf" | jq -r .job_id)
|
||||
|
||||
# Wait for result (~4 sec)
|
||||
sleep 5
|
||||
curl -s "http://localhost:8000/api/data-entry/ocr/jobs/$JOB" \
|
||||
-H "Authorization: Bearer $TOKEN"
|
||||
```
|
||||
|
||||
### 3. Creare Receipt în SQLite
|
||||
```bash
|
||||
curl -s -X POST "http://localhost:8000/api/data-entry/receipts/" \
|
||||
-H "Authorization: Bearer $TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"receipt_type": "bon_fiscal",
|
||||
"receipt_number": "NR_BON",
|
||||
"receipt_date": "YYYY-MM-DD",
|
||||
"amount": 123.45,
|
||||
"partner_name": "FURNIZOR",
|
||||
"cui": "RO12345678",
|
||||
"tva_total": 23.45,
|
||||
"payment_mode": "banca",
|
||||
"company_id": 110
|
||||
}'
|
||||
```
|
||||
|
||||
### 4. Aprobare în Frontend
|
||||
- User verifică în http://claude-agent:3000/data-entry
|
||||
- Editează dacă e necesar
|
||||
- Aprobă bonul
|
||||
|
||||
### 5. Salvare Oracle (după aprobare)
|
||||
- API-ul face automat salvarea în Oracle via PACK_CONTAFIN
|
||||
- Sau manual cu `save_to_oracle.py`
|
||||
|
||||
## Scripturi
|
||||
|
||||
| Script | Descriere |
|
||||
|--------|-----------|
|
||||
| `ocr_client.py` | Client OCR - submit job în SQLite queue |
|
||||
| `whatsapp_flow.py` | Flux complet: OCR + SQLite + Oracle |
|
||||
| `process_v2.py` | OCR + PACK_CONTAFIN (direct, fără API) |
|
||||
| `save_to_oracle.py` | Doar inserare în ACT_TEMP (staging), fără apel PACK_CONTAFIN; rulat direct, face un test cu rollback |
|
||||
|
||||
## Configurare
|
||||
|
||||
### Server OCR (claude-agent)
|
||||
- Backend: http://localhost:8000
|
||||
- Frontend: http://localhost:3000
|
||||
- Start: `./start.sh test` sau `./start.sh central`
|
||||
|
||||
### Credențiale
|
||||
- User: din CONTAFIN_ORACLE.NOM_UTILIZATORI
|
||||
- Server: `central` (sau ce e configurat în .env)
|
||||
- Company ID: din token după login
|
||||
|
||||
## Note
|
||||
- OCR folosește doctr-plus (~4 sec per bon)
|
||||
- Bonurile apar în frontend cu status "draft"
|
||||
- Salvarea Oracle se face după aprobare
|
||||
- Fluxul e asincron - OCR rulează în background
|
||||
|
||||
---
|
||||
*Creat: 2026-02-03 de Echo*
|
||||
13
backend/scripts/whatsapp_import/conftest.py
Normal file
13
backend/scripts/whatsapp_import/conftest.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""
|
||||
Root pytest configuration for ROA2WEB.
|
||||
|
||||
Ensures proper Python path setup for all test imports.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Put the directory that contains this conftest.py at the front of sys.path
# (only once), so modules living next to it can be imported by the tests.
# NOTE(review): the name says "project root", but the value is this file's own
# directory — confirm that is the intended import root.
project_root = Path(__file__).parent
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))
|
||||
108
backend/scripts/whatsapp_import/ocr_client.py
Normal file
108
backend/scripts/whatsapp_import/ocr_client.py
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Client pentru OCR API roa2web - adaugă job direct în SQLite queue.
|
||||
Folosește aceeași coadă ca backend-ul, fără HTTP auth.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
# Locations of the SQLite-backed OCR queue and of the files waiting in it.
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
DB_PATH = QUEUE_DIR / "ocr_jobs.db"
FILES_DIR = QUEUE_DIR / "files"


async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Copy *file_path* into the queue directory and insert a pending OCR job.

    Args:
        file_path: Document to be OCR'd.
        engine: Value stored in the ``engine`` column of the job row.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        Any error from ``shutil.copy`` or ``aiosqlite``. If the insert fails,
        the queued copy of the file is deleted before the error propagates,
        so no orphan file is left behind without a job row.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())

    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    # Anything that is not a PDF is declared as JPEG.
    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Read the clock once so expires_at is exactly created_at + 24 hours.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (
                    id, status, file_path, mime_type, engine,
                    created_at, original_filename, expires_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id, "pending", str(dest_path), mime_type, engine,
                    created_at.isoformat(), file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
    except Exception:
        # The job row was not stored: remove the copy we just queued.
        dest_path.unlink(missing_ok=True)
        raise

    return job_id
|
||||
|
||||
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or *timeout* elapses.

    Args:
        job_id: Id of the row in ``ocr_jobs`` to watch.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": <parsed result_json or None>,
        "time_ms": ...}`` when the job status is ``completed``;
        ``{"success": False, "error": ...}`` when it is ``failed`` or when the
        timeout expires (error ``"Timeout"``).
    """
    import aiosqlite

    # Use the event loop's monotonic clock for the deadline:
    # ``timedelta.seconds`` is only the seconds *component* (it wraps every
    # day) and wall-clock time can jump.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while loop.time() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,)
            ) as cursor:
                row = await cursor.fetchone()

        if row:
            if row["status"] == "completed":
                return {
                    "success": True,
                    "result": json.loads(row["result_json"]) if row["result_json"] else None,
                    "time_ms": row["processing_time_ms"],
                }
            if row["status"] == "failed":
                return {"success": False, "error": row["error_message"]}

        # Still pending (or the row is not visible yet): try again shortly.
        await asyncio.sleep(0.5)

    return {"success": False, "error": "Timeout"}
|
||||
|
||||
async def process_file(file_path: Path):
    """Submit *file_path* to the OCR queue, wait for the job and print a summary.

    Returns:
        The parsed OCR result (a dict) on success, or ``None`` when the job
        failed, timed out, or completed without any result payload.
    """
    print(f"[OCR Queue] Submitting: {file_path.name}")
    job_id = await submit_ocr_job(file_path)
    print(f"[OCR Queue] Job ID: {job_id}")
    print("[OCR Queue] Waiting for result...")

    result = await wait_for_result(job_id)

    if not result["success"]:
        print(f"\n❌ Error: {result['error']}")
        return None

    r = result["result"]
    if r is None:
        # A completed job may carry an empty result_json; there is nothing to
        # summarise, so report it instead of failing on ``None.get``.
        print("\n❌ Error: OCR job completed without a result")
        return None

    print(f"\n✅ OCR Complete ({result['time_ms']}ms)")
    print(f" CUI: {r.get('cui')}")
    print(f" Data: {r.get('receipt_date')}")
    print(f" Total: {r.get('amount')}")
    print(f" TVA: {r.get('tva_total')}")
    return r
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # The usage text must name this script (ocr_client.py).
        print("Usage: python ocr_client.py <file_path>")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    if not file_path.exists():
        print(f"File not found: {file_path}")
        sys.exit(1)

    # process_file returns None on failure; reflect that in the exit status
    # so an automated caller can tell success from failure.
    sys.exit(0 if asyncio.run(process_file(file_path)) is not None else 1)
|
||||
154
backend/scripts/whatsapp_import/process_and_save.py
Normal file
154
backend/scripts/whatsapp_import/process_and_save.py
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script complet: PDF → OCR → Oracle
|
||||
Usage: python process_and_save.py <path_to_pdf> [--save]
|
||||
|
||||
Fără --save: doar arată ce ar salva (dry run)
|
||||
Cu --save: salvează efectiv în Oracle
|
||||
"""
|
||||
import sys
|
||||
sys.path.insert(0, "/workspace/roa2web")
|
||||
|
||||
import asyncio
|
||||
import oracledb
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
import os

# Oracle connection settings. Each value can be supplied through the
# environment; the fallbacks are the values that used to be hard-coded, so
# existing setups keep working.
# NOTE(review): the fallback password is still committed to source — rotate it
# and drop the default once the environment variables are provisioned.
ORACLE_CONFIG = {
    "user": os.environ.get("ROA_ORACLE_USER", "CONTAFIN_ORACLE"),
    "password": os.environ.get("ROA_ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ROA_ORACLE_DSN", "10.0.20.121:1521/ROA"),
}
|
||||
|
||||
# Supplier fiscal code (digits only) -> expense account used for its receipts.
CUI_TO_CONT = {
    "11201891": "6022",  # MOL
    "1590082": "6022",   # OMV Petrom
    "14991381": "6022",  # MOL Romania (present in the sibling scripts' tables)
    "10562600": "6021",  # Dedeman
}


def get_cont_cheltuiala(cui: str) -> str:
    """Return the expense account for a supplier fiscal code.

    The code is upper-cased, every "RO" occurrence is removed and surrounding
    whitespace is stripped before the lookup. Unknown suppliers map to the
    default account ``"6028"``.
    """
    cui_clean = cui.upper().replace("RO", "").strip()
    return CUI_TO_CONT.get(cui_clean, "6028")
|
||||
|
||||
async def process_pdf(pdf_path: Path):
    """Run the in-process OCR service on *pdf_path* and print what it extracted.

    Returns:
        The OCR result object, or ``None`` (after printing the service's
        message) when the service reports failure.
    """
    from backend.modules.data_entry.services.ocr_service import ocr_service

    # Anything that is not a PDF is handed over as a JPEG image.
    if pdf_path.suffix.lower() == ".pdf":
        mime_type = "application/pdf"
    else:
        mime_type = "image/jpeg"

    print(f"\n[OCR] Processing: {pdf_path.name}")
    print("-" * 50)

    ok, service_message, extracted = await ocr_service.process_image(pdf_path, mime_type)
    if not ok:
        print(f"ERROR: {service_message}")
        return None

    printed_fields = (
        ("Partner", "partner_name"),
        ("CUI", "cui"),
        ("Data", "receipt_date"),
        ("Numar", "receipt_number"),
        ("Total", "amount"),
        ("TVA", "tva_total"),
    )
    for label, attribute in printed_fields:
        print(f"{label}: {getattr(extracted, attribute)}")
    print(f"Confidence: {extracted.overall_confidence:.0%}")

    return extracted
|
||||
|
||||
def save_to_oracle(result, do_commit: bool = False):
    """Insert the accounting lines for an OCR'd receipt into ``MARIUSM_AUTO.ACT_TEMP``.

    Lines written (all under the same COD):
      * expense account = 401 for the net amount,
      * 4426 = 401 for the VAT, only when VAT is greater than zero,
      * 401 = 5311 for the payment of the full amount.

    Args:
        result: OCR result exposing ``receipt_date``, ``receipt_number``,
            ``amount``, ``tva_total``, ``cui`` and ``partner_name``.
        do_commit: Commit when true; otherwise roll everything back (dry run).

    Returns:
        The COD used for the inserted lines.

    Raises:
        Any exception from the database layer, after rolling back.
    """
    mode = "SAVE" if do_commit else "DRY RUN"
    print(f"\n[Oracle] {mode}")
    print("-" * 50)

    # Context managers close cursor and connection on every exit path,
    # including a failure while creating the cursor.
    with oracledb.connect(**ORACLE_CONFIG) as conn, conn.cursor() as cursor:
        try:
            receipt_date = result.receipt_date or datetime.now().date()
            an, luna = receipt_date.year, receipt_date.month

            # NOTE(review): MAX(COD)+1 is not safe against concurrent writers.
            cursor.execute(
                "SELECT NVL(MAX(COD), 0) + 1 FROM MARIUSM_AUTO.ACT WHERE AN = :an AND LUNA = :luna",
                an=an, luna=luna)
            cod = cursor.fetchone()[0]

            cui_clean = (result.cui or "").upper().replace("RO", "").strip()
            id_part = 0
            if cui_clean:
                # Skip the lookup for an empty CUI: it would otherwise search
                # for the bare string "RO".
                cursor.execute(
                    "SELECT ID_PART FROM MARIUSM_AUTO.NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                    cui=cui_clean, cui2="RO"+cui_clean)
                row = cursor.fetchone()
                if row:
                    id_part = row[0]

            total = float(result.amount or 0)
            tva = float(result.tva_total or 0)
            # Round away binary-float artefacts such as 221.23999999999998.
            fara_tva = round(total - tva, 2)
            nract = int(result.receipt_number) if result.receipt_number and result.receipt_number.isdigit() else 0
            cont = get_cont_cheltuiala(result.cui or "")
            expl = f"OCR: {result.partner_name or 'N/A'}"

            print(f"COD: {cod}")
            print(f"Partner ID: {id_part} (CUI: {cui_clean})")
            print(f"Cont: {cont}")

            # (debit, credit, amount, text, partner on credit, partner on debit).
            # The supplier account 401 is the credit side of the expense and
            # VAT lines but the debit side of the payment line, so the partner
            # goes to ID_PARTD there — the same layout the sibling scripts use.
            lines = [
                (cont, "401", fara_tva, expl, id_part, 0),
                ("401", "5311", total, f"Plata {expl}", 0, id_part),
            ]
            if tva > 0:
                lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))

            for scd, scc, suma, e, id_partc, id_partd in lines:
                cursor.execute("""
                    INSERT INTO MARIUSM_AUTO.ACT_TEMP (
                        LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                        EXPLICATIA, SCD, SCC, SUMA,
                        ID_PARTC, ID_PARTD, ID_UTIL, DATAORA
                    ) VALUES (
                        :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                        :expl, :scd, :scc, :suma,
                        :id_partc, :id_partd, 0, SYSDATE
                    )
                """, luna=luna, an=an, cod=cod, dataact=receipt_date, nract=nract,
                    expl=e, scd=scd, scc=scc, suma=suma,
                    id_partc=id_partc, id_partd=id_partd)
                print(f" {scd} = {scc}: {suma:.2f}")

            if do_commit:
                conn.commit()
                print(f"\nSAVED to Oracle (COD={cod})")
            else:
                conn.rollback()
                print(f"\nDRY RUN - not saved (would be COD={cod})")

            return cod

        except Exception as e:
            print(f"ERROR: {e}")
            conn.rollback()
            raise
|
||||
|
||||
async def main():
    """Command-line entry point: OCR the given file, then save or dry-run to Oracle.

    The first argument is tried as a path and, if that does not exist, as a
    file name inside the OCR sample fixtures directory. ``--save`` anywhere on
    the command line turns the dry run into a real save.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python process_and_save.py <pdf_path> [--save]")
        return

    requested = argv[1]
    candidates = (
        Path(requested),
        Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{requested}"),
    )
    pdf_path = next((candidate for candidate in candidates if candidate.exists()), None)
    if pdf_path is None:
        print(f"File not found: {requested}")
        return

    do_save = "--save" in argv
    ruler = "=" * 50

    print(ruler)
    print("PDF -> OCR -> Oracle")
    print(ruler)

    result = await process_pdf(pdf_path)
    if result:
        save_to_oracle(result, do_commit=do_save)

    print("\n" + ruler)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
190
backend/scripts/whatsapp_import/process_v2.py
Normal file
190
backend/scripts/whatsapp_import/process_v2.py
Normal file
@@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script complet: PDF → OCR → Oracle (cu PACK_CONTAFIN)
|
||||
Usage: python process_v2.py <path_to_pdf> [--save]
|
||||
"""
|
||||
import sys
|
||||
sys.path.insert(0, "/workspace/roa2web")
|
||||
|
||||
import asyncio
|
||||
import oracledb
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
import os

# Oracle connection settings. Each value can be supplied through the
# environment; the fallbacks are the values that used to be hard-coded, so
# existing setups keep working.
# NOTE(review): the fallback password is still committed to source — rotate it
# and drop the default once the environment variables are provisioned.
ORACLE_CONFIG = {
    "user": os.environ.get("ROA_ORACLE_USER", "MARIUSM_AUTO"),
    "password": os.environ.get("ROA_ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ROA_ORACLE_DSN", "10.0.20.121:1521/ROA"),
}
|
||||
|
||||
# Supplier fiscal code (digits only) -> expense account used for its receipts.
CUI_TO_CONT = {
    "11201891": "6022",  # MOL
    "1590082": "6022",  # OMV Petrom
    "14991381": "6022",  # MOL Romania
    "10562600": "6021",  # Dedeman
}


def get_cont_cheltuiala(cui: str) -> str:
    """Map a supplier fiscal code to its expense account.

    The code is upper-cased, all "RO" occurrences are dropped and the ends are
    stripped before the lookup; suppliers not in the table get ``"6028"``.
    """
    bare_cui = cui.upper().replace("RO", "").strip()
    try:
        return CUI_TO_CONT[bare_cui]
    except KeyError:
        return "6028"
|
||||
|
||||
async def process_pdf(pdf_path: Path):
    """OCR *pdf_path* with the in-process OCR service and print the extracted fields.

    Returns:
        The OCR result object on success; ``None`` when the service reports
        failure (its message is printed first).
    """
    from backend.modules.data_entry.services.ocr_service import ocr_service

    is_pdf = pdf_path.suffix.lower() == ".pdf"
    mime_type = "application/pdf" if is_pdf else "image/jpeg"

    print(f"\n[OCR] Processing: {pdf_path.name}")
    print("-" * 50)

    outcome = await ocr_service.process_image(pdf_path, mime_type)
    success, message, result = outcome
    if not success:
        print(f"ERROR: {message}")
        return None

    # Print the fields one by one, in the order the report expects.
    for label, attribute in (
        ("Partner", "partner_name"),
        ("CUI", "cui"),
        ("Data", "receipt_date"),
        ("Numar", "receipt_number"),
        ("Total", "amount"),
        ("TVA", "tva_total"),
    ):
        print(f"{label}: {getattr(result, attribute)}")
    print(f"Confidence: {result.overall_confidence:.0%}")

    return result
|
||||
|
||||
def save_to_oracle_with_pack(result, do_commit: bool = False):
    """Write an OCR'd receipt to Oracle through the PACK_CONTAFIN procedures.

    Steps, in order: call ``INITIALIZEAZA_SCRIERE_ACT_RUL``, pick the next
    COD for the receipt's year/month, insert the accounting lines into
    ``ACT_TEMP`` (expense = 401, optional 4426 = 401 for VAT, 401 = 5311
    payment), then call ``FINALIZEAZA_SCRIERE_ACT_RUL``.

    Args:
        result: OCR result exposing ``receipt_date``, ``receipt_number``,
            ``amount``, ``tva_total``, ``cui`` and ``partner_name``.
        do_commit: Commit when true; otherwise roll back (dry run).

    Returns:
        ``(cod, message)`` where *message* is the text returned by the
        finalize procedure's output parameter.

    Raises:
        Any exception from the database layer, after rolling back.
    """
    mode = "SAVE" if do_commit else "DRY RUN"
    print(f"\n[Oracle + PACK_CONTAFIN] {mode}")
    print("-" * 50)

    # Context managers close cursor and connection on every exit path,
    # including a failure while creating the cursor.
    with oracledb.connect(**ORACLE_CONFIG) as conn, conn.cursor() as cursor:
        try:
            receipt_date = result.receipt_date or datetime.now().date()
            an, luna = receipt_date.year, receipt_date.month

            # Parameters
            id_util = 0  # default user id
            id_sucursala = 0

            # 1. Initialise
            print("[1] INITIALIZEAZA_SCRIERE_ACT_RUL...")
            cursor.callproc('PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL', [
                id_util,
                datetime.now(),
                an,
                luna,
                0,  # suprascriere_cod
                0,  # suprascriere_anluna
                0,  # scrie_sterge (0 = write)
                id_sucursala,
            ])
            print(" OK")

            # Next COD for this year/month.
            # NOTE(review): MAX(COD)+1 is not safe against concurrent writers.
            cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
            cod = cursor.fetchone()[0]

            # Partner
            cui_clean = (result.cui or "").upper().replace("RO", "").strip()
            id_part = 0
            if cui_clean:
                # Skip the lookup for an empty CUI: it would otherwise search
                # for the bare string "RO".
                cursor.execute("SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                               cui=cui_clean, cui2="RO"+cui_clean)
                row = cursor.fetchone()
                if row:
                    id_part = row[0]

            total = float(result.amount or 0)
            tva = float(result.tva_total or 0)
            # Round away binary-float artefacts such as 221.23999999999998.
            fara_tva = round(total - tva, 2)
            nract = int(result.receipt_number) if result.receipt_number and result.receipt_number.isdigit() else 0
            cont = get_cont_cheltuiala(result.cui or "")
            expl = f"OCR: {result.partner_name or 'N/A'}"

            print(f" COD: {cod}, Partner ID: {id_part}, Cont: {cont}")

            # 2. INSERT into ACT_TEMP
            print("[2] INSERT ACT_TEMP...")

            lines = [
                (cont, "401", fara_tva, expl, id_part, 0),  # expense - partner on credit
                ("401", "5311", total, f"Plata {expl}", 0, id_part),  # payment - partner on debit
            ]
            if tva > 0:
                lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))  # VAT - partner on credit

            for scd, scc, suma, e, id_partc, id_partd in lines:
                cursor.execute("""
                    INSERT INTO ACT_TEMP (
                        LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                        EXPLICATIA, SCD, SCC, SUMA,
                        ID_PARTC, ID_PARTD, ID_UTIL, DATAORA
                    ) VALUES (
                        :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                        :expl, :scd, :scc, :suma,
                        :id_partc, :id_partd, :id_util, SYSDATE
                    )
                """, luna=luna, an=an, cod=cod, dataact=receipt_date, nract=nract,
                    expl=e, scd=scd, scc=scc, suma=suma,
                    id_partc=id_partc, id_partd=id_partd, id_util=id_util)
                print(f" {scd} = {scc}: {suma:.2f}")

            # 3. Finalise
            print("[3] FINALIZEAZA_SCRIERE_ACT_RUL...")
            mesaj = cursor.var(oracledb.STRING, 4000)
            cursor.callproc('PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL', [
                id_util,
                cod,
                0,  # scrie_sterge
                0,  # modificare_nota
                0,  # scrie_cump_vanz
                mesaj,
            ])
            result_msg = mesaj.getvalue()
            print(f" Mesaj: {result_msg}")

            # NOTE(review): the dry run relies on the PACK_CONTAFIN procedures
            # not committing on their own — confirm against the package body.
            if do_commit:
                conn.commit()
                print(f"\n✅ SALVAT în Oracle (COD={cod})")
            else:
                conn.rollback()
                print(f"\n⚠️ DRY RUN - rollback (COD ar fi fost {cod})")

            return cod, result_msg

        except Exception as e:
            print(f"❌ Eroare: {e}")
            conn.rollback()
            raise
|
||||
|
||||
async def main():
    """Command-line entry point: OCR the given file, then save it via PACK_CONTAFIN.

    The first argument is tried as a path and, if that does not exist, as a
    file name inside the OCR sample fixtures directory. Without ``--save`` the
    Oracle step is rolled back (dry run).
    """
    if len(sys.argv) < 2:
        # The usage text must name this script (process_v2.py).
        print("Usage: python process_v2.py <pdf_path> [--save]")
        return

    pdf_path = Path(sys.argv[1])
    if not pdf_path.exists():
        pdf_path = Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{sys.argv[1]}")

    if not pdf_path.exists():
        print(f"File not found: {sys.argv[1]}")
        return

    do_save = "--save" in sys.argv

    print("=" * 50)
    print("PDF -> OCR -> Oracle (PACK_CONTAFIN)")
    print("=" * 50)

    result = await process_pdf(pdf_path)
    if result:
        save_to_oracle_with_pack(result, do_commit=do_save)

    print("\n" + "=" * 50)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
186
backend/scripts/whatsapp_import/save_to_oracle.py
Normal file
186
backend/scripts/whatsapp_import/save_to_oracle.py
Normal file
@@ -0,0 +1,186 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script pentru salvare note contabile în Oracle din date OCR.
|
||||
"""
|
||||
import sys
|
||||
sys.path.insert(0, "/workspace/roa2web")
|
||||
|
||||
import oracledb
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import os

# Oracle connection settings. Each value can be supplied through the
# environment; the fallbacks are the values that used to be hard-coded, so
# existing setups keep working.
# NOTE(review): the fallback password is still committed to source — rotate it
# and drop the default once the environment variables are provisioned.
ORACLE_CONFIG = {
    "user": os.environ.get("ROA_ORACLE_USER", "CONTAFIN_ORACLE"),
    "password": os.environ.get("ROA_ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ROA_ORACLE_DSN", "10.0.20.121:1521/ROA"),
}
|
||||
|
||||
def get_partner_id(cursor, cui: str) -> int:
    """Find a partner id by fiscal code.

    The code is normalised (upper-cased, "RO" removed, stripped) and looked up
    first as-is and then with an "RO" prefix. The first match wins.

    Returns:
        The matching ``ID_PART``, or ``0`` when neither form is found.
    """
    cui_clean = cui.upper().replace("RO", "").strip()
    lookup_sql = "SELECT ID_PART FROM MARIUSM_AUTO.NOM_PARTENERI WHERE COD_FISCAL = :cui"

    for candidate in (cui_clean, "RO" + cui_clean):
        cursor.execute(lookup_sql, cui=candidate)
        match = cursor.fetchone()
        if match:
            print(f" Partner găsit: ID={match[0]} pentru CUI={candidate}")
            return match[0]

    print(f" ⚠️ Partner NU găsit pentru CUI {cui_clean}, folosim ID=0")
    return 0
|
||||
|
||||
def save_bon_achizitie(
    cursor,
    receipt_date: datetime,
    receipt_number: str,
    amount: Decimal,
    tva_amount: Decimal,
    partner_cui: str,
    partner_name: str,
    cont_cheltuiala: str = "6028",
    explicatie: str = "Import OCR",
    proc_tva: float = 1.19,
):
    """Insert the accounting lines of a fiscal receipt into ``MARIUSM_AUTO.ACT_TEMP``.

    This only writes to the staging table; it does not call PACK_CONTAFIN and
    does not commit — the caller owns the transaction.

    Lines written (all under the same COD):
      * ``cont_cheltuiala`` = 401 for the net amount (partner on credit),
      * 4426 = 401 for the VAT, only when VAT is greater than zero,
      * 401 = 5311 for the full amount (partner on debit).

    Args:
        cursor: Open database cursor.
        receipt_date: Receipt date; a ``datetime`` or a plain ``date``.
        receipt_number: Receipt number; stored as 0 unless it is all digits.
        amount: Total including VAT.
        tva_amount: VAT amount.
        partner_cui: Supplier fiscal code, used to look up the partner id.
        partner_name: Supplier name, appended to the expense line's text.
        cont_cheltuiala: Expense account to debit.
        explicatie: Base text for the lines' description.
        proc_tva: Value stored in ``PROC_TVA`` on the VAT line.

    Returns:
        The COD used for the inserted lines.
    """
    an = receipt_date.year
    luna = receipt_date.month
    # Accept both datetime and date: only the calendar date is stored.
    data_act = receipt_date.date() if isinstance(receipt_date, datetime) else receipt_date

    # Next COD for this year/month.
    # NOTE(review): MAX(COD)+1 is not safe against concurrent writers.
    cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM MARIUSM_AUTO.ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
    cod = cursor.fetchone()[0]

    id_part = get_partner_id(cursor, partner_cui)

    total_cu_tva = float(amount)
    total_fara_tva = float(amount - tva_amount)
    tva = float(tva_amount)
    nract = int(receipt_number) if receipt_number and receipt_number.isdigit() else 0

    print(f"\nNote contabile (COD={cod}):")
    print(f" Data: {data_act}, Nr: {nract}")
    print(f" Fără TVA: {total_fara_tva:.2f}, TVA: {tva:.2f}, Total: {total_cu_tva:.2f}")

    # Bind values shared by all three inserts.
    common = dict(luna=luna, an=an, cod=cod, dataact=data_act, nract=nract, id_part=id_part)

    # LINE 1: expense = supplier
    cursor.execute("""
        INSERT INTO MARIUSM_AUTO.ACT_TEMP (
            LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
            EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_UTIL, DATAORA
        ) VALUES (
            :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
            :expl, :scd, :scc, :suma, :id_part, 0, SYSDATE
        )
    """,
        expl=f"{explicatie} - {partner_name}",
        scd=cont_cheltuiala, scc="401", suma=total_fara_tva, **common
    )
    print(f" ✓ {cont_cheltuiala} = 401: {total_fara_tva:.2f}")

    # LINE 2: VAT = supplier
    if tva > 0:
        cursor.execute("""
            INSERT INTO MARIUSM_AUTO.ACT_TEMP (
                LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                EXPLICATIA, SCD, SCC, SUMA, PROC_TVA, ID_PARTC, ID_UTIL, DATAORA
            ) VALUES (
                :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                :expl, :scd, :scc, :suma, :proc_tva, :id_part, 0, SYSDATE
            )
        """,
            expl=f"TVA {explicatie}",
            scd="4426", scc="401", suma=tva, proc_tva=proc_tva, **common
        )
        print(f" ✓ 4426 = 401: {tva:.2f}")

    # LINE 3: supplier = cash
    cursor.execute("""
        INSERT INTO MARIUSM_AUTO.ACT_TEMP (
            LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
            EXPLICATIA, SCD, SCC, SUMA, ID_PARTD, ID_UTIL, DATAORA
        ) VALUES (
            :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
            :expl, :scd, :scc, :suma, :id_part, 0, SYSDATE
        )
    """,
        expl=f"Plata {explicatie}",
        scd="401", scc="5311", suma=total_cu_tva, **common
    )
    print(f" ✓ 401 = 5311: {total_cu_tva:.2f}")

    return cod
|
||||
|
||||
def test_insert():
    """Smoke-test the ACT_TEMP inserts with a sample receipt, then roll back.

    Connects to Oracle, inserts the sample lines, prints what landed in
    ``ACT_TEMP`` for the new COD and always rolls back, so nothing is stored.
    Any failure is printed, rolled back and re-raised.
    """
    ruler = "=" * 50
    print(ruler)
    print("TEST: Salvare note contabile în Oracle")
    print(ruler)
    print("\nConectare la Oracle TEST...")

    conn = oracledb.connect(**ORACLE_CONFIG)
    cursor = conn.cursor()
    print("Conexiune OK!")

    # Sample data as produced by OCR
    sample_number = "1200302"
    sample = {
        "receipt_date": datetime(2024, 8, 1),
        "receipt_number": sample_number,
        "amount": Decimal("263.28"),
        "tva_amount": Decimal("42.04"),
        "partner_cui": "RO11201891",
        "partner_name": "Benzinărie Test",
        "cont_cheltuiala": "6022",
        "explicatie": f"Bon benzină {sample_number}",
    }

    try:
        cod = save_bon_achizitie(cursor, **sample)

        # Read back what was inserted
        cursor.execute("SELECT SCD, SCC, SUMA FROM MARIUSM_AUTO.ACT_TEMP WHERE COD = :cod ORDER BY ROWID", cod=cod)
        print(f"\nVerificare ACT_TEMP (COD={cod}):")
        for scd, scc, suma in cursor:
            print(f" {scd} = {scc}: {suma:.2f}")

        # This is only a test: undo everything
        print("\n⚠️ ROLLBACK (test only)")
        conn.rollback()

        print("\n" + ruler)
        print("✅ TEST REUȘIT!")
        print(ruler)

    except Exception as e:
        print(f"\n❌ EROARE: {e}")
        conn.rollback()
        raise
    finally:
        cursor.close()
        conn.close()


if __name__ == "__main__":
    test_insert()
|
||||
43
backend/scripts/whatsapp_import/test_ocr_simple.py
Normal file
43
backend/scripts/whatsapp_import/test_ocr_simple.py
Normal file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
# The OCR service is imported below as ``backend.modules...``, which resolves
# from the repository root, so that root has to be importable as well.
# The backend directory is kept first on the path, as before.
# NOTE(review): confirm the backend/ entry is still needed once the root is there.
sys.path.insert(0, "/workspace/roa2web")
sys.path.insert(0, "/workspace/roa2web/backend")
|
||||
|
||||
async def main():
    """Run OCR on one file and print the extracted fields.

    The first argument is tried as a path and, if that does not exist, as a
    file name inside the OCR sample fixtures directory.
    """
    if len(sys.argv) < 2:
        print("Usage: python test_ocr_simple.py <path_to_pdf>")
        return

    requested = sys.argv[1]
    pdf_path = Path(requested)
    if not pdf_path.exists():
        # Fall back to the bundled sample directory.
        pdf_path = Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{requested}")
        if not pdf_path.exists():
            print(f"Error: File not found: {requested}")
            return

    print(f"Processing: {pdf_path}")

    from backend.modules.data_entry.services.ocr_service import ocr_service

    if pdf_path.suffix.lower() == ".pdf":
        mime_type = "application/pdf"
    else:
        mime_type = "image/jpeg"

    print("Running OCR...")
    ok, service_message, extracted = await ocr_service.process_image(pdf_path, mime_type)
    if not ok:
        print(f"Error: {service_message}")
        return

    for label, attribute in (
        ("Partner", "partner_name"),
        ("CUI", "cui"),
        ("Date", "receipt_date"),
        ("Amount", "amount"),
        ("TVA", "tva_total"),
    ):
        print(f"{label}: {getattr(extracted, attribute)}")
    print(f"Confidence: {extracted.overall_confidence:.2%}")
    print("SUCCESS!")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
189
backend/scripts/whatsapp_import/whatsapp_flow.py
Normal file
189
backend/scripts/whatsapp_import/whatsapp_flow.py
Normal file
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Flux complet WhatsApp: PDF → OCR (via queue) → Oracle
|
||||
Usage: python whatsapp_flow.py <file_path> [--save]
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import uuid
|
||||
import oracledb
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from decimal import Decimal
|
||||
|
||||
import os

# OCR queue paths
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
DB_PATH = QUEUE_DIR / "ocr_jobs.db"
FILES_DIR = QUEUE_DIR / "files"

# Oracle connection settings. Each value can be supplied through the
# environment; the fallbacks are the values that used to be hard-coded, so
# existing setups keep working.
# NOTE(review): the fallback password is still committed to source — rotate it
# and drop the default once the environment variables are provisioned.
ORACLE_CONFIG = {
    "user": os.environ.get("ROA_ORACLE_USER", "MARIUSM_AUTO"),
    "password": os.environ.get("ROA_ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ROA_ORACLE_DSN", "10.0.20.121:1521/ROA"),
}
|
||||
|
||||
# Supplier fiscal code (digits only) -> expense account used for its receipts.
CUI_TO_CONT = {
    "11201891": "6022",
    "1590082": "6022",
    "14991381": "6022",
    "10562600": "6021",
}


def get_cont(cui: str) -> str:
    """Return the expense account for a supplier fiscal code.

    The code is upper-cased, all "RO" occurrences are removed and the ends are
    stripped before the lookup; unknown suppliers get ``"6028"``.
    """
    key = cui.upper()
    key = key.replace("RO", "")
    key = key.strip()
    if key in CUI_TO_CONT:
        return CUI_TO_CONT[key]
    return "6028"
|
||||
|
||||
async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Copy *file_path* into the queue directory and insert a pending OCR job.

    Args:
        file_path: Document to be OCR'd.
        engine: Value stored in the ``engine`` column of the job row.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        Any error from ``shutil.copy`` or ``aiosqlite``. If the insert fails,
        the queued copy of the file is deleted before the error propagates,
        so no orphan file is left behind without a job row.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    # Anything that is not a PDF is declared as JPEG.
    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Read the clock once so expires_at is exactly created_at + 24 hours.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (id, status, file_path, mime_type, engine, created_at, original_filename, expires_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (job_id, "pending", str(dest_path), mime_type, engine,
                 created_at.isoformat(), file_path.name, expires_at.isoformat()),
            )
            await db.commit()
    except Exception:
        # The job row was not stored: remove the copy we just queued.
        dest_path.unlink(missing_ok=True)
        raise

    return job_id
|
||||
|
||||
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or *timeout* elapses.

    Args:
        job_id: Id of the row in ``ocr_jobs`` to watch.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": <parsed result_json or None>,
        "time_ms": ...}`` when the job status is ``completed``;
        ``{"success": False, "error": ...}`` when it is ``failed`` or when the
        timeout expires (error ``"Timeout"``).
    """
    import aiosqlite

    # Use the event loop's monotonic clock for the deadline:
    # ``timedelta.seconds`` is only the seconds *component* (it wraps every
    # day) and wall-clock time can jump.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while loop.time() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,)
            ) as cursor:
                row = await cursor.fetchone()

        if row and row["status"] == "completed":
            # result_json may be empty; json.loads(None) would raise.
            payload = json.loads(row["result_json"]) if row["result_json"] else None
            return {"success": True, "result": payload, "time_ms": row["processing_time_ms"]}
        if row and row["status"] == "failed":
            return {"success": False, "error": row["error_message"]}

        # Still pending (or the row is not visible yet): try again shortly.
        await asyncio.sleep(0.3)

    return {"success": False, "error": "Timeout"}
|
||||
|
||||
def save_to_oracle(ocr_result: dict, do_commit: bool = False) -> dict:
    """Write an OCR'd receipt to Oracle through the PACK_CONTAFIN procedures.

    Steps, in order: call ``INITIALIZEAZA_SCRIERE_ACT_RUL``, pick the next
    COD for the receipt's year/month, insert the accounting lines into
    ``ACT_TEMP`` (expense = 401, optional 4426 = 401 for VAT, 401 = 5311
    payment), then call ``FINALIZEAZA_SCRIERE_ACT_RUL``.

    Args:
        ocr_result: Parsed OCR result; the keys read are ``receipt_date``
            (``YYYY-MM-DD...`` string), ``receipt_number``, ``amount``,
            ``tva_total``, ``cui`` and ``partner_name``.
        do_commit: Commit when true; otherwise roll back (dry run).

    Returns:
        On success ``{"success": True, "cod", "luna", "an", "saved",
        "mesaj"}`` where *mesaj* is the finalize procedure's output text.
        On any failure — including failing to connect —
        ``{"success": False, "error": <text>}``; this function does not raise
        for database errors.
    """
    conn = None
    cursor = None
    try:
        # Connect inside the try so a connection failure is reported through
        # the same error dict as every other failure.
        conn = oracledb.connect(**ORACLE_CONFIG)
        cursor = conn.cursor()

        # Parse date
        date_str = ocr_result.get("receipt_date")
        if date_str:
            receipt_date = datetime.strptime(date_str[:10], "%Y-%m-%d").date()
        else:
            receipt_date = datetime.now().date()

        an, luna = receipt_date.year, receipt_date.month

        # Init
        cursor.callproc('PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL', [0, datetime.now(), an, luna, 0, 0, 0, 0])

        # Get COD
        # NOTE(review): MAX(COD)+1 is not safe against concurrent writers.
        cursor.execute("SELECT NVL(MAX(COD), 0) + 1 FROM ACT WHERE AN = :an AND LUNA = :luna", an=an, luna=luna)
        cod = cursor.fetchone()[0]

        # Partner
        cui_clean = (ocr_result.get("cui") or "").upper().replace("RO", "").strip()
        id_part = 0
        if cui_clean:
            # Skip the lookup for an empty CUI: it would otherwise search for
            # the bare string "RO".
            cursor.execute("SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                           cui=cui_clean, cui2="RO"+cui_clean)
            row = cursor.fetchone()
            if row:
                id_part = row[0]

        # Amounts
        total = float(ocr_result.get("amount") or 0)
        tva = float(ocr_result.get("tva_total") or 0)
        # Round away binary-float artefacts such as 221.23999999999998.
        fara_tva = round(total - tva, 2)
        raw_number = str(ocr_result.get("receipt_number") or "")
        nract = int(raw_number) if raw_number.isdigit() else 0
        cont = get_cont(ocr_result.get("cui") or "")
        expl = f"OCR: {ocr_result.get('partner_name') or 'N/A'}"

        # Insert lines: (debit, credit, amount, text, partner on credit, partner on debit)
        lines = [
            (cont, "401", fara_tva, expl, id_part, 0),
            ("401", "5311", total, f"Plata {expl}", 0, id_part),
        ]
        if tva > 0:
            lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))

        for scd, scc, suma, e, id_partc, id_partd in lines:
            cursor.execute("""
                INSERT INTO ACT_TEMP (LUNA, AN, COD, DATAIREG, DATAACT, NRACT, EXPLICATIA, SCD, SCC, SUMA, ID_PARTC, ID_PARTD, ID_UTIL, DATAORA)
                VALUES (:luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract, :expl, :scd, :scc, :suma, :id_partc, :id_partd, 0, SYSDATE)
            """, luna=luna, an=an, cod=cod, dataact=receipt_date, nract=nract, expl=e, scd=scd, scc=scc, suma=suma, id_partc=id_partc, id_partd=id_partd)

        # Finalize
        mesaj = cursor.var(oracledb.STRING, 4000)
        cursor.callproc('PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL', [0, cod, 0, 0, 0, mesaj])

        # NOTE(review): the dry run relies on the PACK_CONTAFIN procedures not
        # committing on their own — confirm against the package body.
        if do_commit:
            conn.commit()
        else:
            conn.rollback()

        return {
            "success": True,
            "cod": cod,
            "luna": luna,
            "an": an,
            "saved": bool(do_commit),
            # Surface the procedure's message instead of discarding it.
            "mesaj": mesaj.getvalue(),
        }
    except Exception as e:
        if conn is not None:
            conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||
|
||||
async def process_whatsapp_file(file_path: Path, do_save: bool = False):
    """Run the full flow for one file: queue OCR, wait for it, write to Oracle.

    Args:
        file_path: Document received over WhatsApp.
        do_save: Commit the Oracle write when true; otherwise dry run.

    Returns:
        ``None`` when OCR failed, timed out or produced no usable payload;
        otherwise ``{"ocr": ..., "ocr_time_ms": ..., "oracle": ...}`` where
        ``oracle`` is the dict returned by ``save_to_oracle`` (which may
        itself report a failure).
    """
    print(f"📄 Procesez: {file_path.name}")

    # OCR
    print("🔍 OCR...")
    job_id = await submit_ocr_job(file_path)
    result = await wait_for_result(job_id)

    if not result["success"]:
        print(f"❌ OCR Error: {result.get('error')}")
        return None

    ocr = result["result"]
    if not isinstance(ocr, dict):
        # Nothing to read fields from (e.g. an empty or "null" result payload).
        print("❌ OCR Error: job completed without a usable result")
        return None

    ocr_time = result["time_ms"]
    print(f"✅ OCR OK ({ocr_time}ms)")
    print(f" CUI: {ocr.get('cui')}, Data: {ocr.get('receipt_date')}")
    print(f" Total: {ocr.get('amount')} RON, TVA: {ocr.get('tva_total')}")

    # Oracle
    print("💾 Oracle...")
    oracle_result = save_to_oracle(ocr, do_commit=do_save)

    if oracle_result["success"]:
        if oracle_result["saved"]:
            print(f"✅ SALVAT: COD={oracle_result['cod']}, {oracle_result['luna']:02d}/{oracle_result['an']}")
        else:
            print(f"⚠️ DRY RUN: ar fi COD={oracle_result['cod']}")
    else:
        print(f"❌ Oracle Error: {oracle_result.get('error')}")

    return {
        "ocr": ocr,
        "ocr_time_ms": ocr_time,
        "oracle": oracle_result,
    }
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # The usage text must name this script (whatsapp_flow.py).
        print("Usage: python whatsapp_flow.py <file_path> [--save]")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    do_save = "--save" in sys.argv

    if not file_path.exists():
        print(f"File not found: {file_path}")
        sys.exit(1)

    outcome = asyncio.run(process_whatsapp_file(file_path, do_save))

    # Exit non-zero when OCR failed (None) or the Oracle step reported an
    # error, so an automated caller can tell success from failure.
    succeeded = outcome is not None and outcome["oracle"]["success"]
    sys.exit(0 if succeeded else 1)
|
||||
Reference in New Issue
Block a user