- US-001: mută queue_client.py în data_entry/services/ocr/ - US-002/003/004: oracle_receipt_writer + oracle_server_id în DB - US-005: receipt_handlers.py (PDF/photo/callback flow) - US-006: wire handlers în main.py, per-schema connect, seq_cod.nextval - US-007: .gitignore secrets/*.oracle_pass - US-008/009/010: teste unit + integration + E2E - setup-secrets.sh helper + template - docs/telegram/README.md actualizat cu arhitectura nouă Testat E2E pe DB live (MARIUSM_AUTO). COD din seq_cod.nextval. pypdfium2 fallback pentru PDF decode (fără poppler). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
130 lines
4.6 KiB
Python
130 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Flux complet WhatsApp: PDF → OCR (via queue) → Oracle
|
|
Usage: python roa2web_whatsapp.py <file_path> [--save]
|
|
"""
|
|
import asyncio
|
|
import json
|
|
import shutil
|
|
import sys
|
|
import uuid
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
|
|
from backend.modules.data_entry.services.oracle_receipt_writer import write_receipt
|
|
|
|
# OCR Queue paths
|
|
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
|
|
DB_PATH = QUEUE_DIR / "ocr_jobs.db"
|
|
FILES_DIR = QUEUE_DIR / "files"
|
|
|
|
# Oracle config
|
|
ORACLE_CONFIG = {
|
|
"user": "MARIUSM_AUTO",
|
|
"password": "ROMFASTSOFT",
|
|
"dsn": "10.0.20.121:1521/ROA"
|
|
}
|
|
|
|
async def submit_ocr_job(file_path: Path) -> str:
|
|
import aiosqlite
|
|
job_id = str(uuid.uuid4())
|
|
FILES_DIR.mkdir(parents=True, exist_ok=True)
|
|
dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
|
|
shutil.copy(file_path, dest_path)
|
|
mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"
|
|
|
|
now = datetime.now()
|
|
async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
|
|
await db.execute("""
|
|
INSERT INTO ocr_jobs (id, status, file_path, mime_type, engine, created_at, original_filename, expires_at)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
""", (job_id, "pending", str(dest_path), mime_type, "doctr_plus",
|
|
now.isoformat(), file_path.name,
|
|
(now + timedelta(hours=24)).isoformat()))
|
|
await db.commit()
|
|
return job_id
|
|
|
|
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
|
|
import aiosqlite
|
|
start = datetime.now()
|
|
while (datetime.now() - start).seconds < timeout:
|
|
async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
|
|
db.row_factory = aiosqlite.Row
|
|
async with db.execute(
|
|
"SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
|
|
(job_id,)
|
|
) as cursor:
|
|
row = await cursor.fetchone()
|
|
if row and row["status"] == "completed":
|
|
return {"success": True, "result": json.loads(row["result_json"]), "time_ms": row["processing_time_ms"]}
|
|
elif row and row["status"] == "failed":
|
|
return {"success": False, "error": row["error_message"]}
|
|
await asyncio.sleep(0.3)
|
|
return {"success": False, "error": "Timeout"}
|
|
|
|
def save_to_oracle(ocr_result: dict, do_commit: bool = False) -> dict:
|
|
try:
|
|
date_str = ocr_result.get("receipt_date")
|
|
receipt_date = (
|
|
datetime.strptime(date_str[:10], "%Y-%m-%d").date() if date_str else None
|
|
)
|
|
effective_date = receipt_date or datetime.now().date()
|
|
an, luna = effective_date.year, effective_date.month
|
|
|
|
receipt_dict = {**ocr_result, "receipt_date": receipt_date}
|
|
|
|
cod, _mesaj = write_receipt(receipt_dict, ORACLE_CONFIG, commit=do_commit)
|
|
return {"success": True, "cod": cod, "luna": luna, "an": an, "saved": do_commit}
|
|
except Exception as e:
|
|
return {"success": False, "error": str(e)}
|
|
|
|
async def process_whatsapp_file(file_path: Path, do_save: bool = False):
|
|
print(f"📄 Procesez: {file_path.name}")
|
|
|
|
# OCR
|
|
print("🔍 OCR...")
|
|
job_id = await submit_ocr_job(file_path)
|
|
result = await wait_for_result(job_id)
|
|
|
|
if not result["success"]:
|
|
print(f"❌ OCR Error: {result.get('error')}")
|
|
return None
|
|
|
|
ocr = result["result"]
|
|
ocr_time = result["time_ms"]
|
|
print(f"✅ OCR OK ({ocr_time}ms)")
|
|
print(f" CUI: {ocr.get('cui')}, Data: {ocr.get('receipt_date')}")
|
|
print(f" Total: {ocr.get('amount')} RON, TVA: {ocr.get('tva_total')}")
|
|
|
|
# Oracle
|
|
print("💾 Oracle...")
|
|
oracle_result = save_to_oracle(ocr, do_commit=do_save)
|
|
|
|
if oracle_result["success"]:
|
|
if oracle_result["saved"]:
|
|
print(f"✅ SALVAT: COD={oracle_result['cod']}, {oracle_result['luna']:02d}/{oracle_result['an']}")
|
|
else:
|
|
print(f"⚠️ DRY RUN: ar fi COD={oracle_result['cod']}")
|
|
else:
|
|
print(f"❌ Oracle Error: {oracle_result.get('error')}")
|
|
|
|
return {
|
|
"ocr": ocr,
|
|
"ocr_time_ms": ocr_time,
|
|
"oracle": oracle_result
|
|
}
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) < 2:
|
|
print("Usage: python roa2web_whatsapp.py <file_path> [--save]")
|
|
sys.exit(1)
|
|
|
|
file_path = Path(sys.argv[1])
|
|
do_save = "--save" in sys.argv
|
|
|
|
if not file_path.exists():
|
|
print(f"File not found: {file_path}")
|
|
sys.exit(1)
|
|
|
|
asyncio.run(process_whatsapp_file(file_path, do_save))
|