Files
roa2web-service-auto/backend/scripts/whatsapp_import/ocr_client.py
Claude Agent 1366dbc11c feat: Add WhatsApp import scripts for receipt processing
- OCR client for SQLite queue
- WhatsApp flow: PDF -> OCR -> SQLite -> Oracle
- PACK_CONTAFIN integration for Oracle save
- README with flux documentation
2026-02-03 15:33:22 +00:00

109 lines
3.7 KiB
Python

#!/usr/bin/env python3
"""
Client pentru OCR API roa2web - adaugă job direct în SQLite queue.
Folosește aceeași coadă ca backend-ul, fără HTTP auth.
"""
import asyncio
import json
import shutil
import sys
import uuid
from datetime import datetime, timedelta
from pathlib import Path
# Filesystem layout of the OCR queue: the root directory, the SQLite job
# database inside it, and the sub-directory holding the queued input files.
QUEUE_DIR = Path("/workspace/roa2web/backend/data/ocr_queue")
DB_PATH = QUEUE_DIR.joinpath("ocr_jobs.db")
FILES_DIR = QUEUE_DIR.joinpath("files")
async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Submit an OCR job to the SQLite queue and return its job id.

    The input file is copied into ``FILES_DIR`` under a name prefixed with a
    freshly generated UUID, then a ``pending`` row pointing at that copy is
    inserted into the ``ocr_jobs`` table of the database at ``DB_PATH``.

    Args:
        file_path: File to enqueue. A ``.pdf`` suffix (case-insensitive) is
            recorded as ``application/pdf``; every other suffix is recorded
            as ``image/jpeg``.
        engine: Value stored in the ``engine`` column of the job row.

    Returns:
        The UUID string identifying the new job.

    Raises:
        OSError: If the file cannot be copied into the queue directory.
        Exception: Any error raised while inserting the row propagates
            unchanged, after the copied file has been removed again.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())

    # Copy the file into the queue directory under a unique name.
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Take one timestamp so expires_at is exactly created_at + 24h.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    inserted = False
    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (
                    id, status, file_path, mime_type, engine,
                    created_at, original_filename, expires_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id, "pending", str(dest_path), mime_type, engine,
                    created_at.isoformat(), file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
            inserted = True
    finally:
        # If the row was never committed, nothing references the copy:
        # remove it so failed submissions do not pile up orphaned files.
        if not inserted and dest_path.exists():
            dest_path.unlink()

    return job_id
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or *timeout* elapses.

    The ``ocr_jobs`` row for *job_id* is re-read every 0.5 seconds over a
    fresh connection to ``DB_PATH``.

    Args:
        job_id: Id of the job row to watch.
        timeout: Maximum time to wait, in seconds.

    Returns:
        One of:

        * ``{"success": True, "result": <decoded result_json or None>,
          "time_ms": <processing_time_ms>}`` when the status is
          ``completed``;
        * ``{"success": False, "error": <error_message>}`` when the status
          is ``failed``;
        * ``{"success": False, "error": "Timeout"}`` when the deadline
          passes first (including when no row with that id exists).
    """
    import aiosqlite

    # Use the event loop's monotonic clock and compare full elapsed time.
    # The previous check read ``timedelta.seconds``, which is only the
    # seconds *component*: it wraps after a day and jumps to ~86399 when the
    # wall clock steps backwards, ending the wait prematurely.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while loop.time() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,)
            ) as cursor:
                row = await cursor.fetchone()

        if row is not None:
            status = row["status"]
            if status == "completed":
                raw_result = row["result_json"]
                return {
                    "success": True,
                    "result": json.loads(raw_result) if raw_result else None,
                    "time_ms": row["processing_time_ms"],
                }
            if status == "failed":
                return {
                    "success": False,
                    "error": row["error_message"],
                }

        # Any other status (or a missing row): wait and poll again.
        await asyncio.sleep(0.5)

    return {"success": False, "error": "Timeout"}
async def process_file(file_path: Path):
    """Run one file through the OCR queue and print a summary.

    Submits the file with ``submit_ocr_job``, waits for the outcome with
    ``wait_for_result`` and prints progress and the extracted fields to
    stdout.

    Args:
        file_path: File to send through the OCR queue.

    Returns:
        The decoded OCR result on success, or ``None`` when the job failed,
        timed out, or completed without any result payload.
    """
    print(f"[OCR Queue] Submitting: {file_path.name}")
    job_id = await submit_ocr_job(file_path)
    print(f"[OCR Queue] Job ID: {job_id}")
    print("[OCR Queue] Waiting for result...")

    result = await wait_for_result(job_id)
    if not result["success"]:
        print(f"\n❌ Error: {result['error']}")
        return None

    r = result["result"]
    if r is None:
        # A completed job may carry no result_json; wait_for_result reports
        # that as None, and calling .get() on it would raise AttributeError.
        print("\n❌ Error: OCR completed without a result")
        return None

    print(f"\n✅ OCR Complete ({result['time_ms']}ms)")
    print(f" CUI: {r.get('cui')}")
    print(f" Data: {r.get('receipt_date')}")
    print(f" Total: {r.get('amount')}")
    print(f" TVA: {r.get('tva_total')}")
    return r
if __name__ == "__main__":
    # CLI entry point: takes one positional argument, the path of the file
    # to push through the OCR queue.
    if len(sys.argv) < 2:
        # Build the usage line from argv[0] so it names the script that was
        # actually invoked instead of a hard-coded, outdated file name.
        print(f"Usage: python {Path(sys.argv[0]).name} <file_path>")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    # is_file() rather than exists(): a directory passes exists() but cannot
    # be copied into the queue by shutil.copy, which would end in a traceback.
    if not file_path.is_file():
        print(f"File not found: {file_path}")
        sys.exit(1)

    asyncio.run(process_file(file_path))