Files
roa2web-service-auto/backend/modules/data_entry/services/ocr/queue_client.py
Marius Mutu e257fa5d5f feat(telegram): bot bonuri fiscale — OCR → preview → Oracle write
- US-001: mută queue_client.py în data_entry/services/ocr/
- US-002/003/004: oracle_receipt_writer + oracle_server_id în DB
- US-005: receipt_handlers.py (PDF/photo/callback flow)
- US-006: wire handlers în main.py, per-schema connect, seq_cod.nextval
- US-007: .gitignore secrets/*.oracle_pass
- US-008/009/010: teste unit + integration + E2E
- setup-secrets.sh helper + template
- docs/telegram/README.md actualizat cu arhitectura nouă

Testat E2E pe DB live (MARIUSM_AUTO). COD din seq_cod.nextval.
pypdfium2 fallback pentru PDF decode (fără poppler).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-05 09:26:58 +00:00

105 lines
3.4 KiB
Python

"""
Client pentru OCR API roa2web - adaugă job direct în SQLite queue.
Folosește aceeași coadă ca backend-ul, fără HTTP auth.
"""
import asyncio
import json
import shutil
import sys
import uuid
from datetime import datetime, timedelta
from pathlib import Path
# Root directory of the OCR queue. parents[4] is the fifth ancestor directory
# of this file (parents[0] being the directory that contains it).
# NOTE(review): presumably this is the backend root — re-check the index
# whenever this module is moved to a different package depth.
QUEUE_DIR = Path(__file__).parents[4] / "data" / "ocr_queue"
# SQLite database file holding the ocr_jobs table used by the functions below.
DB_PATH = QUEUE_DIR / "ocr_jobs.db"
# Directory that receives a private copy of every submitted document.
FILES_DIR = QUEUE_DIR / "files"
async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Copy a document into the queue directory and enqueue a pending OCR job.

    The file is copied to ``FILES_DIR`` under a name prefixed with the new job
    id, then a row with status ``"pending"`` is inserted into ``ocr_jobs``.

    Args:
        file_path: Document to process (a ``Path``; a plain string path is
            accepted as well).
        engine: Value stored in the job's ``engine`` column.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        OSError: If the source file cannot be copied.
        Exception: Any database error from ``aiosqlite`` is propagated; in
            that case the copied file is removed again so no orphan is left
            in ``FILES_DIR``.
    """
    # Function-scope import: aiosqlite is only required once a job is submitted.
    import aiosqlite

    file_path = Path(file_path)
    job_id = str(uuid.uuid4())

    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"

    # shutil.copy is blocking; run it in the default executor so other
    # coroutines keep running while the file is copied.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, shutil.copy, file_path, dest_path)

    # Only PDFs are distinguished; every other suffix is labelled as JPEG.
    is_pdf = file_path.suffix.lower() == ".pdf"
    mime_type = "application/pdf" if is_pdf else "image/jpeg"

    # Sample the clock once so expires_at is exactly created_at + 24h.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    committed = False
    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (
                    id, status, file_path, mime_type, engine,
                    created_at, original_filename, expires_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id, "pending", str(dest_path), mime_type, engine,
                    created_at.isoformat(), file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
            committed = True
    finally:
        # If the row never got committed, nothing references the copy:
        # delete it instead of leaking files into the queue directory.
        if not committed:
            try:
                dest_path.unlink()
            except OSError:
                pass

    return job_id
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or the timeout elapses.

    The job row is re-read every 0.5 seconds.

    Args:
        job_id: Id of the job to wait for.
        timeout: Maximum time to wait, in seconds. A value <= 0 returns the
            timeout result immediately without opening the database.

    Returns:
        One of:
        ``{"success": True, "result": <decoded result_json or None>,
        "time_ms": <processing_time_ms>}`` when the status is ``"completed"``;
        ``{"success": False, "error": <error_message>}`` when the status is
        ``"failed"``;
        ``{"success": False, "error": "Timeout"}`` when the deadline passes.

    Raises:
        json.JSONDecodeError: If ``result_json`` is not valid JSON.
    """
    # Function-scope import: aiosqlite is only required once polling starts.
    import aiosqlite

    if timeout <= 0:
        return {"success": False, "error": "Timeout"}

    # Use the event loop's monotonic clock. Measuring with
    # ``(datetime.now() - start).seconds`` only looks at the seconds
    # *component* of the timedelta (it wraps after a day and truncates
    # fractions) and is affected by wall-clock adjustments.
    clock = asyncio.get_running_loop().time
    deadline = clock() + timeout

    query = (
        "SELECT status, result_json, error_message, processing_time_ms "
        "FROM ocr_jobs WHERE id = ?"
    )

    # One connection for the whole wait instead of a new one per poll.
    async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
        db.row_factory = aiosqlite.Row
        while clock() < deadline:
            async with db.execute(query, (job_id,)) as cursor:
                row = await cursor.fetchone()

            if row is not None:
                status = row["status"]
                if status == "completed":
                    raw = row["result_json"]
                    return {
                        "success": True,
                        "result": json.loads(raw) if raw else None,
                        "time_ms": row["processing_time_ms"],
                    }
                if status == "failed":
                    return {"success": False, "error": row["error_message"]}

            await asyncio.sleep(0.5)

    return {"success": False, "error": "Timeout"}
async def process_file(file_path: Path):
    """Submit a file to the OCR queue, wait for the result and print a summary.

    Args:
        file_path: Document to send through the OCR queue.

    Returns:
        The decoded OCR result on success (an empty dict if the job completed
        without a stored result payload), or ``None`` if the job failed or
        timed out.
    """
    print(f"[OCR Queue] Submitting: {file_path.name}")
    job_id = await submit_ocr_job(file_path)
    print(f"[OCR Queue] Job ID: {job_id}")
    print("[OCR Queue] Waiting for result...")

    result = await wait_for_result(job_id)
    if not result["success"]:
        print(f"\n❌ Error: {result['error']}")
        return None

    # A completed job may carry no payload ("result" is None when result_json
    # is empty); fall back to an empty dict so the summary below cannot fail
    # with AttributeError.
    fields = result["result"] or {}
    print(f"\n✅ OCR Complete ({result['time_ms']}ms)")
    print(f" CUI: {fields.get('cui')}")
    print(f" Data: {fields.get('receipt_date')}")
    print(f" Total: {fields.get('amount')}")
    print(f" TVA: {fields.get('tva_total')}")
    return fields
if __name__ == "__main__":
    # Command-line entry point: expects the path of the document to OCR as
    # the first argument and exits with status 1 on bad usage.
    if not sys.argv[1:]:
        print("Usage: python queue_client.py <file_path>")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    if file_path.exists():
        asyncio.run(process_file(file_path))
    else:
        print(f"File not found: {file_path}")
        sys.exit(1)