feat(telegram): bot bonuri fiscale — OCR → preview → Oracle write

- US-001: mută queue_client.py în data_entry/services/ocr/
- US-002/003/004: oracle_receipt_writer + oracle_server_id în DB
- US-005: receipt_handlers.py (PDF/photo/callback flow)
- US-006: wire handlers în main.py, per-schema connect, seq_cod.nextval
- US-007: .gitignore secrets/*.oracle_pass
- US-008/009/010: teste unit + integration + E2E
- setup-secrets.sh helper + template
- docs/telegram/README.md actualizat cu arhitectura nouă

Testat E2E pe DB live (MARIUSM_AUTO). COD din seq_cod.nextval.
pypdfium2 fallback pentru PDF decode (fără poppler).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-05 09:26:58 +00:00
parent 8234103884
commit e257fa5d5f
35 changed files with 4531 additions and 227 deletions

View File

@@ -3,6 +3,7 @@ from .receipt_service import ReceiptService
from .nomenclature_service import NomenclatureService
from .expense_types import EXPENSE_TYPES, ExpenseType
from .receipt_auto_create import ReceiptAutoCreateService, ReceiptCreateResult
from .oracle_receipt_writer import write_receipt
from . import sse_service
__all__ = [
@@ -12,5 +13,6 @@ __all__ = [
"ExpenseType",
"ReceiptAutoCreateService",
"ReceiptCreateResult",
"write_receipt",
"sse_service",
]

View File

@@ -257,18 +257,31 @@ def _decode_image(image_bytes: bytes) -> Optional[np.ndarray]:
# Try as PDF - use 200 DPI for faster processing (sufficient for receipts)
try:
import pdf2image
from PIL import Image
# 200 DPI is sufficient for receipt text recognition
# 300 DPI was overkill and slowed down processing
images = pdf2image.convert_from_bytes(image_bytes, dpi=200)
if images:
# Convert first page to numpy array
pil_img = images[0]
print(f"[Worker {os.getpid()}] PDF decoded: {pil_img.width}x{pil_img.height} @ 200 DPI", flush=True)
print(f"[Worker {os.getpid()}] PDF decoded (poppler): {pil_img.width}x{pil_img.height} @ 200 DPI", flush=True)
return np.array(pil_img)
except Exception as e:
print(f"[Worker {os.getpid()}] PDF decode error: {e}", flush=True)
# pdf2image needs poppler (pdftoppm/pdfinfo) on PATH; fall back to pypdfium2.
print(f"[Worker {os.getpid()}] pdf2image unavailable ({e}); trying pypdfium2 fallback...", flush=True)
try:
import pypdfium2 as pdfium
pdf = pdfium.PdfDocument(image_bytes)
if len(pdf) > 0:
page = pdf[0]
pil_img = page.render(scale=200 / 72).to_pil() # scale = DPI / 72
arr = np.array(pil_img)
if arr.ndim == 3 and arr.shape[2] == 4:
arr = arr[:, :, :3] # drop alpha
print(f"[Worker {os.getpid()}] PDF decoded (pypdfium2): {pil_img.width}x{pil_img.height} @ 200 DPI", flush=True)
return arr
except Exception as e2:
print(f"[Worker {os.getpid()}] pypdfium2 also failed: {e2}", flush=True)
return None

View File

@@ -0,0 +1,104 @@
"""
Client pentru OCR API roa2web - adaugă job direct în SQLite queue.
Folosește aceeași coadă ca backend-ul, fără HTTP auth.
"""
import asyncio
import json
import shutil
import sys
import uuid
from datetime import datetime, timedelta
from pathlib import Path
# Root of the OCR queue storage: "data/ocr_queue" under the fifth ancestor
# directory of this file (parents[0] is the containing directory).
# NOTE(review): presumably the repository root — re-check if this module moves.
QUEUE_DIR = Path(__file__).parents[4] / "data" / "ocr_queue"
# SQLite database file containing the ocr_jobs table used by the functions below.
DB_PATH = QUEUE_DIR / "ocr_jobs.db"
# Directory into which submitted input files are copied before being queued.
FILES_DIR = QUEUE_DIR / "files"
async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Copy *file_path* into the queue directory and insert a pending OCR job.

    Args:
        file_path: File to submit. A ``.pdf`` suffix (case-insensitive) is
            recorded as ``application/pdf``; anything else as ``image/jpeg``.
        engine: Engine name stored in the job row.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        OSError: If the file cannot be copied into the queue directory.
        Exception: Any error raised by aiosqlite while inserting the row. In
            that case the copied file is removed again so that no orphaned
            file is left behind without a matching job row.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    # Prefix with the job id so identical file names never collide.
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    shutil.copy(file_path, dest_path)

    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Take a single timestamp so expires_at is exactly created_at + 24h.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    committed = False
    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (
                    id, status, file_path, mime_type, engine,
                    created_at, original_filename, expires_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id, "pending", str(dest_path), mime_type, engine,
                    created_at.isoformat(), file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
            committed = True
    finally:
        if not committed:
            # The job row was never committed: drop the copied file, otherwise
            # it would stay in the queue directory with nothing referencing it.
            dest_path.unlink(missing_ok=True)

    return job_id
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or *timeout* elapses.

    Args:
        job_id: Id returned by ``submit_ocr_job``.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": <parsed result_json or None>, "time_ms": ...}``
        when the job status is ``completed``;
        ``{"success": False, "error": <error_message>}`` when it is ``failed``;
        ``{"success": False, "error": "Timeout"}`` when the deadline passes.
    """
    import aiosqlite

    # Use the event loop's monotonic clock for the deadline. The previous
    # wall-clock `timedelta.seconds` check wrapped at one day and reported a
    # bogus timeout whenever the system clock stepped backwards.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while loop.time() < deadline:
        # A fresh short-lived connection per poll, as before.
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,),
            ) as cursor:
                row = await cursor.fetchone()

        if row is not None:
            status = row["status"]
            if status == "completed":
                raw_result = row["result_json"]
                return {
                    "success": True,
                    "result": json.loads(raw_result) if raw_result else None,
                    "time_ms": row["processing_time_ms"],
                }
            if status == "failed":
                return {"success": False, "error": row["error_message"]}

        await asyncio.sleep(0.5)

    return {"success": False, "error": "Timeout"}
async def process_file(file_path: Path):
    """Submit *file_path* to the OCR queue, wait for the job and print a summary.

    Args:
        file_path: File to run through OCR.

    Returns:
        The parsed OCR result dict on success, or ``None`` when the job failed,
        timed out, or completed without a result payload.
    """
    print(f"[OCR Queue] Submitting: {file_path.name}")
    job_id = await submit_ocr_job(file_path)
    print(f"[OCR Queue] Job ID: {job_id}")
    print("[OCR Queue] Waiting for result...")

    outcome = await wait_for_result(job_id)
    if not outcome["success"]:
        print(f"\n❌ Error: {outcome['error']}")
        return None

    fields = outcome["result"]
    if fields is None:
        # wait_for_result yields result=None when result_json is empty; calling
        # .get() on it would crash, so report it as an error instead.
        print("\n❌ Error: OCR job completed without a result payload")
        return None

    print(f"\n✅ OCR Complete ({outcome['time_ms']}ms)")
    print(f" CUI: {fields.get('cui')}")
    print(f" Data: {fields.get('receipt_date')}")
    print(f" Total: {fields.get('amount')}")
    print(f" TVA: {fields.get('tva_total')}")
    return fields
if __name__ == "__main__":
    # Command-line entry point: expects the path of the file to OCR as the
    # first argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python queue_client.py <file_path>")
        sys.exit(1)

    file_path = Path(cli_args[0])
    if file_path.exists():
        asyncio.run(process_file(file_path))
    else:
        print(f"File not found: {file_path}")
        sys.exit(1)

View File

@@ -0,0 +1,145 @@
"""Shared helper for writing receipts to Oracle via PACK_CONTAFIN."""
import logging
from datetime import datetime
import oracledb
logger = logging.getLogger(__name__)
# Supplier fiscal codes (digits only, no "RO" prefix) mapped to the expense
# account used for their receipts.
_CUI_TO_CONT = {
    "11201891": "6022",  # MOL
    "1590082": "6022",  # OMV Petrom
    "14991381": "6022",  # MOL Romania
    "10562600": "6021",  # Dedeman
}


def _get_cont_cheltuiala(cui: str) -> str:
    """Return the expense account for *cui*, or "6028" for unknown suppliers.

    The code is upper-cased, every "RO" occurrence is removed and surrounding
    whitespace is stripped before the lookup in ``_CUI_TO_CONT``.
    """
    normalized = cui.upper()
    normalized = normalized.replace("RO", "")
    normalized = normalized.strip()
    if normalized in _CUI_TO_CONT:
        return _CUI_TO_CONT[normalized]
    return "6028"
def _build_conn_params(oracle_cfg) -> dict:
    """Return connection keyword arguments for *oracle_cfg*.

    A dict is handed back untouched (same object). Any other value is expected
    to expose ``user``, ``password`` and ``get_dsn()``, from which a new
    ``{"user", "password", "dsn"}`` dict is assembled.
    """
    if not isinstance(oracle_cfg, dict):
        return dict(
            user=oracle_cfg.user,
            password=oracle_cfg.password,
            dsn=oracle_cfg.get_dsn(),
        )
    return oracle_cfg
def _parse_receipt_number(raw) -> int:
    """Return *raw* as an int, or 0 when it is empty or not purely numeric."""
    text = str(raw or "").strip()
    # str.isdecimal() only accepts characters int() can parse; str.isdigit()
    # also accepts e.g. superscript digits, on which int() raises ValueError.
    return int(text) if text.isdecimal() else 0


def _build_journal_lines(cont: str, total: float, tva: float, expl: str, id_part: int) -> list:
    """Build the ACT_TEMP rows as (scd, scc, suma, explicatia, id_partc, id_partd).

    Order: expense line, optional VAT line (only when tva > 0), payment line.
    """
    # Round to 2 decimals so binary floating-point noise from the subtraction
    # (e.g. 10.370000000000001) is not written to the ledger.
    fara_tva = round(total - tva, 2)
    lines = [(cont, "401", fara_tva, expl, id_part, 0)]
    if tva > 0:
        lines.append(("4426", "401", tva, f"TVA {expl}", id_part, 0))
    lines.append(("401", "5311", total, f"Plata {expl}", 0, id_part))
    return lines


def write_receipt(receipt_dict: dict, oracle_cfg, *, commit: bool = True) -> tuple[int, str]:
    """Write a receipt to Oracle ACT_TEMP via PACK_CONTAFIN.

    Args:
        receipt_dict: Keys: partner_name, cui, receipt_date, receipt_number,
            amount, tva_total. ``receipt_date`` must expose ``.year`` and
            ``.month`` (date/datetime); when missing, the current date is used.
        oracle_cfg: Dict with user/password/dsn, OracleServerConfig instance, or
            oracledb.Connection (pre-acquired from pool — caller manages
            lifecycle). Note that commit/rollback are issued on a
            caller-supplied connection as well.
        commit: If False, rolls back after FINALIZEAZA (dry-run mode).

    Returns:
        (cod, mesaj) — Oracle document code and result message from PACK_CONTAFIN.

    Raises:
        Exception: Any error from oracledb or from converting the input values
            is re-raised after the transaction has been rolled back.
    """
    if isinstance(oracle_cfg, oracledb.Connection):
        conn = oracle_cfg
        own_conn = False
    else:
        conn = oracledb.connect(**_build_conn_params(oracle_cfg))
        own_conn = True

    try:
        # The cursor is acquired inside the try block so that a connection
        # opened here is still closed if cursor creation itself fails.
        with conn.cursor() as cursor:
            receipt_date = receipt_dict.get("receipt_date")
            now = datetime.now()
            an = receipt_date.year if receipt_date else now.year
            luna = receipt_date.month if receipt_date else now.month
            act_date = receipt_date or now.date()

            # NOTE(review): user and branch ids are hard-coded to 0 — confirm
            # this is the intended identity for bot-created documents.
            id_util = 0
            id_sucursala = 0

            cursor.callproc("PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL", [
                id_util,
                now,
                an,
                luna,
                0,  # suprascriere_cod
                0,  # suprascriere_anluna
                0,  # scrie_sterge (0=scrie)
                id_sucursala,
            ])

            # Globally unique document COD from sequence (NOT MAX+1 per period —
            # that races and reuses CODs across (AN, LUNA) tuples).
            cursor.execute("SELECT seq_cod.nextval FROM DUAL")
            cod = cursor.fetchone()[0]

            cui = receipt_dict.get("cui") or ""
            cui_clean = cui.upper().replace("RO", "").strip()
            id_part = 0
            if cui_clean:
                # Skip the lookup for an empty CUI: it would otherwise compare
                # against a bare "RO" and could match an unrelated partner.
                cursor.execute(
                    "SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
                    cui=cui_clean, cui2="RO" + cui_clean,
                )
                row = cursor.fetchone()
                if row:
                    id_part = row[0]

            total = float(receipt_dict.get("amount") or 0)
            tva = float(receipt_dict.get("tva_total") or 0)
            nract = _parse_receipt_number(receipt_dict.get("receipt_number"))
            cont = _get_cont_cheltuiala(cui)
            partner_name = receipt_dict.get("partner_name") or "N/A"
            expl = f"OCR: {partner_name}"

            for scd, scc, suma, line_expl, id_partc, id_partd in _build_journal_lines(
                cont, total, tva, expl, id_part
            ):
                cursor.execute("""
                    INSERT INTO ACT_TEMP (
                        LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                        EXPLICATIA, SCD, SCC, SUMA,
                        ID_PARTC, ID_PARTD, ID_UTIL, DATAORA
                    ) VALUES (
                        :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                        :expl, :scd, :scc, :suma,
                        :id_partc, :id_partd, :id_util, SYSDATE
                    )
                """, luna=luna, an=an, cod=cod, dataact=act_date, nract=nract,
                    expl=line_expl, scd=scd, scc=scc, suma=suma,
                    id_partc=id_partc, id_partd=id_partd, id_util=id_util)

            # OUT parameter receiving the result message from the package.
            mesaj_var = cursor.var(oracledb.STRING, 4000)
            cursor.callproc("PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL", [
                id_util,
                cod,
                0,  # scrie_sterge
                0,  # modificare_nota
                0,  # scrie_cump_vanz
                mesaj_var,
            ])
            mesaj = mesaj_var.getvalue() or ""

        if commit:
            conn.commit()
            logger.info("write_receipt: saved COD=%s mesaj=%r", cod, mesaj)
        else:
            conn.rollback()
            logger.info("write_receipt: dry-run rollback COD would be %s", cod)
        return cod, mesaj
    except Exception:
        conn.rollback()
        raise
    finally:
        if own_conn:
            conn.close()