feat(telegram): bot bonuri fiscale — OCR → preview → Oracle write
- US-001: mută queue_client.py în data_entry/services/ocr/ - US-002/003/004: oracle_receipt_writer + oracle_server_id în DB - US-005: receipt_handlers.py (PDF/photo/callback flow) - US-006: wire handlers în main.py, per-schema connect, seq_cod.nextval - US-007: .gitignore secrets/*.oracle_pass - US-008/009/010: teste unit + integration + E2E - setup-secrets.sh helper + template - docs/telegram/README.md actualizat cu arhitectura nouă Testat E2E pe DB live (MARIUSM_AUTO). COD din seq_cod.nextval. pypdfium2 fallback pentru PDF decode (fără poppler). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ from .receipt_service import ReceiptService
|
||||
from .nomenclature_service import NomenclatureService
|
||||
from .expense_types import EXPENSE_TYPES, ExpenseType
|
||||
from .receipt_auto_create import ReceiptAutoCreateService, ReceiptCreateResult
|
||||
from .oracle_receipt_writer import write_receipt
|
||||
from . import sse_service
|
||||
|
||||
__all__ = [
|
||||
@@ -12,5 +13,6 @@ __all__ = [
|
||||
"ExpenseType",
|
||||
"ReceiptAutoCreateService",
|
||||
"ReceiptCreateResult",
|
||||
"write_receipt",
|
||||
"sse_service",
|
||||
]
|
||||
|
||||
@@ -257,18 +257,31 @@ def _decode_image(image_bytes: bytes) -> Optional[np.ndarray]:
|
||||
# Try as PDF - use 200 DPI for faster processing (sufficient for receipts)
|
||||
try:
|
||||
import pdf2image
|
||||
from PIL import Image
|
||||
|
||||
# 200 DPI is sufficient for receipt text recognition
|
||||
# 300 DPI was overkill and slowed down processing
|
||||
images = pdf2image.convert_from_bytes(image_bytes, dpi=200)
|
||||
if images:
|
||||
# Convert first page to numpy array
|
||||
pil_img = images[0]
|
||||
print(f"[Worker {os.getpid()}] PDF decoded: {pil_img.width}x{pil_img.height} @ 200 DPI", flush=True)
|
||||
print(f"[Worker {os.getpid()}] PDF decoded (poppler): {pil_img.width}x{pil_img.height} @ 200 DPI", flush=True)
|
||||
return np.array(pil_img)
|
||||
except Exception as e:
|
||||
print(f"[Worker {os.getpid()}] PDF decode error: {e}", flush=True)
|
||||
# pdf2image needs poppler (pdftoppm/pdfinfo) on PATH; fall back to pypdfium2.
|
||||
print(f"[Worker {os.getpid()}] pdf2image unavailable ({e}); trying pypdfium2 fallback...", flush=True)
|
||||
try:
|
||||
import pypdfium2 as pdfium
|
||||
|
||||
pdf = pdfium.PdfDocument(image_bytes)
|
||||
if len(pdf) > 0:
|
||||
page = pdf[0]
|
||||
pil_img = page.render(scale=200 / 72).to_pil() # scale = DPI / 72
|
||||
arr = np.array(pil_img)
|
||||
if arr.ndim == 3 and arr.shape[2] == 4:
|
||||
arr = arr[:, :, :3] # drop alpha
|
||||
print(f"[Worker {os.getpid()}] PDF decoded (pypdfium2): {pil_img.width}x{pil_img.height} @ 200 DPI", flush=True)
|
||||
return arr
|
||||
except Exception as e2:
|
||||
print(f"[Worker {os.getpid()}] pypdfium2 also failed: {e2}", flush=True)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
104
backend/modules/data_entry/services/ocr/queue_client.py
Normal file
104
backend/modules/data_entry/services/ocr/queue_client.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
Client pentru OCR API roa2web - adaugă job direct în SQLite queue.
|
||||
Folosește aceeași coadă ca backend-ul, fără HTTP auth.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
QUEUE_DIR = Path(__file__).parents[4] / "data" / "ocr_queue"
|
||||
DB_PATH = QUEUE_DIR / "ocr_jobs.db"
|
||||
FILES_DIR = QUEUE_DIR / "files"
|
||||
|
||||
|
||||
async def submit_ocr_job(file_path: Path, engine: str = "doctr_plus") -> str:
    """Copy a file into the OCR queue and register a pending job for it.

    The file is copied to ``FILES_DIR`` as ``<job_id>_<original name>`` and a
    row with status ``"pending"`` is inserted into the ``ocr_jobs`` table of
    the SQLite database at ``DB_PATH``. The row expires 24 hours after its
    creation timestamp.

    Args:
        file_path: File to submit. A ``.pdf`` suffix (case-insensitive) is
            recorded as ``application/pdf``; anything else as ``image/jpeg``.
        engine: OCR engine name stored on the job row.

    Returns:
        The generated job id (a UUID4 string).

    Raises:
        Any error from the copy or the database insert. If the insert does not
        commit, the copied file is removed again before the error propagates.
    """
    import aiosqlite

    job_id = str(uuid.uuid4())

    FILES_DIR.mkdir(parents=True, exist_ok=True)
    dest_path = FILES_DIR / f"{job_id}_{file_path.name}"
    # shutil.copy blocks; run it in the default executor so other coroutines
    # on this event loop keep running during the copy.
    await asyncio.get_running_loop().run_in_executor(
        None, shutil.copy, file_path, dest_path
    )

    mime_type = "application/pdf" if file_path.suffix.lower() == ".pdf" else "image/jpeg"

    # Single timestamp so expires_at is exactly created_at + 24h.
    created_at = datetime.now()
    expires_at = created_at + timedelta(hours=24)

    committed = False
    try:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            await db.execute(
                """
                INSERT INTO ocr_jobs (
                    id, status, file_path, mime_type, engine,
                    created_at, original_filename, expires_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    job_id, "pending", str(dest_path), mime_type, engine,
                    created_at.isoformat(), file_path.name,
                    expires_at.isoformat(),
                ),
            )
            await db.commit()
            committed = True
    finally:
        if not committed:
            # No committed row references the copy; remove it so failed
            # submissions do not accumulate orphaned files.
            try:
                dest_path.unlink()
            except OSError:
                pass

    return job_id
|
||||
|
||||
|
||||
async def wait_for_result(job_id: str, timeout: int = 120) -> dict:
    """Poll the queue database until the job finishes or the timeout elapses.

    The ``ocr_jobs`` row for ``job_id`` is read every 0.5 seconds, each time
    over a fresh connection to ``DB_PATH``.

    Args:
        job_id: Id returned by ``submit_ocr_job``.
        timeout: Maximum time to wait, in seconds.

    Returns:
        ``{"success": True, "result": <decoded result_json or None>,
        "time_ms": <processing_time_ms>}`` when the status is ``"completed"``;
        ``{"success": False, "error": <error_message>}`` when it is
        ``"failed"``; ``{"success": False, "error": "Timeout"}`` otherwise.
    """
    import time

    import aiosqlite

    # Monotonic deadline: unaffected by wall-clock adjustments and, unlike
    # ``timedelta.seconds`` (only the seconds component, wraps at one day),
    # correct for any timeout length.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        async with aiosqlite.connect(str(DB_PATH), timeout=5.0) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT status, result_json, error_message, processing_time_ms FROM ocr_jobs WHERE id = ?",
                (job_id,),
            ) as cursor:
                row = await cursor.fetchone()

        if row is not None:
            status = row["status"]
            if status == "completed":
                return {
                    "success": True,
                    "result": json.loads(row["result_json"]) if row["result_json"] else None,
                    "time_ms": row["processing_time_ms"],
                }
            if status == "failed":
                return {"success": False, "error": row["error_message"]}

        await asyncio.sleep(0.5)

    return {"success": False, "error": "Timeout"}
|
||||
|
||||
|
||||
async def process_file(file_path: Path):
    """Submit a file to the OCR queue, wait for the job and print a summary.

    Args:
        file_path: File to run through OCR.

    Returns:
        The decoded OCR result on success (this can be ``None`` when the job
        completed without a stored result), or ``None`` when the job failed
        or timed out.
    """
    print(f"[OCR Queue] Submitting: {file_path.name}")
    job_id = await submit_ocr_job(file_path)
    print(f"[OCR Queue] Job ID: {job_id}")
    print("[OCR Queue] Waiting for result...")

    result = await wait_for_result(job_id)

    if not result["success"]:
        print(f"\n❌ Error: {result['error']}")
        return None

    r = result["result"]
    # A completed job may carry no result payload; print from an empty
    # mapping in that case instead of raising AttributeError on None.
    fields = r or {}
    print(f"\n✅ OCR Complete ({result['time_ms']}ms)")
    print(f"   CUI: {fields.get('cui')}")
    print(f"   Data: {fields.get('receipt_date')}")
    print(f"   Total: {fields.get('amount')}")
    print(f"   TVA: {fields.get('tva_total')}")
    return r
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: expects the path of the file to OCR as the
    # first argument and exits with status 1 on a usage or missing-file error.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python queue_client.py <file_path>")
        sys.exit(1)

    target = Path(cli_args[0])
    if not target.exists():
        print(f"File not found: {target}")
        sys.exit(1)

    asyncio.run(process_file(target))
|
||||
145
backend/modules/data_entry/services/oracle_receipt_writer.py
Normal file
145
backend/modules/data_entry/services/oracle_receipt_writer.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Shared helper for writing receipts to Oracle via PACK_CONTAFIN."""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
import oracledb
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CUI_TO_CONT = {
|
||||
"11201891": "6022", # MOL
|
||||
"1590082": "6022", # OMV Petrom
|
||||
"14991381": "6022", # MOL Romania
|
||||
"10562600": "6021", # Dedeman
|
||||
}
|
||||
|
||||
|
||||
def _get_cont_cheltuiala(cui: str) -> str:
|
||||
cui_clean = cui.upper().replace("RO", "").strip()
|
||||
return _CUI_TO_CONT.get(cui_clean, "6028")
|
||||
|
||||
|
||||
def _build_conn_params(oracle_cfg) -> dict:
|
||||
if isinstance(oracle_cfg, dict):
|
||||
return oracle_cfg
|
||||
return {
|
||||
"user": oracle_cfg.user,
|
||||
"password": oracle_cfg.password,
|
||||
"dsn": oracle_cfg.get_dsn(),
|
||||
}
|
||||
|
||||
|
||||
def write_receipt(receipt_dict: dict, oracle_cfg, *, commit: bool = True) -> tuple[int, str]:
    """Write a receipt to Oracle ACT_TEMP via PACK_CONTAFIN.

    Steps, all on one cursor: call ``INITIALIZEAZA_SCRIERE_ACT_RUL``, take a
    document code from ``seq_cod``, look up the partner in ``NOM_PARTENERI``
    by fiscal code (with and without the "RO" prefix), insert two or three
    rows into ``ACT_TEMP`` (net amount, optional VAT, payment), then call
    ``FINALIZEAZA_SCRIERE_ACT_RUL``.

    Args:
        receipt_dict: Keys: partner_name, cui, receipt_date, receipt_number,
            amount, tva_total. ``receipt_date`` must expose ``.year`` and
            ``.month`` (presumably a date/datetime — confirm against callers);
            when missing, the current date is used.
        oracle_cfg: Dict with user/password/dsn, OracleServerConfig instance, or
            oracledb.Connection (pre-acquired from pool — caller manages lifecycle)
        commit: If False, rolls back after FINALIZEAZA (dry-run mode).
            NOTE(review): Oracle sequence increments are normally not undone
            by a rollback, so a dry run presumably still consumes a COD —
            confirm.

    Returns:
        (cod, mesaj) — Oracle document code and result message from PACK_CONTAFIN

    Raises:
        Any exception from the database calls; the transaction is rolled back
        first. A connection opened by this function is always closed.
    """
    if isinstance(oracle_cfg, oracledb.Connection):
        conn = oracle_cfg
        own_conn = False
    else:
        conn = oracledb.connect(**_build_conn_params(oracle_cfg))
        own_conn = True

    cursor = None
    try:
        # Acquire the cursor inside the try so a connection opened above is
        # still closed if cursor creation itself fails.
        cursor = conn.cursor()

        receipt_date = receipt_dict.get("receipt_date")
        now = datetime.now()
        an = receipt_date.year if receipt_date else now.year
        luna = receipt_date.month if receipt_date else now.month
        act_date = receipt_date or now.date()

        id_util = 0
        id_sucursala = 0

        cursor.callproc("PACK_CONTAFIN.INITIALIZEAZA_SCRIERE_ACT_RUL", [
            id_util,
            now,
            an,
            luna,
            0,  # suprascriere_cod
            0,  # suprascriere_anluna
            0,  # scrie_sterge (0=scrie)
            id_sucursala,
        ])

        # Globally unique document COD from sequence (NOT MAX+1 per period —
        # that races and reuses CODs across (AN, LUNA) tuples).
        cursor.execute("SELECT seq_cod.nextval FROM DUAL")
        cod = cursor.fetchone()[0]

        cui = receipt_dict.get("cui") or ""
        cui_clean = cui.upper().replace("RO", "").strip()
        cursor.execute(
            "SELECT ID_PART FROM NOM_PARTENERI WHERE COD_FISCAL = :cui OR COD_FISCAL = :cui2",
            cui=cui_clean, cui2="RO" + cui_clean,
        )
        row = cursor.fetchone()
        id_part = row[0] if row else 0  # 0 when the partner is not found

        total = float(receipt_dict.get("amount") or 0)
        tva = float(receipt_dict.get("tva_total") or 0)
        # Round the net amount to 2 decimals: plain float subtraction leaves
        # binary residue (e.g. 10.1 - 1.61 -> 8.489999999999998) that would
        # otherwise be written into the ledger line.
        fara_tva = round(total - tva, 2)
        receipt_number = str(receipt_dict.get("receipt_number") or "")
        nract = int(receipt_number) if receipt_number.isdigit() else 0
        cont = _get_cont_cheltuiala(cui)
        partner_name = receipt_dict.get("partner_name") or "N/A"
        expl = f"OCR: {partner_name}"

        # Each tuple: (SCD, SCC, SUMA, EXPLICATIA, ID_PARTC, ID_PARTD)
        lines = [
            (cont, "401", fara_tva, expl, id_part, 0),
            ("401", "5311", total, f"Plata {expl}", 0, id_part),
        ]
        if tva > 0:
            # The VAT line goes between the net-amount line and the payment line.
            lines.insert(1, ("4426", "401", tva, f"TVA {expl}", id_part, 0))

        for scd, scc, suma, e, id_partc, id_partd in lines:
            cursor.execute("""
                INSERT INTO ACT_TEMP (
                    LUNA, AN, COD, DATAIREG, DATAACT, NRACT,
                    EXPLICATIA, SCD, SCC, SUMA,
                    ID_PARTC, ID_PARTD, ID_UTIL, DATAORA
                ) VALUES (
                    :luna, :an, :cod, TRUNC(SYSDATE), :dataact, :nract,
                    :expl, :scd, :scc, :suma,
                    :id_partc, :id_partd, :id_util, SYSDATE
                )
            """, luna=luna, an=an, cod=cod, dataact=act_date, nract=nract,
                expl=e, scd=scd, scc=scc, suma=suma,
                id_partc=id_partc, id_partd=id_partd, id_util=id_util)

        # Output bind variable that receives the procedure's message.
        mesaj_var = cursor.var(oracledb.STRING, 4000)
        cursor.callproc("PACK_CONTAFIN.FINALIZEAZA_SCRIERE_ACT_RUL", [
            id_util,
            cod,
            0,  # scrie_sterge
            0,  # modificare_nota
            0,  # scrie_cump_vanz
            mesaj_var,
        ])
        mesaj = mesaj_var.getvalue() or ""

        if commit:
            conn.commit()
            logger.info("write_receipt: saved COD=%s mesaj=%r", cod, mesaj)
        else:
            conn.rollback()
            logger.info("write_receipt: dry-run rollback COD would be %s", cod)

        return cod, mesaj

    except Exception:
        # Roll back, but never let a failing rollback (e.g. dead connection)
        # replace the error that actually caused the failure.
        try:
            conn.rollback()
        except oracledb.Error:
            logger.exception("write_receipt: rollback failed")
        raise
    finally:
        if cursor is not None:
            cursor.close()
        if own_conn:
            conn.close()
|
||||
Reference in New Issue
Block a user