- US-001: mută queue_client.py în data_entry/services/ocr/ - US-002/003/004: oracle_receipt_writer + oracle_server_id în DB - US-005: receipt_handlers.py (PDF/photo/callback flow) - US-006: wire handlers în main.py, per-schema connect, seq_cod.nextval - US-007: .gitignore secrets/*.oracle_pass - US-008/009/010: teste unit + integration + E2E - setup-secrets.sh helper + template - docs/telegram/README.md actualizat cu arhitectura nouă Testat E2E pe DB live (MARIUSM_AUTO). COD din seq_cod.nextval. pypdfium2 fallback pentru PDF decode (fără poppler). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
98 lines
2.7 KiB
Python
98 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Full pipeline script: PDF → OCR → Oracle (using PACK_CONTAFIN).

Usage: python process_and_save.py <path_to_pdf> [--save]

Without --save the Oracle step runs as a dry run (commit=False).
|
|
"""
|
|
import sys
|
|
sys.path.insert(0, "/workspace/roa2web")
|
|
|
|
import asyncio
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
from backend.modules.data_entry.services.oracle_receipt_writer import write_receipt
|
|
|
|
import os

# Connection settings passed to write_receipt(). Each value can be overridden
# through an environment variable so a different schema or server can be
# targeted without editing this file. When a variable is unset, the original
# hard-coded value is used, so existing invocations behave exactly as before.
# NOTE(review): the fallback password is still stored in source control; it
# should be moved to a secrets store and the default removed once every
# caller exports ORACLE_PASSWORD.
ORACLE_CONFIG = {
    "user": os.environ.get("ORACLE_USER", "MARIUSM_AUTO"),
    "password": os.environ.get("ORACLE_PASSWORD", "ROMFASTSOFT"),
    "dsn": os.environ.get("ORACLE_DSN", "10.0.20.121:1521/ROA"),
}
|
|
|
|
async def process_pdf(pdf_path: Path):
    """Run OCR on one document and print the extracted receipt fields.

    Args:
        pdf_path: File to process. A ``.pdf`` suffix (case-insensitive) is
            submitted as ``application/pdf``; any other suffix is submitted
            as ``image/jpeg``.

    Returns:
        The result object produced by ``ocr_service.process_image`` on
        success, or ``None`` when the service reports failure (its message
        is printed).
    """
    # Imported at call time, not at module import time.
    from backend.modules.data_entry.services.ocr_service import ocr_service

    if pdf_path.suffix.lower() == ".pdf":
        mime_type = "application/pdf"
    else:
        mime_type = "image/jpeg"

    separator = "-" * 50
    print(f"\n[OCR] Processing: {pdf_path.name}")
    print(separator)

    ok, message, result = await ocr_service.process_image(pdf_path, mime_type)
    if not ok:
        print(f"ERROR: {message}")
        return None

    # Printed one by one, in this order, so a failure while reading or
    # formatting a later field still leaves the earlier lines on screen.
    labelled_fields = (
        ("Partner", "partner_name"),
        ("CUI", "cui"),
        ("Data", "receipt_date"),
        ("Numar", "receipt_number"),
        ("Total", "amount"),
        ("TVA", "tva_total"),
    )
    for label, attribute in labelled_fields:
        print(f"{label}: {getattr(result, attribute)}")
    print(f"Confidence: {result.overall_confidence:.0%}")

    return result
|
|
|
|
def save_to_oracle_with_pack(result, do_commit: bool = False):
    """Send an OCR result to Oracle through ``write_receipt`` and report it.

    Args:
        result: Object exposing ``partner_name``, ``cui``, ``receipt_date``,
            ``receipt_number``, ``amount`` and ``tva_total`` attributes.
        do_commit: Forwarded to ``write_receipt`` as ``commit``. When false
            the run is reported as a dry run.

    Returns:
        The ``(cod, mesaj)`` pair unpacked from ``write_receipt``'s return.

    Raises:
        Exception: Anything raised inside the write/report step is printed
            and then re-raised unchanged.
    """
    if do_commit:
        mode = "SAVE"
    else:
        mode = "DRY RUN"
    print(f"\n[Oracle + PACK_CONTAFIN] {mode}")
    print("-" * 50)

    # Copy only the fields the writer receives, in a fixed order.
    field_names = (
        "partner_name",
        "cui",
        "receipt_date",
        "receipt_number",
        "amount",
        "tva_total",
    )
    receipt_dict = {name: getattr(result, name) for name in field_names}

    try:
        cod, mesaj = write_receipt(receipt_dict, ORACLE_CONFIG, commit=do_commit)
        print(f" Mesaj: {mesaj}")
        outcome_line = (
            f"\n✅ SALVAT în Oracle (COD={cod})"
            if do_commit
            else f"\n⚠️ DRY RUN - rollback (COD ar fi fost {cod})"
        )
        print(outcome_line)
        return cod, mesaj
    except Exception as exc:
        print(f"❌ Eroare: {exc}")
        raise
|
|
|
|
async def main():
    """Command-line entry point: resolve the input file, run OCR, then save.

    Reads ``sys.argv``: the first argument is a path, tried as given and then
    as a file name under the OCR sample fixtures directory. ``--save`` anywhere
    on the command line turns the Oracle step from a dry run into a commit.
    Prints a usage or "File not found" message and returns early when the
    argument is missing or neither candidate path exists.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python process_and_save.py <pdf_path> [--save]")
        return

    requested = argv[1]
    pdf_path = Path(requested)
    if not pdf_path.exists():
        # Fall back to treating the argument as a fixture file name.
        pdf_path = Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{requested}")

    if not pdf_path.exists():
        print(f"File not found: {requested}")
        return

    do_save = "--save" in argv

    banner = "=" * 50
    print(banner)
    print("PDF -> OCR -> Oracle (PACK_CONTAFIN)")
    print(banner)

    ocr_result = await process_pdf(pdf_path)
    if ocr_result:
        save_to_oracle_with_pack(ocr_result, do_commit=do_save)

    print("\n" + banner)
|
|
|
|
if __name__ == "__main__":
    # Run the async entry point only when executed as a script.
    entry_point = main()
    asyncio.run(entry_point)
|