- OCR client for SQLite queue - WhatsApp flow: PDF -> OCR -> SQLite -> Oracle - PACK_CONTAFIN integration for Oracle save - README with flux documentation
44 lines
1.2 KiB
Python
44 lines
1.2 KiB
Python
#!/usr/bin/env python3
|
|
import sys
|
|
import asyncio
|
|
from pathlib import Path
|
|
|
|
# Put the hard-coded backend checkout directory first on the module search
# path so project modules can be imported when this script is run directly.
# NOTE(review): main() imports `backend.modules...`, which resolves through
# this entry only if a `backend` package exists *inside* this directory;
# confirm whether the parent directory ("/workspace/roa2web") was intended.
sys.path.insert(0, "/workspace/roa2web/backend")
|
|
|
|
async def main():
    """Run OCR on the document named on the command line and print the result.

    The first command-line argument is tried as a path as given; if nothing
    exists there, it is looked up inside the OCR sample fixtures directory.
    The file is passed to ``ocr_service.process_image`` together with a MIME
    type derived from its suffix, and the extracted fields are printed.

    Returns:
        int: Process exit status. ``0`` when OCR succeeded, ``1`` when the
        file could not be found or the OCR service reported a failure, and
        ``2`` when no path argument was supplied.
    """
    if len(sys.argv) < 2:
        # Diagnostics go to stderr so stdout carries only OCR output.
        print("Usage: python test_ocr_simple.py <path_to_pdf>", file=sys.stderr)
        return 2

    pdf_path = Path(sys.argv[1])
    if not pdf_path.exists():
        # Fall back to treating the argument as a name inside the fixtures dir.
        pdf_path = Path(f"/workspace/roa2web/tests/fixtures/ocr-samples/{sys.argv[1]}")

    if not pdf_path.exists():
        print(f"Error: File not found: {sys.argv[1]}", file=sys.stderr)
        return 1

    print(f"Processing: {pdf_path}")

    # Deferred import: the argument and file checks above run (and can fail
    # fast) without loading the backend package.
    from backend.modules.data_entry.services.ocr_service import ocr_service

    # NOTE(review): every non-PDF suffix (including e.g. .png) is labelled
    # image/jpeg; confirm the OCR service tolerates that for other image types.
    mime_type = "application/pdf" if pdf_path.suffix.lower() == ".pdf" else "image/jpeg"

    print("Running OCR...")
    success, message, result = await ocr_service.process_image(pdf_path, mime_type)

    if not success:
        print(f"Error: {message}", file=sys.stderr)
        return 1

    print(f"Partner: {result.partner_name}")
    print(f"CUI: {result.cui}")
    print(f"Date: {result.receipt_date}")
    print(f"Amount: {result.amount}")
    print(f"TVA: {result.tva_total}")
    print(f"Confidence: {result.overall_confidence:.2%}")
    print("SUCCESS!")
    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status so shell
    # callers and CI can detect failures (a None result still exits with 0).
    sys.exit(asyncio.run(main()))
|