fix telegram

2026-02-23 15:12:33 +00:00
parent 6c78fec8a7
commit 8bc567a9c5
426 changed files with 112478 additions and 1 deletions
--- a/deploy-package-20260223-151231/backend/modules/data_entry/routers/ocr.py
+++ b/deploy-package-20260223-151231/backend/modules/data_entry/routers/ocr.py
@@ -0,0 +1,715 @@
+"""
+OCR API endpoints with async job queue support.
+
+Endpoints:
+- POST /extract - Submit OCR job (returns job_id immediately)
+- GET /jobs/{job_id} - Get job status and result
+- GET /queue/status - Get queue statistics
+- GET /status - Check OCR service availability
+
+For backwards compatibility, we also support sync mode via query param:
+- POST /extract?sync=true - Process synchronously (blocks until complete)
+"""
+
+import os
+import tempfile
+from datetime import datetime
+from decimal import Decimal
+from pathlib import Path
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Response
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from backend.modules.data_entry.db.database import get_session
+from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
+from backend.modules.data_entry.services.ocr_service import ocr_service
+from backend.modules.data_entry.services.ocr_engine import OCREngine
+from backend.modules.data_entry.services.ocr.job_queue import job_queue, OCRJobStatus as JobStatus
+from backend.modules.data_entry.services.ocr.job_worker import estimate_wait_time
+from backend.modules.data_entry.services.ocr.validation import OCRValidationEngine
+from backend.modules.data_entry.schemas.ocr import (
+    OCRResponse,
+    OCRStatusResponse,
+    ExtractionData,
+    TvaEntry,
+    PaymentMethod,
+    # New job queue schemas
+    OCREngineChoice,
+    OCRJobStatus,
+    OCRJobSubmitResponse,
+    OCRJobResponse,
+    OCRQueueStatusResponse,
+)
+
+# Auth integration
+from shared.auth.dependencies import get_current_user
+from shared.auth.models import CurrentUser
+
+router = APIRouter()
+
+
+# ============================================================================
+# OCR Job Queue Endpoints (NEW)
+# ============================================================================
+
+@router.post("/extract", response_model=OCRJobSubmitResponse)
+async def submit_ocr_job(
+    file: UploadFile = File(...),
+    engine: OCREngineChoice = Query(default=OCREngineChoice.doctr_plus, description="OCR engine to use"),
+    sync: bool = Query(default=False, description="If true, process synchronously (blocks)"),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Submit an OCR job for processing.
+
+    By default, returns immediately with a job_id. Poll GET /jobs/{job_id} for result.
+
+    Use ?sync=true for synchronous processing (blocks until complete).
+    This is for backwards compatibility but not recommended for production.
+
+    Args:
+        file: Image or PDF file (max 10MB)
+        engine: OCR engine choice (tesseract, doctr, doctr_plus, paddleocr)
+        sync: If true, process synchronously (legacy mode)
+
+    Returns:
+        OCRJobSubmitResponse with job_id, queue_position, estimated_wait
+    """
+    allowed_types = ['image/jpeg', 'image/png', 'application/pdf']
+
+    if file.content_type not in allowed_types:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
+        )
+
+    # Read file content
+    content = await file.read()
+
+    # Check file size (10MB limit)
+    if len(content) > 10 * 1024 * 1024:
+        raise HTTPException(
+            status_code=400,
+            detail="File too large. Maximum size is 10MB."
+        )
+
+    # Sync mode - use legacy processing (blocks)
+    if sync:
+        return await _process_sync(content, file, engine, current_user)
+
+    # Async mode - create job and return immediately
+    try:
+        job = await job_queue.create_job(
+            file_bytes=content,
+            mime_type=file.content_type,
+            engine=engine.value,
+            username=current_user.username,
+            original_filename=file.filename
+        )
+
+        # Get queue position
+        queue_position = await job_queue.get_queue_position(job.id)
+        estimated_wait = estimate_wait_time(queue_position or 1)
+
+        return OCRJobSubmitResponse(
+            job_id=job.id,
+            status=OCRJobStatus.pending,
+            queue_position=queue_position or 1,
+            estimated_wait_seconds=estimated_wait,
+            created_at=job.created_at or datetime.utcnow()
+        )
+
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to create OCR job: {str(e)}"
+        )
+
+
+@router.get("/jobs/{job_id}", response_model=OCRJobResponse)
+async def get_job_status(
+    job_id: str,
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Get OCR job status and result (instant response).
+
+    For efficient polling, use GET /jobs/{job_id}/wait instead (long-polling).
+
+    Args:
+        job_id: Job UUID from POST /extract response
+
+    Returns:
+        OCRJobResponse with status, queue_position, and result (if completed)
+    """
+    job = await job_queue.get_job(job_id)
+
+    if not job:
+        raise HTTPException(status_code=404, detail="Job not found")
+
+    # Get queue position for pending jobs
+    queue_position = None
+    estimated_wait = None
+
+    if job.status == JobStatus.pending:
+        queue_position = await job_queue.get_queue_position(job_id)
+        estimated_wait = estimate_wait_time(queue_position or 1)
+    elif job.status == JobStatus.processing:
+        queue_position = 0
+        # Estimate remaining time based on average
+        avg_time = await job_queue.get_average_processing_time()
+        estimated_wait = int(avg_time * 0.5)  # Rough estimate: half remaining
+
+    # Convert result to ExtractionData if available
+    result_data = None
+    if job.status == JobStatus.completed and job.result:
+        result_data = _dict_to_extraction_data(job.result)
+        # Apply fuzzy CUI matching
+        result_data = await _apply_fuzzy_cui_matching(result_data, session)
+        # Debug: log suggested_payment_mode being returned
+        print(f"[OCR Router] Returning job {job_id} with suggested_payment_mode={result_data.suggested_payment_mode}", flush=True)
+
+    return OCRJobResponse(
+        job_id=job.id,
+        status=OCRJobStatus(job.status.value),
+        queue_position=queue_position,
+        estimated_wait_seconds=estimated_wait,
+        created_at=job.created_at or datetime.utcnow(),
+        started_at=job.started_at,
+        completed_at=job.completed_at,
+        queue_wait_ms=job.queue_wait_ms,
+        ocr_time_ms=job.ocr_time_ms,
+        processing_time_ms=job.processing_time_ms,
+        result=result_data,
+        error=job.error_message
+    )
+
+
+@router.get("/jobs/{job_id}/wait", response_model=OCRJobResponse)
+async def wait_for_job_status(
+    job_id: str,
+    response: Response,
+    timeout: int = Query(default=30, ge=1, le=60, description="Max wait time in seconds"),
+    wait_for_terminal: bool = Query(default=False, description="If true, only return on completed/failed"),
+    _t: int = Query(default=None, description="Cache-busting timestamp (ignored)"),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Long-poll for OCR job status change.
+
+    Waits until:
+    - Job status changes (default behavior - returns on any status change)
+    - Job reaches terminal state (if wait_for_terminal=true)
+    - Timeout expires (returns current status)
+
+    Recommended client timeout: timeout + 5 seconds
+
+    Args:
+        job_id: Job UUID from POST /extract response
+        timeout: Max wait time in seconds (1-60, default 30)
+        wait_for_terminal: If true, wait until completed/failed only
+
+    Returns:
+        OCRJobResponse with status, queue_position, and result (if completed)
+    """
+    # Prevent caching - critical for long-polling
+    response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0"
+    response.headers["Pragma"] = "no-cache"
+    response.headers["Expires"] = "0"
+    import asyncio
+    import time
+
+    start_time = time.time()
+    end_time = start_time + timeout
+    last_status = None
+    iteration = 0
+
+    print(f"[OCR Wait] Starting long-poll for job {job_id}, timeout={timeout}s, wait_for_terminal={wait_for_terminal}", flush=True)
+
+    while time.time() < end_time:
+        iteration += 1
+        job = await job_queue.get_job(job_id)
+
+        if not job:
+            print(f"[OCR Wait] Job {job_id} not found after {iteration} iterations", flush=True)
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        # Return immediately if job completed or failed (terminal states)
+        if job.status in [JobStatus.completed, JobStatus.failed]:
+            elapsed = time.time() - start_time
+            print(f"[OCR Wait] Job {job_id} {job.status.value} after {elapsed:.1f}s ({iteration} iterations)", flush=True)
+            return await get_job_status(job_id, session, current_user)
+
+        # Return on status change (unless wait_for_terminal is set)
+        if not wait_for_terminal and last_status is not None and job.status != last_status:
+            elapsed = time.time() - start_time
+            print(f"[OCR Wait] Job {job_id} status changed {last_status.value}->{job.status.value} after {elapsed:.1f}s", flush=True)
+            return await get_job_status(job_id, session, current_user)
+
+        last_status = job.status
+
+        # Wait 500ms before next internal check (faster polling for better responsiveness)
+        await asyncio.sleep(0.5)
+
+    # Timeout - return current status
+    elapsed = time.time() - start_time
+    print(f"[OCR Wait] Job {job_id} timeout after {elapsed:.1f}s ({iteration} iterations), status={last_status.value if last_status else 'unknown'}", flush=True)
+    return await get_job_status(job_id, session, current_user)
+
+
+@router.get("/queue/status", response_model=OCRQueueStatusResponse)
+async def get_queue_status(
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Get OCR queue statistics.
+
+    Returns:
+        Queue status with pending/processing counts and average time
+    """
+    stats = await job_queue.get_queue_stats()
+
+    return OCRQueueStatusResponse(
+        pending_jobs=stats["pending"],
+        processing_jobs=stats["processing"],
+        average_time_seconds=stats["average_time_seconds"]
+    )
+
+
+# ============================================================================
+# Legacy Endpoints (backwards compatibility)
+# ============================================================================
+
+@router.get("/status", response_model=OCRStatusResponse)
+async def get_ocr_status():
+    """Check OCR service status and available engines."""
+    engines = OCREngine.get_available_engines()
+    available = len(engines) > 0
+
+    if available:
+        message = f"OCR service ready with engines: {', '.join(engines)}"
+    else:
+        message = "No OCR engines available. Install PaddleOCR or Tesseract."
+
+    return OCRStatusResponse(
+        available=available,
+        engines=engines,
+        message=message
+    )
+
+
+@router.get("/engines")
+async def get_available_engines():
+    """
+    Get list of enabled OCR engines based on .env configuration.
+
+    Returns engines availability and available processing modes.
+    Frontend should use this to filter engine selection dropdown.
+
+    Available engines: tesseract, doctr, doctr_plus, paddleocr
+    """
+    # Check which engines are enabled via .env
+    paddle_enabled = os.getenv("OCR_ENABLE_PADDLEOCR", "true").lower() == "true"
+    tesseract_enabled = os.getenv("OCR_ENABLE_TESSERACT", "true").lower() == "true"
+    default_engine = os.getenv("OCR_DEFAULT_ENGINE", "doctr_plus")
+
+    # Build engines dict
+    engines = {
+        "tesseract": tesseract_enabled,
+        "doctr": True,  # Always available (primary engine)
+        "doctr_plus": True,  # Always available (recommended)
+        "paddleocr": paddle_enabled,
+    }
+
+    # Build available modes based on enabled engines
+    modes = []
+
+    if tesseract_enabled:
+        modes.append("tesseract")
+
+    modes.append("doctr")
+    modes.append("doctr_plus")
+
+    if paddle_enabled:
+        modes.append("paddleocr")
+
+    return {
+        "engines": engines,
+        "available_modes": modes,
+        "default_mode": default_engine,
+        "memory_estimate_mb": {
+            "tesseract": 50,
+            "doctr": 600,
+            "doctr_plus": 600,
+            "paddleocr": 800,
+        }
+    }
+
+
+@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
+async def extract_from_attachment(
+    attachment_id: int,
+    engine: OCREngineChoice = Query(default=OCREngineChoice.doctr_plus),
+    session: AsyncSession = Depends(get_session),
+    current_user: CurrentUser = Depends(get_current_user)
+):
+    """
+    Extract receipt data from an existing attachment.
+
+    Re-processes an already uploaded file with OCR.
+    This endpoint always processes synchronously.
+    """
+    attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
+
+    if not attachment:
+        raise HTTPException(status_code=404, detail="Attachment not found")
+
+    file_path = AttachmentCRUD.get_file_path(attachment)
+
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="File not found on disk")
+
+    # Check if file type is supported
+    if attachment.mime_type not in ['image/jpeg', 'image/png', 'application/pdf']:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File type not supported for OCR: {attachment.mime_type}"
+        )
+
+    # TODO: Could use job queue here too, but keeping sync for now
+    success, message, result = await ocr_service.process_image(
+        file_path, attachment.mime_type
+    )
+
+    if not success:
+        raise HTTPException(status_code=422, detail=message)
+
+    data = _result_to_extraction_data(result)
+    # Apply fuzzy CUI matching
+    data = await _apply_fuzzy_cui_matching(data, session)
+    return OCRResponse(success=True, message=message, data=data)
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+async def _apply_fuzzy_cui_matching(
+    extraction_data: ExtractionData,
+    session: AsyncSession
+) -> ExtractionData:
+    """
+    Apply fuzzy CUI matching to extraction data.
+
+    ONLY applies fuzzy matching if CUI is missing OR has invalid checksum.
+    If CUI has valid checksum, we trust the OCR and skip fuzzy matching.
+
+    Args:
+        extraction_data: ExtractionData with CUI to potentially correct
+        session: AsyncSession for database lookups
+
+    Returns:
+        ExtractionData with CUI corrected if a match was found
+    """
+    from backend.modules.data_entry.services.ocr.validation import CUIChecksumRule
+
+    # Skip if no CUI and no vendor name (nothing to match)
+    if not extraction_data.cui and not extraction_data.partner_name:
+        return extraction_data
+
+    # Check if CUI has valid checksum - if valid, skip fuzzy matching
+    if extraction_data.cui:
+        cui_digits = CUIChecksumRule.extract_digits(extraction_data.cui)
+        if len(cui_digits) >= 6 and CUIChecksumRule.validate_checksum(cui_digits):
+            print(f"[Fuzzy Match] CUI {extraction_data.cui} has valid checksum, skipping fuzzy match", flush=True)
+            return extraction_data
+
+    # CUI missing or invalid checksum - try fuzzy matching
+    try:
+        match = await OCRValidationEngine.fuzzy_match_supplier(
+            cui=extraction_data.cui,
+            vendor_name=extraction_data.partner_name,
+            db_session=session
+        )
+
+        if match:
+            corrected_cui, supplier_name = match
+            if corrected_cui != extraction_data.cui:
+                print(f"[Fuzzy Match] Corrected: {extraction_data.cui} -> {corrected_cui} ({supplier_name})", flush=True)
+                extraction_data.cui = corrected_cui
+                # Also set partner_name if not already set
+                if not extraction_data.partner_name:
+                    extraction_data.partner_name = supplier_name
+    except Exception as e:
+        print(f"[Fuzzy Match] Error: {e}", flush=True)
+
+    return extraction_data
+
+
+async def _process_sync(
+    content: bytes,
+    file: UploadFile,
+    engine: OCREngineChoice,
+    current_user: CurrentUser
+) -> OCRJobSubmitResponse:
+    """
+    Process OCR synchronously (legacy mode).
+
+    Creates a job, processes it immediately, and returns the result
+    wrapped in a JobSubmitResponse for API consistency.
+    """
+    # Get file extension
+    suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
+    if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
+        suffix = '.jpg'
+
+    # Save to temp file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+        tmp.write(content)
+        tmp_path = Path(tmp.name)
+
+    try:
+        success, message, result = await ocr_service.process_image(
+            tmp_path, file.content_type
+        )
+
+        if not success:
+            raise HTTPException(status_code=422, detail=message)
+
+        # Create a fake job response with the result embedded
+        # This maintains API compatibility
+        now = datetime.utcnow()
+
+        # For sync mode, we return a special response that includes
+        # the result directly. Clients should check if result is present.
+        return OCRJobSubmitResponse(
+            job_id="sync-" + str(hash(content))[:16],
+            status=OCRJobStatus.completed,
+            queue_position=0,
+            estimated_wait_seconds=0,
+            created_at=now
+        )
+
+    finally:
+        # Clean up temp file
+        if tmp_path.exists():
+            os.unlink(tmp_path)
+
+
+def _result_to_extraction_data(result) -> ExtractionData:
+    """Convert ExtractionResult to ExtractionData schema."""
+    # Convert tva_entries from dict to TvaEntry objects
+    tva_entries_schema = [
+        TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
+        for e in result.tva_entries
+    ] if result.tva_entries else []
+
+    # Convert payment_methods from dict to PaymentMethod objects
+    payment_methods_list = [
+        PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount'])))
+        for pm in result.payment_methods
+    ] if result.payment_methods else []
+
+    # Auto-suggest payment_mode based on detected methods
+    suggested_payment_mode = None
+    if payment_methods_list:
+        has_card = any(pm.method == 'CARD' for pm in payment_methods_list)
+        if has_card:
+            suggested_payment_mode = 'banca'
+
+    return ExtractionData(
+        receipt_type=result.receipt_type,
+        receipt_number=result.receipt_number,
+        receipt_series=result.receipt_series,
+        receipt_date=result.receipt_date,
+        amount=result.amount,
+        partner_name=result.partner_name,
+        cui=result.cui,
+        description=result.description,
+        tva_entries=tva_entries_schema,
+        tva_total=result.tva_total,
+        address=result.address,
+        items_count=result.items_count,
+        payment_methods=payment_methods_list,
+        suggested_payment_mode=suggested_payment_mode,
+        client_name=result.client_name,
+        client_cui=result.client_cui,
+        client_address=result.client_address,
+        confidence_amount=result.confidence_amount,
+        confidence_date=result.confidence_date,
+        confidence_vendor=result.confidence_vendor,
+        confidence_client=getattr(result, 'confidence_client', 0.0),
+        overall_confidence=result.overall_confidence,
+        raw_text=result.raw_text,
+        raw_texts=getattr(result, 'raw_texts', []),
+        ocr_engine=result.ocr_engine,
+        processing_time_ms=result.processing_time_ms,
+        needs_manual_review=result.needs_manual_review,
+        validation_warnings=result.validation_warnings,
+        validation_errors=result.validation_errors,
+        inter_ocr_ratios=result.inter_ocr_ratios,
+    )
+
+
+def _dict_to_extraction_data(data: dict) -> ExtractionData:
+    """Convert result dict (from job queue) to ExtractionData schema."""
+    from datetime import date
+
+    # Parse date if string
+    receipt_date = data.get('receipt_date')
+    if isinstance(receipt_date, str):
+        try:
+            receipt_date = date.fromisoformat(receipt_date)
+        except (ValueError, TypeError):
+            receipt_date = None
+
+    # Convert tva_entries
+    tva_entries = data.get('tva_entries', []) or []
+    tva_entries_schema = []
+    for e in tva_entries:
+        if isinstance(e, dict):
+            tva_entries_schema.append(TvaEntry(
+                code=e.get('code'),
+                percent=e.get('percent', 0),
+                amount=Decimal(str(e.get('amount', 0)))
+            ))
+
+    # Convert payment_methods
+    payment_methods = data.get('payment_methods', []) or []
+    payment_methods_list = []
+    for pm in payment_methods:
+        if isinstance(pm, dict):
+            payment_methods_list.append(PaymentMethod(
+                method=pm.get('method', 'NUMERAR'),
+                amount=Decimal(str(pm.get('amount', 0)))
+            ))
+
+    # Convert amount and tva_total to Decimal
+    amount = data.get('amount')
+    if amount is not None:
+        amount = Decimal(str(amount))
+
+    tva_total = data.get('tva_total')
+    if tva_total is not None:
+        tva_total = Decimal(str(tva_total))
+
+    return ExtractionData(
+        receipt_type=data.get('receipt_type', 'bon_fiscal'),
+        receipt_number=data.get('receipt_number'),
+        receipt_series=data.get('receipt_series'),
+        receipt_date=receipt_date,
+        amount=amount,
+        partner_name=data.get('partner_name'),
+        cui=data.get('cui'),
+        description=data.get('description'),
+        tva_entries=tva_entries_schema,
+        tva_total=tva_total,
+        address=data.get('address'),
+        items_count=data.get('items_count'),
+        payment_methods=payment_methods_list,
+        suggested_payment_mode=data.get('suggested_payment_mode'),
+        client_name=data.get('client_name'),
+        client_cui=data.get('client_cui'),
+        client_address=data.get('client_address'),
+        confidence_amount=data.get('confidence_amount', 0.0),
+        confidence_date=data.get('confidence_date', 0.0),
+        confidence_vendor=data.get('confidence_vendor', 0.0),
+        confidence_client=data.get('confidence_client', 0.0),
+        confidence_tva=data.get('confidence_tva', 0.0),
+        confidence_payment=data.get('confidence_payment', 0.0),
+        overall_confidence=data.get('overall_confidence', 0.0),
+        raw_text=data.get('raw_text', ''),
+        raw_texts=data.get('raw_texts', []),
+        ocr_engine=data.get('ocr_engine', ''),
+        processing_time_ms=data.get('processing_time_ms', 0),
+        needs_manual_review=data.get('needs_manual_review'),
+        validation_warnings=data.get('validation_warnings', []),
+        validation_errors=data.get('validation_errors', []),
+        inter_ocr_ratios=data.get('inter_ocr_ratios', {}),
+    )
+
+
+# ============================================================================
+# Store Profiles Management Endpoints
+# ============================================================================
+
+@router.post("/profiles/reload")
+async def reload_store_profiles(
+    current_user: CurrentUser = Depends(get_current_user)
+) -> dict:
+    """
+    Hot-reload all store profiles.
+
+    Reloads profile Python modules without server restart.
+    Use after adding/modifying profile files.
+
+    Returns:
+        Dict with reloaded count and profile list
+    """
+    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry
+
+    count = ProfileRegistry.reload_all()
+    status = ProfileRegistry.get_reload_status()
+
+    return {
+        "success": True,
+        "reloaded_modules": count,
+        "profiles_count": status["profiles_count"],
+        "registered_cuis": status["registered_cuis"],
+        "last_reload": status["last_reload"],
+    }
+
+
+@router.get("/profiles")
+async def list_store_profiles(
+    current_user: CurrentUser = Depends(get_current_user)
+) -> dict:
+    """
+    List all registered store profiles.
+
+    Returns:
+        Dict with profiles list and status
+    """
+    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry
+
+    profiles = ProfileRegistry.list_profiles()
+    status = ProfileRegistry.get_reload_status()
+
+    return {
+        "profiles": profiles,
+        "count": len(profiles),
+        "last_reload": status["last_reload"],
+    }
+
+
+@router.get("/profiles/{cui}")
+async def get_store_profile(
+    cui: str,
+    current_user: CurrentUser = Depends(get_current_user)
+) -> dict:
+    """
+    Get details for a specific store profile.
+
+    Args:
+        cui: Store CUI (with or without RO prefix)
+
+    Returns:
+        Profile details including validation hints
+
+    Raises:
+        404: If no profile exists for this CUI
+    """
+    from backend.modules.data_entry.services.ocr.profiles import ProfileRegistry
+
+    info = ProfileRegistry.get_profile_info(cui)
+
+    if not info:
+        raise HTTPException(
+            status_code=404,
+            detail=f"No profile registered for CUI: {cui}"
+        )
+
+    return info