Integrate shared JWT authentication into data-entry-app: - Add Oracle pool initialization for auth service - Add AuthenticationMiddleware to protect API routes - Update all receipt endpoints to use CurrentUser from JWT - Add shared auth router (/api/auth/login, /api/auth/refresh) Add nomenclature synchronization feature: - Create SQLite models for synced suppliers, local suppliers, and cash registers - Add nomenclature router with sync triggers and CRUD endpoints - Add sync service for Oracle → SQLite nomenclature data - Update nomenclature_service to use synced SQLite data with fallbacks Create shared frontend components: - Add shared/frontend/ with LoginView.vue, auth store factory, login.css - Integrate shared login and auth into data-entry-app frontend - Add axios-based API service with token refresh interceptor 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
185 lines
6.1 KiB
Python
185 lines
6.1 KiB
Python
"""OCR API endpoints."""
|
|
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.db.database import get_session
|
|
from app.db.crud.attachment import AttachmentCRUD
|
|
from app.services.ocr_service import ocr_service
|
|
from app.services.ocr_engine import OCREngine
|
|
from app.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry
|
|
|
|
# Auth integration (will be protected by middleware)
|
|
from auth.dependencies import get_current_user
|
|
from auth.models import CurrentUser
|
|
|
|
router = APIRouter()
|
|
|
|
|
|
@router.get("/status", response_model=OCRStatusResponse)
async def get_ocr_status():
    """Report whether OCR is usable and which engines were detected.

    Returns:
        OCRStatusResponse with ``available`` set when at least one engine
        is reported by ``OCREngine.get_available_engines()``, the engine
        list itself, and a human-readable status message.
    """
    detected = OCREngine.get_available_engines()
    has_engines = len(detected) > 0

    # Pick the status text in one expression instead of an if/else block.
    status_text = (
        f"OCR service ready with engines: {', '.join(detected)}"
        if has_engines
        else "No OCR engines available. Install PaddleOCR or Tesseract."
    )

    return OCRStatusResponse(
        available=has_engines,
        engines=detected,
        message=status_text,
    )
|
|
|
|
|
|
@router.post("/extract", response_model=OCRResponse)
async def extract_from_image(file: UploadFile = File(...)):
    """
    Extract receipt data from uploaded image.

    Accepts JPG, PNG, or PDF files (max 10MB).
    Returns extracted fields with confidence scores.

    Args:
        file: The uploaded file; its ``content_type`` selects the OCR path
            and its filename (if any) supplies the temp-file suffix.

    Returns:
        OCRResponse with ``success=True``, the OCR service message and the
        extracted data.

    Raises:
        HTTPException: 400 if the content type is not supported or the
            payload exceeds 10MB; 422 if the OCR service reports failure.
    """
    allowed_types = ['image/jpeg', 'image/png', 'application/pdf']

    if file.content_type not in allowed_types:
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
        )

    # Get file extension (fall back to .jpg for missing/unknown suffixes)
    suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
    if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
        suffix = '.jpg'

    # Read and validate the payload BEFORE creating the temp file: the file
    # is created with delete=False, so rejecting the upload after creation
    # would leave an orphaned file on disk.
    content = await file.read()

    # Check file size (10MB limit)
    if len(content) > 10 * 1024 * 1024:
        raise HTTPException(
            status_code=400,
            detail="File too large. Maximum size is 10MB."
        )

    tmp_path = None
    try:
        # Save to temp file; the path is recorded first so that a failed
        # write is still cleaned up by the finally clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(content)

        success, message, result = await ocr_service.process_image(
            tmp_path, file.content_type
        )

        if not success:
            raise HTTPException(status_code=422, detail=message)

        # Convert ExtractionResult to ExtractionData schema
        # Convert tva_entries from dict to TvaEntry objects
        tva_entries_schema = [
            TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
            for e in result.tva_entries
        ] if result.tva_entries else []

        data = ExtractionData(
            receipt_type=result.receipt_type,
            receipt_number=result.receipt_number,
            receipt_series=result.receipt_series,
            receipt_date=result.receipt_date,
            amount=result.amount,
            partner_name=result.partner_name,
            cui=result.cui,
            description=result.description,
            tva_entries=tva_entries_schema,
            tva_total=result.tva_total,
            address=result.address,
            items_count=result.items_count,
            confidence_amount=result.confidence_amount,
            confidence_date=result.confidence_date,
            confidence_vendor=result.confidence_vendor,
            overall_confidence=result.overall_confidence,
            raw_text=result.raw_text,
            ocr_engine=result.ocr_engine,
            processing_time_ms=result.processing_time_ms,
        )

        return OCRResponse(success=True, message=message, data=data)

    finally:
        # Clean up temp file on every exit path (success, OCR failure, error)
        if tmp_path is not None and tmp_path.exists():
            os.unlink(tmp_path)
|
|
|
|
|
|
@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
async def extract_from_attachment(
    attachment_id: int,
    session: AsyncSession = Depends(get_session),
):
    """
    Run OCR on a file that was already uploaded as an attachment.

    Looks the attachment up by id, resolves its file on disk and passes it
    to the OCR service.

    Raises:
        HTTPException: 404 if the attachment record or its file is missing;
            400 if the attachment's MIME type is not JPEG, PNG or PDF;
            422 if the OCR service reports failure.
    """
    ocr_mime_types = ('image/jpeg', 'image/png', 'application/pdf')

    record = await AttachmentCRUD.get_by_id(session, attachment_id)
    if not record:
        raise HTTPException(status_code=404, detail="Attachment not found")

    stored_path = AttachmentCRUD.get_file_path(record)
    if not stored_path.exists():
        raise HTTPException(status_code=404, detail="File not found on disk")

    # Only image and PDF attachments can be sent to OCR.
    if record.mime_type not in ocr_mime_types:
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported for OCR: {record.mime_type}"
        )

    ok, ocr_message, extraction = await ocr_service.process_image(
        stored_path, record.mime_type
    )
    if not ok:
        raise HTTPException(status_code=422, detail=ocr_message)

    # Turn the raw TVA dicts into schema objects; 'code' is optional,
    # 'percent' and 'amount' are required keys.
    tva_rows = []
    if extraction.tva_entries:
        for entry in extraction.tva_entries:
            tva_rows.append(
                TvaEntry(
                    code=entry.get('code'),
                    percent=entry['percent'],
                    amount=entry['amount'],
                )
            )

    # Copy the extraction result field-by-field into the response schema.
    payload = ExtractionData(
        receipt_type=extraction.receipt_type,
        receipt_number=extraction.receipt_number,
        receipt_series=extraction.receipt_series,
        receipt_date=extraction.receipt_date,
        amount=extraction.amount,
        partner_name=extraction.partner_name,
        cui=extraction.cui,
        description=extraction.description,
        tva_entries=tva_rows,
        tva_total=extraction.tva_total,
        address=extraction.address,
        items_count=extraction.items_count,
        confidence_amount=extraction.confidence_amount,
        confidence_date=extraction.confidence_date,
        confidence_vendor=extraction.confidence_vendor,
        overall_confidence=extraction.overall_confidence,
        raw_text=extraction.raw_text,
        ocr_engine=extraction.ocr_engine,
        processing_time_ms=extraction.processing_time_ms,
    )

    return OCRResponse(success=True, message=ocr_message, data=payload)
|