fix telegram

This commit is contained in:
Claude Agent
2026-02-23 15:12:33 +00:00
parent 6c78fec8a7
commit 8bc567a9c5
426 changed files with 112478 additions and 1 deletions

View File

@@ -0,0 +1,4 @@
# Database module
from .database import get_session, init_db, engine
__all__ = ["get_session", "init_db", "engine"]

View File

@@ -0,0 +1,13 @@
# CRUD operations
from .receipt import ReceiptCRUD
from .attachment import AttachmentCRUD
from .accounting_entry import AccountingEntryCRUD
from .ocr_settings import OCRPreferenceCRUD, OCRMetricsCRUD
__all__ = [
"ReceiptCRUD",
"AttachmentCRUD",
"AccountingEntryCRUD",
"OCRPreferenceCRUD",
"OCRMetricsCRUD",
]

View File

@@ -0,0 +1,197 @@
"""CRUD operations for accounting entries."""
from datetime import datetime
from typing import Optional, List
from sqlalchemy import select, delete
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.models.accounting_entry import AccountingEntry, EntryType
from backend.modules.data_entry.schemas.receipt import AccountingEntryCreate, AccountingEntryUpdate
class AccountingEntryCRUD:
"""CRUD operations for AccountingEntry model."""
@staticmethod
async def create(
    session: AsyncSession,
    receipt_id: int,
    data: AccountingEntryCreate,
    sort_order: int = 0,
    is_auto_generated: bool = True,
) -> AccountingEntry:
    """Persist one accounting entry attached to a receipt.

    Args:
        session: Async session used to add, commit and refresh the row.
        receipt_id: ID of the receipt the entry belongs to.
        data: Payload supplying entry type, account code/name, amount,
            partner and cost center.
        sort_order: Position stored on the entry.
        is_auto_generated: Flag stored on the entry as given.

    Returns:
        The committed entry, refreshed from the database.
    """
    column_values = {
        "receipt_id": receipt_id,
        "entry_type": data.entry_type,
        "account_code": data.account_code,
        "account_name": data.account_name,
        "amount": data.amount,
        "partner_id": data.partner_id,
        "cost_center_id": data.cost_center_id,
        "is_auto_generated": is_auto_generated,
        "sort_order": sort_order,
    }
    new_entry = AccountingEntry(**column_values)

    session.add(new_entry)
    await session.commit()
    # Refresh so database-generated values are loaded on the instance.
    await session.refresh(new_entry)
    return new_entry
@staticmethod
async def create_bulk(
    session: AsyncSession,
    receipt_id: int,
    entries: List[AccountingEntryCreate],
    is_auto_generated: bool = True,
) -> List[AccountingEntry]:
    """Persist several accounting entries for one receipt in a single commit.

    Each entry's ``sort_order`` is its index in ``entries``.

    Args:
        session: Async session used to add, commit and refresh the rows.
        receipt_id: ID of the receipt all entries belong to.
        entries: Payloads to turn into rows, in display order.
        is_auto_generated: Flag stored on every created row.

    Returns:
        The created entries, refreshed after the commit, in input order.
    """
    new_rows = [
        AccountingEntry(
            receipt_id=receipt_id,
            entry_type=payload.entry_type,
            account_code=payload.account_code,
            account_name=payload.account_name,
            amount=payload.amount,
            partner_id=payload.partner_id,
            cost_center_id=payload.cost_center_id,
            is_auto_generated=is_auto_generated,
            sort_order=position,
        )
        for position, payload in enumerate(entries)
    ]
    for row in new_rows:
        session.add(row)

    # One commit for the whole batch, then reload each row individually.
    await session.commit()
    for row in new_rows:
        await session.refresh(row)
    return new_rows
@staticmethod
async def get_by_id(
    session: AsyncSession,
    entry_id: int,
) -> Optional[AccountingEntry]:
    """Fetch the accounting entry whose primary key equals ``entry_id``.

    Returns:
        The matching entry, or ``None`` when no row matches.
    """
    stmt = select(AccountingEntry).where(AccountingEntry.id == entry_id)
    return (await session.execute(stmt)).scalar_one_or_none()
@staticmethod
async def get_by_receipt_id(
    session: AsyncSession,
    receipt_id: int,
) -> List[AccountingEntry]:
    """List the accounting entries of a receipt, ordered by ``sort_order``.

    Returns:
        A list (possibly empty) of entries in ascending ``sort_order``.
    """
    belongs_to_receipt = AccountingEntry.receipt_id == receipt_id
    stmt = (
        select(AccountingEntry)
        .where(belongs_to_receipt)
        .order_by(AccountingEntry.sort_order.asc())
    )
    rows = (await session.execute(stmt)).scalars().all()
    return list(rows)
@staticmethod
async def update(
    session: AsyncSession,
    entry: AccountingEntry,
    data: AccountingEntryUpdate,
    modified_by: str,
) -> AccountingEntry:
    """Apply a partial update to an entry and stamp it as manually modified.

    Only the fields explicitly set on ``data`` are copied onto ``entry``.

    Args:
        session: Async session used to commit and refresh the row.
        entry: The already-loaded entry to modify.
        data: Update payload; unset fields are left untouched.
        modified_by: Value stored in ``entry.modified_by``.

    Returns:
        The same entry instance, refreshed after the commit.
    """
    changed_fields = data.model_dump(exclude_unset=True)
    for attr_name in changed_fields:
        setattr(entry, attr_name, changed_fields[attr_name])

    # Any update through this method clears the auto-generated flag and
    # records who changed the entry and when (naive UTC timestamp).
    entry.is_auto_generated = False
    entry.modified_by = modified_by
    entry.modified_at = datetime.utcnow()

    session.add(entry)
    await session.commit()
    await session.refresh(entry)
    return entry
@staticmethod
async def delete(session: AsyncSession, entry: AccountingEntry) -> bool:
    """Remove a single accounting entry and commit the deletion.

    Returns:
        Always ``True`` once the commit has completed.
    """
    await session.delete(entry)
    await session.commit()
    deleted = True
    return deleted
@staticmethod
async def delete_all_for_receipt(session: AsyncSession, receipt_id: int) -> int:
    """Delete every accounting entry of a receipt with one DELETE statement.

    Returns:
        The ``rowcount`` reported for the executed DELETE.
    """
    stmt = delete(AccountingEntry).where(
        AccountingEntry.receipt_id == receipt_id
    )
    outcome = await session.execute(stmt)
    await session.commit()
    return outcome.rowcount
@staticmethod
async def replace_all_for_receipt(
    session: AsyncSession,
    receipt_id: int,
    entries: List[AccountingEntryCreate],
    modified_by: str,
) -> List[AccountingEntry]:
    """Atomically replace all entries of a receipt with a new set.

    The existing rows are deleted and the new rows inserted inside one
    transaction. If anything fails before the commit, the session is rolled
    back so the receipt keeps its previous entries instead of ending up with
    none.

    Args:
        session: Async session used for the delete, inserts and commit.
        receipt_id: ID of the receipt whose entries are replaced.
        entries: New entry payloads; list order becomes ``sort_order``.
        modified_by: Value stored in ``modified_by`` on every new row.

    Returns:
        The newly created entries, refreshed after the commit.

    Raises:
        Exception: Whatever the delete, insert or commit raised, re-raised
            after the rollback.
    """
    # One timestamp for the whole replacement so all rows agree.
    modified_at = datetime.utcnow()

    try:
        # Delete without committing: the delete and the inserts below must
        # succeed or fail together.
        await session.execute(
            delete(AccountingEntry).where(AccountingEntry.receipt_id == receipt_id)
        )

        # New rows are flagged as manually modified (not auto-generated).
        created_entries = [
            AccountingEntry(
                receipt_id=receipt_id,
                entry_type=entry_data.entry_type,
                account_code=entry_data.account_code,
                account_name=entry_data.account_name,
                amount=entry_data.amount,
                partner_id=entry_data.partner_id,
                cost_center_id=entry_data.cost_center_id,
                is_auto_generated=False,
                modified_by=modified_by,
                modified_at=modified_at,
                sort_order=idx,
            )
            for idx, entry_data in enumerate(entries)
        ]
        for entry in created_entries:
            session.add(entry)

        await session.commit()
    except Exception:
        await session.rollback()
        raise

    for entry in created_entries:
        await session.refresh(entry)
    return created_entries
@staticmethod
async def validate_entries(entries: List[AccountingEntryCreate]) -> tuple[bool, str]:
    """Check that a set of accounting entries is usable.

    Checks are applied in this order and the first failure is reported:
    the list is non-empty, debit and credit totals differ by at most 0.01,
    and every account code is present with at least 3 characters.

    Returns:
        ``(True, "")`` when valid, otherwise ``(False, error_message)``.
    """
    if not entries:
        return False, "At least one entry is required"

    total_debit = 0
    total_credit = 0
    for item in entries:
        if item.entry_type == EntryType.DEBIT:
            total_debit += item.amount
        elif item.entry_type == EntryType.CREDIT:
            total_credit += item.amount

    # Debits must balance credits, within a 0.01 tolerance.
    imbalance = abs(total_debit - total_credit)
    if imbalance > 0.01:
        return False, f"Entries not balanced: Debit={total_debit}, Credit={total_credit}"

    for item in entries:
        code = item.account_code
        if code and len(code) >= 3:
            continue
        return False, f"Invalid account code: {code}"

    return True, ""

View File

@@ -0,0 +1,140 @@
"""CRUD operations for receipt attachments."""
import os
import uuid
import aiofiles
from datetime import datetime
from pathlib import Path
from typing import Optional, List
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from fastapi import UploadFile
from backend.modules.data_entry.db.models.receipt import ReceiptAttachment
from backend.config import settings
class AttachmentCRUD:
"""CRUD operations for ReceiptAttachment model."""
@staticmethod
def _generate_stored_filename(original_filename: str) -> str:
    """Build a unique storage name that keeps the original extension.

    The result is a random UUID4 followed by the lower-cased suffix of
    ``original_filename`` (empty when the name has no suffix).
    """
    extension = Path(original_filename).suffix.lower()
    unique_id = uuid.uuid4()
    return f"{unique_id}{extension}"
@staticmethod
def _get_upload_path(stored_filename: str) -> Path:
    """Return the ``YYYY/MM/<stored_filename>`` path relative to the upload root.

    Side effect: the ``YYYY/MM`` directory for the current UTC date is
    created under ``settings.data_entry_upload_path_resolved`` if missing.
    """
    today = datetime.utcnow()
    month_dir = Path(str(today.year)) / f"{today.month:02d}"

    # Make sure the target directory exists before a file is written there.
    target_dir = settings.data_entry_upload_path_resolved / month_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    return month_dir / stored_filename
@staticmethod
async def create(
    session: AsyncSession,
    receipt_id: int,
    file: UploadFile,
) -> ReceiptAttachment:
    """Store an uploaded file on disk and create its attachment record.

    The upload is validated before anything touches the filesystem. If the
    file write or the database commit fails, the session is rolled back and
    the partially stored file is removed, so no orphan file is left behind.

    Args:
        session: Async session used to persist the attachment row.
        receipt_id: ID of the receipt the attachment belongs to.
        file: The uploaded file; its content is read fully into memory.

    Returns:
        The committed attachment, refreshed from the database.

    Raises:
        ValueError: If the file exceeds the configured maximum size or its
            declared content type is not in the allowed list.
    """
    original_name = file.filename or "upload"

    content = await file.read()
    file_size = len(content)

    if file_size > settings.data_entry_max_upload_size_bytes:
        raise ValueError(f"File too large. Maximum size is {settings.data_entry_max_upload_size_mb}MB")

    # NOTE(review): content_type is whatever the client declared; it is not
    # verified against the actual file content here.
    mime_type = file.content_type or "application/octet-stream"
    if mime_type not in settings.data_entry_allowed_mime_types:
        raise ValueError(f"File type not allowed: {mime_type}")

    # Only after validation: pick the storage location (this also creates
    # the year/month directory).
    stored_filename = AttachmentCRUD._generate_stored_filename(original_name)
    relative_path = AttachmentCRUD._get_upload_path(stored_filename)
    full_path = settings.data_entry_upload_path_resolved / relative_path

    attachment = ReceiptAttachment(
        receipt_id=receipt_id,
        filename=original_name,
        stored_filename=stored_filename,
        file_path=str(relative_path),
        file_size=file_size,
        mime_type=mime_type,
    )

    try:
        async with aiofiles.open(full_path, "wb") as f:
            await f.write(content)
        session.add(attachment)
        await session.commit()
    except Exception:
        # Undo both halves: the pending row and the (possibly partial) file.
        await session.rollback()
        full_path.unlink(missing_ok=True)
        raise

    await session.refresh(attachment)
    return attachment
@staticmethod
async def get_by_id(
    session: AsyncSession,
    attachment_id: int,
) -> Optional[ReceiptAttachment]:
    """Fetch the attachment whose primary key equals ``attachment_id``.

    Returns:
        The matching attachment, or ``None`` when no row matches.
    """
    stmt = select(ReceiptAttachment).where(ReceiptAttachment.id == attachment_id)
    return (await session.execute(stmt)).scalar_one_or_none()
@staticmethod
async def get_by_receipt_id(
    session: AsyncSession,
    receipt_id: int,
) -> List[ReceiptAttachment]:
    """List the attachments of a receipt, oldest upload first.

    Returns:
        A list (possibly empty) ordered by ascending ``uploaded_at``.
    """
    belongs_to_receipt = ReceiptAttachment.receipt_id == receipt_id
    stmt = (
        select(ReceiptAttachment)
        .where(belongs_to_receipt)
        .order_by(ReceiptAttachment.uploaded_at.asc())
    )
    rows = (await session.execute(stmt)).scalars().all()
    return list(rows)
@staticmethod
def get_file_path(attachment: ReceiptAttachment) -> Path:
    """Resolve an attachment's stored relative path against the upload root."""
    upload_root = settings.data_entry_upload_path_resolved
    return upload_root / attachment.file_path
@staticmethod
async def delete(session: AsyncSession, attachment: ReceiptAttachment) -> bool:
    """Delete an attachment's database record and its stored file.

    The database row is deleted and committed first; the file is removed
    only afterwards. If the commit fails the file is still on disk, so the
    surviving row never points at a missing file.

    Args:
        session: Async session used to delete and commit.
        attachment: The already-loaded attachment to remove.

    Returns:
        Always ``True`` once both steps have completed.

    Raises:
        OSError: If the file exists but cannot be removed. The database row
            is already gone at that point.
    """
    # Resolve the path up front, while the instance is still attached.
    file_path = AttachmentCRUD.get_file_path(attachment)

    await session.delete(attachment)
    await session.commit()

    # missing_ok avoids the check-then-remove race of exists() + remove().
    file_path.unlink(missing_ok=True)
    return True
@staticmethod
async def delete_all_for_receipt(session: AsyncSession, receipt_id: int) -> int:
    """Delete every attachment of a receipt, one by one.

    Each attachment goes through ``AttachmentCRUD.delete``, so both the
    stored file and the database row are removed per attachment.

    Returns:
        The number of attachments deleted.
    """
    removed = 0
    for item in await AttachmentCRUD.get_by_receipt_id(session, receipt_id):
        await AttachmentCRUD.delete(session, item)
        removed += 1
    return removed

View File

@@ -0,0 +1,222 @@
"""CRUD operations for OCR settings and metrics."""
from datetime import datetime, timedelta
from typing import List, Optional
from sqlalchemy import func, select, and_
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.models.ocr_settings import (
UserOCRPreference,
OCRJobMetrics,
OCRMetricsSummary,
OCREngine,
)
class OCRPreferenceCRUD:
"""CRUD operations for user OCR preferences."""
@staticmethod
async def get_by_username(session: AsyncSession, username: str) -> Optional[UserOCRPreference]:
    """Look up the OCR preference stored for ``username``.

    Returns:
        The preference row, or ``None`` when the user has none.
    """
    stmt = select(UserOCRPreference).where(UserOCRPreference.username == username)
    found = await session.execute(stmt)
    return found.scalar_one_or_none()
@staticmethod
async def create_or_update(
    session: AsyncSession,
    username: str,
    preferred_engine: OCREngine
) -> UserOCRPreference:
    """Set a user's preferred OCR engine, inserting the row if needed.

    An existing preference gets its engine replaced and ``updated_at`` set
    to the current naive UTC time; otherwise a new row is created.

    Returns:
        The committed preference, refreshed from the database.
    """
    record = await OCRPreferenceCRUD.get_by_username(session, username)

    if not record:
        # First preference for this user.
        record = UserOCRPreference(
            username=username,
            preferred_engine=preferred_engine,
        )
        session.add(record)
    else:
        record.preferred_engine = preferred_engine
        record.updated_at = datetime.utcnow()

    await session.commit()
    await session.refresh(record)
    return record
@staticmethod
async def delete_by_username(session: AsyncSession, username: str) -> bool:
    """Remove the OCR preference stored for ``username``, if any.

    Returns:
        ``True`` when a row was deleted, ``False`` when none existed.
    """
    record = await OCRPreferenceCRUD.get_by_username(session, username)
    if not record:
        return False

    await session.delete(record)
    await session.commit()
    return True
class OCRMetricsCRUD:
"""CRUD operations for OCR job metrics."""
@staticmethod
async def create(
    session: AsyncSession,
    job_id: str,
    username: str,
    engine_requested: str,
    engine_used: str,
    processing_time_ms: int = 0,
    file_size_bytes: int = 0,
    file_type: str = "image/jpeg",
    original_filename: Optional[str] = None,
    success: bool = True,
    error_message: Optional[str] = None,
    overall_confidence: float = 0.0,
    fields_extracted: int = 0,
    needs_manual_review: Optional[bool] = None,
    validation_warnings_count: int = 0,
    validation_errors_count: int = 0,
    company_id: Optional[int] = None
) -> OCRJobMetrics:
    """Record the metrics of one OCR job.

    Every argument other than ``session`` is stored unchanged on the
    column of the same name.

    Returns:
        The committed metrics row, refreshed from the database.
    """
    column_values = dict(
        job_id=job_id,
        username=username,
        company_id=company_id,
        engine_requested=engine_requested,
        engine_used=engine_used,
        processing_time_ms=processing_time_ms,
        file_size_bytes=file_size_bytes,
        file_type=file_type,
        original_filename=original_filename,
        success=success,
        error_message=error_message,
        overall_confidence=overall_confidence,
        fields_extracted=fields_extracted,
        needs_manual_review=needs_manual_review,
        validation_warnings_count=validation_warnings_count,
        validation_errors_count=validation_errors_count,
    )
    record = OCRJobMetrics(**column_values)

    session.add(record)
    await session.commit()
    await session.refresh(record)
    return record
@staticmethod
async def get_by_job_id(session: AsyncSession, job_id: str) -> Optional[OCRJobMetrics]:
    """Fetch the metrics row recorded for ``job_id``.

    Returns:
        The metrics row, or ``None`` when no row matches.
    """
    stmt = select(OCRJobMetrics).where(OCRJobMetrics.job_id == job_id)
    found = await session.execute(stmt)
    return found.scalar_one_or_none()
@staticmethod
async def get_user_history(
    session: AsyncSession,
    username: str,
    limit: int = 50,
    offset: int = 0
) -> List[OCRJobMetrics]:
    """Return one page of a user's OCR jobs, newest first.

    Args:
        session: Async session used to run the query.
        username: User whose jobs are listed.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the page starts.

    Returns:
        Metrics rows ordered by descending ``created_at``.
    """
    stmt = select(OCRJobMetrics).where(OCRJobMetrics.username == username)
    stmt = stmt.order_by(OCRJobMetrics.created_at.desc())
    stmt = stmt.limit(limit).offset(offset)

    rows = (await session.execute(stmt)).scalars().all()
    return list(rows)
@staticmethod
async def get_summary_by_engine(
    session: AsyncSession,
    days: int = 30,
    username: Optional[str] = None
) -> List[OCRMetricsSummary]:
    """Aggregate OCR job metrics per engine over the last ``days`` days.

    Args:
        session: Async session used to run the aggregate query.
        days: Size of the look-back window, counted from the current UTC time.
        username: When given (truthy), only that user's jobs are included.

    Returns:
        One summary per ``engine_used``, ordered by job count descending.
        ``success_rate`` is a fraction (successful / total).
    """
    # NOTE(review): success_rate here is a 0..1 fraction, whereas
    # get_overall_stats multiplies by 100 - confirm which one callers expect.
    window_start = datetime.utcnow() - timedelta(days=days)

    conditions = [OCRJobMetrics.created_at >= window_start]
    if username:
        conditions.append(OCRJobMetrics.username == username)

    job_count = func.count(OCRJobMetrics.id)
    # `sa` is bound by the `import sqlalchemy as sa` statement at the bottom
    # of this module, which has run by the time this method is called.
    success_as_int = func.cast(OCRJobMetrics.success, sa.Integer)

    stmt = (
        select(
            OCRJobMetrics.engine_used,
            job_count.label('total_jobs'),
            func.sum(success_as_int).label('successful_jobs'),
            func.avg(OCRJobMetrics.processing_time_ms).label('avg_processing_time_ms'),
            func.avg(OCRJobMetrics.overall_confidence).label('avg_confidence'),
            func.avg(OCRJobMetrics.fields_extracted).label('avg_fields_extracted'),
        )
        .where(and_(*conditions))
        .group_by(OCRJobMetrics.engine_used)
        .order_by(job_count.desc())
    )
    outcome = await session.execute(stmt)

    summaries = []
    for row in outcome.all():
        # Aggregates can come back as NULL; treat those as zero.
        total = row.total_jobs or 0
        successful = row.successful_jobs or 0
        if total > 0:
            success_rate = successful / total
        else:
            success_rate = 0.0
        summaries.append(
            OCRMetricsSummary(
                engine=row.engine_used,
                total_jobs=total,
                successful_jobs=successful,
                failed_jobs=total - successful,
                success_rate=success_rate,
                avg_processing_time_ms=float(row.avg_processing_time_ms or 0),
                avg_confidence=float(row.avg_confidence or 0),
                avg_fields_extracted=float(row.avg_fields_extracted or 0),
            )
        )
    return summaries
@staticmethod
async def get_overall_stats(
    session: AsyncSession,
    days: int = 30,
    username: Optional[str] = None
) -> dict:
    """Compute overall OCR statistics over the last ``days`` days.

    Args:
        session: Async session used to run the aggregate query.
        days: Size of the look-back window, counted from the current UTC time.
        username: When given (truthy), only that user's jobs are included.

    Returns:
        A dict with ``total_jobs``, ``successful_jobs``, ``failed_jobs``,
        ``success_rate`` (a percentage, 0-100), ``avg_processing_time_ms``,
        ``avg_confidence`` and ``period_days``.
    """
    window_start = datetime.utcnow() - timedelta(days=days)

    conditions = [OCRJobMetrics.created_at >= window_start]
    if username:
        conditions.append(OCRJobMetrics.username == username)

    # `sa` is bound by the `import sqlalchemy as sa` statement at the bottom
    # of this module, which has run by the time this method is called.
    success_as_int = func.cast(OCRJobMetrics.success, sa.Integer)
    stmt = select(
        func.count(OCRJobMetrics.id).label('total_jobs'),
        func.sum(success_as_int).label('successful_jobs'),
        func.avg(OCRJobMetrics.processing_time_ms).label('avg_processing_time_ms'),
        func.avg(OCRJobMetrics.overall_confidence).label('avg_confidence'),
    ).where(and_(*conditions))

    row = (await session.execute(stmt)).one()

    # Aggregates can come back as NULL; treat those as zero.
    total = row.total_jobs or 0
    successful = row.successful_jobs or 0
    if total > 0:
        success_rate = successful / total * 100
    else:
        success_rate = 0.0

    return {
        "total_jobs": total,
        "successful_jobs": successful,
        "failed_jobs": total - successful,
        "success_rate": success_rate,
        "avg_processing_time_ms": float(row.avg_processing_time_ms or 0),
        "avg_confidence": float(row.avg_confidence or 0),
        "period_days": days,
    }
# Import sqlalchemy for func.cast
import sqlalchemy as sa

View File

@@ -0,0 +1,418 @@
"""CRUD operations for receipts."""
import json
from datetime import datetime, date
from decimal import Decimal
from typing import Optional, List, Tuple, Any
from sqlalchemy import select, func, or_
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptStatus
from backend.modules.data_entry.schemas.receipt import ReceiptCreate, ReceiptUpdate, ReceiptFilter
def _serialize_tva_breakdown(tva_breakdown: Optional[List[Any]]) -> Optional[str]:
    """Serialize a TVA breakdown list to a JSON string for storage.

    Each entry may be an object exposing ``model_dump()``, a dict, or
    anything ``dict()`` accepts. Every top-level ``Decimal`` value of an
    entry is converted to ``float`` - not only ``amount`` - so that other
    Decimal fields no longer make ``json.dumps`` raise ``TypeError``.
    Decimals nested deeper inside an entry are not converted.

    Args:
        tva_breakdown: The entries to serialize, or ``None``.

    Returns:
        The JSON text, or ``None`` when ``tva_breakdown`` is ``None``.
    """
    if tva_breakdown is None:
        return None

    serializable = []
    for entry in tva_breakdown:
        if hasattr(entry, 'model_dump'):
            # Pydantic-style model
            item = entry.model_dump()
        else:
            item = dict(entry)
        serializable.append({
            key: float(value) if isinstance(value, Decimal) else value
            for key, value in item.items()
        })
    return json.dumps(serializable)
def _serialize_payment_methods(payment_methods: Optional[List[Any]]) -> Optional[str]:
    """Serialize a payment-methods list to a JSON string for storage.

    Each entry may be an object exposing ``model_dump()``, a dict, or
    anything ``dict()`` accepts. ``amount`` is converted to ``float``
    whenever it supports ``__float__`` (so ints become floats too, as
    before). Any other top-level ``Decimal`` value is also converted, so
    extra Decimal fields no longer make ``json.dumps`` raise ``TypeError``.

    Args:
        payment_methods: The entries to serialize, or ``None``.

    Returns:
        The JSON text, or ``None`` when ``payment_methods`` is ``None``.
    """
    if payment_methods is None:
        return None

    serializable = []
    for pm in payment_methods:
        if hasattr(pm, 'model_dump'):
            item = pm.model_dump()
        else:
            item = dict(pm)

        converted = {}
        for key, value in item.items():
            is_numeric_amount = key == 'amount' and hasattr(value, '__float__')
            if is_numeric_amount or isinstance(value, Decimal):
                value = float(value)
            converted[key] = value
        serializable.append(converted)
    return json.dumps(serializable)
class ReceiptCRUD:
"""CRUD operations for Receipt model."""
@staticmethod
async def create(
    session: AsyncSession,
    data: ReceiptCreate,
    created_by: str,
) -> Receipt:
    """Create a receipt in DRAFT status.

    ``tva_breakdown`` and ``payment_methods`` are stored as JSON strings.

    Args:
        session: Async session used to persist the receipt.
        data: Creation payload; its dumped fields become column values.
        created_by: Value stored in ``created_by``.

    Returns:
        The new receipt, re-read with attachments and entries eager-loaded.
    """
    payload = data.model_dump()
    payload.update(
        tva_breakdown=_serialize_tva_breakdown(payload.get('tva_breakdown')),
        payment_methods=_serialize_payment_methods(payload.get('payment_methods')),
    )

    new_receipt = Receipt(
        **payload,
        created_by=created_by,
        status=ReceiptStatus.DRAFT,
    )
    session.add(new_receipt)
    await session.commit()
    await session.refresh(new_receipt)

    # Re-read with relationships eager-loaded: lazy loading does not work
    # well with the async session.
    return await ReceiptCRUD.get_by_id(session, new_receipt.id, include_relations=True)
@staticmethod
async def get_by_id(
    session: AsyncSession,
    receipt_id: int,
    include_relations: bool = True,
) -> Optional[Receipt]:
    """Fetch a receipt by primary key.

    Args:
        session: Async session used to run the query.
        receipt_id: Primary key of the receipt.
        include_relations: When true, attachments and entries are
            eager-loaded with ``selectinload``.

    Returns:
        The receipt, or ``None`` when no row matches.
    """
    stmt = select(Receipt).where(Receipt.id == receipt_id)
    if include_relations:
        eager_loads = (
            selectinload(Receipt.attachments),
            selectinload(Receipt.entries),
        )
        stmt = stmt.options(*eager_loads)

    return (await session.execute(stmt)).scalar_one_or_none()
@staticmethod
async def get_list(
    session: AsyncSession,
    filters: ReceiptFilter,
) -> Tuple[List[Receipt], int]:
    """Return one page of receipts matching ``filters`` plus the total count.

    A filter attribute is applied only when it is truthy. ``search`` is
    matched case-insensitively as a substring of description, partner name
    or receipt number. ``processing_status`` may hold several
    comma-separated values (e.g. "pending,processing").

    Args:
        session: Async session used to run the count and page queries.
        filters: Filter, sort (``sort_by``) and pagination (``page``,
            ``page_size``) settings.

    Returns:
        ``(receipts, total)`` where ``total`` counts all matching rows
        before pagination.
    """
    criteria = []
    if filters.status:
        criteria.append(Receipt.status == filters.status)
    if filters.direction:
        criteria.append(Receipt.direction == filters.direction)
    if filters.company_id:
        criteria.append(Receipt.company_id == filters.company_id)
    if filters.created_by:
        criteria.append(Receipt.created_by == filters.created_by)
    if filters.date_from:
        criteria.append(Receipt.receipt_date >= filters.date_from)
    if filters.date_to:
        criteria.append(Receipt.receipt_date <= filters.date_to)
    if filters.search:
        pattern = f"%{filters.search}%"
        criteria.append(
            or_(
                Receipt.description.ilike(pattern),
                Receipt.partner_name.ilike(pattern),
                Receipt.receipt_number.ilike(pattern),
            )
        )

    # Bulk upload filters (US-012); US-005 allows comma-separated statuses.
    if filters.processing_status:
        wanted = [part.strip() for part in filters.processing_status.split(",")]
        if len(wanted) == 1:
            criteria.append(Receipt.processing_status == wanted[0])
        else:
            criteria.append(Receipt.processing_status.in_(wanted))
    if filters.batch_id:
        criteria.append(Receipt.batch_id == filters.batch_id)

    listing = select(Receipt).options(
        selectinload(Receipt.attachments),
        selectinload(Receipt.entries),
    )
    for criterion in criteria:
        listing = listing.where(criterion)

    # Total is counted over the filtered set, before ordering and paging.
    count_stmt = select(func.count()).select_from(listing.subquery())
    total = (await session.execute(count_stmt)).scalar() or 0

    # Ordering by sort_by (US-012); anything else means newest first.
    if filters.sort_by == "processing_started_at":
        ordering = Receipt.processing_started_at.desc()
    elif filters.sort_by == "processing_started_at_asc":
        ordering = Receipt.processing_started_at.asc()
    else:
        ordering = Receipt.created_at.desc()

    skip = (filters.page - 1) * filters.page_size
    page_stmt = listing.order_by(ordering).offset(skip).limit(filters.page_size)

    rows = (await session.execute(page_stmt)).scalars().all()
    return list(rows), total
@staticmethod
async def get_processing_stats(
    session: AsyncSession,
    company_id: Optional[int] = None,
    batch_id: Optional[str] = None,
) -> dict:
    """Count receipts per processing status (US-012).

    Only receipts with a non-NULL ``processing_status`` are counted.

    Args:
        session: Async session used to run the grouped count.
        company_id: When truthy, restrict to this company.
        batch_id: When truthy, restrict to this batch.

    Returns:
        A dict with ``pending_count``, ``processing_count``,
        ``completed_count`` and ``failed_count``; statuses with no rows
        stay at 0 and any other status value is ignored.
    """
    counted = select(
        Receipt.processing_status,
        func.count(Receipt.id).label("count"),
    )
    if company_id:
        counted = counted.where(Receipt.company_id == company_id)
    if batch_id:
        counted = counted.where(Receipt.batch_id == batch_id)
    counted = counted.where(Receipt.processing_status.isnot(None))
    counted = counted.group_by(Receipt.processing_status)

    rows = (await session.execute(counted)).all()

    tracked = ("pending", "processing", "completed", "failed")
    stats = {
        "pending_count": 0,
        "processing_count": 0,
        "completed_count": 0,
        "failed_count": 0,
    }
    for row in rows:
        for label in tracked:
            # Compared with ==, exactly like the original if/elif chain.
            if row.processing_status == label:
                stats[f"{label}_count"] = row.count
                break
    return stats
@staticmethod
async def get_pending_review(
    session: AsyncSession,
    company_id: Optional[int] = None,
) -> List[Receipt]:
    """List receipts in PENDING_REVIEW status, earliest submission first.

    Args:
        session: Async session used to run the query.
        company_id: When truthy, restrict to this company.

    Returns:
        Receipts with attachments and entries eager-loaded, ordered by
        ascending ``submitted_at``.
    """
    stmt = select(Receipt).where(Receipt.status == ReceiptStatus.PENDING_REVIEW)
    stmt = stmt.options(
        selectinload(Receipt.attachments),
        selectinload(Receipt.entries),
    )
    if company_id:
        stmt = stmt.where(Receipt.company_id == company_id)
    stmt = stmt.order_by(Receipt.submitted_at.asc())

    rows = (await session.execute(stmt)).scalars().all()
    return list(rows)
@staticmethod
async def update(
    session: AsyncSession,
    receipt: Receipt,
    data: ReceiptUpdate,
) -> Receipt:
    """Apply a partial update to a receipt.

    Only fields explicitly set on ``data`` are written. When a non-empty
    ``tva_breakdown`` is supplied, ``tva_total`` is recomputed as the sum
    of its amounts rounded to 2 decimals. ``tva_breakdown`` and
    ``payment_methods`` are stored as JSON strings.

    US-407: if the receipt's ``processing_status`` is 'failed', it and
    ``processing_error`` are cleared, so a receipt whose OCR failed can be
    corrected by hand and then submitted without still showing an error.

    Returns:
        The receipt, re-read with attachments and entries eager-loaded.
    """
    changes = data.model_dump(exclude_unset=True)

    breakdown = changes.get('tva_breakdown')
    if breakdown:
        running_total = 0
        for line in breakdown:
            if isinstance(line, dict):
                raw_amount = line.get('amount', 0)
            else:
                raw_amount = getattr(line, 'amount', 0)
            running_total += float(raw_amount)
        changes['tva_total'] = round(running_total, 2)

    # Serialize only the keys that were actually provided.
    if 'tva_breakdown' in changes:
        changes['tva_breakdown'] = _serialize_tva_breakdown(breakdown)
    if 'payment_methods' in changes:
        changes['payment_methods'] = _serialize_payment_methods(changes['payment_methods'])

    for attr_name, new_value in changes.items():
        setattr(receipt, attr_name, new_value)

    # US-407: a manual edit clears a previous 'failed' processing state.
    if receipt.processing_status == 'failed':
        receipt.processing_status = None
        receipt.processing_error = None

    receipt.updated_at = datetime.utcnow()
    session.add(receipt)
    await session.commit()
    await session.refresh(receipt)

    # Re-read with relationships eager-loaded: lazy loading does not work
    # well with the async session.
    return await ReceiptCRUD.get_by_id(session, receipt.id, include_relations=True)
@staticmethod
async def update_status(
    session: AsyncSession,
    receipt: Receipt,
    new_status: ReceiptStatus,
    reviewed_by: Optional[str] = None,
    rejection_reason: Optional[str] = None,
) -> Receipt:
    """Move a receipt to ``new_status`` and stamp the related fields.

    - PENDING_REVIEW sets ``submitted_at``.
    - APPROVED / REJECTED set ``reviewed_by`` and ``reviewed_at``;
      REJECTED also stores ``rejection_reason``.
    - DRAFT clears ``rejection_reason``.

    No check is made here that the transition is allowed.

    Returns:
        The receipt, re-read with attachments and entries eager-loaded.
    """
    receipt.status = new_status
    receipt.updated_at = datetime.utcnow()

    if new_status == ReceiptStatus.PENDING_REVIEW:
        receipt.submitted_at = datetime.utcnow()
    elif new_status in (ReceiptStatus.APPROVED, ReceiptStatus.REJECTED):
        receipt.reviewed_by = reviewed_by
        receipt.reviewed_at = datetime.utcnow()
        if new_status == ReceiptStatus.REJECTED:
            receipt.rejection_reason = rejection_reason
    elif new_status == ReceiptStatus.DRAFT:
        # Going back to draft drops the previous rejection reason.
        receipt.rejection_reason = None

    session.add(receipt)
    await session.commit()
    await session.refresh(receipt)

    # Re-read with relationships eager-loaded: lazy loading does not work
    # well with the async session.
    return await ReceiptCRUD.get_by_id(session, receipt.id, include_relations=True)
@staticmethod
async def delete(session: AsyncSession, receipt: Receipt) -> bool:
    """Delete a receipt and commit.

    Attachments and entries are expected to go with it via cascade
    (configured on the model, not visible here).

    Returns:
        Always ``True`` once the commit has completed.
    """
    await session.delete(receipt)
    await session.commit()
    deleted = True
    return deleted
@staticmethod
async def can_edit(receipt: Receipt, username: str) -> bool:
    """Tell whether ``username`` may edit ``receipt``.

    Editing is allowed only for DRAFT or REJECTED receipts (so they can be
    fixed and resubmitted), and only by the user who created them.
    """
    editable_statuses = (ReceiptStatus.DRAFT, ReceiptStatus.REJECTED)
    return receipt.status in editable_statuses and receipt.created_by == username
@staticmethod
async def can_delete(receipt: Receipt, username: str) -> bool:
    """Tell whether ``username`` may delete ``receipt``.

    Deletion is allowed only for DRAFT receipts, and only by the user who
    created them.
    """
    is_draft = receipt.status == ReceiptStatus.DRAFT
    if is_draft:
        return receipt.created_by == username
    return False
@staticmethod
async def can_submit(receipt: Receipt, username: str) -> bool:
    """Tell whether ``username`` may submit ``receipt`` for review.

    Submission is allowed only for DRAFT or REJECTED receipts, and only by
    the user who created them.
    """
    submittable_statuses = (ReceiptStatus.DRAFT, ReceiptStatus.REJECTED)
    return receipt.status in submittable_statuses and receipt.created_by == username
@staticmethod
async def get_stats(
    session: AsyncSession,
    company_id: int,
    created_by: Optional[str] = None,
) -> dict:
    """Summarize a company's receipts by workflow status.

    Args:
        session: Async session used to run the grouped query.
        company_id: Company whose receipts are counted.
        created_by: When truthy, restrict to receipts created by this user.

    Returns:
        A dict keyed by status value ("draft", "pending_review",
        "approved", "rejected", "synced") plus "total"; each value is
        ``{"count": ..., "amount": ...}``. Statuses with no rows keep zeros.
    """
    grouped = select(
        Receipt.status,
        func.count(Receipt.id).label("count"),
        func.sum(Receipt.amount).label("total_amount"),
    ).where(Receipt.company_id == company_id)
    if created_by:
        grouped = grouped.where(Receipt.created_by == created_by)
    grouped = grouped.group_by(Receipt.status)

    rows = (await session.execute(grouped)).all()

    bucket_names = ("draft", "pending_review", "approved", "rejected", "synced", "total")
    stats = {name: {"count": 0, "amount": 0} for name in bucket_names}

    for row in rows:
        # SUM can come back as NULL; treat that as zero.
        bucket_amount = float(row.total_amount or 0)
        stats[row.status.value] = {
            "count": row.count,
            "amount": bucket_amount,
        }
        stats["total"]["count"] += row.count
        stats["total"]["amount"] += bucket_amount
    return stats

View File

@@ -0,0 +1,50 @@
"""Database configuration and session management using SQLModel."""
from pathlib import Path
from typing import AsyncGenerator
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
from sqlmodel import SQLModel
from backend.config import settings
# Module-level async engine, built at import time from
# settings.data_entry_database_url.
# echo=False keeps SQL statements out of the logs (too verbose otherwise).
engine = create_async_engine(
settings.data_entry_database_url,
echo=False,
future=True,
)
# Session factory bound to the engine above; class_=AsyncSession makes it
# produce AsyncSession objects. expire_on_commit=False is passed so loaded
# instances are not expired when a session commits.
async_session_maker = sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
)
async def init_db() -> None:
    """Create the database tables known to ``SQLModel.metadata``.

    The parent directory of ``settings.data_entry_sqlite_database_path``
    is created first if it does not exist yet.
    """
    data_dir = Path(settings.data_entry_sqlite_database_path).parent
    data_dir.mkdir(parents=True, exist_ok=True)

    # create_all is a synchronous API, so it is run through run_sync on the
    # async connection.
    async with engine.begin() as connection:
        await connection.run_sync(SQLModel.metadata.create_all)
async def get_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield one async database session, for use as an injected dependency.

    The session is closed in the ``finally`` clause once the consumer is
    done with it, in addition to the context manager's own exit.
    """
    async with async_session_maker() as db_session:
        try:
            yield db_session
        finally:
            await db_session.close()
# Convenience function for manual session usage
async def get_db_session() -> AsyncSession:
    """Return a fresh session from the factory; the caller must close it."""
    new_session = async_session_maker()
    return new_session

View File

@@ -0,0 +1,131 @@
"""
Alembic migrations helper for Data Entry module.
Provides automatic migration execution at backend startup.
"""
import logging
import os
from pathlib import Path
logger = logging.getLogger(__name__)
def run_migrations() -> bool:
    """Run pending Alembic migrations at startup.

    The database file location is read from the ``SQLITE_DATABASE_PATH``
    environment variable (default ``data/receipts/receipts.db``), and
    ``alembic.ini`` / ``migrations`` are looked up two directories above
    this file.

    Returns:
        True if migrations ran successfully (or no pending migrations),
        False if Alembic is missing, ``alembic.ini`` is not found, or the
        upgrade failed (backend should continue with WARNING).
    """
    # Import lazily and in their own try block: only a missing package is
    # reported as "not installed". An ImportError raised later, for instance
    # from inside a migration script, is reported as a migration error with
    # its traceback.
    try:
        from alembic.config import Config
        from alembic import command
        from alembic.runtime.migration import MigrationContext
        from sqlalchemy import create_engine
    except ImportError as e:
        logger.warning(f"[MIGRATIONS] Alembic not installed: {e}")
        logger.warning("[MIGRATIONS] Skipping migrations - install alembic to enable")
        return False

    def _read_revision(db_url: str):
        """Return the revision currently stamped in the database, or None."""
        engine = create_engine(db_url)
        try:
            with engine.connect() as connection:
                context = MigrationContext.configure(connection)
                return context.get_current_revision()
        finally:
            # Always release the engine, even when connecting fails.
            engine.dispose()

    try:
        # Get the path to alembic.ini
        data_entry_module = Path(__file__).parent.parent
        alembic_ini_path = data_entry_module / "alembic.ini"
        if not alembic_ini_path.exists():
            logger.warning(f"[MIGRATIONS] alembic.ini not found at {alembic_ini_path}")
            return False

        # Get database path from environment or default
        db_path = Path(os.getenv(
            "SQLITE_DATABASE_PATH",
            "data/receipts/receipts.db"
        )).resolve()
        # Ensure database directory exists
        db_path.parent.mkdir(parents=True, exist_ok=True)

        # Alembic runs synchronously, so it gets a plain sqlite:// URL.
        alembic_cfg = Config(str(alembic_ini_path))
        sync_db_url = f"sqlite:///{db_path}"
        alembic_cfg.set_main_option("sqlalchemy.url", sync_db_url)
        alembic_cfg.set_main_option(
            "script_location",
            str(data_entry_module / "migrations")
        )

        current_rev = _read_revision(sync_db_url)
        logger.info(f"[MIGRATIONS] Current revision: {current_rev or 'None (fresh database)'}")
        logger.info(f"[MIGRATIONS] Database path: {db_path}")

        logger.info("[MIGRATIONS] Checking for pending migrations...")
        command.upgrade(alembic_cfg, "head")

        new_rev = _read_revision(sync_db_url)
        if current_rev != new_rev:
            logger.info(f"[MIGRATIONS] Applied: {current_rev or 'None'} -> {new_rev}")
        else:
            logger.info(f"[MIGRATIONS] No pending migrations. Current: {new_rev}")
        return True
    except Exception as e:
        # Deliberate best-effort: startup continues even if migrations fail.
        logger.error(f"[MIGRATIONS] Migration error: {e}", exc_info=True)
        logger.warning("[MIGRATIONS] Backend will continue without migrations")
        return False
def get_current_revision() -> str:
    """Get the current Alembic revision of the SQLite database.

    The database file location is read from the ``SQLITE_DATABASE_PATH``
    environment variable (default ``data/receipts/receipts.db``).

    Returns:
        The current revision string, or one of the sentinels:
        ``"alembic_not_installed"`` when Alembic or SQLAlchemy cannot be
        imported, ``"no_database"`` when the file does not exist,
        ``"none"`` when the database has no revision stamped, and
        ``"unknown"`` when reading the revision failed.
    """
    try:
        from alembic.runtime.migration import MigrationContext
        from sqlalchemy import create_engine

        # Get database path from environment or default
        db_path = Path(os.getenv(
            "SQLITE_DATABASE_PATH",
            "data/receipts/receipts.db"
        )).resolve()
        if not db_path.exists():
            return "no_database"

        sync_db_url = f"sqlite:///{db_path}"
        engine = create_engine(sync_db_url)
        try:
            with engine.connect() as connection:
                context = MigrationContext.configure(connection)
                revision = context.get_current_revision()
        finally:
            # Always release the engine, even when connecting fails.
            engine.dispose()
        return revision or "none"
    except ImportError:
        return "alembic_not_installed"
    except Exception as e:
        logger.debug(f"[MIGRATIONS] Could not get revision: {e}")
        return "unknown"

View File

@@ -0,0 +1,29 @@
# Database models
#
# Re-exports the models and enums defined in the sibling modules so callers
# can import them from this package directly.
from .receipt import (
    Receipt,
    ReceiptAttachment,
    ReceiptStatus,
    ReceiptType,
    ReceiptDirection,
    PaymentMode,
    ProcessingStatus,
)
from .accounting_entry import AccountingEntry, EntryType
from .nomenclature import SyncedSupplier, LocalSupplier, SyncedCashRegister
from .ocr_settings import UserOCRPreference, OCRJobMetrics, OCRMetricsSummary, OCREngine
from .batch import BatchUpload, BatchJob, BatchStatus

__all__ = [
    "Receipt",
    "ReceiptAttachment",
    "ReceiptStatus",
    "ReceiptType",
    "ReceiptDirection",
    # PaymentMode is defined in .receipt next to the other receipt enums;
    # export it as well so every receipt enum is reachable from the package.
    "PaymentMode",
    "ProcessingStatus",
    "AccountingEntry",
    "EntryType",
    "SyncedSupplier",
    "LocalSupplier",
    "SyncedCashRegister",
    # OCR Settings & Metrics
    "UserOCRPreference",
    "OCRJobMetrics",
    "OCRMetricsSummary",
    "OCREngine",
    # Batch Upload
    "BatchUpload",
    "BatchJob",
    "BatchStatus",
]

View File

@@ -0,0 +1,49 @@
"""AccountingEntry SQLModel model for proposed accounting entries."""
from datetime import datetime
from decimal import Decimal
from enum import Enum
from typing import Optional, TYPE_CHECKING
from sqlmodel import SQLModel, Field, Relationship
if TYPE_CHECKING:
from .receipt import Receipt
class EntryType(str, Enum):
    """Side of an accounting entry: debit or credit.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    DEBIT = "debit"
    CREDIT = "credit"
class AccountingEntry(SQLModel, table=True):
    """One proposed accounting line (debit or credit) attached to a receipt.

    Stored in the ``accounting_entries`` table; ``receipt_id`` is a foreign
    key to ``receipts.id`` and ``receipt`` is the matching relationship
    (the other side is ``Receipt.entries``).
    """

    __tablename__ = "accounting_entries"

    id: Optional[int] = Field(primary_key=True, default=None)
    receipt_id: int = Field(index=True, foreign_key="receipts.id")

    # --- Account ---
    entry_type: EntryType
    # Account code, e.g. 6022, 5311, 4426
    account_code: str = Field(max_length=20)
    # Cached account label, e.g. "Cheltuieli combustibil" (fuel expenses)
    account_name: Optional[str] = Field(max_length=200, default=None)

    # --- Amount ---
    amount: Decimal = Field(max_digits=15, decimal_places=2)

    # --- Analytics (optional) ---
    partner_id: Optional[int] = Field(default=None)
    cost_center_id: Optional[int] = Field(default=None)

    # --- Entry metadata ---
    # True if the entry was system-generated
    is_auto_generated: bool = Field(default=True)
    # Username, if the entry was modified
    modified_by: Optional[str] = Field(max_length=100, default=None)
    modified_at: Optional[datetime] = Field(default=None)

    # Position used when displaying entries
    sort_order: int = Field(default=0)

    # --- Relationship ---
    receipt: Optional["Receipt"] = Relationship(back_populates="entries")

View File

@@ -0,0 +1,64 @@
"""BatchUpload and BatchJob SQLModel models for bulk receipt processing."""
from datetime import datetime
from enum import Enum
from typing import Optional
from sqlmodel import SQLModel, Field
class BatchStatus(str, Enum):
    """Lifecycle state of a batch upload."""

    # Batch created, jobs queued
    PENDING = "pending"
    # At least one job is processing
    PROCESSING = "processing"
    # All jobs completed (success or failed)
    COMPLETED = "completed"
    # Batch-level failure (e.g., all jobs failed)
    FAILED = "failed"
class BatchUpload(SQLModel, table=True):
    """
    Parent record that groups the OCR jobs of one bulk upload.

    Holds who created the batch, for which company, the batch status and
    the number of files in it. Stored in the ``batch_uploads`` table.
    """

    __tablename__ = "batch_uploads"

    id: Optional[int] = Field(primary_key=True, default=None)

    # --- User info ---
    # Username who created the batch
    user_id: str = Field(index=True, max_length=100)
    # Company ID for receipt creation
    company_id: int = Field(index=True)

    # --- Timestamps ---
    # NOTE(review): datetime.utcnow produces naive datetimes.
    created_at: datetime = Field(default_factory=datetime.utcnow)

    # --- Status tracking ---
    status: BatchStatus = Field(default=BatchStatus.PENDING)
    total_files: int = Field(default=0)
class BatchJob(SQLModel, table=True):
    """
    Link row between a batch upload and one OCR job.

    One row exists per file in a batch. ``receipt_id`` starts empty and is
    filled in after the job completes and a receipt is auto-created.
    Stored in the ``batch_jobs`` table.
    """

    __tablename__ = "batch_jobs"

    id: Optional[int] = Field(primary_key=True, default=None)

    # --- Foreign keys ---
    batch_id: int = Field(index=True, foreign_key="batch_uploads.id")
    # UUID from the ocr_jobs table (plain string column, no FK declared here)
    job_id: str = Field(index=True, max_length=36)

    # Original filename for display
    filename: str = Field(max_length=255)

    # Receipt reference (set after auto-create)
    receipt_id: Optional[int] = Field(foreign_key="receipts.id", default=None)

    # --- Timestamps ---
    created_at: datetime = Field(default_factory=datetime.utcnow)

View File

@@ -0,0 +1,46 @@
"""Nomenclature models for synced and local data."""
from typing import Optional
from datetime import datetime
from sqlmodel import SQLModel, Field
class SyncedSupplier(SQLModel, table=True):
    """Supplier row mirrored from Oracle NOM_PARTENERI (table ``synced_suppliers``)."""

    __tablename__ = "synced_suppliers"

    id: Optional[int] = Field(primary_key=True, default=None)
    # Original Oracle ID
    oracle_id: int = Field(index=True)
    # Company this supplier belongs to
    company_id: int = Field(index=True)
    name: str = Field(max_length=200)
    # Fiscal code (CUI/CIF)
    fiscal_code: Optional[str] = Field(index=True, max_length=50, default=None)
    address: Optional[str] = Field(max_length=500, default=None)
    # Defaults to the row's creation time (naive UTC via datetime.utcnow)
    synced_at: datetime = Field(default_factory=datetime.utcnow)
class LocalSupplier(SQLModel, table=True):
    """Supplier created locally from OCR data, i.e. not present in Oracle.

    Stored in the ``local_suppliers`` table.
    """

    __tablename__ = "local_suppliers"

    id: Optional[int] = Field(primary_key=True, default=None)
    company_id: int = Field(index=True)
    name: str = Field(max_length=200)
    fiscal_code: Optional[str] = Field(index=True, max_length=50, default=None)
    address: Optional[str] = Field(max_length=500, default=None)
    # Username who created it
    created_by: str = Field(max_length=100)
    created_at: datetime = Field(default_factory=datetime.utcnow)

    # Marks the row as a candidate for a later sync to Oracle
    pending_oracle_sync: bool = Field(default=True)
class SyncedCashRegister(SQLModel, table=True):
    """Cash register or bank account mirrored from Oracle.

    Stored in the ``synced_cash_registers`` table.
    """

    __tablename__ = "synced_cash_registers"

    id: Optional[int] = Field(primary_key=True, default=None)
    oracle_id: int = Field(index=True)
    company_id: int = Field(index=True)
    name: str = Field(max_length=100)
    # Account code: 5311, 5121, etc.
    account_code: str = Field(max_length=20)
    # 'cash' or 'bank'
    register_type: str = Field(max_length=10)
    synced_at: datetime = Field(default_factory=datetime.utcnow)

View File

@@ -0,0 +1,102 @@
"""OCR settings and metrics SQLModel models."""
from datetime import datetime
from decimal import Decimal
from enum import Enum
from typing import Optional
from sqlmodel import SQLModel, Field
class OCREngine(str, Enum):
    """Identifiers of the OCR engines a job can be run with."""

    TESSERACT = "tesseract"
    DOCTR = "doctr"
    # docTR with 2-tier sequential processing + early exit
    # (optimized, recommended)
    DOCTR_PLUS = "doctr_plus"
    PADDLEOCR = "paddleocr"
class UserOCRPreference(SQLModel, table=True):
    """
    Per-user preferred OCR engine.

    ``username`` is unique, so each user has at most one row; the stored
    engine is the one auto-selected when that user uploads new receipts
    for processing. Stored in the ``user_ocr_preferences`` table.
    """

    __tablename__ = "user_ocr_preferences"

    id: Optional[int] = Field(primary_key=True, default=None)

    # --- User identification ---
    username: str = Field(index=True, unique=True, max_length=100)

    # --- Preference settings ---
    preferred_engine: OCREngine = Field(default=OCREngine.DOCTR_PLUS)

    # --- Timestamps ---
    created_at: datetime = Field(default_factory=datetime.utcnow)
    # Only a creation-time default is declared here; nothing in this model
    # refreshes the value on update.
    updated_at: datetime = Field(default_factory=datetime.utcnow)
class OCRJobMetrics(SQLModel, table=True):
    """
    Processing metrics recorded for a single OCR job.

    One row per job (``job_id`` is unique), intended for analytics such as:
    - performance tracking by engine
    - success rate analysis
    - processing time trends
    - user-specific analytics

    Stored in the ``ocr_job_metrics`` table.
    """

    __tablename__ = "ocr_job_metrics"

    id: Optional[int] = Field(primary_key=True, default=None)

    # --- Job identification ---
    job_id: str = Field(index=True, unique=True, max_length=50)

    # --- User and company context ---
    username: str = Field(index=True, max_length=100)
    company_id: Optional[int] = Field(index=True, default=None)

    # --- Engine ---
    # What the user (or auto-selection) requested
    engine_requested: str = Field(max_length=20)
    # What was actually used (e.g., "doctr-light")
    engine_used: str = Field(max_length=50)

    # --- Processing metrics ---
    processing_time_ms: int = Field(default=0)
    file_size_bytes: int = Field(default=0)
    # MIME type
    file_type: str = Field(default="image/jpeg", max_length=50)
    # Original uploaded filename
    original_filename: Optional[str] = Field(max_length=255, default=None)

    # --- Success metrics ---
    success: bool = Field(default=True)
    error_message: Optional[str] = Field(max_length=500, default=None)

    # --- Extraction quality metrics ---
    overall_confidence: float = Field(default=0.0)
    # Number of fields successfully extracted
    fields_extracted: int = Field(default=0)
    needs_manual_review: Optional[bool] = Field(default=None)
    validation_warnings_count: int = Field(default=0)
    validation_errors_count: int = Field(default=0)

    # --- Timestamps ---
    created_at: datetime = Field(default_factory=datetime.utcnow)
class OCRMetricsSummary(SQLModel):
    """
    Aggregated OCR metrics for one engine.

    Declared without ``table=True``: this is not a database table, it is
    used for API responses.
    """

    engine: str
    total_jobs: int
    successful_jobs: int
    failed_jobs: int
    # Computed: successful_jobs / total_jobs
    success_rate: float
    avg_processing_time_ms: float
    avg_confidence: float
    avg_fields_extracted: float

View File

@@ -0,0 +1,143 @@
"""Receipt and ReceiptAttachment SQLModel models."""
from datetime import datetime, date
from decimal import Decimal
from enum import Enum
from typing import Optional, List, TYPE_CHECKING
from sqlmodel import SQLModel, Field, Relationship
class ReceiptType(str, Enum):
    """Kind of receipt document (Romanian document types)."""

    BON_FISCAL = "bon_fiscal"
    CHITANTA = "chitanta"
class ReceiptDirection(str, Enum):
    """Whether a receipt is an expense or an income."""

    # Expense (receipt from supplier)
    CHELTUIALA = "cheltuiala"
    # Income (receipt issued to client)
    INCASARE = "incasare"
class ReceiptStatus(str, Enum):
    """Position of a receipt in the approval workflow."""

    # User is filling in data
    DRAFT = "draft"
    # Awaiting accountant approval
    PENDING_REVIEW = "pending_review"
    # Approved by accountant
    APPROVED = "approved"
    # Rejected by accountant
    REJECTED = "rejected"
    # Synced to Oracle (Phase 2)
    SYNCED = "synced"
class PaymentMode(str, Enum):
    """How the expense was paid."""

    # Company cash (5311)
    CASA = "casa"
    # Bank transfer / POS (5121)
    BANCA = "banca"
    # Employee expense settlement (542)
    AVANS_DECONTARE = "avans_decontare"
class ProcessingStatus(str, Enum):
    """OCR processing state of a bulk-uploaded receipt."""

    # Waiting in queue
    PENDING = "pending"
    # Currently being processed by OCR
    PROCESSING = "processing"
    # Successfully processed
    COMPLETED = "completed"
    # Processing failed with error
    FAILED = "failed"
if TYPE_CHECKING:
from .accounting_entry import AccountingEntry
class Receipt(SQLModel, table=True):
    """Receipt (bon fiscal / chitanta) together with its approval workflow state.

    Stored in the ``receipts`` table. Besides the document data it carries
    OCR-extracted details, workflow/approval fields, Oracle sync bookkeeping
    and bulk-upload processing state. Attachments and accounting entries
    are exposed through the ``attachments`` and ``entries`` relationships.
    """

    __tablename__ = "receipts"

    id: Optional[int] = Field(primary_key=True, default=None)

    # ---- Document identification ----
    receipt_type: ReceiptType = Field(default=ReceiptType.BON_FISCAL)
    direction: ReceiptDirection = Field(default=ReceiptDirection.CHELTUIALA)
    receipt_number: Optional[str] = Field(max_length=50, default=None)
    receipt_series: Optional[str] = Field(max_length=20, default=None)

    # ---- Main data ----
    receipt_date: date
    amount: Decimal = Field(max_digits=15, decimal_places=2)
    description: Optional[str] = Field(max_length=500, default=None)

    # ---- VAT (TVA) info extracted from OCR ----
    # Stored as JSON so multiple entries fit in one column, e.g.
    # [{"code":"A","percent":19,"amount":"15.20"}]
    tva_breakdown: Optional[str] = Field(max_length=1000, default=None)
    tva_total: Optional[Decimal] = Field(
        default=None, max_digits=15, decimal_places=2
    )
    items_count: Optional[int] = Field(default=None)
    vendor_address: Optional[str] = Field(max_length=500, default=None)

    # ---- Expense type (for auto-generating accounting entries) ----
    expense_type_code: Optional[str] = Field(max_length=20, default=None)

    # ---- Oracle references (nomenclatures) ----
    company_id: int
    # partner_id removed - supplier data is text-only (partner_name, cui)
    # Supplier name from OCR/selection
    partner_name: Optional[str] = Field(max_length=200, default=None)
    # Fiscal code from OCR
    cui: Optional[str] = Field(max_length=20, default=None)
    # Raw OCR text for debugging
    ocr_raw_text: Optional[str] = Field(default=None)
    # JSON, e.g. [{"method":"CARD","amount":"50.00"}]
    payment_methods: Optional[str] = Field(max_length=500, default=None)
    # Cash/Bank ID from Oracle
    cash_register_id: Optional[int] = Field(default=None)
    # Cache for display
    cash_register_name: Optional[str] = Field(max_length=100, default=None)
    # Account code (5311, 5121)
    cash_register_account: Optional[str] = Field(max_length=20, default=None)
    # PaymentMode value: casa/banca/avans_decontare
    payment_mode: Optional[str] = Field(max_length=20, default=None)

    # ---- Workflow ----
    status: ReceiptStatus = Field(default=ReceiptStatus.DRAFT)
    # Username of creator
    created_by: str = Field(max_length=100)
    created_at: datetime = Field(default_factory=datetime.utcnow)
    # Only a creation-time default is declared here; nothing in this model
    # refreshes the value on update.
    updated_at: datetime = Field(default_factory=datetime.utcnow)
    # When submitted for approval
    submitted_at: Optional[datetime] = Field(default=None)

    # ---- Approval ----
    # Accountant username
    reviewed_by: Optional[str] = Field(max_length=100, default=None)
    reviewed_at: Optional[datetime] = Field(default=None)
    # Reason for rejection
    rejection_reason: Optional[str] = Field(max_length=500, default=None)

    # ---- Phase 2 - Oracle sync ----
    oracle_synced_at: Optional[datetime] = Field(default=None)
    oracle_act_id: Optional[int] = Field(default=None)
    oracle_error: Optional[str] = Field(max_length=500, default=None)

    # ---- Bulk upload batch tracking ----
    batch_id: Optional[str] = Field(index=True, max_length=50, default=None)
    # ProcessingStatus enum value
    processing_status: Optional[str] = Field(
        index=True, max_length=20, default=None
    )
    # Full error message text
    processing_error: Optional[str] = Field(default=None)
    # SHA-256 hash for duplicate detection
    file_hash: Optional[str] = Field(index=True, max_length=64, default=None)
    processing_started_at: Optional[datetime] = Field(default=None)
    processing_completed_at: Optional[datetime] = Field(default=None)

    # ---- Relationships ----
    # Both relationships pass the "all, delete-orphan" cascade setting
    # through to the underlying SQLAlchemy relationship.
    attachments: List["ReceiptAttachment"] = Relationship(
        sa_relationship_kwargs={"cascade": "all, delete-orphan"},
        back_populates="receipt",
    )
    entries: List["AccountingEntry"] = Relationship(
        sa_relationship_kwargs={"cascade": "all, delete-orphan"},
        back_populates="receipt",
    )
class ReceiptAttachment(SQLModel, table=True):
    """File (photo or PDF) attached to a receipt.

    Stored in the ``receipt_attachments`` table; ``receipt_id`` is a foreign
    key to ``receipts.id`` and ``receipt`` is the matching relationship
    (the other side is ``Receipt.attachments``).
    """

    __tablename__ = "receipt_attachments"

    id: Optional[int] = Field(primary_key=True, default=None)
    receipt_id: int = Field(index=True, foreign_key="receipts.id")

    # ---- File info ----
    # Original filename
    filename: str = Field(max_length=255)
    # Filename on disk (UUID)
    stored_filename: str = Field(max_length=255)
    # Relative path
    file_path: str = Field(max_length=500)
    # Size in bytes
    file_size: int
    # MIME type (image/jpeg, application/pdf)
    mime_type: str = Field(max_length=100)
    uploaded_at: datetime = Field(default_factory=datetime.utcnow)

    # ---- Relationship ----
    receipt: Optional[Receipt] = Relationship(back_populates="attachments")