feat: Migrate to ultrathin monolith architecture

Consolidate 3 separate applications (reports-app, data-entry-app, telegram-bot) into a unified
architecture with single backend and frontend:

Backend Changes:
- Unified FastAPI backend at backend/ with modular structure
- Modules: reports, data_entry, telegram in backend/modules/
- Centralized config.py and main.py with all routers registered
- Single worker mode (--workers 1) for Telegram bot compatibility
- Shared Oracle connection pool and JWT authentication
- Unified requirements.txt and environment configuration

Frontend Changes:
- Single Vue.js SPA with module-based routing
- Unified frontend at src/ with modules in src/modules/{reports,data-entry}/
- Shared components and stores in src/shared/
- Error boundaries for module isolation
- Dual API proxy in Vite for module communication

Infrastructure:
- New unified startup scripts: start-prod.sh, start-test.sh, start-backend.sh
- Environment templates: .env.dev.example, .env.test.example, .env.prod.example
- Updated deployment scripts for Windows IIS
- Simplified SSH tunnel management

Documentation:
- Comprehensive CLAUDE.md with architecture overview
- Module-specific docs in docs/{data-entry,telegram}/
- Architecture decision records in docs/ARCHITECTURE-DECISIONS.md
- Deployment guides consolidated in deployment/windows/docs/

This migration reduces complexity, improves maintainability, and enables easier
deployment while maintaining all existing functionality.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 23:48:14 +02:00
parent 2a101f1ef5
commit c5e051ad80
378 changed files with 7566 additions and 73730 deletions

View File

View File

@@ -0,0 +1,96 @@
"""Application configuration using pydantic-settings."""
import os
from pathlib import Path
from typing import List
from pydantic_settings import BaseSettings
from functools import lru_cache
class Settings(BaseSettings):
"""Application settings loaded from environment variables."""
# App info
app_name: str = "Data Entry API"
app_version: str = "1.0.0"
debug: bool = False
# API
api_host: str = "0.0.0.0"
api_port: int = 8003
# SQLite Database
sqlite_database_path: str = "data/receipts/receipts.db"
# File uploads
upload_path: str = "data/uploads"
max_upload_size_mb: int = 10
allowed_mime_types: List[str] = [
"image/jpeg",
"image/png",
"image/gif",
"image/webp",
"application/pdf",
]
# Oracle Database (for nomenclatures)
oracle_user: str = ""
oracle_password: str = ""
oracle_host: str = "localhost"
oracle_port: int = 1526
oracle_sid: str = "ROA"
# JWT Authentication
jwt_secret_key: str = "change-me-in-production"
jwt_algorithm: str = "HS256"
jwt_expire_minutes: int = 480
# CORS
cors_origins: str = "http://localhost:3010,http://localhost:3000"
class Config:
env_file = ".env"
env_file_encoding = "utf-8"
extra = "ignore"
@property
def database_url(self) -> str:
"""Get SQLite database URL for async."""
return f"sqlite+aiosqlite:///{self.sqlite_database_path}"
@property
def sync_database_url(self) -> str:
"""Get SQLite database URL for sync operations (Alembic)."""
return f"sqlite:///{self.sqlite_database_path}"
@property
def upload_path_resolved(self) -> Path:
"""Get resolved upload path."""
path = Path(self.upload_path)
path.mkdir(parents=True, exist_ok=True)
return path
@property
def max_upload_size_bytes(self) -> int:
"""Get max upload size in bytes."""
return self.max_upload_size_mb * 1024 * 1024
@property
def cors_origins_list(self) -> List[str]:
"""Get CORS origins as list."""
return [origin.strip() for origin in self.cors_origins.split(",")]
@property
def oracle_dsn(self) -> str:
"""Get Oracle DSN string."""
return f"{self.oracle_host}:{self.oracle_port}/{self.oracle_sid}"
@lru_cache()
def get_settings() -> Settings:
"""Get cached settings instance."""
return Settings()
# Convenience instance
settings = get_settings()

View File

@@ -0,0 +1,4 @@
# Database module
from .database import get_session, init_db, engine
__all__ = ["get_session", "init_db", "engine"]

View File

@@ -0,0 +1,10 @@
# CRUD operations
from .receipt import ReceiptCRUD
from .attachment import AttachmentCRUD
from .accounting_entry import AccountingEntryCRUD
__all__ = [
"ReceiptCRUD",
"AttachmentCRUD",
"AccountingEntryCRUD",
]

View File

@@ -0,0 +1,197 @@
"""CRUD operations for accounting entries."""
from datetime import datetime
from typing import Optional, List
from sqlalchemy import select, delete
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.models.accounting_entry import AccountingEntry, EntryType
from backend.modules.data_entry.schemas.receipt import AccountingEntryCreate, AccountingEntryUpdate
class AccountingEntryCRUD:
"""CRUD operations for AccountingEntry model."""
@staticmethod
async def create(
session: AsyncSession,
receipt_id: int,
data: AccountingEntryCreate,
sort_order: int = 0,
is_auto_generated: bool = True,
) -> AccountingEntry:
"""Create a new accounting entry."""
entry = AccountingEntry(
receipt_id=receipt_id,
entry_type=data.entry_type,
account_code=data.account_code,
account_name=data.account_name,
amount=data.amount,
partner_id=data.partner_id,
cost_center_id=data.cost_center_id,
is_auto_generated=is_auto_generated,
sort_order=sort_order,
)
session.add(entry)
await session.commit()
await session.refresh(entry)
return entry
@staticmethod
async def create_bulk(
session: AsyncSession,
receipt_id: int,
entries: List[AccountingEntryCreate],
is_auto_generated: bool = True,
) -> List[AccountingEntry]:
"""Create multiple accounting entries at once."""
created_entries = []
for idx, entry_data in enumerate(entries):
entry = AccountingEntry(
receipt_id=receipt_id,
entry_type=entry_data.entry_type,
account_code=entry_data.account_code,
account_name=entry_data.account_name,
amount=entry_data.amount,
partner_id=entry_data.partner_id,
cost_center_id=entry_data.cost_center_id,
is_auto_generated=is_auto_generated,
sort_order=idx,
)
session.add(entry)
created_entries.append(entry)
await session.commit()
for entry in created_entries:
await session.refresh(entry)
return created_entries
@staticmethod
async def get_by_id(
session: AsyncSession,
entry_id: int,
) -> Optional[AccountingEntry]:
"""Get accounting entry by ID."""
query = select(AccountingEntry).where(AccountingEntry.id == entry_id)
result = await session.execute(query)
return result.scalar_one_or_none()
@staticmethod
async def get_by_receipt_id(
session: AsyncSession,
receipt_id: int,
) -> List[AccountingEntry]:
"""Get all accounting entries for a receipt."""
query = select(AccountingEntry).where(
AccountingEntry.receipt_id == receipt_id
).order_by(AccountingEntry.sort_order.asc())
result = await session.execute(query)
return list(result.scalars().all())
@staticmethod
async def update(
session: AsyncSession,
entry: AccountingEntry,
data: AccountingEntryUpdate,
modified_by: str,
) -> AccountingEntry:
"""Update an accounting entry."""
update_data = data.model_dump(exclude_unset=True)
for field, value in update_data.items():
setattr(entry, field, value)
entry.is_auto_generated = False
entry.modified_by = modified_by
entry.modified_at = datetime.utcnow()
session.add(entry)
await session.commit()
await session.refresh(entry)
return entry
@staticmethod
async def delete(session: AsyncSession, entry: AccountingEntry) -> bool:
"""Delete an accounting entry."""
await session.delete(entry)
await session.commit()
return True
@staticmethod
async def delete_all_for_receipt(session: AsyncSession, receipt_id: int) -> int:
"""Delete all accounting entries for a receipt."""
query = delete(AccountingEntry).where(AccountingEntry.receipt_id == receipt_id)
result = await session.execute(query)
await session.commit()
return result.rowcount
@staticmethod
async def replace_all_for_receipt(
session: AsyncSession,
receipt_id: int,
entries: List[AccountingEntryCreate],
modified_by: str,
) -> List[AccountingEntry]:
"""Replace all entries for a receipt with new ones."""
# Delete existing entries
await AccountingEntryCRUD.delete_all_for_receipt(session, receipt_id)
# Create new entries (marked as manually modified)
created_entries = []
for idx, entry_data in enumerate(entries):
entry = AccountingEntry(
receipt_id=receipt_id,
entry_type=entry_data.entry_type,
account_code=entry_data.account_code,
account_name=entry_data.account_name,
amount=entry_data.amount,
partner_id=entry_data.partner_id,
cost_center_id=entry_data.cost_center_id,
is_auto_generated=False,
modified_by=modified_by,
modified_at=datetime.utcnow(),
sort_order=idx,
)
session.add(entry)
created_entries.append(entry)
await session.commit()
for entry in created_entries:
await session.refresh(entry)
return created_entries
@staticmethod
async def validate_entries(entries: List[AccountingEntryCreate]) -> tuple[bool, str]:
"""
Validate accounting entries.
Returns (is_valid, error_message).
"""
if not entries:
return False, "At least one entry is required"
total_debit = sum(
e.amount for e in entries if e.entry_type == EntryType.DEBIT
)
total_credit = sum(
e.amount for e in entries if e.entry_type == EntryType.CREDIT
)
# Check balance (debit should equal credit)
if abs(total_debit - total_credit) > 0.01:
return False, f"Entries not balanced: Debit={total_debit}, Credit={total_credit}"
# Check for valid account codes
for entry in entries:
if not entry.account_code or len(entry.account_code) < 3:
return False, f"Invalid account code: {entry.account_code}"
return True, ""

View File

@@ -0,0 +1,140 @@
"""CRUD operations for receipt attachments."""
import os
import uuid
import aiofiles
from datetime import datetime
from pathlib import Path
from typing import Optional, List
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from fastapi import UploadFile
from backend.modules.data_entry.db.models.receipt import ReceiptAttachment
from backend.modules.data_entry.config import settings
class AttachmentCRUD:
"""CRUD operations for ReceiptAttachment model."""
@staticmethod
def _generate_stored_filename(original_filename: str) -> str:
"""Generate unique filename for storage."""
ext = Path(original_filename).suffix.lower()
return f"{uuid.uuid4()}{ext}"
@staticmethod
def _get_upload_path(stored_filename: str) -> Path:
"""Get full path for storing file, organized by year/month."""
now = datetime.utcnow()
relative_path = Path(str(now.year)) / f"{now.month:02d}"
full_path = settings.upload_path_resolved / relative_path
# Ensure directory exists
full_path.mkdir(parents=True, exist_ok=True)
return relative_path / stored_filename
@staticmethod
async def create(
session: AsyncSession,
receipt_id: int,
file: UploadFile,
) -> ReceiptAttachment:
"""Create attachment by saving file and creating DB record."""
# Generate stored filename
stored_filename = AttachmentCRUD._generate_stored_filename(file.filename or "upload")
# Get relative path
relative_path = AttachmentCRUD._get_upload_path(stored_filename)
# Full path for saving
full_path = settings.upload_path_resolved / relative_path
# Read file content
content = await file.read()
file_size = len(content)
# Validate file size
if file_size > settings.max_upload_size_bytes:
raise ValueError(f"File too large. Maximum size is {settings.max_upload_size_mb}MB")
# Validate MIME type
mime_type = file.content_type or "application/octet-stream"
if mime_type not in settings.allowed_mime_types:
raise ValueError(f"File type not allowed: {mime_type}")
# Save file
async with aiofiles.open(full_path, "wb") as f:
await f.write(content)
# Create DB record
attachment = ReceiptAttachment(
receipt_id=receipt_id,
filename=file.filename or "upload",
stored_filename=stored_filename,
file_path=str(relative_path),
file_size=file_size,
mime_type=mime_type,
)
session.add(attachment)
await session.commit()
await session.refresh(attachment)
return attachment
@staticmethod
async def get_by_id(
session: AsyncSession,
attachment_id: int,
) -> Optional[ReceiptAttachment]:
"""Get attachment by ID."""
query = select(ReceiptAttachment).where(ReceiptAttachment.id == attachment_id)
result = await session.execute(query)
return result.scalar_one_or_none()
@staticmethod
async def get_by_receipt_id(
session: AsyncSession,
receipt_id: int,
) -> List[ReceiptAttachment]:
"""Get all attachments for a receipt."""
query = select(ReceiptAttachment).where(
ReceiptAttachment.receipt_id == receipt_id
).order_by(ReceiptAttachment.uploaded_at.asc())
result = await session.execute(query)
return list(result.scalars().all())
@staticmethod
def get_file_path(attachment: ReceiptAttachment) -> Path:
"""Get full file path for an attachment."""
return settings.upload_path_resolved / attachment.file_path
@staticmethod
async def delete(session: AsyncSession, attachment: ReceiptAttachment) -> bool:
"""Delete attachment (file and DB record)."""
# Delete file
file_path = AttachmentCRUD.get_file_path(attachment)
if file_path.exists():
os.remove(file_path)
# Delete DB record
await session.delete(attachment)
await session.commit()
return True
@staticmethod
async def delete_all_for_receipt(session: AsyncSession, receipt_id: int) -> int:
"""Delete all attachments for a receipt."""
attachments = await AttachmentCRUD.get_by_receipt_id(session, receipt_id)
count = 0
for attachment in attachments:
await AttachmentCRUD.delete(session, attachment)
count += 1
return count

View File

@@ -0,0 +1,324 @@
"""CRUD operations for receipts."""
import json
from datetime import datetime, date
from decimal import Decimal
from typing import Optional, List, Tuple, Any
from sqlalchemy import select, func, or_
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptStatus
from backend.modules.data_entry.schemas.receipt import ReceiptCreate, ReceiptUpdate, ReceiptFilter
def _serialize_tva_breakdown(tva_breakdown: Optional[List[Any]]) -> Optional[str]:
"""Serialize TVA breakdown list to JSON string for SQLite storage."""
if tva_breakdown is None:
return None
# Convert Decimal to float for JSON serialization
serializable = []
for entry in tva_breakdown:
if hasattr(entry, 'model_dump'):
# Pydantic model
item = entry.model_dump()
elif isinstance(entry, dict):
item = entry.copy()
else:
item = dict(entry)
# Convert Decimal to float
if 'amount' in item and isinstance(item['amount'], Decimal):
item['amount'] = float(item['amount'])
serializable.append(item)
return json.dumps(serializable)
def _serialize_payment_methods(payment_methods: Optional[List[Any]]) -> Optional[str]:
"""Serialize payment methods list to JSON string for SQLite storage."""
if payment_methods is None:
return None
serializable = []
for pm in payment_methods:
if hasattr(pm, 'model_dump'):
item = pm.model_dump()
elif isinstance(pm, dict):
item = pm.copy()
else:
item = dict(pm)
# Convert Decimal to float for JSON
if 'amount' in item:
if hasattr(item['amount'], '__float__'):
item['amount'] = float(item['amount'])
serializable.append(item)
return json.dumps(serializable)
class ReceiptCRUD:
"""CRUD operations for Receipt model."""
@staticmethod
async def create(
session: AsyncSession,
data: ReceiptCreate,
created_by: str,
) -> Receipt:
"""Create a new receipt."""
# Get data as dict and serialize tva_breakdown and payment_methods to JSON string
receipt_data = data.model_dump()
receipt_data['tva_breakdown'] = _serialize_tva_breakdown(receipt_data.get('tva_breakdown'))
receipt_data['payment_methods'] = _serialize_payment_methods(receipt_data.get('payment_methods'))
receipt = Receipt(
**receipt_data,
created_by=created_by,
status=ReceiptStatus.DRAFT,
)
session.add(receipt)
await session.commit()
await session.refresh(receipt)
# Reload with relationships to avoid lazy loading issues with async
return await ReceiptCRUD.get_by_id(session, receipt.id, include_relations=True)
@staticmethod
async def get_by_id(
session: AsyncSession,
receipt_id: int,
include_relations: bool = True,
) -> Optional[Receipt]:
"""Get receipt by ID, optionally with relationships."""
query = select(Receipt).where(Receipt.id == receipt_id)
if include_relations:
query = query.options(
selectinload(Receipt.attachments),
selectinload(Receipt.entries),
)
result = await session.execute(query)
return result.scalar_one_or_none()
@staticmethod
async def get_list(
session: AsyncSession,
filters: ReceiptFilter,
) -> Tuple[List[Receipt], int]:
"""Get paginated list of receipts with filters."""
# Base query
query = select(Receipt).options(
selectinload(Receipt.attachments),
selectinload(Receipt.entries),
)
# Apply filters
if filters.status:
query = query.where(Receipt.status == filters.status)
if filters.direction:
query = query.where(Receipt.direction == filters.direction)
if filters.company_id:
query = query.where(Receipt.company_id == filters.company_id)
if filters.created_by:
query = query.where(Receipt.created_by == filters.created_by)
if filters.date_from:
query = query.where(Receipt.receipt_date >= filters.date_from)
if filters.date_to:
query = query.where(Receipt.receipt_date <= filters.date_to)
if filters.search:
search_term = f"%{filters.search}%"
query = query.where(
or_(
Receipt.description.ilike(search_term),
Receipt.partner_name.ilike(search_term),
Receipt.receipt_number.ilike(search_term),
)
)
# Count total
count_query = select(func.count()).select_from(query.subquery())
total_result = await session.execute(count_query)
total = total_result.scalar() or 0
# Apply pagination and ordering
query = query.order_by(Receipt.created_at.desc())
offset = (filters.page - 1) * filters.page_size
query = query.offset(offset).limit(filters.page_size)
# Execute
result = await session.execute(query)
receipts = result.scalars().all()
return list(receipts), total
@staticmethod
async def get_pending_review(
session: AsyncSession,
company_id: Optional[int] = None,
) -> List[Receipt]:
"""Get all receipts pending review."""
query = select(Receipt).where(
Receipt.status == ReceiptStatus.PENDING_REVIEW
).options(
selectinload(Receipt.attachments),
selectinload(Receipt.entries),
)
if company_id:
query = query.where(Receipt.company_id == company_id)
query = query.order_by(Receipt.submitted_at.asc())
result = await session.execute(query)
return list(result.scalars().all())
@staticmethod
async def update(
session: AsyncSession,
receipt: Receipt,
data: ReceiptUpdate,
) -> Receipt:
"""Update receipt fields."""
update_data = data.model_dump(exclude_unset=True)
# Serialize tva_breakdown and payment_methods to JSON string if present
if 'tva_breakdown' in update_data:
update_data['tva_breakdown'] = _serialize_tva_breakdown(update_data['tva_breakdown'])
if 'payment_methods' in update_data:
update_data['payment_methods'] = _serialize_payment_methods(update_data['payment_methods'])
for field, value in update_data.items():
setattr(receipt, field, value)
receipt.updated_at = datetime.utcnow()
session.add(receipt)
await session.commit()
await session.refresh(receipt)
# Reload with relationships to avoid lazy loading issues with async
return await ReceiptCRUD.get_by_id(session, receipt.id, include_relations=True)
@staticmethod
async def update_status(
session: AsyncSession,
receipt: Receipt,
new_status: ReceiptStatus,
reviewed_by: Optional[str] = None,
rejection_reason: Optional[str] = None,
) -> Receipt:
"""Update receipt workflow status."""
receipt.status = new_status
receipt.updated_at = datetime.utcnow()
if new_status == ReceiptStatus.PENDING_REVIEW:
receipt.submitted_at = datetime.utcnow()
if new_status in [ReceiptStatus.APPROVED, ReceiptStatus.REJECTED]:
receipt.reviewed_by = reviewed_by
receipt.reviewed_at = datetime.utcnow()
if new_status == ReceiptStatus.REJECTED:
receipt.rejection_reason = rejection_reason
if new_status == ReceiptStatus.DRAFT:
# Reset review fields when moving back to draft
receipt.rejection_reason = None
session.add(receipt)
await session.commit()
await session.refresh(receipt)
# Reload with relationships to avoid lazy loading issues with async
return await ReceiptCRUD.get_by_id(session, receipt.id, include_relations=True)
@staticmethod
async def delete(session: AsyncSession, receipt: Receipt) -> bool:
"""Delete a receipt (cascade deletes attachments and entries)."""
await session.delete(receipt)
await session.commit()
return True
@staticmethod
async def can_edit(receipt: Receipt, username: str) -> bool:
"""Check if user can edit receipt."""
# DRAFT and REJECTED receipts can be edited (to fix and resubmit)
if receipt.status not in [ReceiptStatus.DRAFT, ReceiptStatus.REJECTED]:
return False
# Only creator can edit their own receipts
return receipt.created_by == username
@staticmethod
async def can_delete(receipt: Receipt, username: str) -> bool:
"""Check if user can delete receipt."""
# Only DRAFT receipts can be deleted
if receipt.status != ReceiptStatus.DRAFT:
return False
# Only creator can delete their own drafts
return receipt.created_by == username
@staticmethod
async def can_submit(receipt: Receipt, username: str) -> bool:
"""Check if user can submit receipt for review."""
# Only DRAFT or REJECTED receipts can be submitted
if receipt.status not in [ReceiptStatus.DRAFT, ReceiptStatus.REJECTED]:
return False
# Only creator can submit their own receipts
return receipt.created_by == username
@staticmethod
async def get_stats(
session: AsyncSession,
company_id: int,
created_by: Optional[str] = None,
) -> dict:
"""Get receipt statistics."""
base_query = select(
Receipt.status,
func.count(Receipt.id).label("count"),
func.sum(Receipt.amount).label("total_amount"),
).where(
Receipt.company_id == company_id
)
if created_by:
base_query = base_query.where(Receipt.created_by == created_by)
query = base_query.group_by(Receipt.status)
result = await session.execute(query)
rows = result.all()
stats = {
"draft": {"count": 0, "amount": 0},
"pending_review": {"count": 0, "amount": 0},
"approved": {"count": 0, "amount": 0},
"rejected": {"count": 0, "amount": 0},
"synced": {"count": 0, "amount": 0},
"total": {"count": 0, "amount": 0},
}
for row in rows:
status_key = row.status.value
stats[status_key] = {
"count": row.count,
"amount": float(row.total_amount or 0),
}
stats["total"]["count"] += row.count
stats["total"]["amount"] += float(row.total_amount or 0)
return stats

View File

@@ -0,0 +1,49 @@
"""Database configuration and session management using SQLModel."""
from pathlib import Path
from typing import AsyncGenerator
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
from sqlmodel import SQLModel
from backend.modules.data_entry.config import settings
# Create async engine
engine = create_async_engine(
settings.database_url,
echo=settings.debug,
future=True,
)
# Create async session factory
async_session_maker = sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
)
async def init_db() -> None:
"""Initialize database - create tables if they don't exist."""
# Ensure data directory exists
db_path = Path(settings.sqlite_database_path)
db_path.parent.mkdir(parents=True, exist_ok=True)
async with engine.begin() as conn:
await conn.run_sync(SQLModel.metadata.create_all)
async def get_session() -> AsyncGenerator[AsyncSession, None]:
"""Get async database session for dependency injection."""
async with async_session_maker() as session:
try:
yield session
finally:
await session.close()
# Convenience function for manual session usage
async def get_db_session() -> AsyncSession:
"""Get a new database session (manual management)."""
return async_session_maker()

View File

@@ -0,0 +1,17 @@
# Database models
from .receipt import Receipt, ReceiptAttachment, ReceiptStatus, ReceiptType, ReceiptDirection
from .accounting_entry import AccountingEntry, EntryType
from .nomenclature import SyncedSupplier, LocalSupplier, SyncedCashRegister
__all__ = [
"Receipt",
"ReceiptAttachment",
"ReceiptStatus",
"ReceiptType",
"ReceiptDirection",
"AccountingEntry",
"EntryType",
"SyncedSupplier",
"LocalSupplier",
"SyncedCashRegister",
]

View File

@@ -0,0 +1,49 @@
"""AccountingEntry SQLModel model for proposed accounting entries."""
from datetime import datetime
from decimal import Decimal
from enum import Enum
from typing import Optional, TYPE_CHECKING
from sqlmodel import SQLModel, Field, Relationship
if TYPE_CHECKING:
from .receipt import Receipt
class EntryType(str, Enum):
"""Type of accounting entry."""
DEBIT = "debit"
CREDIT = "credit"
class AccountingEntry(SQLModel, table=True):
"""Proposed accounting entry for a receipt."""
__tablename__ = "accounting_entries"
id: Optional[int] = Field(default=None, primary_key=True)
receipt_id: int = Field(foreign_key="receipts.id", index=True)
# Account
entry_type: EntryType
account_code: str = Field(max_length=20) # e.g., 6022, 5311, 4426
account_name: Optional[str] = Field(default=None, max_length=200) # Cache: "Cheltuieli combustibil"
# Amount
amount: Decimal = Field(decimal_places=2, max_digits=15)
# Analytics (optional)
partner_id: Optional[int] = Field(default=None)
cost_center_id: Optional[int] = Field(default=None)
# Entry metadata
is_auto_generated: bool = Field(default=True) # True if system-generated
modified_by: Optional[str] = Field(default=None, max_length=100) # Username if modified
modified_at: Optional[datetime] = Field(default=None)
# Order for display
sort_order: int = Field(default=0)
# Relationship
receipt: Optional["Receipt"] = Relationship(back_populates="entries")

View File

@@ -0,0 +1,46 @@
"""Nomenclature models for synced and local data."""
from typing import Optional
from datetime import datetime
from sqlmodel import SQLModel, Field
class SyncedSupplier(SQLModel, table=True):
"""Suppliers synced from Oracle NOM_PARTENERI."""
__tablename__ = "synced_suppliers"
id: Optional[int] = Field(default=None, primary_key=True)
oracle_id: int = Field(index=True) # Original Oracle ID
company_id: int = Field(index=True) # Company this supplier belongs to
name: str = Field(max_length=200)
fiscal_code: Optional[str] = Field(default=None, max_length=50, index=True) # CUI/CIF
address: Optional[str] = Field(default=None, max_length=500)
synced_at: datetime = Field(default_factory=datetime.utcnow)
class LocalSupplier(SQLModel, table=True):
"""Suppliers created locally from OCR (not in Oracle)."""
__tablename__ = "local_suppliers"
id: Optional[int] = Field(default=None, primary_key=True)
company_id: int = Field(index=True)
name: str = Field(max_length=200)
fiscal_code: Optional[str] = Field(default=None, max_length=50, index=True)
address: Optional[str] = Field(default=None, max_length=500)
created_by: str = Field(max_length=100) # Username who created it
created_at: datetime = Field(default_factory=datetime.utcnow)
# Flag to indicate if it should be synced to Oracle later
pending_oracle_sync: bool = Field(default=True)
class SyncedCashRegister(SQLModel, table=True):
"""Cash registers and bank accounts synced from Oracle."""
__tablename__ = "synced_cash_registers"
id: Optional[int] = Field(default=None, primary_key=True)
oracle_id: int = Field(index=True)
company_id: int = Field(index=True)
name: str = Field(max_length=100)
account_code: str = Field(max_length=20) # 5311, 5121, etc.
register_type: str = Field(max_length=10) # 'cash' or 'bank'
synced_at: datetime = Field(default_factory=datetime.utcnow)

View File

@@ -0,0 +1,127 @@
"""Receipt and ReceiptAttachment SQLModel models."""
from datetime import datetime, date
from decimal import Decimal
from enum import Enum
from typing import Optional, List, TYPE_CHECKING
from sqlmodel import SQLModel, Field, Relationship
class ReceiptType(str, Enum):
"""Type of receipt document."""
BON_FISCAL = "bon_fiscal"
CHITANTA = "chitanta"
class ReceiptDirection(str, Enum):
"""Direction of receipt - expense or income."""
CHELTUIALA = "cheltuiala" # Expense (receipt from supplier)
INCASARE = "incasare" # Income (receipt issued to client)
class ReceiptStatus(str, Enum):
"""Workflow status of receipt."""
DRAFT = "draft" # User is filling in data
PENDING_REVIEW = "pending_review" # Awaiting accountant approval
APPROVED = "approved" # Approved by accountant
REJECTED = "rejected" # Rejected by accountant
SYNCED = "synced" # Synced to Oracle (Phase 2)
class PaymentMode(str, Enum):
"""Payment mode - how the expense was paid."""
CASA = "casa" # Numerar firma (5311)
BANCA = "banca" # Virament/POS (5121)
AVANS_DECONTARE = "avans_decontare" # Decont angajat (542)
if TYPE_CHECKING:
from .accounting_entry import AccountingEntry
class Receipt(SQLModel, table=True):
"""Receipt (Bon Fiscal / Chitanta) with approval workflow."""
__tablename__ = "receipts"
id: Optional[int] = Field(default=None, primary_key=True)
# Document identification
receipt_type: ReceiptType = Field(default=ReceiptType.BON_FISCAL)
direction: ReceiptDirection = Field(default=ReceiptDirection.CHELTUIALA)
receipt_number: Optional[str] = Field(default=None, max_length=50)
receipt_series: Optional[str] = Field(default=None, max_length=20)
# Main data
receipt_date: date
amount: Decimal = Field(decimal_places=2, max_digits=15)
description: Optional[str] = Field(default=None, max_length=500)
# TVA info (extracted from OCR) - stored as JSON for multiple entries
tva_breakdown: Optional[str] = Field(default=None, max_length=1000) # JSON: [{"code":"A","percent":19,"amount":"15.20"}]
tva_total: Optional[Decimal] = Field(default=None, decimal_places=2, max_digits=15)
items_count: Optional[int] = Field(default=None)
vendor_address: Optional[str] = Field(default=None, max_length=500)
# Expense type (for auto-generating accounting entries)
expense_type_code: Optional[str] = Field(default=None, max_length=20)
# Oracle references (nomenclatures)
company_id: int
# partner_id removed - supplier data is text-only (partner_name, cui)
partner_name: Optional[str] = Field(default=None, max_length=200) # Supplier name from OCR/selection
cui: Optional[str] = Field(default=None, max_length=20) # Fiscal code from OCR
ocr_raw_text: Optional[str] = Field(default=None) # Raw OCR text for debugging
payment_methods: Optional[str] = Field(default=None, max_length=500) # JSON: [{"method":"CARD","amount":"50.00"}]
cash_register_id: Optional[int] = Field(default=None) # Cash/Bank ID from Oracle
cash_register_name: Optional[str] = Field(default=None, max_length=100) # Cache for display
cash_register_account: Optional[str] = Field(default=None, max_length=20) # Account code (5311, 5121)
payment_mode: Optional[str] = Field(default=None, max_length=20) # PaymentMode value: casa/banca/avans_decontare
# Workflow
status: ReceiptStatus = Field(default=ReceiptStatus.DRAFT)
created_by: str = Field(max_length=100) # Username of creator
created_at: datetime = Field(default_factory=datetime.utcnow)
updated_at: datetime = Field(default_factory=datetime.utcnow)
submitted_at: Optional[datetime] = Field(default=None) # When submitted for approval
# Approval
reviewed_by: Optional[str] = Field(default=None, max_length=100) # Accountant username
reviewed_at: Optional[datetime] = Field(default=None)
rejection_reason: Optional[str] = Field(default=None, max_length=500) # Reason for rejection
# Phase 2 - Oracle sync
oracle_synced_at: Optional[datetime] = Field(default=None)
oracle_act_id: Optional[int] = Field(default=None)
oracle_error: Optional[str] = Field(default=None, max_length=500)
# Relationships
attachments: List["ReceiptAttachment"] = Relationship(
back_populates="receipt",
sa_relationship_kwargs={"cascade": "all, delete-orphan"}
)
entries: List["AccountingEntry"] = Relationship(
back_populates="receipt",
sa_relationship_kwargs={"cascade": "all, delete-orphan"}
)
class ReceiptAttachment(SQLModel, table=True):
"""Attachment (photo or PDF) for a receipt."""
__tablename__ = "receipt_attachments"
id: Optional[int] = Field(default=None, primary_key=True)
receipt_id: int = Field(foreign_key="receipts.id", index=True)
# File info
filename: str = Field(max_length=255) # Original filename
stored_filename: str = Field(max_length=255) # Filename on disk (UUID)
file_path: str = Field(max_length=500) # Relative path
file_size: int # Size in bytes
mime_type: str = Field(max_length=100) # MIME type (image/jpeg, application/pdf)
uploaded_at: datetime = Field(default_factory=datetime.utcnow)
# Relationship
receipt: Optional[Receipt] = Relationship(back_populates="attachments")

View File

@@ -0,0 +1,89 @@
"""Alembic environment configuration."""
import os
from logging.config import fileConfig
from dotenv import load_dotenv
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
from sqlmodel import SQLModel
# Load environment variables from .env file
load_dotenv()
# Import all models to ensure they're registered with SQLModel
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptAttachment
from backend.modules.data_entry.db.models.accounting_entry import AccountingEntry
from backend.modules.data_entry.db.models.nomenclature import SyncedSupplier, LocalSupplier, SyncedCashRegister
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Override sqlalchemy.url from environment variable if set
db_path = os.getenv("SQLITE_DATABASE_PATH", "data/receipts/receipts.db")
config.set_main_option("sqlalchemy.url", f"sqlite:///{db_path}")
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
target_metadata = SQLModel.metadata
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
render_as_batch=True, # Required for SQLite ALTER TABLE support
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata,
render_as_batch=True, # Required for SQLite ALTER TABLE support
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

View File

@@ -0,0 +1,27 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,112 @@
"""Initial receipts schema
Revision ID: 001_initial
Revises:
Create Date: 2024-12-11
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
# revision identifiers, used by Alembic.
revision: str = '001_initial'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Create receipts table
op.create_table(
'receipts',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('receipt_type', sa.Enum('BON_FISCAL', 'CHITANTA', name='receipttype'), nullable=False),
sa.Column('direction', sa.Enum('CHELTUIALA', 'INCASARE', name='receiptdirection'), nullable=False),
sa.Column('receipt_number', sa.String(length=50), nullable=True),
sa.Column('receipt_series', sa.String(length=20), nullable=True),
sa.Column('receipt_date', sa.Date(), nullable=False),
sa.Column('amount', sa.Numeric(precision=15, scale=2), nullable=False),
sa.Column('description', sa.String(length=500), nullable=True),
sa.Column('expense_type_code', sa.String(length=20), nullable=True),
sa.Column('company_id', sa.Integer(), nullable=False),
sa.Column('partner_id', sa.Integer(), nullable=True),
sa.Column('partner_name', sa.String(length=200), nullable=True),
sa.Column('cash_register_id', sa.Integer(), nullable=True),
sa.Column('cash_register_name', sa.String(length=100), nullable=True),
sa.Column('cash_register_account', sa.String(length=20), nullable=True),
sa.Column('status', sa.Enum('DRAFT', 'PENDING_REVIEW', 'APPROVED', 'REJECTED', 'SYNCED', name='receiptstatus'), nullable=False),
sa.Column('created_by', sa.String(length=100), nullable=False),
sa.Column('created_at', sa.DateTime(), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=False),
sa.Column('submitted_at', sa.DateTime(), nullable=True),
sa.Column('reviewed_by', sa.String(length=100), nullable=True),
sa.Column('reviewed_at', sa.DateTime(), nullable=True),
sa.Column('rejection_reason', sa.String(length=500), nullable=True),
sa.Column('oracle_synced_at', sa.DateTime(), nullable=True),
sa.Column('oracle_act_id', sa.Integer(), nullable=True),
sa.Column('oracle_error', sa.String(length=500), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index(op.f('ix_receipts_company_id'), 'receipts', ['company_id'], unique=False)
op.create_index(op.f('ix_receipts_status'), 'receipts', ['status'], unique=False)
op.create_index(op.f('ix_receipts_created_by'), 'receipts', ['created_by'], unique=False)
op.create_index(op.f('ix_receipts_receipt_date'), 'receipts', ['receipt_date'], unique=False)
# Create receipt_attachments table
op.create_table(
'receipt_attachments',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('receipt_id', sa.Integer(), nullable=False),
sa.Column('filename', sa.String(length=255), nullable=False),
sa.Column('stored_filename', sa.String(length=255), nullable=False),
sa.Column('file_path', sa.String(length=500), nullable=False),
sa.Column('file_size', sa.Integer(), nullable=False),
sa.Column('mime_type', sa.String(length=100), nullable=False),
sa.Column('uploaded_at', sa.DateTime(), nullable=False),
sa.ForeignKeyConstraint(['receipt_id'], ['receipts.id'], ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id')
)
op.create_index(op.f('ix_receipt_attachments_receipt_id'), 'receipt_attachments', ['receipt_id'], unique=False)
# Create accounting_entries table
op.create_table(
'accounting_entries',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('receipt_id', sa.Integer(), nullable=False),
sa.Column('entry_type', sa.Enum('DEBIT', 'CREDIT', name='entrytype'), nullable=False),
sa.Column('account_code', sa.String(length=20), nullable=False),
sa.Column('account_name', sa.String(length=200), nullable=True),
sa.Column('amount', sa.Numeric(precision=15, scale=2), nullable=False),
sa.Column('partner_id', sa.Integer(), nullable=True),
sa.Column('cost_center_id', sa.Integer(), nullable=True),
sa.Column('is_auto_generated', sa.Boolean(), nullable=False),
sa.Column('modified_by', sa.String(length=100), nullable=True),
sa.Column('modified_at', sa.DateTime(), nullable=True),
sa.Column('sort_order', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['receipt_id'], ['receipts.id'], ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id')
)
op.create_index(op.f('ix_accounting_entries_receipt_id'), 'accounting_entries', ['receipt_id'], unique=False)
def downgrade() -> None:
op.drop_index(op.f('ix_accounting_entries_receipt_id'), table_name='accounting_entries')
op.drop_table('accounting_entries')
op.drop_index(op.f('ix_receipt_attachments_receipt_id'), table_name='receipt_attachments')
op.drop_table('receipt_attachments')
op.drop_index(op.f('ix_receipts_receipt_date'), table_name='receipts')
op.drop_index(op.f('ix_receipts_created_by'), table_name='receipts')
op.drop_index(op.f('ix_receipts_status'), table_name='receipts')
op.drop_index(op.f('ix_receipts_company_id'), table_name='receipts')
op.drop_table('receipts')
# Drop enums (SQLite doesn't actually use these, but for consistency)
op.execute("DROP TYPE IF EXISTS receipttype")
op.execute("DROP TYPE IF EXISTS receiptdirection")
op.execute("DROP TYPE IF EXISTS receiptstatus")
op.execute("DROP TYPE IF EXISTS entrytype")

View File

@@ -0,0 +1,37 @@
"""add_tva_breakdown_to_receipt
Revision ID: 1cfb423c6953
Revises: 001_initial
Create Date: 2025-12-12 14:04:22.464289+00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
# revision identifiers, used by Alembic.
revision: str = '1cfb423c6953'
down_revision: Union[str, None] = '001_initial'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Add TVA-related columns to receipts table
with op.batch_alter_table('receipts', schema=None) as batch_op:
batch_op.add_column(sa.Column('tva_breakdown', sqlmodel.sql.sqltypes.AutoString(length=1000), nullable=True))
batch_op.add_column(sa.Column('tva_total', sa.Numeric(precision=15, scale=2), nullable=True))
batch_op.add_column(sa.Column('items_count', sa.Integer(), nullable=True))
batch_op.add_column(sa.Column('vendor_address', sqlmodel.sql.sqltypes.AutoString(length=500), nullable=True))
def downgrade() -> None:
# Remove TVA-related columns from receipts table
with op.batch_alter_table('receipts', schema=None) as batch_op:
batch_op.drop_column('vendor_address')
batch_op.drop_column('items_count')
batch_op.drop_column('tva_total')
batch_op.drop_column('tva_breakdown')

View File

@@ -0,0 +1,89 @@
"""add nomenclature tables
Revision ID: 3a653da79002
Revises: 1cfb423c6953
Create Date: 2025-12-13 00:28:05.719430+00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
# revision identifiers, used by Alembic.
revision: str = '3a653da79002'
down_revision: Union[str, None] = '1cfb423c6953'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('local_suppliers',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('company_id', sa.Integer(), nullable=False),
sa.Column('name', sqlmodel.sql.sqltypes.AutoString(length=200), nullable=False),
sa.Column('fiscal_code', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=True),
sa.Column('address', sqlmodel.sql.sqltypes.AutoString(length=500), nullable=True),
sa.Column('created_by', sqlmodel.sql.sqltypes.AutoString(length=100), nullable=False),
sa.Column('created_at', sa.DateTime(), nullable=False),
sa.Column('pending_oracle_sync', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
with op.batch_alter_table('local_suppliers', schema=None) as batch_op:
batch_op.create_index(batch_op.f('ix_local_suppliers_company_id'), ['company_id'], unique=False)
batch_op.create_index(batch_op.f('ix_local_suppliers_fiscal_code'), ['fiscal_code'], unique=False)
op.create_table('synced_cash_registers',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('oracle_id', sa.Integer(), nullable=False),
sa.Column('company_id', sa.Integer(), nullable=False),
sa.Column('name', sqlmodel.sql.sqltypes.AutoString(length=100), nullable=False),
sa.Column('account_code', sqlmodel.sql.sqltypes.AutoString(length=20), nullable=False),
sa.Column('register_type', sqlmodel.sql.sqltypes.AutoString(length=10), nullable=False),
sa.Column('synced_at', sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
with op.batch_alter_table('synced_cash_registers', schema=None) as batch_op:
batch_op.create_index(batch_op.f('ix_synced_cash_registers_company_id'), ['company_id'], unique=False)
batch_op.create_index(batch_op.f('ix_synced_cash_registers_oracle_id'), ['oracle_id'], unique=False)
op.create_table('synced_suppliers',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('oracle_id', sa.Integer(), nullable=False),
sa.Column('company_id', sa.Integer(), nullable=False),
sa.Column('name', sqlmodel.sql.sqltypes.AutoString(length=200), nullable=False),
sa.Column('fiscal_code', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=True),
sa.Column('address', sqlmodel.sql.sqltypes.AutoString(length=500), nullable=True),
sa.Column('synced_at', sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
with op.batch_alter_table('synced_suppliers', schema=None) as batch_op:
batch_op.create_index(batch_op.f('ix_synced_suppliers_company_id'), ['company_id'], unique=False)
batch_op.create_index(batch_op.f('ix_synced_suppliers_fiscal_code'), ['fiscal_code'], unique=False)
batch_op.create_index(batch_op.f('ix_synced_suppliers_oracle_id'), ['oracle_id'], unique=False)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('synced_suppliers', schema=None) as batch_op:
batch_op.drop_index(batch_op.f('ix_synced_suppliers_oracle_id'))
batch_op.drop_index(batch_op.f('ix_synced_suppliers_fiscal_code'))
batch_op.drop_index(batch_op.f('ix_synced_suppliers_company_id'))
op.drop_table('synced_suppliers')
with op.batch_alter_table('synced_cash_registers', schema=None) as batch_op:
batch_op.drop_index(batch_op.f('ix_synced_cash_registers_oracle_id'))
batch_op.drop_index(batch_op.f('ix_synced_cash_registers_company_id'))
op.drop_table('synced_cash_registers')
with op.batch_alter_table('local_suppliers', schema=None) as batch_op:
batch_op.drop_index(batch_op.f('ix_local_suppliers_fiscal_code'))
batch_op.drop_index(batch_op.f('ix_local_suppliers_company_id'))
op.drop_table('local_suppliers')
# ### end Alembic commands ###

View File

@@ -0,0 +1,35 @@
"""add_ocr_fields_to_receipt
Revision ID: 4b8e5f2a1d93
Revises: 3a653da79002
Create Date: 2025-12-15 10:00:00.000000+00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
# revision identifiers, used by Alembic.
revision: str = '4b8e5f2a1d93'
down_revision: Union[str, None] = '3a653da79002'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Add OCR-related columns to receipts table
with op.batch_alter_table('receipts', schema=None) as batch_op:
batch_op.add_column(sa.Column('cui', sqlmodel.sql.sqltypes.AutoString(length=20), nullable=True))
batch_op.add_column(sa.Column('ocr_raw_text', sa.Text(), nullable=True))
batch_op.add_column(sa.Column('payment_methods', sqlmodel.sql.sqltypes.AutoString(length=500), nullable=True))
def downgrade() -> None:
# Remove OCR-related columns from receipts table
with op.batch_alter_table('receipts', schema=None) as batch_op:
batch_op.drop_column('payment_methods')
batch_op.drop_column('ocr_raw_text')
batch_op.drop_column('cui')

View File

@@ -0,0 +1,29 @@
"""Remove partner_id from receipts - supplier data is text-only
Revision ID: 20251215_remove_partner_id
Revises: 20251216_payment_mode
Create Date: 2025-12-15
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '20251215_remove_partner_id'
down_revision: Union[str, None] = '20251216_payment_mode'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Remove partner_id column - supplier data is now text-only (partner_name, cui)."""
# Drop the partner_id column
op.drop_column('receipts', 'partner_id')
def downgrade() -> None:
"""Re-add partner_id column."""
op.add_column('receipts', sa.Column('partner_id', sa.Integer(), nullable=True))

View File

@@ -0,0 +1,44 @@
"""Add payment_mode field to receipts table.
Revision ID: 20251216_payment_mode
Revises: 4b8e5f2a1d93
Create Date: 2024-12-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '20251216_payment_mode'
down_revision = '4b8e5f2a1d93'
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Add payment_mode column and migrate existing data."""
with op.batch_alter_table('receipts', schema=None) as batch_op:
batch_op.add_column(sa.Column('payment_mode', sa.String(length=20), nullable=True))
# Migrate existing data based on cash_register_account
op.execute("""
UPDATE receipts
SET payment_mode = 'casa'
WHERE cash_register_account LIKE '531%' AND payment_mode IS NULL
""")
op.execute("""
UPDATE receipts
SET payment_mode = 'banca'
WHERE cash_register_account LIKE '512%' AND payment_mode IS NULL
""")
op.execute("""
UPDATE receipts
SET payment_mode = 'avans_decontare'
WHERE cash_register_account LIKE '542%' AND payment_mode IS NULL
""")
def downgrade() -> None:
"""Remove payment_mode column."""
with op.batch_alter_table('receipts', schema=None) as batch_op:
batch_op.drop_column('payment_mode')

View File

@@ -0,0 +1,30 @@
"""Data Entry module router factory."""
from fastapi import APIRouter
def create_data_entry_router() -> APIRouter:
"""
Create and configure Data Entry module router.
Includes all data entry endpoints:
- /receipts - Receipt CRUD and workflow
- /ocr - OCR processing for receipts
- /nomenclature - Nomenclature syncing from Oracle
Returns:
APIRouter: Configured router for data entry module
"""
router = APIRouter()
# Import routers here to avoid circular imports
from .receipts import router as receipts_router
from .ocr import router as ocr_router
from .nomenclature import router as nomenclature_router
# Include all sub-routers (no prefix - already prefixed in main.py with /api/data-entry)
router.include_router(receipts_router, prefix="/receipts", tags=["data-entry-receipts"])
router.include_router(ocr_router, prefix="/ocr", tags=["data-entry-ocr"])
router.include_router(nomenclature_router, prefix="/nomenclature", tags=["data-entry-nomenclature"])
return router

View File

@@ -0,0 +1,254 @@
"""Nomenclature API endpoints."""
from typing import Optional, List, Annotated
from fastapi import APIRouter, Depends, HTTPException, Header
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.services.sync_service import SyncService
# Import auth dependencies
import sys
from pathlib import Path
# Path setup handled by main.py - this is redundant
# project_root = Path(__file__).parent.parent.parent.parent.parent
# sys.path.insert(0, str(project_root / "shared"))
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
router = APIRouter()
# ============ Selected Company Dependency ============
async def get_selected_company(
current_user: CurrentUser = Depends(get_current_user),
x_selected_company: Annotated[Optional[str], Header()] = None
) -> int:
"""
Get selected company from X-Selected-Company header.
Validates user access. Falls back to first company if no header.
"""
if x_selected_company:
try:
company_id = int(x_selected_company)
except ValueError:
raise HTTPException(400, f"Invalid company ID: {x_selected_company}")
if str(company_id) in current_user.companies:
return company_id
raise HTTPException(403, f"Nu aveți acces la firma {company_id}")
if current_user.companies:
try:
return int(current_user.companies[0])
except (ValueError, IndexError):
pass
raise HTTPException(400, "Nu aveți nicio firmă asignată")
SelectedCompany = Annotated[int, Depends(get_selected_company)]
# Request/Response Models
class SupplierSearchResult(BaseModel):
found: bool
supplier: Optional[dict] = None
source: str # 'synced', 'local', 'not_found'
class LocalSupplierCreate(BaseModel):
name: str
fiscal_code: Optional[str] = None
address: Optional[str] = None
class LocalSupplierResponse(BaseModel):
id: int
name: str
fiscal_code: Optional[str]
address: Optional[str]
is_local: bool = True
class SyncResult(BaseModel):
synced: int
errors: int
message: str
class SupplierOption(BaseModel):
id: int
oracle_id: Optional[int] = None
name: str
fiscal_code: Optional[str]
source: str # 'synced' or 'local'
class CashRegisterOption(BaseModel):
id: int
oracle_id: int
name: str
account_code: str
register_type: str
# Endpoints
@router.get("/suppliers/search", response_model=SupplierSearchResult)
async def search_supplier(
fiscal_code: Optional[str] = None,
name: Optional[str] = None,
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Search for supplier by fiscal code or name."""
if not fiscal_code and not name:
raise HTTPException(status_code=400, detail="Provide fiscal_code or name")
cid = company_id or selected_company
found, supplier, source = await SyncService.search_supplier(
session, cid, fiscal_code, name
)
return SupplierSearchResult(found=found, supplier=supplier, source=source)
@router.get("/suppliers", response_model=List[SupplierOption])
async def get_suppliers(
search: Optional[str] = None,
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Get all suppliers (synced + local) for dropdown/autocomplete."""
cid = company_id or selected_company
suppliers = await SyncService.get_all_suppliers(session, cid, search)
return [
SupplierOption(
id=s["id"],
oracle_id=s.get("oracle_id"),
name=s["name"],
fiscal_code=s.get("fiscal_code"),
source=s["source"]
)
for s in suppliers
]
@router.post("/suppliers/local", response_model=LocalSupplierResponse)
async def create_local_supplier(
data: LocalSupplierCreate,
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
current_user: CurrentUser = Depends(get_current_user),
):
"""Create a local supplier from OCR data."""
cid = company_id or selected_company
supplier = await SyncService.create_local_supplier(
session, cid, data.name, data.fiscal_code, data.address, current_user.username
)
return LocalSupplierResponse(
id=supplier.id,
name=supplier.name,
fiscal_code=supplier.fiscal_code,
address=supplier.address,
)
@router.get("/cash-registers", response_model=List[CashRegisterOption])
async def get_cash_registers(
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Get all cash registers for a company."""
cid = company_id or selected_company
registers = await SyncService.get_all_cash_registers(session, cid)
return [
CashRegisterOption(
id=r["id"],
oracle_id=r["oracle_id"],
name=r["name"],
account_code=r["account_code"],
register_type=r["register_type"]
)
for r in registers
]
@router.post("/sync/suppliers", response_model=SyncResult)
async def sync_suppliers(
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Manually trigger supplier sync from Oracle."""
cid = company_id or selected_company
synced, errors = await SyncService.sync_suppliers(session, cid)
return SyncResult(
synced=synced,
errors=errors,
message=f"Synced {synced} suppliers with {errors} errors"
)
@router.post("/sync/cash-registers", response_model=SyncResult)
async def sync_cash_registers(
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Manually trigger cash register sync from Oracle."""
cid = company_id or selected_company
synced, errors = await SyncService.sync_cash_registers(session, cid)
return SyncResult(
synced=synced,
errors=errors,
message=f"Synced {synced} cash registers with {errors} errors"
)
@router.post("/sync/all", response_model=dict)
async def sync_all_nomenclatures(
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Sync all nomenclatures (suppliers + cash registers) from Oracle."""
cid = company_id or selected_company
# Sync suppliers
suppliers_synced, suppliers_errors = await SyncService.sync_suppliers(session, cid)
# Sync cash registers
registers_synced, registers_errors = await SyncService.sync_cash_registers(session, cid)
return {
"suppliers": {
"synced": suppliers_synced,
"errors": suppliers_errors
},
"cash_registers": {
"synced": registers_synced,
"errors": registers_errors
},
"total_synced": suppliers_synced + registers_synced,
"total_errors": suppliers_errors + registers_errors,
"message": f"Synced {suppliers_synced} suppliers and {registers_synced} cash registers"
}

View File

@@ -0,0 +1,218 @@
"""OCR API endpoints."""
import os
import tempfile
from pathlib import Path
from fastapi import APIRouter, HTTPException, UploadFile, File, Depends
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
from backend.modules.data_entry.services.ocr_service import ocr_service
from backend.modules.data_entry.services.ocr_engine import OCREngine
from backend.modules.data_entry.schemas.ocr import OCRResponse, OCRStatusResponse, ExtractionData, TvaEntry, PaymentMethod
# Auth integration (will be protected by middleware)
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
router = APIRouter()
@router.get("/status", response_model=OCRStatusResponse)
async def get_ocr_status():
"""Check OCR service status and available engines."""
engines = OCREngine.get_available_engines()
available = len(engines) > 0
if available:
message = f"OCR service ready with engines: {', '.join(engines)}"
else:
message = "No OCR engines available. Install PaddleOCR or Tesseract."
return OCRStatusResponse(
available=available,
engines=engines,
message=message
)
@router.post("/extract", response_model=OCRResponse)
async def extract_from_image(file: UploadFile = File(...)):
"""
Extract receipt data from uploaded image.
Accepts JPG, PNG, or PDF files (max 10MB).
Returns extracted fields with confidence scores.
"""
allowed_types = ['image/jpeg', 'image/png', 'application/pdf']
if file.content_type not in allowed_types:
raise HTTPException(
status_code=400,
detail=f"File type not supported: {file.content_type}. Allowed: JPG, PNG, PDF"
)
# Get file extension
suffix = Path(file.filename).suffix.lower() if file.filename else '.jpg'
if suffix not in ['.jpg', '.jpeg', '.png', '.pdf']:
suffix = '.jpg'
# Save to temp file
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
content = await file.read()
# Check file size (10MB limit)
if len(content) > 10 * 1024 * 1024:
raise HTTPException(
status_code=400,
detail="File too large. Maximum size is 10MB."
)
tmp.write(content)
tmp_path = Path(tmp.name)
try:
success, message, result = await ocr_service.process_image(
tmp_path, file.content_type
)
if not success:
raise HTTPException(status_code=422, detail=message)
# Convert ExtractionResult to ExtractionData schema
# Convert tva_entries from dict to TvaEntry objects
tva_entries_schema = [
TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
for e in result.tva_entries
] if result.tva_entries else []
# Convert payment_methods from dict to PaymentMethod objects
from decimal import Decimal
payment_methods_list = [
PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount'])))
for pm in result.payment_methods
] if result.payment_methods else []
# Auto-suggest payment_mode based on detected methods
suggested_payment_mode = None
if payment_methods_list:
has_card = any(pm.method == 'CARD' for pm in payment_methods_list)
if has_card:
suggested_payment_mode = 'banca'
# NUMERAR -> no auto-suggestion, user chooses between casa/avans
data = ExtractionData(
receipt_type=result.receipt_type,
receipt_number=result.receipt_number,
receipt_series=result.receipt_series,
receipt_date=result.receipt_date,
amount=result.amount,
partner_name=result.partner_name,
cui=result.cui,
description=result.description,
tva_entries=tva_entries_schema,
tva_total=result.tva_total,
address=result.address,
items_count=result.items_count,
payment_methods=payment_methods_list,
suggested_payment_mode=suggested_payment_mode,
confidence_amount=result.confidence_amount,
confidence_date=result.confidence_date,
confidence_vendor=result.confidence_vendor,
overall_confidence=result.overall_confidence,
raw_text=result.raw_text,
ocr_engine=result.ocr_engine,
processing_time_ms=result.processing_time_ms,
)
return OCRResponse(success=True, message=message, data=data)
finally:
# Clean up temp file
if tmp_path.exists():
os.unlink(tmp_path)
@router.post("/extract-attachment/{attachment_id}", response_model=OCRResponse)
async def extract_from_attachment(
attachment_id: int,
session: AsyncSession = Depends(get_session),
):
"""
Extract receipt data from an existing attachment.
Re-processes an already uploaded file with OCR.
"""
attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
if not attachment:
raise HTTPException(status_code=404, detail="Attachment not found")
file_path = AttachmentCRUD.get_file_path(attachment)
if not file_path.exists():
raise HTTPException(status_code=404, detail="File not found on disk")
# Check if file type is supported
if attachment.mime_type not in ['image/jpeg', 'image/png', 'application/pdf']:
raise HTTPException(
status_code=400,
detail=f"File type not supported for OCR: {attachment.mime_type}"
)
success, message, result = await ocr_service.process_image(
file_path, attachment.mime_type
)
if not success:
raise HTTPException(status_code=422, detail=message)
# Convert ExtractionResult to ExtractionData schema
# Convert tva_entries from dict to TvaEntry objects
tva_entries_schema = [
TvaEntry(code=e.get('code'), percent=e['percent'], amount=e['amount'])
for e in result.tva_entries
] if result.tva_entries else []
# Convert payment_methods from dict to PaymentMethod objects
from decimal import Decimal
payment_methods_list = [
PaymentMethod(method=pm['method'], amount=Decimal(str(pm['amount'])))
for pm in result.payment_methods
] if result.payment_methods else []
# Auto-suggest payment_mode based on detected methods
suggested_payment_mode = None
if payment_methods_list:
has_card = any(pm.method == 'CARD' for pm in payment_methods_list)
if has_card:
suggested_payment_mode = 'banca'
# NUMERAR -> no auto-suggestion, user chooses between casa/avans
data = ExtractionData(
receipt_type=result.receipt_type,
receipt_number=result.receipt_number,
receipt_series=result.receipt_series,
receipt_date=result.receipt_date,
amount=result.amount,
partner_name=result.partner_name,
cui=result.cui,
description=result.description,
tva_entries=tva_entries_schema,
tva_total=result.tva_total,
address=result.address,
items_count=result.items_count,
payment_methods=payment_methods_list,
suggested_payment_mode=suggested_payment_mode,
confidence_amount=result.confidence_amount,
confidence_date=result.confidence_date,
confidence_vendor=result.confidence_vendor,
overall_confidence=result.overall_confidence,
raw_text=result.raw_text,
ocr_engine=result.ocr_engine,
processing_time_ms=result.processing_time_ms,
)
return OCRResponse(success=True, message=message, data=data)

View File

@@ -0,0 +1,517 @@
"""API endpoints for receipts."""
from typing import List, Optional, Annotated
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, Header
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.database import get_session
from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
from backend.modules.data_entry.db.crud.attachment import AttachmentCRUD
from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
from backend.modules.data_entry.services.receipt_service import ReceiptService
from backend.modules.data_entry.services.nomenclature_service import NomenclatureService
from backend.modules.data_entry.schemas.receipt import (
ReceiptCreate,
ReceiptUpdate,
ReceiptResponse,
ReceiptListResponse,
ReceiptFilter,
AttachmentResponse,
AccountingEntryResponse,
WorkflowAction,
RejectRequest,
EntriesUpdateRequest,
PartnerOption,
AccountOption,
CashRegisterOption,
ExpenseTypeOption,
)
from backend.modules.data_entry.db.models.receipt import ReceiptStatus, ReceiptDirection
# Auth integration
from shared.auth.dependencies import get_current_user
from shared.auth.models import CurrentUser
router = APIRouter()
# ============ Helper for selected company from header ============
async def get_selected_company(
current_user: CurrentUser = Depends(get_current_user),
x_selected_company: Annotated[Optional[str], Header()] = None
) -> int:
"""
Get selected company from X-Selected-Company header.
Validates that the user has access to the specified company.
Falls back to user's first company if no header is provided.
Raises:
HTTPException 403: If user doesn't have access to specified company
HTTPException 400: If user has no companies assigned
"""
if x_selected_company:
try:
company_id = int(x_selected_company)
except ValueError:
raise HTTPException(
status_code=400,
detail=f"Invalid company ID format: {x_selected_company}"
)
# Validate user has access to this company
# Auth stores companies as strings
if str(company_id) in current_user.companies:
return company_id
raise HTTPException(
status_code=403,
detail=f"Nu aveți acces la firma {company_id}"
)
# No header - use first company from user's list
if current_user.companies:
try:
return int(current_user.companies[0])
except (ValueError, IndexError):
pass
raise HTTPException(
status_code=400,
detail="Nu aveți nicio firmă asignată"
)
# Dependency for injection
SelectedCompany = Annotated[int, Depends(get_selected_company)]
# Legacy function for backwards compatibility (deprecated)
def get_current_user_company(current_user: CurrentUser) -> int:
"""
DEPRECATED: Use get_selected_company() dependency instead.
This function returns the first company, ignoring X-Selected-Company header.
"""
if current_user.companies:
try:
return int(current_user.companies[0])
except (ValueError, IndexError):
return 1
return 1
# ============ Receipt CRUD Endpoints ============
@router.post("/", response_model=ReceiptResponse)
async def create_receipt(
data: ReceiptCreate,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Create a new receipt in DRAFT status."""
receipt = await ReceiptService.create_receipt(session, data, current_user.username)
return ReceiptResponse.model_validate(receipt)
@router.get("/", response_model=ReceiptListResponse)
async def list_receipts(
status: Optional[ReceiptStatus] = None,
direction: Optional[ReceiptDirection] = None,
company_id: Optional[int] = None,
created_by: Optional[str] = None,
date_from: Optional[str] = None,
date_to: Optional[str] = None,
search: Optional[str] = None,
page: int = Query(default=1, ge=1),
page_size: int = Query(default=20, ge=1, le=100),
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Get paginated list of receipts with filters."""
from datetime import date as date_type
filters = ReceiptFilter(
status=status,
direction=direction,
company_id=company_id or selected_company,
created_by=created_by,
date_from=date_type.fromisoformat(date_from) if date_from else None,
date_to=date_type.fromisoformat(date_to) if date_to else None,
search=search,
page=page,
page_size=page_size,
)
return await ReceiptService.get_receipts(session, filters)
@router.get("/pending", response_model=List[ReceiptResponse])
async def list_pending_receipts(
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Get all receipts pending review (for accountant view)."""
receipts = await ReceiptCRUD.get_pending_review(
session, company_id or selected_company
)
return [ReceiptResponse.model_validate(r) for r in receipts]
@router.get("/stats")
async def get_receipt_stats(
company_id: Optional[int] = None,
my_receipts: bool = False,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
current_user: CurrentUser = Depends(get_current_user),
):
"""Get receipt statistics."""
return await ReceiptCRUD.get_stats(
session,
company_id or selected_company,
created_by=current_user.username if my_receipts else None,
)
@router.get("/{receipt_id}", response_model=ReceiptResponse)
async def get_receipt(
receipt_id: int,
session: AsyncSession = Depends(get_session),
):
"""Get receipt details with attachments and accounting entries."""
receipt = await ReceiptService.get_receipt(session, receipt_id)
if not receipt:
raise HTTPException(status_code=404, detail="Receipt not found")
return ReceiptResponse.model_validate(receipt)
@router.put("/{receipt_id}", response_model=ReceiptResponse)
async def update_receipt(
receipt_id: int,
data: ReceiptUpdate,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Update receipt (only DRAFT status, only by creator)."""
success, message, receipt = await ReceiptService.update_receipt(
session, receipt_id, data, current_user.username
)
if not success:
raise HTTPException(status_code=400, detail=message)
return ReceiptResponse.model_validate(receipt)
@router.delete("/{receipt_id}")
async def delete_receipt(
receipt_id: int,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Delete receipt (only DRAFT status, only by creator)."""
success, message = await ReceiptService.delete_receipt(
session, receipt_id, current_user.username
)
if not success:
raise HTTPException(status_code=400, detail=message)
return {"success": True, "message": message}
# ============ Workflow Endpoints ============
@router.post("/{receipt_id}/submit", response_model=WorkflowAction)
async def submit_receipt(
receipt_id: int,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Submit receipt for review (DRAFT → PENDING_REVIEW)."""
success, message, receipt = await ReceiptService.submit_for_review(
session, receipt_id, current_user.username
)
return WorkflowAction(
success=success,
message=message,
receipt=ReceiptResponse.model_validate(receipt) if receipt else None,
)
@router.post("/{receipt_id}/approve", response_model=WorkflowAction)
async def approve_receipt(
receipt_id: int,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Approve receipt (PENDING_REVIEW → APPROVED). Accountant action."""
success, message, receipt = await ReceiptService.approve_receipt(
session, receipt_id, current_user.username
)
return WorkflowAction(
success=success,
message=message,
receipt=ReceiptResponse.model_validate(receipt) if receipt else None,
)
@router.post("/{receipt_id}/reject", response_model=WorkflowAction)
async def reject_receipt(
receipt_id: int,
data: RejectRequest,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Reject receipt (PENDING_REVIEW → REJECTED). Accountant action."""
success, message, receipt = await ReceiptService.reject_receipt(
session, receipt_id, current_user.username, data.reason
)
return WorkflowAction(
success=success,
message=message,
receipt=ReceiptResponse.model_validate(receipt) if receipt else None,
)
@router.post("/{receipt_id}/resubmit", response_model=WorkflowAction)
async def resubmit_receipt(
receipt_id: int,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Resubmit rejected receipt after corrections (REJECTED → PENDING_REVIEW)."""
success, message, receipt = await ReceiptService.resubmit_receipt(
session, receipt_id, current_user.username
)
return WorkflowAction(
success=success,
message=message,
receipt=ReceiptResponse.model_validate(receipt) if receipt else None,
)
@router.post("/{receipt_id}/unapprove", response_model=WorkflowAction)
async def unapprove_receipt(
receipt_id: int,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Unapprove receipt (APPROVED → PENDING_REVIEW). Returns to pending for corrections."""
success, message, receipt = await ReceiptService.unapprove_receipt(
session, receipt_id, current_user.username
)
return WorkflowAction(
success=success,
message=message,
receipt=ReceiptResponse.model_validate(receipt) if receipt else None,
)
# ============ Accounting Entries Endpoints ============
@router.get("/{receipt_id}/entries", response_model=List[AccountingEntryResponse])
async def get_receipt_entries(
receipt_id: int,
session: AsyncSession = Depends(get_session),
):
"""Get accounting entries for a receipt."""
entries = await AccountingEntryCRUD.get_by_receipt_id(session, receipt_id)
return [AccountingEntryResponse.model_validate(e) for e in entries]
@router.put("/{receipt_id}/entries", response_model=List[AccountingEntryResponse])
async def update_receipt_entries(
receipt_id: int,
data: EntriesUpdateRequest,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Update accounting entries for a receipt (accountant action)."""
success, message, entries = await ReceiptService.update_entries(
session, receipt_id, data.entries, current_user.username
)
if not success:
raise HTTPException(status_code=400, detail=message)
return [AccountingEntryResponse.model_validate(e) for e in entries]
@router.post("/{receipt_id}/entries/regenerate", response_model=List[AccountingEntryResponse])
async def regenerate_entries(
receipt_id: int,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Regenerate accounting entries based on receipt data."""
success, message, _ = await ReceiptService.regenerate_entries(
session, receipt_id, current_user.username
)
if not success:
raise HTTPException(status_code=400, detail=message)
entries = await AccountingEntryCRUD.get_by_receipt_id(session, receipt_id)
return [AccountingEntryResponse.model_validate(e) for e in entries]
# ============ Attachment Endpoints ============
@router.post("/{receipt_id}/attachments", response_model=AttachmentResponse)
async def upload_attachment(
receipt_id: int,
file: UploadFile = File(...),
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Upload attachment for a receipt."""
# Check receipt exists and user can modify it
receipt = await ReceiptCRUD.get_by_id(session, receipt_id, include_relations=False)
if not receipt:
raise HTTPException(status_code=404, detail="Receipt not found")
# Only allow uploads for DRAFT and REJECTED receipts
if receipt.status not in [ReceiptStatus.DRAFT, ReceiptStatus.REJECTED]:
raise HTTPException(
status_code=400,
detail="Cannot upload attachments for this receipt status"
)
# Only creator can upload
if receipt.created_by != current_user.username:
raise HTTPException(
status_code=403,
detail="Only the creator can upload attachments"
)
try:
attachment = await AttachmentCRUD.create(session, receipt_id, file)
return AttachmentResponse.model_validate(attachment)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
@router.get("/{receipt_id}/attachments", response_model=List[AttachmentResponse])
async def list_attachments(
receipt_id: int,
session: AsyncSession = Depends(get_session),
):
"""Get all attachments for a receipt."""
attachments = await AttachmentCRUD.get_by_receipt_id(session, receipt_id)
return [AttachmentResponse.model_validate(a) for a in attachments]
@router.get("/attachments/{attachment_id}/download")
async def download_attachment(
attachment_id: int,
session: AsyncSession = Depends(get_session),
):
"""Download an attachment file."""
attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
if not attachment:
raise HTTPException(status_code=404, detail="Attachment not found")
file_path = AttachmentCRUD.get_file_path(attachment)
if not file_path.exists():
raise HTTPException(status_code=404, detail="File not found on disk")
return FileResponse(
path=str(file_path),
filename=attachment.filename,
media_type=attachment.mime_type,
)
@router.delete("/attachments/{attachment_id}")
async def delete_attachment(
attachment_id: int,
session: AsyncSession = Depends(get_session),
current_user: CurrentUser = Depends(get_current_user),
):
"""Delete an attachment."""
attachment = await AttachmentCRUD.get_by_id(session, attachment_id)
if not attachment:
raise HTTPException(status_code=404, detail="Attachment not found")
# Get receipt to check permissions
receipt = await ReceiptCRUD.get_by_id(session, attachment.receipt_id, include_relations=False)
if not receipt:
raise HTTPException(status_code=404, detail="Receipt not found")
# Only allow deletion for DRAFT receipts by creator
if receipt.status != ReceiptStatus.DRAFT:
raise HTTPException(
status_code=400,
detail="Cannot delete attachments for this receipt status"
)
if receipt.created_by != current_user.username:
raise HTTPException(
status_code=403,
detail="Only the creator can delete attachments"
)
await AttachmentCRUD.delete(session, attachment)
return {"success": True, "message": "Attachment deleted"}
# ============ Nomenclature Endpoints ============
@router.get("/nomenclature/partners", response_model=List[PartnerOption])
async def get_partners(
search: Optional[str] = None,
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Get partners (suppliers/customers) for dropdown."""
return await NomenclatureService.get_partners(
company_id or selected_company, search, session
)
@router.get("/nomenclature/accounts", response_model=List[AccountOption])
async def get_accounts(
prefix: Optional[str] = None,
company_id: Optional[int] = None,
selected_company: SelectedCompany = None,
):
"""Get chart of accounts for dropdown."""
return await NomenclatureService.get_accounts(
company_id or selected_company, prefix
)
@router.get("/nomenclature/cash-registers", response_model=List[CashRegisterOption])
async def get_cash_registers(
company_id: Optional[int] = None,
session: AsyncSession = Depends(get_session),
selected_company: SelectedCompany = None,
):
"""Get cash registers and bank accounts for dropdown."""
return await NomenclatureService.get_cash_registers(company_id or selected_company, session)
@router.get("/nomenclature/expense-types", response_model=List[ExpenseTypeOption])
async def get_expense_types():
"""Get predefined expense types for dropdown."""
return await NomenclatureService.get_expense_types()

View File

@@ -0,0 +1,28 @@
# Pydantic schemas
from .receipt import (
ReceiptCreate,
ReceiptUpdate,
ReceiptResponse,
ReceiptListResponse,
ReceiptFilter,
AttachmentResponse,
AccountingEntryCreate,
AccountingEntryUpdate,
AccountingEntryResponse,
WorkflowAction,
RejectRequest,
)
__all__ = [
"ReceiptCreate",
"ReceiptUpdate",
"ReceiptResponse",
"ReceiptListResponse",
"ReceiptFilter",
"AttachmentResponse",
"AccountingEntryCreate",
"AccountingEntryUpdate",
"AccountingEntryResponse",
"WorkflowAction",
"RejectRequest",
]

View File

@@ -0,0 +1,122 @@
"""Pydantic schemas for OCR API."""
from datetime import date
from decimal import Decimal
from typing import Optional, List
from pydantic import BaseModel, Field
class TvaEntry(BaseModel):
"""Single TVA entry with code, percentage and amount."""
code: Optional[str] = Field(default=None, description="TVA code: A, B, C, D")
percent: int = Field(description="TVA percentage: 0, 5, 9, 19, 21")
amount: Decimal = Field(description="TVA amount for this rate")
class PaymentMethod(BaseModel):
"""Payment method entry from OCR."""
method: str = Field(description="CARD or NUMERAR")
amount: Decimal = Field(description="Amount paid")
class ExtractionData(BaseModel):
"""Extracted receipt data from OCR."""
receipt_type: str = Field(default='bon_fiscal', description="Receipt type: bon_fiscal or chitanta")
receipt_number: Optional[str] = Field(default=None, description="Receipt number")
receipt_series: Optional[str] = Field(default=None, description="Receipt series")
receipt_date: Optional[date] = Field(default=None, description="Receipt date")
amount: Optional[Decimal] = Field(default=None, description="Total amount")
partner_name: Optional[str] = Field(default=None, description="Vendor/partner name")
cui: Optional[str] = Field(default=None, description="CUI (fiscal identification code)")
description: Optional[str] = Field(default=None, description="Optional description")
# Additional extracted fields - Multiple TVA entries support
tva_entries: List[TvaEntry] = Field(default=[], description="List of TVA entries by rate (A, B, C, D)")
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
address: Optional[str] = Field(default=None, description="Vendor address")
items_count: Optional[int] = Field(default=None, description="Number of items/articles")
# Payment methods extracted from receipt
payment_methods: List[PaymentMethod] = Field(default=[], description="Payment methods from receipt (CARD, NUMERAR)")
suggested_payment_mode: Optional[str] = Field(default=None, description="Auto-suggested payment mode based on OCR (casa/banca)")
# Client data (for B2B receipts - buyer information)
client_name: Optional[str] = Field(default=None, description="Client/customer company name")
client_cui: Optional[str] = Field(default=None, description="Client CUI/CIF fiscal code")
client_address: Optional[str] = Field(default=None, description="Client address")
confidence_amount: float = Field(default=0.0, ge=0, le=1, description="Amount extraction confidence")
confidence_date: float = Field(default=0.0, ge=0, le=1, description="Date extraction confidence")
confidence_vendor: float = Field(default=0.0, ge=0, le=1, description="Vendor extraction confidence")
confidence_client: float = Field(default=0.0, ge=0, le=1, description="Client extraction confidence")
overall_confidence: float = Field(default=0.0, ge=0, le=1, description="Overall confidence score")
raw_text: str = Field(default="", description="Raw OCR text")
ocr_engine: str = Field(default="", description="OCR engine used: paddleocr or tesseract")
processing_time_ms: int = Field(default=0, ge=0, description="Processing time in milliseconds")
class Config:
"""Pydantic config."""
json_schema_extra = {
"example": {
"receipt_type": "bon_fiscal",
"receipt_number": "1360760",
"receipt_series": "0146",
"receipt_date": "2025-10-11",
"amount": 186.16,
"partner_name": "FIVE-HOLDING S.A.",
"cui": "10562600",
"description": None,
"tva_entries": [
{"code": "A", "percent": 19, "amount": 25.00},
{"code": "B", "percent": 9, "amount": 7.31}
],
"tva_total": 32.31,
"address": "JUD. CONSTANTA, MUN. CONSTANTA, STR. ION ROATA NR. 3",
"items_count": 17,
"confidence_amount": 0.98,
"confidence_date": 0.98,
"confidence_vendor": 0.95,
"overall_confidence": 0.97,
"raw_text": "FIVE-HOLDING S.A.\nCIF: RO10562600\n..."
}
}
class OCRResponse(BaseModel):
"""OCR API response."""
success: bool = Field(description="Whether OCR processing was successful")
message: str = Field(description="Status message")
data: Optional[ExtractionData] = Field(default=None, description="Extracted data")
class Config:
"""Pydantic config."""
json_schema_extra = {
"example": {
"success": True,
"message": "OCR processing successful. Found: amount, date, vendor",
"data": {
"receipt_type": "bon_fiscal",
"receipt_number": "12345",
"receipt_date": "2024-01-15",
"amount": 125.50,
"partner_name": "MEGA IMAGE SRL",
"cui": "12345678",
"confidence_amount": 0.95,
"confidence_date": 0.90,
"confidence_vendor": 0.75,
"overall_confidence": 0.87,
"raw_text": "BON FISCAL\nMEGA IMAGE SRL\n..."
}
}
}
class OCRStatusResponse(BaseModel):
"""OCR service status response."""
available: bool = Field(description="Whether OCR service is available")
engines: list[str] = Field(description="Available OCR engines")
message: str = Field(description="Status message")

View File

@@ -0,0 +1,269 @@
"""Pydantic schemas for receipts API."""
import json
from datetime import datetime, date
from decimal import Decimal
from typing import Optional, List, Any, Union
from pydantic import BaseModel, Field, ConfigDict, field_validator
from backend.modules.data_entry.db.models.receipt import ReceiptType, ReceiptDirection, ReceiptStatus
from backend.modules.data_entry.db.models.accounting_entry import EntryType
# ============ Accounting Entry Schemas ============
class AccountingEntryBase(BaseModel):
"""Base schema for accounting entry."""
entry_type: EntryType
account_code: str = Field(max_length=20)
account_name: Optional[str] = Field(default=None, max_length=200)
amount: Decimal
partner_id: Optional[int] = None
cost_center_id: Optional[int] = None
class AccountingEntryCreate(AccountingEntryBase):
"""Schema for creating an accounting entry."""
pass
class AccountingEntryUpdate(BaseModel):
"""Schema for updating an accounting entry."""
entry_type: Optional[EntryType] = None
account_code: Optional[str] = Field(default=None, max_length=20)
account_name: Optional[str] = Field(default=None, max_length=200)
amount: Optional[Decimal] = None
partner_id: Optional[int] = None
cost_center_id: Optional[int] = None
class AccountingEntryResponse(AccountingEntryBase):
"""Schema for accounting entry response."""
model_config = ConfigDict(from_attributes=True)
id: int
receipt_id: int
is_auto_generated: bool
modified_by: Optional[str] = None
modified_at: Optional[datetime] = None
sort_order: int
# ============ Attachment Schemas ============
class AttachmentResponse(BaseModel):
"""Schema for attachment response."""
model_config = ConfigDict(from_attributes=True)
id: int
receipt_id: int
filename: str
stored_filename: str
file_path: str
file_size: int
mime_type: str
uploaded_at: datetime
# ============ TVA Schema ============
class TvaEntrySchema(BaseModel):
"""Single TVA entry with code, percentage and amount."""
code: Optional[str] = Field(default=None, description="TVA code: A, B, C, D")
percent: int = Field(description="TVA percentage: 0, 5, 9, 19, 21")
amount: Decimal = Field(description="TVA amount for this rate")
class PaymentMethodSchema(BaseModel):
"""Payment method entry (CARD/NUMERAR)."""
method: str = Field(description="Payment method: CARD or NUMERAR")
amount: Decimal = Field(description="Amount paid with this method")
# ============ Receipt Schemas ============
class ReceiptBase(BaseModel):
"""Base schema for receipt."""
receipt_type: ReceiptType = ReceiptType.BON_FISCAL
direction: ReceiptDirection = ReceiptDirection.CHELTUIALA
receipt_number: Optional[str] = Field(default=None, max_length=50)
receipt_series: Optional[str] = Field(default=None, max_length=20)
receipt_date: date
amount: Decimal = Field(gt=0)
description: Optional[str] = Field(default=None, max_length=500)
# TVA info (multiple entries support)
tva_breakdown: Optional[List[TvaEntrySchema]] = Field(default=None, description="List of TVA entries")
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
items_count: Optional[int] = Field(default=None, description="Number of items")
vendor_address: Optional[str] = Field(default=None, max_length=500, description="Vendor address")
# Other fields
expense_type_code: Optional[str] = Field(default=None, max_length=20)
company_id: int
# partner_id removed - supplier data is text-only (partner_name, cui)
partner_name: Optional[str] = Field(default=None, max_length=200)
cui: Optional[str] = Field(default=None, max_length=20, description="Fiscal code (CUI) from OCR")
ocr_raw_text: Optional[str] = Field(default=None, description="Raw OCR text for debugging")
payment_methods: Optional[List[PaymentMethodSchema]] = Field(default=None, description="Payment methods from OCR")
cash_register_id: Optional[int] = None
cash_register_name: Optional[str] = Field(default=None, max_length=100)
cash_register_account: Optional[str] = Field(default=None, max_length=20)
payment_mode: Optional[str] = Field(default=None, description="Payment mode: casa/banca/avans_decontare")
class ReceiptCreate(ReceiptBase):
"""Schema for creating a receipt."""
pass
class ReceiptUpdate(BaseModel):
"""Schema for updating a receipt (DRAFT only)."""
receipt_type: Optional[ReceiptType] = None
direction: Optional[ReceiptDirection] = None
receipt_number: Optional[str] = Field(default=None, max_length=50)
receipt_series: Optional[str] = Field(default=None, max_length=20)
receipt_date: Optional[date] = None
amount: Optional[Decimal] = Field(default=None, gt=0)
description: Optional[str] = Field(default=None, max_length=500)
# TVA info (multiple entries support)
tva_breakdown: Optional[List[TvaEntrySchema]] = Field(default=None, description="List of TVA entries")
tva_total: Optional[Decimal] = Field(default=None, description="Total TVA amount")
items_count: Optional[int] = Field(default=None, description="Number of items")
vendor_address: Optional[str] = Field(default=None, max_length=500, description="Vendor address")
# Other fields
expense_type_code: Optional[str] = Field(default=None, max_length=20)
# partner_id removed - supplier data is text-only (partner_name, cui)
partner_name: Optional[str] = Field(default=None, max_length=200)
cui: Optional[str] = Field(default=None, max_length=20, description="Fiscal code (CUI) from OCR")
ocr_raw_text: Optional[str] = Field(default=None, description="Raw OCR text for debugging")
payment_methods: Optional[List[PaymentMethodSchema]] = Field(default=None, description="Payment methods from OCR")
cash_register_id: Optional[int] = None
cash_register_name: Optional[str] = Field(default=None, max_length=100)
cash_register_account: Optional[str] = Field(default=None, max_length=20)
payment_mode: Optional[str] = Field(default=None, description="Payment mode: casa/banca/avans_decontare")
class ReceiptResponse(ReceiptBase):
"""Schema for receipt response with all fields."""
model_config = ConfigDict(from_attributes=True)
id: int
# Override amount to allow zero values in response (validation is on input, not output)
amount: Decimal
status: ReceiptStatus
created_by: str
created_at: datetime
updated_at: datetime
submitted_at: Optional[datetime] = None
reviewed_by: Optional[str] = None
reviewed_at: Optional[datetime] = None
rejection_reason: Optional[str] = None
oracle_synced_at: Optional[datetime] = None
oracle_act_id: Optional[int] = None
oracle_error: Optional[str] = None
# Relationships (optional, loaded when needed)
attachments: List[AttachmentResponse] = []
entries: List[AccountingEntryResponse] = []
@field_validator('tva_breakdown', mode='before')
@classmethod
def parse_tva_breakdown(cls, v: Any) -> Optional[List[dict]]:
"""Deserialize tva_breakdown from JSON string if needed."""
if v is None:
return None
if isinstance(v, str):
try:
return json.loads(v)
except (json.JSONDecodeError, TypeError):
return None
if isinstance(v, list):
return v
return None
@field_validator('payment_methods', mode='before')
@classmethod
def parse_payment_methods(cls, v: Any) -> Optional[List[dict]]:
"""Deserialize payment_methods from JSON string if needed."""
if v is None:
return None
if isinstance(v, str):
try:
return json.loads(v)
except (json.JSONDecodeError, TypeError):
return None
if isinstance(v, list):
return v
return None
class ReceiptListResponse(BaseModel):
"""Schema for paginated receipt list response."""
items: List[ReceiptResponse]
total: int
page: int
page_size: int
pages: int
class ReceiptFilter(BaseModel):
"""Schema for filtering receipts."""
status: Optional[ReceiptStatus] = None
direction: Optional[ReceiptDirection] = None
company_id: Optional[int] = None
created_by: Optional[str] = None
date_from: Optional[date] = None
date_to: Optional[date] = None
search: Optional[str] = None # Search in description, partner_name
page: int = Field(default=1, ge=1)
page_size: int = Field(default=20, ge=1, le=100)
# ============ Workflow Schemas ============
class WorkflowAction(BaseModel):
"""Schema for workflow action response."""
success: bool
message: str
receipt: Optional[ReceiptResponse] = None
class RejectRequest(BaseModel):
"""Schema for rejection request."""
reason: str = Field(min_length=5, max_length=500)
class EntriesUpdateRequest(BaseModel):
"""Schema for bulk updating accounting entries."""
entries: List[AccountingEntryCreate]
# ============ Nomenclature Schemas ============
class PartnerOption(BaseModel):
"""Schema for partner dropdown option (used for autocomplete assistance)."""
name: str
fiscal_code: Optional[str] = None
address: Optional[str] = None
source: str = "oracle" # 'oracle' (synced) or 'local'
class AccountOption(BaseModel):
"""Schema for account dropdown option."""
code: str
name: str
class CashRegisterOption(BaseModel):
"""Schema for cash register dropdown option."""
id: int
name: str
account_code: str # 5311, 5121, etc.
class ExpenseTypeOption(BaseModel):
"""Schema for expense type dropdown option."""
code: str
name: str
account_code: str
has_vat: bool
vat_percent: Decimal = Decimal("19")

View File

@@ -0,0 +1,11 @@
# Business logic services
from .receipt_service import ReceiptService
from .nomenclature_service import NomenclatureService
from .expense_types import EXPENSE_TYPES, ExpenseType
__all__ = [
"ReceiptService",
"NomenclatureService",
"EXPENSE_TYPES",
"ExpenseType",
]

View File

@@ -0,0 +1,101 @@
"""Predefined expense types for automatic accounting entry generation."""
from decimal import Decimal
from dataclasses import dataclass
from typing import Dict, Optional
@dataclass
class ExpenseType:
"""Expense type definition with accounting configuration."""
code: str
name: str
account_code: str
account_name: str
has_vat: bool
vat_percent: Decimal = Decimal("19")
vat_account: str = "4426"
# Predefined expense types
EXPENSE_TYPES: Dict[str, ExpenseType] = {
"FUEL": ExpenseType(
code="FUEL",
name="Combustibil",
account_code="6022",
account_name="Cheltuieli cu combustibilii",
has_vat=True,
),
"MATERIALS": ExpenseType(
code="MATERIALS",
name="Materiale consumabile",
account_code="6028",
account_name="Alte cheltuieli cu materiale consumabile",
has_vat=True,
),
"OFFICE": ExpenseType(
code="OFFICE",
name="Rechizite birou",
account_code="6024",
account_name="Cheltuieli privind materialele pentru ambalat",
has_vat=True,
),
"PHONE": ExpenseType(
code="PHONE",
name="Telefonie / Internet",
account_code="626",
account_name="Cheltuieli postale si taxe de telecomunicatii",
has_vat=True,
),
"PARKING": ExpenseType(
code="PARKING",
name="Parcare",
account_code="6022",
account_name="Cheltuieli cu combustibilii",
has_vat=True,
),
"FOOD": ExpenseType(
code="FOOD",
name="Alimentatie",
account_code="6028",
account_name="Alte cheltuieli cu materiale consumabile",
has_vat=False, # No deductible VAT for food
),
"TRANSPORT": ExpenseType(
code="TRANSPORT",
name="Transport",
account_code="624",
account_name="Cheltuieli cu transportul de bunuri si personal",
has_vat=True,
),
"OTHER": ExpenseType(
code="OTHER",
name="Altele",
account_code="628",
account_name="Alte cheltuieli cu serviciile executate de terti",
has_vat=True,
),
}
def get_expense_type(code: str) -> Optional[ExpenseType]:
"""Get expense type by code."""
return EXPENSE_TYPES.get(code)
def get_all_expense_types() -> Dict[str, ExpenseType]:
"""Get all expense types."""
return EXPENSE_TYPES.copy()
# Default cash register accounts
CASH_REGISTER_ACCOUNTS = {
"CASA": {
"code": "5311",
"name": "Casa in lei",
},
"BANCA": {
"code": "5121",
"name": "Conturi la banci in lei",
},
}

View File

@@ -0,0 +1,270 @@
"""Image preprocessing for optimal OCR results."""
from pathlib import Path
from typing import List
import numpy as np
import cv2
try:
import pdf2image
PDF_AVAILABLE = True
except ImportError:
PDF_AVAILABLE = False
class ImagePreprocessor:
"""Preprocess receipt images for OCR."""
def _add_safety_padding(self, image: np.ndarray, padding: int = 50) -> np.ndarray:
"""Add white padding around image to protect edge content during rotation.
This prevents left/right margin truncation in OCR by ensuring text near
edges isn't lost during deskew rotation.
"""
if len(image.shape) == 2:
# Grayscale
return cv2.copyMakeBorder(
image, padding, padding, padding, padding,
cv2.BORDER_CONSTANT, value=255
)
else:
# Color (BGR)
return cv2.copyMakeBorder(
image, padding, padding, padding, padding,
cv2.BORDER_CONSTANT, value=(255, 255, 255)
)
def load_image(self, path: Path) -> np.ndarray:
"""Load image from file."""
image = cv2.imread(str(path))
if image is None:
raise ValueError(f"Could not load image: {path}")
return image
def pdf_to_images(self, path: Path, dpi: int = 300) -> List[np.ndarray]:
"""
Convert PDF to images.
Args:
path: Path to PDF file
dpi: Resolution (300 = fast & good quality, 400 = better but slower)
"""
if not PDF_AVAILABLE:
raise RuntimeError("pdf2image not available. Install with: pip install pdf2image")
images = pdf2image.convert_from_path(str(path), dpi=dpi)
return [np.array(img) for img in images]
def preprocess(self, image: np.ndarray, high_quality: bool = True) -> np.ndarray:
"""
Apply LIGHT preprocessing - better for clear PDFs.
Heavy binarization can destroy text on clear images.
"""
return self.preprocess_light(image)
def preprocess_light(self, image: np.ndarray) -> np.ndarray:
"""
Light preprocessing for CLEAR images (PDFs, good scans).
Preserves original quality, only enhances contrast.
"""
# 0. Add safety padding to protect edge content during deskew rotation
image = self._add_safety_padding(image)
# 1. Grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image.copy()
# 2a. Scale DOWN if any side exceeds 4000px (PaddleOCR limit)
height, width = gray.shape
max_side = max(height, width)
if max_side > 4000:
scale = 4000 / max_side
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
height, width = gray.shape
# 2b. Scale UP if too small
if width < 1500:
scale = 1500 / width
# Ensure we don't exceed 4000px after upscaling
new_width = int(width * scale)
new_height = int(height * scale)
if max(new_width, new_height) > 4000:
scale = 4000 / max(new_width, new_height)
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
# 3. Deskew
gray = self._deskew(gray)
# 4. Light contrast enhancement only
clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# NO binarization, NO morphological ops - preserve original quality
return enhanced
def preprocess_heavy(self, image: np.ndarray) -> np.ndarray:
"""
Heavy preprocessing for FADED thermal receipts.
Aggressive binarization to recover faded text.
"""
# 0. Add safety padding to protect edge content during deskew rotation
image = self._add_safety_padding(image)
# 1. Grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image.copy()
# 2a. Scale DOWN if any side exceeds 4000px (PaddleOCR limit)
height, width = gray.shape
max_side = max(height, width)
if max_side > 4000:
scale = 4000 / max_side
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
height, width = gray.shape
# 2b. Scale UP if too small (larger = better OCR)
if width < 1500:
scale = 1500 / width
# Ensure we don't exceed 4000px after upscaling
new_width = int(width * scale)
new_height = int(height * scale)
if max(new_width, new_height) > 4000:
scale = 4000 / max(new_width, new_height)
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
# 3. Deskew
gray = self._deskew(gray)
# 4. Contrast enhancement with CLAHE
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# 5. Denoise
denoised = cv2.fastNlMeansDenoising(enhanced, h=8, templateWindowSize=7, searchWindowSize=21)
# 6. Sharpening
gaussian = cv2.GaussianBlur(denoised, (0, 0), 2.0)
sharpened = cv2.addWeighted(denoised, 1.5, gaussian, -0.5, 0)
# 7. Adaptive thresholding (binarization)
binary = cv2.adaptiveThreshold(
sharpened, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
blockSize=11, C=5
)
# 8. Morphological operations
kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
result = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel_close)
return result
def preprocess_for_tesseract(self, image: np.ndarray) -> np.ndarray:
"""
Tesseract-optimized preprocessing.
Tesseract works best with:
- Clean black text on white background (binarized)
- High DPI (scale up small images)
- Otsu thresholding (better than adaptive for clean documents)
"""
# 0. Add safety padding to protect edge content during deskew rotation
image = self._add_safety_padding(image)
# 1. Grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image.copy()
# 2. Scale for optimal Tesseract (target ~2000px width for receipts)
height, width = gray.shape
if width < 2000:
scale = 2000 / width
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
elif width > 3000:
scale = 3000 / width
gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
# 3. Deskew
gray = self._deskew(gray)
# 4. Strong contrast enhancement
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# 5. Denoise before binarization
denoised = cv2.fastNlMeansDenoising(enhanced, h=10, templateWindowSize=7, searchWindowSize=21)
# 6. Otsu binarization (better than adaptive for clean PDFs)
_, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 7. Light morphological cleanup
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
cleaned = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
return cleaned
def get_all_variants(self, image: np.ndarray) -> List[np.ndarray]:
"""
Generate 2 preprocessing variants for OCR (fast mode).
Returns: [light_processed, heavy_processed]
"""
return [
self.preprocess_light(image),
self.preprocess_heavy(image),
]
def _deskew(self, image: np.ndarray) -> np.ndarray:
"""Correct image rotation/skew using Hough lines.
Uses expanded canvas to preserve all content during rotation,
preventing left/right margin truncation.
"""
edges = cv2.Canny(image, 50, 150, apertureSize=3)
lines = cv2.HoughLinesP(
edges, 1, np.pi / 180,
threshold=100, minLineLength=100, maxLineGap=10
)
if lines is None:
return image
angles = []
for line in lines:
x1, y1, x2, y2 = line[0]
angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
if abs(angle) < 45:
angles.append(angle)
if not angles:
return image
median_angle = np.median(angles)
if abs(median_angle) < 0.5:
return image
h, w = image.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
# Calculate new canvas size to fit entire rotated image (prevents edge truncation)
cos_angle = abs(np.cos(np.radians(median_angle)))
sin_angle = abs(np.sin(np.radians(median_angle)))
new_w = int(h * sin_angle + w * cos_angle)
new_h = int(h * cos_angle + w * sin_angle)
# Adjust rotation matrix for new canvas center
M[0, 2] += (new_w - w) / 2
M[1, 2] += (new_h - h) / 2
return cv2.warpAffine(
image, M, (new_w, new_h),
flags=cv2.INTER_CUBIC,
borderMode=cv2.BORDER_CONSTANT,
borderValue=255 # White background (grayscale)
)

View File

@@ -0,0 +1,234 @@
"""Service for fetching nomenclatures from Oracle (read-only)."""
from typing import List, Optional
from decimal import Decimal
from sqlmodel import select
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.schemas.receipt import (
PartnerOption,
AccountOption,
CashRegisterOption,
ExpenseTypeOption,
)
from backend.modules.data_entry.services.expense_types import EXPENSE_TYPES
from backend.modules.data_entry.db.models.nomenclature import SyncedSupplier, LocalSupplier, SyncedCashRegister
class NomenclatureService:
"""
Service for fetching nomenclatures.
In Phase 1 (MVP), some nomenclatures are hardcoded.
In Phase 2, these will be fetched from Oracle.
"""
@staticmethod
async def get_partners(
company_id: int,
search: Optional[str] = None,
session: Optional[AsyncSession] = None
) -> List[PartnerOption]:
"""
Get partners (suppliers/customers) for a company.
Phase 1: Returns mock data.
Phase 2: Returns synced data from SQLite (from Oracle sync).
Phase 3: Will fetch live from Oracle.
"""
# If session is provided, try to get from synced SQLite data
if session:
# Try to get from SQLite synced data
stmt = select(SyncedSupplier).where(SyncedSupplier.company_id == company_id)
if search:
stmt = stmt.where(
(SyncedSupplier.name.ilike(f"%{search}%")) |
(SyncedSupplier.fiscal_code.ilike(f"%{search}%"))
)
stmt = stmt.order_by(SyncedSupplier.name) # Order alphabetically, no limit for AutoComplete
result = await session.execute(stmt)
suppliers = result.scalars().all()
if suppliers:
# Also get local suppliers
local_stmt = select(LocalSupplier).where(LocalSupplier.company_id == company_id)
if search:
local_stmt = local_stmt.where(
(LocalSupplier.name.ilike(f"%{search}%")) |
(LocalSupplier.fiscal_code.ilike(f"%{search}%"))
)
local_stmt = local_stmt.order_by(LocalSupplier.name) # Order alphabetically
local_result = await session.execute(local_stmt)
local_suppliers = local_result.scalars().all()
# Combine both - no IDs needed, just text data for autocomplete
partners = []
for s in suppliers:
partners.append(PartnerOption(
name=s.name,
fiscal_code=s.fiscal_code,
address=s.address,
source="oracle"
))
for l in local_suppliers:
partners.append(PartnerOption(
name=l.name, # No suffix - must match search results
fiscal_code=l.fiscal_code,
address=l.address,
source="local"
))
return partners
# Fallback to mock data for Phase 1 (when no synced data)
mock_partners = [
PartnerOption(name="OMV Petrom", fiscal_code="RO123456", source="mock"),
PartnerOption(name="Dedeman", fiscal_code="RO789012", source="mock"),
PartnerOption(name="Kaufland", fiscal_code="RO345678", source="mock"),
PartnerOption(name="Emag", fiscal_code="RO901234", source="mock"),
PartnerOption(name="Altex", fiscal_code="RO567890", source="mock"),
]
if search:
search_lower = search.lower()
mock_partners = [
p for p in mock_partners
if search_lower in p.name.lower() or (p.fiscal_code and search_lower in p.fiscal_code.lower())
]
return mock_partners
@staticmethod
async def get_accounts(company_id: int, prefix: Optional[str] = None) -> List[AccountOption]:
"""
Get chart of accounts for a company.
Phase 1: Returns common expense/income accounts.
Phase 2: Will fetch from Oracle PLAN_CONTURI.
"""
# Common accounts for expenses and receipts
accounts = [
# Expense accounts (Class 6)
AccountOption(code="6022", name="Cheltuieli cu combustibilii"),
AccountOption(code="6024", name="Cheltuieli materiale pentru ambalat"),
AccountOption(code="6028", name="Alte cheltuieli cu materiale consumabile"),
AccountOption(code="624", name="Cheltuieli cu transportul de bunuri si personal"),
AccountOption(code="626", name="Cheltuieli postale si taxe telecomunicatii"),
AccountOption(code="628", name="Alte cheltuieli cu serviciile executate de terti"),
# VAT
AccountOption(code="4426", name="TVA deductibila"),
AccountOption(code="4427", name="TVA colectata"),
# Cash and Bank (Class 5)
AccountOption(code="5311", name="Casa in lei"),
AccountOption(code="5121", name="Conturi la banci in lei"),
# Income accounts (Class 7)
AccountOption(code="7588", name="Alte venituri din exploatare"),
]
if prefix:
accounts = [a for a in accounts if a.code.startswith(prefix)]
return accounts
@staticmethod
async def get_cash_registers(
company_id: int,
session: Optional[AsyncSession] = None
) -> List[CashRegisterOption]:
"""
Get cash registers and bank accounts for a company.
Phase 1: Returns default options.
Phase 2: Returns synced data from SQLite (from Oracle sync).
Phase 3: Will fetch live from Oracle NOM_CASE / NOM_BANCI.
"""
# If session is provided, try to get from synced SQLite data
if session:
stmt = select(SyncedCashRegister).where(SyncedCashRegister.company_id == company_id)
result = await session.execute(stmt)
registers = result.scalars().all()
if registers:
return [
CashRegisterOption(id=r.id, name=r.name, account_code=r.account_code)
for r in registers
]
# Fallback to default cash registers for Phase 1
return [
CashRegisterOption(id=1, name="Casa principala", account_code="5311"),
CashRegisterOption(id=2, name="Cont BCR", account_code="5121"),
CashRegisterOption(id=3, name="Cont BRD", account_code="5121"),
]
@staticmethod
async def get_expense_types() -> List[ExpenseTypeOption]:
"""
Get predefined expense types with their accounting configuration.
"""
return [
ExpenseTypeOption(
code=et.code,
name=et.name,
account_code=et.account_code,
has_vat=et.has_vat,
vat_percent=et.vat_percent,
)
for et in EXPENSE_TYPES.values()
]
@staticmethod
async def get_companies(username: str) -> List[dict]:
"""
Get companies accessible by user.
Phase 1: Returns mock data.
Phase 2: Will fetch from shared auth based on user permissions.
"""
# TODO: Integrate with shared auth to get user's companies
return [
{"id": 1, "name": "SC Test SRL", "cui": "RO12345678"},
{"id": 2, "name": "SC Demo SA", "cui": "RO87654321"},
]
# ============ Phase 2 Oracle Integration Methods ============
@staticmethod
async def _fetch_partners_oracle(company_id: int, search: Optional[str] = None) -> List[PartnerOption]:
"""
Fetch partners from Oracle NOM_PARTENERI.
Will be implemented in Phase 2.
"""
# TODO: Implement using shared oracle_pool
# Example query:
# SELECT ID_PART, DEN_PART, COD_FISCAL
# FROM {schema}.NOM_PARTENERI
# WHERE DEN_PART LIKE :search
raise NotImplementedError("Oracle integration pending - Phase 2")
@staticmethod
async def _fetch_accounts_oracle(company_id: int, prefix: Optional[str] = None) -> List[AccountOption]:
"""
Fetch chart of accounts from Oracle PLAN_CONTURI.
Will be implemented in Phase 2.
"""
# TODO: Implement using shared oracle_pool
raise NotImplementedError("Oracle integration pending - Phase 2")
@staticmethod
async def _fetch_cash_registers_oracle(company_id: int) -> List[CashRegisterOption]:
"""
Fetch cash registers from Oracle NOM_CASE / NOM_BANCI.
Will be implemented in Phase 2.
"""
# TODO: Implement using shared oracle_pool
raise NotImplementedError("Oracle integration pending - Phase 2")

View File

@@ -0,0 +1,295 @@
"""OCR engine wrapper for PaddleOCR and Tesseract."""
import os
import logging
import threading
import time
from dataclasses import dataclass
from typing import List, Optional, Tuple
import numpy as np
# Setup logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO) # Ensure logs are visible
# Disable PaddleOCR model source check for faster startup (PaddleX 3.x)
os.environ['PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Lazy imports - these will be imported on first use
PaddleOCR = None # Will be imported lazily
pytesseract = None # Will be imported lazily
# Check availability without importing heavy libraries
def _check_paddle_available() -> bool:
"""Check if paddleocr is installed without importing it."""
try:
import importlib.util
return importlib.util.find_spec("paddleocr") is not None
except Exception:
return False
def _check_tesseract_available() -> bool:
"""Check if pytesseract is installed without importing it."""
try:
import importlib.util
return importlib.util.find_spec("pytesseract") is not None
except Exception:
return False
PADDLE_AVAILABLE = _check_paddle_available()
TESSERACT_AVAILABLE = _check_tesseract_available()
@dataclass
class OCRResult:
"""Raw OCR result."""
text: str
confidence: float
boxes: List[dict]
engine: str = "" # OCR engine used: paddleocr or tesseract
class OCREngine:
"""Unified OCR engine with fallback support."""
def __init__(self):
self._paddle = None
self._paddle_init_started = False
self._paddle_ready = threading.Event() # Signals when PaddleOCR is FULLY ready
self._paddle_init_lock = threading.Lock()
def _init_paddle_lazy(self):
"""Lazy initialize PaddleOCR on first use (avoids slow startup)."""
global PaddleOCR
with self._paddle_init_lock:
if self._paddle_init_started:
return # Already initializing or done
self._paddle_init_started = True
if PADDLE_AVAILABLE:
try:
print("Importing PaddleOCR (first use, may take ~15-20 seconds)...", flush=True)
from paddleocr import PaddleOCR as _PaddleOCR
PaddleOCR = _PaddleOCR
print("Initializing PaddleOCR engine...", flush=True)
# PaddleOCR 3.x API - optimized for Romanian receipts
# Note: 'latin' not available in PaddleOCR 3.x, 'en' works well for receipts
self._paddle = PaddleOCR(
lang='en', # 'en' handles Latin alphabet well for receipts
# High quality settings for better accuracy
det_db_thresh=0.3, # Lower threshold = detect more text (default 0.3)
det_db_box_thresh=0.5, # Box confidence threshold (default 0.5)
det_db_unclip_ratio=1.8, # Expand detected boxes slightly (default 1.5)
rec_batch_num=6, # Batch size for recognition
use_angle_cls=True, # Enable text angle classification
)
print("PaddleOCR initialized successfully with high-quality settings", flush=True)
except Exception as e:
print(f"Warning: Failed to initialize PaddleOCR: {e}", flush=True)
self._paddle = None
# Signal that initialization is complete (success or failure)
self._paddle_ready.set()
def wait_for_paddle(self, timeout: float = 30.0) -> bool:
"""
Wait for PaddleOCR to be fully initialized.
Args:
timeout: Max seconds to wait (default 30s)
Returns:
True if PaddleOCR is ready, False if timeout or unavailable
"""
if not PADDLE_AVAILABLE:
return False
if self._paddle is not None:
return True # Already ready
if not self._paddle_init_started:
# Start initialization if not already started
self._init_paddle_lazy()
# Wait for initialization to complete
print(f"[OCR] Waiting for PaddleOCR to be ready (max {timeout}s)...", flush=True)
start = time.time()
ready = self._paddle_ready.wait(timeout=timeout)
elapsed = time.time() - start
if ready and self._paddle is not None:
print(f"[OCR] PaddleOCR ready after {elapsed:.1f}s", flush=True)
return True
else:
print(f"[OCR] PaddleOCR not ready after {elapsed:.1f}s (timeout or failed)", flush=True)
return False
def is_paddle_ready(self) -> bool:
"""Check if PaddleOCR is ready without waiting."""
return self._paddle is not None
def recognize(self, image: np.ndarray) -> OCRResult:
"""Perform OCR on preprocessed image."""
logger.info(f"[OCR] Starting recognition, image shape: {image.shape}, dtype: {image.dtype}")
# Lazy init PaddleOCR on first call
self._init_paddle_lazy()
if PADDLE_AVAILABLE and self._paddle:
logger.info("[OCR] Using PaddleOCR engine")
return self._paddle_recognize(image)
elif TESSERACT_AVAILABLE:
logger.info("[OCR] Using Tesseract engine (PaddleOCR not available)")
return self._tesseract_recognize(image)
else:
logger.error("[OCR] No OCR engine available!")
raise RuntimeError(
"No OCR engine available. Install PaddleOCR or Tesseract."
)
def _paddle_recognize(self, image: np.ndarray) -> OCRResult:
"""Recognize text using PaddleOCR 3.x API."""
# Wait for PaddleOCR to be fully ready (handles background init)
if not self.wait_for_paddle(timeout=30.0):
logger.warning("[PaddleOCR] Not ready, falling back to Tesseract")
if TESSERACT_AVAILABLE:
return self._tesseract_recognize(image)
raise RuntimeError("PaddleOCR not ready and Tesseract not available")
try:
logger.info(f"[PaddleOCR] Processing image, shape: {image.shape}")
# PaddleOCR 3.x requires 3-channel images
if len(image.shape) == 2:
# Convert grayscale to 3-channel BGR
import cv2
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
logger.info(f"[PaddleOCR] Converted to BGR, new shape: {image.shape}")
# PaddleOCR 3.x uses predict() with new parameter names
logger.info("[PaddleOCR] Calling predict()...")
result = self._paddle.predict(image, use_textline_orientation=True)
logger.info(f"[PaddleOCR] predict() returned, result type: {type(result)}")
if not result or len(result) == 0:
logger.warning("[PaddleOCR] No results returned")
return OCRResult(text="", confidence=0.0, boxes=[], engine="paddleocr")
# PaddleOCR 3.x returns OCRResult objects with different structure
ocr_result = result[0]
# Extract texts and scores from the new format
rec_texts = ocr_result.get('rec_texts', [])
rec_scores = ocr_result.get('rec_scores', [])
dt_polys = ocr_result.get('dt_polys', [])
if not rec_texts:
return OCRResult(text="", confidence=0.0, boxes=[], engine="paddleocr")
boxes = []
for i, text in enumerate(rec_texts):
conf = rec_scores[i] if i < len(rec_scores) else 0.0
box = dt_polys[i].tolist() if i < len(dt_polys) else []
boxes.append({
'text': text,
'confidence': float(conf),
'box': box
})
avg_conf = sum(rec_scores) / len(rec_scores) if rec_scores else 0.0
text_result = '\n'.join(rec_texts)
logger.info(f"[PaddleOCR] SUCCESS - Found {len(rec_texts)} text lines, avg confidence: {avg_conf:.2%}")
logger.debug(f"[PaddleOCR] Raw text preview: {text_result[:200]}...")
return OCRResult(
text=text_result,
confidence=float(avg_conf),
boxes=boxes,
engine="paddleocr"
)
except Exception as e:
logger.error(f"[PaddleOCR] ERROR: {e}, falling back to Tesseract")
if TESSERACT_AVAILABLE:
return self._tesseract_recognize(image)
raise
def _tesseract_recognize(self, image: np.ndarray) -> OCRResult:
"""Recognize text using Tesseract."""
global pytesseract
logger.info(f"[Tesseract] Processing image, shape: {image.shape}")
# Lazy import pytesseract
if pytesseract is None:
logger.info("[Tesseract] Importing pytesseract...")
import pytesseract as _pytesseract
pytesseract = _pytesseract
# PSM 4: Single column (best for receipts)
config = '--psm 4 -l ron+eng'
text = pytesseract.image_to_string(image, config=config)
# Quick confidence estimate
data = pytesseract.image_to_data(image, config=config, output_type=pytesseract.Output.DICT)
confidences = [int(c) for c in data['conf'] if int(c) > 0]
avg_conf = sum(confidences) / len(confidences) / 100 if confidences else 0.0
logger.info(f"[Tesseract] Done: {len(text)} chars, conf: {avg_conf:.2%}")
return OCRResult(text=text, confidence=avg_conf, boxes=[], engine="tesseract")
def recognize_dual(self, image: np.ndarray) -> Tuple[OCRResult, Optional[OCRResult]]:
"""
Run both OCR engines and return both results.
Returns:
Tuple of (paddle_result, tesseract_result)
tesseract_result may be None if Tesseract is not available
"""
logger.info(f"[OCR Dual] Starting dual recognition, image shape: {image.shape}")
# Lazy init PaddleOCR
self._init_paddle_lazy()
paddle_result = None
tesseract_result = None
# Run PaddleOCR
if PADDLE_AVAILABLE and self._paddle:
try:
logger.info("[OCR Dual] Running PaddleOCR...")
paddle_result = self._paddle_recognize(image)
logger.info(f"[OCR Dual] PaddleOCR: {len(paddle_result.text)} chars, conf: {paddle_result.confidence:.2%}")
except Exception as e:
logger.error(f"[OCR Dual] PaddleOCR failed: {e}")
paddle_result = OCRResult(text="", confidence=0.0, boxes=[], engine="paddleocr")
# Run Tesseract
if TESSERACT_AVAILABLE:
try:
logger.info("[OCR Dual] Running Tesseract...")
tesseract_result = self._tesseract_recognize(image)
logger.info(f"[OCR Dual] Tesseract: {len(tesseract_result.text)} chars, conf: {tesseract_result.confidence:.2%}")
except Exception as e:
logger.error(f"[OCR Dual] Tesseract failed: {e}")
tesseract_result = OCRResult(text="", confidence=0.0, boxes=[], engine="tesseract")
# Fallback if PaddleOCR not available
if paddle_result is None:
if tesseract_result:
paddle_result = tesseract_result
else:
raise RuntimeError("No OCR engine available")
return paddle_result, tesseract_result
@staticmethod
def get_available_engines() -> List[str]:
"""Return list of available OCR engines."""
engines = []
if PADDLE_AVAILABLE:
engines.append('paddleocr')
if TESSERACT_AVAILABLE:
engines.append('tesseract')
return engines

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,569 @@
"""Main OCR service coordinating preprocessing, recognition, and extraction."""
import os
import re
import logging
# Disable PaddleOCR model source check for faster startup (PaddleX 3.x) - must be set before import
os.environ['PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK'] = 'True'
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor
from decimal import Decimal
from pathlib import Path
from typing import Optional, Tuple
from backend.modules.data_entry.services.ocr_engine import OCREngine
from backend.modules.data_entry.services.ocr_extractor import ReceiptExtractor, ExtractionResult
from backend.modules.data_entry.services.image_preprocessor import ImagePreprocessor
# Setup logging
logger = logging.getLogger(__name__)
class OCRService:
"""Service for OCR processing of receipt images."""
_executor = ThreadPoolExecutor(max_workers=2)
def __init__(self):
self.preprocessor = ImagePreprocessor()
self.ocr_engine = OCREngine()
self.extractor = ReceiptExtractor()
async def process_image(
self,
image_path: Path,
mime_type: str
) -> Tuple[bool, str, Optional[ExtractionResult]]:
"""
Process receipt image and extract structured data.
Args:
image_path: Path to the image file
mime_type: MIME type of the file
Returns:
Tuple of (success, message, extraction_result)
"""
try:
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
self._executor,
self._process_sync,
image_path,
mime_type
)
return result
except Exception as e:
return False, f"OCR processing failed: {str(e)}", None
def _process_sync(
self,
image_path: Path,
mime_type: str
) -> Tuple[bool, str, Optional[ExtractionResult]]:
"""Synchronous processing with ADAPTIVE OCR pipeline."""
start_time = time.time()
print(f"[OCR Service] Starting processing: {image_path}, mime: {mime_type}", flush=True)
# Load image
if mime_type == 'application/pdf':
try:
images = self.preprocessor.pdf_to_images(image_path)
if not images:
return False, "Failed to extract images from PDF", None
image = images[0]
except RuntimeError as e:
return False, str(e), None
else:
try:
image = self.preprocessor.load_image(image_path)
except ValueError as e:
return False, str(e), None
raw_texts = []
extraction = None
# ══════════════════════════════════════════════════════════════
# STEP 1: PaddleOCR + Light (fastest, best for clear PDFs)
# ══════════════════════════════════════════════════════════════
print("=" * 60, flush=True)
print("[OCR] STEP 1: PaddleOCR + Light preprocessing", flush=True)
print("=" * 60, flush=True)
light_img = self.preprocessor.preprocess_light(image)
try:
paddle_light = self.ocr_engine._paddle_recognize(light_img)
if paddle_light and paddle_light.text:
extraction = self.extractor.extract(paddle_light.text)
extraction.ocr_engine = "paddle-light"
raw_texts.append(f"═══ PaddleOCR (light, conf: {paddle_light.confidence:.0%}) ═══\n{paddle_light.text}")
# Log extraction results
print(f"[OCR] Step 1 Results:", flush=True)
print(f" - OCR Confidence: {paddle_light.confidence:.0%}", flush=True)
print(f" - Amount: {extraction.amount}", flush=True)
print(f" - Date: {extraction.receipt_date}", flush=True)
print(f" - Number: {extraction.receipt_number}", flush=True)
print(f" - CUI: {extraction.cui}", flush=True)
print(f" - TVA: {extraction.tva_total} (entries: {len(extraction.tva_entries) if extraction.tva_entries else 0})", flush=True)
print(f" - Overall Confidence: {extraction.overall_confidence:.0%}", flush=True)
# Early exit if complete
if self._is_extraction_complete(extraction):
extraction.raw_text = "\n\n".join(raw_texts)
elapsed_ms = int((time.time() - start_time) * 1000)
extraction.processing_time_ms = elapsed_ms
print(f"[OCR] ✓✓✓ EARLY EXIT at Step 1 - All fields found! ({elapsed_ms}ms) ✓✓✓", flush=True)
return True, "OCR complete (fast mode)", extraction
else:
print("[OCR] → Step 1 incomplete, continuing to Step 2...", flush=True)
except Exception as e:
print(f"[OCR] PaddleOCR light failed: {e}", flush=True)
extraction = ExtractionResult()
# ══════════════════════════════════════════════════════════════
# STEP 2: PaddleOCR + Heavy (for faded thermal receipts)
# ══════════════════════════════════════════════════════════════
print("=" * 60, flush=True)
print("[OCR] STEP 2: PaddleOCR + Heavy preprocessing", flush=True)
print("=" * 60, flush=True)
heavy_img = self.preprocessor.preprocess_heavy(image)
try:
paddle_heavy = self.ocr_engine._paddle_recognize(heavy_img)
if paddle_heavy and paddle_heavy.text:
extraction_heavy = self.extractor.extract(paddle_heavy.text)
extraction_heavy.ocr_engine = "paddle-heavy"
raw_texts.append(f"═══ PaddleOCR (heavy, conf: {paddle_heavy.confidence:.0%}) ═══\n{paddle_heavy.text}")
print(f"[OCR] Step 2 (Heavy) Results:", flush=True)
print(f" - OCR Confidence: {paddle_heavy.confidence:.0%}", flush=True)
print(f" - Amount: {extraction_heavy.amount}", flush=True)
print(f" - Date: {extraction_heavy.receipt_date}", flush=True)
print(f" - CUI: {extraction_heavy.cui}", flush=True)
# Merge with previous
extraction = self._merge_extractions(extraction, extraction_heavy)
print(f"[OCR] After merge:", flush=True)
print(f" - Amount: {extraction.amount}", flush=True)
print(f" - Date: {extraction.receipt_date}", flush=True)
print(f" - Number: {extraction.receipt_number}", flush=True)
print(f" - CUI: {extraction.cui}", flush=True)
print(f" - TVA: {extraction.tva_total}", flush=True)
print(f" - Overall Confidence: {extraction.overall_confidence:.0%}", flush=True)
if self._is_extraction_complete(extraction):
extraction.raw_text = "\n\n".join(raw_texts)
extraction.ocr_engine = "paddle-adaptive"
elapsed_ms = int((time.time() - start_time) * 1000)
extraction.processing_time_ms = elapsed_ms
print(f"[OCR] ✓✓✓ EARLY EXIT at Step 2 - All fields found after merge! ({elapsed_ms}ms) ✓✓✓", flush=True)
return True, "OCR complete (paddle dual)", extraction
else:
print("[OCR] → Step 2 incomplete, continuing to Step 3 (Tesseract)...", flush=True)
except Exception as e:
print(f"[OCR] PaddleOCR heavy failed: {e}", flush=True)
# ══════════════════════════════════════════════════════════════
# STEP 3: Tesseract - ONLY to complete missing fields
# Uses Tesseract-optimized preprocessing (binarized, high contrast)
# ══════════════════════════════════════════════════════════════
print("=" * 60, flush=True)
print("[OCR] STEP 3: Tesseract (complement only, not override)", flush=True)
print("=" * 60, flush=True)
try:
# Use Tesseract-specific preprocessing (Otsu binarization)
tesseract_img = self.preprocessor.preprocess_for_tesseract(image)
tesseract_result = self.ocr_engine._tesseract_recognize(tesseract_img)
if tesseract_result and tesseract_result.text:
extraction_tess = self.extractor.extract(tesseract_result.text)
extraction_tess.ocr_engine = "tesseract"
raw_texts.append(f"═══ Tesseract (conf: {tesseract_result.confidence:.0%}) ═══\n{tesseract_result.text}")
print(f"[OCR] Step 3 (Tesseract) Results:", flush=True)
print(f" - OCR Confidence: {tesseract_result.confidence:.0%}", flush=True)
print(f" - Amount: {extraction_tess.amount}", flush=True)
print(f" - Date: {extraction_tess.receipt_date}", flush=True)
print(f" - CUI: {extraction_tess.cui}", flush=True)
# IMPORTANT: Tesseract only COMPLETES missing fields, never overrides!
extraction = self._complement_extraction(extraction, extraction_tess)
except Exception as e:
print(f"[OCR] Tesseract failed: {e}", flush=True)
# ══════════════════════════════════════════════════════════════
# FINAL VALIDATION: Fix impossible values
# ══════════════════════════════════════════════════════════════
if extraction:
extraction = self._final_validation(extraction)
# Final result
if extraction is None:
return False, "No text detected", None
extraction.raw_text = "\n\n".join(raw_texts)
extraction.ocr_engine = "adaptive-full"
# Build result message
fields_found = []
if extraction.amount: fields_found.append("amount")
if extraction.receipt_date: fields_found.append("date")
if extraction.receipt_number: fields_found.append("number")
if extraction.cui: fields_found.append("CUI")
if extraction.tva_total or extraction.tva_entries: fields_found.append("TVA")
message = f"OCR complete (full pipeline). Found: {', '.join(fields_found) or 'no fields'}"
elapsed_ms = int((time.time() - start_time) * 1000)
extraction.processing_time_ms = elapsed_ms
print("=" * 60, flush=True)
print(f"[OCR] FINAL RESULT (full pipeline) - {elapsed_ms}ms", flush=True)
print("=" * 60, flush=True)
print(f" - Amount: {extraction.amount}", flush=True)
print(f" - Date: {extraction.receipt_date}", flush=True)
print(f" - Number: {extraction.receipt_number}", flush=True)
print(f" - CUI: {extraction.cui}", flush=True)
print(f" - TVA: {extraction.tva_total}", flush=True)
print(f" - Overall Confidence: {extraction.overall_confidence:.0%}", flush=True)
print(f" - Processing Time: {elapsed_ms}ms", flush=True)
print(f" - Message: {message}", flush=True)
return True, message, extraction
def _merge_extractions(
self,
paddle: Optional[ExtractionResult],
tesseract: Optional[ExtractionResult]
) -> ExtractionResult:
"""
Merge two extractions, picking best fields from each engine.
Strategy:
- For each field, prefer the one with higher confidence
- Use validation rules (CUI format, date validity, company indicators)
- Combine TVA entries if different
"""
result = ExtractionResult()
# Handle case where one is None
if paddle is None and tesseract is None:
return result
if paddle is None:
return tesseract
if tesseract is None:
return paddle
print("[Merge] Comparing PaddleOCR vs Tesseract extractions...", flush=True)
# === AMOUNT ===
# Pick higher confidence, both must be positive
if paddle.amount and tesseract.amount:
if paddle.confidence_amount >= tesseract.confidence_amount:
result.amount = paddle.amount
result.confidence_amount = paddle.confidence_amount
print(f"[Merge] Amount: PaddleOCR {paddle.amount} (conf: {paddle.confidence_amount:.0%})", flush=True)
else:
result.amount = tesseract.amount
result.confidence_amount = tesseract.confidence_amount
print(f"[Merge] Amount: Tesseract {tesseract.amount} (conf: {tesseract.confidence_amount:.0%})", flush=True)
elif paddle.amount:
result.amount = paddle.amount
result.confidence_amount = paddle.confidence_amount
elif tesseract.amount:
result.amount = tesseract.amount
result.confidence_amount = tesseract.confidence_amount
# === DATE ===
# Pick higher confidence, validate date reasonableness
if paddle.receipt_date and tesseract.receipt_date:
if paddle.confidence_date >= tesseract.confidence_date:
result.receipt_date = paddle.receipt_date
result.confidence_date = paddle.confidence_date
print(f"[Merge] Date: PaddleOCR {paddle.receipt_date}", flush=True)
else:
result.receipt_date = tesseract.receipt_date
result.confidence_date = tesseract.confidence_date
print(f"[Merge] Date: Tesseract {tesseract.receipt_date}", flush=True)
elif paddle.receipt_date:
result.receipt_date = paddle.receipt_date
result.confidence_date = paddle.confidence_date
elif tesseract.receipt_date:
result.receipt_date = tesseract.receipt_date
result.confidence_date = tesseract.confidence_date
# === VENDOR NAME ===
# Prefer one with company indicators (S.R.L., S.A., etc.)
paddle_has_indicator = self._has_company_indicator(paddle.partner_name)
tesseract_has_indicator = self._has_company_indicator(tesseract.partner_name)
if paddle.partner_name and tesseract.partner_name:
if paddle_has_indicator and not tesseract_has_indicator:
result.partner_name = paddle.partner_name
result.confidence_vendor = paddle.confidence_vendor
print(f"[Merge] Vendor: PaddleOCR '{paddle.partner_name}' (has company indicator)", flush=True)
elif tesseract_has_indicator and not paddle_has_indicator:
result.partner_name = tesseract.partner_name
result.confidence_vendor = tesseract.confidence_vendor
print(f"[Merge] Vendor: Tesseract '{tesseract.partner_name}' (has company indicator)", flush=True)
elif paddle.confidence_vendor >= tesseract.confidence_vendor:
result.partner_name = paddle.partner_name
result.confidence_vendor = paddle.confidence_vendor
print(f"[Merge] Vendor: PaddleOCR '{paddle.partner_name}' (higher conf)", flush=True)
else:
result.partner_name = tesseract.partner_name
result.confidence_vendor = tesseract.confidence_vendor
print(f"[Merge] Vendor: Tesseract '{tesseract.partner_name}' (higher conf)", flush=True)
elif paddle.partner_name:
result.partner_name = paddle.partner_name
result.confidence_vendor = paddle.confidence_vendor
elif tesseract.partner_name:
result.partner_name = tesseract.partner_name
result.confidence_vendor = tesseract.confidence_vendor
# === CUI (Fiscal Code) ===
# Validate format: 6-10 digits, prefer valid one
paddle_cui_valid = self._is_valid_cui(paddle.cui)
tesseract_cui_valid = self._is_valid_cui(tesseract.cui)
if paddle.cui and tesseract.cui:
if paddle_cui_valid and not tesseract_cui_valid:
result.cui = paddle.cui
print(f"[Merge] CUI: PaddleOCR {paddle.cui} (valid format)", flush=True)
elif tesseract_cui_valid and not paddle_cui_valid:
result.cui = tesseract.cui
print(f"[Merge] CUI: Tesseract {tesseract.cui} (valid format)", flush=True)
else:
# Both valid or both invalid - prefer PaddleOCR
result.cui = paddle.cui
print(f"[Merge] CUI: PaddleOCR {paddle.cui}", flush=True)
elif paddle.cui and paddle_cui_valid:
result.cui = paddle.cui
elif tesseract.cui and tesseract_cui_valid:
result.cui = tesseract.cui
elif paddle.cui:
result.cui = paddle.cui
elif tesseract.cui:
result.cui = tesseract.cui
# === TVA ENTRIES ===
# Prefer non-empty, use the one with more entries or higher amounts
if paddle.tva_entries and tesseract.tva_entries:
# Compare: prefer the one with actual amounts (not just 0)
paddle_total = sum(e.get('amount', Decimal('0')) for e in paddle.tva_entries)
tesseract_total = sum(e.get('amount', Decimal('0')) for e in tesseract.tva_entries)
if paddle_total >= tesseract_total:
result.tva_entries = paddle.tva_entries
result.tva_total = paddle.tva_total
print(f"[Merge] TVA: PaddleOCR (total: {paddle_total})", flush=True)
else:
result.tva_entries = tesseract.tva_entries
result.tva_total = tesseract.tva_total
print(f"[Merge] TVA: Tesseract (total: {tesseract_total})", flush=True)
elif paddle.tva_entries:
result.tva_entries = paddle.tva_entries
result.tva_total = paddle.tva_total
elif tesseract.tva_entries:
result.tva_entries = tesseract.tva_entries
result.tva_total = tesseract.tva_total
# === OTHER FIELDS ===
# Simple preference: paddle > tesseract
result.receipt_number = paddle.receipt_number or tesseract.receipt_number
result.receipt_series = paddle.receipt_series or tesseract.receipt_series
result.receipt_type = paddle.receipt_type or tesseract.receipt_type
result.items_count = paddle.items_count or tesseract.items_count
result.address = paddle.address or tesseract.address
result.description = paddle.description or tesseract.description
return result
def _has_company_indicator(self, name: Optional[str]) -> bool:
"""Check if vendor name has company type indicator (S.R.L., S.A., etc.)"""
if not name:
return False
name_upper = name.upper()
indicators = [
r'\bS\.?\s*R\.?\s*L\.?\b',
r'\bS\.?\s*A\.?\b',
r'\bS\.?\s*N\.?\s*C\.?\b',
r'\bP\.?\s*F\.?\s*A\.?\b',
r'\bI\.?\s*I\.?\b',
r'\bHOLDING\b',
r'\bGROUP\b',
r'\bCOMPANY\b',
]
for indicator in indicators:
if re.search(indicator, name_upper):
return True
return False
def _is_valid_cui(self, cui: Optional[str]) -> bool:
"""Validate CUI format: 6-10 digits."""
if not cui:
return False
# Remove any RO prefix
cui_clean = re.sub(r'^RO', '', cui.upper())
# Must be 6-10 digits
return bool(re.match(r'^\d{6,10}$', cui_clean))
def _is_extraction_complete(self, ext: ExtractionResult, min_confidence: float = 0.85) -> bool:
"""
Check if extraction has ALL required fields to skip further processing.
Required for early exit (ALL must be true):
- Overall confidence >= 85%
- ALL 5 critical fields present: number, date, amount, TVA, CUI
"""
# Must have high confidence
if ext.overall_confidence < min_confidence:
print(f"[OCR] Confidence {ext.overall_confidence:.0%} < {min_confidence:.0%} - continuing", flush=True)
return False
# Check all required fields
has_number = bool(ext.receipt_number)
has_date = bool(ext.receipt_date)
has_amount = bool(ext.amount)
has_tva = bool(ext.tva_total) or bool(ext.tva_entries)
has_cui = bool(ext.cui)
missing = []
if not has_number: missing.append("number")
if not has_date: missing.append("date")
if not has_amount: missing.append("amount")
if not has_tva: missing.append("TVA")
if not has_cui: missing.append("CUI")
if missing:
print(f"[OCR] Missing: {', '.join(missing)} - continuing", flush=True)
return False
print(f"[OCR] ✓ All 5 fields found with {ext.overall_confidence:.0%} confidence", flush=True)
return True
def _complement_extraction(
self,
primary: Optional[ExtractionResult],
secondary: Optional[ExtractionResult]
) -> ExtractionResult:
"""
Complement primary extraction with missing fields from secondary.
NEVER overrides existing values - only fills in gaps.
This is different from _merge_extractions which can override values.
"""
if primary is None and secondary is None:
return ExtractionResult()
if primary is None:
return secondary
if secondary is None:
return primary
print("[Complement] Adding missing fields from Tesseract...", flush=True)
# Only fill missing amount
if not primary.amount and secondary.amount:
primary.amount = secondary.amount
primary.confidence_amount = secondary.confidence_amount
print(f"[Complement] Added amount: {secondary.amount}", flush=True)
# Only fill missing date
if not primary.receipt_date and secondary.receipt_date:
primary.receipt_date = secondary.receipt_date
primary.confidence_date = secondary.confidence_date
print(f"[Complement] Added date: {secondary.receipt_date}", flush=True)
# Only fill missing vendor
if not primary.partner_name and secondary.partner_name:
primary.partner_name = secondary.partner_name
primary.confidence_vendor = secondary.confidence_vendor
print(f"[Complement] Added vendor: {secondary.partner_name}", flush=True)
# Only fill missing CUI
if not primary.cui and secondary.cui and self._is_valid_cui(secondary.cui):
primary.cui = secondary.cui
print(f"[Complement] Added CUI: {secondary.cui}", flush=True)
# Only fill missing TVA
if not primary.tva_entries and secondary.tva_entries:
primary.tva_entries = secondary.tva_entries
primary.tva_total = secondary.tva_total
print(f"[Complement] Added TVA: {secondary.tva_total}", flush=True)
# Only fill missing receipt number
if not primary.receipt_number and secondary.receipt_number:
primary.receipt_number = secondary.receipt_number
print(f"[Complement] Added number: {secondary.receipt_number}", flush=True)
# Only fill missing address
if not primary.address and secondary.address:
primary.address = secondary.address
print(f"[Complement] Added address: {secondary.address}", flush=True)
return primary
def _final_validation(self, extraction: ExtractionResult) -> ExtractionResult:
"""
Final validation and correction of impossible values.
Key rules:
1. TVA cannot be greater than TOTAL (it's always a fraction)
2. If TVA > TOTAL, recalculate TOTAL from TVA using known rates
3. Validate TVA entries sum equals TVA total
"""
print("[Final Validation] Checking extracted values...", flush=True)
# Rule 1: TVA cannot be greater than TOTAL
if extraction.tva_total and extraction.amount:
if extraction.tva_total > extraction.amount:
print(f"[Final Validation] TVA ({extraction.tva_total}) > TOTAL ({extraction.amount}) - IMPOSSIBLE!", flush=True)
# Calculate TOTAL from TVA using reverse formula:
# total = base + tva = tva * (100/rate + 1) = tva * (100 + rate) / rate
# For 9% TVA: total = tva * 109 / 9 = tva * 12.11
# For 19% TVA: total = tva * 119 / 19 = tva * 6.26
# For 21% TVA: total = tva * 121 / 21 = tva * 5.76
rate = 19 # Default rate assumption
if extraction.tva_entries:
# Use the rate from the first entry
rate = extraction.tva_entries[0].get('percent', 19)
if rate > 0:
# Formula: total = tva * (100 + rate) / rate
calculated_total = extraction.tva_total * (Decimal('100') + Decimal(str(rate))) / Decimal(str(rate))
calculated_total = calculated_total.quantize(Decimal('0.01'))
print(f"[Final Validation] Calculated TOTAL from TVA: {calculated_total} (using {rate}% rate)", flush=True)
extraction.amount = calculated_total
extraction.confidence_amount = 0.70 # Lower confidence for calculated value
# Rule 2: TVA cannot be more than ~25% of total (max Romanian rate is 21%)
if extraction.tva_total and extraction.amount:
tva_percent = extraction.tva_total / extraction.amount * Decimal('100')
if tva_percent > Decimal('25'):
print(f"[Final Validation] Warning: TVA is {tva_percent:.1f}% of total - suspicious", flush=True)
# Rule 3: Validate TVA entries sum
if extraction.tva_entries and extraction.tva_total:
entries_sum = sum(e.get('amount', Decimal('0')) for e in extraction.tva_entries)
tolerance = Decimal('0.05')
if abs(entries_sum - extraction.tva_total) > tolerance:
print(f"[Final Validation] TVA entries sum ({entries_sum}) != tva_total ({extraction.tva_total})", flush=True)
# Use the sum as it's more reliable
extraction.tva_total = entries_sum
print(f"[Final Validation] Done. Amount={extraction.amount}, TVA={extraction.tva_total}", flush=True)
return extraction
# Singleton instance
ocr_service = OCRService()

View File

@@ -0,0 +1,447 @@
"""Business logic service for receipts workflow."""
from decimal import Decimal, ROUND_HALF_UP
from typing import List, Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptStatus, ReceiptDirection
from backend.modules.data_entry.db.models.accounting_entry import EntryType
from backend.modules.data_entry.db.crud.receipt import ReceiptCRUD
from backend.modules.data_entry.db.crud.accounting_entry import AccountingEntryCRUD
from backend.modules.data_entry.schemas.receipt import (
ReceiptCreate,
ReceiptUpdate,
ReceiptFilter,
ReceiptResponse,
ReceiptListResponse,
AccountingEntryCreate,
)
from backend.modules.data_entry.services.expense_types import EXPENSE_TYPES, get_expense_type
# Payment mode to accounting account mapping
PAYMENT_MODE_ACCOUNTS = {
'casa': ('5311', 'Casa in lei'),
'banca': ('5121', 'Conturi la banci in lei'),
'avans_decontare': ('542', 'Avansuri de trezorerie'),
}
class ReceiptService:
"""Service for receipt business logic and workflow."""
@staticmethod
async def create_receipt(
session: AsyncSession,
data: ReceiptCreate,
created_by: str,
) -> Receipt:
"""Create a new receipt in DRAFT status."""
return await ReceiptCRUD.create(session, data, created_by)
@staticmethod
async def get_receipt(
session: AsyncSession,
receipt_id: int,
) -> Optional[Receipt]:
"""Get receipt by ID with all relationships."""
return await ReceiptCRUD.get_by_id(session, receipt_id, include_relations=True)
@staticmethod
async def get_receipts(
session: AsyncSession,
filters: ReceiptFilter,
) -> ReceiptListResponse:
"""Get paginated list of receipts."""
receipts, total = await ReceiptCRUD.get_list(session, filters)
pages = (total + filters.page_size - 1) // filters.page_size if total > 0 else 1
return ReceiptListResponse(
items=[ReceiptResponse.model_validate(r) for r in receipts],
total=total,
page=filters.page,
page_size=filters.page_size,
pages=pages,
)
@staticmethod
async def update_receipt(
session: AsyncSession,
receipt_id: int,
data: ReceiptUpdate,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Update receipt (only DRAFT status).
Returns (success, message, receipt).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if not await ReceiptCRUD.can_edit(receipt, username):
return False, "Cannot edit this receipt", None
updated = await ReceiptCRUD.update(session, receipt, data)
return True, "Receipt updated", updated
@staticmethod
async def delete_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str]:
"""
Delete receipt (only DRAFT status).
Returns (success, message).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found"
if not await ReceiptCRUD.can_delete(receipt, username):
return False, "Cannot delete this receipt"
await ReceiptCRUD.delete(session, receipt)
return True, "Receipt deleted"
@staticmethod
def generate_accounting_entries(receipt: Receipt) -> List[AccountingEntryCreate]:
"""
Generate accounting entries based on receipt data and expense type.
"""
entries: List[AccountingEntryCreate] = []
# Get expense type configuration
expense_type = get_expense_type(receipt.expense_type_code or "OTHER")
if not expense_type:
expense_type = EXPENSE_TYPES["OTHER"]
amount = Decimal(str(receipt.amount))
if receipt.direction == ReceiptDirection.CHELTUIALA:
# Expense: Debit expense account, Credit cash/bank
if expense_type.has_vat:
# Calculate net and VAT
vat_rate = expense_type.vat_percent / Decimal("100")
net_amount = (amount / (1 + vat_rate)).quantize(
Decimal("0.01"), rounding=ROUND_HALF_UP
)
vat_amount = amount - net_amount
# Debit: Expense account (net)
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=expense_type.account_code,
account_name=expense_type.account_name,
amount=net_amount,
))
# Debit: VAT deductible
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=expense_type.vat_account,
account_name="TVA deductibila",
amount=vat_amount,
))
else:
# No VAT - full amount to expense
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=expense_type.account_code,
account_name=expense_type.account_name,
amount=amount,
))
# Credit entry - based on payment_mode (new) or cash_register (legacy)
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
credit_account, credit_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
elif receipt.cash_register_account:
# Backwards compatibility for existing receipts
credit_account = receipt.cash_register_account
credit_name = receipt.cash_register_name or "Casa/Banca"
else:
# Default fallback
credit_account = "5311"
credit_name = "Casa in lei"
entries.append(AccountingEntryCreate(
entry_type=EntryType.CREDIT,
account_code=credit_account,
account_name=credit_name,
amount=amount,
))
else:
# Income: Debit cash/bank, Credit income account
# Based on payment_mode (new) or cash_register (legacy)
if receipt.payment_mode and receipt.payment_mode in PAYMENT_MODE_ACCOUNTS:
cash_account, cash_name = PAYMENT_MODE_ACCOUNTS[receipt.payment_mode]
elif receipt.cash_register_account:
cash_account = receipt.cash_register_account
cash_name = receipt.cash_register_name or "Casa/Banca"
else:
cash_account = "5311"
cash_name = "Casa in lei"
# Debit: Cash/Bank
entries.append(AccountingEntryCreate(
entry_type=EntryType.DEBIT,
account_code=cash_account,
account_name=cash_name,
amount=amount,
))
# Credit: Income account (7xx - to be configured)
entries.append(AccountingEntryCreate(
entry_type=EntryType.CREDIT,
account_code="7588",
account_name="Alte venituri din exploatare",
amount=amount,
))
return entries
@staticmethod
async def submit_for_review(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Submit receipt for review (DRAFT/REJECTED → PENDING_REVIEW).
Generates accounting entries automatically.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if not await ReceiptCRUD.can_submit(receipt, username):
return False, "Cannot submit this receipt", None
# Check if receipt has at least one attachment
if not receipt.attachments:
return False, "Receipt must have at least one attachment", None
# Check required fields
if not receipt.expense_type_code:
return False, "Expense type is required", None
# Validate payment_mode or cash_register (backwards compatibility)
if not receipt.payment_mode and not receipt.cash_register_account:
return False, "Modul de plata este obligatoriu", None
# Generate accounting entries
entries = ReceiptService.generate_accounting_entries(receipt)
# Delete existing entries and create new ones
await AccountingEntryCRUD.delete_all_for_receipt(session, receipt_id)
await AccountingEntryCRUD.create_bulk(session, receipt_id, entries, is_auto_generated=True)
# Refresh receipt to clear stale relationship references after entry deletion
await session.refresh(receipt)
# Update status
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.PENDING_REVIEW
)
# Reload with entries
updated = await ReceiptCRUD.get_by_id(session, receipt_id)
return True, "Receipt submitted for review", updated
@staticmethod
async def approve_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Approve receipt (PENDING_REVIEW → APPROVED).
Requires valid CUI (fiscal code) for approval.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.PENDING_REVIEW:
return False, "Receipt is not pending review", None
# Validate CUI is present (required for Oracle import)
if not receipt.cui:
return False, "Trebuie completat codul fiscal (CUI) pentru aprobare", None
# Validate accounting entries
if not receipt.entries:
return False, "Receipt has no accounting entries", None
# Update status
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.APPROVED, reviewed_by=username
)
return True, "Receipt approved", updated
@staticmethod
async def unapprove_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Unapprove receipt (APPROVED → PENDING_REVIEW).
Returns receipt to pending review for corrections.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.APPROVED:
return False, "Receipt is not approved", None
# Update status back to pending review
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.PENDING_REVIEW
)
return True, "Receipt returned to pending review", updated
@staticmethod
async def reject_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
reason: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Reject receipt (PENDING_REVIEW → REJECTED).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.PENDING_REVIEW:
return False, "Receipt is not pending review", None
# Update status
updated = await ReceiptCRUD.update_status(
session,
receipt,
ReceiptStatus.REJECTED,
reviewed_by=username,
rejection_reason=reason,
)
return True, "Receipt rejected", updated
@staticmethod
async def resubmit_receipt(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, Optional[Receipt]]:
"""
Resubmit rejected receipt after corrections (REJECTED → PENDING_REVIEW).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", None
if receipt.status != ReceiptStatus.REJECTED:
return False, "Receipt is not rejected", None
if receipt.created_by != username:
return False, "Only the creator can resubmit", None
# Re-generate accounting entries
entries = ReceiptService.generate_accounting_entries(receipt)
await AccountingEntryCRUD.delete_all_for_receipt(session, receipt_id)
await AccountingEntryCRUD.create_bulk(session, receipt_id, entries, is_auto_generated=True)
# Refresh receipt to clear stale relationship references after entry deletion
await session.refresh(receipt)
# Update status
updated = await ReceiptCRUD.update_status(
session, receipt, ReceiptStatus.PENDING_REVIEW
)
# Reload with entries
updated = await ReceiptCRUD.get_by_id(session, receipt_id)
return True, "Receipt resubmitted for review", updated
@staticmethod
async def regenerate_entries(
session: AsyncSession,
receipt_id: int,
username: str,
) -> Tuple[bool, str, List[AccountingEntryCreate]]:
"""
Regenerate accounting entries for a receipt.
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", []
if receipt.status not in [ReceiptStatus.DRAFT, ReceiptStatus.PENDING_REVIEW]:
return False, "Cannot regenerate entries for this receipt status", []
# Generate new entries
entries = ReceiptService.generate_accounting_entries(receipt)
# Replace existing entries
await AccountingEntryCRUD.delete_all_for_receipt(session, receipt_id)
await AccountingEntryCRUD.create_bulk(session, receipt_id, entries, is_auto_generated=True)
return True, "Entries regenerated", entries
@staticmethod
async def update_entries(
session: AsyncSession,
receipt_id: int,
entries: List[AccountingEntryCreate],
username: str,
) -> Tuple[bool, str, List]:
"""
Update accounting entries for a receipt (accountant action).
"""
receipt = await ReceiptCRUD.get_by_id(session, receipt_id)
if not receipt:
return False, "Receipt not found", []
if receipt.status != ReceiptStatus.PENDING_REVIEW:
return False, "Can only modify entries for receipts pending review", []
# Validate entries
is_valid, error = await AccountingEntryCRUD.validate_entries(entries)
if not is_valid:
return False, error, []
# Replace entries
updated_entries = await AccountingEntryCRUD.replace_all_for_receipt(
session, receipt_id, entries, username
)
return True, "Entries updated", updated_entries
@staticmethod
async def get_pending_count(
session: AsyncSession,
company_id: Optional[int] = None,
) -> int:
"""Get count of receipts pending review."""
receipts = await ReceiptCRUD.get_pending_review(session, company_id)
return len(receipts)

View File

@@ -0,0 +1,406 @@
"""Service for syncing nomenclatures from Oracle to SQLite."""
import sys
from pathlib import Path
from typing import Optional, List, Tuple
from datetime import datetime
import logging
from sqlmodel import select
from sqlalchemy.ext.asyncio import AsyncSession
# Path setup handled by main.py - this is redundant
# project_root = Path(__file__).parent.parent.parent.parent.parent
# sys.path.insert(0, str(project_root / "shared"))
from shared.database.oracle_pool import oracle_pool
from backend.modules.data_entry.db.models.nomenclature import SyncedSupplier, LocalSupplier, SyncedCashRegister
logger = logging.getLogger(__name__)
# Cache for schema lookups (populated dynamically from Oracle)
_schema_cache: dict[int, str] = {}
class SyncService:
"""Service for syncing nomenclatures from Oracle."""
@staticmethod
async def get_schema_for_company(company_id: int) -> Optional[str]:
"""
Get Oracle schema for company ID from V_NOM_FIRME view.
Results are cached in memory for performance.
"""
# Check cache first
if company_id in _schema_cache:
return _schema_cache[company_id]
try:
async with oracle_pool.get_connection() as connection:
with connection.cursor() as cursor:
cursor.execute("""
SELECT SCHEMA
FROM CONTAFIN_ORACLE.V_NOM_FIRME
WHERE ID_FIRMA = :company_id
""", {'company_id': company_id})
result = cursor.fetchone()
if result:
schema = result[0]
_schema_cache[company_id] = schema
logger.info(f"Resolved schema for company {company_id}: {schema}")
return schema
else:
logger.warning(f"No schema found for company {company_id}")
return None
except Exception as e:
logger.error(f"Error fetching schema for company {company_id}: {e}")
return None
@staticmethod
async def sync_suppliers(session: AsyncSession, company_id: int) -> Tuple[int, int]:
"""
Sync suppliers (furnizori, id_tip_part=17) from Oracle to SQLite.
Uses CORESP_TIP_PART joined with VNOM_PARTENERI view.
Returns (synced_count, error_count).
"""
schema = await SyncService.get_schema_for_company(company_id)
if not schema:
logger.warning(f"No schema mapping for company {company_id}")
return 0, 0
synced = 0
errors = 0
try:
async with oracle_pool.get_connection() as connection:
with connection.cursor() as cursor:
# Fetch active suppliers from Oracle
# id_tip_part = 17 means "furnizori" (suppliers)
# Using CORESP_TIP_PART to filter by partner type
cursor.execute(f"""
SELECT B.ID_PART, B.DENUMIRE, B.COD_FISCAL, B.ADRESA
FROM {schema}.CORESP_TIP_PART A
INNER JOIN {schema}.VNOM_PARTENERI B ON A.ID_PART = B.ID_PART
WHERE A.ID_TIP_PART = 17
AND (B.INACTIV = 0 OR B.INACTIV IS NULL)
AND B.ID_PART IS NOT NULL
ORDER BY B.DENUMIRE
""")
rows = cursor.fetchall()
for row in rows:
try:
oracle_id, name, fiscal_code, address = row
# Check if already exists
stmt = select(SyncedSupplier).where(
SyncedSupplier.oracle_id == oracle_id,
SyncedSupplier.company_id == company_id
)
result = await session.execute(stmt)
existing = result.scalar_one_or_none()
if existing:
# Update existing record
existing.name = name or ""
existing.fiscal_code = fiscal_code
existing.address = address
existing.synced_at = datetime.utcnow()
logger.debug(f"Updated supplier {oracle_id}: {name}")
else:
# Create new record
supplier = SyncedSupplier(
oracle_id=oracle_id,
company_id=company_id,
name=name or "",
fiscal_code=fiscal_code,
address=address,
)
session.add(supplier)
logger.debug(f"Created supplier {oracle_id}: {name}")
synced += 1
except Exception as e:
logger.error(f"Error processing supplier row {row}: {e}")
errors += 1
# Commit all changes
await session.commit()
logger.info(f"Synced {synced} suppliers for company {company_id}, {errors} errors")
except Exception as e:
logger.error(f"Error syncing suppliers for company {company_id}: {e}")
errors += 1
await session.rollback()
return synced, errors
@staticmethod
async def sync_cash_registers(session: AsyncSession, company_id: int) -> Tuple[int, int]:
"""
Sync cash registers and bank accounts from Oracle to SQLite.
Returns (synced_count, error_count).
Uses CORESP_TIP_PART with:
- id_tip_part = 22: CASA LEI
- id_tip_part = 23: CASA VALUTA
- id_tip_part = 24: BANCA LEI
- id_tip_part = 25: BANCA VALUTA
"""
schema = await SyncService.get_schema_for_company(company_id)
if not schema:
logger.warning(f"No schema mapping for company {company_id}")
return 0, 0
synced = 0
errors = 0
# Partner types mapping
# 22=CASA LEI, 23=CASA VALUTA -> cash
# 24=BANCA LEI, 25=BANCA VALUTA -> bank
partner_types = [22, 23, 24, 25]
try:
async with oracle_pool.get_connection() as connection:
with connection.cursor() as cursor:
# Fetch cash/bank partners from CORESP_TIP_PART
cursor.execute(f"""
SELECT B.ID_PART, B.DENUMIRE, A.ID_TIP_PART
FROM {schema}.CORESP_TIP_PART A
INNER JOIN {schema}.VNOM_PARTENERI B ON A.ID_PART = B.ID_PART
WHERE A.ID_TIP_PART IN (22, 23, 24, 25)
AND (B.INACTIV = 0 OR B.INACTIV IS NULL)
AND B.ID_PART IS NOT NULL
ORDER BY A.ID_TIP_PART, B.DENUMIRE
""")
rows = cursor.fetchall()
# Type mapping: 22=CASA LEI, 23=CASA VALUTA -> cash; 24=BANCA LEI, 25=BANCA VALUTA -> bank
type_mapping = {
22: ("cash", "CASA_LEI"),
23: ("cash", "CASA_VALUTA"),
24: ("bank", "BANCA_LEI"),
25: ("bank", "BANCA_VALUTA"),
}
for row in rows:
try:
oracle_id, name, tip_part_id = row
# Determine type based on partner type
register_type, account_code = type_mapping.get(tip_part_id, ("cash", "UNKNOWN"))
# Check if already exists
stmt = select(SyncedCashRegister).where(
SyncedCashRegister.oracle_id == oracle_id,
SyncedCashRegister.company_id == company_id
)
result = await session.execute(stmt)
existing = result.scalar_one_or_none()
if existing:
# Update existing record
existing.name = name or ""
existing.account_code = account_code
existing.register_type = register_type
existing.synced_at = datetime.utcnow()
logger.debug(f"Updated cash register {oracle_id}: {name}")
else:
# Create new record
cash_register = SyncedCashRegister(
oracle_id=oracle_id,
company_id=company_id,
name=name or "",
account_code=account_code,
register_type=register_type,
)
session.add(cash_register)
logger.debug(f"Created cash register {oracle_id}: {name}")
synced += 1
except Exception as e:
logger.error(f"Error processing cash register row {row}: {e}")
errors += 1
# Commit all changes
await session.commit()
logger.info(f"Synced {synced} cash registers for company {company_id}, {errors} errors")
except Exception as e:
logger.error(f"Error syncing cash registers for company {company_id}: {e}")
errors += 1
await session.rollback()
return synced, errors
@staticmethod
async def search_supplier(
session: AsyncSession,
company_id: int,
fiscal_code: Optional[str] = None,
name: Optional[str] = None
) -> Tuple[bool, Optional[dict], str]:
"""
Search for supplier in SQLite first, then Oracle if not found.
Returns (found, supplier_data, source).
Source can be: 'synced', 'local', 'not_found'
"""
# 1. Search in synced suppliers
if fiscal_code:
stmt = select(SyncedSupplier).where(
SyncedSupplier.company_id == company_id,
SyncedSupplier.fiscal_code == fiscal_code
)
elif name:
stmt = select(SyncedSupplier).where(
SyncedSupplier.company_id == company_id,
SyncedSupplier.name.ilike(f"%{name}%")
)
else:
return False, None, "no_query"
result = await session.execute(stmt)
supplier = result.scalar_one_or_none()
if supplier:
# Return only text data - no IDs needed for autocomplete
return True, {
"name": supplier.name,
"fiscal_code": supplier.fiscal_code,
"address": supplier.address,
}, "synced"
# 2. Search in local suppliers
if fiscal_code:
stmt = select(LocalSupplier).where(
LocalSupplier.company_id == company_id,
LocalSupplier.fiscal_code == fiscal_code
)
elif name:
stmt = select(LocalSupplier).where(
LocalSupplier.company_id == company_id,
LocalSupplier.name.ilike(f"%{name}%")
)
result = await session.execute(stmt)
local = result.scalar_one_or_none()
if local:
# Return only text data - no IDs needed for autocomplete
return True, {
"name": local.name,
"fiscal_code": local.fiscal_code,
"address": local.address,
}, "local"
# 3. Try live Oracle search (optional fallback for unsynced data)
# This is a fallback - ideally sync should be up to date
# TODO: Implement live Oracle search if needed
return False, None, "not_found"
@staticmethod
async def create_local_supplier(
session: AsyncSession,
company_id: int,
name: str,
fiscal_code: Optional[str],
address: Optional[str],
created_by: str
) -> LocalSupplier:
"""Create a local supplier entry from OCR data."""
supplier = LocalSupplier(
company_id=company_id,
name=name,
fiscal_code=fiscal_code,
address=address,
created_by=created_by,
)
session.add(supplier)
await session.commit()
await session.refresh(supplier)
logger.info(f"Created local supplier: {name} (CUI: {fiscal_code})")
return supplier
@staticmethod
async def get_all_suppliers(
session: AsyncSession,
company_id: int,
search: Optional[str] = None
) -> List[dict]:
"""
Get all suppliers (synced + local) for a company.
Used for dropdown/autocomplete in UI.
"""
suppliers = []
# Get synced suppliers
stmt = select(SyncedSupplier).where(SyncedSupplier.company_id == company_id)
if search:
stmt = stmt.where(
(SyncedSupplier.name.ilike(f"%{search}%")) |
(SyncedSupplier.fiscal_code.ilike(f"%{search}%"))
)
stmt = stmt.limit(50) # Limit results for performance
result = await session.execute(stmt)
synced = result.scalars().all()
for s in synced:
suppliers.append({
"id": s.id,
"oracle_id": s.oracle_id,
"name": s.name,
"fiscal_code": s.fiscal_code,
"source": "synced"
})
# Get local suppliers
stmt = select(LocalSupplier).where(LocalSupplier.company_id == company_id)
if search:
stmt = stmt.where(
(LocalSupplier.name.ilike(f"%{search}%")) |
(LocalSupplier.fiscal_code.ilike(f"%{search}%"))
)
stmt = stmt.limit(50)
result = await session.execute(stmt)
local = result.scalars().all()
for l in local:
suppliers.append({
"id": l.id,
"name": l.name,
"fiscal_code": l.fiscal_code,
"source": "local"
})
return suppliers
@staticmethod
async def get_all_cash_registers(
session: AsyncSession,
company_id: int
) -> List[dict]:
"""
Get all cash registers for a company.
Used for dropdown in UI.
"""
stmt = select(SyncedCashRegister).where(SyncedCashRegister.company_id == company_id)
result = await session.execute(stmt)
registers = result.scalars().all()
return [
{
"id": r.id,
"oracle_id": r.oracle_id,
"name": r.name,
"account_code": r.account_code,
"register_type": r.register_type
}
for r in registers
]