## Funcționalități Principale ### Bulk Upload & Processing - Drag & drop pentru upload bonuri multiple oriunde pe pagină - Batch processing cu job queue și worker pool - Real-time updates via SSE (Server-Sent Events) cu fallback polling - Duplicate detection via SHA-256 file hash - Auto-retry pentru job-uri failed - Cancel individual jobs sau batch complet ### Mobile UX - Android Native Style - Top bar fixă cu hamburger, titlu centrat, acțiuni (search/filter) - Bottom navigation cu 4 tab-uri (Bonuri, Upload, Rapoarte, Setări) - FAB (Floating Action Button) cu hide/show on scroll - Filter chips orizontal scrollabile - Selecție multiplă prin long-press (500ms) - Select All + Bulk Delete cu confirmare - Layout Android pentru Create/Edit/View bon (Gmail compose style) ### Bug Fixes - Refresh individual via SSE în loc de refresh total pagină - Bonurile cu eroare OCR rămân vizibile pentru editare manuală - Afișare nume fișier original pentru toate bonurile - Upload stabil pe mobil (fix race condition File API) - Păstrare ordine bonuri la refresh (nu se reordonează) ### Backend - SSE endpoint pentru status updates real-time - Bulk delete endpoint cu partial success - Auto-cleanup bonuri failed după 7 zile - Batch model cu tracking complet ### Testing - E2E tests cu Playwright - Unit tests pentru bulk upload, auto-create, cleanup ## Commits Squashed: 43 user stories (US-001 → US-043) ## Branch: ralph/bulk-receipt-upload ## Timp dezvoltare: ~3 zile (Ralph autonomous) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
216 lines
6.8 KiB
Python
216 lines
6.8 KiB
Python
"""
|
|
Cleanup service for auto-deleting expired failed receipts.
|
|
|
|
US-008: Backend - Auto-Cleanup Erori După 7 Zile
|
|
- Finds receipts with processing_status='failed' and processing_completed_at < now() - 7 days
|
|
- Deletes the receipts and their attached files from storage
|
|
- Runs at startup and then daily as a background task
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from sqlalchemy import select, and_
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy.orm import selectinload
|
|
|
|
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptAttachment
|
|
from backend.modules.data_entry.config import settings
|
|
|
|
logger = logging.getLogger(__name__)

# Cleanup configuration
CLEANUP_RETENTION_DAYS = 7  # failed receipts older than this many days are purged
CLEANUP_INTERVAL_HOURS = 24  # delay between two background cleanup runs

# In-memory storage for last cleanup stats (optional - for login notification).
# The initial value carries the same keys that a cleanup run writes, so
# consumers of get_last_cleanup_stats() can rely on a single dict shape even
# before the first cleanup has happened.
_last_cleanup_stats: dict = {
    "count": 0,
    "files_deleted": 0,
    "timestamp": None,
}
|
|
|
|
|
|
def get_last_cleanup_stats() -> dict:
    """Return a snapshot of the stats recorded by the last cleanup run.

    Intended for notification purposes. The result is a shallow copy, so
    callers may modify it without touching the module-level state.
    """
    snapshot = dict(_last_cleanup_stats)
    return snapshot
|
|
|
|
|
|
def _remove_attachment_file(file_path: Path, upload_base_path: Path) -> bool:
    """Delete one attachment file and prune its parent directory if left empty.

    Args:
        file_path: Full path of the attachment file on disk.
        upload_base_path: Upload root directory; it is never removed.

    Returns:
        True when the file was removed, False when it was already missing or
        could not be deleted.
    """
    try:
        # EAFP: unlink directly instead of exists()+unlink(), which can race.
        file_path.unlink()
    except FileNotFoundError:
        # Already gone: nothing to delete and nothing to count.
        return False
    except OSError as e:
        logger.warning(f"[Cleanup] Failed to delete file {file_path}: {e}")
        return False

    logger.debug(f"[Cleanup] Deleted file: {file_path}")

    # Also try to clean up the parent directory if it is now empty, but never
    # the upload root itself.
    parent_dir = file_path.parent
    if parent_dir != upload_base_path:
        try:
            # rmdir() only succeeds on an empty directory.
            parent_dir.rmdir()
            logger.debug(f"[Cleanup] Removed empty directory: {parent_dir}")
        except OSError:
            pass  # Directory not empty or permission issue, skip

    return True


async def cleanup_expired_failed_receipts(session: AsyncSession) -> int:
    """
    Find and delete receipts with processing_status='failed' older than
    CLEANUP_RETENTION_DAYS days.

    This function:
    1. Queries for failed receipts where processing_completed_at is set and
       older than the cutoff
    2. Marks the receipt records for deletion and commits
       (cascade deletes attachment records)
    3. Deletes the attachment files from disk, only after the commit succeeded

    Files are removed after the commit on purpose: if the database work
    fails, the records still point at files that exist. A leftover orphan
    file is the less harmful failure mode.

    Args:
        session: AsyncSession for database operations

    Returns:
        Number of receipts deleted

    Raises:
        Exception: whatever the commit raised; the session is rolled back
            first and no file is touched in that case.
    """
    global _last_cleanup_stats

    # NOTE(review): naive UTC cutoff; presumably processing_completed_at is
    # stored as naive UTC - confirm against the model.
    cutoff_date = datetime.utcnow() - timedelta(days=CLEANUP_RETENTION_DAYS)

    # Find expired failed receipts with their attachments
    query = (
        select(Receipt)
        .options(selectinload(Receipt.attachments))
        .where(
            and_(
                Receipt.processing_status == "failed",
                Receipt.processing_completed_at.isnot(None),
                Receipt.processing_completed_at < cutoff_date,
            )
        )
    )

    result = await session.execute(query)
    expired_receipts = result.scalars().all()

    if not expired_receipts:
        logger.debug("[Cleanup] No expired failed receipts found")
        return 0

    upload_base_path = settings.upload_path_resolved
    deleted_count = 0
    files_to_remove = []

    for receipt in expired_receipts:
        try:
            # Resolve the file paths before the delete, while the eagerly
            # loaded attachments are still reachable from the receipt.
            receipt_files = [
                upload_base_path / attachment.file_path
                for attachment in receipt.attachments
            ]

            # Delete receipt (cascade deletes attachment records in DB)
            await session.delete(receipt)
        except Exception as e:
            logger.error(f"[Cleanup] Error deleting receipt {receipt.id}: {e}")
            continue

        files_to_remove.extend(receipt_files)
        deleted_count += 1

    # Commit all deletions; on failure undo the pending work and keep the files.
    if deleted_count > 0:
        try:
            await session.commit()
        except Exception:
            await session.rollback()
            raise

    # The records are gone for good now, so the files can be removed safely.
    deleted_files = 0
    for file_path in files_to_remove:
        if _remove_attachment_file(file_path, upload_base_path):
            deleted_files += 1

    # Update stats for notification
    _last_cleanup_stats = {
        "count": deleted_count,
        "files_deleted": deleted_files,
        "timestamp": datetime.utcnow().isoformat(),
    }

    logger.info(f"[Cleanup] Cleaned up {deleted_count} expired failed receipts ({deleted_files} files)")

    return deleted_count
|
|
|
|
|
|
async def _run_cleanup_once(get_session_func, label: str) -> None:
    """Run a single cleanup pass on the first session yielded by get_session_func.

    Args:
        get_session_func: Async generator function that yields database sessions
        label: Prefix used in the log message ("Initial" or "Daily")
    """
    session_source = get_session_func()
    try:
        async for session in session_source:
            count = await cleanup_expired_failed_receipts(session)
            if count > 0:
                logger.info(f"[Cleanup] {label} cleanup: {count} receipts removed")
            break  # one session is enough for one pass
    finally:
        # Leaving an `async for` early does not finalize an async generator;
        # close it explicitly so its teardown code runs now instead of at
        # garbage-collection time. Plain async iterators have no aclose().
        aclose = getattr(session_source, "aclose", None)
        if aclose is not None:
            await aclose()


async def run_cleanup_task(get_session_func) -> None:
    """
    Background task that runs cleanup at startup and then every
    CLEANUP_INTERVAL_HOURS hours.

    Failures of a single pass are logged and do not stop the loop. The task
    only ends when it is cancelled.

    Args:
        get_session_func: Async generator function that yields database sessions

    Raises:
        asyncio.CancelledError: re-raised when the task is cancelled.
    """
    logger.info("[Cleanup] Starting cleanup background task")

    # Run immediately at startup
    try:
        await _run_cleanup_once(get_session_func, "Initial")
    except Exception as e:
        logger.error(f"[Cleanup] Initial cleanup failed: {e}")

    # Then run once per interval
    while True:
        try:
            await asyncio.sleep(CLEANUP_INTERVAL_HOURS * 3600)
            await _run_cleanup_once(get_session_func, "Daily")
        except asyncio.CancelledError:
            logger.info("[Cleanup] Cleanup task cancelled")
            raise
        except Exception as e:
            logger.error(f"[Cleanup] Daily cleanup failed: {e}")
            # Continue running even if one cleanup fails
|
|
|
|
|
|
# Handle of the background cleanup task, kept at module level so that it can
# be cancelled on shutdown. None while no task has been started or after the
# task was stopped.
_cleanup_task: Optional[asyncio.Task] = None
|
|
|
|
|
|
async def start_cleanup_task(get_session_func) -> bool:
    """
    Launch the background cleanup task unless one is already active.

    Args:
        get_session_func: Async generator function that yields database sessions

    Returns:
        True if a new task was scheduled, False if a task is already running
        or scheduling failed
    """
    global _cleanup_task

    current = _cleanup_task
    already_running = current is not None and not current.done()
    if already_running:
        logger.warning("[Cleanup] Cleanup task already running")
        return False

    try:
        # Schedule the long-running loop on the current event loop and keep
        # the handle for a later stop.
        worker = run_cleanup_task(get_session_func)
        _cleanup_task = asyncio.create_task(worker)
        logger.info("[Cleanup] ✅ Cleanup background task started")
        return True
    except Exception as e:
        logger.error(f"[Cleanup] Failed to start cleanup task: {e}")
        return False
|
|
|
|
|
|
async def stop_cleanup_task() -> None:
    """Cancel the background cleanup task, wait for it to finish, and clear the handle."""
    global _cleanup_task

    task = _cleanup_task
    still_running = not (task is None or task.done())
    if still_running:
        task.cancel()
        try:
            # Wait until the cancellation has actually been processed.
            await task
        except asyncio.CancelledError:
            pass  # expected outcome of cancel()
        logger.info("[Cleanup] Cleanup task stopped")

    # Drop the handle whether or not a task had to be cancelled.
    _cleanup_task = None
|
|
|
|
|
|
def is_cleanup_task_running() -> bool:
    """Report whether a cleanup task exists and has not finished yet."""
    task = _cleanup_task
    if task is None:
        return False
    return not task.done()
|