Files
roa2web-service-auto/backend/modules/data_entry/services/cleanup_service.py
Claude Agent 7b3541403f feat(data-entry): Bulk Receipt Upload cu Mobile UX Android Nativ
## Funcționalități Principale

### Bulk Upload & Processing
- Drag & drop pentru upload bonuri multiple oriunde pe pagină
- Batch processing cu job queue și worker pool
- Real-time updates via SSE (Server-Sent Events) cu fallback polling
- Duplicate detection via SHA-256 file hash
- Auto-retry pentru job-uri failed
- Cancel individual jobs sau batch complet

### Mobile UX - Android Native Style
- Top bar fixă cu hamburger, titlu centrat, acțiuni (search/filter)
- Bottom navigation cu 4 tab-uri (Bonuri, Upload, Rapoarte, Setări)
- FAB (Floating Action Button) cu hide/show on scroll
- Filter chips orizontal scrollabile
- Selecție multiplă prin long-press (500ms)
- Select All + Bulk Delete cu confirmare
- Layout Android pentru Create/Edit/View bon (Gmail compose style)

### Bug Fixes
- Refresh individual via SSE în loc de refresh total pagină
- Bonurile cu eroare OCR rămân vizibile pentru editare manuală
- Afișare nume fișier original pentru toate bonurile
- Upload stabil pe mobil (fix race condition File API)
- Păstrare ordine bonuri la refresh (nu se reordonează)

### Backend
- SSE endpoint pentru status updates real-time
- Bulk delete endpoint cu partial success
- Auto-cleanup bonuri failed după 7 zile
- Batch model cu tracking complet

### Testing
- E2E tests cu Playwright
- Unit tests pentru bulk upload, auto-create, cleanup

## Commits Squashed: 43 user stories (US-001 → US-043)
## Branch: ralph/bulk-receipt-upload
## Timp dezvoltare: ~3 zile (Ralph autonomous)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:33:17 +00:00

216 lines
6.8 KiB
Python

"""
Cleanup service for auto-deleting expired failed receipts.
US-008: Backend - Auto-Cleanup Erori După 7 Zile
- Finds receipts with processing_status='failed' and processing_completed_at < now() - 7 days
- Deletes the receipts and their attached files from storage
- Runs at startup and then daily as a background task
"""
import asyncio
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
from sqlalchemy import select, and_
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptAttachment
from backend.modules.data_entry.config import settings
# Module-level logger; every message emitted by this module is prefixed "[Cleanup]".
logger = logging.getLogger(__name__)
# Cleanup configuration
# Age, in days, subtracted from the current UTC time to build the cutoff used by
# cleanup_expired_failed_receipts(): failed receipts completed before it are deleted.
CLEANUP_RETENTION_DAYS = 7
# Pause, in hours, between two consecutive runs of the background loop in
# run_cleanup_task() (converted to seconds for asyncio.sleep).
CLEANUP_INTERVAL_HOURS = 24
# In-memory storage for last cleanup stats (optional - for login notification).
# It is a plain module global, so it starts from these defaults on every process
# start. The initial value carries the same keys that a completed cleanup run
# writes ("count", "files_deleted", "timestamp") so readers of
# get_last_cleanup_stats() always see one consistent shape.
_last_cleanup_stats: dict = {
    "count": 0,
    "files_deleted": 0,
    "timestamp": None,
}
def get_last_cleanup_stats() -> dict:
    """Return a shallow copy of the stats recorded by the most recent cleanup run.

    A copy is handed out so callers (e.g. the notification code) cannot mutate
    the module-level record by accident.
    """
    snapshot = dict(_last_cleanup_stats)
    return snapshot
def _remove_attachment_files(file_paths: list, upload_base_path: Path) -> int:
    """Best-effort removal of attachment files and their emptied parent directories.

    Args:
        file_paths: Path objects of the files to remove.
        upload_base_path: Root upload directory; it is never removed itself.

    Returns:
        Number of files that were actually unlinked.
    """
    removed = 0
    for file_path in file_paths:
        try:
            file_path.unlink()
        except FileNotFoundError:
            # Already gone: nothing to count, but the parent may still be empty.
            pass
        except OSError as e:
            logger.warning(f"[Cleanup] Failed to delete file {file_path}: {e}")
            continue
        else:
            removed += 1
            logger.debug(f"[Cleanup] Deleted file: {file_path}")

        # Also try to clean up empty parent directories
        parent_dir = file_path.parent
        if parent_dir != upload_base_path:
            try:
                # rmdir() refuses to remove a non-empty directory, so no
                # separate emptiness check is needed.
                parent_dir.rmdir()
            except OSError:
                pass  # Directory not empty, missing, or permission issue: skip
            else:
                logger.debug(f"[Cleanup] Removed empty directory: {parent_dir}")
    return removed


async def cleanup_expired_failed_receipts(session: AsyncSession) -> int:
    """
    Find and delete receipts with processing_status='failed' older than
    CLEANUP_RETENTION_DAYS days.

    This function:
    1. Queries for failed receipts where processing_completed_at < now() - retention
    2. Marks the receipt records for deletion and commits (the original code
       relies on a cascade to remove the attachment records)
    3. Only after the commit succeeded, deletes the attachment files from disk

    Files are removed after the commit on purpose: if the commit fails the
    transaction is rolled back and no file has been touched, so the database
    never ends up referencing files that were already deleted. The remaining
    failure mode (process dies between commit and file removal) only leaves
    orphan files on disk.

    Args:
        session: AsyncSession for database operations

    Returns:
        Number of receipts deleted

    Raises:
        Any exception raised by the query or the commit is propagated; on a
        commit failure the session is rolled back first.
    """
    global _last_cleanup_stats

    # Naive UTC timestamp, as in the original implementation.
    # NOTE(review): assumes processing_completed_at stores naive UTC - confirm.
    cutoff_date = datetime.utcnow() - timedelta(days=CLEANUP_RETENTION_DAYS)

    # Find expired failed receipts with their attachments
    query = (
        select(Receipt)
        .options(selectinload(Receipt.attachments))
        .where(
            and_(
                Receipt.processing_status == "failed",
                Receipt.processing_completed_at.isnot(None),
                Receipt.processing_completed_at < cutoff_date,
            )
        )
    )
    result = await session.execute(query)
    expired_receipts = result.scalars().all()

    if not expired_receipts:
        logger.debug("[Cleanup] No expired failed receipts found")
        return 0

    upload_base_path = settings.upload_path_resolved
    deleted_count = 0
    files_to_remove = []

    for receipt in expired_receipts:
        try:
            # Resolve the paths now: ORM attributes may be expired once the
            # session commits, so they must not be read afterwards.
            receipt_files = [
                upload_base_path / attachment.file_path
                for attachment in receipt.attachments
            ]
            # Delete receipt (cascade deletes attachment records in DB)
            await session.delete(receipt)
        except Exception as e:
            logger.error(f"[Cleanup] Error deleting receipt {receipt.id}: {e}")
            continue
        # Queue the files only for receipts that were successfully marked.
        files_to_remove.extend(receipt_files)
        deleted_count += 1

    if deleted_count == 0:
        return 0

    # Commit all deletions before touching the filesystem
    try:
        await session.commit()
    except Exception:
        await session.rollback()
        raise

    deleted_files = _remove_attachment_files(files_to_remove, upload_base_path)

    # Update stats for notification
    _last_cleanup_stats = {
        "count": deleted_count,
        "files_deleted": deleted_files,
        "timestamp": datetime.utcnow().isoformat(),
    }
    logger.info(f"[Cleanup] Cleaned up {deleted_count} expired failed receipts ({deleted_files} files)")
    return deleted_count
async def _run_cleanup_once(get_session_func, label: str) -> None:
    """Run one cleanup pass using the first session yielded by get_session_func.

    Args:
        get_session_func: Async generator function that yields database sessions
        label: Word used in the log line ("Initial" or "Daily")
    """
    session_gen = get_session_func()
    try:
        async for session in session_gen:
            count = await cleanup_expired_failed_receipts(session)
            if count > 0:
                logger.info(f"[Cleanup] {label} cleanup: {count} receipts removed")
            break  # Only one session is needed per pass
    finally:
        # Leaving an `async for` early keeps the generator suspended until it is
        # garbage collected; close it explicitly so its teardown code runs now.
        aclose = getattr(session_gen, "aclose", None)
        if aclose is not None:
            await aclose()


async def run_cleanup_task(get_session_func) -> None:
    """
    Background task that runs cleanup at startup and then every
    CLEANUP_INTERVAL_HOURS hours.

    A failing pass is logged and does not stop the loop. Cancellation during
    the periodic loop is logged and re-raised so the task ends as cancelled.

    Args:
        get_session_func: Async generator function that yields database sessions
    """
    logger.info("[Cleanup] Starting cleanup background task")

    # Run immediately at startup
    try:
        await _run_cleanup_once(get_session_func, "Initial")
    except Exception as e:
        logger.error(f"[Cleanup] Initial cleanup failed: {e}")

    # Then run every 24 hours
    while True:
        try:
            await asyncio.sleep(CLEANUP_INTERVAL_HOURS * 3600)
            await _run_cleanup_once(get_session_func, "Daily")
        except asyncio.CancelledError:
            logger.info("[Cleanup] Cleanup task cancelled")
            raise
        except Exception as e:
            logger.error(f"[Cleanup] Daily cleanup failed: {e}")
            # Continue running even if one cleanup fails
# Global reference to cleanup task for graceful shutdown.
# Set by start_cleanup_task(), cancelled and reset to None by stop_cleanup_task(),
# and inspected by is_cleanup_task_running(). None means no task was started
# (or it has been stopped).
_cleanup_task: Optional[asyncio.Task] = None
async def start_cleanup_task(get_session_func) -> bool:
    """
    Schedule the cleanup loop as a background asyncio task.

    Nothing is scheduled when a previously started task is still active.

    Args:
        get_session_func: Async generator function that yields database sessions

    Returns:
        True when a new task was created, False when one is already running
        or creating the task failed
    """
    global _cleanup_task

    existing = _cleanup_task
    already_active = not (existing is None or existing.done())
    if already_active:
        logger.warning("[Cleanup] Cleanup task already running")
        return False

    started = False
    try:
        _cleanup_task = asyncio.create_task(run_cleanup_task(get_session_func))
        logger.info("[Cleanup] ✅ Cleanup background task started")
        started = True
    except Exception as e:
        logger.error(f"[Cleanup] Failed to start cleanup task: {e}")
    return started
async def stop_cleanup_task() -> None:
    """Cancel the background cleanup task, wait for it to finish, and clear the reference."""
    global _cleanup_task

    task = _cleanup_task
    still_running = not (task is None or task.done())
    if still_running:
        task.cancel()
        try:
            # Wait until the task has actually unwound after the cancel request.
            await task
        except asyncio.CancelledError:
            pass
        logger.info("[Cleanup] Cleanup task stopped")

    _cleanup_task = None
def is_cleanup_task_running() -> bool:
    """Report whether a cleanup task exists and has not finished yet."""
    task = _cleanup_task
    if task is None:
        return False
    return not task.done()