"""
Cleanup service for auto-deleting expired failed receipts.

US-008: Backend - Auto-Cleanup of Errors After 7 Days
- Finds receipts with processing_status='failed' and
  processing_completed_at < now() - 7 days
- Deletes the receipts and their attached files from storage
- Runs at startup and then daily as a background task
"""

import asyncio
import logging
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import AsyncIterator, Callable, List, Optional

from sqlalchemy import select, and_
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from backend.modules.data_entry.db.models.receipt import Receipt, ReceiptAttachment
from backend.modules.data_entry.config import settings

logger = logging.getLogger(__name__)

# Cleanup configuration
CLEANUP_RETENTION_DAYS = 7
CLEANUP_INTERVAL_HOURS = 24

# Callable that returns an async iterator (normally an async generator)
# yielding database sessions.
_SessionFactory = Callable[[], AsyncIterator[AsyncSession]]

# In-memory storage for last cleanup stats (optional - for login notification).
# Keys: "count" (receipts deleted), "files_deleted" (files removed from disk),
# "timestamp" (ISO-8601 string of the last run that deleted something, or None).
_last_cleanup_stats: dict = {
    "count": 0,
    "files_deleted": 0,
    "timestamp": None,
}


def get_last_cleanup_stats() -> dict:
    """Return a shallow copy of the stats recorded by the last cleanup run."""
    return _last_cleanup_stats.copy()


def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same value as the deprecated ``datetime.utcnow()``, so the
    cutoff and the stats timestamp keep the naive-UTC form used before.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _is_inside(base: Path, candidate: Path) -> bool:
    """Return True when *candidate* resolves to a location strictly below *base*."""
    try:
        return base.resolve() in candidate.resolve().parents
    except (OSError, RuntimeError, ValueError):
        # Unresolvable path (symlink loop, invalid characters, ...): treat as unsafe.
        return False


def _remove_attachment_file(upload_base_path: Path, relative_path: str) -> bool:
    """Best-effort removal of one attachment file and its empty parent directory.

    Args:
        upload_base_path: Root directory that holds uploaded files.
        relative_path: Stored attachment path, joined onto the root.

    Returns:
        True if the file was actually removed from disk, False otherwise.
    """
    file_path = upload_base_path / relative_path

    # An absolute or "../" stored path would make the join escape the upload
    # root; never delete anything outside of it.
    if not _is_inside(upload_base_path, file_path):
        logger.warning(
            "[Cleanup] Skipping file outside upload directory: %s", file_path
        )
        return False

    removed = False
    try:
        file_path.unlink()
        removed = True
        logger.debug("[Cleanup] Deleted file: %s", file_path)
    except FileNotFoundError:
        pass  # Already gone, nothing to delete
    except OSError as e:
        logger.warning("[Cleanup] Failed to delete file %s: %s", file_path, e)

    # Also try to clean up the parent directory; rmdir() only succeeds on an
    # empty directory, so a non-empty one is simply left in place.
    parent_dir = file_path.parent
    if parent_dir != upload_base_path:
        try:
            parent_dir.rmdir()
            logger.debug("[Cleanup] Removed empty directory: %s", parent_dir)
        except OSError:
            pass  # Directory missing, not empty or permission issue, skip

    return removed


async def cleanup_expired_failed_receipts(session: AsyncSession) -> int:
    """
    Find and delete receipts with processing_status='failed' older than 7 days.

    This function:
    1. Queries for failed receipts where processing_completed_at < now() - 7 days
    2. Deletes the receipt records and commits (the original comments state the
       attachment records are removed by cascade)
    3. Only after a successful commit, deletes the attachment files from disk

    Files are removed after the commit so that a failed transaction never
    leaves surviving receipts pointing at files that are already gone.

    Args:
        session: AsyncSession for database operations

    Returns:
        Number of receipts deleted

    Raises:
        Exception: whatever the query or the commit raises; the session is
            rolled back before a commit error is re-raised.
    """
    global _last_cleanup_stats

    cutoff_date = _utcnow_naive() - timedelta(days=CLEANUP_RETENTION_DAYS)

    # Find expired failed receipts with their attachments
    query = (
        select(Receipt)
        .options(selectinload(Receipt.attachments))
        .where(
            and_(
                Receipt.processing_status == "failed",
                Receipt.processing_completed_at.isnot(None),
                Receipt.processing_completed_at < cutoff_date,
            )
        )
    )
    result = await session.execute(query)
    expired_receipts = result.scalars().all()

    if not expired_receipts:
        logger.debug("[Cleanup] No expired failed receipts found")
        return 0

    upload_base_path = settings.upload_path_resolved
    deleted_count = 0
    files_to_remove: List[str] = []

    for receipt in expired_receipts:
        try:
            # Snapshot the paths now: depending on session configuration,
            # ORM attributes may no longer be readable after delete/commit.
            attachment_paths = [
                attachment.file_path
                for attachment in receipt.attachments
                if attachment.file_path
            ]
            await session.delete(receipt)
        except Exception as e:
            logger.error("[Cleanup] Error deleting receipt %s: %s", receipt.id, e)
            continue
        files_to_remove.extend(attachment_paths)
        deleted_count += 1

    if deleted_count == 0:
        return 0

    # Commit all deletions before touching the filesystem
    try:
        await session.commit()
    except Exception:
        await session.rollback()
        raise

    deleted_files = 0
    for relative_path in files_to_remove:
        if _remove_attachment_file(upload_base_path, relative_path):
            deleted_files += 1

    # Update stats for notification
    _last_cleanup_stats = {
        "count": deleted_count,
        "files_deleted": deleted_files,
        "timestamp": _utcnow_naive().isoformat(),
    }

    logger.info(
        "[Cleanup] Cleaned up %s expired failed receipts (%s files)",
        deleted_count,
        deleted_files,
    )

    return deleted_count


async def _run_cleanup_once(get_session_func: _SessionFactory) -> int:
    """Run one cleanup pass on the first session yielded by *get_session_func*.

    The session iterator is closed explicitly afterwards. Leaving an
    ``async for`` early does not finalize an async generator, so without this
    its cleanup code would only run at garbage collection.

    Returns:
        Number of receipts deleted, or 0 if no session was yielded.
    """
    session_source = get_session_func()
    try:
        async for session in session_source:
            return await cleanup_expired_failed_receipts(session)
        return 0
    finally:
        aclose = getattr(session_source, "aclose", None)
        if aclose is not None:
            await aclose()


async def run_cleanup_task(get_session_func: _SessionFactory) -> None:
    """
    Background task that runs cleanup at startup and then every 24 hours.

    Runs until cancelled; a failed pass is logged and the loop continues.

    Args:
        get_session_func: Async generator function that yields database sessions

    Raises:
        asyncio.CancelledError: re-raised when the task is cancelled.
    """
    logger.info("[Cleanup] Starting cleanup background task")

    # Run immediately at startup
    try:
        count = await _run_cleanup_once(get_session_func)
        if count > 0:
            logger.info("[Cleanup] Initial cleanup: %s receipts removed", count)
    except Exception as e:
        logger.error("[Cleanup] Initial cleanup failed: %s", e)

    # Then run every 24 hours
    while True:
        try:
            await asyncio.sleep(CLEANUP_INTERVAL_HOURS * 3600)
            count = await _run_cleanup_once(get_session_func)
            if count > 0:
                logger.info("[Cleanup] Daily cleanup: %s receipts removed", count)
        except asyncio.CancelledError:
            logger.info("[Cleanup] Cleanup task cancelled")
            raise
        except Exception as e:
            # Continue running even if one cleanup fails
            logger.error("[Cleanup] Daily cleanup failed: %s", e)


# Global reference to cleanup task for graceful shutdown
_cleanup_task: Optional[asyncio.Task] = None


async def start_cleanup_task(get_session_func: _SessionFactory) -> bool:
    """
    Start the cleanup background task.

    Args:
        get_session_func: Async generator function that yields database sessions

    Returns:
        True if task started successfully, False if a task is already
        running or the task could not be created
    """
    global _cleanup_task

    if _cleanup_task is not None and not _cleanup_task.done():
        logger.warning("[Cleanup] Cleanup task already running")
        return False

    try:
        _cleanup_task = asyncio.create_task(run_cleanup_task(get_session_func))
        logger.info("[Cleanup] ✅ Cleanup background task started")
        return True
    except Exception as e:
        logger.error("[Cleanup] Failed to start cleanup task: %s", e)
        return False


async def stop_cleanup_task() -> None:
    """Cancel the cleanup background task, wait for it to finish, and clear the reference."""
    global _cleanup_task

    if _cleanup_task is not None and not _cleanup_task.done():
        _cleanup_task.cancel()
        try:
            await _cleanup_task
        except asyncio.CancelledError:
            pass  # Expected result of the cancel() above
        logger.info("[Cleanup] Cleanup task stopped")

    _cleanup_task = None


def is_cleanup_task_running() -> bool:
    """Check if the cleanup task is currently running."""
    return _cleanup_task is not None and not _cleanup_task.done()