""" Data cleanup and maintenance service Business Layer - handles data retention policies and system maintenance """ import asyncio from datetime import datetime, timedelta from typing import Dict, Any import logging from ..infrastructure.database_connection import database_connection from ..infrastructure.repositories import SensorReadingRepository logger = logging.getLogger(__name__) class CleanupService: """Service for data cleanup and maintenance operations""" def __init__(self): self.sensor_reading_repo = SensorReadingRepository() self.is_running = False self.cleanup_task = None async def start_scheduled_cleanup(self, interval_hours: int = 24) -> None: """Start scheduled cleanup process""" if self.is_running: logger.warning("Cleanup service is already running") return self.is_running = True self.cleanup_task = asyncio.create_task(self._cleanup_loop(interval_hours)) logger.info(f"Started scheduled cleanup service (interval: {interval_hours} hours)") async def stop_scheduled_cleanup(self) -> None: """Stop scheduled cleanup process""" self.is_running = False if self.cleanup_task: self.cleanup_task.cancel() try: await self.cleanup_task except asyncio.CancelledError: pass logger.info("Cleanup service stopped") async def _cleanup_loop(self, interval_hours: int) -> None: """Main cleanup loop""" while self.is_running: try: await self.cleanup_old_data() # Wait for next cleanup interval await asyncio.sleep(interval_hours * 3600) # Convert hours to seconds except Exception as e: logger.error(f"Error in scheduled cleanup: {e}") # Wait 1 hour before retrying on error await asyncio.sleep(3600) async def cleanup_old_data(self) -> Dict[str, int]: """Perform data cleanup based on retention policies""" try: cleanup_results = {} db = await database_connection.get_database() # Delete sensor readings older than 90 days sensor_retention_date = datetime.utcnow() - timedelta(days=90) sensor_result = await db.sensor_readings.delete_many({ "created_at": {"$lt": sensor_retention_date} }) cleanup_results["sensor_readings_deleted"] = sensor_result.deleted_count if sensor_result.deleted_count > 0: logger.info(f"Deleted {sensor_result.deleted_count} old sensor readings") # Delete room metrics older than 30 days room_retention_date = datetime.utcnow() - timedelta(days=30) room_result = await db.room_metrics.delete_many({ "created_at": {"$lt": room_retention_date} }) cleanup_results["room_metrics_deleted"] = room_result.deleted_count if room_result.deleted_count > 0: logger.info(f"Deleted {room_result.deleted_count} old room metrics") # Delete system events older than 60 days events_retention_date = datetime.utcnow() - timedelta(days=60) events_result = await db.system_events.delete_many({ "created_at": {"$lt": events_retention_date} }) cleanup_results["system_events_deleted"] = events_result.deleted_count if events_result.deleted_count > 0: logger.info(f"Deleted {events_result.deleted_count} old system events") # Clean up orphaned sensor metadata (sensors with no recent readings) orphaned_retention_date = datetime.utcnow() - timedelta(days=30) # Find sensors with no recent readings active_sensors = await db.sensor_readings.distinct("sensor_id", { "created_at": {"$gte": orphaned_retention_date} }) orphaned_result = await db.sensor_metadata.delete_many({ "sensor_id": {"$nin": active_sensors}, "last_seen": {"$lt": orphaned_retention_date} }) cleanup_results["orphaned_metadata_deleted"] = orphaned_result.deleted_count if orphaned_result.deleted_count > 0: logger.info(f"Deleted {orphaned_result.deleted_count} orphaned 
sensor metadata records") return cleanup_results except Exception as e: logger.error(f"Error during data cleanup: {e}") return {"error": str(e)} async def get_storage_statistics(self) -> Dict[str, Any]: """Get storage statistics for different collections""" try: db = await database_connection.get_database() stats = {} # Sensor readings statistics sensor_stats = await db.command("collStats", "sensor_readings") stats["sensor_readings"] = { "count": sensor_stats.get("count", 0), "size_bytes": sensor_stats.get("size", 0), "avg_obj_size": sensor_stats.get("avgObjSize", 0), "storage_size": sensor_stats.get("storageSize", 0) } # Room metrics statistics room_stats = await db.command("collStats", "room_metrics") stats["room_metrics"] = { "count": room_stats.get("count", 0), "size_bytes": room_stats.get("size", 0), "avg_obj_size": room_stats.get("avgObjSize", 0), "storage_size": room_stats.get("storageSize", 0) } # System events statistics events_stats = await db.command("collStats", "system_events") stats["system_events"] = { "count": events_stats.get("count", 0), "size_bytes": events_stats.get("size", 0), "avg_obj_size": events_stats.get("avgObjSize", 0), "storage_size": events_stats.get("storageSize", 0) } # Sensor metadata statistics metadata_stats = await db.command("collStats", "sensor_metadata") stats["sensor_metadata"] = { "count": metadata_stats.get("count", 0), "size_bytes": metadata_stats.get("size", 0), "avg_obj_size": metadata_stats.get("avgObjSize", 0), "storage_size": metadata_stats.get("storageSize", 0) } # Calculate totals total_documents = sum(collection["count"] for collection in stats.values()) total_size = sum(collection["size_bytes"] for collection in stats.values()) total_storage = sum(collection["storage_size"] for collection in stats.values()) stats["totals"] = { "total_documents": total_documents, "total_size_bytes": total_size, "total_storage_bytes": total_storage, "total_size_mb": round(total_size / (1024 * 1024), 2), "total_storage_mb": round(total_storage / (1024 * 1024), 2) } return stats except Exception as e: logger.error(f"Error getting storage statistics: {e}") return {"error": str(e)} async def get_data_retention_info(self) -> Dict[str, Any]: """Get information about data retention policies and old data""" try: db = await database_connection.get_database() # Current date references now = datetime.utcnow() sensor_cutoff = now - timedelta(days=90) room_cutoff = now - timedelta(days=30) events_cutoff = now - timedelta(days=60) retention_info = {} # Sensor readings retention info old_sensor_count = await db.sensor_readings.count_documents({ "created_at": {"$lt": sensor_cutoff} }) retention_info["sensor_readings"] = { "retention_days": 90, "cutoff_date": sensor_cutoff.isoformat(), "old_records_count": old_sensor_count } # Room metrics retention info old_room_count = await db.room_metrics.count_documents({ "created_at": {"$lt": room_cutoff} }) retention_info["room_metrics"] = { "retention_days": 30, "cutoff_date": room_cutoff.isoformat(), "old_records_count": old_room_count } # System events retention info old_events_count = await db.system_events.count_documents({ "created_at": {"$lt": events_cutoff} }) retention_info["system_events"] = { "retention_days": 60, "cutoff_date": events_cutoff.isoformat(), "old_records_count": old_events_count } return retention_info except Exception as e: logger.error(f"Error getting retention info: {e}") return {"error": str(e)} def is_cleanup_running(self) -> bool: """Check if cleanup service is currently running""" return 
self.is_running and ( self.cleanup_task is not None and not self.cleanup_task.done() ) # Global cleanup service instance cleanup_service = CleanupService()
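# Illustrative usage sketch (an assumption, not part of the service itself): one way
# an entry point might drive the global cleanup_service. It presumes the package is
# importable (e.g. run via "python -m ...") and that database_connection is already
# configured to reach MongoDB; only methods defined above are used.
if __name__ == "__main__":  # pragma: no cover
    async def _demo() -> None:
        # Run a single cleanup pass, then report current storage usage.
        results = await cleanup_service.cleanup_old_data()
        print("Cleanup results:", results)
        stats = await cleanup_service.get_storage_statistics()
        print("Storage statistics:", stats)

    asyncio.run(_demo())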