first commit

layers/business/cleanup_service.py (new file, 234 lines)

@@ -0,0 +1,234 @@
"""
Data cleanup and maintenance service
Business Layer - handles data retention policies and system maintenance
"""
import asyncio
from datetime import datetime, timedelta
from typing import Dict, Any
import logging

from ..infrastructure.database_connection import database_connection
from ..infrastructure.repositories import SensorReadingRepository

logger = logging.getLogger(__name__)


class CleanupService:
    """Service for data cleanup and maintenance operations"""

    def __init__(self):
        self.sensor_reading_repo = SensorReadingRepository()
        self.is_running = False
        self.cleanup_task = None

    async def start_scheduled_cleanup(self, interval_hours: int = 24) -> None:
        """Start scheduled cleanup process"""
        if self.is_running:
            logger.warning("Cleanup service is already running")
            return

        self.is_running = True
        self.cleanup_task = asyncio.create_task(self._cleanup_loop(interval_hours))
        logger.info(f"Started scheduled cleanup service (interval: {interval_hours} hours)")

    async def stop_scheduled_cleanup(self) -> None:
        """Stop scheduled cleanup process"""
        self.is_running = False
        if self.cleanup_task:
            self.cleanup_task.cancel()
            try:
                await self.cleanup_task
            except asyncio.CancelledError:
                pass
        logger.info("Cleanup service stopped")

    async def _cleanup_loop(self, interval_hours: int) -> None:
        """Main cleanup loop"""
        while self.is_running:
            try:
                await self.cleanup_old_data()
                # Wait for next cleanup interval
                await asyncio.sleep(interval_hours * 3600)  # Convert hours to seconds
            except Exception as e:
                logger.error(f"Error in scheduled cleanup: {e}")
                # Wait 1 hour before retrying on error
                await asyncio.sleep(3600)

    async def cleanup_old_data(self) -> Dict[str, int]:
        """Perform data cleanup based on retention policies"""
        try:
            cleanup_results = {}
            db = await database_connection.get_database()

            # Delete sensor readings older than 90 days
            sensor_retention_date = datetime.utcnow() - timedelta(days=90)
            sensor_result = await db.sensor_readings.delete_many({
                "created_at": {"$lt": sensor_retention_date}
            })
            cleanup_results["sensor_readings_deleted"] = sensor_result.deleted_count

            if sensor_result.deleted_count > 0:
                logger.info(f"Deleted {sensor_result.deleted_count} old sensor readings")

            # Delete room metrics older than 30 days
            room_retention_date = datetime.utcnow() - timedelta(days=30)
            room_result = await db.room_metrics.delete_many({
                "created_at": {"$lt": room_retention_date}
            })
            cleanup_results["room_metrics_deleted"] = room_result.deleted_count

            if room_result.deleted_count > 0:
                logger.info(f"Deleted {room_result.deleted_count} old room metrics")

            # Delete system events older than 60 days
            events_retention_date = datetime.utcnow() - timedelta(days=60)
            events_result = await db.system_events.delete_many({
                "created_at": {"$lt": events_retention_date}
            })
            cleanup_results["system_events_deleted"] = events_result.deleted_count

            if events_result.deleted_count > 0:
                logger.info(f"Deleted {events_result.deleted_count} old system events")

            # Clean up orphaned sensor metadata (sensors with no recent readings)
            orphaned_retention_date = datetime.utcnow() - timedelta(days=30)

            # Find sensors that DO have recent readings; metadata for any other
            # sensor is a candidate for deletion
            active_sensors = await db.sensor_readings.distinct("sensor_id", {
                "created_at": {"$gte": orphaned_retention_date}
            })

            orphaned_result = await db.sensor_metadata.delete_many({
                "sensor_id": {"$nin": active_sensors},
                "last_seen": {"$lt": orphaned_retention_date}
            })
            cleanup_results["orphaned_metadata_deleted"] = orphaned_result.deleted_count

            if orphaned_result.deleted_count > 0:
                logger.info(f"Deleted {orphaned_result.deleted_count} orphaned sensor metadata records")

            return cleanup_results

        except Exception as e:
            logger.error(f"Error during data cleanup: {e}")
            return {"error": str(e)}

    async def get_storage_statistics(self) -> Dict[str, Any]:
        """Get storage statistics for different collections"""
        try:
            db = await database_connection.get_database()

            stats = {}

            # Sensor readings statistics
            sensor_stats = await db.command("collStats", "sensor_readings")
            stats["sensor_readings"] = {
                "count": sensor_stats.get("count", 0),
                "size_bytes": sensor_stats.get("size", 0),
                "avg_obj_size": sensor_stats.get("avgObjSize", 0),
                "storage_size": sensor_stats.get("storageSize", 0)
            }

            # Room metrics statistics
            room_stats = await db.command("collStats", "room_metrics")
            stats["room_metrics"] = {
                "count": room_stats.get("count", 0),
                "size_bytes": room_stats.get("size", 0),
                "avg_obj_size": room_stats.get("avgObjSize", 0),
                "storage_size": room_stats.get("storageSize", 0)
            }

            # System events statistics
            events_stats = await db.command("collStats", "system_events")
            stats["system_events"] = {
                "count": events_stats.get("count", 0),
                "size_bytes": events_stats.get("size", 0),
                "avg_obj_size": events_stats.get("avgObjSize", 0),
                "storage_size": events_stats.get("storageSize", 0)
            }

            # Sensor metadata statistics
            metadata_stats = await db.command("collStats", "sensor_metadata")
            stats["sensor_metadata"] = {
                "count": metadata_stats.get("count", 0),
                "size_bytes": metadata_stats.get("size", 0),
                "avg_obj_size": metadata_stats.get("avgObjSize", 0),
                "storage_size": metadata_stats.get("storageSize", 0)
            }

            # Calculate totals (before the "totals" key is added to stats)
            total_documents = sum(collection["count"] for collection in stats.values())
            total_size = sum(collection["size_bytes"] for collection in stats.values())
            total_storage = sum(collection["storage_size"] for collection in stats.values())

            stats["totals"] = {
                "total_documents": total_documents,
                "total_size_bytes": total_size,
                "total_storage_bytes": total_storage,
                "total_size_mb": round(total_size / (1024 * 1024), 2),
                "total_storage_mb": round(total_storage / (1024 * 1024), 2)
            }

            return stats

        except Exception as e:
            logger.error(f"Error getting storage statistics: {e}")
            return {"error": str(e)}

    async def get_data_retention_info(self) -> Dict[str, Any]:
        """Get information about data retention policies and old data"""
        try:
            db = await database_connection.get_database()

            # Current date references
            now = datetime.utcnow()
            sensor_cutoff = now - timedelta(days=90)
            room_cutoff = now - timedelta(days=30)
            events_cutoff = now - timedelta(days=60)

            retention_info = {}

            # Sensor readings retention info
            old_sensor_count = await db.sensor_readings.count_documents({
                "created_at": {"$lt": sensor_cutoff}
            })
            retention_info["sensor_readings"] = {
                "retention_days": 90,
                "cutoff_date": sensor_cutoff.isoformat(),
                "old_records_count": old_sensor_count
            }

            # Room metrics retention info
            old_room_count = await db.room_metrics.count_documents({
                "created_at": {"$lt": room_cutoff}
            })
            retention_info["room_metrics"] = {
                "retention_days": 30,
                "cutoff_date": room_cutoff.isoformat(),
                "old_records_count": old_room_count
            }

            # System events retention info
            old_events_count = await db.system_events.count_documents({
                "created_at": {"$lt": events_cutoff}
            })
            retention_info["system_events"] = {
                "retention_days": 60,
                "cutoff_date": events_cutoff.isoformat(),
                "old_records_count": old_events_count
            }

            return retention_info

        except Exception as e:
            logger.error(f"Error getting retention info: {e}")
            return {"error": str(e)}

    def is_cleanup_running(self) -> bool:
        """Check if cleanup service is currently running"""
        return self.is_running and (
            self.cleanup_task is not None and
            not self.cleanup_task.done()
        )


# Global cleanup service instance
cleanup_service = CleanupService()
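
For anyone trying this commit out: a minimal sketch of how the module-level cleanup_service instance might be wired into an application entrypoint. The main() wrapper, the 6-hour interval, and the placeholder sleep standing in for the application's real work are illustrative assumptions, not part of this commit; it also assumes database_connection is configured before the first cleanup pass runs.

# Hypothetical usage sketch, not part of the committed file.
import asyncio

from layers.business.cleanup_service import cleanup_service


async def main() -> None:
    # Start the background loop; 6 hours is an arbitrary example interval
    await cleanup_service.start_scheduled_cleanup(interval_hours=6)
    try:
        # ... the real application would run here ...
        await asyncio.sleep(3600)
    finally:
        # Cancel the background task and wait for it to unwind
        await cleanup_service.stop_scheduled_cleanup()


if __name__ == "__main__":
    asyncio.run(main())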
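A design note rather than a change request: the three purely time-based retention rules could also be enforced server-side with MongoDB TTL indexes, leaving only the orphaned-metadata pass (which needs the $nin query against active sensors) to the scheduled loop. A sketch under that assumption, using Motor-style async collections; the function name create_ttl_indexes is hypothetical:

# Hypothetical alternative: let MongoDB expire documents itself via TTL
# indexes instead of periodic delete_many() calls. One-time setup per
# collection; expireAfterSeconds counts from the indexed "created_at" value,
# which must be stored as a BSON date (datetime objects qualify).
async def create_ttl_indexes(db) -> None:
    await db.sensor_readings.create_index("created_at", expireAfterSeconds=90 * 24 * 3600)
    await db.room_metrics.create_index("created_at", expireAfterSeconds=30 * 24 * 3600)
    await db.system_events.create_index("created_at", expireAfterSeconds=60 * 24 * 3600)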