first commit

rafaeldpsilva
2025-09-09 13:46:42 +01:00
commit a7a18e6295
77 changed files with 8678 additions and 0 deletions

@@ -0,0 +1,234 @@
"""
Data cleanup and maintenance service
Business Layer - handles data retention policies and system maintenance
"""
import asyncio
from datetime import datetime, timedelta
from typing import Dict, Any
import logging
from ..infrastructure.database_connection import database_connection
from ..infrastructure.repositories import SensorReadingRepository
logger = logging.getLogger(__name__)
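
# Retention windows enforced by cleanup_old_data() below:
#   sensor_readings: 90 days, room_metrics: 30 days, system_events: 60 days;
#   sensor_metadata is dropped once its sensor has no readings for 30 days.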

class CleanupService:
    """Service for data cleanup and maintenance operations"""

    def __init__(self):
        self.sensor_reading_repo = SensorReadingRepository()
        self.is_running = False
        self.cleanup_task = None

    async def start_scheduled_cleanup(self, interval_hours: int = 24) -> None:
        """Start scheduled cleanup process"""
        if self.is_running:
            logger.warning("Cleanup service is already running")
            return
        self.is_running = True
        self.cleanup_task = asyncio.create_task(self._cleanup_loop(interval_hours))
        logger.info(f"Started scheduled cleanup service (interval: {interval_hours} hours)")

    async def stop_scheduled_cleanup(self) -> None:
        """Stop scheduled cleanup process"""
        self.is_running = False
        if self.cleanup_task:
            self.cleanup_task.cancel()
            try:
                await self.cleanup_task
            except asyncio.CancelledError:
                pass
        logger.info("Cleanup service stopped")

    async def _cleanup_loop(self, interval_hours: int) -> None:
        """Main cleanup loop"""
        while self.is_running:
            try:
                await self.cleanup_old_data()
                # Wait for next cleanup interval
                await asyncio.sleep(interval_hours * 3600)  # Convert hours to seconds
            except Exception as e:
                logger.error(f"Error in scheduled cleanup: {e}")
                # Wait 1 hour before retrying on error
                await asyncio.sleep(3600)

    async def cleanup_old_data(self) -> Dict[str, int]:
        """Perform data cleanup based on retention policies"""
        try:
            cleanup_results = {}
            db = await database_connection.get_database()

            # Delete sensor readings older than 90 days
            sensor_retention_date = datetime.utcnow() - timedelta(days=90)
            sensor_result = await db.sensor_readings.delete_many({
                "created_at": {"$lt": sensor_retention_date}
            })
            cleanup_results["sensor_readings_deleted"] = sensor_result.deleted_count
            if sensor_result.deleted_count > 0:
                logger.info(f"Deleted {sensor_result.deleted_count} old sensor readings")

            # Delete room metrics older than 30 days
            room_retention_date = datetime.utcnow() - timedelta(days=30)
            room_result = await db.room_metrics.delete_many({
                "created_at": {"$lt": room_retention_date}
            })
            cleanup_results["room_metrics_deleted"] = room_result.deleted_count
            if room_result.deleted_count > 0:
                logger.info(f"Deleted {room_result.deleted_count} old room metrics")

            # Delete system events older than 60 days
            events_retention_date = datetime.utcnow() - timedelta(days=60)
            events_result = await db.system_events.delete_many({
                "created_at": {"$lt": events_retention_date}
            })
            cleanup_results["system_events_deleted"] = events_result.deleted_count
            if events_result.deleted_count > 0:
                logger.info(f"Deleted {events_result.deleted_count} old system events")

            # Clean up orphaned sensor metadata (sensors with no recent readings)
            orphaned_retention_date = datetime.utcnow() - timedelta(days=30)
            # Sensors that reported at least one reading in the last 30 days
            active_sensors = await db.sensor_readings.distinct("sensor_id", {
                "created_at": {"$gte": orphaned_retention_date}
            })
            # Delete metadata only for sensors that are both inactive and stale
            orphaned_result = await db.sensor_metadata.delete_many({
                "sensor_id": {"$nin": active_sensors},
                "last_seen": {"$lt": orphaned_retention_date}
            })
            cleanup_results["orphaned_metadata_deleted"] = orphaned_result.deleted_count
            if orphaned_result.deleted_count > 0:
                logger.info(f"Deleted {orphaned_result.deleted_count} orphaned sensor metadata records")

            return cleanup_results
        except Exception as e:
            logger.error(f"Error during data cleanup: {e}")
            return {"error": str(e)}

    async def get_storage_statistics(self) -> Dict[str, Any]:
        """Get storage statistics for different collections"""
        try:
            db = await database_connection.get_database()
            stats = {}

            # Per-collection statistics from MongoDB's collStats command
            for collection in ("sensor_readings", "room_metrics", "system_events", "sensor_metadata"):
                coll_stats = await db.command("collStats", collection)
                stats[collection] = {
                    "count": coll_stats.get("count", 0),
                    "size_bytes": coll_stats.get("size", 0),
                    "avg_obj_size": coll_stats.get("avgObjSize", 0),
                    "storage_size": coll_stats.get("storageSize", 0)
                }

            # Calculate totals across all collections
            total_documents = sum(c["count"] for c in stats.values())
            total_size = sum(c["size_bytes"] for c in stats.values())
            total_storage = sum(c["storage_size"] for c in stats.values())
            stats["totals"] = {
                "total_documents": total_documents,
                "total_size_bytes": total_size,
                "total_storage_bytes": total_storage,
                "total_size_mb": round(total_size / (1024 * 1024), 2),
                "total_storage_mb": round(total_storage / (1024 * 1024), 2)
            }
            return stats
        except Exception as e:
            logger.error(f"Error getting storage statistics: {e}")
            return {"error": str(e)}

    async def get_data_retention_info(self) -> Dict[str, Any]:
        """Get information about data retention policies and old data"""
        try:
            db = await database_connection.get_database()
            now = datetime.utcnow()
            retention_info = {}

            # Retention window (in days) per collection
            retention_policies = {
                "sensor_readings": 90,
                "room_metrics": 30,
                "system_events": 60,
            }
            for collection, days in retention_policies.items():
                cutoff = now - timedelta(days=days)
                old_count = await db[collection].count_documents({
                    "created_at": {"$lt": cutoff}
                })
                retention_info[collection] = {
                    "retention_days": days,
                    "cutoff_date": cutoff.isoformat(),
                    "old_records_count": old_count
                }
            return retention_info
        except Exception as e:
            logger.error(f"Error getting retention info: {e}")
            return {"error": str(e)}

    def is_cleanup_running(self) -> bool:
        """Check if cleanup service is currently running"""
        return self.is_running and (
            self.cleanup_task is not None and
            not self.cleanup_task.done()
        )


# Global cleanup service instance
cleanup_service = CleanupService()
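
For orientation, here is a minimal sketch of how this singleton could be driven from an application entry point. The import path, the 6-hour interval, and the placeholder sleep are assumptions for illustration; the commit itself only defines the service.

import asyncio

# Hypothetical import path - the diff does not show the package layout.
from app.business.cleanup_service import cleanup_service


async def main() -> None:
    # Kick off background cleanup every 6 hours (interval is illustrative).
    await cleanup_service.start_scheduled_cleanup(interval_hours=6)
    try:
        await asyncio.sleep(10)  # stand-in for the application's real work
    finally:
        await cleanup_service.stop_scheduled_cleanup()


asyncio.run(main())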