sac4cps-backend/microservices/data-ingestion-service/redis_publisher.py
rafaeldpsilva 5fdce00e5d Add data-ingestion-service for SA4CPS FTP integration
- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit inference support
- Publish parsed data to Redis topics for real-time dashboard simulation
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
2025-09-10 14:43:30 +01:00

"""
Redis publisher for broadcasting time series data to multiple topics.
Handles data transformation, routing, and publishing for real-time simulation.
"""
import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
import hashlib
import uuid
from collections import defaultdict
import redis.asyncio as redis
logger = logging.getLogger(__name__)
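
# Note: the redis_client handed to RedisPublisher is assumed to be a
# redis.asyncio.Redis instance created with decode_responses=True, so that
# hget() calls below return str rather than bytes.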
class RedisPublisher:
    """Publishes time series data to Redis channels for real-time simulation"""

    def __init__(self, redis_client):
        self.redis = redis_client
        self.publishing_stats = defaultdict(int)
        self.topic_configs = {}
        self.message_cache = {}

        # Default topic configurations
        self.default_topics = {
            "energy_data": {
                "description": "General energy consumption data",
                "data_types": ["energy", "power", "consumption"],
                "format": "sensor_reading"
            },
            "community_consumption": {
                "description": "Community-level energy consumption",
                "data_types": ["consumption", "usage", "demand"],
                "format": "aggregated_data"
            },
            "real_time_metrics": {
                "description": "Real-time sensor metrics",
                "data_types": ["all"],
                "format": "metric_update"
            },
            "simulation_data": {
                "description": "Data for simulation purposes",
                "data_types": ["all"],
                "format": "simulation_point"
            },
            "community_generation": {
                "description": "Community energy generation data",
                "data_types": ["generation", "production", "renewable"],
                "format": "generation_data"
            },
            "grid_events": {
                "description": "Grid-related events and alerts",
                "data_types": ["events", "alerts", "grid_status"],
                "format": "event_data"
            }
        }

    async def initialize(self):
        """Initialize publisher with default topic configurations"""
        try:
            for topic, config in self.default_topics.items():
                await self.configure_topic(topic, config)
            logger.info(f"Initialized Redis publisher with {len(self.default_topics)} default topics")
        except Exception as e:
            logger.error(f"Error initializing Redis publisher: {e}")
            raise

    async def publish_time_series_data(self, topic: str, data: List[Dict[str, Any]], source_name: str):
        """Publish time series data to a specific Redis topic"""
        try:
            if not data:
                logger.warning(f"No data to publish to topic: {topic}")
                return

            logger.info(f"Publishing {len(data)} records to topic: {topic}")

            # Get topic configuration
            topic_config = self.topic_configs.get(topic, {})
            data_format = topic_config.get("format", "sensor_reading")

            # Process and publish each data point
            published_count = 0
            for record in data:
                try:
                    # Transform data based on topic format
                    message = await self._transform_data_for_topic(record, data_format, source_name)

                    # Add publishing metadata
                    message["published_at"] = datetime.utcnow().isoformat()
                    message["topic"] = topic
                    message["message_id"] = str(uuid.uuid4())

                    # Publish to Redis
                    await self.redis.publish(topic, json.dumps(message))
                    published_count += 1
                    self.publishing_stats[topic] += 1
                except Exception as e:
                    logger.warning(f"Error publishing record to {topic}: {e}")
                    continue

            logger.info(f"Successfully published {published_count}/{len(data)} records to {topic}")

            # Update topic statistics
            await self._update_topic_stats(topic, published_count)
        except Exception as e:
            logger.error(f"Error publishing to topic {topic}: {e}")
            raise

    async def publish_single_message(self, topic: str, message: Dict[str, Any]):
        """Publish a single message to a Redis topic"""
        try:
            # Add metadata
            message["published_at"] = datetime.utcnow().isoformat()
            message["topic"] = topic
            message["message_id"] = str(uuid.uuid4())

            # Publish
            await self.redis.publish(topic, json.dumps(message))
            self.publishing_stats[topic] += 1
            logger.debug(f"Published single message to {topic}")
        except Exception as e:
            logger.error(f"Error publishing single message to {topic}: {e}")
            raise

    async def publish_batch(self, topic_messages: Dict[str, List[Dict[str, Any]]]):
        """Publish multiple messages to multiple topics"""
        try:
            total_published = 0
            for topic, messages in topic_messages.items():
                for message in messages:
                    await self.publish_single_message(topic, message)
                    total_published += 1
            logger.info(f"Batch published {total_published} messages across {len(topic_messages)} topics")
        except Exception as e:
            logger.error(f"Error in batch publishing: {e}")
            raise

    async def configure_topic(self, topic: str, config: Dict[str, Any]):
        """Configure a topic with specific settings"""
        try:
            self.topic_configs[topic] = {
                "description": config.get("description", ""),
                "data_types": config.get("data_types", ["all"]),
                "format": config.get("format", "generic"),
                "created_at": datetime.utcnow().isoformat(),
                "message_count": 0
            }
            logger.info(f"Configured topic: {topic}")
        except Exception as e:
            logger.error(f"Error configuring topic {topic}: {e}")
            raise

    async def get_topics_info(self) -> Dict[str, Any]:
        """Get information about all configured topics"""
        try:
            topics_info = {}
            for topic, config in self.topic_configs.items():
                # Get recent message count
                message_count = self.publishing_stats.get(topic, 0)
                topics_info[topic] = {
                    **config,
                    "message_count": message_count,
                    "last_published": await self._get_last_published_time(topic)
                }
            return topics_info
        except Exception as e:
            logger.error(f"Error getting topics info: {e}")
            return {}

    async def get_publishing_stats(self) -> Dict[str, Any]:
        """Get publishing statistics"""
        try:
            total_messages = sum(self.publishing_stats.values())
            return {
                "total_messages_published": total_messages,
                "active_topics": len(self.topic_configs),
                "topic_stats": dict(self.publishing_stats),
                "last_updated": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Error getting publishing stats: {e}")
            return {}
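
    # --- Per-topic formatting helpers --------------------------------------
    # Each _format_as_* method below reshapes one parsed record into the
    # message schema expected by subscribers of the corresponding topic;
    # _transform_data_for_topic dispatches on the topic's configured format.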
    async def _transform_data_for_topic(self, record: Dict[str, Any], format_type: str, source_name: str) -> Dict[str, Any]:
        """Transform data based on topic format requirements"""
        # Build base_message before the try block so the fallback in the
        # except clause cannot raise a NameError
        base_message = {
            "source": source_name,
            "format": format_type
        }
        try:
            if format_type == "sensor_reading":
                return await self._format_as_sensor_reading(record, base_message)
            elif format_type == "aggregated_data":
                return await self._format_as_aggregated_data(record, base_message)
            elif format_type == "metric_update":
                return await self._format_as_metric_update(record, base_message)
            elif format_type == "simulation_point":
                return await self._format_as_simulation_point(record, base_message)
            elif format_type == "generation_data":
                return await self._format_as_generation_data(record, base_message)
            elif format_type == "event_data":
                return await self._format_as_event_data(record, base_message)
            else:
                # Generic format
                return {**base_message, **record}
        except Exception as e:
            logger.error(f"Error transforming data for format {format_type}: {e}")
            # Fall back to the generic format so the record is still published
            return {**base_message, **record}

    async def _format_as_sensor_reading(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as sensor reading for energy dashboard"""
        return {
            **base_message,
            "type": "sensor_data",
            "sensorId": record.get("sensor_id", "unknown"),
            "sensor_id": record.get("sensor_id", "unknown"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "room": record.get("metadata", {}).get("room"),
            "sensor_type": self._infer_sensor_type(record),
            "metadata": record.get("metadata", {}),
            "data_quality": await self._assess_data_quality(record)
        }

    async def _format_as_aggregated_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as aggregated community data"""
        return {
            **base_message,
            "type": "aggregated_consumption",
            "community_id": record.get("sensor_id", "community_1"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "total_consumption": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "period": "real_time",
            "households": record.get("metadata", {}).get("households", 1),
            "average_per_household": record.get("value", 0) / max(record.get("metadata", {}).get("households", 1), 1)
        }

    async def _format_as_metric_update(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as real-time metric update"""
        return {
            **base_message,
            "type": "metric_update",
            "metric_id": record.get("sensor_id", "unknown"),
            "metric_type": self._infer_metric_type(record),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "current_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "trend": await self._calculate_trend(record),
            "metadata": record.get("metadata", {})
        }

    async def _format_as_simulation_point(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data for simulation purposes"""
        return {
            **base_message,
            "type": "simulation_data",
            "simulation_id": f"sim_{record.get('sensor_id', 'unknown')}",
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "energy_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "scenario": record.get("metadata", {}).get("scenario", "baseline"),
            "location": record.get("metadata", {}).get("location", "unknown"),
            "data_source": record.get("data_source", "real_community"),
            "quality_score": await self._assess_data_quality(record)
        }

    async def _format_as_generation_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as energy generation data"""
        return {
            **base_message,
            "type": "generation_data",
            "generator_id": record.get("sensor_id", "unknown"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "generation_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "generation_type": record.get("metadata", {}).get("type", "renewable"),
            "efficiency": record.get("metadata", {}).get("efficiency", 0.85),
            "weather_conditions": record.get("metadata", {}).get("weather")
        }

    async def _format_as_event_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as grid event"""
        return {
            **base_message,
            "type": "grid_event",
            "event_id": str(uuid.uuid4()),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "event_type": await self._classify_event_type(record),
            "severity": await self._assess_event_severity(record),
            "affected_area": record.get("metadata", {}).get("area", "unknown"),
            "value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "description": f"Energy event detected: {record.get('value', 0)} {record.get('unit', 'kWh')}"
        }
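
    # --- Inference and scoring heuristics -----------------------------------
    # The helpers below use simple keyword matching and fixed thresholds; the
    # trend, event-type, and severity cutoffs are illustrative defaults rather
    # than calibrated values (see the "simplified" notes in the code).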
    def _infer_sensor_type(self, record: Dict[str, Any]) -> str:
        """Infer sensor type from record data"""
        metadata = str(record.get("metadata", {})).lower()
        if "generation" in metadata or "solar" in metadata:
            return "generation"
        elif "temperature" in metadata or "temp" in metadata:
            return "temperature"
        elif "co2" in metadata or "carbon" in metadata:
            return "co2"
        elif "humidity" in metadata:
            return "humidity"
        elif "motion" in metadata or "occupancy" in metadata:
            return "motion"
        else:
            return "energy"

    def _infer_metric_type(self, record: Dict[str, Any]) -> str:
        """Infer metric type from record"""
        unit = record.get("unit", "").lower()
        if "wh" in unit:
            return "energy"
        elif "w" in unit:
            return "power"
        elif "°c" in unit or "celsius" in unit or "temp" in unit:
            return "temperature"
        elif "%" in unit:
            return "percentage"
        elif "ppm" in unit or "co2" in unit:
            return "co2"
        else:
            return "generic"

    async def _calculate_trend(self, record: Dict[str, Any]) -> str:
        """Calculate trend for metric (simplified)"""
        # This is a simplified trend calculation
        # In a real implementation, you'd compare with historical values
        value = record.get("value", 0)
        if value > 100:
            return "increasing"
        elif value < 50:
            return "decreasing"
        else:
            return "stable"

    async def _assess_data_quality(self, record: Dict[str, Any]) -> float:
        """Assess data quality score (0-1)"""
        score = 1.0

        # Check for missing fields
        if not record.get("timestamp"):
            score -= 0.2
        if not record.get("sensor_id"):
            score -= 0.2
        if record.get("value") is None:
            score -= 0.3
        if not record.get("unit"):
            score -= 0.1

        # Check for reasonable values
        value = record.get("value", 0)
        if value < 0:
            score -= 0.1
        if value > 10000:  # Unusually high energy value
            score -= 0.1

        return max(0.0, score)

    async def _classify_event_type(self, record: Dict[str, Any]) -> str:
        """Classify event type based on data"""
        value = record.get("value", 0)
        if value > 1000:
            return "high_consumption"
        elif value < 10:
            return "low_consumption"
        else:
            return "normal_operation"

    async def _assess_event_severity(self, record: Dict[str, Any]) -> str:
        """Assess event severity"""
        value = record.get("value", 0)
        if value > 5000:
            return "critical"
        elif value > 1000:
            return "warning"
        elif value < 5:
            return "info"
        else:
            return "normal"

    async def _update_topic_stats(self, topic: str, count: int):
        """Update topic statistics"""
        try:
            stats_key = f"topic_stats:{topic}"
            await self.redis.hincrby(stats_key, "message_count", count)
            await self.redis.hset(stats_key, "last_published", datetime.utcnow().isoformat())
            await self.redis.expire(stats_key, 86400)  # Expire after 24 hours
        except Exception as e:
            logger.error(f"Error updating topic stats: {e}")

    async def _get_last_published_time(self, topic: str) -> Optional[str]:
        """Get last published time for a topic"""
        try:
            stats_key = f"topic_stats:{topic}"
            return await self.redis.hget(stats_key, "last_published")
        except Exception as e:
            logger.debug(f"Error getting last published time for {topic}: {e}")
            return None

    async def create_data_stream(self, topic: str, data_stream: List[Dict[str, Any]],
                                 interval_seconds: float = 1.0):
        """Create a continuous data stream by publishing data at intervals"""
        try:
            logger.info(f"Starting data stream for topic {topic} with {len(data_stream)} points")
            for i, data_point in enumerate(data_stream):
                await self.publish_single_message(topic, data_point)

                # Add stream metadata
                stream_info = {
                    "type": "stream_info",
                    "topic": topic,
                    "current_point": i + 1,
                    "total_points": len(data_stream),
                    "progress": (i + 1) / len(data_stream) * 100,
                    "timestamp": datetime.utcnow().isoformat()
                }
                await self.publish_single_message(f"{topic}_stream_info", stream_info)

                # Wait before next data point
                if i < len(data_stream) - 1:
                    await asyncio.sleep(interval_seconds)
            logger.info(f"Completed data stream for topic {topic}")
        except Exception as e:
            logger.error(f"Error creating data stream: {e}")
            raise

    async def cleanup_old_stats(self, days: int = 7):
        """Clean up topic statistics older than the given number of days"""
        try:
            cutoff = datetime.utcnow() - timedelta(days=days)

            # Collect stat keys whose last_published is older than the cutoff
            # or missing (Redis TTL should expire them anyway, but clean up
            # explicitly as a safety net)
            old_keys = []
            async for key in self.redis.scan_iter(match="topic_stats:*"):
                last_published = await self.redis.hget(key, "last_published")
                if not last_published or datetime.fromisoformat(last_published) < cutoff:
                    old_keys.append(key)

            if old_keys:
                await self.redis.delete(*old_keys)
                logger.info(f"Cleaned up {len(old_keys)} old topic stat keys")
        except Exception as e:
            logger.error(f"Error cleaning up old stats: {e}")