Add data-ingestion-service for SA4CPS FTP integration
- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit inference support
- Publish parsed data to Redis topics for real-time dashboard simulation
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
microservices/data-ingestion-service/redis_publisher.py    484    Normal file
@@ -0,0 +1,484 @@
"""
Redis publisher for broadcasting time series data to multiple topics.
Handles data transformation, routing, and publishing for real-time simulation.
"""

import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
import hashlib
import uuid
from collections import defaultdict
import redis.asyncio as redis

logger = logging.getLogger(__name__)

class RedisPublisher:
    """Publishes time series data to Redis channels for real-time simulation"""

    def __init__(self, redis_client):
        self.redis = redis_client
        self.publishing_stats = defaultdict(int)
        self.topic_configs = {}
        self.message_cache = {}

        # Default topic configurations
        self.default_topics = {
            "energy_data": {
                "description": "General energy consumption data",
                "data_types": ["energy", "power", "consumption"],
                "format": "sensor_reading"
            },
            "community_consumption": {
                "description": "Community-level energy consumption",
                "data_types": ["consumption", "usage", "demand"],
                "format": "aggregated_data"
            },
            "real_time_metrics": {
                "description": "Real-time sensor metrics",
                "data_types": ["all"],
                "format": "metric_update"
            },
            "simulation_data": {
                "description": "Data for simulation purposes",
                "data_types": ["all"],
                "format": "simulation_point"
            },
            "community_generation": {
                "description": "Community energy generation data",
                "data_types": ["generation", "production", "renewable"],
                "format": "generation_data"
            },
            "grid_events": {
                "description": "Grid-related events and alerts",
                "data_types": ["events", "alerts", "grid_status"],
                "format": "event_data"
            }
        }

    async def initialize(self):
        """Initialize publisher with default topic configurations"""
        try:
            for topic, config in self.default_topics.items():
                await self.configure_topic(topic, config)

            logger.info(f"Initialized Redis publisher with {len(self.default_topics)} default topics")

        except Exception as e:
            logger.error(f"Error initializing Redis publisher: {e}")
            raise

    async def publish_time_series_data(self, topic: str, data: List[Dict[str, Any]], source_name: str):
        """Publish time series data to a specific Redis topic"""
        try:
            if not data:
                logger.warning(f"No data to publish to topic: {topic}")
                return

            logger.info(f"Publishing {len(data)} records to topic: {topic}")

            # Get topic configuration
            topic_config = self.topic_configs.get(topic, {})
            data_format = topic_config.get("format", "sensor_reading")

            # Process and publish each data point
            published_count = 0
            for record in data:
                try:
                    # Transform data based on topic format
                    message = await self._transform_data_for_topic(record, data_format, source_name)

                    # Add publishing metadata
                    message["published_at"] = datetime.utcnow().isoformat()
                    message["topic"] = topic
                    message["message_id"] = str(uuid.uuid4())

                    # Publish to Redis
                    await self.redis.publish(topic, json.dumps(message))

                    published_count += 1
                    self.publishing_stats[topic] += 1

                except Exception as e:
                    logger.warning(f"Error publishing record to {topic}: {e}")
                    continue

            logger.info(f"Successfully published {published_count}/{len(data)} records to {topic}")

            # Update topic statistics
            await self._update_topic_stats(topic, published_count)

        except Exception as e:
            logger.error(f"Error publishing to topic {topic}: {e}")
            raise

    async def publish_single_message(self, topic: str, message: Dict[str, Any]):
        """Publish a single message to a Redis topic"""
        try:
            # Add metadata
            message["published_at"] = datetime.utcnow().isoformat()
            message["topic"] = topic
            message["message_id"] = str(uuid.uuid4())

            # Publish
            await self.redis.publish(topic, json.dumps(message))

            self.publishing_stats[topic] += 1
            logger.debug(f"Published single message to {topic}")

        except Exception as e:
            logger.error(f"Error publishing single message to {topic}: {e}")
            raise

    async def publish_batch(self, topic_messages: Dict[str, List[Dict[str, Any]]]):
        """Publish multiple messages to multiple topics"""
        try:
            total_published = 0

            for topic, messages in topic_messages.items():
                for message in messages:
                    await self.publish_single_message(topic, message)
                    total_published += 1

            logger.info(f"Batch published {total_published} messages across {len(topic_messages)} topics")

        except Exception as e:
            logger.error(f"Error in batch publishing: {e}")
            raise

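    # Illustrative shape of the `topic_messages` argument accepted by publish_batch
    # (hypothetical sensor IDs and values, not taken from real SA4CPS data):
    #
    # {
    #     "energy_data": [
    #         {"sensor_id": "meter_001", "timestamp": 1700000000, "value": 1.2, "unit": "kWh"}
    #     ],
    #     "grid_events": [
    #         {"value": 1500, "unit": "kWh", "metadata": {"area": "district_a"}}
    #     ]
    # }
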
    async def configure_topic(self, topic: str, config: Dict[str, Any]):
        """Configure a topic with specific settings"""
        try:
            self.topic_configs[topic] = {
                "description": config.get("description", ""),
                "data_types": config.get("data_types", ["all"]),
                "format": config.get("format", "generic"),
                "created_at": datetime.utcnow().isoformat(),
                "message_count": 0
            }

            logger.info(f"Configured topic: {topic}")

        except Exception as e:
            logger.error(f"Error configuring topic {topic}: {e}")
            raise

    async def get_topics_info(self) -> Dict[str, Any]:
        """Get information about all configured topics"""
        try:
            topics_info = {}

            for topic, config in self.topic_configs.items():
                # Get recent message count
                message_count = self.publishing_stats.get(topic, 0)

                topics_info[topic] = {
                    **config,
                    "message_count": message_count,
                    "last_published": await self._get_last_published_time(topic)
                }

            return topics_info

        except Exception as e:
            logger.error(f"Error getting topics info: {e}")
            return {}

    async def get_publishing_stats(self) -> Dict[str, Any]:
        """Get publishing statistics"""
        try:
            total_messages = sum(self.publishing_stats.values())

            return {
                "total_messages_published": total_messages,
                "active_topics": len(self.topic_configs),
                "topic_stats": dict(self.publishing_stats),
                "last_updated": datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"Error getting publishing stats: {e}")
            return {}

    async def _transform_data_for_topic(self, record: Dict[str, Any], format_type: str, source_name: str) -> Dict[str, Any]:
        """Transform data based on topic format requirements"""
        try:
            base_message = {
                "source": source_name,
                "format": format_type
            }

            if format_type == "sensor_reading":
                return await self._format_as_sensor_reading(record, base_message)
            elif format_type == "aggregated_data":
                return await self._format_as_aggregated_data(record, base_message)
            elif format_type == "metric_update":
                return await self._format_as_metric_update(record, base_message)
            elif format_type == "simulation_point":
                return await self._format_as_simulation_point(record, base_message)
            elif format_type == "generation_data":
                return await self._format_as_generation_data(record, base_message)
            elif format_type == "event_data":
                return await self._format_as_event_data(record, base_message)
            else:
                # Generic format
                return {**base_message, **record}

        except Exception as e:
            logger.error(f"Error transforming data for format {format_type}: {e}")
            return {**base_message, **record}

    async def _format_as_sensor_reading(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as sensor reading for energy dashboard"""
        return {
            **base_message,
            "type": "sensor_data",
            "sensorId": record.get("sensor_id", "unknown"),
            "sensor_id": record.get("sensor_id", "unknown"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "room": record.get("metadata", {}).get("room"),
            "sensor_type": self._infer_sensor_type(record),
            "metadata": record.get("metadata", {}),
            "data_quality": await self._assess_data_quality(record)
        }

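    # For illustration, a "sensor_reading" message as published by
    # publish_time_series_data() would look roughly like this (hypothetical values):
    #
    # {
    #     "source": "sa4cps_ftp", "format": "sensor_reading", "type": "sensor_data",
    #     "sensorId": "meter_001", "sensor_id": "meter_001",
    #     "timestamp": 1700000000, "value": 42.5, "unit": "kWh",
    #     "room": "kitchen", "sensor_type": "energy",
    #     "metadata": {"room": "kitchen"}, "data_quality": 1.0,
    #     "published_at": "2024-01-01T00:00:00", "topic": "energy_data",
    #     "message_id": "<uuid4>"
    # }
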
    async def _format_as_aggregated_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as aggregated community data"""
        return {
            **base_message,
            "type": "aggregated_consumption",
            "community_id": record.get("sensor_id", "community_1"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "total_consumption": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "period": "real_time",
            "households": record.get("metadata", {}).get("households", 1),
            "average_per_household": record.get("value", 0) / max(record.get("metadata", {}).get("households", 1), 1)
        }

    async def _format_as_metric_update(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as real-time metric update"""
        return {
            **base_message,
            "type": "metric_update",
            "metric_id": record.get("sensor_id", "unknown"),
            "metric_type": self._infer_metric_type(record),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "current_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "trend": await self._calculate_trend(record),
            "metadata": record.get("metadata", {})
        }

    async def _format_as_simulation_point(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data for simulation purposes"""
        return {
            **base_message,
            "type": "simulation_data",
            "simulation_id": f"sim_{record.get('sensor_id', 'unknown')}",
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "energy_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "scenario": record.get("metadata", {}).get("scenario", "baseline"),
            "location": record.get("metadata", {}).get("location", "unknown"),
            "data_source": record.get("data_source", "real_community"),
            "quality_score": await self._assess_data_quality(record)
        }

    async def _format_as_generation_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as energy generation data"""
        return {
            **base_message,
            "type": "generation_data",
            "generator_id": record.get("sensor_id", "unknown"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "generation_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "generation_type": record.get("metadata", {}).get("type", "renewable"),
            "efficiency": record.get("metadata", {}).get("efficiency", 0.85),
            "weather_conditions": record.get("metadata", {}).get("weather")
        }

    async def _format_as_event_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as grid event"""
        return {
            **base_message,
            "type": "grid_event",
            "event_id": str(uuid.uuid4()),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "event_type": await self._classify_event_type(record),
            "severity": await self._assess_event_severity(record),
            "affected_area": record.get("metadata", {}).get("area", "unknown"),
            "value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "description": f"Energy event detected: {record.get('value', 0)} {record.get('unit', 'kWh')}"
        }

    def _infer_sensor_type(self, record: Dict[str, Any]) -> str:
        """Infer sensor type from record data"""
        value = record.get("value", 0)
        unit = record.get("unit", "").lower()
        metadata = record.get("metadata", {})

        if "generation" in str(metadata).lower() or "solar" in str(metadata).lower():
            return "generation"
        elif "temperature" in str(metadata).lower() or "temp" in str(metadata).lower():
            return "temperature"
        elif "co2" in str(metadata).lower() or "carbon" in str(metadata).lower():
            return "co2"
        elif "humidity" in str(metadata).lower():
            return "humidity"
        elif "motion" in str(metadata).lower() or "occupancy" in str(metadata).lower():
            return "motion"
        else:
            return "energy"

    def _infer_metric_type(self, record: Dict[str, Any]) -> str:
        """Infer metric type from record"""
        unit = record.get("unit", "").lower()

        if "wh" in unit:
            return "energy"
        elif "w" in unit:
            return "power"
        elif "°c" in unit or "celsius" in unit or "temp" in unit:
            return "temperature"
        elif "%" in unit:
            return "percentage"
        elif "ppm" in unit or "co2" in unit:
            return "co2"
        else:
            return "generic"

    async def _calculate_trend(self, record: Dict[str, Any]) -> str:
        """Calculate trend for metric (simplified)"""
        # This is a simplified trend calculation
        # In a real implementation, you'd compare with historical values
        value = record.get("value", 0)

        if value > 100:
            return "increasing"
        elif value < 50:
            return "decreasing"
        else:
            return "stable"

    async def _assess_data_quality(self, record: Dict[str, Any]) -> float:
        """Assess data quality score (0-1)"""
        score = 1.0

        # Check for missing fields
        if not record.get("timestamp"):
            score -= 0.2
        if not record.get("sensor_id"):
            score -= 0.2
        if record.get("value") is None:
            score -= 0.3
        if not record.get("unit"):
            score -= 0.1

        # Check for reasonable values
        value = record.get("value", 0)
        if value < 0:
            score -= 0.1
        if value > 10000:  # Unusually high energy value
            score -= 0.1

        return max(0.0, score)

    async def _classify_event_type(self, record: Dict[str, Any]) -> str:
        """Classify event type based on data"""
        value = record.get("value", 0)

        if value > 1000:
            return "high_consumption"
        elif value < 10:
            return "low_consumption"
        else:
            return "normal_operation"

    async def _assess_event_severity(self, record: Dict[str, Any]) -> str:
        """Assess event severity"""
        value = record.get("value", 0)

        if value > 5000:
            return "critical"
        elif value > 1000:
            return "warning"
        elif value < 5:
            return "info"
        else:
            return "normal"

    async def _update_topic_stats(self, topic: str, count: int):
        """Update topic statistics"""
        try:
            stats_key = f"topic_stats:{topic}"
            await self.redis.hincrby(stats_key, "message_count", count)
            await self.redis.hset(stats_key, "last_published", datetime.utcnow().isoformat())
            await self.redis.expire(stats_key, 86400)  # Expire after 24 hours

        except Exception as e:
            logger.error(f"Error updating topic stats: {e}")

    async def _get_last_published_time(self, topic: str) -> Optional[str]:
        """Get last published time for a topic"""
        try:
            stats_key = f"topic_stats:{topic}"
            return await self.redis.hget(stats_key, "last_published")
        except Exception as e:
            logger.debug(f"Error getting last published time for {topic}: {e}")
            return None

    async def create_data_stream(self, topic: str, data_stream: List[Dict[str, Any]],
                                 interval_seconds: float = 1.0):
        """Create a continuous data stream by publishing data at intervals"""
        try:
            logger.info(f"Starting data stream for topic {topic} with {len(data_stream)} points")

            for i, data_point in enumerate(data_stream):
                await self.publish_single_message(topic, data_point)

                # Add stream metadata
                stream_info = {
                    "type": "stream_info",
                    "topic": topic,
                    "current_point": i + 1,
                    "total_points": len(data_stream),
                    "progress": (i + 1) / len(data_stream) * 100,
                    "timestamp": datetime.utcnow().isoformat()
                }

                await self.publish_single_message(f"{topic}_stream_info", stream_info)

                # Wait before next data point
                if i < len(data_stream) - 1:
                    await asyncio.sleep(interval_seconds)

            logger.info(f"Completed data stream for topic {topic}")

        except Exception as e:
            logger.error(f"Error creating data stream: {e}")
            raise

    async def cleanup_old_stats(self, days: int = 7):
        """Clean up old topic statistics"""
        try:
            # Get all topic stat keys
            pattern = "topic_stats:*"
            keys = []

            async for key in self.redis.scan_iter(match=pattern):
                keys.append(key)

            # Delete old keys (Redis TTL should handle this, but cleanup anyway)
            if keys:
                await self.redis.delete(*keys)
                logger.info(f"Cleaned up {len(keys)} old topic stat keys")

        except Exception as e:
            logger.error(f"Error cleaning up old stats: {e}")
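A minimal usage sketch of the publisher (illustrative only, not part of the commit): it assumes a Redis instance reachable at localhost:6379, that this file is importable as `redis_publisher`, and uses a made-up reading and the default "energy_data" topic configured above.

import asyncio
import redis.asyncio as redis

from redis_publisher import RedisPublisher


async def main():
    client = redis.Redis(host="localhost", port=6379, decode_responses=True)
    publisher = RedisPublisher(client)
    await publisher.initialize()

    # One hypothetical reading, shaped like the records the data processor would emit
    readings = [{
        "sensor_id": "meter_001",
        "timestamp": 1700000000,
        "value": 42.5,
        "unit": "kWh",
        "metadata": {"room": "kitchen"},
    }]

    # Publishes to the "energy_data" channel using the sensor_reading format
    await publisher.publish_time_series_data("energy_data", readings, source_name="sa4cps_ftp")

    print(await publisher.get_publishing_stats())
    await client.aclose()  # requires redis-py >= 5; use close() on older versions


if __name__ == "__main__":
    asyncio.run(main())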