sac4cps-backend/microservices/data-ingestion-service/redis_publisher.py
rafaeldpsilva 5fdce00e5d Add data-ingestion-service for SA4CPS FTP integration
- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit inference support
- Publish parsed data to Redis topics for real-time dashboard simulation
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
2025-09-10 14:43:30 +01:00

"""
Redis publisher for broadcasting time series data to multiple topics.
Handles data transformation, routing, and publishing for real-time simulation.
"""
import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
import hashlib
import uuid
from collections import defaultdict
import redis.asyncio as redis
logger = logging.getLogger(__name__)
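
# Note: the redis_client handed to RedisPublisher is assumed to be a
# redis.asyncio.Redis instance created with decode_responses=True, so that
# hget() calls below return str rather than bytes.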
class RedisPublisher:
    """Publishes time series data to Redis channels for real-time simulation"""

    def __init__(self, redis_client):
        self.redis = redis_client
        self.publishing_stats = defaultdict(int)
        self.topic_configs = {}
        self.message_cache = {}

        # Default topic configurations
        self.default_topics = {
            "energy_data": {
                "description": "General energy consumption data",
                "data_types": ["energy", "power", "consumption"],
                "format": "sensor_reading"
            },
            "community_consumption": {
                "description": "Community-level energy consumption",
                "data_types": ["consumption", "usage", "demand"],
                "format": "aggregated_data"
            },
            "real_time_metrics": {
                "description": "Real-time sensor metrics",
                "data_types": ["all"],
                "format": "metric_update"
            },
            "simulation_data": {
                "description": "Data for simulation purposes",
                "data_types": ["all"],
                "format": "simulation_point"
            },
            "community_generation": {
                "description": "Community energy generation data",
                "data_types": ["generation", "production", "renewable"],
                "format": "generation_data"
            },
            "grid_events": {
                "description": "Grid-related events and alerts",
                "data_types": ["events", "alerts", "grid_status"],
                "format": "event_data"
            }
        }

    async def initialize(self):
        """Initialize publisher with default topic configurations"""
        try:
            for topic, config in self.default_topics.items():
                await self.configure_topic(topic, config)
            logger.info(f"Initialized Redis publisher with {len(self.default_topics)} default topics")
        except Exception as e:
            logger.error(f"Error initializing Redis publisher: {e}")
            raise

    async def publish_time_series_data(self, topic: str, data: List[Dict[str, Any]], source_name: str):
        """Publish time series data to a specific Redis topic"""
        try:
            if not data:
                logger.warning(f"No data to publish to topic: {topic}")
                return

            logger.info(f"Publishing {len(data)} records to topic: {topic}")

            # Get topic configuration
            topic_config = self.topic_configs.get(topic, {})
            data_format = topic_config.get("format", "sensor_reading")

            # Process and publish each data point
            published_count = 0
            for record in data:
                try:
                    # Transform data based on topic format
                    message = await self._transform_data_for_topic(record, data_format, source_name)

                    # Add publishing metadata
                    message["published_at"] = datetime.utcnow().isoformat()
                    message["topic"] = topic
                    message["message_id"] = str(uuid.uuid4())

                    # Publish to Redis
                    await self.redis.publish(topic, json.dumps(message))
                    published_count += 1
                    self.publishing_stats[topic] += 1
                except Exception as e:
                    logger.warning(f"Error publishing record to {topic}: {e}")
                    continue

            logger.info(f"Successfully published {published_count}/{len(data)} records to {topic}")

            # Update topic statistics
            await self._update_topic_stats(topic, published_count)
        except Exception as e:
            logger.error(f"Error publishing to topic {topic}: {e}")
            raise

    async def publish_single_message(self, topic: str, message: Dict[str, Any]):
        """Publish a single message to a Redis topic"""
        try:
            # Add metadata
            message["published_at"] = datetime.utcnow().isoformat()
            message["topic"] = topic
            message["message_id"] = str(uuid.uuid4())

            # Publish
            await self.redis.publish(topic, json.dumps(message))
            self.publishing_stats[topic] += 1
            logger.debug(f"Published single message to {topic}")
        except Exception as e:
            logger.error(f"Error publishing single message to {topic}: {e}")
            raise

    async def publish_batch(self, topic_messages: Dict[str, List[Dict[str, Any]]]):
        """Publish multiple messages to multiple topics"""
        try:
            total_published = 0
            for topic, messages in topic_messages.items():
                for message in messages:
                    await self.publish_single_message(topic, message)
                    total_published += 1
            logger.info(f"Batch published {total_published} messages across {len(topic_messages)} topics")
        except Exception as e:
            logger.error(f"Error in batch publishing: {e}")
            raise

    async def configure_topic(self, topic: str, config: Dict[str, Any]):
        """Configure a topic with specific settings"""
        try:
            self.topic_configs[topic] = {
                "description": config.get("description", ""),
                "data_types": config.get("data_types", ["all"]),
                "format": config.get("format", "generic"),
                "created_at": datetime.utcnow().isoformat(),
                "message_count": 0
            }
            logger.info(f"Configured topic: {topic}")
        except Exception as e:
            logger.error(f"Error configuring topic {topic}: {e}")
            raise

    async def get_topics_info(self) -> Dict[str, Any]:
        """Get information about all configured topics"""
        try:
            topics_info = {}
            for topic, config in self.topic_configs.items():
                # Get recent message count
                message_count = self.publishing_stats.get(topic, 0)
                topics_info[topic] = {
                    **config,
                    "message_count": message_count,
                    "last_published": await self._get_last_published_time(topic)
                }
            return topics_info
        except Exception as e:
            logger.error(f"Error getting topics info: {e}")
            return {}

    async def get_publishing_stats(self) -> Dict[str, Any]:
        """Get publishing statistics"""
        try:
            total_messages = sum(self.publishing_stats.values())
            return {
                "total_messages_published": total_messages,
                "active_topics": len(self.topic_configs),
                "topic_stats": dict(self.publishing_stats),
                "last_updated": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Error getting publishing stats: {e}")
            return {}
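
    # --- Per-topic formatting helpers --------------------------------------
    # Each _format_as_* method below reshapes one parsed record into the
    # message schema expected by subscribers of the corresponding topic;
    # _transform_data_for_topic dispatches on the topic's configured format.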
    async def _transform_data_for_topic(self, record: Dict[str, Any], format_type: str, source_name: str) -> Dict[str, Any]:
        """Transform data based on topic format requirements"""
        # Build base_message before the try block so the fallback in the
        # except clause cannot raise a NameError
        base_message = {
            "source": source_name,
            "format": format_type
        }
        try:
            if format_type == "sensor_reading":
                return await self._format_as_sensor_reading(record, base_message)
            elif format_type == "aggregated_data":
                return await self._format_as_aggregated_data(record, base_message)
            elif format_type == "metric_update":
                return await self._format_as_metric_update(record, base_message)
            elif format_type == "simulation_point":
                return await self._format_as_simulation_point(record, base_message)
            elif format_type == "generation_data":
                return await self._format_as_generation_data(record, base_message)
            elif format_type == "event_data":
                return await self._format_as_event_data(record, base_message)
            else:
                # Generic format
                return {**base_message, **record}
        except Exception as e:
            logger.error(f"Error transforming data for format {format_type}: {e}")
            # Fall back to the generic format so the record is still published
            return {**base_message, **record}

    async def _format_as_sensor_reading(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as sensor reading for energy dashboard"""
        return {
            **base_message,
            "type": "sensor_data",
            "sensorId": record.get("sensor_id", "unknown"),
            "sensor_id": record.get("sensor_id", "unknown"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "room": record.get("metadata", {}).get("room"),
            "sensor_type": self._infer_sensor_type(record),
            "metadata": record.get("metadata", {}),
            "data_quality": await self._assess_data_quality(record)
        }

    async def _format_as_aggregated_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as aggregated community data"""
        return {
            **base_message,
            "type": "aggregated_consumption",
            "community_id": record.get("sensor_id", "community_1"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "total_consumption": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "period": "real_time",
            "households": record.get("metadata", {}).get("households", 1),
            "average_per_household": record.get("value", 0) / max(record.get("metadata", {}).get("households", 1), 1)
        }

    async def _format_as_metric_update(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as real-time metric update"""
        return {
            **base_message,
            "type": "metric_update",
            "metric_id": record.get("sensor_id", "unknown"),
            "metric_type": self._infer_metric_type(record),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "current_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "trend": await self._calculate_trend(record),
            "metadata": record.get("metadata", {})
        }

    async def _format_as_simulation_point(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data for simulation purposes"""
        return {
            **base_message,
            "type": "simulation_data",
            "simulation_id": f"sim_{record.get('sensor_id', 'unknown')}",
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "energy_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "scenario": record.get("metadata", {}).get("scenario", "baseline"),
            "location": record.get("metadata", {}).get("location", "unknown"),
            "data_source": record.get("data_source", "real_community"),
            "quality_score": await self._assess_data_quality(record)
        }

    async def _format_as_generation_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as energy generation data"""
        return {
            **base_message,
            "type": "generation_data",
            "generator_id": record.get("sensor_id", "unknown"),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "generation_value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "generation_type": record.get("metadata", {}).get("type", "renewable"),
            "efficiency": record.get("metadata", {}).get("efficiency", 0.85),
            "weather_conditions": record.get("metadata", {}).get("weather")
        }

    async def _format_as_event_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
        """Format data as grid event"""
        return {
            **base_message,
            "type": "grid_event",
            "event_id": str(uuid.uuid4()),
            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
            "event_type": await self._classify_event_type(record),
            "severity": await self._assess_event_severity(record),
            "affected_area": record.get("metadata", {}).get("area", "unknown"),
            "value": record.get("value", 0),
            "unit": record.get("unit", "kWh"),
            "description": f"Energy event detected: {record.get('value', 0)} {record.get('unit', 'kWh')}"
        }
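
    # --- Inference and scoring heuristics -----------------------------------
    # The helpers below use simple keyword matching and fixed thresholds; the
    # trend, event-type, and severity cutoffs are illustrative defaults rather
    # than calibrated values (see the "simplified" notes in the code).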
    def _infer_sensor_type(self, record: Dict[str, Any]) -> str:
        """Infer sensor type from record data"""
        metadata = str(record.get("metadata", {})).lower()
        if "generation" in metadata or "solar" in metadata:
            return "generation"
        elif "temperature" in metadata or "temp" in metadata:
            return "temperature"
        elif "co2" in metadata or "carbon" in metadata:
            return "co2"
        elif "humidity" in metadata:
            return "humidity"
        elif "motion" in metadata or "occupancy" in metadata:
            return "motion"
        else:
            return "energy"

    def _infer_metric_type(self, record: Dict[str, Any]) -> str:
        """Infer metric type from record"""
        unit = record.get("unit", "").lower()
        if "wh" in unit:
            return "energy"
        elif "w" in unit:
            return "power"
        elif "°c" in unit or "celsius" in unit or "temp" in unit:
            return "temperature"
        elif "%" in unit:
            return "percentage"
        elif "ppm" in unit or "co2" in unit:
            return "co2"
        else:
            return "generic"

    async def _calculate_trend(self, record: Dict[str, Any]) -> str:
        """Calculate trend for metric (simplified)"""
        # This is a simplified trend calculation
        # In a real implementation, you'd compare with historical values
        value = record.get("value", 0)
        if value > 100:
            return "increasing"
        elif value < 50:
            return "decreasing"
        else:
            return "stable"

    async def _assess_data_quality(self, record: Dict[str, Any]) -> float:
        """Assess data quality score (0-1)"""
        score = 1.0

        # Check for missing fields
        if not record.get("timestamp"):
            score -= 0.2
        if not record.get("sensor_id"):
            score -= 0.2
        if record.get("value") is None:
            score -= 0.3
        if not record.get("unit"):
            score -= 0.1

        # Check for reasonable values
        value = record.get("value", 0)
        if value < 0:
            score -= 0.1
        if value > 10000:  # Unusually high energy value
            score -= 0.1

        return max(0.0, score)

    async def _classify_event_type(self, record: Dict[str, Any]) -> str:
        """Classify event type based on data"""
        value = record.get("value", 0)
        if value > 1000:
            return "high_consumption"
        elif value < 10:
            return "low_consumption"
        else:
            return "normal_operation"

    async def _assess_event_severity(self, record: Dict[str, Any]) -> str:
        """Assess event severity"""
        value = record.get("value", 0)
        if value > 5000:
            return "critical"
        elif value > 1000:
            return "warning"
        elif value < 5:
            return "info"
        else:
            return "normal"

    async def _update_topic_stats(self, topic: str, count: int):
        """Update topic statistics"""
        try:
            stats_key = f"topic_stats:{topic}"
            await self.redis.hincrby(stats_key, "message_count", count)
            await self.redis.hset(stats_key, "last_published", datetime.utcnow().isoformat())
            await self.redis.expire(stats_key, 86400)  # Expire after 24 hours
        except Exception as e:
            logger.error(f"Error updating topic stats: {e}")

    async def _get_last_published_time(self, topic: str) -> Optional[str]:
        """Get last published time for a topic"""
        try:
            stats_key = f"topic_stats:{topic}"
            return await self.redis.hget(stats_key, "last_published")
        except Exception as e:
            logger.debug(f"Error getting last published time for {topic}: {e}")
            return None

    async def create_data_stream(self, topic: str, data_stream: List[Dict[str, Any]],
                                 interval_seconds: float = 1.0):
        """Create a continuous data stream by publishing data at intervals"""
        try:
            logger.info(f"Starting data stream for topic {topic} with {len(data_stream)} points")
            for i, data_point in enumerate(data_stream):
                await self.publish_single_message(topic, data_point)

                # Add stream metadata
                stream_info = {
                    "type": "stream_info",
                    "topic": topic,
                    "current_point": i + 1,
                    "total_points": len(data_stream),
                    "progress": (i + 1) / len(data_stream) * 100,
                    "timestamp": datetime.utcnow().isoformat()
                }
                await self.publish_single_message(f"{topic}_stream_info", stream_info)

                # Wait before next data point
                if i < len(data_stream) - 1:
                    await asyncio.sleep(interval_seconds)
            logger.info(f"Completed data stream for topic {topic}")
        except Exception as e:
            logger.error(f"Error creating data stream: {e}")
            raise

    async def cleanup_old_stats(self, days: int = 7):
        """Clean up topic statistics older than the given number of days"""
        try:
            cutoff = datetime.utcnow() - timedelta(days=days)

            # Collect stat keys whose last_published is older than the cutoff
            # or missing (Redis TTL should expire them anyway, but clean up
            # explicitly as a safety net)
            old_keys = []
            async for key in self.redis.scan_iter(match="topic_stats:*"):
                last_published = await self.redis.hget(key, "last_published")
                if not last_published or datetime.fromisoformat(last_published) < cutoff:
                    old_keys.append(key)

            if old_keys:
                await self.redis.delete(*old_keys)
                logger.info(f"Cleaned up {len(old_keys)} old topic stat keys")
        except Exception as e:
            logger.error(f"Error cleaning up old stats: {e}")