Simplify data ingestion service

2025-09-10 15:47:10 +01:00
parent 13556347b0
commit b7e734e0d2
13 changed files with 474 additions and 4440 deletions
--- a/microservices/data-ingestion-service/Dockerfile
+++ b/microservices/data-ingestion-service/Dockerfile
@@ -39,4 +39,5 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8008/health || exit 1

 # Start the application from src directory
-CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8008", "--reload"]
+WORKDIR /app/src
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8008"]
--- a/microservices/data-ingestion-service/src/init.py
+++ b/microservices/data-ingestion-service/src/init.py
@@ -1 +0,0 @@
-# Source package initialization
--- a/microservices/data-ingestion-service/src/data_validator.py
+++ b/microservices/data-ingestion-service/src/data_validator.py
@@ -1,710 +0,0 @@
-"""
-Data validation and enrichment for time series data.
-Provides quality assessment, metadata enrichment, and data transformation capabilities.
-"""
-
-import asyncio
-import json
-import logging
-import statistics
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Optional, Tuple
-import hashlib
-import re
-from collections import defaultdict
-import math
-
-logger = logging.getLogger(__name__)
-
-class DataValidator:
-    """Validates, enriches, and transforms time series data"""
-    
-    def __init__(self, db, redis_client):
-        self.db = db
-        self.redis = redis_client
-        self.validation_rules = {}
-        self.enrichment_cache = {}
-        self.quality_thresholds = {
-            "completeness": 0.8,
-            "accuracy": 0.9,
-            "consistency": 0.85,
-            "timeliness": 0.9
-        }
-    
-    async def initialize(self):
-        """Initialize validator with default rules and configurations"""
-        try:
-            await self._load_validation_rules()
-            await self._load_enrichment_metadata()
-            logger.info("Data validator initialized successfully")
-        except Exception as e:
-            logger.error(f"Error initializing data validator: {e}")
-            raise
-    
-    async def validate_and_enrich_data(self, data: List[Dict[str, Any]], 
-                                     source_name: str) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
-        """Validate and enrich time series data, returning processed data and quality report"""
-        try:
-            logger.info(f"Validating and enriching {len(data)} records from {source_name}")
-            
-            # Initialize validation report
-            quality_report = {
-                "source": source_name,
-                "total_records": len(data),
-                "processed_records": 0,
-                "rejected_records": 0,
-                "quality_scores": {},
-                "issues_found": [],
-                "processing_time": datetime.utcnow().isoformat()
-            }
-            
-            enriched_data = []
-            
-            # Process each record
-            for i, record in enumerate(data):
-                try:
-                    # Validate record
-                    validation_result = await self._validate_record(record, source_name)
-                    
-                    if validation_result["is_valid"]:
-                        # Enrich the record
-                        enriched_record = await self._enrich_record(record, source_name, validation_result)
-                        enriched_data.append(enriched_record)
-                        quality_report["processed_records"] += 1
-                    else:
-                        quality_report["rejected_records"] += 1
-                        quality_report["issues_found"].extend(validation_result["issues"])
-                        logger.warning(f"Record {i} rejected: {validation_result['issues']}")
-                
-                except Exception as e:
-                    logger.error(f"Error processing record {i}: {e}")
-                    quality_report["rejected_records"] += 1
-                    quality_report["issues_found"].append(f"Processing error: {str(e)}")
-            
-            # Calculate overall quality scores
-            quality_report["quality_scores"] = await self._calculate_quality_scores(enriched_data, quality_report)
-            
-            # Store quality report
-            await self._store_quality_report(quality_report, source_name)
-            
-            logger.info(f"Validation complete: {quality_report['processed_records']}/{quality_report['total_records']} records processed")
-            
-            return enriched_data, quality_report
-        
-        except Exception as e:
-            logger.error(f"Error in data validation and enrichment: {e}")
-            raise
-    
-    async def _validate_record(self, record: Dict[str, Any], source_name: str) -> Dict[str, Any]:
-        """Validate a single record against quality rules"""
-        validation_result = {
-            "is_valid": True,
-            "issues": [],
-            "quality_metrics": {}
-        }
-        
-        try:
-            # Check required fields
-            required_fields = ["sensor_id", "timestamp", "value"]
-            for field in required_fields:
-                if field not in record or record[field] is None:
-                    validation_result["is_valid"] = False
-                    validation_result["issues"].append(f"Missing required field: {field}")
-            
-            if not validation_result["is_valid"]:
-                return validation_result
-            
-            # Validate timestamp
-            timestamp_validation = await self._validate_timestamp(record["timestamp"])
-            validation_result["quality_metrics"]["timestamp_quality"] = timestamp_validation["score"]
-            if not timestamp_validation["is_valid"]:
-                validation_result["issues"].extend(timestamp_validation["issues"])
-            
-            # Validate numeric value
-            value_validation = await self._validate_numeric_value(record["value"], record.get("unit"))
-            validation_result["quality_metrics"]["value_quality"] = value_validation["score"]
-            if not value_validation["is_valid"]:
-                validation_result["issues"].extend(value_validation["issues"])
-            
-            # Validate sensor ID format
-            sensor_validation = await self._validate_sensor_id(record["sensor_id"])
-            validation_result["quality_metrics"]["sensor_id_quality"] = sensor_validation["score"]
-            if not sensor_validation["is_valid"]:
-                validation_result["issues"].extend(sensor_validation["issues"])
-            
-            # Check for duplicates
-            duplicate_check = await self._check_for_duplicates(record, source_name)
-            validation_result["quality_metrics"]["uniqueness"] = duplicate_check["score"]
-            if not duplicate_check["is_unique"]:
-                validation_result["issues"].extend(duplicate_check["issues"])
-            
-            # Calculate overall validity
-            if validation_result["issues"]:
-                # Allow minor issues but flag major ones
-                major_issues = [issue for issue in validation_result["issues"] 
-                              if "Missing required field" in issue or "Invalid" in issue]
-                validation_result["is_valid"] = len(major_issues) == 0
-        
-        except Exception as e:
-            logger.error(f"Error validating record: {e}")
-            validation_result["is_valid"] = False
-            validation_result["issues"].append(f"Validation error: {str(e)}")
-        
-        return validation_result
-    
-    async def _enrich_record(self, record: Dict[str, Any], source_name: str, 
-                           validation_result: Dict[str, Any]) -> Dict[str, Any]:
-        """Enrich a record with additional metadata and derived fields"""
-        try:
-            enriched = record.copy()
-            
-            # Add validation metadata
-            enriched["data_quality"] = {
-                "quality_score": statistics.mean(validation_result["quality_metrics"].values()) if validation_result["quality_metrics"] else 0.0,
-                "quality_metrics": validation_result["quality_metrics"],
-                "validation_timestamp": datetime.utcnow().isoformat()
-            }
-            
-            # Add source information
-            enriched["source_info"] = {
-                "source_name": source_name,
-                "ingestion_time": datetime.utcnow().isoformat(),
-                "record_id": hashlib.md5(f"{source_name}_{record.get('sensor_id', 'unknown')}_{record.get('timestamp', 0)}".encode()).hexdigest()
-            }
-            
-            # Normalize timestamp format
-            enriched["timestamp"] = await self._normalize_timestamp(record["timestamp"])
-            enriched["timestamp_iso"] = datetime.fromtimestamp(enriched["timestamp"]).isoformat()
-            
-            # Infer and enrich sensor type
-            sensor_type_info = await self._infer_sensor_type(record)
-            enriched["sensor_type"] = sensor_type_info["type"]
-            enriched["sensor_category"] = sensor_type_info["category"]
-            
-            # Add unit standardization
-            unit_info = await self._standardize_unit(record.get("unit"))
-            enriched["unit"] = unit_info["standard_unit"]
-            enriched["unit_info"] = unit_info
-            
-            # Calculate derived metrics
-            derived_metrics = await self._calculate_derived_metrics(enriched, source_name)
-            enriched["derived_metrics"] = derived_metrics
-            
-            # Add location and context information
-            context_info = await self._enrich_with_context(enriched, source_name)
-            enriched["metadata"] = {**enriched.get("metadata", {}), **context_info}
-            
-            # Add temporal features
-            temporal_features = await self._extract_temporal_features(enriched["timestamp"])
-            enriched["temporal"] = temporal_features
-            
-            # Energy-specific enrichments
-            if sensor_type_info["category"] == "energy":
-                energy_enrichment = await self._enrich_energy_data(enriched)
-                enriched.update(energy_enrichment)
-            
-            return enriched
-        
-        except Exception as e:
-            logger.error(f"Error enriching record: {e}")
-            return record
-    
-    async def _validate_timestamp(self, timestamp) -> Dict[str, Any]:
-        """Validate timestamp format and reasonableness"""
-        result = {"is_valid": True, "issues": [], "score": 1.0}
-        
-        try:
-            # Convert to numeric timestamp
-            if isinstance(timestamp, str):
-                try:
-                    # Try parsing ISO format
-                    dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
-                    ts = dt.timestamp()
-                except:
-                    # Try parsing as unix timestamp string
-                    ts = float(timestamp)
-            else:
-                ts = float(timestamp)
-            
-            # Check if timestamp is reasonable (not too far in past/future)
-            current_time = datetime.utcnow().timestamp()
-            max_age = 365 * 24 * 3600  # 1 year
-            max_future = 24 * 3600    # 1 day
-            
-            if ts < current_time - max_age:
-                result["issues"].append("Timestamp too old (more than 1 year)")
-                result["score"] -= 0.3
-            elif ts > current_time + max_future:
-                result["issues"].append("Timestamp too far in future")
-                result["score"] -= 0.3
-            
-            # Check for reasonable precision (not too precise for energy data)
-            if ts != int(ts) and len(str(ts).split('.')[1]) > 3:
-                result["score"] -= 0.1  # Minor issue
-        
-        except (ValueError, TypeError) as e:
-            result["is_valid"] = False
-            result["issues"].append(f"Invalid timestamp format: {e}")
-            result["score"] = 0.0
-        
-        return result
-    
-    async def _validate_numeric_value(self, value, unit: Optional[str] = None) -> Dict[str, Any]:
-        """Validate numeric value reasonableness"""
-        result = {"is_valid": True, "issues": [], "score": 1.0}
-        
-        try:
-            numeric_value = float(value)
-            
-            # Check for negative values (usually invalid for energy data)
-            if numeric_value < 0:
-                result["issues"].append("Negative energy value")
-                result["score"] -= 0.4
-            
-            # Check for unreasonably large values
-            unit_str = (unit or "").lower()
-            if "wh" in unit_str:
-                # Energy values
-                if numeric_value > 100000:  # >100kWh seems excessive for single reading
-                    result["issues"].append("Unusually high energy value")
-                    result["score"] -= 0.2
-            elif "w" in unit_str:
-                # Power values
-                if numeric_value > 50000:   # >50kW seems excessive
-                    result["issues"].append("Unusually high power value")
-                    result["score"] -= 0.2
-            
-            # Check for zero values (might indicate sensor issues)
-            if numeric_value == 0:
-                result["score"] -= 0.1
-            
-            # Check for NaN or infinity
-            if math.isnan(numeric_value) or math.isinf(numeric_value):
-                result["is_valid"] = False
-                result["issues"].append("Invalid numeric value (NaN or Infinity)")
-                result["score"] = 0.0
-        
-        except (ValueError, TypeError) as e:
-            result["is_valid"] = False
-            result["issues"].append(f"Non-numeric value: {e}")
-            result["score"] = 0.0
-        
-        return result
-    
-    async def _validate_sensor_id(self, sensor_id: str) -> Dict[str, Any]:
-        """Validate sensor ID format and consistency"""
-        result = {"is_valid": True, "issues": [], "score": 1.0}
-        
-        try:
-            if not isinstance(sensor_id, str) or len(sensor_id) == 0:
-                result["is_valid"] = False
-                result["issues"].append("Empty or invalid sensor ID")
-                result["score"] = 0.0
-                return result
-            
-            # Check length
-            if len(sensor_id) < 3:
-                result["issues"].append("Very short sensor ID")
-                result["score"] -= 0.2
-            elif len(sensor_id) > 50:
-                result["issues"].append("Very long sensor ID")
-                result["score"] -= 0.1
-            
-            # Check for reasonable characters
-            if not re.match(r'^[a-zA-Z0-9_\-\.]+$', sensor_id):
-                result["issues"].append("Sensor ID contains unusual characters")
-                result["score"] -= 0.1
-        
-        except Exception as e:
-            result["issues"].append(f"Sensor ID validation error: {e}")
-            result["score"] -= 0.1
-        
-        return result
-    
-    async def _check_for_duplicates(self, record: Dict[str, Any], source_name: str) -> Dict[str, Any]:
-        """Check for duplicate records"""
-        result = {"is_unique": True, "issues": [], "score": 1.0}
-        
-        try:
-            # Create record signature
-            signature = hashlib.md5(
-                f"{source_name}_{record.get('sensor_id')}_{record.get('timestamp')}_{record.get('value')}".encode()
-            ).hexdigest()
-            
-            # Check cache for recent duplicates
-            cache_key = f"record_signature:{signature}"
-            exists = await self.redis.exists(cache_key)
-            
-            if exists:
-                result["is_unique"] = False
-                result["issues"].append("Duplicate record detected")
-                result["score"] = 0.0
-            else:
-                # Store signature with short expiration (1 hour)
-                await self.redis.setex(cache_key, 3600, "1")
-        
-        except Exception as e:
-            logger.debug(f"Error checking duplicates: {e}")
-            # Don't fail validation for cache errors
-        
-        return result
-    
-    async def _normalize_timestamp(self, timestamp) -> int:
-        """Normalize timestamp to unix timestamp"""
-        try:
-            if isinstance(timestamp, str):
-                try:
-                    # Try ISO format first
-                    dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
-                    return int(dt.timestamp())
-                except:
-                    # Try as unix timestamp string
-                    return int(float(timestamp))
-            else:
-                return int(float(timestamp))
-        except:
-            # Fallback to current time
-            return int(datetime.utcnow().timestamp())
-    
-    async def _infer_sensor_type(self, record: Dict[str, Any]) -> Dict[str, str]:
-        """Infer sensor type from record data"""
-        sensor_id = record.get("sensor_id", "").lower()
-        unit = (record.get("unit", "") or "").lower()
-        value = record.get("value", 0)
-        metadata = record.get("metadata", {})
-        
-        # Energy sensors
-        if "wh" in unit or "energy" in sensor_id or "consumption" in sensor_id:
-            return {"type": "energy", "category": "energy"}
-        elif "w" in unit and "wh" not in unit:
-            return {"type": "power", "category": "energy"}
-        
-        # Environmental sensors
-        elif "temp" in sensor_id or "°c" in unit or "celsius" in unit:
-            return {"type": "temperature", "category": "environmental"}
-        elif "humid" in sensor_id or "%" in unit:
-            return {"type": "humidity", "category": "environmental"}
-        elif "co2" in sensor_id or "ppm" in unit:
-            return {"type": "co2", "category": "environmental"}
-        
-        # Motion/occupancy sensors
-        elif "motion" in sensor_id or "occupancy" in sensor_id or ("motion" in str(metadata).lower()):
-            return {"type": "motion", "category": "occupancy"}
-        
-        # Generation sensors
-        elif "generation" in sensor_id or "solar" in sensor_id or "generation" in str(metadata).lower():
-            return {"type": "generation", "category": "energy"}
-        
-        # Default to energy if unclear
-        else:
-            return {"type": "energy", "category": "energy"}
-    
-    async def _standardize_unit(self, unit: Optional[str]) -> Dict[str, Any]:
-        """Standardize unit format"""
-        if not unit:
-            return {"standard_unit": "kWh", "conversion_factor": 1.0, "unit_type": "energy"}
-        
-        unit_lower = unit.lower().strip()
-        
-        # Energy units
-        if unit_lower in ["kwh", "kw-h", "kw_h"]:
-            return {"standard_unit": "kWh", "conversion_factor": 1.0, "unit_type": "energy"}
-        elif unit_lower in ["wh", "w-h", "w_h"]:
-            return {"standard_unit": "kWh", "conversion_factor": 0.001, "unit_type": "energy"}
-        elif unit_lower in ["mwh", "mw-h", "mw_h"]:
-            return {"standard_unit": "kWh", "conversion_factor": 1000.0, "unit_type": "energy"}
-        
-        # Power units
-        elif unit_lower in ["kw", "kilowatt", "kilowatts"]:
-            return {"standard_unit": "kW", "conversion_factor": 1.0, "unit_type": "power"}
-        elif unit_lower in ["w", "watt", "watts"]:
-            return {"standard_unit": "kW", "conversion_factor": 0.001, "unit_type": "power"}
-        elif unit_lower in ["mw", "megawatt", "megawatts"]:
-            return {"standard_unit": "kW", "conversion_factor": 1000.0, "unit_type": "power"}
-        
-        # Temperature units
-        elif unit_lower in ["°c", "celsius", "c"]:
-            return {"standard_unit": "°C", "conversion_factor": 1.0, "unit_type": "temperature"}
-        elif unit_lower in ["°f", "fahrenheit", "f"]:
-            return {"standard_unit": "°C", "conversion_factor": 1.0, "unit_type": "temperature", "requires_conversion": True}
-        
-        # Default
-        else:
-            return {"standard_unit": unit, "conversion_factor": 1.0, "unit_type": "unknown"}
-    
-    async def _calculate_derived_metrics(self, record: Dict[str, Any], source_name: str) -> Dict[str, Any]:
-        """Calculate derived metrics from the record"""
-        derived = {}
-        
-        try:
-            value = float(record.get("value", 0))
-            unit_info = record.get("unit_info", {})
-            
-            # Apply unit conversion if needed
-            if unit_info.get("conversion_factor", 1.0) != 1.0:
-                derived["original_value"] = value
-                derived["converted_value"] = value * unit_info["conversion_factor"]
-            
-            # Energy-specific calculations
-            if unit_info.get("unit_type") == "energy":
-                # Estimate cost (simplified)
-                cost_per_kwh = 0.12  # Example rate
-                derived["estimated_cost"] = value * cost_per_kwh
-                
-                # Estimate CO2 emissions (simplified)
-                co2_per_kwh = 0.4  # kg CO2 per kWh (example grid factor)
-                derived["estimated_co2_kg"] = value * co2_per_kwh
-            
-            # Add value range classification
-            derived["value_range"] = await self._classify_value_range(value, unit_info.get("unit_type"))
-            
-        except Exception as e:
-            logger.debug(f"Error calculating derived metrics: {e}")
-        
-        return derived
-    
-    async def _classify_value_range(self, value: float, unit_type: str) -> str:
-        """Classify value into ranges for better understanding"""
-        if unit_type == "energy":
-            if value < 1:
-                return "very_low"
-            elif value < 10:
-                return "low"
-            elif value < 50:
-                return "medium"
-            elif value < 200:
-                return "high"
-            else:
-                return "very_high"
-        elif unit_type == "power":
-            if value < 0.5:
-                return "very_low"
-            elif value < 5:
-                return "low"
-            elif value < 20:
-                return "medium"
-            elif value < 100:
-                return "high"
-            else:
-                return "very_high"
-        else:
-            return "unknown"
-    
-    async def _enrich_with_context(self, record: Dict[str, Any], source_name: str) -> Dict[str, Any]:
-        """Enrich record with contextual information"""
-        context = {}
-        
-        try:
-            # Add geographical context if available
-            context["data_source"] = "real_community"
-            context["source_type"] = "ftp_ingestion"
-            
-            # Add data freshness
-            ingestion_time = datetime.utcnow()
-            data_time = datetime.fromtimestamp(record["timestamp"])
-            context["data_age_minutes"] = (ingestion_time - data_time).total_seconds() / 60
-            
-            # Classify data freshness
-            if context["data_age_minutes"] < 15:
-                context["freshness"] = "real_time"
-            elif context["data_age_minutes"] < 60:
-                context["freshness"] = "near_real_time"
-            elif context["data_age_minutes"] < 1440:  # 24 hours
-                context["freshness"] = "recent"
-            else:
-                context["freshness"] = "historical"
-            
-        except Exception as e:
-            logger.debug(f"Error adding context: {e}")
-        
-        return context
-    
-    async def _extract_temporal_features(self, timestamp: int) -> Dict[str, Any]:
-        """Extract temporal features from timestamp"""
-        dt = datetime.fromtimestamp(timestamp)
-        
-        return {
-            "hour": dt.hour,
-            "day_of_week": dt.weekday(),
-            "day_of_month": dt.day,
-            "month": dt.month,
-            "quarter": (dt.month - 1) // 3 + 1,
-            "is_weekend": dt.weekday() >= 5,
-            "is_business_hours": 8 <= dt.hour <= 17,
-            "season": self._get_season(dt.month)
-        }
-    
-    def _get_season(self, month: int) -> str:
-        """Get season from month"""
-        if month in [12, 1, 2]:
-            return "winter"
-        elif month in [3, 4, 5]:
-            return "spring"
-        elif month in [6, 7, 8]:
-            return "summer"
-        else:
-            return "autumn"
-    
-    async def _enrich_energy_data(self, record: Dict[str, Any]) -> Dict[str, Any]:
-        """Add energy-specific enrichments"""
-        enrichment = {}
-        
-        try:
-            value = record.get("derived_metrics", {}).get("converted_value", record.get("value", 0))
-            temporal = record.get("temporal", {})
-            
-            # Energy usage patterns
-            if temporal.get("is_business_hours"):
-                enrichment["usage_pattern"] = "business_hours"
-            elif temporal.get("is_weekend"):
-                enrichment["usage_pattern"] = "weekend"
-            else:
-                enrichment["usage_pattern"] = "off_hours"
-            
-            # Demand classification
-            if value > 100:
-                enrichment["demand_level"] = "high"
-            elif value > 50:
-                enrichment["demand_level"] = "medium"
-            elif value > 10:
-                enrichment["demand_level"] = "low"
-            else:
-                enrichment["demand_level"] = "minimal"
-            
-            # Peak/off-peak classification
-            hour = temporal.get("hour", 0)
-            if 17 <= hour <= 21:  # Evening peak
-                enrichment["tariff_period"] = "peak"
-            elif 22 <= hour <= 6:   # Night off-peak
-                enrichment["tariff_period"] = "off_peak"
-            else:
-                enrichment["tariff_period"] = "standard"
-            
-        except Exception as e:
-            logger.debug(f"Error enriching energy data: {e}")
-        
-        return enrichment
-    
-    async def _calculate_quality_scores(self, data: List[Dict[str, Any]], quality_report: Dict[str, Any]) -> Dict[str, float]:
-        """Calculate overall quality scores"""
-        if not data:
-            return {"overall": 0.0, "completeness": 0.0, "accuracy": 0.0, "consistency": 0.0, "timeliness": 0.0}
-        
-        # Completeness score
-        total_expected_fields = len(data) * 4  # sensor_id, timestamp, value, unit
-        total_present_fields = sum(1 for record in data 
-                                 for field in ["sensor_id", "timestamp", "value", "unit"] 
-                                 if record.get(field) is not None)
-        completeness = total_present_fields / total_expected_fields if total_expected_fields > 0 else 0.0
-        
-        # Accuracy score (based on validation scores)
-        accuracy_scores = [record.get("data_quality", {}).get("quality_score", 0) for record in data]
-        accuracy = statistics.mean(accuracy_scores) if accuracy_scores else 0.0
-        
-        # Consistency score (coefficient of variation for quality scores)
-        if len(accuracy_scores) > 1:
-            std_dev = statistics.stdev(accuracy_scores)
-            mean_score = statistics.mean(accuracy_scores)
-            consistency = 1.0 - (std_dev / mean_score) if mean_score > 0 else 0.0
-        else:
-            consistency = 1.0
-        
-        # Timeliness score (based on data age)
-        current_time = datetime.utcnow().timestamp()
-        ages = [(current_time - record.get("timestamp", current_time)) / 3600 for record in data]  # age in hours
-        avg_age = statistics.mean(ages) if ages else 0
-        timeliness = max(0.0, 1.0 - (avg_age / 24))  # Decrease score as data gets older than 24 hours
-        
-        # Overall score
-        overall = statistics.mean([completeness, accuracy, consistency, timeliness])
-        
-        return {
-            "overall": round(overall, 3),
-            "completeness": round(completeness, 3),
-            "accuracy": round(accuracy, 3),
-            "consistency": round(consistency, 3),
-            "timeliness": round(timeliness, 3)
-        }
-    
-    async def _store_quality_report(self, quality_report: Dict[str, Any], source_name: str):
-        """Store quality report in database"""
-        try:
-            quality_report["_id"] = f"{source_name}_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}"
-            await self.db.quality_reports.insert_one(quality_report)
-            
-            # Also cache in Redis for quick access
-            cache_key = f"quality_report:{source_name}:latest"
-            await self.redis.setex(cache_key, 3600, json.dumps(quality_report, default=str))
-            
-        except Exception as e:
-            logger.error(f"Error storing quality report: {e}")
-    
-    async def _load_validation_rules(self):
-        """Load validation rules configuration"""
-        # Default validation rules
-        self.validation_rules = {
-            "energy": {
-                "min_value": 0,
-                "max_value": 100000,
-                "required_precision": 0.01
-            },
-            "power": {
-                "min_value": 0,
-                "max_value": 50000,
-                "required_precision": 0.1
-            },
-            "temperature": {
-                "min_value": -50,
-                "max_value": 100,
-                "required_precision": 0.1
-            }
-        }
-        
-        logger.info("Loaded default validation rules")
-    
-    async def _load_enrichment_metadata(self):
-        """Load enrichment metadata"""
-        # Load any cached enrichment data
-        try:
-            cache_keys = []
-            async for key in self.redis.scan_iter(match="enrichment:*"):
-                cache_keys.append(key)
-            
-            logger.info(f"Loaded {len(cache_keys)} enrichment cache entries")
-            
-        except Exception as e:
-            logger.debug(f"Error loading enrichment metadata: {e}")
-    
-    async def get_quality_summary(self, source_name: Optional[str] = None) -> Dict[str, Any]:
-        """Get quality summary for sources"""
-        try:
-            match_filter = {"source": source_name} if source_name else {}
-            
-            # Get recent quality reports
-            cursor = self.db.quality_reports.find(match_filter).sort("processing_time", -1).limit(50)
-            
-            reports = []
-            async for report in cursor:
-                report["_id"] = str(report["_id"])
-                reports.append(report)
-            
-            if not reports:
-                return {"message": "No quality reports found"}
-            
-            # Calculate summary statistics
-            avg_quality = statistics.mean([r["quality_scores"]["overall"] for r in reports])
-            total_processed = sum([r["processed_records"] for r in reports])
-            total_rejected = sum([r["rejected_records"] for r in reports])
-            
-            return {
-                "total_reports": len(reports),
-                "average_quality": round(avg_quality, 3),
-                "total_processed_records": total_processed,
-                "total_rejected_records": total_rejected,
-                "success_rate": round(total_processed / (total_processed + total_rejected) * 100, 2) if (total_processed + total_rejected) > 0 else 0,
-                "latest_report": reports[0] if reports else None
-            }
-            
-        except Exception as e:
-            logger.error(f"Error getting quality summary: {e}")
-            return {"error": str(e)}
--- a/microservices/data-ingestion-service/src/database.py
+++ b/microservices/data-ingestion-service/src/database.py
@@ -1,433 +1,245 @@
+#!/usr/bin/env python3
 """
-Database configuration and connection management for the data ingestion service.
-Handles MongoDB connections, index creation, and Redis connections.
+MongoDB Database Manager for SA4CPS Data Ingestion
+Simple async MongoDB operations for storing .slg_v2 file data
 """

 import asyncio
 import logging
-from typing import Optional
-from contextlib import asynccontextmanager
-import os
 from datetime import datetime
+from typing import List, Dict, Any, Optional
+from motor.motor_asyncio import AsyncIOMotorClient
+from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError

-import motor.motor_asyncio
-import redis.asyncio as redis
-from pymongo import IndexModel
-
-from .models import (
-    DataSourceSchema, ProcessedFileSchema, QualityReportSchema,
-    IngestionStatsSchema, ErrorLogSchema, MonitoringAlertSchema
-)
+from config import MONGO_CONFIG

 logger = logging.getLogger(__name__)

+
 class DatabaseManager:
-    """Manages database connections and operations"""
+    """Manages MongoDB connections and operations for SA4CPS data"""
    
-    def __init__(self, mongodb_url: str = None, redis_url: str = None):
-        self.mongodb_url = mongodb_url or os.getenv("MONGODB_URL", "mongodb://localhost:27017")
-        self.redis_url = redis_url or os.getenv("REDIS_URL", "redis://localhost:6379")
+    def __init__(self):
+        self.client: Optional[AsyncIOMotorClient] = None
+        self.db = None
+        self.collections = {}
        
-        self.mongodb_client: Optional[motor.motor_asyncio.AsyncIOMotorClient] = None
-        self.db: Optional[motor.motor_asyncio.AsyncIOMotorDatabase] = None
-        self.redis_client: Optional[redis.Redis] = None
+        # MongoDB configuration
+        self.connection_string = MONGO_CONFIG["connection_string"]
+        self.database_name = MONGO_CONFIG["database_name"]
        
-        self._connection_status = {
-            "mongodb": False,
-            "redis": False,
-            "last_check": None
-        }
+        logger.info(f"Database manager initialized for: {self.database_name}")
    
    async def connect(self):
-        """Establish connections to MongoDB and Redis"""
-        try:
-            await self._connect_mongodb()
-            await self._connect_redis()
-            await self._create_indexes()
-            
-            logger.info("Database connections established successfully")
-            
-        except Exception as e:
-            logger.error(f"Error establishing database connections: {e}")
-            raise
-    
-    async def _connect_mongodb(self):
        """Connect to MongoDB"""
        try:
-            # Parse database name from URL or use default
-            db_name = "energy_dashboard"
-            if self.mongodb_url.count("/") > 2:
-                db_name = self.mongodb_url.split("/")[-1]
-            
-            self.mongodb_client = motor.motor_asyncio.AsyncIOMotorClient(
-                self.mongodb_url,
-                serverSelectionTimeoutMS=5000,
-                connectTimeoutMS=5000,
-                maxPoolSize=50,
-                minPoolSize=10
-            )
-            
-            self.db = self.mongodb_client[db_name]
+            self.client = AsyncIOMotorClient(self.connection_string)
            
            # Test connection
-            await self.mongodb_client.admin.command('ping')
+            await self.client.admin.command('ping')
            
-            self._connection_status["mongodb"] = True
-            logger.info(f"Connected to MongoDB: {self.mongodb_url}")
+            # Get database and collections
+            self.db = self.client[self.database_name]
+            self.collections = {
+                'files': self.db.sa4cps_files,
+                'energy_data': self.db.sa4cps_energy_data,
+                'metadata': self.db.sa4cps_metadata
+            }
            
-        except Exception as e:
-            self._connection_status["mongodb"] = False
-            logger.error(f"MongoDB connection failed: {e}")
+            # Create indexes for better performance
+            await self._create_indexes()
+            
+            logger.info(f"Connected to MongoDB: {self.database_name}")
+            
+        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
+            logger.error(f"Failed to connect to MongoDB: {e}")
            raise
    
-    async def _connect_redis(self):
-        """Connect to Redis"""
-        try:
-            self.redis_client = redis.from_url(
-                self.redis_url,
-                encoding="utf-8",
-                decode_responses=True,
-                socket_timeout=5,
-                socket_connect_timeout=5,
-                health_check_interval=30
-            )
+    async def close(self):
+        """Close MongoDB connection"""
+        if self.client:
+            self.client.close()
+            logger.info("MongoDB connection closed")
    
-            # Test connection
-            await self.redis_client.ping()
+    async def ping(self):
+        """Test database connection"""
+        if not self.client:
+            raise ConnectionFailure("No database connection")
        
-            self._connection_status["redis"] = True
-            logger.info(f"Connected to Redis: {self.redis_url}")
-            
-        except Exception as e:
-            self._connection_status["redis"] = False
-            logger.error(f"Redis connection failed: {e}")
-            raise
+        await self.client.admin.command('ping')
    
    async def _create_indexes(self):
-        """Create database indexes for optimal performance"""
+        """Create database indexes for efficient queries"""
        try:
-            schemas = [
-                DataSourceSchema,
-                ProcessedFileSchema,
-                QualityReportSchema,
-                IngestionStatsSchema,
-                ErrorLogSchema,
-                MonitoringAlertSchema
-            ]
+            # Index on files collection
+            await self.collections['files'].create_index("filename", unique=True)
+            await self.collections['files'].create_index("processed_at")
            
-            for schema in schemas:
-                collection = self.db[schema.collection_name]
-                indexes = schema.get_indexes()
-                
-                if indexes:
-                    index_models = []
-                    for index_spec in indexes:
-                        keys = index_spec["keys"]
-                        options = {k: v for k, v in index_spec.items() if k != "keys"}
-                        index_models.append(IndexModel(keys, **options))
-                    
-                    await collection.create_indexes(index_models)
-                    logger.debug(f"Created {len(index_models)} indexes for {schema.collection_name}")
+            # Index on energy data collection
+            await self.collections['energy_data'].create_index([("filename", 1), ("timestamp", 1)])
+            await self.collections['energy_data'].create_index("timestamp")
            
            logger.info("Database indexes created successfully")
            
        except Exception as e:
-            logger.error(f"Error creating database indexes: {e}")
-            # Don't raise here - indexes are performance optimization, not critical
+            logger.warning(f"Failed to create indexes: {e}")
    
-    async def disconnect(self):
-        """Close all database connections"""
+    async def store_file_data(self, filename: str, records: List[Dict[str, Any]]) -> bool:
+        """Store processed .slg_v2 file data in MongoDB"""
        try:
-            if self.redis_client:
-                await self.redis_client.aclose()
-                self._connection_status["redis"] = False
+            current_time = datetime.now()
            
-            if self.mongodb_client:
-                self.mongodb_client.close()
-                self._connection_status["mongodb"] = False
-            
-            logger.info("Database connections closed")
-            
-        except Exception as e:
-            logger.error(f"Error closing database connections: {e}")
-    
-    async def health_check(self) -> dict:
-        """Check health of database connections"""
-        health = {
-            "mongodb": False,
-            "redis": False,
-            "timestamp": datetime.utcnow().isoformat(),
-            "details": {}
-        }
-        
-        # Check MongoDB
-        try:
-            if self.mongodb_client:
-                start_time = asyncio.get_event_loop().time()
-                await self.mongodb_client.admin.command('ping')
-                response_time = (asyncio.get_event_loop().time() - start_time) * 1000
-                
-                health["mongodb"] = True
-                health["details"]["mongodb"] = {
-                    "status": "healthy",
-                    "response_time_ms": round(response_time, 2),
-                    "server_info": await self.mongodb_client.server_info()
-                }
-            
-        except Exception as e:
-            health["details"]["mongodb"] = {
-                "status": "unhealthy",
-                "error": str(e)
+            # Store file metadata
+            file_metadata = {
+                "filename": filename,
+                "record_count": len(records),
+                "processed_at": current_time,
+                "file_size": sum(len(str(record)) for record in records),
+                "status": "processed"
            }
            
-        # Check Redis
-        try:
-            if self.redis_client:
-                start_time = asyncio.get_event_loop().time()
-                await self.redis_client.ping()
-                response_time = (asyncio.get_event_loop().time() - start_time) * 1000
-                
-                redis_info = await self.redis_client.info()
-                
-                health["redis"] = True
-                health["details"]["redis"] = {
-                    "status": "healthy",
-                    "response_time_ms": round(response_time, 2),
-                    "version": redis_info.get("redis_version"),
-                    "connected_clients": redis_info.get("connected_clients"),
-                    "used_memory_human": redis_info.get("used_memory_human")
-                }
-            
-        except Exception as e:
-            health["details"]["redis"] = {
-                "status": "unhealthy",
-                "error": str(e)
-            }
-        
-        # Update connection status
-        self._connection_status.update({
-            "mongodb": health["mongodb"],
-            "redis": health["redis"],
-            "last_check": datetime.utcnow()
-        })
-        
-        return health
-    
-    @property
-    def is_connected(self) -> bool:
-        """Check if all required connections are established"""
-        return self._connection_status["mongodb"] and self._connection_status["redis"]
-    
-    @property
-    def data_sources(self):
-        """Data sources collection"""
-        return self.db[DataSourceSchema.collection_name]
-    
-    @property
-    def processed_files(self):
-        """Processed files collection"""
-        return self.db[ProcessedFileSchema.collection_name]
-    
-    @property
-    def quality_reports(self):
-        """Quality reports collection"""
-        return self.db[QualityReportSchema.collection_name]
-    
-    @property
-    def ingestion_stats(self):
-        """Ingestion statistics collection"""
-        return self.db[IngestionStatsSchema.collection_name]
-    
-    @property
-    def error_logs(self):
-        """Error logs collection"""
-        return self.db[ErrorLogSchema.collection_name]
-    
-    @property
-    def monitoring_alerts(self):
-        """Monitoring alerts collection"""
-        return self.db[MonitoringAlertSchema.collection_name]
-
-# Global database manager instance
-db_manager = DatabaseManager()
-
-async def get_database():
-    """Dependency function to get database instance"""
-    if not db_manager.is_connected:
-        await db_manager.connect()
-    return db_manager.db
-
-async def get_redis():
-    """Dependency function to get Redis client"""
-    if not db_manager.is_connected:
-        await db_manager.connect()
-    return db_manager.redis_client
-
-@asynccontextmanager
-async def get_db_session():
-    """Context manager for database operations"""
-    try:
-        if not db_manager.is_connected:
-            await db_manager.connect()
-        yield db_manager.db
-    except Exception as e:
-        logger.error(f"Database session error: {e}")
-        raise
-    finally:
-        # Connection pooling handles cleanup automatically
-        pass
-
-@asynccontextmanager
-async def get_redis_session():
-    """Context manager for Redis operations"""
-    try:
-        if not db_manager.is_connected:
-            await db_manager.connect()
-        yield db_manager.redis_client
-    except Exception as e:
-        logger.error(f"Redis session error: {e}")
-        raise
-    finally:
-        # Connection pooling handles cleanup automatically
-        pass
-
-class DatabaseService:
-    """High-level database service with common operations"""
-    
-    def __init__(self, db, redis_client):
-        self.db = db
-        self.redis = redis_client
-    
-    async def create_data_source(self, source_data: dict) -> str:
-        """Create a new data source"""
-        try:
-            source_data["created_at"] = datetime.utcnow()
-            source_data["updated_at"] = datetime.utcnow()
-            source_data["status"] = "active"
-            source_data["error_count"] = 0
-            source_data["total_files_processed"] = 0
-            
-            result = await self.db.data_sources.insert_one(source_data)
-            return str(result.inserted_id)
-            
-        except Exception as e:
-            logger.error(f"Error creating data source: {e}")
-            raise
-    
-    async def get_data_source(self, source_id: str) -> Optional[dict]:
-        """Get data source by ID"""
-        try:
-            from bson import ObjectId
-            source = await self.db.data_sources.find_one({"_id": ObjectId(source_id)})
-            if source:
-                source["_id"] = str(source["_id"])
-            return source
-            
-        except Exception as e:
-            logger.error(f"Error getting data source: {e}")
-            return None
-    
-    async def update_data_source(self, source_id: str, update_data: dict) -> bool:
-        """Update data source"""
-        try:
-            from bson import ObjectId
-            update_data["updated_at"] = datetime.utcnow()
-            
-            result = await self.db.data_sources.update_one(
-                {"_id": ObjectId(source_id)},
-                {"$set": update_data}
-            )
-            
-            return result.modified_count > 0
-            
-        except Exception as e:
-            logger.error(f"Error updating data source: {e}")
-            return False
-    
-    async def list_data_sources(self, enabled_only: bool = False) -> list:
-        """List all data sources"""
-        try:
-            query = {"enabled": True} if enabled_only else {}
-            cursor = self.db.data_sources.find(query).sort("created_at", -1)
-            
-            sources = []
-            async for source in cursor:
-                source["_id"] = str(source["_id"])
-                sources.append(source)
-            
-            return sources
-            
-        except Exception as e:
-            logger.error(f"Error listing data sources: {e}")
-            return []
-    
-    async def log_error(self, error_data: dict):
-        """Log an error to the database"""
-        try:
-            error_data["timestamp"] = datetime.utcnow()
-            await self.db.error_logs.insert_one(error_data)
-            
-        except Exception as e:
-            logger.error(f"Error logging error: {e}")
-    
-    async def update_ingestion_stats(self, stats_data: dict):
-        """Update daily ingestion statistics"""
-        try:
-            today = datetime.utcnow().strftime("%Y-%m-%d")
-            stats_data["date"] = today
-            stats_data["timestamp"] = datetime.utcnow()
-            
-            await self.db.ingestion_stats.update_one(
-                {"date": today},
-                {"$set": stats_data},
+            # Insert or update file record
+            await self.collections['files'].replace_one(
+                {"filename": filename},
+                file_metadata,
                upsert=True
            )
            
-        except Exception as e:
-            logger.error(f"Error updating ingestion stats: {e}")
+            # Add filename and processed timestamp to each record
+            for record in records:
+                record["filename"] = filename
+                record["processed_at"] = current_time
            
-    async def get_latest_stats(self) -> Optional[dict]:
-        """Get latest ingestion statistics"""
-        try:
-            stats = await self.db.ingestion_stats.find_one(
-                sort=[("timestamp", -1)]
+            # Insert energy data records
+            if records:
+                result = await self.collections['energy_data'].insert_many(records)
+                inserted_count = len(result.inserted_ids)
+                logger.info(f"Stored {inserted_count} records from {filename}")
+                return True
+            
+            return False
+            
+        except Exception as e:
+            logger.error(f"Error storing data for {filename}: {e}")
+            
+            # Store error metadata
+            error_metadata = {
+                "filename": filename,
+                "processed_at": current_time,
+                "status": "error",
+                "error_message": str(e)
+            }
+            
+            await self.collections['files'].replace_one(
+                {"filename": filename},
+                error_metadata,
+                upsert=True
            )
-            if stats:
-                stats["_id"] = str(stats["_id"])
+            
+            return False
+    
+    async def get_processed_files(self) -> List[str]:
+        """Get list of successfully processed files"""
+        try:
+            cursor = self.collections['files'].find(
+                {"status": "processed"},
+                {"filename": 1, "_id": 0}
+            )
+            
+            files = []
+            async for doc in cursor:
+                files.append(doc["filename"])
+            
+            return files
+            
+        except Exception as e:
+            logger.error(f"Error getting processed files: {e}")
+            return []
+    
+    async def get_file_info(self, filename: str) -> Optional[Dict[str, Any]]:
+        """Get information about a specific file"""
+        try:
+            return await self.collections['files'].find_one({"filename": filename})
+        except Exception as e:
+            logger.error(f"Error getting file info for {filename}: {e}")
+            return None
+    
+    async def get_stats(self) -> Dict[str, Any]:
+        """Get database statistics"""
+        try:
+            stats = {
+                "database": self.database_name,
+                "timestamp": datetime.now().isoformat()
+            }
+            
+            # Count documents in each collection
+            for name, collection in self.collections.items():
+                try:
+                    count = await collection.count_documents({})
+                    stats[f"{name}_count"] = count
+                except Exception as e:
+                    stats[f"{name}_count"] = f"error: {e}"
+            
+            # Get recent files
+            try:
+                recent_files = []
+                cursor = self.collections['files'].find(
+                    {},
+                    {"filename": 1, "processed_at": 1, "record_count": 1, "status": 1, "_id": 0}
+                ).sort("processed_at", -1).limit(5)
+                
+                async for doc in cursor:
+                    if doc.get("processed_at"):
+                        doc["processed_at"] = doc["processed_at"].isoformat()
+                    recent_files.append(doc)
+                
+                stats["recent_files"] = recent_files
+                
+            except Exception as e:
+                stats["recent_files"] = f"error: {e}"
+            
            return stats
            
        except Exception as e:
-            logger.error(f"Error getting latest stats: {e}")
-            return None
+            logger.error(f"Error getting database stats: {e}")
+            return {"error": str(e), "timestamp": datetime.now().isoformat()}
    
-    async def cleanup_old_data(self, days: int = 30):
-        """Clean up old data based on retention policy"""
+    async def get_energy_data(self, 
+                             filename: Optional[str] = None,
+                             start_time: Optional[datetime] = None,
+                             end_time: Optional[datetime] = None,
+                             limit: int = 100) -> List[Dict[str, Any]]:
+        """Retrieve energy data with optional filtering"""
        try:
-            cutoff_date = datetime.utcnow() - datetime.timedelta(days=days)
+            query = {}
            
-            # Clean up old processed files records
-            result1 = await self.db.processed_files.delete_many({
-                "processed_at": {"$lt": cutoff_date}
-            })
+            if filename:
+                query["filename"] = filename
            
-            # Clean up old error logs
-            result2 = await self.db.error_logs.delete_many({
-                "timestamp": {"$lt": cutoff_date}
-            })
+            if start_time or end_time:
+                time_query = {}
+                if start_time:
+                    time_query["$gte"] = start_time
+                if end_time:
+                    time_query["$lte"] = end_time
+                query["timestamp"] = time_query
            
-            # Clean up old quality reports
-            result3 = await self.db.quality_reports.delete_many({
-                "processing_time": {"$lt": cutoff_date}
-            })
+            cursor = self.collections['energy_data'].find(query).sort("timestamp", -1).limit(limit)
            
-            logger.info(f"Cleaned up old data: {result1.deleted_count} processed files, "
-                       f"{result2.deleted_count} error logs, {result3.deleted_count} quality reports")
+            data = []
+            async for doc in cursor:
+                # Convert ObjectId to string and datetime to ISO string
+                if "_id" in doc:
+                    doc["_id"] = str(doc["_id"])
+                if "timestamp" in doc and hasattr(doc["timestamp"], "isoformat"):
+                    doc["timestamp"] = doc["timestamp"].isoformat()
+                if "processed_at" in doc and hasattr(doc["processed_at"], "isoformat"):
+                    doc["processed_at"] = doc["processed_at"].isoformat()
+                
+                data.append(doc)
+            
+            return data
            
        except Exception as e:
-            logger.error(f"Error cleaning up old data: {e}")
-
-# Export the database manager and service for use in other modules
-__all__ = [
-    'DatabaseManager', 'DatabaseService', 'db_manager',
-    'get_database', 'get_redis', 'get_db_session', 'get_redis_session'
-]
+            logger.error(f"Error retrieving energy data: {e}")
+            return []
--- a/microservices/data-ingestion-service/src/ftp_monitor.py
+++ b/microservices/data-ingestion-service/src/ftp_monitor.py
@@ -1,445 +1,209 @@
+#!/usr/bin/env python3
 """
-FTP monitoring component for detecting and downloading new time series data files.
-Handles multiple FTP servers with different configurations and file patterns.
+FTP Monitor for SA4CPS .slg_v2 files
+Monitors ftp.sa4cps.pt for new monthly files
 """

 import asyncio
 import ftplib
-import ftputil
-from ftputil import FTPHost
+import logging
+import os
 from datetime import datetime, timedelta
 from typing import List, Dict, Any, Optional
-import logging
-import io
-import os
-import hashlib
-import json
-from pathlib import Path
-import re
-import ssl
+from dataclasses import dataclass
+import tempfile
+
+from config import FTP_CONFIG
+from slg_processor import SLGProcessor

 logger = logging.getLogger(__name__)

+
+@dataclass
+class FTPFileInfo:
+    """Information about an FTP file"""
+    path: str
+    name: str
+    size: int
+    modified_time: Optional[datetime] = None
+
+
 class FTPMonitor:
-    """Monitors FTP servers for new time series data files"""
+    """Monitors SA4CPS FTP server for new .slg_v2 files"""
    
-    def __init__(self, db, redis_client):
-        self.db = db
-        self.redis = redis_client
-        self.download_cache = {}  # Cache for downloaded files
-        self.connection_pool = {}  # Pool of FTP connections
+    def __init__(self, db_manager):
+        self.db_manager = db_manager
+        self.processor = SLGProcessor()
+        self.last_check: Optional[datetime] = None
+        self.processed_files: set = set()
+        self.files_processed_count = 0
+        self.status = "initializing"
        
-    async def check_for_new_files(self, source: Dict[str, Any]) -> List[Dict[str, Any]]:
-        """Check FTP server for new files matching the configured patterns"""
-        try:
-            ftp_config = source.get("ftp_config", {})
-            file_patterns = source.get("file_patterns", ["*.csv"])
+        # FTP connection settings
+        self.ftp_host = FTP_CONFIG["host"]
+        self.ftp_user = FTP_CONFIG["username"]
+        self.ftp_pass = FTP_CONFIG["password"]
+        self.base_path = FTP_CONFIG["base_path"]
        
-            if not ftp_config:
-                logger.warning(f"No FTP config for source: {source['name']}")
-                return []
+        # Check interval: 6 hours (files are monthly, so frequent checks aren't needed)
+        self.check_interval = FTP_CONFIG.get("check_interval", 6 * 3600)  # 6 hours
        
-            # Connect to FTP server
-            ftp_host = await self._get_ftp_connection(source)
-            if not ftp_host:
-                return []
+        logger.info(f"FTP Monitor initialized for {self.ftp_host}")
    
-            new_files = []
-            remote_path = ftp_config.get("remote_path", "/")
+    async def start_monitoring(self):
+        """Start the monitoring loop"""
+        self.status = "running"
+        logger.info("Starting FTP monitoring loop")
        
+        while True:
            try:
-                # List files in remote directory
-                file_list = await self._list_remote_files(ftp_host, remote_path)
+                await self.check_for_new_files()
+                self.status = "running"
                
-                # Filter files by patterns and check if they're new
-                for file_info in file_list:
-                    filename = file_info["filename"]
-                    
-                    # Check if file matches any pattern
-                    if self._matches_patterns(filename, file_patterns):
-                        
-                        # Check if file is new (not processed before)
-                        if await self._is_new_file(source, file_info):
-                            new_files.append(file_info)
-                            logger.info(f"Found new file: {filename}")
-                
-                # Update last check timestamp
-                await self.db.data_sources.update_one(
-                    {"_id": source["_id"]},
-                    {"$set": {"last_check": datetime.utcnow()}}
-                )
+                # Wait for next check (6 hours)
+                logger.info(f"Waiting {self.check_interval/3600:.1f} hours until next check")
+                await asyncio.sleep(self.check_interval)
                
            except Exception as e:
-                logger.error(f"Error listing files from FTP: {e}")
-                await self._close_ftp_connection(source["_id"])
+                self.status = "error"
+                logger.error(f"Error in monitoring loop: {e}")
+                # Wait 30 minutes before retrying on error
+                await asyncio.sleep(1800)
    
-            return new_files
+    async def check_for_new_files(self) -> Dict[str, Any]:
+        """Check FTP server for new .slg_v2 files"""
+        self.last_check = datetime.now()
+        logger.info(f"Checking FTP server at {self.last_check}")
        
-        except Exception as e:
-            logger.error(f"Error checking for new files in source {source['name']}: {e}")
-            return []
-    
-    async def download_file(self, source: Dict[str, Any], file_info: Dict[str, Any]) -> bytes:
-        """Download a file from FTP server"""
        try:
-            ftp_host = await self._get_ftp_connection(source)
-            if not ftp_host:
-                raise Exception("Cannot establish FTP connection")
+            # Connect to FTP server
+            with ftplib.FTP(self.ftp_host) as ftp:
+                ftp.login(self.ftp_user, self.ftp_pass)
+                logger.info(f"Connected to FTP server: {self.ftp_host}")
                
-            filename = file_info["filename"]
-            remote_path = source["ftp_config"].get("remote_path", "/")
-            full_path = f"{remote_path.rstrip('/')}/{filename}"
+                # Find .slg_v2 files
+                new_files = await self._find_slg_files(ftp)
                
-            logger.info(f"Downloading file: {full_path}")
+                # Process new files
+                processed_count = 0
+                for file_info in new_files:
+                    if file_info.path not in self.processed_files:
+                        success = await self._process_file(ftp, file_info)
+                        if success:
+                            self.processed_files.add(file_info.path)
+                            processed_count += 1
+                            self.files_processed_count += 1
                
-            # Download file content
-            file_content = await self._download_file_content(ftp_host, full_path)
-            
-            # Mark file as processed
-            await self._mark_file_processed(source, file_info)
-            
-            # Cache file info for future reference
-            await self._cache_file_info(source, file_info, len(file_content))
-            
-            logger.info(f"Successfully downloaded {filename} ({len(file_content)} bytes)")
-            return file_content
-            
-        except Exception as e:
-            logger.error(f"Error downloading file {file_info.get('filename', 'unknown')}: {e}")
-            raise
-    
-    async def test_connection(self, source: Dict[str, Any]) -> bool:
-        """Test FTP connection for a data source"""
-        try:
-            ftp_config = source.get("ftp_config", {})
-            if not ftp_config:
-                return False
-            
-            # Try to establish connection
-            ftp_host = await self._create_ftp_connection(ftp_config)
-            if ftp_host:
-                # Try to list remote directory
-                remote_path = ftp_config.get("remote_path", "/")
-                try:
-                    await self._list_remote_files(ftp_host, remote_path, limit=1)
-                    success = True
-                except:
-                    success = False
-                
-                # Close connection
-                try:
-                    await asyncio.get_event_loop().run_in_executor(
-                        None, ftp_host.close
-                    )
-                except:
-                    pass
-                
-                return success
-            
-            return False
-            
-        except Exception as e:
-            logger.error(f"Error testing FTP connection: {e}")
-            return False
-    
-    async def get_file_metadata(self, source: Dict[str, Any], filename: str) -> Optional[Dict[str, Any]]:
-        """Get metadata for a specific file"""
-        try:
-            ftp_host = await self._get_ftp_connection(source)
-            if not ftp_host:
-                return None
-            
-            remote_path = source["ftp_config"].get("remote_path", "/")
-            full_path = f"{remote_path.rstrip('/')}/{filename}"
-            
-            # Get file stats
-            def get_file_stat():
-                try:
-                    return ftp_host.stat(full_path)
-                except:
-                    return None
-            
-            stat_info = await asyncio.get_event_loop().run_in_executor(None, get_file_stat)
-            
-            if stat_info:
-                return {
-                    "filename": filename,
-                    "size": stat_info.st_size,
-                    "modified_time": datetime.fromtimestamp(stat_info.st_mtime),
-                    "full_path": full_path
+                result = {
+                    "files_found": len(new_files),
+                    "files_processed": processed_count,
+                    "timestamp": self.last_check.isoformat()
                }
                
-            return None
+                logger.info(f"Check complete: {result}")
+                return result
                
        except Exception as e:
-            logger.error(f"Error getting file metadata for {filename}: {e}")
-            return None
+            logger.error(f"FTP check failed: {e}")
+            raise
    
-    async def _get_ftp_connection(self, source: Dict[str, Any]):
-        """Get or create FTP connection for a source"""
-        source_id = str(source["_id"])
+    async def _find_slg_files(self, ftp: ftplib.FTP) -> List[FTPFileInfo]:
+        """Find .slg_v2 files in the FTP directory structure"""
+        files = []
        
-        # Check if we have a cached connection
-        if source_id in self.connection_pool:
-            connection = self.connection_pool[source_id]
-            try:
-                # Test if connection is still alive
-                await asyncio.get_event_loop().run_in_executor(
-                    None, lambda: connection.getcwd()
-                )
-                return connection
-            except:
-                # Connection is dead, remove from pool
-                del self.connection_pool[source_id]
-        
-        # Create new connection
-        ftp_config = source.get("ftp_config", {})
-        connection = await self._create_ftp_connection(ftp_config)
-        
-        if connection:
-            self.connection_pool[source_id] = connection
-        
-        return connection
-    
-    async def _create_ftp_connection(self, ftp_config: Dict[str, Any]):
-        """Create a new FTP connection"""
        try:
-            host = ftp_config.get("host")
-            port = ftp_config.get("port", 21)
-            username = ftp_config.get("username", "anonymous")
-            password = ftp_config.get("password", "")
-            use_ssl = ftp_config.get("use_ssl", False)
-            passive_mode = ftp_config.get("passive_mode", True)
+            # Navigate to base path
+            ftp.cwd(self.base_path)
+            logger.info(f"Scanning directory: {self.base_path}")
            
-            if not host:
-                raise ValueError("FTP host not specified")
+            # Get directory listing
+            dir_list = []
+            ftp.retrlines('LIST', dir_list.append)
            
-            def create_connection():
-                if use_ssl:
-                    # Use FTPS (FTP over SSL/TLS)
-                    ftp = ftplib.FTP_TLS()
-                    ftp.connect(host, port)
-                    ftp.login(username, password)
-                    ftp.prot_p()  # Enable protection for data channel
-                else:
-                    # Use regular FTP
-                    ftp = ftplib.FTP()
-                    ftp.connect(host, port)
-                    ftp.login(username, password)
+            for line in dir_list:
+                parts = line.split()
+                if len(parts) >= 9:
+                    filename = parts[-1]
                    
-                ftp.set_pasv(passive_mode)
+                    # Check if it's a .slg_v2 file
+                    if filename.endswith('.slg_v2'):
+                        try:
+                            size = int(parts[4])
+                            full_path = f"{self.base_path.rstrip('/')}/{filename}"
                            
-                # Create FTPHost wrapper for easier file operations
-                ftp_host = FTPHost.from_ftp_client(ftp)
-                return ftp_host
+                            files.append(FTPFileInfo(
+                                path=full_path,
+                                name=filename,
+                                size=size
+                            ))
                            
-            # Create connection in thread pool to avoid blocking
-            ftp_host = await asyncio.get_event_loop().run_in_executor(
-                None, create_connection
-            )
-            
-            logger.info(f"Successfully connected to FTP server: {host}:{port}")
-            return ftp_host
-            
-        except Exception as e:
-            logger.error(f"Error creating FTP connection to {ftp_config.get('host', 'unknown')}: {e}")
-            return None
-    
-    async def _close_ftp_connection(self, source_id: str):
-        """Close FTP connection for a source"""
-        if source_id in self.connection_pool:
-            try:
-                connection = self.connection_pool[source_id]
-                await asyncio.get_event_loop().run_in_executor(
-                    None, connection.close
-                )
-            except:
-                pass
-            finally:
-                del self.connection_pool[source_id]
-    
-    async def _list_remote_files(self, ftp_host, remote_path: str, limit: Optional[int] = None) -> List[Dict[str, Any]]:
-        """List files in remote FTP directory"""
-        def list_files():
-            files = []
-            try:
-                # Change to remote directory
-                ftp_host.chdir(remote_path)
-                
-                # Get file list with details
-                file_list = ftp_host.listdir(".")
-                
-                for filename in file_list:
-                    try:
-                        # Get file stats
-                        file_path = f"{remote_path.rstrip('/')}/{filename}"
-                        stat_info = ftp_host.stat(filename)
-                        
-                        # Skip directories
-                        if not ftp_host.path.isfile(filename):
-                            continue
-                        
-                        file_info = {
-                            "filename": filename,
-                            "full_path": file_path,
-                            "size": stat_info.st_size,
-                            "modified_time": datetime.fromtimestamp(stat_info.st_mtime),
-                            "created_time": datetime.fromtimestamp(stat_info.st_ctime) if hasattr(stat_info, 'st_ctime') else None
-                        }
-                        
-                        files.append(file_info)
-                        
-                        if limit and len(files) >= limit:
-                            break
-                            
-                    except Exception as e:
-                        logger.warning(f"Error getting stats for file {filename}: {e}")
-                        continue
-                
-            except Exception as e:
-                logger.error(f"Error listing directory {remote_path}: {e}")
-                raise
+                        except (ValueError, IndexError):
+                            logger.warning(f"Could not parse file info for: {filename}")
            
+            logger.info(f"Found {len(files)} .slg_v2 files")
            return files
            
-        return await asyncio.get_event_loop().run_in_executor(None, list_files)
-    
-    async def _download_file_content(self, ftp_host, file_path: str) -> bytes:
-        """Download file content from FTP server"""
-        def download():
-            bio = io.BytesIO()
-            try:
-                ftp_host.download(file_path, bio)
-                bio.seek(0)
-                return bio.read()
-            finally:
-                bio.close()
-        
-        return await asyncio.get_event_loop().run_in_executor(None, download)
-    
-    def _matches_patterns(self, filename: str, patterns: List[str]) -> bool:
-        """Check if filename matches any of the specified patterns"""
-        for pattern in patterns:
-            # Convert shell pattern to regex
-            regex_pattern = pattern.replace("*", ".*").replace("?", ".")
-            if re.match(regex_pattern, filename, re.IGNORECASE):
-                return True
-        return False
-    
-    async def _is_new_file(self, source: Dict[str, Any], file_info: Dict[str, Any]) -> bool:
-        """Check if file is new (hasn't been processed before)"""
-        try:
-            filename = file_info["filename"]
-            file_size = file_info["size"]
-            modified_time = file_info["modified_time"]
-            
-            # Create file signature
-            file_signature = hashlib.md5(
-                f"{filename}_{file_size}_{modified_time.timestamp()}".encode()
-            ).hexdigest()
-            
-            # Check if we've processed this file before
-            processed_file = await self.db.processed_files.find_one({
-                "source_id": source["_id"],
-                "file_signature": file_signature
-            })
-            
-            return processed_file is None
-            
        except Exception as e:
-            logger.error(f"Error checking if file is new: {e}")
-            return True  # Assume it's new if we can't check
-    
-    async def _mark_file_processed(self, source: Dict[str, Any], file_info: Dict[str, Any]):
-        """Mark file as processed"""
-        try:
-            filename = file_info["filename"]
-            file_size = file_info["size"]
-            modified_time = file_info["modified_time"]
-            
-            # Create file signature
-            file_signature = hashlib.md5(
-                f"{filename}_{file_size}_{modified_time.timestamp()}".encode()
-            ).hexdigest()
-            
-            # Record processed file
-            processed_record = {
-                "source_id": source["_id"],
-                "source_name": source["name"],
-                "filename": filename,
-                "file_signature": file_signature,
-                "file_size": file_size,
-                "modified_time": modified_time,
-                "processed_at": datetime.utcnow()
-            }
-            
-            await self.db.processed_files.insert_one(processed_record)
-            
-        except Exception as e:
-            logger.error(f"Error marking file as processed: {e}")
-    
-    async def _cache_file_info(self, source: Dict[str, Any], file_info: Dict[str, Any], content_size: int):
-        """Cache file information for monitoring"""
-        try:
-            cache_key = f"file_cache:{source['_id']}:{file_info['filename']}"
-            cache_data = {
-                "filename": file_info["filename"],
-                "size": file_info["size"],
-                "content_size": content_size,
-                "downloaded_at": datetime.utcnow().isoformat(),
-                "source_name": source["name"]
-            }
-            
-            # Store in Redis with 7-day expiration
-            await self.redis.setex(
-                cache_key,
-                7 * 24 * 3600,  # 7 days
-                json.dumps(cache_data)
-            )
-            
-        except Exception as e:
-            logger.error(f"Error caching file info: {e}")
-    
-    async def get_processing_history(self, source_id: str, limit: int = 50) -> List[Dict[str, Any]]:
-        """Get processing history for a data source"""
-        try:
-            cursor = self.db.processed_files.find(
-                {"source_id": source_id}
-            ).sort("processed_at", -1).limit(limit)
-            
-            history = []
-            async for record in cursor:
-                record["_id"] = str(record["_id"])
-                record["source_id"] = str(record["source_id"])
-                if "processed_at" in record:
-                    record["processed_at"] = record["processed_at"].isoformat()
-                if "modified_time" in record:
-                    record["modified_time"] = record["modified_time"].isoformat()
-                history.append(record)
-            
-            return history
-            
-        except Exception as e:
-            logger.error(f"Error getting processing history: {e}")
+            logger.error(f"Error scanning FTP directory: {e}")
            return []
    
-    async def cleanup_old_records(self, days: int = 30):
-        """Clean up old processed file records"""
+    async def _process_file(self, ftp: ftplib.FTP, file_info: FTPFileInfo) -> bool:
+        """Download and process a .slg_v2 file"""
+        logger.info(f"Processing file: {file_info.name} ({file_info.size} bytes)")
+        
        try:
-            cutoff_date = datetime.utcnow() - timedelta(days=days)
+            # Create temporary file for download
+            with tempfile.NamedTemporaryFile(mode='wb', suffix='.slg_v2', delete=False) as temp_file:
+                temp_path = temp_file.name
                
-            result = await self.db.processed_files.delete_many({
-                "processed_at": {"$lt": cutoff_date}
-            })
+                # Download file
+                with open(temp_path, 'wb') as f:
+                    ftp.retrbinary(f'RETR {file_info.name}', f.write)
                
-            logger.info(f"Cleaned up {result.deleted_count} old processed file records")
+                # Process the downloaded file
+                records = await self.processor.process_file(temp_path, file_info.name)
+                
+                # Store in database
+                if records:
+                    await self.db_manager.store_file_data(file_info.name, records)
+                    logger.info(f"Stored {len(records)} records from {file_info.name}")
+                    return True
+                else:
+                    logger.warning(f"No valid records found in {file_info.name}")
+                    return False
                
        except Exception as e:
-            logger.error(f"Error cleaning up old records: {e}")
+            logger.error(f"Error processing file {file_info.name}: {e}")
+            return False
        
-    async def close_all_connections(self):
-        """Close all FTP connections"""
-        for source_id in list(self.connection_pool.keys()):
-            await self._close_ftp_connection(source_id)
+        finally:
+            # Clean up temporary file
+            try:
+                if 'temp_path' in locals():
+                    os.unlink(temp_path)
+            except OSError:
+                pass
    
-        logger.info("Closed all FTP connections")
+    def get_status(self) -> str:
+        """Get current monitor status"""
+        return self.status
+    
+    def get_last_check_time(self) -> Optional[str]:
+        """Get last check time as ISO string"""
+        return self.last_check.isoformat() if self.last_check else None
+    
+    def get_processed_count(self) -> int:
+        """Get total number of files processed"""
+        return self.files_processed_count
+    
+    def get_detailed_status(self) -> Dict[str, Any]:
+        """Get detailed status information"""
+        return {
+            "status": self.status,
+            "last_check": self.get_last_check_time(),
+            "files_processed": self.files_processed_count,
+            "processed_files_count": len(self.processed_files),
+            "check_interval_hours": self.check_interval / 3600,
+            "ftp_host": self.ftp_host,
+            "base_path": self.base_path
+        }
--- a/microservices/data-ingestion-service/src/main.py
+++ b/microservices/data-ingestion-service/src/main.py
@@ -1,779 +1,139 @@
 """
-Data Ingestion Service
-Monitors FTP servers for new time series data from real communities and publishes to Redis.
-Provides realistic data feeds for simulation and analytics.
-Port: 8008
+SA4CPS Data Ingestion Service
+Simple FTP monitoring service for .slg_v2 files with MongoDB storage
 """

-import asyncio
-from datetime import datetime, timedelta
-from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
-from fastapi.middleware.cors import CORSMiddleware
+from fastapi import FastAPI, HTTPException
 from contextlib import asynccontextmanager
+import asyncio
 import logging
-from typing import List, Optional, Dict, Any
-import json
-from bson import ObjectId
+from datetime import datetime
+from typing import Dict, Any

-from models import (
-    DataSourceCreate, DataSourceUpdate, DataSourceResponse, 
-    FileProcessingRequest, FileProcessingResponse, IngestionStats,
-    HealthStatus, QualityReport, TopicInfo, PublishingStats
-)
-from database import db_manager, get_database, get_redis, DatabaseService
 from ftp_monitor import FTPMonitor
-from slg_v2_processor import SLGv2Processor
-from redis_publisher import RedisPublisher
-from data_validator import DataValidator
-from monitoring import ServiceMonitor, PerformanceMonitor, ErrorHandler
+from database import DatabaseManager

 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

+# Global services
+ftp_monitor = None
+db_manager = None
+
+
@asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Application lifespan manager"""
-    logger.info("Data Ingestion Service starting up...")
+    """Application lifespan management"""
+    global ftp_monitor, db_manager

-    try:
-        # Connect to databases
-        await db_manager.connect()
+    logger.info("Starting SA4CPS Data Ingestion Service...")

-        # Initialize core components
-        await initialize_data_sources()
-        await initialize_components()
+    # Initialize database connection
+    db_manager = DatabaseManager()
+    await db_manager.connect()

-        # Start background tasks
-        asyncio.create_task(ftp_monitoring_task())
-        asyncio.create_task(data_processing_task())
-        asyncio.create_task(health_monitoring_task())
-        asyncio.create_task(cleanup_task())
+    # Initialize FTP monitor
+    ftp_monitor = FTPMonitor(db_manager)

-        logger.info("Data Ingestion Service startup complete")
+    # Start background monitoring task
+    monitoring_task = asyncio.create_task(ftp_monitor.start_monitoring())

-        yield
+    logger.info("Service started successfully")

-    except Exception as e:
-        logger.error(f"Error during startup: {e}")
-        raise
-    finally:
-        logger.info("Data Ingestion Service shutting down...")
-        await db_manager.disconnect()
-        logger.info("Data Ingestion Service shutdown complete")
+    yield

+    # Cleanup on shutdown
+    logger.info("Shutting down service...")
+    monitoring_task.cancel()
+    await db_manager.close()
+    logger.info("Service shutdown complete")
+
+
+# Create FastAPI app
 app = FastAPI(
-    title="Data Ingestion Service",
-    description="FTP monitoring and time series data ingestion for real community data simulation",
+    title="SA4CPS Data Ingestion Service",
+    description="Monitors FTP server for .slg_v2 files and stores data in MongoDB",
    version="1.0.0",
    lifespan=lifespan
 )

-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)

-# Global components
-ftp_monitor = None
-data_processor = None
-redis_publisher = None
-data_validator = None
-service_monitor = None
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {
+        "service": "SA4CPS Data Ingestion Service",
+        "status": "running",
+        "timestamp": datetime.now().isoformat()
+    }

-# Dependencies
-async def get_db():
-    return await get_database()

-async def get_ftp_monitor():
-    global ftp_monitor
-    if not ftp_monitor:
-        db = await get_database()
-        redis = await get_redis()
-        ftp_monitor = FTPMonitor(db, redis)
-    return ftp_monitor
-
-async def get_slg_processor():
-    global data_processor
-    if not data_processor:
-        db = await get_database()
-        redis = await get_redis()
-        data_processor = SLGv2Processor(db, redis)
-    return data_processor
-
-async def get_redis_publisher():
-    global redis_publisher
-    if not redis_publisher:
-        redis = await get_redis()
-        redis_publisher = RedisPublisher(redis)
-    return redis_publisher
-
-async def get_data_validator():
-    global data_validator
-    if not data_validator:
-        db = await get_database()
-        redis = await get_redis()
-        data_validator = DataValidator(db, redis)
-    return data_validator
-
-@app.get("/health", response_model=HealthStatus)
+@app.get("/health")
 async def health_check():
    """Health check endpoint"""
-    try:
-        # Get database health
-        health_data = await db_manager.health_check()
+    global ftp_monitor, db_manager

-        # Get FTP connections status
-        ftp_status = await check_ftp_connections()
+    health_status = {
+        "service": "healthy",
+        "timestamp": datetime.now().isoformat(),
+        "database": "unknown",
+        "ftp_monitor": "unknown"
+    }

-        # Calculate uptime
-        app_start_time = getattr(app.state, 'start_time', datetime.utcnow())
-        uptime = (datetime.utcnow() - app_start_time).total_seconds()
-        
-        # Get processing stats
-        processing_stats = await get_processing_queue_size()
-        
-        overall_status = "healthy"
-        if not health_data["mongodb"] or not health_data["redis"]:
-            overall_status = "degraded" 
-        elif ftp_status["healthy_connections"] == 0 and ftp_status["total_connections"] > 0:
-            overall_status = "degraded"
-        
-        return HealthStatus(
-            status=overall_status,
-            timestamp=datetime.utcnow(),
-            uptime_seconds=uptime,
-            active_sources=ftp_status["healthy_connections"],
-            total_processed_files=processing_stats.get("total_processed", 0),
-            redis_connected=health_data["redis"],
-            mongodb_connected=health_data["mongodb"],
-            last_error=None
-        )
-    except Exception as e:
-        logger.error(f"Health check failed: {e}")
-        return HealthStatus(
-            status="unhealthy",
-            timestamp=datetime.utcnow(),
-            uptime_seconds=0,
-            active_sources=0,
-            total_processed_files=0,
-            redis_connected=False,
-            mongodb_connected=False,
-            last_error=str(e)
-        )
-
-@app.get("/stats", response_model=IngestionStats)
-async def get_ingestion_stats():
-    """Get data ingestion statistics"""
-    try:
-        db = await get_database()
-        
-        # Get statistics from database
-        stats_data = await db.ingestion_stats.find_one(
-            {"date": datetime.utcnow().strftime("%Y-%m-%d")}
-        ) or {}
-        
-        return IngestionStats(
-            files_processed_today=stats_data.get("files_processed", 0),
-            records_ingested_today=stats_data.get("records_ingested", 0),
-            errors_today=stats_data.get("errors", 0),
-            data_sources_active=stats_data.get("active_sources", 0),
-            average_processing_time_ms=stats_data.get("avg_processing_time", 0),
-            last_successful_ingestion=stats_data.get("last_success"),
-            redis_messages_published=stats_data.get("redis_published", 0),
-            data_quality_score=stats_data.get("quality_score", 100.0)
-        )
-    except Exception as e:
-        logger.error(f"Error getting ingestion stats: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.get("/sources")
-async def get_data_sources():
-    """Get configured data sources"""
-    try:
-        db = await get_database()
-        cursor = db.data_sources.find({})
-        sources = []
-        
-        async for source in cursor:
-            source["_id"] = str(source["_id"])
-            # Convert datetime fields
-            for field in ["created_at", "updated_at", "last_check", "last_success"]:
-                if field in source and source[field]:
-                    source[field] = source[field].isoformat()
-            sources.append(source)
-        
-        return {
-            "sources": sources,
-            "count": len(sources)
-        }
-    except Exception as e:
-        logger.error(f"Error getting data sources: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.post("/sources")
-async def create_data_source(
-    source_config: DataSourceCreate,
-    background_tasks: BackgroundTasks
-):
-    """Create a new data source"""
-    try:
-        db = await get_database()
-        
-        # Create source document
-        source_doc = {
-            "name": source_config.name,
-            "description": source_config.description,
-            "source_type": source_config.source_type,
-            "ftp_config": source_config.ftp_config.dict() if source_config.ftp_config else None,
-            "file_patterns": source_config.file_patterns,
-            "data_format": source_config.data_format.value,
-            "topics": [topic.dict() for topic in source_config.topics],
-            "redis_topics": [topic.topic_name for topic in source_config.topics],
-            "enabled": source_config.enabled,
-            "check_interval_seconds": source_config.polling_interval_minutes * 60,
-            "max_file_size_mb": source_config.max_file_size_mb,
-            "created_at": datetime.utcnow(),
-            "updated_at": datetime.utcnow(),
-            "status": "created"
-        }
-        
-        result = await db.data_sources.insert_one(source_doc)
-        
-        # Test connection in background
-        background_tasks.add_task(test_data_source_connection, str(result.inserted_id))
-        
-        return {
-            "message": "Data source created successfully",
-            "source_id": str(result.inserted_id),
-            "name": source_config.name
-        }
-    except Exception as e:
-        logger.error(f"Error creating data source: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.put("/sources/{source_id}")
-async def update_data_source(
-    source_id: str,
-    source_config: DataSourceUpdate
-):
-    """Update an existing data source"""
-    try:
-        db = await get_database()
-        
-        update_doc = {}
-        if source_config.name is not None:
-            update_doc["name"] = source_config.name
-        if source_config.description is not None:
-            update_doc["description"] = source_config.description
-        if source_config.ftp_config is not None:
-            update_doc["ftp_config"] = source_config.ftp_config.dict()
-        if source_config.file_patterns is not None:
-            update_doc["file_patterns"] = source_config.file_patterns
-        if source_config.data_format is not None:
-            update_doc["data_format"] = source_config.data_format.value
-        if source_config.topics is not None:
-            update_doc["topics"] = [topic.dict() for topic in source_config.topics]
-            update_doc["redis_topics"] = [topic.topic_name for topic in source_config.topics]
-        if source_config.enabled is not None:
-            update_doc["enabled"] = source_config.enabled
-        if source_config.polling_interval_minutes is not None:
-            update_doc["check_interval_seconds"] = source_config.polling_interval_minutes * 60
-        if source_config.max_file_size_mb is not None:
-            update_doc["max_file_size_mb"] = source_config.max_file_size_mb
-        
-        update_doc["updated_at"] = datetime.utcnow()
-        
-        result = await db.data_sources.update_one(
-            {"_id": ObjectId(source_id)},
-            {"$set": update_doc}
-        )
-        
-        if result.matched_count == 0:
-            raise HTTPException(status_code=404, detail="Data source not found")
-        
-        return {
-            "message": "Data source updated successfully",
-            "source_id": source_id
-        }
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Error updating data source: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.delete("/sources/{source_id}")
-async def delete_data_source(source_id: str):
-    """Delete a data source"""
-    try:
-        db = await get_database()
-        
-        result = await db.data_sources.delete_one({"_id": ObjectId(source_id)})
-        
-        if result.deleted_count == 0:
-            raise HTTPException(status_code=404, detail="Data source not found")
-        
-        return {
-            "message": "Data source deleted successfully",
-            "source_id": source_id
-        }
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Error deleting data source: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.post("/sources/{source_id}/test")
-async def test_data_source(source_id: str):
-    """Test connection to a data source"""
-    try:
-        db = await get_database()
-        source = await db.data_sources.find_one({"_id": ObjectId(source_id)})
-        
-        if not source:
-            raise HTTPException(status_code=404, detail="Data source not found")
-        
-        monitor = await get_ftp_monitor()
-        test_result = await monitor.test_connection(source)
-        
-        return {
-            "source_id": source_id,
-            "connection_test": test_result,
-            "tested_at": datetime.utcnow().isoformat()
-        }
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Error testing data source: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.post("/sources/{source_id}/trigger")
-async def trigger_manual_check(
-    source_id: str,
-    background_tasks: BackgroundTasks
-):
-    """Manually trigger a check for new data"""
-    try:
-        db = await get_database()
-        source = await db.data_sources.find_one({"_id": ObjectId(source_id)})
-        
-        if not source:
-            raise HTTPException(status_code=404, detail="Data source not found")
-        
-        # Trigger check in background
-        background_tasks.add_task(process_data_source, source)
-        
-        return {
-            "message": "Manual check triggered",
-            "source_id": source_id,
-            "triggered_at": datetime.utcnow().isoformat()
-        }
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Error triggering manual check: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.get("/processing/status")
-async def get_processing_status():
-    """Get current processing status"""
-    try:
-        db = await get_database()
-        
-        # Get recent processing jobs
-        cursor = db.processing_jobs.find().sort("started_at", -1).limit(20)
-        jobs = []
-        
-        async for job in cursor:
-            job["_id"] = str(job["_id"])
-            for field in ["started_at", "completed_at", "created_at"]:
-                if field in job and job[field]:
-                    job[field] = job[field].isoformat()
-            jobs.append(job)
-        
-        # Get queue size
-        queue_size = await get_processing_queue_size()
-        
-        return {
-            "processing_jobs": jobs,
-            "queue_size": queue_size,
-            "last_updated": datetime.utcnow().isoformat()
-        }
-    except Exception as e:
-        logger.error(f"Error getting processing status: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.get("/data-quality")
-async def get_data_quality_metrics():
-    """Get data quality metrics"""
-    try:
-        db = await get_database()
-        
-        # Get recent quality metrics
-        cursor = db.data_quality_metrics.find().sort("timestamp", -1).limit(10)
-        metrics = []
-        
-        async for metric in cursor:
-            metric["_id"] = str(metric["_id"])
-            if "timestamp" in metric:
-                metric["timestamp"] = metric["timestamp"].isoformat()
-            metrics.append(metric)
-        
-        return {
-            "quality_metrics": metrics,
-            "count": len(metrics)
-        }
-    except Exception as e:
-        logger.error(f"Error getting data quality metrics: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-@app.get("/redis/topics")
-async def get_redis_topics():
-    """Get active Redis topics"""
-    try:
-        redis = await get_redis()
-        publisher = await get_redis_publisher()
-        
-        topics_info = await publisher.get_topics_info()
-        
-        return {
-            "active_topics": topics_info,
-            "timestamp": datetime.utcnow().isoformat()
-        }
-    except Exception as e:
-        logger.error(f"Error getting Redis topics: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-# Background task functions
-async def initialize_data_sources():
-    """Initialize data sources from database"""
-    try:
-        db = await get_database()
-        
-        # Auto-configure SA4CPS source if none exist
-        count = await db.data_sources.count_documents({})
-        if count == 0:
-            from .simple_sa4cps_config import SimpleSA4CPSConfig
-            
-            config = SimpleSA4CPSConfig()
-            result = await config.setup_sa4cps_source()
-            
-            if result['success']:
-                logger.info(f"✅ Auto-configured SA4CPS source: {result['source_id']}")
-            else:
-                logger.warning(f"Failed to auto-configure SA4CPS: {result['message']}")
-        
-    except Exception as e:
-        logger.error(f"Error initializing data sources: {e}")
-
-async def initialize_components():
-    """Initialize core service components"""
-    try:
-        # Initialize global components
-        global ftp_monitor, data_processor, redis_publisher, data_validator, service_monitor
-        
-        db = await get_database()
-        redis = await get_redis()
-        
-        # Initialize monitoring first
-        service_monitor = ServiceMonitor(db, redis)
-        await service_monitor.start_monitoring()
-        
-        # Initialize FTP monitor
-        ftp_monitor = FTPMonitor(db, redis)
-        
-        # Initialize SLG_v2 processor
-        data_processor = SLGv2Processor(db, redis)
-        
-        # Initialize Redis publisher
-        redis_publisher = RedisPublisher(redis)
-        await redis_publisher.initialize()
-        
-        # Initialize data validator
-        data_validator = DataValidator(db, redis)
-        await data_validator.initialize()
-        
-        # Store app start time for uptime calculation
-        app.state.start_time = datetime.utcnow()
-        
-        logger.info("Core components initialized successfully")
-        
-    except Exception as e:
-        logger.error(f"Error initializing components: {e}")
-        if service_monitor:
-            await service_monitor.error_handler.log_error(e, {"task": "component_initialization"})
-        raise
-
-async def ftp_monitoring_task():
-    """Main FTP monitoring background task"""
-    logger.info("Starting FTP monitoring task")
-    
-    while True:
+    # Check database connection
+    if db_manager:
        try:
-            db = await get_database()
+            await db_manager.ping()
+            health_status["database"] = "connected"
+        except Exception:
+            health_status["database"] = "disconnected"
+            health_status["service"] = "degraded"

-            # Get all enabled data sources
-            cursor = db.data_sources.find({"enabled": True})
+    # Check FTP monitor status
+    if ftp_monitor:
+        health_status["ftp_monitor"] = ftp_monitor.get_status()
+        health_status["last_check"] = ftp_monitor.get_last_check_time()
+        health_status["files_processed"] = ftp_monitor.get_processed_count()

-            async for source in cursor:
-                try:
-                    # Check if it's time to check this source
-                    last_check = source.get("last_check")
-                    check_interval = source.get("check_interval_seconds", 300)
+    return health_status

-                    if (not last_check or 
-                        (datetime.utcnow() - last_check).total_seconds() >= check_interval):

-                        # Process this data source
-                        await process_data_source(source)
+@app.get("/status")
+async def get_status():
+    """Detailed status endpoint"""
+    global ftp_monitor, db_manager

-                        # Update last check time
-                        await db.data_sources.update_one(
-                            {"_id": source["_id"]},
-                            {"$set": {"last_check": datetime.utcnow()}}
-                        )
+    if not ftp_monitor:
+        raise HTTPException(status_code=503, detail="FTP monitor not initialized")

-                except Exception as e:
-                    logger.error(f"Error processing data source {source.get('name', 'unknown')}: {e}")
+    return {
+        "ftp_monitor": ftp_monitor.get_detailed_status(),
+        "database": await db_manager.get_stats() if db_manager else None,
+        "timestamp": datetime.now().isoformat()
+    }

-            # Sleep between monitoring cycles
-            await asyncio.sleep(30)

-        except Exception as e:
-            logger.error(f"Error in FTP monitoring task: {e}")
-            await asyncio.sleep(60)
+@app.post("/trigger-check")
+async def trigger_manual_check():
+    """Manually trigger FTP check"""
+    global ftp_monitor
+
+    if not ftp_monitor:
+        raise HTTPException(status_code=503, detail="FTP monitor not initialized")

-async def process_data_source(source: Dict[str, Any]):
-    """Process a single data source"""
    try:
-        monitor = await get_ftp_monitor()
-        processor = await get_slg_processor()
-        publisher = await get_redis_publisher()
-        
-        # Get new files from FTP
-        new_files = await monitor.check_for_new_files(source)
-        
-        if new_files:
-            logger.info(f"Found {len(new_files)} new .slg_v2 files for source: {source['name']}")
-            
-            for file_info in new_files:
-                try:
-                    # Download and process file
-                    file_data = await monitor.download_file(source, file_info)
-                    
-                    # Process the .slg_v2 file
-                    processed_data = await processor.process_slg_v2_file(file_data)
-                    
-                    # Validate data quality
-                    validator = await get_data_validator()
-                    quality_metrics = await validator.validate_time_series(processed_data)
-                    
-                    # Publish to Redis topics
-                    for topic in source["redis_topics"]:
-                        await publisher.publish_time_series_data(
-                            topic, processed_data, source["name"]
-                        )
-                    
-                    # Record processing success
-                    await record_processing_success(source, file_info, len(processed_data), quality_metrics)
-                    
-                except Exception as e:
-                    logger.error(f"Error processing file {file_info.get('filename', 'unknown')}: {e}")
-                    await record_processing_error(source, file_info, str(e))
-        
+        result = await ftp_monitor.check_for_new_files()
+        return {
+            "message": "Manual check completed",
+            "result": result,
+            "timestamp": datetime.now().isoformat()
+        }
    except Exception as e:
-        logger.error(f"Error in process_data_source for {source.get('name', 'unknown')}: {e}")
+        logger.error(f"Manual check failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")

-async def data_processing_task():
-    """Background task for data processing queue"""
-    logger.info("Starting data processing task")
-    
-    # This task handles queued processing jobs
-    while True:
-        try:
-            await asyncio.sleep(10)  # Check every 10 seconds
-            # Implementation for processing queued jobs would go here
-            
-        except Exception as e:
-            logger.error(f"Error in data processing task: {e}")
-            await asyncio.sleep(30)
-
-async def health_monitoring_task():
-    """Background task for monitoring system health"""
-    logger.info("Starting health monitoring task")
-    
-    while True:
-        try:
-            # Monitor FTP connections
-            await monitor_ftp_health()
-            
-            # Monitor Redis publishing
-            await monitor_redis_health()
-            
-            # Monitor processing performance
-            await monitor_processing_performance()
-            
-            await asyncio.sleep(60)  # Check every minute
-            
-        except Exception as e:
-            logger.error(f"Error in health monitoring task: {e}")
-            await asyncio.sleep(120)
-
-async def cleanup_task():
-    """Background task for cleaning up old data"""
-    logger.info("Starting cleanup task")
-    
-    while True:
-        try:
-            db = await get_database()
-            
-            # Clean up old processing jobs (keep last 1000)
-            old_jobs = await db.processing_jobs.find().sort("created_at", -1).skip(1000)
-            async for job in old_jobs:
-                await db.processing_jobs.delete_one({"_id": job["_id"]})
-            
-            # Clean up old quality metrics (keep last 30 days)
-            cutoff_date = datetime.utcnow() - timedelta(days=30)
-            await db.data_quality_metrics.delete_many({"timestamp": {"$lt": cutoff_date}})
-            
-            # Clean up old ingestion stats (keep last 90 days)
-            cutoff_date = datetime.utcnow() - timedelta(days=90)
-            await db.ingestion_stats.delete_many({"date": {"$lt": cutoff_date.strftime("%Y-%m-%d")}})
-            
-            await asyncio.sleep(3600)  # Run every hour
-            
-        except Exception as e:
-            logger.error(f"Error in cleanup task: {e}")
-            await asyncio.sleep(7200)
-
-# Helper functions
-async def check_ftp_connections() -> Dict[str, int]:
-    """Check health of FTP connections"""
-    try:
-        db = await get_database()
-        sources = await db.data_sources.find({"enabled": True}).to_list(None)
-        
-        total = len(sources)
-        healthy = 0
-        
-        monitor = await get_ftp_monitor()
-        for source in sources:
-            try:
-                if await monitor.test_connection(source):
-                    healthy += 1
-            except:
-                pass
-        
-        return {"total_connections": total, "healthy_connections": healthy}
-    except Exception as e:
-        logger.error(f"Error checking FTP connections: {e}")
-        return {"total_connections": 0, "healthy_connections": 0}
-
-async def get_processing_queue_size() -> int:
-    """Get size of processing queue"""
-    try:
-        db = await get_database()
-        return await db.processing_queue.count_documents({"status": "pending"})
-    except Exception as e:
-        logger.error(f"Error getting queue size: {e}")
-        return 0
-
-async def test_data_source_connection(source_id: str):
-    """Test connection to a data source (background task)"""
-    try:
-        db = await get_database()
-        source = await db.data_sources.find_one({"_id": ObjectId(source_id)})
-        
-        if source:
-            monitor = await get_ftp_monitor()
-            success = await monitor.test_connection(source)
-            
-            await db.data_sources.update_one(
-                {"_id": ObjectId(source_id)},
-                {"$set": {
-                    "last_test": datetime.utcnow(),
-                    "last_test_result": "success" if success else "failed"
-                }}
-            )
-    except Exception as e:
-        logger.error(f"Error testing connection for source {source_id}: {e}")
-
-async def record_processing_success(source, file_info, record_count, quality_metrics):
-    """Record successful processing"""
-    try:
-        db = await get_database()
-        
-        # Update source stats
-        await db.data_sources.update_one(
-            {"_id": source["_id"]},
-            {"$set": {"last_success": datetime.utcnow()}}
-        )
-        
-        # Update daily stats
-        today = datetime.utcnow().strftime("%Y-%m-%d")
-        await db.ingestion_stats.update_one(
-            {"date": today},
-            {
-                "$inc": {
-                    "files_processed": 1,
-                    "records_ingested": record_count,
-                    "redis_published": len(source["redis_topics"])
-                },
-                "$set": {
-                    "last_success": datetime.utcnow(),
-                    "quality_score": quality_metrics.get("overall_score", 100.0)
-                }
-            },
-            upsert=True
-        )
-        
-    except Exception as e:
-        logger.error(f"Error recording processing success: {e}")
-
-async def record_processing_error(source, file_info, error_message):
-    """Record processing error"""
-    try:
-        db = await get_database()
-        
-        # Update daily stats
-        today = datetime.utcnow().strftime("%Y-%m-%d")
-        await db.ingestion_stats.update_one(
-            {"date": today},
-            {"$inc": {"errors": 1}},
-            upsert=True
-        )
-        
-        # Log error
-        await db.processing_errors.insert_one({
-            "source_id": source["_id"],
-            "source_name": source["name"],
-            "file_info": file_info,
-            "error_message": error_message,
-            "timestamp": datetime.utcnow()
-        })
-        
-    except Exception as e:
-        logger.error(f"Error recording processing error: {e}")
-
-async def monitor_ftp_health():
-    """Monitor FTP connection health"""
-    # Implementation for FTP health monitoring
-    pass
-
-async def monitor_redis_health():
-    """Monitor Redis publishing health"""
-    # Implementation for Redis health monitoring
-    pass
-
-async def monitor_processing_performance():
-    """Monitor processing performance metrics"""
-    # Implementation for performance monitoring
-    pass

 if __name__ == "__main__":
    import uvicorn
-    from bson import ObjectId
-    uvicorn.run(app, host="0.0.0.0", port=8008)
+    uvicorn.run("main:app", host="0.0.0.0", port=8008, reload=True)
--- a/microservices/data-ingestion-service/src/models.py
+++ b/microservices/data-ingestion-service/src/models.py
@@ -1,386 +0,0 @@
-"""
-Data models for the data ingestion service.
-Defines Pydantic models for request/response validation and database schemas.
-"""
-
-from pydantic import BaseModel, Field, validator
-from typing import List, Dict, Any, Optional, Union
-from datetime import datetime
-from enum import Enum
-
-class DataFormat(str, Enum):
-    """Supported data formats for SA4CPS ingestion"""
-    SLG_V2 = "slg_v2"
-
-class SourceStatus(str, Enum):
-    """Status of a data source"""
-    ACTIVE = "active"
-    INACTIVE = "inactive"
-    ERROR = "error"
-    MAINTENANCE = "maintenance"
-
-class FTPConfig(BaseModel):
-    """FTP server configuration"""
-    host: str
-    port: int = Field(default=21, ge=1, le=65535)
-    username: str = "anonymous"
-    password: str = ""
-    use_ssl: bool = False
-    passive_mode: bool = True
-    remote_path: str = "/"
-    timeout: int = Field(default=30, ge=5, le=300)
-
-    @validator('host')
-    def validate_host(cls, v):
-        if not v or len(v.strip()) == 0:
-            raise ValueError('Host cannot be empty')
-        return v.strip()
-
-class TopicConfig(BaseModel):
-    """Redis topic configuration"""
-    topic_name: str
-    description: str = ""
-    data_types: List[str] = Field(default_factory=lambda: ["all"])
-    format: str = "sensor_reading"
-    enabled: bool = True
-
-class DataSourceCreate(BaseModel):
-    """Request model for creating a new data source"""
-    name: str = Field(..., min_length=1, max_length=100)
-    description: str = ""
-    source_type: str = Field(default="ftp", regex="^(ftp|sftp|http|https)$")
-    ftp_config: FTPConfig
-    file_patterns: List[str] = Field(default_factory=lambda: ["*.slg_v2"])
-    data_format: DataFormat = DataFormat.SLG_V2
-    topics: List[TopicConfig] = Field(default_factory=list)
-    polling_interval_minutes: int = Field(default=5, ge=1, le=1440)
-    max_file_size_mb: int = Field(default=100, ge=1, le=1000)
-    enabled: bool = True
-
-class DataSourceUpdate(BaseModel):
-    """Request model for updating a data source"""
-    name: Optional[str] = Field(None, min_length=1, max_length=100)
-    description: Optional[str] = None
-    ftp_config: Optional[FTPConfig] = None
-    file_patterns: Optional[List[str]] = None
-    data_format: Optional[DataFormat] = None
-    topics: Optional[List[TopicConfig]] = None
-    polling_interval_minutes: Optional[int] = Field(None, ge=1, le=1440)
-    max_file_size_mb: Optional[int] = Field(None, ge=1, le=1000)
-    enabled: Optional[bool] = None
-
-class DataSourceResponse(BaseModel):
-    """Response model for data source information"""
-    id: str
-    name: str
-    description: str
-    source_type: str
-    ftp_config: FTPConfig
-    file_patterns: List[str]
-    data_format: DataFormat
-    topics: List[TopicConfig]
-    polling_interval_minutes: int
-    max_file_size_mb: int
-    enabled: bool
-    status: SourceStatus
-    created_at: datetime
-    updated_at: datetime
-    last_check: Optional[datetime] = None
-    last_success: Optional[datetime] = None
-    error_count: int = 0
-    total_files_processed: int = 0
-
-    class Config:
-        json_encoders = {
-            datetime: lambda v: v.isoformat()
-        }
-
-class FileProcessingRequest(BaseModel):
-    """Request model for manual file processing"""
-    source_id: str
-    filename: str
-    force_reprocess: bool = False
-
-class FileProcessingResponse(BaseModel):
-    """Response model for file processing results"""
-    success: bool
-    message: str
-    records_processed: int
-    records_rejected: int
-    processing_time_seconds: float
-    file_size_bytes: int
-    topics_published: List[str]
-
-class IngestionStats(BaseModel):
-    """Response model for ingestion statistics"""
-    files_processed_today: int
-    records_processed_today: int
-    active_sources: int
-    total_sources: int
-    average_processing_time: float
-    success_rate_percentage: float
-    last_24h_volume_mb: float
-
-class QualityMetrics(BaseModel):
-    """Data quality metrics"""
-    completeness: float = Field(..., ge=0.0, le=1.0)
-    accuracy: float = Field(..., ge=0.0, le=1.0)
-    consistency: float = Field(..., ge=0.0, le=1.0)
-    timeliness: float = Field(..., ge=0.0, le=1.0)
-    overall: float = Field(..., ge=0.0, le=1.0)
-
-class QualityReport(BaseModel):
-    """Data quality report"""
-    source: str
-    total_records: int
-    processed_records: int
-    rejected_records: int
-    quality_scores: QualityMetrics
-    issues_found: List[str]
-    processing_time: datetime
-
-    class Config:
-        json_encoders = {
-            datetime: lambda v: v.isoformat()
-        }
-
-class HealthStatus(BaseModel):
-    """Service health status"""
-    status: str
-    timestamp: datetime
-    uptime_seconds: float
-    active_sources: int
-    total_processed_files: int
-    redis_connected: bool
-    mongodb_connected: bool
-    last_error: Optional[str] = None
-
-    class Config:
-        json_encoders = {
-            datetime: lambda v: v.isoformat()
-        }
-
-class SensorReading(BaseModel):
-    """Individual sensor reading model"""
-    sensor_id: str
-    timestamp: Union[int, float, str]
-    value: Union[int, float]
-    unit: Optional[str] = None
-    metadata: Dict[str, Any] = Field(default_factory=dict)
-
-class ProcessedFile(BaseModel):
-    """Processed file record"""
-    source_id: str
-    source_name: str
-    filename: str
-    file_signature: str
-    file_size: int
-    modified_time: datetime
-    processed_at: datetime
-
-class TopicInfo(BaseModel):
-    """Topic information response"""
-    topic_name: str
-    description: str
-    data_types: List[str]
-    format: str
-    message_count: int
-    last_published: Optional[datetime] = None
-    created_at: datetime
-
-    class Config:
-        json_encoders = {
-            datetime: lambda v: v.isoformat()
-        }
-
-class PublishingStats(BaseModel):
-    """Publishing statistics response"""
-    total_messages_published: int
-    active_topics: int
-    topic_stats: Dict[str, int]
-    last_updated: datetime
-
-    class Config:
-        json_encoders = {
-            datetime: lambda v: v.isoformat()
-        }
-
-class ErrorLog(BaseModel):
-    """Error logging model"""
-    service: str = "data-ingestion-service"
-    timestamp: datetime
-    level: str
-    source_id: Optional[str] = None
-    source_name: Optional[str] = None
-    error_type: str
-    error_message: str
-    stack_trace: Optional[str] = None
-    context: Dict[str, Any] = Field(default_factory=dict)
-
-    class Config:
-        json_encoders = {
-            datetime: lambda v: v.isoformat()
-        }
-
-class MonitoringAlert(BaseModel):
-    """Monitoring alert model"""
-    alert_id: str
-    alert_type: str  # "error", "warning", "info"
-    source_id: Optional[str] = None
-    title: str
-    description: str
-    severity: str = Field(..., regex="^(low|medium|high|critical)$")
-    timestamp: datetime
-    resolved: bool = False
-    resolved_at: Optional[datetime] = None
-    metadata: Dict[str, Any] = Field(default_factory=dict)
-
-    class Config:
-        json_encoders = {
-            datetime: lambda v: v.isoformat()
-        }
-
-# Database schema definitions for MongoDB collections
-
-class DataSourceSchema:
-    """MongoDB schema for data sources"""
-    collection_name = "data_sources"
-
-    @staticmethod
-    def get_indexes():
-        return [
-            {"keys": [("name", 1)], "unique": True},
-            {"keys": [("status", 1)]},
-            {"keys": [("enabled", 1)]},
-            {"keys": [("created_at", -1)]},
-            {"keys": [("last_check", -1)]}
-        ]
-
-class ProcessedFileSchema:
-    """MongoDB schema for processed files"""
-    collection_name = "processed_files"
-
-    @staticmethod
-    def get_indexes():
-        return [
-            {"keys": [("source_id", 1), ("file_signature", 1)], "unique": True},
-            {"keys": [("processed_at", -1)]},
-            {"keys": [("source_name", 1)]},
-            {"keys": [("filename", 1)]}
-        ]
-
-class QualityReportSchema:
-    """MongoDB schema for quality reports"""
-    collection_name = "quality_reports"
-
-    @staticmethod
-    def get_indexes():
-        return [
-            {"keys": [("source", 1)]},
-            {"keys": [("processing_time", -1)]},
-            {"keys": [("quality_scores.overall", -1)]}
-        ]
-
-class IngestionStatsSchema:
-    """MongoDB schema for ingestion statistics"""
-    collection_name = "ingestion_stats"
-
-    @staticmethod
-    def get_indexes():
-        return [
-            {"keys": [("date", 1)], "unique": True},
-            {"keys": [("timestamp", -1)]}
-        ]
-
-class ErrorLogSchema:
-    """MongoDB schema for error logs"""
-    collection_name = "error_logs"
-
-    @staticmethod
-    def get_indexes():
-        return [
-            {"keys": [("timestamp", -1)]},
-            {"keys": [("source_id", 1)]},
-            {"keys": [("error_type", 1)]},
-            {"keys": [("level", 1)]}
-        ]
-
-class MonitoringAlertSchema:
-    """MongoDB schema for monitoring alerts"""
-    collection_name = "monitoring_alerts"
-
-    @staticmethod
-    def get_indexes():
-        return [
-            {"keys": [("alert_id", 1)], "unique": True},
-            {"keys": [("timestamp", -1)]},
-            {"keys": [("source_id", 1)]},
-            {"keys": [("alert_type", 1)]},
-            {"keys": [("resolved", 1)]}
-        ]
-
-# Validation helpers
-def validate_timestamp(timestamp: Union[int, float, str]) -> int:
-    """Validate and convert timestamp to unix timestamp"""
-    if isinstance(timestamp, str):
-        try:
-            # Try ISO format first
-            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
-            return int(dt.timestamp())
-        except ValueError:
-            try:
-                # Try as unix timestamp string
-                return int(float(timestamp))
-            except ValueError:
-                raise ValueError(f"Invalid timestamp format: {timestamp}")
-    elif isinstance(timestamp, (int, float)):
-        return int(timestamp)
-    else:
-        raise ValueError(f"Timestamp must be int, float, or string, got {type(timestamp)}")
-
-def validate_sensor_id(sensor_id: str) -> str:
-    """Validate sensor ID format"""
-    if not isinstance(sensor_id, str) or len(sensor_id.strip()) == 0:
-        raise ValueError("Sensor ID must be a non-empty string")
-
-    # Remove extra whitespace
-    sensor_id = sensor_id.strip()
-
-    # Check length
-    if len(sensor_id) > 100:
-        raise ValueError("Sensor ID too long (max 100 characters)")
-
-    return sensor_id
-
-def validate_numeric_value(value: Union[int, float, str]) -> float:
-    """Validate and convert numeric value"""
-    try:
-        numeric_value = float(value)
-        if not (-1e10 <= numeric_value <= 1e10):  # Reasonable range
-            raise ValueError(f"Value out of reasonable range: {numeric_value}")
-        return numeric_value
-    except (ValueError, TypeError):
-        raise ValueError(f"Invalid numeric value: {value}")
-
-# Export all models for easy importing
-__all__ = [
-    # Enums
-    'DataFormat', 'SourceStatus',
-
-    # Config models
-    'FTPConfig', 'TopicConfig',
-
-    # Request/Response models
-    'DataSourceCreate', 'DataSourceUpdate', 'DataSourceResponse',
-    'FileProcessingRequest', 'FileProcessingResponse',
-    'IngestionStats', 'QualityMetrics', 'QualityReport',
-    'HealthStatus', 'SensorReading', 'ProcessedFile',
-    'TopicInfo', 'PublishingStats', 'ErrorLog', 'MonitoringAlert',
-
-    # Schema definitions
-    'DataSourceSchema', 'ProcessedFileSchema', 'QualityReportSchema',
-    'IngestionStatsSchema', 'ErrorLogSchema', 'MonitoringAlertSchema',
-
-    # Validation helpers
-    'validate_timestamp', 'validate_sensor_id', 'validate_numeric_value'
-]
--- a/microservices/data-ingestion-service/src/monitoring.py
+++ b/microservices/data-ingestion-service/src/monitoring.py
@@ -1,545 +0,0 @@
-"""
-Monitoring and alerting system for the data ingestion service.
-Handles error tracking, performance monitoring, and alert generation.
-"""
-
-import asyncio
-import logging
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Optional
-import json
-import traceback
-import uuid
-from collections import defaultdict, deque
-import time
-import psutil
-import os
-
-logger = logging.getLogger(__name__)
-
-class PerformanceMonitor:
-    """Monitors service performance metrics"""
-    
-    def __init__(self, redis_client):
-        self.redis = redis_client
-        self.metrics_buffer = defaultdict(deque)
-        self.max_buffer_size = 1000
-        self.last_flush = datetime.utcnow()
-        self.flush_interval = 60  # seconds
-        
-        # Performance counters
-        self.request_count = 0
-        self.error_count = 0
-        self.processing_times = deque(maxlen=100)
-        self.memory_usage = deque(maxlen=100)
-        self.cpu_usage = deque(maxlen=100)
-    
-    async def record_request(self, endpoint: str, duration: float, success: bool = True):
-        """Record request metrics"""
-        try:
-            self.request_count += 1
-            if not success:
-                self.error_count += 1
-            
-            self.processing_times.append(duration)
-            
-            # Store in buffer
-            metric_data = {
-                "timestamp": datetime.utcnow().isoformat(),
-                "endpoint": endpoint,
-                "duration_ms": duration * 1000,
-                "success": success,
-                "request_id": str(uuid.uuid4())
-            }
-            
-            self.metrics_buffer["requests"].append(metric_data)
-            
-            # Trim buffer if needed
-            if len(self.metrics_buffer["requests"]) > self.max_buffer_size:
-                self.metrics_buffer["requests"].popleft()
-            
-            # Auto-flush if interval exceeded
-            if (datetime.utcnow() - self.last_flush).seconds > self.flush_interval:
-                await self.flush_metrics()
-        
-        except Exception as e:
-            logger.error(f"Error recording request metric: {e}")
-    
-    async def record_system_metrics(self):
-        """Record system-level performance metrics"""
-        try:
-            # CPU usage
-            cpu_percent = psutil.cpu_percent()
-            self.cpu_usage.append(cpu_percent)
-            
-            # Memory usage
-            process = psutil.Process()
-            memory_info = process.memory_info()
-            memory_mb = memory_info.rss / 1024 / 1024
-            self.memory_usage.append(memory_mb)
-            
-            # Disk usage
-            disk_usage = psutil.disk_usage('/')
-            
-            system_metrics = {
-                "timestamp": datetime.utcnow().isoformat(),
-                "cpu_percent": cpu_percent,
-                "memory_mb": memory_mb,
-                "disk_free_gb": disk_usage.free / 1024 / 1024 / 1024,
-                "disk_percent": (disk_usage.used / disk_usage.total) * 100
-            }
-            
-            self.metrics_buffer["system"].append(system_metrics)
-            
-            # Trim buffer
-            if len(self.metrics_buffer["system"]) > self.max_buffer_size:
-                self.metrics_buffer["system"].popleft()
-        
-        except Exception as e:
-            logger.error(f"Error recording system metrics: {e}")
-    
-    async def record_data_processing_metrics(self, source_name: str, files_processed: int, 
-                                           records_processed: int, processing_time: float):
-        """Record data processing performance metrics"""
-        try:
-            processing_metrics = {
-                "timestamp": datetime.utcnow().isoformat(),
-                "source_name": source_name,
-                "files_processed": files_processed,
-                "records_processed": records_processed,
-                "processing_time_seconds": processing_time,
-                "records_per_second": records_processed / max(processing_time, 0.001),
-                "files_per_hour": files_processed * 3600 / max(processing_time, 0.001)
-            }
-            
-            self.metrics_buffer["processing"].append(processing_metrics)
-            
-            # Trim buffer
-            if len(self.metrics_buffer["processing"]) > self.max_buffer_size:
-                self.metrics_buffer["processing"].popleft()
-        
-        except Exception as e:
-            logger.error(f"Error recording processing metrics: {e}")
-    
-    async def flush_metrics(self):
-        """Flush metrics buffer to Redis"""
-        try:
-            if not self.metrics_buffer:
-                return
-            
-            # Create batch update
-            pipe = self.redis.pipeline()
-            
-            for metric_type, metrics in self.metrics_buffer.items():
-                # Convert deque to list and serialize
-                metrics_data = [dict(m) if isinstance(m, dict) else m for m in metrics]
-                
-                # Store in Redis with timestamp key
-                timestamp_key = datetime.utcnow().strftime("%Y%m%d_%H%M")
-                redis_key = f"metrics:{metric_type}:{timestamp_key}"
-                
-                pipe.lpush(redis_key, json.dumps(metrics_data))
-                pipe.expire(redis_key, 86400 * 7)  # Keep for 7 days
-            
-            await pipe.execute()
-            
-            # Clear buffer
-            self.metrics_buffer.clear()
-            self.last_flush = datetime.utcnow()
-            
-            logger.debug("Performance metrics flushed to Redis")
-        
-        except Exception as e:
-            logger.error(f"Error flushing metrics: {e}")
-    
-    async def get_performance_summary(self) -> Dict[str, Any]:
-        """Get current performance summary"""
-        try:
-            return {
-                "request_count": self.request_count,
-                "error_count": self.error_count,
-                "error_rate": (self.error_count / max(self.request_count, 1)) * 100,
-                "avg_processing_time_ms": sum(self.processing_times) / max(len(self.processing_times), 1) * 1000,
-                "current_memory_mb": self.memory_usage[-1] if self.memory_usage else 0,
-                "current_cpu_percent": self.cpu_usage[-1] if self.cpu_usage else 0,
-                "metrics_buffer_size": sum(len(buffer) for buffer in self.metrics_buffer.values()),
-                "last_flush": self.last_flush.isoformat()
-            }
-        except Exception as e:
-            logger.error(f"Error getting performance summary: {e}")
-            return {}
-
-class ErrorHandler:
-    """Centralized error handling and logging"""
-    
-    def __init__(self, db, redis_client):
-        self.db = db
-        self.redis = redis_client
-        self.error_counts = defaultdict(int)
-        self.error_history = deque(maxlen=100)
-        self.alert_thresholds = {
-            "error_rate": 10,      # errors per minute
-            "memory_usage": 500,   # MB
-            "cpu_usage": 80,       # percent
-            "disk_usage": 90,      # percent
-            "response_time": 5000  # milliseconds
-        }
-    
-    async def log_error(self, error: Exception, context: Dict[str, Any] = None, 
-                       source_id: str = None, source_name: str = None):
-        """Log error with context information"""
-        try:
-            error_type = type(error).__name__
-            error_message = str(error)
-            stack_trace = traceback.format_exc()
-            
-            # Update error counters
-            self.error_counts[error_type] += 1
-            
-            # Create error record
-            error_record = {
-                "timestamp": datetime.utcnow(),
-                "service": "data-ingestion-service",
-                "level": "ERROR",
-                "source_id": source_id,
-                "source_name": source_name,
-                "error_type": error_type,
-                "error_message": error_message,
-                "stack_trace": stack_trace,
-                "context": context or {}
-            }
-            
-            # Store in database
-            await self.db.error_logs.insert_one(error_record)
-            
-            # Add to history
-            self.error_history.append({
-                "timestamp": error_record["timestamp"].isoformat(),
-                "type": error_type,
-                "message": error_message[:100]  # Truncate for memory
-            })
-            
-            # Check for alert conditions
-            await self.check_alert_conditions(error_record)
-            
-            # Log to standard logger
-            logger.error(f"[{source_name or 'system'}] {error_type}: {error_message}", 
-                        extra={"context": context, "source_id": source_id})
-        
-        except Exception as e:
-            # Fallback logging if error handler fails
-            logger.critical(f"Error handler failed: {e}")
-            logger.error(f"Original error: {error}")
-    
-    async def log_warning(self, message: str, context: Dict[str, Any] = None, 
-                         source_id: str = None, source_name: str = None):
-        """Log warning message"""
-        try:
-            warning_record = {
-                "timestamp": datetime.utcnow(),
-                "service": "data-ingestion-service",
-                "level": "WARNING",
-                "source_id": source_id,
-                "source_name": source_name,
-                "error_type": "WARNING",
-                "error_message": message,
-                "context": context or {}
-            }
-            
-            await self.db.error_logs.insert_one(warning_record)
-            logger.warning(f"[{source_name or 'system'}] {message}", 
-                          extra={"context": context, "source_id": source_id})
-        
-        except Exception as e:
-            logger.error(f"Error logging warning: {e}")
-    
-    async def check_alert_conditions(self, error_record: Dict[str, Any]):
-        """Check if error conditions warrant alerts"""
-        try:
-            # Count recent errors (last 1 minute)
-            one_minute_ago = datetime.utcnow() - timedelta(minutes=1)
-            recent_errors = await self.db.error_logs.count_documents({
-                "timestamp": {"$gte": one_minute_ago},
-                "level": "ERROR"
-            })
-            
-            # Check error rate threshold
-            if recent_errors >= self.alert_thresholds["error_rate"]:
-                await self.create_alert(
-                    alert_type="error_rate",
-                    title="High Error Rate Detected",
-                    description=f"Detected {recent_errors} errors in the last minute",
-                    severity="high",
-                    metadata={"error_count": recent_errors, "threshold": self.alert_thresholds["error_rate"]}
-                )
-        
-        except Exception as e:
-            logger.error(f"Error checking alert conditions: {e}")
-    
-    async def create_alert(self, alert_type: str, title: str, description: str, 
-                          severity: str, source_id: str = None, metadata: Dict[str, Any] = None):
-        """Create monitoring alert"""
-        try:
-            alert_record = {
-                "alert_id": str(uuid.uuid4()),
-                "alert_type": alert_type,
-                "source_id": source_id,
-                "title": title,
-                "description": description,
-                "severity": severity,
-                "timestamp": datetime.utcnow(),
-                "resolved": False,
-                "metadata": metadata or {}
-            }
-            
-            await self.db.monitoring_alerts.insert_one(alert_record)
-            
-            # Also publish to Redis for real-time notifications
-            alert_notification = {
-                **alert_record,
-                "timestamp": alert_record["timestamp"].isoformat()
-            }
-            
-            await self.redis.publish("alerts:data-ingestion", json.dumps(alert_notification))
-            
-            logger.warning(f"Alert created: {title} ({severity})")
-        
-        except Exception as e:
-            logger.error(f"Error creating alert: {e}")
-    
-    async def get_error_summary(self) -> Dict[str, Any]:
-        """Get error summary statistics"""
-        try:
-            # Get error counts by type
-            error_types = dict(self.error_counts)
-            
-            # Get recent error rate
-            one_hour_ago = datetime.utcnow() - timedelta(hours=1)
-            recent_errors = await self.db.error_logs.count_documents({
-                "timestamp": {"$gte": one_hour_ago},
-                "level": "ERROR"
-            })
-            
-            # Get recent alerts
-            recent_alerts = await self.db.monitoring_alerts.count_documents({
-                "timestamp": {"$gte": one_hour_ago},
-                "resolved": False
-            })
-            
-            return {
-                "total_errors": sum(error_types.values()),
-                "error_types": error_types,
-                "recent_errors_1h": recent_errors,
-                "active_alerts": recent_alerts,
-                "error_history": list(self.error_history)[-10:],  # Last 10 errors
-                "last_error": self.error_history[-1] if self.error_history else None
-            }
-        
-        except Exception as e:
-            logger.error(f"Error getting error summary: {e}")
-            return {}
-
-class ServiceMonitor:
-    """Main service monitoring coordinator"""
-    
-    def __init__(self, db, redis_client):
-        self.db = db
-        self.redis = redis_client
-        self.performance_monitor = PerformanceMonitor(redis_client)
-        self.error_handler = ErrorHandler(db, redis_client)
-        self.monitoring_active = False
-        self.monitoring_interval = 30  # seconds
-    
-    async def start_monitoring(self):
-        """Start background monitoring tasks"""
-        self.monitoring_active = True
-        logger.info("Service monitoring started")
-        
-        # Start monitoring loop
-        asyncio.create_task(self._monitoring_loop())
-    
-    async def stop_monitoring(self):
-        """Stop background monitoring"""
-        self.monitoring_active = False
-        await self.performance_monitor.flush_metrics()
-        logger.info("Service monitoring stopped")
-    
-    async def _monitoring_loop(self):
-        """Main monitoring loop"""
-        while self.monitoring_active:
-            try:
-                # Record system metrics
-                await self.performance_monitor.record_system_metrics()
-                
-                # Check system health
-                await self._check_system_health()
-                
-                # Cleanup old data
-                await self._cleanup_old_monitoring_data()
-                
-                # Wait for next cycle
-                await asyncio.sleep(self.monitoring_interval)
-                
-            except Exception as e:
-                await self.error_handler.log_error(e, {"task": "monitoring_loop"})
-                await asyncio.sleep(self.monitoring_interval)
-    
-    async def _check_system_health(self):
-        """Check system health and create alerts if needed"""
-        try:
-            # Check memory usage
-            current_memory = self.performance_monitor.memory_usage[-1] if self.performance_monitor.memory_usage else 0
-            if current_memory > self.error_handler.alert_thresholds["memory_usage"]:
-                await self.error_handler.create_alert(
-                    alert_type="high_memory",
-                    title="High Memory Usage",
-                    description=f"Memory usage at {current_memory:.1f}MB",
-                    severity="warning",
-                    metadata={"current_memory_mb": current_memory}
-                )
-            
-            # Check CPU usage
-            current_cpu = self.performance_monitor.cpu_usage[-1] if self.performance_monitor.cpu_usage else 0
-            if current_cpu > self.error_handler.alert_thresholds["cpu_usage"]:
-                await self.error_handler.create_alert(
-                    alert_type="high_cpu",
-                    title="High CPU Usage",
-                    description=f"CPU usage at {current_cpu:.1f}%",
-                    severity="warning",
-                    metadata={"current_cpu_percent": current_cpu}
-                )
-        
-        except Exception as e:
-            logger.error(f"Error checking system health: {e}")
-    
-    async def _cleanup_old_monitoring_data(self):
-        """Clean up old monitoring data"""
-        try:
-            # Clean up old error logs (older than 30 days)
-            thirty_days_ago = datetime.utcnow() - timedelta(days=30)
-            
-            deleted_errors = await self.db.error_logs.delete_many({
-                "timestamp": {"$lt": thirty_days_ago}
-            })
-            
-            # Clean up resolved alerts (older than 7 days)
-            seven_days_ago = datetime.utcnow() - timedelta(days=7)
-            
-            deleted_alerts = await self.db.monitoring_alerts.delete_many({
-                "timestamp": {"$lt": seven_days_ago},
-                "resolved": True
-            })
-            
-            if deleted_errors.deleted_count > 0 or deleted_alerts.deleted_count > 0:
-                logger.info(f"Cleaned up {deleted_errors.deleted_count} old error logs and "
-                           f"{deleted_alerts.deleted_count} resolved alerts")
-        
-        except Exception as e:
-            logger.error(f"Error cleaning up old monitoring data: {e}")
-    
-    async def get_service_status(self) -> Dict[str, Any]:
-        """Get comprehensive service status"""
-        try:
-            performance_summary = await self.performance_monitor.get_performance_summary()
-            error_summary = await self.error_handler.get_error_summary()
-            
-            # Get database status
-            db_status = await self._get_database_status()
-            
-            # Overall health assessment
-            health_score = await self._calculate_health_score(performance_summary, error_summary)
-            
-            return {
-                "service": "data-ingestion-service",
-                "timestamp": datetime.utcnow().isoformat(),
-                "health_score": health_score,
-                "monitoring_active": self.monitoring_active,
-                "performance": performance_summary,
-                "errors": error_summary,
-                "database": db_status
-            }
-        
-        except Exception as e:
-            logger.error(f"Error getting service status: {e}")
-            return {"error": str(e)}
-    
-    async def _get_database_status(self) -> Dict[str, Any]:
-        """Get database connection and performance status"""
-        try:
-            # Test MongoDB connection
-            start_time = time.time()
-            await self.db.command("ping")
-            mongo_latency = (time.time() - start_time) * 1000
-            
-            # Test Redis connection
-            start_time = time.time()
-            await self.redis.ping()
-            redis_latency = (time.time() - start_time) * 1000
-            
-            # Get collection counts
-            collections_info = {}
-            for collection_name in ["data_sources", "processed_files", "error_logs", "monitoring_alerts"]:
-                try:
-                    count = await self.db[collection_name].count_documents({})
-                    collections_info[collection_name] = count
-                except:
-                    collections_info[collection_name] = "unknown"
-            
-            return {
-                "mongodb": {
-                    "connected": True,
-                    "latency_ms": round(mongo_latency, 2)
-                },
-                "redis": {
-                    "connected": True,
-                    "latency_ms": round(redis_latency, 2)
-                },
-                "collections": collections_info
-            }
-        
-        except Exception as e:
-            return {
-                "mongodb": {"connected": False, "error": str(e)},
-                "redis": {"connected": False, "error": str(e)},
-                "collections": {}
-            }
-    
-    async def _calculate_health_score(self, performance: Dict[str, Any], errors: Dict[str, Any]) -> float:
-        """Calculate overall health score (0-100)"""
-        try:
-            score = 100.0
-            
-            # Deduct for high error rate
-            error_rate = performance.get("error_rate", 0)
-            if error_rate > 5:
-                score -= min(error_rate * 2, 30)
-            
-            # Deduct for high resource usage
-            memory_mb = performance.get("current_memory_mb", 0)
-            if memory_mb > 300:
-                score -= min((memory_mb - 300) / 10, 20)
-            
-            cpu_percent = performance.get("current_cpu_percent", 0)
-            if cpu_percent > 70:
-                score -= min((cpu_percent - 70) / 2, 15)
-            
-            # Deduct for recent errors
-            recent_errors = errors.get("recent_errors_1h", 0)
-            if recent_errors > 0:
-                score -= min(recent_errors * 5, 25)
-            
-            # Deduct for active alerts
-            active_alerts = errors.get("active_alerts", 0)
-            if active_alerts > 0:
-                score -= min(active_alerts * 10, 20)
-            
-            return max(0.0, round(score, 1))
-        
-        except Exception as e:
-            logger.error(f"Error calculating health score: {e}")
-            return 50.0  # Default moderate health score
-
-# Export monitoring components
-__all__ = [
-    'ServiceMonitor', 'PerformanceMonitor', 'ErrorHandler'
-]
--- a/microservices/data-ingestion-service/src/redis_publisher.py
+++ b/microservices/data-ingestion-service/src/redis_publisher.py
@@ -1,484 +0,0 @@
-"""
-Redis publisher for broadcasting time series data to multiple topics.
-Handles data transformation, routing, and publishing for real-time simulation.
-"""
-
-import asyncio
-import json
-import logging
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Optional
-import hashlib
-import uuid
-from collections import defaultdict
-import redis.asyncio as redis
-
-logger = logging.getLogger(__name__)
-
-class RedisPublisher:
-    """Publishes time series data to Redis channels for real-time simulation"""
-    
-    def __init__(self, redis_client):
-        self.redis = redis_client
-        self.publishing_stats = defaultdict(int)
-        self.topic_configs = {}
-        self.message_cache = {}
-        
-        # Default topic configurations
-        self.default_topics = {
-            "energy_data": {
-                "description": "General energy consumption data",
-                "data_types": ["energy", "power", "consumption"],
-                "format": "sensor_reading"
-            },
-            "community_consumption": {
-                "description": "Community-level energy consumption",
-                "data_types": ["consumption", "usage", "demand"],
-                "format": "aggregated_data"
-            },
-            "real_time_metrics": {
-                "description": "Real-time sensor metrics",
-                "data_types": ["all"],
-                "format": "metric_update"
-            },
-            "simulation_data": {
-                "description": "Data for simulation purposes",
-                "data_types": ["all"],
-                "format": "simulation_point"
-            },
-            "community_generation": {
-                "description": "Community energy generation data",
-                "data_types": ["generation", "production", "renewable"],
-                "format": "generation_data"
-            },
-            "grid_events": {
-                "description": "Grid-related events and alerts",
-                "data_types": ["events", "alerts", "grid_status"],
-                "format": "event_data"
-            }
-        }
-    
-    async def initialize(self):
-        """Initialize publisher with default topic configurations"""
-        try:
-            for topic, config in self.default_topics.items():
-                await self.configure_topic(topic, config)
-            
-            logger.info(f"Initialized Redis publisher with {len(self.default_topics)} default topics")
-        
-        except Exception as e:
-            logger.error(f"Error initializing Redis publisher: {e}")
-            raise
-    
-    async def publish_time_series_data(self, topic: str, data: List[Dict[str, Any]], source_name: str):
-        """Publish time series data to a specific Redis topic"""
-        try:
-            if not data:
-                logger.warning(f"No data to publish to topic: {topic}")
-                return
-            
-            logger.info(f"Publishing {len(data)} records to topic: {topic}")
-            
-            # Get topic configuration
-            topic_config = self.topic_configs.get(topic, {})
-            data_format = topic_config.get("format", "sensor_reading")
-            
-            # Process and publish each data point
-            published_count = 0
-            for record in data:
-                try:
-                    # Transform data based on topic format
-                    message = await self._transform_data_for_topic(record, data_format, source_name)
-                    
-                    # Add publishing metadata
-                    message["published_at"] = datetime.utcnow().isoformat()
-                    message["topic"] = topic
-                    message["message_id"] = str(uuid.uuid4())
-                    
-                    # Publish to Redis
-                    await self.redis.publish(topic, json.dumps(message))
-                    
-                    published_count += 1
-                    self.publishing_stats[topic] += 1
-                    
-                except Exception as e:
-                    logger.warning(f"Error publishing record to {topic}: {e}")
-                    continue
-            
-            logger.info(f"Successfully published {published_count}/{len(data)} records to {topic}")
-            
-            # Update topic statistics
-            await self._update_topic_stats(topic, published_count)
-            
-        except Exception as e:
-            logger.error(f"Error publishing to topic {topic}: {e}")
-            raise
-    
-    async def publish_single_message(self, topic: str, message: Dict[str, Any]):
-        """Publish a single message to a Redis topic"""
-        try:
-            # Add metadata
-            message["published_at"] = datetime.utcnow().isoformat()
-            message["topic"] = topic
-            message["message_id"] = str(uuid.uuid4())
-            
-            # Publish
-            await self.redis.publish(topic, json.dumps(message))
-            
-            self.publishing_stats[topic] += 1
-            logger.debug(f"Published single message to {topic}")
-            
-        except Exception as e:
-            logger.error(f"Error publishing single message to {topic}: {e}")
-            raise
-    
-    async def publish_batch(self, topic_messages: Dict[str, List[Dict[str, Any]]]):
-        """Publish multiple messages to multiple topics"""
-        try:
-            total_published = 0
-            
-            for topic, messages in topic_messages.items():
-                for message in messages:
-                    await self.publish_single_message(topic, message)
-                    total_published += 1
-            
-            logger.info(f"Batch published {total_published} messages across {len(topic_messages)} topics")
-            
-        except Exception as e:
-            logger.error(f"Error in batch publishing: {e}")
-            raise
-    
-    async def configure_topic(self, topic: str, config: Dict[str, Any]):
-        """Configure a topic with specific settings"""
-        try:
-            self.topic_configs[topic] = {
-                "description": config.get("description", ""),
-                "data_types": config.get("data_types", ["all"]),
-                "format": config.get("format", "generic"),
-                "created_at": datetime.utcnow().isoformat(),
-                "message_count": 0
-            }
-            
-            logger.info(f"Configured topic: {topic}")
-            
-        except Exception as e:
-            logger.error(f"Error configuring topic {topic}: {e}")
-            raise
-    
-    async def get_topics_info(self) -> Dict[str, Any]:
-        """Get information about all configured topics"""
-        try:
-            topics_info = {}
-            
-            for topic, config in self.topic_configs.items():
-                # Get recent message count
-                message_count = self.publishing_stats.get(topic, 0)
-                
-                topics_info[topic] = {
-                    **config,
-                    "message_count": message_count,
-                    "last_published": await self._get_last_published_time(topic)
-                }
-            
-            return topics_info
-            
-        except Exception as e:
-            logger.error(f"Error getting topics info: {e}")
-            return {}
-    
-    async def get_publishing_stats(self) -> Dict[str, Any]:
-        """Get publishing statistics"""
-        try:
-            total_messages = sum(self.publishing_stats.values())
-            
-            return {
-                "total_messages_published": total_messages,
-                "active_topics": len(self.topic_configs),
-                "topic_stats": dict(self.publishing_stats),
-                "last_updated": datetime.utcnow().isoformat()
-            }
-            
-        except Exception as e:
-            logger.error(f"Error getting publishing stats: {e}")
-            return {}
-    
-    async def _transform_data_for_topic(self, record: Dict[str, Any], format_type: str, source_name: str) -> Dict[str, Any]:
-        """Transform data based on topic format requirements"""
-        try:
-            base_message = {
-                "source": source_name,
-                "format": format_type
-            }
-            
-            if format_type == "sensor_reading":
-                return await self._format_as_sensor_reading(record, base_message)
-            elif format_type == "aggregated_data":
-                return await self._format_as_aggregated_data(record, base_message)
-            elif format_type == "metric_update":
-                return await self._format_as_metric_update(record, base_message)
-            elif format_type == "simulation_point":
-                return await self._format_as_simulation_point(record, base_message)
-            elif format_type == "generation_data":
-                return await self._format_as_generation_data(record, base_message)
-            elif format_type == "event_data":
-                return await self._format_as_event_data(record, base_message)
-            else:
-                # Generic format
-                return {**base_message, **record}
-        
-        except Exception as e:
-            logger.error(f"Error transforming data for format {format_type}: {e}")
-            return {**base_message, **record}
-    
-    async def _format_as_sensor_reading(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
-        """Format data as sensor reading for energy dashboard"""
-        return {
-            **base_message,
-            "type": "sensor_data",
-            "sensorId": record.get("sensor_id", "unknown"),
-            "sensor_id": record.get("sensor_id", "unknown"),
-            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
-            "value": record.get("value", 0),
-            "unit": record.get("unit", "kWh"),
-            "room": record.get("metadata", {}).get("room"),
-            "sensor_type": self._infer_sensor_type(record),
-            "metadata": record.get("metadata", {}),
-            "data_quality": await self._assess_data_quality(record)
-        }
-    
-    async def _format_as_aggregated_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
-        """Format data as aggregated community data"""
-        return {
-            **base_message,
-            "type": "aggregated_consumption",
-            "community_id": record.get("sensor_id", "community_1"),
-            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
-            "total_consumption": record.get("value", 0),
-            "unit": record.get("unit", "kWh"),
-            "period": "real_time",
-            "households": record.get("metadata", {}).get("households", 1),
-            "average_per_household": record.get("value", 0) / max(record.get("metadata", {}).get("households", 1), 1)
-        }
-    
-    async def _format_as_metric_update(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
-        """Format data as real-time metric update"""
-        return {
-            **base_message,
-            "type": "metric_update",
-            "metric_id": record.get("sensor_id", "unknown"),
-            "metric_type": self._infer_metric_type(record),
-            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
-            "current_value": record.get("value", 0),
-            "unit": record.get("unit", "kWh"),
-            "trend": await self._calculate_trend(record),
-            "metadata": record.get("metadata", {})
-        }
-    
-    async def _format_as_simulation_point(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
-        """Format data for simulation purposes"""
-        return {
-            **base_message,
-            "type": "simulation_data",
-            "simulation_id": f"sim_{record.get('sensor_id', 'unknown')}",
-            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
-            "energy_value": record.get("value", 0),
-            "unit": record.get("unit", "kWh"),
-            "scenario": record.get("metadata", {}).get("scenario", "baseline"),
-            "location": record.get("metadata", {}).get("location", "unknown"),
-            "data_source": record.get("data_source", "real_community"),
-            "quality_score": await self._assess_data_quality(record)
-        }
-    
-    async def _format_as_generation_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
-        """Format data as energy generation data"""
-        return {
-            **base_message,
-            "type": "generation_data",
-            "generator_id": record.get("sensor_id", "unknown"),
-            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
-            "generation_value": record.get("value", 0),
-            "unit": record.get("unit", "kWh"),
-            "generation_type": record.get("metadata", {}).get("type", "renewable"),
-            "efficiency": record.get("metadata", {}).get("efficiency", 0.85),
-            "weather_conditions": record.get("metadata", {}).get("weather")
-        }
-    
-    async def _format_as_event_data(self, record: Dict[str, Any], base_message: Dict[str, Any]) -> Dict[str, Any]:
-        """Format data as grid event"""
-        return {
-            **base_message,
-            "type": "grid_event",
-            "event_id": str(uuid.uuid4()),
-            "timestamp": record.get("timestamp", int(datetime.utcnow().timestamp())),
-            "event_type": await self._classify_event_type(record),
-            "severity": await self._assess_event_severity(record),
-            "affected_area": record.get("metadata", {}).get("area", "unknown"),
-            "value": record.get("value", 0),
-            "unit": record.get("unit", "kWh"),
-            "description": f"Energy event detected: {record.get('value', 0)} {record.get('unit', 'kWh')}"
-        }
-    
-    def _infer_sensor_type(self, record: Dict[str, Any]) -> str:
-        """Infer sensor type from record data"""
-        value = record.get("value", 0)
-        unit = record.get("unit", "").lower()
-        metadata = record.get("metadata", {})
-        
-        if "generation" in str(metadata).lower() or "solar" in str(metadata).lower():
-            return "generation"
-        elif "temperature" in str(metadata).lower() or "temp" in str(metadata).lower():
-            return "temperature"
-        elif "co2" in str(metadata).lower() or "carbon" in str(metadata).lower():
-            return "co2"
-        elif "humidity" in str(metadata).lower():
-            return "humidity"
-        elif "motion" in str(metadata).lower() or "occupancy" in str(metadata).lower():
-            return "motion"
-        else:
-            return "energy"
-    
-    def _infer_metric_type(self, record: Dict[str, Any]) -> str:
-        """Infer metric type from record"""
-        unit = record.get("unit", "").lower()
-        
-        if "wh" in unit:
-            return "energy"
-        elif "w" in unit:
-            return "power"
-        elif "°c" in unit or "celsius" in unit or "temp" in unit:
-            return "temperature"
-        elif "%" in unit:
-            return "percentage"
-        elif "ppm" in unit or "co2" in unit:
-            return "co2"
-        else:
-            return "generic"
-    
-    async def _calculate_trend(self, record: Dict[str, Any]) -> str:
-        """Calculate trend for metric (simplified)"""
-        # This is a simplified trend calculation
-        # In a real implementation, you'd compare with historical values
-        value = record.get("value", 0)
-        
-        if value > 100:
-            return "increasing"
-        elif value < 50:
-            return "decreasing"
-        else:
-            return "stable"
-    
-    async def _assess_data_quality(self, record: Dict[str, Any]) -> float:
-        """Assess data quality score (0-1)"""
-        score = 1.0
-        
-        # Check for missing fields
-        if not record.get("timestamp"):
-            score -= 0.2
-        if not record.get("sensor_id"):
-            score -= 0.2
-        if record.get("value") is None:
-            score -= 0.3
-        if not record.get("unit"):
-            score -= 0.1
-        
-        # Check for reasonable values
-        value = record.get("value", 0)
-        if value < 0:
-            score -= 0.1
-        if value > 10000:  # Unusually high energy value
-            score -= 0.1
-        
-        return max(0.0, score)
-    
-    async def _classify_event_type(self, record: Dict[str, Any]) -> str:
-        """Classify event type based on data"""
-        value = record.get("value", 0)
-        
-        if value > 1000:
-            return "high_consumption"
-        elif value < 10:
-            return "low_consumption"
-        else:
-            return "normal_operation"
-    
-    async def _assess_event_severity(self, record: Dict[str, Any]) -> str:
-        """Assess event severity"""
-        value = record.get("value", 0)
-        
-        if value > 5000:
-            return "critical"
-        elif value > 1000:
-            return "warning"
-        elif value < 5:
-            return "info"
-        else:
-            return "normal"
-    
-    async def _update_topic_stats(self, topic: str, count: int):
-        """Update topic statistics"""
-        try:
-            stats_key = f"topic_stats:{topic}"
-            await self.redis.hincrby(stats_key, "message_count", count)
-            await self.redis.hset(stats_key, "last_published", datetime.utcnow().isoformat())
-            await self.redis.expire(stats_key, 86400)  # Expire after 24 hours
-            
-        except Exception as e:
-            logger.error(f"Error updating topic stats: {e}")
-    
-    async def _get_last_published_time(self, topic: str) -> Optional[str]:
-        """Get last published time for a topic"""
-        try:
-            stats_key = f"topic_stats:{topic}"
-            return await self.redis.hget(stats_key, "last_published")
-        except Exception as e:
-            logger.debug(f"Error getting last published time for {topic}: {e}")
-            return None
-    
-    async def create_data_stream(self, topic: str, data_stream: List[Dict[str, Any]], 
-                               interval_seconds: float = 1.0):
-        """Create a continuous data stream by publishing data at intervals"""
-        try:
-            logger.info(f"Starting data stream for topic {topic} with {len(data_stream)} points")
-            
-            for i, data_point in enumerate(data_stream):
-                await self.publish_single_message(topic, data_point)
-                
-                # Add stream metadata
-                stream_info = {
-                    "type": "stream_info",
-                    "topic": topic,
-                    "current_point": i + 1,
-                    "total_points": len(data_stream),
-                    "progress": (i + 1) / len(data_stream) * 100,
-                    "timestamp": datetime.utcnow().isoformat()
-                }
-                
-                await self.publish_single_message(f"{topic}_stream_info", stream_info)
-                
-                # Wait before next data point
-                if i < len(data_stream) - 1:
-                    await asyncio.sleep(interval_seconds)
-            
-            logger.info(f"Completed data stream for topic {topic}")
-            
-        except Exception as e:
-            logger.error(f"Error creating data stream: {e}")
-            raise
-    
-    async def cleanup_old_stats(self, days: int = 7):
-        """Clean up old topic statistics"""
-        try:
-            # Get all topic stat keys
-            pattern = "topic_stats:*"
-            keys = []
-            
-            async for key in self.redis.scan_iter(match=pattern):
-                keys.append(key)
-            
-            # Delete old keys (Redis TTL should handle this, but cleanup anyway)
-            if keys:
-                await self.redis.delete(*keys)
-                logger.info(f"Cleaned up {len(keys)} old topic stat keys")
-            
-        except Exception as e:
-            logger.error(f"Error cleaning up old stats: {e}")
--- a/microservices/data-ingestion-service/src/simple_sa4cps_config.py
+++ b/microservices/data-ingestion-service/src/simple_sa4cps_config.py
@@ -1,177 +0,0 @@
-"""
-Simplified SA4CPS Configuration
-Auto-configures for ftp.sa4cps.pt with .slg_v2 files only
-"""
-
-import asyncio
-import logging
-from datetime import datetime
-from typing import Dict, Any
-from database import get_database
-
-logger = logging.getLogger(__name__)
-
-class SimpleSA4CPSConfig:
-    """Simplified SA4CPS configuration for .slg_v2 files only"""
-    
-    def __init__(self):
-        self.ftp_host = "ftp.sa4cps.pt"
-        self.source_name = "SA4CPS Smart Grid Data"
-        
-    async def setup_sa4cps_source(self, username: str = "curvascarga@sa4cps.pt", 
-                                 password: str = "n$WFtz9+bleN", 
-                                 remote_path: str = "/") -> Dict[str, Any]:
-        """Create the SA4CPS data source"""
-        try:
-            db = await get_database()
-            
-            # Check if already exists
-            existing = await db.data_sources.find_one({"name": self.source_name})
-            if existing:
-                logger.info("SA4CPS source already configured")
-                return {
-                    "success": True,
-                    "message": "Already configured",
-                    "source_id": str(existing["_id"])
-                }
-            
-            # Create simplified SA4CPS data source
-            source_doc = {
-                "name": self.source_name,
-                "description": "SA4CPS Smart Grid .slg_v2 data from ftp.sa4cps.pt",
-                "source_type": "ftp",
-                "ftp_config": {
-                    "host": self.ftp_host,
-                    "port": 21,
-                    "username": username,
-                    "password": password,
-                    "remote_path": remote_path,
-                    "use_ssl": False,
-                    "passive_mode": True,
-                    "timeout": 30
-                },
-                "file_patterns": ["*.slg_v2"],
-                "data_format": "slg_v2",
-                "redis_topics": ["sa4cps_energy_data", "sa4cps_raw_data"],
-                "enabled": True,
-                "check_interval_seconds": 300,  # 5 minutes
-                "created_at": datetime.utcnow(),
-                "updated_at": datetime.utcnow(),
-                "status": "configured"
-            }
-            
-            result = await db.data_sources.insert_one(source_doc)
-            source_id = str(result.inserted_id)
-            
-            logger.info(f"✅ SA4CPS source configured: {source_id}")
-            
-            return {
-                "success": True,
-                "message": "SA4CPS source configured successfully",
-                "source_id": source_id,
-                "ftp_host": self.ftp_host,
-                "file_pattern": "*.slg_v2",
-                "topics": ["sa4cps_energy_data", "sa4cps_raw_data"]
-            }
-        
-        except Exception as e:
-            logger.error(f"❌ Failed to configure SA4CPS source: {e}")
-            return {
-                "success": False,
-                "message": f"Configuration failed: {str(e)}"
-            }
-    
-    async def test_connection(self) -> Dict[str, Any]:
-        """Test SA4CPS FTP connection"""
-        try:
-            from ftp_monitor import FTPMonitor
-            from database import get_redis
-            
-            db = await get_database()
-            redis = await get_redis()
-            
-            source = await db.data_sources.find_one({"name": self.source_name})
-            if not source:
-                return {"success": False, "message": "SA4CPS source not configured"}
-            
-            monitor = FTPMonitor(db, redis)
-            connection_test = await monitor.test_connection(source)
-            
-            if connection_test:
-                files = await monitor.check_for_new_files(source)
-                return {
-                    "success": True,
-                    "message": f"✅ Connected to {self.ftp_host}",
-                    "files_found": len(files),
-                    "sample_files": [f["filename"] for f in files[:5]]
-                }
-            else:
-                return {
-                    "success": False,
-                    "message": f"❌ Cannot connect to {self.ftp_host}"
-                }
-        
-        except Exception as e:
-            logger.error(f"Connection test failed: {e}")
-            return {
-                "success": False,
-                "message": f"Connection test error: {str(e)}"
-            }
-    
-    async def get_status(self) -> Dict[str, Any]:
-        """Get SA4CPS source status"""
-        try:
-            db = await get_database()
-            source = await db.data_sources.find_one({"name": self.source_name})
-            
-            if not source:
-                return {"configured": False, "message": "Not configured"}
-            
-            # Get processing stats
-            processed_count = await db.processed_files.count_documents({"source_id": source["_id"]})
-            
-            return {
-                "configured": True,
-                "source_id": str(source["_id"]),
-                "name": source["name"],
-                "enabled": source.get("enabled", False),
-                "ftp_host": self.ftp_host,
-                "last_check": source.get("last_check").isoformat() if source.get("last_check") else None,
-                "files_processed": processed_count,
-                "status": "✅ Ready for .slg_v2 files"
-            }
-        
-        except Exception as e:
-            return {"configured": False, "error": str(e)}
-
-async def quick_setup():
-    """Quick setup for SA4CPS"""
-    print("🚀 Setting up SA4CPS .slg_v2 data ingestion...")
-    
-    config = SimpleSA4CPSConfig()
-    
-    # Setup source
-    result = await config.setup_sa4cps_source()
-    print(f"Setup: {result['message']}")
-    
-    if result['success']:
-        # Test connection
-        test_result = await config.test_connection()
-        print(f"Connection: {test_result['message']}")
-        
-        if test_result['success']:
-            print(f"📁 Found {test_result.get('files_found', 0)} .slg_v2 files")
-        
-        # Show status
-        status = await config.get_status()
-        print(f"Status: {status.get('status', 'Unknown')}")
-        
-        print("\n✅ SA4CPS setup complete!")
-        print("📊 Data will be published to Redis topics:")
-        print("   • sa4cps_energy_data (processed sensor readings)")
-        print("   • sa4cps_raw_data (raw .slg_v2 content)")
-    else:
-        print("❌ Setup failed. Check configuration and try again.")
-
-if __name__ == "__main__":
-    asyncio.run(quick_setup())
--- a/microservices/data-ingestion-service/src/slg_v2_processor.py
+++ b/microservices/data-ingestion-service/src/slg_v2_processor.py
@@ -1,300 +0,0 @@
-"""
-Simplified SA4CPS .slg_v2 file processor
-Focused exclusively on processing .slg_v2 files from ftp.sa4cps.pt
-"""
-
-import logging
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Optional
-import re
-
-logger = logging.getLogger(__name__)
-
-class SLGv2Processor:
-    """Simplified processor for SA4CPS .slg_v2 files only"""
-    
-    def __init__(self, db, redis_client):
-        self.db = db
-        self.redis = redis_client
-    
-    async def process_slg_v2_file(self, file_content: bytes) -> List[Dict[str, Any]]:
-        """Process a .slg_v2 file and return standardized sensor readings"""
-        try:
-            # Decode file content
-            try:
-                text_content = file_content.decode('utf-8')
-            except UnicodeDecodeError:
-                text_content = file_content.decode('latin1', errors='ignore')
-            
-            logger.info(f"Processing SLG_V2 file ({len(file_content)} bytes)")
-            
-            lines = text_content.strip().split('\n')
-            if not lines:
-                logger.warning("SLG_V2 file is empty")
-                return []
-            
-            processed_data = []
-            header = None
-            metadata = {}
-            
-            for line_idx, line in enumerate(lines):
-                line = line.strip()
-                
-                if not line:
-                    continue
-                
-                # Extract metadata from comment lines
-                if line.startswith('#') or line.startswith('//'):
-                    comment = line[1:].strip() if line.startswith('#') else line[2:].strip()
-                    if ':' in comment:
-                        key, value = comment.split(':', 1)
-                        metadata[key.strip()] = value.strip()
-                    continue
-                
-                # Detect header line
-                if header is None and self._is_header_line(line):
-                    header = self._parse_header(line)
-                    continue
-                
-                # Process data lines
-                try:
-                    processed_row = self._process_data_line(line, header, metadata, line_idx)
-                    if processed_row:
-                        processed_data.append(processed_row)
-                except Exception as e:
-                    logger.warning(f"Error processing SLG_V2 line {line_idx}: {e}")
-                    continue
-            
-            logger.info(f"Successfully processed {len(processed_data)} SLG_V2 records")
-            return processed_data
-        
-        except Exception as e:
-            logger.error(f"Error processing SLG_V2 file: {e}")
-            raise
-    
-    def _is_header_line(self, line: str) -> bool:
-        """Check if line appears to be a header"""
-        # Common SA4CPS header patterns
-        header_keywords = ['timestamp', 'time', 'date', 'sensor', 'id', 'energy', 'power', 'voltage', 'current']
-        line_lower = line.lower()
-        
-        has_keywords = any(keyword in line_lower for keyword in header_keywords)
-        
-        # Check if most parts are non-numeric (likely header)
-        parts = re.split(r'[,;\t\s]+', line)
-        numeric_parts = 0
-        for part in parts:
-            try:
-                float(part.strip())
-                numeric_parts += 1
-            except ValueError:
-                continue
-        
-        return has_keywords and (numeric_parts < len(parts) / 2)
-    
-    def _parse_header(self, line: str) -> List[str]:
-        """Parse header line and return column names"""
-        # Try different delimiters
-        for delimiter in [',', ';', '\t']:
-            if delimiter in line:
-                parts = [part.strip() for part in line.split(delimiter) if part.strip()]
-                if len(parts) > 1:
-                    return parts
-        
-        # Default to whitespace splitting
-        return [part.strip() for part in line.split() if part.strip()]
-    
-    def _process_data_line(self, line: str, header: Optional[List[str]], 
-                          metadata: Dict[str, Any], line_idx: int) -> Optional[Dict[str, Any]]:
-        """Process a single data line into a sensor reading"""
-        try:
-            # Parse line into parts
-            parts = self._parse_line_parts(line)
-            if not parts:
-                return None
-            
-            # Map parts to columns
-            if header and len(parts) >= len(header):
-                row_dict = dict(zip(header, parts[:len(header)]))
-            else:
-                row_dict = {f"col_{i}": part for i, part in enumerate(parts)}
-            
-            # Extract core sensor reading fields
-            processed_row = {
-                'timestamp': self._extract_timestamp(row_dict, line_idx),
-                'sensor_id': self._extract_sensor_id(row_dict, line_idx),
-                'value': self._extract_primary_value(row_dict),
-                'unit': self._infer_unit(row_dict),
-                'metadata': {
-                    **metadata,  # File-level metadata
-                    **row_dict,  # All row data
-                    'line_number': line_idx,
-                    'raw_line': line
-                },
-                'processed_at': datetime.utcnow().isoformat(),
-                'data_source': 'sa4cps_slg_v2',
-                'file_format': 'SLG_V2'
-            }
-            
-            # Extract additional numeric values
-            additional_values = self._extract_additional_values(row_dict)
-            if additional_values:
-                processed_row['additional_values'] = additional_values
-            
-            return processed_row
-        
-        except Exception as e:
-            logger.error(f"Error processing data line {line_idx}: {e}")
-            return None
-    
-    def _parse_line_parts(self, line: str) -> List[str]:
-        """Parse line into parts using appropriate delimiter"""
-        for delimiter in [',', ';', '\t']:
-            if delimiter in line:
-                parts = [part.strip() for part in line.split(delimiter) if part.strip()]
-                if len(parts) > 1:
-                    return parts
-        
-        # Fallback to whitespace
-        return [part.strip() for part in line.split() if part.strip()]
-    
-    def _extract_timestamp(self, row_dict: Dict[str, str], line_idx: int) -> int:
-        """Extract timestamp from row data"""
-        # Look for timestamp columns
-        for key, val in row_dict.items():
-            if any(ts_word in key.lower() for ts_word in ['time', 'date', 'timestamp', 'ts']):
-                timestamp = self._parse_timestamp(val)
-                if timestamp:
-                    return int(timestamp.timestamp())
-        
-        # Use current time with line offset if no timestamp found
-        return int((datetime.utcnow() + timedelta(seconds=line_idx)).timestamp())
-    
-    def _extract_sensor_id(self, row_dict: Dict[str, str], line_idx: int) -> str:
-        """Extract sensor ID from row data"""
-        for key, val in row_dict.items():
-            if any(id_word in key.lower() for id_word in ['sensor', 'device', 'meter', 'id']):
-                return str(val).strip()
-        
-        return f"sa4cps_sensor_{line_idx}"
-    
-    def _extract_primary_value(self, row_dict: Dict[str, str]) -> Optional[float]:
-        """Extract the primary numeric value (typically energy)"""
-        # Priority order for SA4CPS data
-        priority_keys = ['energy', 'consumption', 'kwh', 'power', 'watt', 'value']
-        
-        # First, try priority keys
-        for priority_key in priority_keys:
-            for key, val in row_dict.items():
-                if priority_key in key.lower():
-                    numeric_val = self._parse_numeric(val)
-                    if numeric_val is not None:
-                        return numeric_val
-        
-        # Fallback: first numeric value found
-        for key, val in row_dict.items():
-            if not any(skip_word in key.lower() for skip_word in ['time', 'date', 'id', 'sensor', 'device']):
-                numeric_val = self._parse_numeric(val)
-                if numeric_val is not None:
-                    return numeric_val
-        
-        return None
-    
-    def _extract_additional_values(self, row_dict: Dict[str, str]) -> Dict[str, Dict[str, Any]]:
-        """Extract additional numeric values beyond the primary one"""
-        additional = {}
-        
-        for key, val in row_dict.items():
-            if any(skip_word in key.lower() for skip_word in ['time', 'date', 'id', 'sensor', 'device']):
-                continue
-            
-            numeric_val = self._parse_numeric(val)
-            if numeric_val is not None:
-                additional[key] = {
-                    'value': numeric_val,
-                    'unit': self._infer_unit_from_key(key, numeric_val)
-                }
-        
-        return additional
-    
-    def _infer_unit(self, row_dict: Dict[str, str]) -> str:
-        """Infer unit from column names and values"""
-        for key in row_dict.keys():
-            unit = self._infer_unit_from_key(key, 0)
-            if unit != "unknown":
-                return unit
-        return "kWh"  # Default for SA4CPS energy data
-    
-    def _infer_unit_from_key(self, key: str, value: float) -> str:
-        """Infer unit based on column name"""
-        key_lower = key.lower()
-        
-        if any(word in key_lower for word in ['energy', 'kwh', 'consumption']):
-            return "kWh"
-        elif any(word in key_lower for word in ['power', 'watt', 'w']):
-            return "W"
-        elif any(word in key_lower for word in ['voltage', 'volt', 'v']):
-            return "V"
-        elif any(word in key_lower for word in ['current', 'amp', 'a']):
-            return "A"
-        elif any(word in key_lower for word in ['temp', 'temperature']):
-            return "°C"
-        elif any(word in key_lower for word in ['freq', 'frequency']):
-            return "Hz"
-        elif any(word in key_lower for word in ['percent', '%']):
-            return "%"
-        else:
-            return "unknown"
-    
-    def _parse_timestamp(self, timestamp_str: str) -> Optional[datetime]:
-        """Parse timestamp from string"""
-        try:
-            # Common SA4CPS timestamp formats
-            formats = [
-                "%Y-%m-%d %H:%M:%S",
-                "%Y-%m-%dT%H:%M:%S",
-                "%Y-%m-%dT%H:%M:%SZ",
-                "%d/%m/%Y %H:%M:%S",
-                "%Y/%m/%d %H:%M:%S"
-            ]
-            
-            for fmt in formats:
-                try:
-                    return datetime.strptime(timestamp_str.strip(), fmt)
-                except ValueError:
-                    continue
-            
-            # Try parsing as unix timestamp
-            try:
-                timestamp_num = float(timestamp_str)
-                if timestamp_num > 1e10:  # Milliseconds
-                    timestamp_num = timestamp_num / 1000
-                return datetime.fromtimestamp(timestamp_num)
-            except:
-                pass
-            
-            return None
-        
-        except Exception as e:
-            logger.debug(f"Error parsing timestamp '{timestamp_str}': {e}")
-            return None
-    
-    def _parse_numeric(self, value_str: str) -> Optional[float]:
-        """Parse numeric value from string"""
-        try:
-            # Clean the string of non-numeric characters (except decimal point and minus)
-            cleaned = re.sub(r'[^\d.-]', '', value_str.strip())
-            if cleaned:
-                return float(cleaned)
-            return None
-        except Exception:
-            return None
-    
-    async def get_processing_stats(self) -> Dict[str, Any]:
-        """Get processing statistics"""
-        return {
-            "supported_formats": ["slg_v2"],
-            "format_description": "SA4CPS Smart Grid Data Format v2",
-            "specializations": ["energy_monitoring", "smart_grid", "sensor_telemetry"],
-            "last_updated": datetime.utcnow().isoformat()
-        }
--- a/microservices/data-ingestion-service/tests/test_simple_processor.py
+++ b/microservices/data-ingestion-service/tests/test_simple_processor.py
@@ -1,103 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test for the streamlined SA4CPS .slg_v2 processor
-"""
-
-import asyncio
-import json
-import sys
-from pathlib import Path
-
-# Add src directory to path
-sys.path.append(str(Path(__file__).parent.parent / "src"))
-from slg_v2_processor import SLGv2Processor
-
-# Sample SA4CPS .slg_v2 test data
-SAMPLE_SLG_V2_DATA = """# SA4CPS Smart Grid Data Export
-# Location: Research Building A
-# System: Energy Monitoring v2.1
-# Date: 2024-01-15
-timestamp,sensor_id,energy_kwh,power_w,voltage_v,current_a
-2024-01-15T10:00:00,GRID_A_001,1234.5,850.2,230.1,3.7
-2024-01-15T10:01:00,GRID_A_001,1235.1,865.3,229.8,3.8
-2024-01-15T10:02:00,GRID_A_002,987.3,654.2,228.9,2.9
-2024-01-15T10:03:00,GRID_A_002,988.1,661.5,229.2,2.9
-"""
-
-SPACE_DELIMITED_DATA = """# Smart Building Energy Data
-# Building: Laboratory Complex
-2024-01-15T10:00:00 LAB_SENSOR_01 1500.23 750.5 240.1
-2024-01-15T10:01:00 LAB_SENSOR_01 1501.85 780.2 239.8
-2024-01-15T10:02:00 LAB_SENSOR_02 890.45 420.8 241.2
-"""
-
-class MockProcessor(SLGv2Processor):
-    def __init__(self):
-        # Mock without database dependencies
-        pass
-
-async def test_slg_v2_processing():
-    """Test the simplified .slg_v2 processor"""
-    print("🧪 Testing Simplified SA4CPS .slg_v2 Processor")
-    print("=" * 50)
-    
-    processor = MockProcessor()
-    
-    # Test 1: CSV-style .slg_v2
-    print("\n📋 Test 1: CSV-style SA4CPS data")
-    try:
-        result1 = await processor.process_slg_v2_file(SAMPLE_SLG_V2_DATA.encode('utf-8'))
-        print(f"✅ Processed {len(result1)} records")
-        
-        if result1:
-            sample = result1[0]
-            print("📄 Sample record:")
-            print(f"   Sensor: {sample['sensor_id']}")
-            print(f"   Timestamp: {sample['timestamp']}")
-            print(f"   Value: {sample['value']} {sample['unit']}")
-            print(f"   Additional values: {len(sample.get('additional_values', {}))}")
-            
-    except Exception as e:
-        print(f"❌ Test 1 failed: {e}")
-    
-    # Test 2: Space-delimited data
-    print("\n📋 Test 2: Space-delimited SA4CPS data")
-    try:
-        result2 = await processor.process_slg_v2_file(SPACE_DELIMITED_DATA.encode('utf-8'))
-        print(f"✅ Processed {len(result2)} records")
-        
-        if result2:
-            sample = result2[0]
-            print("📄 Sample record:")
-            print(f"   Sensor: {sample['sensor_id']}")
-            print(f"   Value: {sample['value']} {sample['unit']}")
-            print(f"   Metadata keys: {len(sample.get('metadata', {}))}")
-            
-    except Exception as e:
-        print(f"❌ Test 2 failed: {e}")
-    
-    # Test 3: Processing stats
-    print("\n📊 Test 3: Processing statistics")
-    try:
-        stats = await processor.get_processing_stats()
-        print("✅ Processor statistics:")
-        print(f"   Supported formats: {stats['supported_formats']}")
-        print(f"   Description: {stats['format_description']}")
-        print(f"   Specializations: {', '.join(stats['specializations'])}")
-        
-    except Exception as e:
-        print(f"❌ Test 3 failed: {e}")
-    
-    print("\n🎉 Testing complete!")
-    print("\n📈 Benefits of simplified processor:")
-    print("   • 70% less code complexity")
-    print("   • Focused only on SA4CPS .slg_v2 format")
-    print("   • Optimized for energy monitoring data")
-    print("   • Faster processing and easier maintenance")
-    print("\n🔗 Integration:")
-    print("   • Auto-connects to ftp.sa4cps.pt")
-    print("   • Processes *.slg_v2 files automatically")
-    print("   • Publishes to sa4cps_energy_data Redis topic")
-
-if __name__ == "__main__":
-    asyncio.run(test_slg_v2_processing())
--- a/microservices/data-ingestion-service/tests/verify_setup.py
+++ b/microservices/data-ingestion-service/tests/verify_setup.py
@@ -1,197 +0,0 @@
-#!/usr/bin/env python3
-"""
-Verification script for simplified SA4CPS data ingestion service
-Checks all components without requiring database connections
-"""
-
-import os
-import sys
-from pathlib import Path
-
-def check_file_exists(filepath, description):
-    """Check if a file exists and report status"""
-    if Path(filepath).exists():
-        print(f"✅ {description}: {filepath}")
-        return True
-    else:
-        print(f"❌ MISSING {description}: {filepath}")
-        return False
-
-def check_directory_structure():
-    """Verify all required files are present"""
-    print("📁 Checking SA4CPS Data Ingestion Service Structure")
-    print("=" * 55)
-    
-    src_files = [
-        ("src/main.py", "FastAPI main application"),
-        ("src/models.py", "Pydantic data models"),
-        ("src/database.py", "Database connection manager"),
-        ("src/slg_v2_processor.py", "SA4CPS .slg_v2 file processor"),
-        ("src/simple_sa4cps_config.py", "Simplified SA4CPS configuration"),
-        ("src/ftp_monitor.py", "FTP monitoring service"),
-        ("src/redis_publisher.py", "Redis message publisher"),
-        ("src/data_validator.py", "Data validation utilities"),
-        ("src/monitoring.py", "Service monitoring components")
-    ]
-    
-    test_files = [
-        ("tests/test_simple_processor.py", "Processor test suite"),
-        ("tests/verify_setup.py", "Setup verification script")
-    ]
-    
-    config_files = [
-        ("requirements.txt", "Python dependencies"),
-        ("Dockerfile", "Docker container configuration")
-    ]
-    
-    files_to_check = src_files + test_files + config_files
-    
-    all_present = True
-    for filename, description in files_to_check:
-        if not check_file_exists(filename, description):
-            all_present = False
-    
-    return all_present
-
-def check_configuration():
-    """Verify SA4CPS configuration"""
-    print(f"\n🔧 Checking SA4CPS Configuration")
-    print("-" * 35)
-    
-    # Check if simple_sa4cps_config.py has correct settings
-    try:
-        with open("src/simple_sa4cps_config.py", "r") as f:
-            content = f.read()
-            
-        if "ftp.sa4cps.pt" in content:
-            print("✅ FTP host configured: ftp.sa4cps.pt")
-        else:
-            print("❌ FTP host not found in config")
-            
-        if "curvascarga@sa4cps.pt" in content:
-            print("✅ FTP username configured")
-        else:
-            print("❌ FTP username not found")
-            
-        if ".slg_v2" in content:
-            print("✅ SLG_V2 file format configured")
-        else:
-            print("❌ SLG_V2 format not configured")
-            
-        if "sa4cps_energy_data" in content:
-            print("✅ Redis topics configured")
-        else:
-            print("❌ Redis topics not configured")
-            
-        return True
-    except Exception as e:
-        print(f"❌ Error reading config: {e}")
-        return False
-
-def check_processor():
-    """Verify processor functionality"""
-    print(f"\n⚙️ Checking SLG_V2 Processor")
-    print("-" * 30)
-    
-    try:
-        # Import without database dependencies
-        sys.path.append('.')
-        
-        # Check if processor can be imported
-        print("✅ SLGv2Processor class available")
-        
-        # Check test file
-        if Path("tests/test_simple_processor.py").exists():
-            with open("tests/test_simple_processor.py", "r") as f:
-                test_content = f.read()
-            
-            if "CSV-style SA4CPS data" in test_content:
-                print("✅ CSV format test available")
-            if "Space-delimited SA4CPS data" in test_content:
-                print("✅ Space-delimited format test available")
-            if "Processing statistics" in test_content:
-                print("✅ Statistics test available")
-                
-        return True
-    except Exception as e:
-        print(f"❌ Processor check failed: {e}")
-        return False
-
-def check_docker_setup():
-    """Verify Docker configuration"""
-    print(f"\n🐳 Checking Docker Configuration")
-    print("-" * 35)
-    
-    # Check Dockerfile
-    if Path("Dockerfile").exists():
-        with open("Dockerfile", "r") as f:
-            dockerfile_content = f.read()
-        
-        if "python:3.9-slim" in dockerfile_content:
-            print("✅ Python 3.9 base image")
-        if "requirements.txt" in dockerfile_content:
-            print("✅ Dependencies installation configured")
-        if "8008" in dockerfile_content:
-            print("✅ Port 8008 exposed")
-        if "uvicorn" in dockerfile_content:
-            print("✅ ASGI server configured")
-    else:
-        print("❌ Dockerfile missing")
-        return False
-    
-    # Check requirements.txt
-    if Path("requirements.txt").exists():
-        with open("requirements.txt", "r") as f:
-            requirements = f.read()
-        
-        required_deps = ["fastapi", "motor", "redis", "ftputil", "pandas"]
-        for dep in required_deps:
-            if dep in requirements:
-                print(f"✅ {dep} dependency listed")
-            else:
-                print(f"❌ {dep} dependency missing")
-    
-    return True
-
-def generate_summary():
-    """Generate setup summary"""
-    print(f"\n📊 SA4CPS Service Summary")
-    print("=" * 30)
-    print("🎯 Purpose: Monitor ftp.sa4cps.pt for .slg_v2 files")
-    print("📁 File Format: SA4CPS Smart Grid Data (.slg_v2)")
-    print("🌐 FTP Server: ftp.sa4cps.pt")
-    print("👤 Username: curvascarga@sa4cps.pt")
-    print("🔄 Processing: Real-time sensor data extraction")
-    print("📤 Output: Redis topics (sa4cps_energy_data, sa4cps_raw_data)")
-    print("🐳 Deployment: Docker container on port 8008")
-    
-    print(f"\n🚀 Next Steps:")
-    print("1. Run: docker-compose up data-ingestion-service")
-    print("2. Test: python test_simple_processor.py")
-    print("3. Configure: python simple_sa4cps_config.py")
-    print("4. Monitor: Check /health endpoint")
-
-def main():
-    """Main verification function"""
-    print("🔍 SA4CPS Data Ingestion Service Verification")
-    print("=" * 50)
-    
-    # Run all checks
-    structure_ok = check_directory_structure()
-    config_ok = check_configuration()
-    processor_ok = check_processor()
-    docker_ok = check_docker_setup()
-    
-    # Final status
-    print(f"\n{'='*50}")
-    if all([structure_ok, config_ok, processor_ok, docker_ok]):
-        print("🎉 SA4CPS Data Ingestion Service: READY FOR DEPLOYMENT")
-        print("✅ All components verified successfully")
-    else:
-        print("⚠️  SA4CPS Data Ingestion Service: ISSUES FOUND")
-        print("❌ Please fix the issues above before deployment")
-    
-    generate_summary()
-
-if __name__ == "__main__":
-    main()