first commit

2025-09-09 13:46:42 +01:00
commit a7a18e6295
77 changed files with 8678 additions and 0 deletions
--- a/microservices/api-gateway/Dockerfile
+++ b/microservices/api-gateway/Dockerfile
@@ -0,0 +1,26 @@
+# Base image: slim variant of the official Python 3.9 image
+FROM python:3.9-slim
+
+# Relative paths in the following instructions resolve against /app
+WORKDIR /app
+
+# Install system dependencies.
+# curl is required by the HEALTHCHECK below; gcc is presumably needed to build
+# Python packages with C extensions -- TODO confirm it is still required.
+# The apt package lists are deleted in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y \
+    gcc \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements and install Python dependencies before the application
+# code, so this layer can be reused while requirements.txt is unchanged
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application code
+COPY . .
+
+# Expose port (main.py binds uvicorn to 0.0.0.0:8000)
+EXPOSE 8000
+
+# Health check: request the gateway's /health endpoint every 30s; curl -f
+# turns an HTTP error status into a non-zero exit code
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+
+# Run the application (main.py starts uvicorn when executed as a script)
+CMD ["python", "main.py"]
--- a/microservices/api-gateway/auth_middleware.py
+++ b/microservices/api-gateway/auth_middleware.py
@@ -0,0 +1,89 @@
+"""
+Authentication middleware for API Gateway
+"""
+
+import aiohttp
+from fastapi import HTTPException, Request
+from typing import Optional, Dict, Any
+import logging
+
+logger = logging.getLogger(__name__)
+
+class AuthMiddleware:
+    """Validate bearer tokens by asking the token service.
+
+    Only the base URL of the token service is stored; a new HTTP session is
+    opened for each verification call.
+    """
+
+    def __init__(self, token_service_url: str = "http://localhost:8001"):
+        self.token_service_url = token_service_url
+
+    async def verify_token(self, request: Request) -> Optional[Dict[str, Any]]:
+        """
+        Verify the bearer token carried in the request's Authorization header.
+
+        Returns the ``decoded`` field of the token service reply (``None`` when
+        the reply has no such field).
+
+        Raises:
+            HTTPException: 401 when the header is missing, is not a non-empty
+                ``Bearer`` credential, or the token service rejects the token;
+                503 when the token service cannot be reached or times out;
+                500 for any other failure.
+        """
+        # Local import: this module does not import asyncio at the top level.
+        import asyncio
+
+        auth_header = request.headers.get("Authorization")
+        if not auth_header:
+            raise HTTPException(status_code=401, detail="Authorization header required")
+
+        if not auth_header.startswith("Bearer "):
+            raise HTTPException(status_code=401, detail="Bearer token required")
+
+        token = auth_header[len("Bearer "):].strip()
+        if not token:
+            # "Bearer " followed by nothing: reject locally instead of asking
+            # the token service to validate an empty string.
+            raise HTTPException(status_code=401, detail="Bearer token required")
+
+        try:
+            # Validate token with token service
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{self.token_service_url}/tokens/validate",
+                    json={"token": token},
+                    timeout=aiohttp.ClientTimeout(total=5)
+                ) as response:
+
+                    if response.status != 200:
+                        raise HTTPException(status_code=401, detail="Token validation failed")
+
+                    token_data = await response.json()
+
+                    if not token_data.get("valid"):
+                        error_msg = token_data.get("error", "Invalid token")
+                        raise HTTPException(status_code=401, detail=error_msg)
+
+                    # Token is valid, return decoded payload
+                    return token_data.get("decoded")
+
+        except HTTPException:
+            # Our own 401s must reach the caller unchanged.
+            raise
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+            # A total-timeout expiry raises asyncio.TimeoutError, which is not
+            # an aiohttp.ClientError; both mean the token service is unusable.
+            logger.error(f"Token service connection error: {e!r}")
+            raise HTTPException(status_code=503, detail="Authentication service unavailable")
+        except Exception as e:
+            logger.error(f"Token verification error: {e}")
+            raise HTTPException(status_code=500, detail="Authentication error")
+
+    async def check_permissions(self, token_payload: Dict[str, Any], required_resources: list) -> bool:
+        """
+        Return True when the token grants every resource in ``required_resources``.
+
+        An empty or missing payload grants nothing. Granted resources are read
+        from the payload's ``list_of_resources`` entry; a missing or ``None``
+        entry is treated as an empty list.
+        """
+        if not token_payload:
+            return False
+
+        granted = token_payload.get("list_of_resources") or []
+        return all(resource in granted for resource in required_resources)
+
+    def extract_user_info(self, token_payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Pick the user-related fields out of a token payload, applying defaults."""
+        return {
+            "name": token_payload.get("name"),
+            "resources": token_payload.get("list_of_resources", []),
+            "data_aggregation": token_payload.get("data_aggregation", False),
+            "time_aggregation": token_payload.get("time_aggregation", False),
+            "embargo": token_payload.get("embargo", 0),
+            "expires_at": token_payload.get("exp")
+        }
--- a/microservices/api-gateway/load_balancer.py
+++ b/microservices/api-gateway/load_balancer.py
@@ -0,0 +1,124 @@
+"""
+Load balancer for distributing requests across service instances
+"""
+
+import random
+from typing import List, Dict, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+class LoadBalancer:
+    """Simple load balancer for microservice requests.
+
+    Keeps a list of instance URLs per service plus a round-robin cursor. When
+    no instance is registered for a service, the URL is looked up in a service
+    registry instead.
+    """
+
+    def __init__(self, service_registry: Optional["ServiceRegistry"] = None):
+        """
+        Args:
+            service_registry: Registry used for the "single" strategy and as
+                fallback for the other strategies. When omitted, a private
+                ServiceRegistry is created on first use; such a registry has
+                no services registered, so its lookups return None.
+        """
+        self.service_instances: Dict[str, List[str]] = {}
+        self.current_index: Dict[str, int] = {}
+        self._service_registry = service_registry
+
+    async def _registry_lookup(self, service_name: str) -> Optional[str]:
+        """Ask the service registry for the URL of ``service_name``."""
+        if self._service_registry is None:
+            # Imported lazily, as in the rest of this module.
+            from service_registry import ServiceRegistry
+            self._service_registry = ServiceRegistry()
+        return await self._service_registry.get_service_url(service_name)
+
+    def register_service_instance(self, service_name: str, instance_url: str):
+        """Register a new service instance (duplicates are ignored)."""
+        instances = self.service_instances.setdefault(service_name, [])
+        self.current_index.setdefault(service_name, 0)
+
+        if instance_url not in instances:
+            instances.append(instance_url)
+            logger.info(f"Registered instance {instance_url} for service {service_name}")
+
+    def unregister_service_instance(self, service_name: str, instance_url: str):
+        """Unregister a service instance; unknown services are ignored silently."""
+        instances = self.service_instances.get(service_name)
+        if instances is None:
+            return
+
+        try:
+            instances.remove(instance_url)
+        except ValueError:
+            logger.warning(f"Instance {instance_url} not found for service {service_name}")
+            return
+
+        logger.info(f"Unregistered instance {instance_url} for service {service_name}")
+
+        # Reset the cursor if the removal left it pointing past the end.
+        if self.current_index.get(service_name, 0) >= len(instances):
+            self.current_index[service_name] = 0
+
+    async def get_service_url(self, service_name: str, strategy: str = "single") -> Optional[str]:
+        """
+        Get a service URL using the specified load balancing strategy.
+
+        Strategies:
+        - single: ask the service registry (default)
+        - round_robin: rotate through the registered instances
+        - random: pick a registered instance at random
+
+        Returns None for an unknown strategy or when no URL is available.
+        """
+        if strategy == "single":
+            return await self._registry_lookup(service_name)
+
+        if strategy == "round_robin":
+            return await self._round_robin_select(service_name)
+
+        if strategy == "random":
+            return await self._random_select(service_name)
+
+        logger.error(f"Unknown load balancing strategy: {strategy}")
+        return None
+
+    async def _round_robin_select(self, service_name: str) -> Optional[str]:
+        """Select service instance using round-robin."""
+        instances = self.service_instances.get(service_name, [])
+        if not instances:
+            # Fall back to service registry
+            return await self._registry_lookup(service_name)
+
+        # The modulo keeps the cursor valid even if the instance list was
+        # changed without going through unregister_service_instance().
+        current_idx = self.current_index.get(service_name, 0) % len(instances)
+        selected_instance = instances[current_idx]
+
+        # Update index for next request
+        self.current_index[service_name] = (current_idx + 1) % len(instances)
+
+        logger.debug(f"Round-robin selected {selected_instance} for {service_name}")
+        return selected_instance
+
+    async def _random_select(self, service_name: str) -> Optional[str]:
+        """Select service instance randomly."""
+        instances = self.service_instances.get(service_name, [])
+        if not instances:
+            # Fall back to service registry
+            return await self._registry_lookup(service_name)
+
+        selected_instance = random.choice(instances)
+        logger.debug(f"Random selected {selected_instance} for {service_name}")
+        return selected_instance
+
+    def get_service_instances(self, service_name: str) -> List[str]:
+        """Get a copy of the registered instances for a service."""
+        return list(self.service_instances.get(service_name, []))
+
+    def get_instance_count(self, service_name: str) -> int:
+        """Get number of registered instances for a service."""
+        return len(self.service_instances.get(service_name, []))
+
+    def get_all_services(self) -> Dict[str, List[str]]:
+        """Get a snapshot of all services and their instances.
+
+        The instance lists are copied too, so callers cannot alter the
+        balancer's state through the returned mapping.
+        """
+        return {name: list(urls) for name, urls in self.service_instances.items()}
+
+    def health_check_failed(self, service_name: str, instance_url: str):
+        """Handle health check failure for a service instance (currently log-only)."""
+        logger.warning(f"Health check failed for {instance_url} ({service_name})")
+
+    def health_check_recovered(self, service_name: str, instance_url: str):
+        """Handle health check recovery for a service instance (currently log-only)."""
+        logger.info(f"Health check recovered for {instance_url} ({service_name})")
--- a/microservices/api-gateway/main.py
+++ b/microservices/api-gateway/main.py
@@ -0,0 +1,352 @@
+"""
+API Gateway for Energy Management Microservices
+Central entry point that routes requests to appropriate microservices.
+Port: 8000
+"""
+
+import asyncio
+import aiohttp
+from datetime import datetime
+from fastapi import FastAPI, HTTPException, Depends, Request, Response
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from contextlib import asynccontextmanager
+import logging
+import json
+from typing import Dict, Any, Optional
+import os
+
+from models import ServiceConfig, HealthResponse, GatewayStats
+from service_registry import ServiceRegistry
+from load_balancer import LoadBalancer
+from auth_middleware import AuthMiddleware
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan manager.
+
+    Startup: open the registry's HTTP session, register the service table
+    and start the periodic health check task.
+    Shutdown: stop the health check task and close the registry, even when
+    the application exits with an error.
+    """
+    logger.info("API Gateway starting up...")
+
+    # Initialize service registry (creates the HTTP session used by health checks)
+    await service_registry.initialize()
+
+    # Register here, where the event loop is running and the session exists,
+    # so the initial health check made by register_services() can succeed.
+    await service_registry.register_services(SERVICES)
+
+    # Keep a reference to the task: it is needed to cancel the task on
+    # shutdown, and a task nobody references may be garbage-collected.
+    health_task = asyncio.create_task(health_check_task())
+
+    logger.info("API Gateway startup complete")
+
+    try:
+        yield
+    finally:
+        logger.info("API Gateway shutting down...")
+        health_task.cancel()
+        try:
+            await health_task
+        except asyncio.CancelledError:
+            # Expected: the task was cancelled just above.
+            pass
+        await service_registry.close()
+        logger.info("API Gateway shutdown complete")
+
+# FastAPI application; startup and shutdown work is delegated to lifespan()
+app = FastAPI(
+    title="Energy Management API Gateway",
+    description="Central API gateway for energy management microservices",
+    version="1.0.0",
+    lifespan=lifespan
+)
+
+# CORS: every origin, method and header is accepted, with credentials enabled.
+# NOTE(review): a wildcard origin combined with allow_credentials=True is very
+# permissive -- confirm whether credentialed cross-origin requests are needed,
+# and list explicit origins if they are.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Service registry and load balancer
+# Module-level singletons shared by all request handlers. AuthMiddleware() is
+# built with its default token service URL (http://localhost:8001).
+# NOTE(review): LoadBalancer is not handed this service_registry instance; its
+# default "single" strategy builds a ServiceRegistry of its own.
+service_registry = ServiceRegistry()
+load_balancer = LoadBalancer()
+auth_middleware = AuthMiddleware()
+
+# Service configuration
+# One row per backend: (service name, local port, whether a token is required)
+_SERVICE_TABLE = [
+    ("token-service", 8001, False),
+    ("battery-service", 8002, True),
+    ("demand-response-service", 8003, True),
+    ("p2p-trading-service", 8004, True),
+    ("forecasting-service", 8005, True),
+    ("iot-control-service", 8006, True),
+]
+
+# Every backend runs on localhost and exposes its health probe at /health
+SERVICES = {
+    name: ServiceConfig(
+        name=name,
+        base_url=f"http://localhost:{port}",
+        health_endpoint="/health",
+        auth_required=needs_auth,
+    )
+    for name, port, needs_auth in _SERVICE_TABLE
+}
+
+# Request statistics
+# Mutable counters updated by proxy_request() and reported by /stats
+request_stats = {
+    "total_requests": 0,
+    "successful_requests": 0,
+    "failed_requests": 0,
+    "service_requests": dict.fromkeys(SERVICES, 0),
+    "start_time": datetime.utcnow(),
+}
+
+@app.get("/health", response_model=HealthResponse)
+async def gateway_health_check():
+    """Report gateway health together with the last known health of each service.
+
+    The gateway is "healthy" only when every configured service is healthy,
+    otherwise "degraded". Any failure while building the report yields 503.
+    """
+    try:
+        per_service = await service_registry.get_all_service_health()
+
+        healthy_names = [
+            name for name, info in per_service.items()
+            if info.get("status") == "healthy"
+        ]
+        healthy_count = len(healthy_names)
+        configured_count = len(SERVICES)
+
+        if healthy_count == configured_count:
+            overall = "healthy"
+        else:
+            overall = "degraded"
+
+        return HealthResponse(
+            service="api-gateway",
+            status=overall,
+            timestamp=datetime.utcnow(),
+            version="1.0.0",
+            services=per_service,
+            healthy_services=healthy_count,
+            total_services=configured_count,
+        )
+    except Exception as e:
+        logger.error(f"Gateway health check failed: {e}")
+        raise HTTPException(status_code=503, detail="Service Unavailable")
+
+@app.get("/services/status")
+async def get_services_status():
+    """Return the last known health of every registered service, with counts."""
+    try:
+        per_service = await service_registry.get_all_service_health()
+        report = {
+            "services": per_service,
+            "timestamp": datetime.utcnow().isoformat(),
+            "total_services": len(SERVICES),
+        }
+        report["healthy_services"] = len(
+            [info for info in per_service.values() if info.get("status") == "healthy"]
+        )
+        return report
+    except Exception as e:
+        logger.error(f"Error getting services status: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+@app.get("/stats", response_model=GatewayStats)
+async def get_gateway_stats():
+    """Return the request counters, success rate and uptime of the gateway."""
+    elapsed = datetime.utcnow() - request_stats["start_time"]
+    total = request_stats["total_requests"]
+    succeeded = request_stats["successful_requests"]
+
+    # max(..., 1) keeps the division defined before the first request arrives
+    success_pct = round((succeeded / max(total, 1)) * 100, 2)
+
+    return GatewayStats(
+        total_requests=total,
+        successful_requests=succeeded,
+        failed_requests=request_stats["failed_requests"],
+        success_rate=success_pct,
+        uptime_seconds=elapsed.total_seconds(),
+        service_requests=request_stats["service_requests"],
+        timestamp=datetime.utcnow(),
+    )
+
+# Token Service Routes
+@app.api_route("/api/v1/tokens/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def token_service_proxy(request: Request, path: str):
+    """Forward the request to token-service; the captured tail becomes the upstream path."""
+    upstream_path = "/" + path
+    return await proxy_request(request, "token-service", upstream_path)
+
+# Battery Service Routes
+@app.api_route("/api/v1/batteries/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def battery_service_proxy(request: Request, path: str):
+    """Forward the request to battery-service; the captured tail becomes the upstream path."""
+    upstream_path = "/" + path
+    return await proxy_request(request, "battery-service", upstream_path)
+
+# Demand Response Service Routes
+@app.api_route("/api/v1/demand-response/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def demand_response_service_proxy(request: Request, path: str):
+    """Forward the request to demand-response-service; the captured tail becomes the upstream path."""
+    upstream_path = "/" + path
+    return await proxy_request(request, "demand-response-service", upstream_path)
+
+# P2P Trading Service Routes
+@app.api_route("/api/v1/p2p/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def p2p_trading_service_proxy(request: Request, path: str):
+    """Forward the request to p2p-trading-service; the captured tail becomes the upstream path."""
+    upstream_path = "/" + path
+    return await proxy_request(request, "p2p-trading-service", upstream_path)
+
+# Forecasting Service Routes
+@app.api_route("/api/v1/forecast/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def forecasting_service_proxy(request: Request, path: str):
+    """Forward the request to forecasting-service; the captured tail becomes the upstream path."""
+    upstream_path = "/" + path
+    return await proxy_request(request, "forecasting-service", upstream_path)
+
+# IoT Control Service Routes
+@app.api_route("/api/v1/iot/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def iot_control_service_proxy(request: Request, path: str):
+    """Forward the request to iot-control-service; the captured tail becomes the upstream path."""
+    upstream_path = "/" + path
+    return await proxy_request(request, "iot-control-service", upstream_path)
+
+# Headers that describe one connection or the framing of one message. They are
+# not copied between the client-facing and the upstream-facing leg of the proxy.
+_NON_FORWARDED_HEADERS = frozenset({
+    "connection",
+    "keep-alive",
+    "proxy-authenticate",
+    "proxy-authorization",
+    "te",
+    "trailer",
+    "transfer-encoding",
+    "upgrade",
+    "host",
+    "content-length",
+})
+
+
+def _filter_headers(headers, also_drop=()):
+    """Copy ``headers`` into a dict, dropping non-forwardable names case-insensitively."""
+    dropped = _NON_FORWARDED_HEADERS.union(also_drop)
+    return {name: value for name, value in headers.items() if name.lower() not in dropped}
+
+
+async def proxy_request(request: Request, service_name: str, path: str):
+    """Forward ``request`` to ``service_name`` at ``path`` and relay the reply.
+
+    Updates ``request_stats`` along the way: every call counts as a request,
+    and it counts as failed when the upstream status is >= 400 or when the
+    call ends in an HTTPException.
+
+    Raises:
+        HTTPException: 404 for a service missing from SERVICES; whatever
+            ``auth_middleware.verify_token`` raises when the service requires
+            authentication; 503 when no URL is available for the service or
+            the connection fails; 504 when the upstream call times out;
+            500 for any other error.
+    """
+    try:
+        # Update request statistics
+        request_stats["total_requests"] += 1
+
+        # Get service configuration (before touching per-service counters, so
+        # an unknown name yields the 404 below rather than a KeyError)
+        service_config = SERVICES.get(service_name)
+        if not service_config:
+            raise HTTPException(status_code=404, detail=f"Service {service_name} not found")
+
+        per_service = request_stats["service_requests"]
+        per_service[service_name] = per_service.get(service_name, 0) + 1
+
+        # Check authentication if required
+        if service_config.auth_required:
+            await auth_middleware.verify_token(request)
+
+        # Get healthy service instance. The load balancer's default strategy
+        # consults a registry of its own, which may not know the services
+        # registered with this module's registry, so ask that one as well.
+        service_url = await load_balancer.get_service_url(service_name)
+        if not service_url:
+            service_url = await service_registry.get_service_url(service_name)
+        if not service_url:
+            raise HTTPException(status_code=503, detail=f"Service {service_name} unavailable")
+
+        # Prepare request
+        url = f"{service_url}{path}"
+        method = request.method
+        headers = _filter_headers(request.headers)
+
+        # Get request body
+        body = None
+        if method in ("POST", "PUT", "PATCH"):
+            body = await request.body()
+
+        # Make request to service
+        async with aiohttp.ClientSession() as session:
+            async with session.request(
+                method=method,
+                url=url,
+                headers=headers,
+                data=body,
+                # multi_items() keeps repeated keys such as ?id=1&id=2
+                params=request.query_params.multi_items(),
+                timeout=aiohttp.ClientTimeout(total=30)
+            ) as response:
+
+                # Get response data
+                response_data = await response.read()
+
+                # aiohttp has already decoded a compressed body, so the
+                # upstream Content-Encoding no longer describes response_data;
+                # Content-Length is recomputed by Response for the same reason.
+                response_headers = _filter_headers(
+                    response.headers, also_drop=("content-encoding",)
+                )
+
+                # Update success statistics
+                if response.status < 400:
+                    request_stats["successful_requests"] += 1
+                else:
+                    request_stats["failed_requests"] += 1
+
+                # Return response
+                return Response(
+                    content=response_data,
+                    status_code=response.status,
+                    headers=response_headers,
+                    # response.headers is case-insensitive, unlike the dict copy
+                    media_type=response.headers.get("Content-Type")
+                )
+
+    except HTTPException:
+        request_stats["failed_requests"] += 1
+        raise
+
+    except asyncio.TimeoutError:
+        # Not an aiohttp.ClientError, so it needs its own handler.
+        request_stats["failed_requests"] += 1
+        logger.error(f"Service {service_name} timed out")
+        raise HTTPException(status_code=504, detail=f"Service {service_name} timed out")
+
+    except aiohttp.ClientError as e:
+        request_stats["failed_requests"] += 1
+        logger.error(f"Service {service_name} connection error: {e}")
+        raise HTTPException(status_code=503, detail=f"Service {service_name} unavailable")
+
+    except Exception as e:
+        request_stats["failed_requests"] += 1
+        logger.error(f"Proxy error for {service_name}: {e}")
+        raise HTTPException(status_code=500, detail="Internal gateway error")
+
+# Endpoint queried on each service for the overview; a service without an
+# entry is reported as merely "available".
+_OVERVIEW_ENDPOINTS = {
+    "battery-service": "/batteries",
+    "demand-response-service": "/flexibility/current",
+    "p2p-trading-service": "/market/status",
+    "forecasting-service": "/forecast/summary",
+    "iot-control-service": "/devices/summary"
+}
+
+
+@app.get("/api/v1/overview")
+async def get_system_overview():
+    """Get comprehensive system overview from all services.
+
+    Each healthy service contributes the JSON body of its overview endpoint,
+    or a small status dict when it has no such endpoint, answers with a
+    non-200 status, or cannot be queried. Services that are not currently
+    healthy are left out of ``system_overview``.
+
+    NOTE(review): this route performs no token check although most of the
+    queried services are configured with auth_required=True -- confirm intent.
+    """
+    try:
+        overview = {}
+
+        # One session serves all upstream calls of this request.
+        async with aiohttp.ClientSession() as session:
+            for service_name in SERVICES:
+                try:
+                    if not await service_registry.is_service_healthy(service_name):
+                        continue
+
+                    endpoint = _OVERVIEW_ENDPOINTS.get(service_name)
+                    if not endpoint:
+                        overview[service_name] = {"status": "available"}
+                        continue
+
+                    # The load balancer's default strategy consults a registry
+                    # of its own, so fall back to this module's registry.
+                    service_url = (
+                        await load_balancer.get_service_url(service_name)
+                        or await service_registry.get_service_url(service_name)
+                    )
+                    if not service_url:
+                        overview[service_name] = {
+                            "status": "unavailable",
+                            "error": "No service URL available"
+                        }
+                        continue
+
+                    async with session.get(
+                        f"{service_url}{endpoint}",
+                        timeout=aiohttp.ClientTimeout(total=5)
+                    ) as response:
+                        if response.status == 200:
+                            overview[service_name] = await response.json()
+                        else:
+                            overview[service_name] = {"status": "error", "message": "Service returned error"}
+
+                except Exception as e:
+                    # Timeouts stringify to "", so fall back to the class name.
+                    reason = str(e) or type(e).__name__
+                    logger.warning(f"Could not get overview from {service_name}: {reason}")
+                    overview[service_name] = {"status": "unavailable", "error": reason}
+
+        return {
+            "system_overview": overview,
+            "timestamp": datetime.utcnow().isoformat(),
+            "services_checked": len(SERVICES)
+        }
+
+    except Exception as e:
+        logger.error(f"Error getting system overview: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+async def health_check_task():
+    """Refresh the health of every registered service in an endless loop.
+
+    Waits 30 seconds after a pass that completed and 60 seconds after a pass
+    that raised.
+    """
+    logger.info("Starting health check task")
+
+    while True:
+        try:
+            await service_registry.update_all_service_health()
+        except Exception as e:
+            logger.error(f"Error in health check task: {e}")
+            pause_seconds = 60  # back off after a failed pass
+        else:
+            pause_seconds = 30  # regular interval
+        await asyncio.sleep(pause_seconds)
+
+# Initialize service registry with services.
+# This statement runs while the module is being loaded. When the file is run
+# as a script (python main.py) no event loop is running yet at that point, so
+# scheduling register_services() with asyncio.create_task() would raise
+# RuntimeError. Filling the table directly needs no loop; the health check
+# task started in lifespan() probes every entry of service_registry.services
+# on its first pass.
+service_registry.services.update(SERVICES)
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
--- a/microservices/api-gateway/models.py
+++ b/microservices/api-gateway/models.py
@@ -0,0 +1,77 @@
+"""
+Models for API Gateway
+"""
+
+from pydantic import BaseModel, Field
+from typing import Dict, Any, Optional, List
+from datetime import datetime
+
+class ServiceConfig(BaseModel):
+    """Configuration for a microservice.
+
+    The registry builds the health probe URL as ``base_url + health_endpoint``.
+    """
+    # Service identifier; also used as the key in the registry's tables
+    name: str
+    # Root URL of the service, joined with request paths by plain concatenation
+    base_url: str
+    # Path appended to base_url for health probes
+    health_endpoint: str = "/health"
+    # Whether the gateway verifies a bearer token before proxying to this service
+    auth_required: bool = True
+    # NOTE(review): the two fields below are not read by the gateway code
+    # visible alongside this model -- confirm whether they are used elsewhere
+    timeout_seconds: int = 30
+    retry_attempts: int = 3
+
+class ServiceHealth(BaseModel):
+    """Health status of a service, as recorded by the most recent check."""
+    # Name of the service this record belongs to
+    service: str
+    status: str  # healthy, unhealthy, unknown
+    # Duration of the health request in milliseconds; None when not measured
+    response_time_ms: Optional[float] = None
+    # When the status was last determined
+    last_check: datetime
+    # Reason for a non-healthy status, when one is available
+    error_message: Optional[str] = None
+
+class HealthResponse(BaseModel):
+    """Gateway health response returned by the gateway's /health endpoint."""
+    # Name of the reporting service
+    service: str
+    # Overall status string of the reporting service
+    status: str
+    # Time the response was built
+    timestamp: datetime
+    # Version string of the reporting service
+    version: str
+    # Per-service health details keyed by service name
+    services: Optional[Dict[str, Any]] = None
+    # Number of services currently reported healthy
+    healthy_services: Optional[int] = None
+    # Number of configured services
+    total_services: Optional[int] = None
+    
+    class Config:
+        # Serialize datetimes as ISO 8601 strings
+        json_encoders = {
+            datetime: lambda v: v.isoformat()
+        }
+
+class GatewayStats(BaseModel):
+    """API Gateway statistics returned by the gateway's /stats endpoint."""
+    # Request counters accumulated since the gateway started
+    total_requests: int
+    successful_requests: int
+    failed_requests: int
+    # Successful requests as a percentage of total requests
+    success_rate: float
+    # Seconds elapsed since the gateway started
+    uptime_seconds: float
+    # Request count per service name
+    service_requests: Dict[str, int]
+    # Time the statistics were read
+    timestamp: datetime
+    
+    class Config:
+        # Serialize datetimes as ISO 8601 strings
+        json_encoders = {
+            datetime: lambda v: v.isoformat()
+        }
+
+class AuthToken(BaseModel):
+    """Authentication token model.
+
+    NOTE(review): not referenced by the gateway code visible alongside this
+    model -- confirm whether it is used elsewhere.
+    """
+    # Raw token string
+    token: str
+    # Identifier of the token owner, when known
+    user_id: Optional[str] = None
+    # Permission names; every instance gets its own empty list by default
+    permissions: List[str] = Field(default_factory=list)
+
+class ProxyRequest(BaseModel):
+    """Proxy request model: the parts of a request to be forwarded to a service.
+
+    NOTE(review): not referenced by the gateway code visible alongside this
+    model -- confirm whether it is used elsewhere.
+    """
+    # Target service name
+    service: str
+    # Path on the target service
+    path: str
+    # HTTP method
+    method: str
+    # Request headers
+    headers: Dict[str, str]
+    # Query string parameters
+    query_params: Dict[str, Any]
+    # Raw request body, if any
+    body: Optional[bytes] = None
+
+class ProxyResponse(BaseModel):
+    """Proxy response model: a service's reply plus bookkeeping fields.
+
+    NOTE(review): not referenced by the gateway code visible alongside this
+    model -- confirm whether it is used elsewhere.
+    """
+    # HTTP status code of the reply
+    status_code: int
+    # Response headers
+    headers: Dict[str, str]
+    # Raw response body
+    body: bytes
+    # Name of the service that produced the reply
+    service: str
+    # Time taken by the service call, in milliseconds
+    response_time_ms: float
--- a/microservices/api-gateway/requirements.txt
+++ b/microservices/api-gateway/requirements.txt
@@ -0,0 +1,5 @@
+fastapi
+uvicorn[standard]
+aiohttp
+python-dotenv
+pydantic
--- a/microservices/api-gateway/service_registry.py
+++ b/microservices/api-gateway/service_registry.py
@@ -0,0 +1,194 @@
+"""
+Service registry for managing microservice discovery and health monitoring
+"""
+
+import aiohttp
+import asyncio
+from datetime import datetime
+from typing import Dict, List, Optional
+import logging
+
+from models import ServiceConfig, ServiceHealth
+
+logger = logging.getLogger(__name__)
+
+class ServiceRegistry:
+    """Service registry for microservice management.
+
+    Keeps the configuration of every registered service together with the
+    result of its most recent health check. Health checks are HTTP GET
+    requests to ``base_url + health_endpoint`` over one shared session.
+    """
+
+    def __init__(self):
+        self.services: Dict[str, ServiceConfig] = {}
+        self.service_health: Dict[str, ServiceHealth] = {}
+        self.session: Optional[aiohttp.ClientSession] = None
+
+    async def initialize(self):
+        """Open the shared HTTP session (10 s total timeout) unless one is already open."""
+        if self.session is None or self.session.closed:
+            self.session = aiohttp.ClientSession(
+                timeout=aiohttp.ClientTimeout(total=10)
+            )
+        logger.info("Service registry initialized")
+
+    async def close(self):
+        """Close the shared HTTP session, if any."""
+        if self.session is not None:
+            await self.session.close()
+            # Drop the closed session so a later health check opens a new one.
+            self.session = None
+        logger.info("Service registry closed")
+
+    @staticmethod
+    def _unknown_health(service_name: str, error_message: Optional[str] = None) -> ServiceHealth:
+        """Build the placeholder record used before a service has been checked."""
+        return ServiceHealth(
+            service=service_name,
+            status="unknown",
+            last_check=datetime.utcnow(),
+            error_message=error_message
+        )
+
+    async def register_services(self, services: Dict[str, ServiceConfig]):
+        """Register multiple services, then health-check every registered service."""
+        self.services.update(services)
+
+        # Initialize health status for all services
+        for service_name in services:
+            self.service_health[service_name] = self._unknown_health(service_name)
+
+        logger.info(f"Registered {len(services)} services")
+
+        # Perform initial health check
+        await self.update_all_service_health()
+
+    async def register_service(self, service_config: ServiceConfig):
+        """Register a single service and health-check it immediately."""
+        self.services[service_config.name] = service_config
+        self.service_health[service_config.name] = self._unknown_health(service_config.name)
+
+        logger.info(f"Registered service: {service_config.name}")
+
+        # Check health of the newly registered service
+        await self.check_service_health(service_config.name)
+
+    async def unregister_service(self, service_name: str):
+        """Unregister a service and forget its health record."""
+        self.services.pop(service_name, None)
+        self.service_health.pop(service_name, None)
+        logger.info(f"Unregistered service: {service_name}")
+
+    async def check_service_health(self, service_name: str) -> ServiceHealth:
+        """Check health of a specific service and store the result.
+
+        A service is "healthy" when its health endpoint answers 200 with a
+        JSON object whose ``status`` is "healthy" or "ok". Any other answer,
+        a timeout, or a connection error makes it "unhealthy". For a service
+        that is not registered an "unknown" record is returned and nothing is
+        stored. This method does not raise for failed checks.
+        """
+        service_config = self.services.get(service_name)
+        if not service_config:
+            logger.error(f"Service {service_name} not found in registry")
+            return self._unknown_health(service_name, "Service not registered")
+
+        start_time = datetime.utcnow()
+
+        try:
+            if self.session is None or self.session.closed:
+                # Checks may be requested before initialize() was called;
+                # open the session on demand instead of failing on None.
+                await self.initialize()
+
+            health_url = f"{service_config.base_url}{service_config.health_endpoint}"
+
+            async with self.session.get(health_url) as response:
+                end_time = datetime.utcnow()
+                response_time = (end_time - start_time).total_seconds() * 1000
+
+                if response.status == 200:
+                    health_data = await response.json()
+                    reported = health_data.get("status")
+                    is_healthy = reported in ["healthy", "ok"]
+
+                    health = ServiceHealth(
+                        service=service_name,
+                        status="healthy" if is_healthy else "unhealthy",
+                        response_time_ms=response_time,
+                        last_check=end_time,
+                        # Explain the verdict so the warning below is not "None"
+                        error_message=None if is_healthy else f"Reported status: {reported!r}"
+                    )
+                else:
+                    health = ServiceHealth(
+                        service=service_name,
+                        status="unhealthy",
+                        response_time_ms=response_time,
+                        last_check=end_time,
+                        error_message=f"HTTP {response.status}"
+                    )
+
+        except asyncio.TimeoutError:
+            # str() of a timeout is empty, so give it a message of its own.
+            health = ServiceHealth(
+                service=service_name,
+                status="unhealthy",
+                last_check=datetime.utcnow(),
+                error_message="Health check timed out"
+            )
+        except aiohttp.ClientError as e:
+            health = ServiceHealth(
+                service=service_name,
+                status="unhealthy",
+                last_check=datetime.utcnow(),
+                error_message=f"Connection error: {str(e)}"
+            )
+        except Exception as e:
+            health = ServiceHealth(
+                service=service_name,
+                status="unhealthy",
+                last_check=datetime.utcnow(),
+                error_message=f"Health check failed: {str(e) or type(e).__name__}"
+            )
+
+        # Update health status
+        self.service_health[service_name] = health
+
+        # Log every non-healthy result
+        if health.status != "healthy":
+            logger.warning(f"Service {service_name} health check failed: {health.error_message}")
+
+        return health
+
+    async def update_all_service_health(self):
+        """Update health status for all registered services concurrently."""
+        health_checks = [
+            self.check_service_health(service_name)
+            for service_name in self.services
+        ]
+
+        if not health_checks:
+            return
+
+        await asyncio.gather(*health_checks, return_exceptions=True)
+
+        # Log summary
+        healthy_count = sum(1 for h in self.service_health.values() if h.status == "healthy")
+        total_count = len(self.services)
+        logger.info(f"Health check complete: {healthy_count}/{total_count} services healthy")
+
+    async def get_service_health(self, service_name: str) -> Optional[ServiceHealth]:
+        """Get the stored health record of a specific service, or None."""
+        return self.service_health.get(service_name)
+
+    async def get_all_service_health(self) -> Dict[str, Dict]:
+        """Get the stored health of all services as plain dicts keyed by service name."""
+        return {
+            service_name: {
+                "status": health.status,
+                "response_time_ms": health.response_time_ms,
+                "last_check": health.last_check.isoformat(),
+                "error_message": health.error_message
+            }
+            for service_name, health in self.service_health.items()
+        }
+
+    async def is_service_healthy(self, service_name: str) -> bool:
+        """Check if the stored status of a service is "healthy"."""
+        health = self.service_health.get(service_name)
+        return health is not None and health.status == "healthy"
+
+    async def get_healthy_services(self) -> List[str]:
+        """Get list of healthy service names."""
+        return [
+            service_name
+            for service_name, health in self.service_health.items()
+            if health.status == "healthy"
+        ]
+
+    def get_service_config(self, service_name: str) -> Optional[ServiceConfig]:
+        """Get configuration for a specific service."""
+        return self.services.get(service_name)
+
+    def get_all_services(self) -> Dict[str, ServiceConfig]:
+        """Get a shallow copy of all registered services."""
+        return self.services.copy()
+
+    async def get_service_url(self, service_name: str) -> Optional[str]:
+        """Get base URL for a healthy service; None when unhealthy or unknown."""
+        if not await self.is_service_healthy(service_name):
+            return None
+        service_config = self.services.get(service_name)
+        return service_config.base_url if service_config else None