first commit

This commit is contained in:
rafaeldpsilva
2025-09-09 13:46:42 +01:00
commit a7a18e6295
77 changed files with 8678 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
# Container image for the service started by main.py.
# Base image: Debian-slim variant of the official Python 3.9 image.
FROM python:3.9-slim
# All following relative paths (COPY targets, the CMD script) resolve under /app.
WORKDIR /app
# Install system dependencies.
# curl is required by the HEALTHCHECK below; gcc is presumably needed to build
# Python packages that ship C extensions (TODO confirm it is still required).
# The apt package lists are deleted in the same layer so they do not end up
# in the final image.
RUN apt-get update && apt-get install -y \
    gcc \
    curl \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies.
# This happens before the application code is copied so that code-only changes
# can reuse the cached dependency layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Expose port (documentation only; the application itself binds the port)
EXPOSE 8000
# Health check: probe the /health endpoint every 30s, allow 10s per probe,
# give the process 5s to start, and mark the container unhealthy after
# 3 consecutive failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1
# Run the application
CMD ["python", "main.py"]

View File

@@ -0,0 +1,89 @@
"""
Authentication middleware for API Gateway
"""
import aiohttp
from fastapi import HTTPException, Request
from typing import Optional, Dict, Any
import logging
logger = logging.getLogger(__name__)
class AuthMiddleware:
    """Validate bearer tokens by delegating to the token service.

    The object only stores the token service base URL; every call to
    :meth:`verify_token` performs one HTTP round-trip to
    ``<token_service_url>/tokens/validate``.
    """

    def __init__(self, token_service_url: str = "http://localhost:8001"):
        """Remember the base URL of the token service used for validation."""
        self.token_service_url = token_service_url

    async def verify_token(self, request: Request) -> Optional[Dict[str, Any]]:
        """Verify the bearer token carried by *request*.

        Args:
            request: Incoming request; the token is read from its
                ``Authorization: Bearer <token>`` header.

        Returns:
            The ``decoded`` field of the token service response, or ``None``
            when the service reports the token as valid but omits that field.

        Raises:
            HTTPException: 401 when the header is missing or malformed or the
                token is rejected; 503 when the token service cannot be
                reached or does not answer in time; 500 on any other failure.
        """
        # Local import: the module header does not import asyncio, and a total
        # timeout is raised as asyncio.TimeoutError, which is not a ClientError.
        import asyncio

        auth_header = request.headers.get("Authorization")
        if not auth_header:
            raise HTTPException(status_code=401, detail="Authorization header required")
        if not auth_header.startswith("Bearer "):
            raise HTTPException(status_code=401, detail="Bearer token required")

        token = auth_header[len("Bearer "):].strip()
        if not token:
            # Nothing to validate; skip the pointless round-trip.
            raise HTTPException(status_code=401, detail="Bearer token required")

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{self.token_service_url}/tokens/validate",
                    json={"token": token},
                    timeout=aiohttp.ClientTimeout(total=5),
                ) as response:
                    if response.status != 200:
                        raise HTTPException(status_code=401, detail="Token validation failed")

                    token_data = await response.json()
                    if not token_data.get("valid"):
                        error_msg = token_data.get("error", "Invalid token")
                        raise HTTPException(status_code=401, detail=error_msg)

                    # Token is valid, return decoded payload
                    return token_data.get("decoded")
        except HTTPException:
            # Our own 401s must reach the caller unchanged.
            raise
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # Connection failures and timeouts both mean the dependency is
            # unavailable, not that the gateway itself is broken.
            logger.error(f"Token service connection error: {e}")
            raise HTTPException(status_code=503, detail="Authentication service unavailable")
        except Exception as e:
            logger.error(f"Token verification error: {e}")
            raise HTTPException(status_code=500, detail="Authentication error")

    async def check_permissions(self, token_payload: Dict[str, Any], required_resources: list) -> bool:
        """Return True when the token grants access to every required resource.

        Args:
            token_payload: Decoded token payload; its ``list_of_resources``
                entry lists the resources the token may access.
            required_resources: Resources the caller needs.

        Returns:
            False for an empty or missing payload, otherwise whether all
            *required_resources* appear in the token's resource list.
        """
        if not token_payload:
            return False

        # ``or []`` also covers a payload that carries an explicit null.
        token_resources = token_payload.get("list_of_resources") or []
        return all(resource in token_resources for resource in required_resources)

    def extract_user_info(self, token_payload: Dict[str, Any]) -> Dict[str, Any]:
        """Project the token payload onto the fields the gateway cares about.

        A missing payload (``None``) is treated like an empty one, so every
        key is present in the result with its default value.
        """
        payload = token_payload or {}
        return {
            "name": payload.get("name"),
            "resources": payload.get("list_of_resources", []),
            "data_aggregation": payload.get("data_aggregation", False),
            "time_aggregation": payload.get("time_aggregation", False),
            "embargo": payload.get("embargo", 0),
            "expires_at": payload.get("exp"),
        }

View File

@@ -0,0 +1,124 @@
"""
Load balancer for distributing requests across service instances
"""
import random
from typing import List, Dict, Optional
import logging
logger = logging.getLogger(__name__)
class LoadBalancer:
    """Simple load balancer for microservice requests.

    Instances are registered per service name. When no instance is registered
    for a service (or the ``single`` strategy is used) the URL is looked up in
    a service registry instead.
    """

    def __init__(self, service_registry=None):
        """Create an empty load balancer.

        Args:
            service_registry: Optional registry object exposing an async
                ``get_service_url(service_name)``. Pass the application's
                shared registry here; a registry created on demand starts
                empty and therefore cannot resolve any service.
        """
        self.service_instances: Dict[str, List[str]] = {}
        self.current_index: Dict[str, int] = {}
        self.service_registry = service_registry

    def _get_registry(self):
        """Return the registry used for fallback look-ups, creating it once."""
        if self.service_registry is None:
            # Imported lazily, as before, so this module can be loaded without
            # the registry's own dependencies.
            from service_registry import ServiceRegistry
            self.service_registry = ServiceRegistry()
        return self.service_registry

    def register_service_instance(self, service_name: str, instance_url: str):
        """Register *instance_url* for *service_name* (duplicates are ignored)."""
        instances = self.service_instances.setdefault(service_name, [])
        self.current_index.setdefault(service_name, 0)

        if instance_url not in instances:
            instances.append(instance_url)
            logger.info(f"Registered instance {instance_url} for service {service_name}")

    def unregister_service_instance(self, service_name: str, instance_url: str):
        """Remove *instance_url* from *service_name*; unknown URLs only log a warning."""
        instances = self.service_instances.get(service_name)
        if instances is None:
            return

        try:
            instances.remove(instance_url)
        except ValueError:
            logger.warning(f"Instance {instance_url} not found for service {service_name}")
            return

        logger.info(f"Unregistered instance {instance_url} for service {service_name}")
        # Reset index if it's out of bounds
        if self.current_index.get(service_name, 0) >= len(instances):
            self.current_index[service_name] = 0

    async def get_service_url(self, service_name: str, strategy: str = "single") -> Optional[str]:
        """Return a URL for *service_name* chosen by *strategy*.

        Strategies:
            - single: ask the service registry (default)
            - round_robin: rotate through the registered instances
            - random: pick a registered instance at random

        Returns:
            The selected URL, or ``None`` when the strategy is unknown or the
            registry cannot resolve the service.
        """
        if strategy == "single":
            return await self._get_registry().get_service_url(service_name)
        if strategy == "round_robin":
            return await self._round_robin_select(service_name)
        if strategy == "random":
            return await self._random_select(service_name)

        logger.error(f"Unknown load balancing strategy: {strategy}")
        return None

    async def _round_robin_select(self, service_name: str) -> Optional[str]:
        """Select the next instance in rotation, falling back to the registry."""
        instances = self.service_instances.get(service_name, [])
        if not instances:
            return await self._get_registry().get_service_url(service_name)

        # The modulo keeps the index valid even if the instance list shrank
        # since the index was last stored.
        current_idx = self.current_index.get(service_name, 0) % len(instances)
        selected_instance = instances[current_idx]
        self.current_index[service_name] = (current_idx + 1) % len(instances)

        logger.debug(f"Round-robin selected {selected_instance} for {service_name}")
        return selected_instance

    async def _random_select(self, service_name: str) -> Optional[str]:
        """Select a random registered instance, falling back to the registry."""
        instances = self.service_instances.get(service_name, [])
        if not instances:
            return await self._get_registry().get_service_url(service_name)

        selected_instance = random.choice(instances)
        logger.debug(f"Random selected {selected_instance} for {service_name}")
        return selected_instance

    def get_service_instances(self, service_name: str) -> List[str]:
        """Return a copy of the instances registered for *service_name*."""
        return list(self.service_instances.get(service_name, []))

    def get_instance_count(self, service_name: str) -> int:
        """Return how many instances are registered for *service_name*."""
        return len(self.service_instances.get(service_name, []))

    def get_all_services(self) -> Dict[str, List[str]]:
        """Return a copy of the service -> instances mapping.

        The inner lists are copied as well, so callers cannot mutate the
        balancer's state through the returned value.
        """
        return {name: list(urls) for name, urls in self.service_instances.items()}

    def health_check_failed(self, service_name: str, instance_url: str):
        """Record a failed health check for an instance (currently log-only)."""
        logger.warning(f"Health check failed for {instance_url} ({service_name})")
        # In a production system, you might temporarily remove unhealthy instances
        # For now, we just log the failure

    def health_check_recovered(self, service_name: str, instance_url: str):
        """Record a recovered health check for an instance (currently log-only)."""
        logger.info(f"Health check recovered for {instance_url} ({service_name})")
        # Re-register the instance if it was temporarily removed

View File

@@ -0,0 +1,352 @@
"""
API Gateway for Energy Management Microservices
Central entry point that routes requests to appropriate microservices.
Port: 8000
"""
import asyncio
import aiohttp
from datetime import datetime
from fastapi import FastAPI, HTTPException, Depends, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from contextlib import asynccontextmanager
import logging
import json
from typing import Dict, Any, Optional
import os
from models import ServiceConfig, HealthResponse, GatewayStats
from service_registry import ServiceRegistry
from load_balancer import LoadBalancer
from auth_middleware import AuthMiddleware
# Configure logging
# basicConfig sets the root logger to INFO; the module-level logger below is
# named after this module and is used by every handler in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager.

    Startup: opens the service registry's HTTP session and starts the
    periodic health check task. Shutdown: cancels that task, waits for it to
    finish, then closes the registry. Cleanup runs even if the application
    body raises.
    """
    logger.info("API Gateway starting up...")

    # Initialize service registry
    await service_registry.initialize()

    # Keep a reference to the task: the event loop only holds weak references
    # to tasks, and the task must be cancelled before the session it uses is
    # closed.
    health_task = asyncio.create_task(health_check_task())

    logger.info("API Gateway startup complete")

    try:
        yield
    finally:
        logger.info("API Gateway shutting down...")
        health_task.cancel()
        try:
            await health_task
        except asyncio.CancelledError:
            # Expected: this is the cancellation requested just above.
            pass
        await service_registry.close()
        logger.info("API Gateway shutdown complete")
app = FastAPI(
    title="Energy Management API Gateway",
    description="Central API gateway for energy management microservices",
    version="1.0.0",
    lifespan=lifespan
)

# CORS policy.
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS environment
# variable and default to "*" (any origin). Credentialed requests are only
# enabled when an explicit origin list is configured: combining a wildcard
# origin with credentials would let any website issue authenticated
# cross-origin requests.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
def _service_url(env_var: str, default: str) -> str:
    """Return the base URL stored in *env_var*, or *default* when unset/empty.

    A trailing slash is removed because request paths appended to the base
    URL already start with "/".
    """
    return (os.getenv(env_var) or default).rstrip("/")


# Service configuration
# Every base URL can be overridden through an environment variable so the
# gateway also works when the services are not reachable on localhost
# (for example inside a container). Without overrides the values are the
# original localhost defaults.
SERVICES = {
    "token-service": ServiceConfig(
        name="token-service",
        base_url=_service_url("TOKEN_SERVICE_URL", "http://localhost:8001"),
        health_endpoint="/health",
        auth_required=False
    ),
    "battery-service": ServiceConfig(
        name="battery-service",
        base_url=_service_url("BATTERY_SERVICE_URL", "http://localhost:8002"),
        health_endpoint="/health",
        auth_required=True
    ),
    "demand-response-service": ServiceConfig(
        name="demand-response-service",
        base_url=_service_url("DEMAND_RESPONSE_SERVICE_URL", "http://localhost:8003"),
        health_endpoint="/health",
        auth_required=True
    ),
    "p2p-trading-service": ServiceConfig(
        name="p2p-trading-service",
        base_url=_service_url("P2P_TRADING_SERVICE_URL", "http://localhost:8004"),
        health_endpoint="/health",
        auth_required=True
    ),
    "forecasting-service": ServiceConfig(
        name="forecasting-service",
        base_url=_service_url("FORECASTING_SERVICE_URL", "http://localhost:8005"),
        health_endpoint="/health",
        auth_required=True
    ),
    "iot-control-service": ServiceConfig(
        name="iot-control-service",
        base_url=_service_url("IOT_CONTROL_SERVICE_URL", "http://localhost:8006"),
        health_endpoint="/health",
        auth_required=True
    )
}

# Service registry and load balancer
service_registry = ServiceRegistry()
load_balancer = LoadBalancer()
# Validate tokens against the same token service the gateway proxies to.
auth_middleware = AuthMiddleware(token_service_url=SERVICES["token-service"].base_url)

# Request statistics
# Mutable, process-local counters updated by proxy_request and read by /stats.
request_stats = {
    "total_requests": 0,
    "successful_requests": 0,
    "failed_requests": 0,
    "service_requests": {service: 0 for service in SERVICES.keys()},
    "start_time": datetime.utcnow()
}
@app.get("/health", response_model=HealthResponse)
async def gateway_health_check():
    """Report the gateway's health together with the health of every service.

    The gateway is "healthy" only when the number of services whose last
    recorded status is "healthy" equals the number of configured services;
    otherwise it is "degraded". Any failure while building the report is
    answered with HTTP 503.
    """
    try:
        health_by_service = await service_registry.get_all_service_health()

        healthy_entries = [
            entry
            for entry in health_by_service.values()
            if entry.get("status") == "healthy"
        ]
        healthy_total = len(healthy_entries)
        expected_total = len(SERVICES)

        if healthy_total == expected_total:
            gateway_state = "healthy"
        else:
            gateway_state = "degraded"

        return HealthResponse(
            service="api-gateway",
            status=gateway_state,
            timestamp=datetime.utcnow(),
            version="1.0.0",
            services=health_by_service,
            healthy_services=healthy_total,
            total_services=expected_total,
        )
    except Exception as e:
        logger.error(f"Gateway health check failed: {e}")
        raise HTTPException(status_code=503, detail="Service Unavailable")
@app.get("/services/status")
async def get_services_status():
    """Return the registry's latest health snapshot for all services.

    The response carries the per-service health mapping, an ISO timestamp,
    the number of configured services and how many of them are currently
    recorded as healthy. Failures are answered with HTTP 500.
    """
    try:
        snapshot = await service_registry.get_all_service_health()

        report = {"services": snapshot}
        report["timestamp"] = datetime.utcnow().isoformat()
        report["total_services"] = len(SERVICES)
        report["healthy_services"] = len(
            [entry for entry in snapshot.values() if entry.get("status") == "healthy"]
        )
        return report
    except Exception as e:
        logger.error(f"Error getting services status: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@app.get("/stats", response_model=GatewayStats)
async def get_gateway_stats():
    """Return the request counters collected since the process started.

    The success rate is a percentage rounded to two decimals; the divisor is
    clamped to 1 so an idle gateway reports 0.0 instead of dividing by zero.
    """
    started_at = request_stats["start_time"]
    uptime = (datetime.utcnow() - started_at).total_seconds()

    total = request_stats["total_requests"]
    succeeded = request_stats["successful_requests"]
    success_ratio = succeeded / max(total, 1)

    return GatewayStats(
        total_requests=total,
        successful_requests=succeeded,
        failed_requests=request_stats["failed_requests"],
        success_rate=round(success_ratio * 100, 2),
        uptime_seconds=uptime,
        service_requests=request_stats["service_requests"],
        timestamp=datetime.utcnow(),
    )
# Token Service Routes
@app.api_route(
    "/api/v1/tokens/{path:path}",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
)
async def token_service_proxy(request: Request, path: str):
    """Proxy requests under ``/api/v1/tokens/`` to the token service.

    The gateway prefix is stripped: ``/api/v1/tokens/<path>`` is forwarded as
    ``/<path>``. PATCH is routed too, because ``proxy_request`` already
    forwards PATCH bodies.
    """
    return await proxy_request(request, "token-service", f"/{path}")
# Battery Service Routes
@app.api_route(
    "/api/v1/batteries/{path:path}",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
)
async def battery_service_proxy(request: Request, path: str):
    """Proxy requests under ``/api/v1/batteries/`` to the battery service.

    The gateway prefix is stripped: ``/api/v1/batteries/<path>`` is forwarded
    as ``/<path>``. PATCH is routed too, because ``proxy_request`` already
    forwards PATCH bodies.
    """
    return await proxy_request(request, "battery-service", f"/{path}")
# Demand Response Service Routes
@app.api_route(
    "/api/v1/demand-response/{path:path}",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
)
async def demand_response_service_proxy(request: Request, path: str):
    """Proxy requests under ``/api/v1/demand-response/`` to the demand response service.

    The gateway prefix is stripped: ``/api/v1/demand-response/<path>`` is
    forwarded as ``/<path>``. PATCH is routed too, because ``proxy_request``
    already forwards PATCH bodies.
    """
    return await proxy_request(request, "demand-response-service", f"/{path}")
# P2P Trading Service Routes
@app.api_route(
    "/api/v1/p2p/{path:path}",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
)
async def p2p_trading_service_proxy(request: Request, path: str):
    """Proxy requests under ``/api/v1/p2p/`` to the P2P trading service.

    The gateway prefix is stripped: ``/api/v1/p2p/<path>`` is forwarded as
    ``/<path>``. PATCH is routed too, because ``proxy_request`` already
    forwards PATCH bodies.
    """
    return await proxy_request(request, "p2p-trading-service", f"/{path}")
# Forecasting Service Routes
@app.api_route(
    "/api/v1/forecast/{path:path}",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
)
async def forecasting_service_proxy(request: Request, path: str):
    """Proxy requests under ``/api/v1/forecast/`` to the forecasting service.

    The gateway prefix is stripped: ``/api/v1/forecast/<path>`` is forwarded
    as ``/<path>``. PATCH is routed too, because ``proxy_request`` already
    forwards PATCH bodies.
    """
    return await proxy_request(request, "forecasting-service", f"/{path}")
# IoT Control Service Routes
@app.api_route(
    "/api/v1/iot/{path:path}",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
)
async def iot_control_service_proxy(request: Request, path: str):
    """Proxy requests under ``/api/v1/iot/`` to the IoT control service.

    The gateway prefix is stripped: ``/api/v1/iot/<path>`` is forwarded as
    ``/<path>``. PATCH is routed too, because ``proxy_request`` already
    forwards PATCH bodies.
    """
    return await proxy_request(request, "iot-control-service", f"/{path}")
# Hop-by-hop headers describe a single connection and must not be forwarded
# by a proxy.
_HOP_BY_HOP_HEADERS = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
})


def _forwardable_headers(headers, also_drop=()):
    """Copy *headers* without hop-by-hop headers and the names in *also_drop*.

    Header names are compared case-insensitively.
    """
    dropped = _HOP_BY_HOP_HEADERS.union(also_drop)
    return {
        name: value
        for name, value in headers.items()
        if name.lower() not in dropped
    }


async def proxy_request(request: Request, service_name: str, path: str):
    """Forward *request* to *service_name* and relay the upstream response.

    Args:
        request: The incoming gateway request.
        service_name: Key of the target service in ``SERVICES``.
        path: Path on the upstream service, starting with "/".

    Returns:
        A ``Response`` carrying the upstream status code, body and
        end-to-end headers.

    Raises:
        HTTPException: 404 for an unknown service; whatever
            ``auth_middleware.verify_token`` raises when authentication is
            required; 503 when no upstream URL is available or the connection
            fails; 504 when the upstream does not answer in time; 500 on any
            other error.
    """
    try:
        # Update request statistics
        request_stats["total_requests"] += 1

        # Look the service up before touching its per-service counter, so an
        # unknown name yields the intended 404 rather than a KeyError.
        service_config = SERVICES.get(service_name)
        if not service_config:
            raise HTTPException(status_code=404, detail=f"Service {service_name} not found")
        per_service = request_stats["service_requests"]
        per_service[service_name] = per_service.get(service_name, 0) + 1

        # Check authentication if required
        if service_config.auth_required:
            await auth_middleware.verify_token(request)

        # Resolve the upstream URL. The load balancer may not know the
        # service; the gateway's own registry is the fallback.
        service_url = await load_balancer.get_service_url(service_name)
        if not service_url:
            service_url = await service_registry.get_service_url(service_name)
        if not service_url:
            raise HTTPException(status_code=503, detail=f"Service {service_name} unavailable")

        url = f"{service_url}{path}"
        method = request.method
        # Host and Content-Length are recomputed by the HTTP client.
        headers = _forwardable_headers(request.headers, ("host", "content-length"))

        # Get request body
        body = None
        if method in ("POST", "PUT", "PATCH"):
            body = await request.body()

        # Make request to service
        async with aiohttp.ClientSession() as session:
            async with session.request(
                method=method,
                url=url,
                headers=headers,
                data=body,
                # multi_items() keeps repeated query parameters (?a=1&a=2).
                params=request.query_params.multi_items(),
                timeout=aiohttp.ClientTimeout(total=service_config.timeout_seconds),
            ) as response:
                response_data = await response.read()
                # read() returns the already-decoded body, so the upstream
                # Content-Encoding and Content-Length no longer describe it.
                response_headers = _forwardable_headers(
                    response.headers, ("content-encoding", "content-length")
                )

                # Update success statistics
                if response.status < 400:
                    request_stats["successful_requests"] += 1
                else:
                    request_stats["failed_requests"] += 1

                return Response(
                    content=response_data,
                    status_code=response.status,
                    headers=response_headers,
                    # aiohttp's header mapping is case-insensitive.
                    media_type=response.headers.get("Content-Type"),
                )

    except HTTPException:
        request_stats["failed_requests"] += 1
        raise
    except asyncio.TimeoutError:
        request_stats["failed_requests"] += 1
        logger.error(f"Service {service_name} timed out")
        raise HTTPException(status_code=504, detail=f"Service {service_name} timed out")
    except aiohttp.ClientError as e:
        request_stats["failed_requests"] += 1
        logger.error(f"Service {service_name} connection error: {e}")
        raise HTTPException(status_code=503, detail=f"Service {service_name} unavailable")
    except Exception as e:
        request_stats["failed_requests"] += 1
        logger.error(f"Proxy error for {service_name}: {e}")
        raise HTTPException(status_code=500, detail="Internal gateway error")
@app.get("/api/v1/overview")
async def get_system_overview():
    """Collect a summary from every configured service.

    For each service in ``SERVICES`` the result holds one of:
      - the JSON body of the service's overview endpoint,
      - ``{"status": "available"}`` for healthy services without an
        overview endpoint,
      - ``{"status": "error", ...}`` when the endpoint answers non-200,
      - ``{"status": "unavailable", ...}`` when the service is unhealthy,
        has no resolvable URL, or the request fails.

    A failure for one service never aborts the whole overview.
    """
    # Service-specific overview endpoints; services not listed here have none.
    overview_endpoints = {
        "battery-service": "/batteries",
        "demand-response-service": "/flexibility/current",
        "p2p-trading-service": "/market/status",
        "forecasting-service": "/forecast/summary",
        "iot-control-service": "/devices/summary"
    }

    try:
        overview = {}

        # One HTTP session is shared by all look-ups of this request.
        async with aiohttp.ClientSession() as session:
            for service_name in SERVICES:
                try:
                    if not await service_registry.is_service_healthy(service_name):
                        overview[service_name] = {"status": "unavailable", "error": "Service unhealthy"}
                        continue

                    endpoint = overview_endpoints.get(service_name)
                    if not endpoint:
                        overview[service_name] = {"status": "available"}
                        continue

                    # The load balancer may not know the service; the
                    # gateway's own registry is the fallback.
                    service_url = await load_balancer.get_service_url(service_name)
                    if not service_url:
                        service_url = await service_registry.get_service_url(service_name)
                    if not service_url:
                        overview[service_name] = {"status": "unavailable", "error": "No service URL available"}
                        continue

                    async with session.get(
                        f"{service_url}{endpoint}",
                        timeout=aiohttp.ClientTimeout(total=5),
                    ) as response:
                        if response.status == 200:
                            overview[service_name] = await response.json()
                        else:
                            overview[service_name] = {"status": "error", "message": "Service returned error"}

                except Exception as e:
                    logger.warning(f"Could not get overview from {service_name}: {e}")
                    overview[service_name] = {"status": "unavailable", "error": str(e)}

        return {
            "system_overview": overview,
            "timestamp": datetime.utcnow().isoformat(),
            "services_checked": len(SERVICES)
        }

    except Exception as e:
        logger.error(f"Error getting system overview: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
async def health_check_task():
    """Refresh the health of all registered services in an endless loop.

    After a successful refresh the loop pauses for 30 seconds; when the
    refresh raises, the error is logged and the pause is 60 seconds. The
    loop only ends when the task is cancelled.
    """
    logger.info("Starting health check task")

    while True:
        try:
            await service_registry.update_all_service_health()
        except Exception as e:
            logger.error(f"Error in health check task: {e}")
            pause_seconds = 60  # back off after a failure
        else:
            pause_seconds = 30  # Check every 30 seconds
        await asyncio.sleep(pause_seconds)
# Initialize service registry with services.
# This statement runs while the module is being imported. When the file is
# executed as a script no event loop is running at that point, so
# asyncio.create_task() would raise RuntimeError. The configuration is
# therefore stored synchronously; health_check_task() performs the first
# health probe as soon as the application has started.
service_registry.services.update(SERVICES)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -0,0 +1,77 @@
"""
Models for API Gateway
"""
from pydantic import BaseModel, Field
from typing import Dict, Any, Optional, List
from datetime import datetime
class ServiceConfig(BaseModel):
    """Configuration for a microservice"""
    # Unique service name
    name: str
    # Base URL of the service, without a trailing path
    base_url: str
    # Path appended to base_url for health probes
    health_endpoint: str = "/health"
    # Whether requests to this service must carry a valid token
    auth_required: bool = True
    # presumably the per-request timeout in seconds — TODO confirm where it is applied
    timeout_seconds: int = 30
    # presumably the number of retries per request — TODO confirm where it is applied
    retry_attempts: int = 3
class ServiceHealth(BaseModel):
    """Health status of a service"""
    # Name of the service this record describes
    service: str
    status: str  # healthy, unhealthy, unknown
    # Duration of the last health probe in milliseconds; None when not measured
    response_time_ms: Optional[float] = None
    # Time of the last health probe
    last_check: datetime
    # Reason for a non-healthy status; None when there is nothing to report
    error_message: Optional[str] = None
class HealthResponse(BaseModel):
    """Gateway health response"""
    # Name of the reporting service
    service: str
    # Overall status string of the reporting service
    status: str
    # Time the response was generated
    timestamp: datetime
    # Version string of the reporting service
    version: str
    # Optional per-service health details, keyed by service name
    services: Optional[Dict[str, Any]] = None
    # Optional count of services currently considered healthy
    healthy_services: Optional[int] = None
    # Optional count of all known services
    total_services: Optional[int] = None

    class Config:
        # Serialize datetimes as ISO 8601 strings in JSON output
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }
class GatewayStats(BaseModel):
    """API Gateway statistics"""
    # Number of requests counted in total
    total_requests: int
    # Number of requests counted as successful
    successful_requests: int
    # Number of requests counted as failed
    failed_requests: int
    # Success rate as a float (presumably a percentage — TODO confirm against the producer)
    success_rate: float
    # Uptime in seconds
    uptime_seconds: float
    # Request count per service name
    service_requests: Dict[str, int]
    # Time the statistics were generated
    timestamp: datetime

    class Config:
        # Serialize datetimes as ISO 8601 strings in JSON output
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }
class AuthToken(BaseModel):
    """Authentication token model"""
    # The raw token string
    token: str
    # Optional identifier of the token's owner
    user_id: Optional[str] = None
    # Permission names attached to the token; a fresh empty list per instance
    permissions: List[str] = Field(default_factory=list)
class ProxyRequest(BaseModel):
    """Proxy request model"""
    # Name of the target service
    service: str
    # Request path on the target service
    path: str
    # HTTP method
    method: str
    # Request headers
    headers: Dict[str, str]
    # Query string parameters
    query_params: Dict[str, Any]
    # Raw request body; None when the request has no body
    body: Optional[bytes] = None
class ProxyResponse(BaseModel):
    """Proxy response model"""
    # HTTP status code of the response
    status_code: int
    # Response headers
    headers: Dict[str, str]
    # Raw response body
    body: bytes
    # Name of the service that produced the response
    service: str
    # Response time in milliseconds
    response_time_ms: float

View File

@@ -0,0 +1,5 @@
fastapi
uvicorn[standard]
aiohttp
python-dotenv
pydantic

View File

@@ -0,0 +1,194 @@
"""
Service registry for managing microservice discovery and health monitoring
"""
import aiohttp
import asyncio
from datetime import datetime
from typing import Dict, List, Optional
import logging
from models import ServiceConfig, ServiceHealth
logger = logging.getLogger(__name__)
class ServiceRegistry:
    """Registry of known microservices and their last observed health.

    ``services`` maps a service name to its configuration and
    ``service_health`` maps it to the result of the most recent health probe.
    Probes use one shared HTTP session that is opened by :meth:`initialize`
    and released by :meth:`close`.
    """

    def __init__(self):
        self.services: Dict[str, ServiceConfig] = {}
        self.service_health: Dict[str, ServiceHealth] = {}
        self.session: Optional[aiohttp.ClientSession] = None

    async def initialize(self):
        """Open the shared HTTP session (no-op when one is already open)."""
        if self.session is not None and not self.session.closed:
            # Re-initializing must not leak the session that is still open.
            return
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=10)
        )
        logger.info("Service registry initialized")

    async def close(self):
        """Close the shared HTTP session and forget it."""
        if self.session is not None:
            await self.session.close()
            # Drop the reference so later probes report a clear error instead
            # of failing on a closed session.
            self.session = None
        logger.info("Service registry closed")

    async def register_services(self, services: Dict[str, ServiceConfig]):
        """Register several services, then probe the health of all services."""
        self.services.update(services)

        # Every new service starts as "unknown" until the first probe.
        for service_name in services:
            self.service_health[service_name] = ServiceHealth(
                service=service_name,
                status="unknown",
                last_check=datetime.utcnow()
            )

        logger.info(f"Registered {len(services)} services")

        # Perform initial health check
        await self.update_all_service_health()

    async def register_service(self, service_config: ServiceConfig):
        """Register a single service and probe its health immediately."""
        self.services[service_config.name] = service_config
        self.service_health[service_config.name] = ServiceHealth(
            service=service_config.name,
            status="unknown",
            last_check=datetime.utcnow()
        )

        logger.info(f"Registered service: {service_config.name}")

        # Check health of the newly registered service
        await self.check_service_health(service_config.name)

    async def unregister_service(self, service_name: str):
        """Remove a service and its health record; unknown names are ignored."""
        self.services.pop(service_name, None)
        self.service_health.pop(service_name, None)
        logger.info(f"Unregistered service: {service_name}")

    def _unhealthy(self, service_name: str, message: str) -> ServiceHealth:
        """Build an "unhealthy" record carrying *message* as the reason."""
        return ServiceHealth(
            service=service_name,
            status="unhealthy",
            last_check=datetime.utcnow(),
            error_message=message
        )

    async def _probe(self, service_name: str, service_config: ServiceConfig) -> ServiceHealth:
        """Request the service's health endpoint and translate the outcome."""
        health_url = f"{service_config.base_url}{service_config.health_endpoint}"
        start_time = datetime.utcnow()

        try:
            async with self.session.get(health_url) as response:
                end_time = datetime.utcnow()
                response_time = (end_time - start_time).total_seconds() * 1000

                if response.status != 200:
                    return ServiceHealth(
                        service=service_name,
                        status="unhealthy",
                        response_time_ms=response_time,
                        last_check=end_time,
                        error_message=f"HTTP {response.status}"
                    )

                health_data = await response.json()
                reported = health_data.get("status")
                if reported in ("healthy", "ok"):
                    return ServiceHealth(
                        service=service_name,
                        status="healthy",
                        response_time_ms=response_time,
                        last_check=end_time
                    )
                # Answered 200 but does not consider itself healthy: keep the
                # reported value so the warning log names a reason.
                return ServiceHealth(
                    service=service_name,
                    status="unhealthy",
                    response_time_ms=response_time,
                    last_check=end_time,
                    error_message=f"Service reported status: {reported!r}"
                )
        except asyncio.TimeoutError:
            # str() of a timeout error is empty, so name the cause explicitly.
            return self._unhealthy(service_name, "Connection error: health check timed out")
        except aiohttp.ClientError as e:
            return self._unhealthy(service_name, f"Connection error: {str(e)}")
        except Exception as e:
            return self._unhealthy(service_name, f"Health check failed: {str(e)}")

    async def check_service_health(self, service_name: str) -> ServiceHealth:
        """Probe one service and record the result.

        Returns:
            The new health record. For an unregistered service an "unknown"
            record is returned and nothing is stored.
        """
        service_config = self.services.get(service_name)
        if not service_config:
            logger.error(f"Service {service_name} not found in registry")
            return ServiceHealth(
                service=service_name,
                status="unknown",
                last_check=datetime.utcnow(),
                error_message="Service not registered"
            )

        if self.session is None or self.session.closed:
            health = self._unhealthy(
                service_name,
                "Health check failed: HTTP session not initialized"
            )
        else:
            health = await self._probe(service_name, service_config)

        # The service may have been unregistered while the probe was in
        # flight; do not resurrect its health record in that case.
        if service_name in self.services:
            self.service_health[service_name] = health

        # Log health status changes
        if health.status != "healthy":
            logger.warning(f"Service {service_name} health check failed: {health.error_message}")

        return health

    async def update_all_service_health(self):
        """Probe all registered services concurrently and log a summary."""
        health_checks = [
            self.check_service_health(service_name)
            for service_name in self.services
        ]

        if health_checks:
            await asyncio.gather(*health_checks, return_exceptions=True)

        # Log summary
        healthy_count = sum(1 for h in self.service_health.values() if h.status == "healthy")
        total_count = len(self.services)
        logger.info(f"Health check complete: {healthy_count}/{total_count} services healthy")

    async def get_service_health(self, service_name: str) -> Optional[ServiceHealth]:
        """Return the last health record of a service, or None if unknown."""
        return self.service_health.get(service_name)

    async def get_all_service_health(self) -> Dict[str, Dict]:
        """Return every health record as a plain, JSON-friendly dict."""
        return {
            service_name: {
                "status": health.status,
                "response_time_ms": health.response_time_ms,
                "last_check": health.last_check.isoformat(),
                "error_message": health.error_message
            }
            for service_name, health in self.service_health.items()
        }

    async def is_service_healthy(self, service_name: str) -> bool:
        """Return True when the last recorded status of the service is "healthy"."""
        health = self.service_health.get(service_name)
        return health is not None and health.status == "healthy"

    async def get_healthy_services(self) -> List[str]:
        """Return the names of all services last recorded as healthy."""
        return [
            service_name
            for service_name, health in self.service_health.items()
            if health.status == "healthy"
        ]

    def get_service_config(self, service_name: str) -> Optional[ServiceConfig]:
        """Return the configuration of a service, or None if not registered."""
        return self.services.get(service_name)

    def get_all_services(self) -> Dict[str, ServiceConfig]:
        """Return a shallow copy of the name -> configuration mapping."""
        return self.services.copy()

    async def get_service_url(self, service_name: str) -> Optional[str]:
        """Return the base URL of a service, but only while it is healthy."""
        if await self.is_service_healthy(service_name):
            service_config = self.services.get(service_name)
            return service_config.base_url if service_config else None
        return None