Add data-ingestion-service for SA4CPS FTP integration

- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit-inference support
- Publish parsed data to Redis topics for real-time dashboard simulation (see the consumer sketch below)
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
rafaeldpsilva
2025-09-10 14:43:30 +01:00
parent d4f280de93
commit 5fdce00e5d
16 changed files with 6353 additions and 0 deletions
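
The parsed readings are published to the Redis topics configured in the file below (sa4cps_energy_data, sa4cps_sensor_metrics, sa4cps_raw_data). A minimal consumer sketch for the dashboard side, assuming redis-py's asyncio pub/sub, a Redis instance on localhost:6379, and JSON-encoded messages (the payload encoding and connection details are assumptions, not confirmed by this commit):

# Minimal consumer sketch for the SA4CPS Redis topics.
# Assumptions: redis-py >= 4.2 (redis.asyncio), Redis on localhost:6379,
# and JSON-encoded payloads -- none of these are confirmed by this commit.
import asyncio
import json

import redis.asyncio as redis


async def consume_sa4cps_topics():
    client = redis.Redis(host="localhost", port=6379, decode_responses=True)
    pubsub = client.pubsub()
    # Topic names match the topic_configs defined in the configurator below
    await pubsub.subscribe("sa4cps_energy_data", "sa4cps_sensor_metrics", "sa4cps_raw_data")

    async for message in pubsub.listen():
        if message["type"] != "message":
            continue  # skip subscribe confirmations
        reading = json.loads(message["data"])
        print(f"{message['channel']}: {reading}")


if __name__ == "__main__":
    asyncio.run(consume_sa4cps_topics())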

@@ -0,0 +1,301 @@
"""
SA4CPS FTP Configuration
Configure the data ingestion service for SA4CPS FTP server at ftp.sa4cps.pt
"""
import asyncio
import json
from datetime import datetime
from typing import Dict, Any
import logging
from database import get_database, get_redis
from models import DataSourceCreate, FTPConfig, TopicConfig
logger = logging.getLogger(__name__)
class SA4CPSConfigurator:
"""Configures data sources for SA4CPS FTP server"""
def __init__(self):
self.ftp_host = "ftp.sa4cps.pt"
self.file_extension = "*.slg_v2"
    async def create_sa4cps_data_source(self,
                                        username: str = "anonymous",
                                        password: str = "",
                                        remote_path: str = "/",
                                        use_ssl: bool = False) -> Dict[str, Any]:
        """Create SA4CPS data source configuration"""
        try:
            db = await get_database()

            # Check if SA4CPS source already exists
            existing_source = await db.data_sources.find_one({
                "name": "SA4CPS Energy Data",
                "ftp_config.host": self.ftp_host
            })
            if existing_source:
                logger.info("SA4CPS data source already exists")
                return {
                    "success": True,
                    "message": "SA4CPS data source already configured",
                    "source_id": str(existing_source["_id"])
                }

            # Create FTP configuration
            ftp_config = {
                "host": self.ftp_host,
                "port": 21,
                "username": username,
                "password": password,
                "use_ssl": use_ssl,
                "passive_mode": True,
                "remote_path": remote_path,
                "timeout": 30
            }

            # Create topic configurations for different data types
            topic_configs = [
                {
                    "topic_name": "sa4cps_energy_data",
                    "description": "Real-time energy data from SA4CPS sensors",
                    "data_types": ["energy", "power", "consumption"],
                    "format": "sensor_reading",
                    "enabled": True
                },
                {
                    "topic_name": "sa4cps_sensor_metrics",
                    "description": "Sensor metrics and telemetry from SA4CPS",
                    "data_types": ["telemetry", "status", "diagnostics"],
                    "format": "sensor_reading",
                    "enabled": True
                },
                {
                    "topic_name": "sa4cps_raw_data",
                    "description": "Raw unprocessed data from SA4CPS .slg_v2 files",
                    "data_types": ["raw"],
                    "format": "raw_data",
                    "enabled": True
                }
            ]

            # Create the data source document
            source_doc = {
                "name": "SA4CPS Energy Data",
                "description": "Real-time energy monitoring data from SA4CPS project FTP server",
                "source_type": "ftp",
                "ftp_config": ftp_config,
                "file_patterns": [self.file_extension],
                "data_format": "slg_v2",  # Custom format for .slg_v2 files
                "redis_topics": [topic["topic_name"] for topic in topic_configs],
                "topics": topic_configs,
                "polling_interval_minutes": 5,  # Check every 5 minutes
                "max_file_size_mb": 50,  # Reasonable limit for sensor data
                "enabled": True,
                "check_interval_seconds": 300,  # 5 minutes in seconds
                "created_at": datetime.utcnow(),
                "updated_at": datetime.utcnow(),
                "status": "configured"
            }

            # Insert the data source
            result = await db.data_sources.insert_one(source_doc)
            source_id = str(result.inserted_id)
            logger.info(f"Created SA4CPS data source with ID: {source_id}")

            return {
                "success": True,
                "message": "SA4CPS data source created successfully",
                "source_id": source_id,
                "ftp_host": self.ftp_host,
                "file_pattern": self.file_extension,
                "topics": [topic["topic_name"] for topic in topic_configs]
            }
        except Exception as e:
            logger.error(f"Error creating SA4CPS data source: {e}")
            return {
                "success": False,
                "message": f"Failed to create SA4CPS data source: {str(e)}"
            }
    async def update_sa4cps_credentials(self, username: str, password: str) -> Dict[str, Any]:
        """Update SA4CPS FTP credentials"""
        try:
            db = await get_database()

            # Find SA4CPS data source
            source = await db.data_sources.find_one({
                "name": "SA4CPS Energy Data",
                "ftp_config.host": self.ftp_host
            })
            if not source:
                return {
                    "success": False,
                    "message": "SA4CPS data source not found. Please create it first."
                }

            # Update credentials
            result = await db.data_sources.update_one(
                {"_id": source["_id"]},
                {
                    "$set": {
                        "ftp_config.username": username,
                        "ftp_config.password": password,
                        "updated_at": datetime.utcnow()
                    }
                }
            )

            if result.modified_count > 0:
                logger.info("Updated SA4CPS FTP credentials")
                return {
                    "success": True,
                    "message": "SA4CPS FTP credentials updated successfully"
                }
            else:
                return {
                    "success": False,
                    "message": "No changes made to SA4CPS credentials"
                }
        except Exception as e:
            logger.error(f"Error updating SA4CPS credentials: {e}")
            return {
                "success": False,
                "message": f"Failed to update credentials: {str(e)}"
            }
    async def test_sa4cps_connection(self) -> Dict[str, Any]:
        """Test connection to the SA4CPS FTP server"""
        try:
            from ftp_monitor import FTPMonitor

            db = await get_database()
            redis = await get_redis()

            # Get SA4CPS data source
            source = await db.data_sources.find_one({
                "name": "SA4CPS Energy Data",
                "ftp_config.host": self.ftp_host
            })
            if not source:
                return {
                    "success": False,
                    "message": "SA4CPS data source not found. Please create it first."
                }

            # Test connection
            monitor = FTPMonitor(db, redis)
            connection_success = await monitor.test_connection(source)

            if connection_success:
                # Try to list files
                new_files = await monitor.check_for_new_files(source)
                return {
                    "success": True,
                    "message": "Successfully connected to SA4CPS FTP server",
                    "connection_status": "connected",
                    "files_found": len(new_files),
                    "file_list": [f["filename"] for f in new_files[:10]]  # First 10 files
                }
            else:
                return {
                    "success": False,
                    "message": "Failed to connect to SA4CPS FTP server",
                    "connection_status": "failed"
                }
        except Exception as e:
            logger.error(f"Error testing SA4CPS connection: {e}")
            return {
                "success": False,
                "message": f"Connection test failed: {str(e)}",
                "connection_status": "error"
            }
    async def get_sa4cps_status(self) -> Dict[str, Any]:
        """Get SA4CPS data source status"""
        try:
            db = await get_database()

            source = await db.data_sources.find_one({
                "name": "SA4CPS Energy Data",
                "ftp_config.host": self.ftp_host
            })
            if not source:
                return {
                    "configured": False,
                    "message": "SA4CPS data source not found"
                }

            # Get processing history
            processed_count = await db.processed_files.count_documents({
                "source_id": source["_id"]
            })

            # Get recent files
            recent_files = []
            cursor = db.processed_files.find({
                "source_id": source["_id"]
            }).sort("processed_at", -1).limit(5)
            async for file_record in cursor:
                recent_files.append({
                    "filename": file_record["filename"],
                    "processed_at": file_record["processed_at"].isoformat(),
                    "file_size": file_record.get("file_size", 0)
                })

            return {
                "configured": True,
                "source_id": str(source["_id"]),
                "name": source["name"],
                "enabled": source.get("enabled", False),
                "status": source.get("status", "unknown"),
                "ftp_host": source["ftp_config"]["host"],
                "file_pattern": source["file_patterns"],
                "last_check": source.get("last_check").isoformat() if source.get("last_check") else None,
                "last_success": source.get("last_success").isoformat() if source.get("last_success") else None,
                "total_files_processed": processed_count,
                "recent_files": recent_files,
                "topics": source.get("redis_topics", [])
            }
        except Exception as e:
            logger.error(f"Error getting SA4CPS status: {e}")
            return {
                "configured": False,
                "error": str(e)
            }
async def main():
    """Main function to set up the SA4CPS configuration"""
    print("Setting up SA4CPS Data Ingestion Configuration...")
    configurator = SA4CPSConfigurator()

    # Create the data source
    result = await configurator.create_sa4cps_data_source()
    print(f"Configuration result: {json.dumps(result, indent=2)}")

    # Test connection
    print("\nTesting connection to SA4CPS FTP server...")
    test_result = await configurator.test_sa4cps_connection()
    print(f"Connection test: {json.dumps(test_result, indent=2)}")

    # Show status
    print("\nSA4CPS Data Source Status:")
    status = await configurator.get_sa4cps_status()
    print(f"Status: {json.dumps(status, indent=2)}")


if __name__ == "__main__":
    asyncio.run(main())
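
If the SA4CPS server does not accept anonymous logins, the configurator above can be driven from a short one-off script that sets credentials before running the connection test. A hypothetical invocation, assuming the file is importable as sa4cps_config and using placeholder credentials (both are assumptions, not taken from this commit):

# Hypothetical one-off setup script; the module name "sa4cps_config" and the
# credentials shown here are assumptions, not taken from this commit.
import asyncio

from sa4cps_config import SA4CPSConfigurator


async def setup_with_credentials():
    configurator = SA4CPSConfigurator()

    # Create the data source (no-op if it already exists), then set real credentials
    print(await configurator.create_sa4cps_data_source())
    print(await configurator.update_sa4cps_credentials("sa4cps_user", "change-me"))

    # Verify that the FTP server is reachable and lists .slg_v2 files
    print(await configurator.test_sa4cps_connection())


if __name__ == "__main__":
    asyncio.run(setup_with_credentials())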