- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files - Add robust data processor with multi-format and unit inference support - Publish parsed data to Redis topics for real-time dashboard simulation - Include validation, monitoring, and auto-configuration scripts - Provide documentation and test scripts for SA4CPS integration
301 lines
11 KiB
Python
301 lines
11 KiB
Python
"""
SA4CPS FTP Configuration
Configure the data ingestion service for the SA4CPS FTP server at ftp.sa4cps.pt
"""
|
|
|
|
import asyncio
import json
import logging
from datetime import datetime
from typing import Any, Dict, Optional

from database import get_database, get_redis
from models import DataSourceCreate, FTPConfig, TopicConfig
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class SA4CPSConfigurator:
    """Create, update, test and inspect the SA4CPS FTP data source record.

    Every public coroutine returns a plain ``dict`` describing the outcome
    instead of raising: failures are logged and reported through a
    ``"success": False`` (or ``"configured": False``) entry.
    """

    # Name under which the SA4CPS source is stored. Together with the FTP
    # host it forms the filter used to look the record up in ``data_sources``.
    SOURCE_NAME = "SA4CPS Energy Data"

    def __init__(self):
        self.ftp_host = "ftp.sa4cps.pt"
        # Glob pattern (not a bare extension) matching the files to ingest.
        self.file_extension = "*.slg_v2"

    async def _find_source(self, db) -> Optional[Dict[str, Any]]:
        """Return the stored SA4CPS data source document, or ``None``."""
        return await db.data_sources.find_one({
            "name": self.SOURCE_NAME,
            "ftp_config.host": self.ftp_host,
        })

    @staticmethod
    def _isoformat_or_none(value) -> Optional[str]:
        """Return ``value.isoformat()``, or ``None`` when ``value`` is falsy."""
        return value.isoformat() if value else None

    async def create_sa4cps_data_source(self,
                                        username: str = "anonymous",
                                        password: str = "",
                                        remote_path: str = "/",
                                        use_ssl: bool = False) -> Dict[str, Any]:
        """Create the SA4CPS data source document if it does not exist yet.

        Args:
            username: FTP login name stored in the source's ``ftp_config``.
            password: FTP password stored in the source's ``ftp_config``.
            remote_path: Remote directory stored in ``ftp_config``.
            use_ssl: Value stored under ``ftp_config.use_ssl``.

        Returns:
            A dict with ``success`` and ``message``. On success it also holds
            ``source_id``; for a newly created source it additionally holds
            ``ftp_host``, ``file_pattern`` and ``topics``.
        """
        try:
            db = await get_database()

            # Nothing to do when the source is already there.
            existing_source = await self._find_source(db)
            if existing_source:
                logger.info("SA4CPS data source already exists")
                return {
                    "success": True,
                    "message": "SA4CPS data source already configured",
                    "source_id": str(existing_source["_id"]),
                }

            # NOTE(review): the password is written to the document exactly
            # as given (plain text) - confirm this is acceptable.
            ftp_config = {
                "host": self.ftp_host,
                "port": 21,
                "username": username,
                "password": password,
                "use_ssl": use_ssl,
                "passive_mode": True,
                "remote_path": remote_path,
                "timeout": 30,
            }

            # Topic configurations for the different data types.
            topic_configs = [
                {
                    "topic_name": "sa4cps_energy_data",
                    "description": "Real-time energy data from SA4CPS sensors",
                    "data_types": ["energy", "power", "consumption"],
                    "format": "sensor_reading",
                    "enabled": True,
                },
                {
                    "topic_name": "sa4cps_sensor_metrics",
                    "description": "Sensor metrics and telemetry from SA4CPS",
                    "data_types": ["telemetry", "status", "diagnostics"],
                    "format": "sensor_reading",
                    "enabled": True,
                },
                {
                    "topic_name": "sa4cps_raw_data",
                    "description": "Raw unprocessed data from SA4CPS .slg_v2 files",
                    "data_types": ["raw"],
                    "format": "raw_data",
                    "enabled": True,
                },
            ]
            topic_names = [topic["topic_name"] for topic in topic_configs]

            # One timestamp for both fields so a fresh record has
            # created_at == updated_at. utcnow() yields a naive UTC value;
            # it is deprecated since Python 3.12 but kept here so the stored
            # timestamps stay naive like the ones already written.
            now = datetime.utcnow()

            source_doc = {
                "name": self.SOURCE_NAME,
                "description": "Real-time energy monitoring data from SA4CPS project FTP server",
                "source_type": "ftp",
                "ftp_config": ftp_config,
                # The pattern is listed once; it used to be duplicated.
                "file_patterns": [self.file_extension],
                "data_format": "slg_v2",  # Custom format for .slg_v2 files
                "redis_topics": topic_names,
                "topics": topic_configs,
                "polling_interval_minutes": 5,  # Check every 5 minutes
                "max_file_size_mb": 50,  # Reasonable limit for sensor data
                "enabled": True,
                "check_interval_seconds": 300,  # 5 minutes in seconds
                "created_at": now,
                "updated_at": now,
                "status": "configured",
            }

            result = await db.data_sources.insert_one(source_doc)
            source_id = str(result.inserted_id)

            logger.info("Created SA4CPS data source with ID: %s", source_id)

            return {
                "success": True,
                "message": "SA4CPS data source created successfully",
                "source_id": source_id,
                "ftp_host": self.ftp_host,
                "file_pattern": self.file_extension,
                "topics": topic_names,
            }

        except Exception as e:
            # Boundary handler: report the failure to the caller as data,
            # but keep the traceback in the log.
            logger.exception("Error creating SA4CPS data source: %s", e)
            return {
                "success": False,
                "message": f"Failed to create SA4CPS data source: {str(e)}",
            }

    async def update_sa4cps_credentials(self, username: str, password: str) -> Dict[str, Any]:
        """Replace the FTP username and password of the stored SA4CPS source.

        Args:
            username: New value for ``ftp_config.username``.
            password: New value for ``ftp_config.password``.

        Returns:
            A dict with ``success`` and ``message``. ``success`` is ``False``
            when the source does not exist, when the update reports no
            modified document, or when an error occurred.
        """
        try:
            db = await get_database()

            source = await self._find_source(db)
            if not source:
                return {
                    "success": False,
                    "message": "SA4CPS data source not found. Please create it first.",
                }

            result = await db.data_sources.update_one(
                {"_id": source["_id"]},
                {
                    "$set": {
                        "ftp_config.username": username,
                        "ftp_config.password": password,
                        "updated_at": datetime.utcnow(),
                    }
                },
            )

            if result.modified_count > 0:
                logger.info("Updated SA4CPS FTP credentials")
                return {
                    "success": True,
                    "message": "SA4CPS FTP credentials updated successfully",
                }

            return {
                "success": False,
                "message": "No changes made to SA4CPS credentials",
            }

        except Exception as e:
            logger.exception("Error updating SA4CPS credentials: %s", e)
            return {
                "success": False,
                "message": f"Failed to update credentials: {str(e)}",
            }

    async def test_sa4cps_connection(self) -> Dict[str, Any]:
        """Test the connection to the SA4CPS FTP server via ``FTPMonitor``.

        Returns:
            A dict with ``success`` and ``message``. Once the source has been
            found it also carries ``connection_status`` (``"connected"``,
            ``"failed"`` or ``"error"``); on a successful connection it adds
            ``files_found`` and ``file_list`` (at most the first 10 names).
        """
        try:
            # Imported here so that an import failure is reported as a
            # failed test result instead of breaking module import.
            from ftp_monitor import FTPMonitor

            db = await get_database()
            redis = await get_redis()

            source = await self._find_source(db)
            if not source:
                return {
                    "success": False,
                    "message": "SA4CPS data source not found. Please create it first.",
                }

            monitor = FTPMonitor(db, redis)
            connection_success = await monitor.test_connection(source)

            if not connection_success:
                return {
                    "success": False,
                    "message": "Failed to connect to SA4CPS FTP server",
                    "connection_status": "failed",
                }

            # Connection works: also ask the monitor which files it sees.
            new_files = await monitor.check_for_new_files(source)

            return {
                "success": True,
                "message": "Successfully connected to SA4CPS FTP server",
                "connection_status": "connected",
                "files_found": len(new_files),
                "file_list": [f["filename"] for f in new_files[:10]],  # First 10 files
            }

        except Exception as e:
            logger.exception("Error testing SA4CPS connection: %s", e)
            return {
                "success": False,
                "message": f"Connection test failed: {str(e)}",
                "connection_status": "error",
            }

    async def get_sa4cps_status(self) -> Dict[str, Any]:
        """Summarise the stored SA4CPS source and its processed files.

        Returns:
            ``{"configured": False, ...}`` when the source is missing or an
            error occurred (then with an ``error`` entry). Otherwise a dict
            with ``configured: True``, the source's identity and state
            fields, the number of ``processed_files`` documents for the
            source and its five most recently processed files.
        """
        try:
            db = await get_database()

            source = await self._find_source(db)
            if not source:
                return {
                    "configured": False,
                    "message": "SA4CPS data source not found",
                }

            history_filter = {"source_id": source["_id"]}

            # Processing history.
            processed_count = await db.processed_files.count_documents(history_filter)

            # Five most recent files, newest first.
            cursor = db.processed_files.find(history_filter).sort("processed_at", -1).limit(5)

            # Use .get() so a history record missing a field does not turn
            # the whole status report into an error.
            recent_files = [
                {
                    "filename": file_record.get("filename"),
                    "processed_at": self._isoformat_or_none(file_record.get("processed_at")),
                    "file_size": file_record.get("file_size", 0),
                }
                async for file_record in cursor
            ]

            return {
                "configured": True,
                "source_id": str(source["_id"]),
                "name": source["name"],
                "enabled": source.get("enabled", False),
                "status": source.get("status", "unknown"),
                "ftp_host": source["ftp_config"]["host"],
                "file_pattern": source.get("file_patterns", []),
                "last_check": self._isoformat_or_none(source.get("last_check")),
                "last_success": self._isoformat_or_none(source.get("last_success")),
                "total_files_processed": processed_count,
                "recent_files": recent_files,
                "topics": source.get("redis_topics", []),
            }

        except Exception as e:
            logger.exception("Error getting SA4CPS status: %s", e)
            return {
                "configured": False,
                "error": str(e),
            }
|
|
|
|
async def main():
    """Set up the SA4CPS source, test its FTP connection and print its status.

    Each step's result dict is printed as indented JSON.
    """

    def as_json(payload):
        # Shared formatting for the three result dumps below.
        return json.dumps(payload, indent=2)

    print("Setting up SA4CPS Data Ingestion Configuration...")
    sa4cps = SA4CPSConfigurator()

    # Step 1: create the data source (or detect the existing one).
    creation = await sa4cps.create_sa4cps_data_source()
    print(f"Configuration result: {as_json(creation)}")

    # Step 2: check that the FTP server is reachable.
    print("\nTesting connection to SA4CPS FTP server...")
    connection = await sa4cps.test_sa4cps_connection()
    print(f"Connection test: {as_json(connection)}")

    # Step 3: report the current state of the data source.
    print("\nSA4CPS Data Source Status:")
    current = await sa4cps.get_sa4cps_status()
    print(f"Status: {as_json(current)}")
|
|
|
|
# Run the setup routine only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())