- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit inference support
- Publish parsed data to Redis topics for real-time dashboard simulation
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
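For orientation, here is a minimal sketch of the Redis publishing step described above. It assumes the redis-py client and the three topic names listed in the usage notes at the end of the test script below; the service's actual publishing code is not shown here and may differ.

    import json

    import redis  # assumption: the redis-py package

    r = redis.Redis(host="localhost", port=6379)

    def publish_record(record: dict) -> None:
        # Fan one parsed .slg_v2 record out to the dashboard topics.
        payload = json.dumps(record)
        for topic in ("sa4cps_energy_data", "sa4cps_sensor_metrics", "sa4cps_raw_data"):
            r.publish(topic, payload)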
215 lines · 7.5 KiB · Python
#!/usr/bin/env python3
"""
Test script for .slg_v2 file processing
"""

import asyncio
import json
from datetime import datetime
from data_processor import DataProcessor

# Sample .slg_v2 content for testing
SAMPLE_SLG_V2_CONTENT = """# SA4CPS Energy Monitoring Data
# System: Smart Grid Monitoring
# Location: Research Facility
# Start Time: 2024-01-15T10:00:00Z
timestamp,sensor_id,energy_kwh,power_w,voltage_v,current_a
2024-01-15T10:00:00Z,SENSOR_001,1234.5,850.2,230.1,3.7
2024-01-15T10:01:00Z,SENSOR_001,1235.1,865.3,229.8,3.8
2024-01-15T10:02:00Z,SENSOR_001,1235.8,872.1,230.5,3.8
2024-01-15T10:03:00Z,SENSOR_002,987.3,654.2,228.9,2.9
2024-01-15T10:04:00Z,SENSOR_002,988.1,661.5,229.2,2.9
"""

SAMPLE_SLG_V2_SPACE_DELIMITED = """# Energy consumption data
# Facility: Lab Building A
2024-01-15T10:00:00 LAB_A_001 1500.23 750.5
2024-01-15T10:01:00 LAB_A_001 1501.85 780.2
2024-01-15T10:02:00 LAB_A_002 890.45 420.8
2024-01-15T10:03:00 LAB_A_002 891.20 435.1
"""


# Mock DataProcessor (without database dependencies), shared by both tests
class MockDataProcessor(DataProcessor):
    def __init__(self):
        # Intentionally no super().__init__(): only the format and timestamp
        # configuration is needed here, not live service connections.
        self.supported_formats = ["csv", "json", "txt", "xlsx", "slg_v2"]
        self.time_formats = [
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d %H:%M",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%dT%H:%M:%SZ",
            "%d/%m/%Y %H:%M:%S",
            "%d-%m-%Y %H:%M:%S",
            "%Y/%m/%d %H:%M:%S",
        ]


async def test_slg_v2_processing():
    """Test the .slg_v2 processing functionality."""
    print("🧪 Testing SA4CPS .slg_v2 file processing...")

    processor = MockDataProcessor()

    # Test 1: CSV-style .slg_v2 file
    print("\n📋 Test 1: CSV-style .slg_v2 file")
    try:
        result1 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_CONTENT)
        print(f"✅ Processed {len(result1)} records")

        if result1:
            sample_record = result1[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "value_type": sample_record.get("value_type"),
                "file_format": sample_record.get("file_format")
            }, indent=2))

    except Exception as e:
        print(f"❌ Test 1 failed: {e}")

    # Test 2: Space-delimited .slg_v2 file
    print("\n📋 Test 2: Space-delimited .slg_v2 file")
    try:
        result2 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_SPACE_DELIMITED)
        print(f"✅ Processed {len(result2)} records")

        if result2:
            sample_record = result2[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "metadata_keys": list(sample_record.get("metadata", {}).keys())
            }, indent=2))

    except Exception as e:
        print(f"❌ Test 2 failed: {e}")

    # Test 3: Unit inference
    print("\n📋 Test 3: Unit inference testing")
    test_units = [
        ("energy_kwh", 1234.5),
        ("power_w", 850.2),
        ("voltage_v", 230.1),
        ("current_a", 3.7),
        ("temperature", 25.5),
        ("frequency", 50.0)
    ]

    for col_name, value in test_units:
        unit = await processor._infer_slg_v2_unit(col_name, value)
        print(f"  {col_name} ({value}) -> {unit}")

    print("\n🎉 All tests completed!")
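
# Illustrative sketch only — an assumption about how column-name-based unit
# inference *could* work, not the actual DataProcessor._infer_slg_v2_unit
# implementation (which may also consult the value itself).
EXAMPLE_UNIT_HINTS = {
    "energy_kwh": "kWh",
    "power_w": "W",
    "voltage_v": "V",
    "current_a": "A",
    "frequency": "Hz",
}


def example_infer_unit(column_name: str) -> str:
    # Fall back to a sentinel when the column name gives no hint.
    return EXAMPLE_UNIT_HINTS.get(column_name.lower(), "unknown")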


async def test_integration():
    """Test integration with the main processing pipeline."""
    print("\n🔗 Testing integration with main processing pipeline...")

    processor = MockDataProcessor()

    # Test processing through the main interface
    try:
        file_content = SAMPLE_SLG_V2_CONTENT.encode("utf-8")
        processed_data = await processor.process_time_series_data(file_content, "slg_v2")

        print(f"✅ Main pipeline processed {len(processed_data)} records")

        if processed_data:
            # Analyze the data (skip records missing the field so join() gets strings)
            sensor_ids = set(record.get("sensor_id") for record in processed_data if record.get("sensor_id"))
            value_types = set(record.get("value_type") for record in processed_data if record.get("value_type"))

            print(f"📊 Found {len(sensor_ids)} unique sensors: {', '.join(sensor_ids)}")
            print(f"📈 Value types detected: {', '.join(value_types)}")

            # Show statistics ("is not None" keeps legitimate 0.0 readings)
            values = [record["value"] for record in processed_data if record.get("value") is not None]
            if values:
                print(f"📉 Value range: {min(values):.2f} - {max(values):.2f}")

    except Exception as e:
        print(f"❌ Integration test failed: {e}")
        import traceback
        traceback.print_exc()

def print_usage_info():
    """Print usage information for the SA4CPS FTP service."""
    print("""
🚀 SA4CPS FTP Service Implementation Complete!

📁 Key Files Created/Modified:
  • data-ingestion-service/sa4cps_config.py - SA4CPS configuration
  • data-ingestion-service/data_processor.py - Added .slg_v2 support
  • data-ingestion-service/startup_sa4cps.py - Auto-configuration script
  • data-ingestion-service/models.py - Added SLG_V2 format
  • docker-compose.yml - Added data-ingestion-service

🔧 To Deploy and Run:

1. Build and start the services:
   cd microservices
   docker-compose up -d data-ingestion-service

2. Configure SA4CPS connection:
   docker-compose exec data-ingestion-service python startup_sa4cps.py

3. Monitor the service:
   # Check health
   curl http://localhost:8008/health

   # View data sources
   curl http://localhost:8008/sources

   # Check processing stats
   curl http://localhost:8008/stats

4. Manual FTP credentials (if needed):
   # Update credentials via API
   curl -X POST http://localhost:8008/sources/{source_id}/credentials \\
        -H "Content-Type: application/json" \\
        -d '{"username": "your_user", "password": "your_pass"}'

📋 Environment Variables (in docker-compose.yml):
  • FTP_SA4CPS_HOST=ftp.sa4cps.pt
  • FTP_SA4CPS_USERNAME=anonymous
  • FTP_SA4CPS_PASSWORD=
  • FTP_SA4CPS_REMOTE_PATH=/

🔍 Features:
  ✅ Monitors ftp.sa4cps.pt for .slg_v2 files
  ✅ Processes multiple data formats (CSV, space-delimited, etc.)
  ✅ Auto-detects headers and data columns
  ✅ Intelligent unit inference
  ✅ Publishes to Redis topics: sa4cps_energy_data, sa4cps_sensor_metrics, sa4cps_raw_data
  ✅ Comprehensive error handling and monitoring
  ✅ Duplicate file detection
  ✅ Real-time processing status
""")

if __name__ == "__main__":
|
|
# Run tests
|
|
asyncio.run(test_slg_v2_processing())
|
|
asyncio.run(test_integration())
|
|
|
|
# Print usage info
|
|
print_usage_info() |
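
# To run the tests locally (assuming this script sits next to data_processor.py;
# the filename below is a placeholder — the actual name is not shown here):
#   python test_slg_v2.py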