Add data-ingestion-service for SA4CPS FTP integration
- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit inference support
- Publish parsed data to Redis topics for real-time dashboard simulation
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
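
For context on the consumer side: parsed records are published to the Redis topics named in the feature list below (sa4cps_energy_data, sa4cps_sensor_metrics, sa4cps_raw_data). A minimal subscriber sketch, assuming the redis-py client, a Redis instance on localhost:6379, and JSON-encoded payloads (the payload encoding is an assumption, not something this commit confirms):

    # Sketch only: assumes redis-py, Redis on localhost:6379, JSON payloads.
    import json
    import redis

    r = redis.Redis(host="localhost", port=6379)
    pubsub = r.pubsub()
    pubsub.subscribe("sa4cps_energy_data")

    for message in pubsub.listen():
        if message["type"] != "message":
            continue
        record = json.loads(message["data"])
        print(record.get("sensor_id"), record.get("value"), record.get("unit"))

Any Redis client would do here; redis-py is just a convenient default for the dashboard side.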
microservices/data-ingestion-service/test_slg_v2.py (new file, 215 lines)
@@ -0,0 +1,215 @@
#!/usr/bin/env python3
"""
Test script for .slg_v2 file processing
"""

import asyncio
import json
from datetime import datetime
from data_processor import DataProcessor

# Sample .slg_v2 content for testing
SAMPLE_SLG_V2_CONTENT = """# SA4CPS Energy Monitoring Data
# System: Smart Grid Monitoring
# Location: Research Facility
# Start Time: 2024-01-15T10:00:00Z
timestamp,sensor_id,energy_kwh,power_w,voltage_v,current_a
2024-01-15T10:00:00Z,SENSOR_001,1234.5,850.2,230.1,3.7
2024-01-15T10:01:00Z,SENSOR_001,1235.1,865.3,229.8,3.8
2024-01-15T10:02:00Z,SENSOR_001,1235.8,872.1,230.5,3.8
2024-01-15T10:03:00Z,SENSOR_002,987.3,654.2,228.9,2.9
2024-01-15T10:04:00Z,SENSOR_002,988.1,661.5,229.2,2.9
"""

SAMPLE_SLG_V2_SPACE_DELIMITED = """# Energy consumption data
# Facility: Lab Building A
2024-01-15T10:00:00 LAB_A_001 1500.23 750.5
2024-01-15T10:01:00 LAB_A_001 1501.85 780.2
2024-01-15T10:02:00 LAB_A_002 890.45 420.8
2024-01-15T10:03:00 LAB_A_002 891.20 435.1
"""

async def test_slg_v2_processing():
    """Test the .slg_v2 processing functionality"""
    print("🧪 Testing SA4CPS .slg_v2 file processing...")

    # Create a mock DataProcessor (without database dependencies)
    class MockDataProcessor(DataProcessor):
        def __init__(self):
            self.supported_formats = ["csv", "json", "txt", "xlsx", "slg_v2"]
            self.time_formats = [
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d %H:%M",
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%dT%H:%M:%SZ",
                "%d/%m/%Y %H:%M:%S",
                "%d-%m-%Y %H:%M:%S",
                "%Y/%m/%d %H:%M:%S"
            ]

    processor = MockDataProcessor()

    # Test 1: CSV-style .slg_v2 file
    print("\n📋 Test 1: CSV-style .slg_v2 file")
    try:
        result1 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_CONTENT)
        print(f"✅ Processed {len(result1)} records")

        if result1:
            sample_record = result1[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "value_type": sample_record.get("value_type"),
                "file_format": sample_record.get("file_format")
            }, indent=2))

    except Exception as e:
        print(f"❌ Test 1 failed: {e}")

    # Test 2: Space-delimited .slg_v2 file
    print("\n📋 Test 2: Space-delimited .slg_v2 file")
    try:
        result2 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_SPACE_DELIMITED)
        print(f"✅ Processed {len(result2)} records")

        if result2:
            sample_record = result2[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "metadata_keys": list(sample_record.get("metadata", {}).keys())
            }, indent=2))

    except Exception as e:
        print(f"❌ Test 2 failed: {e}")

    # Test 3: Unit inference
    print("\n📋 Test 3: Unit inference testing")
    test_units = [
        ("energy_kwh", 1234.5),
        ("power_w", 850.2),
        ("voltage_v", 230.1),
        ("current_a", 3.7),
        ("temperature", 25.5),
        ("frequency", 50.0)
    ]

    for col_name, value in test_units:
        unit = await processor._infer_slg_v2_unit(col_name, value)
        print(f" {col_name} ({value}) -> {unit}")

    print("\n🎉 All tests completed!")

async def test_integration():
    """Test integration with the main processing pipeline"""
    print("\n🔗 Testing integration with main processing pipeline...")

    # Create a mock DataProcessor (without database dependencies)
    class MockDataProcessor(DataProcessor):
        def __init__(self):
            self.supported_formats = ["csv", "json", "txt", "xlsx", "slg_v2"]
            self.time_formats = [
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d %H:%M",
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%dT%H:%M:%SZ",
                "%d/%m/%Y %H:%M:%S",
                "%d-%m-%Y %H:%M:%S",
                "%Y/%m/%d %H:%M:%S"
            ]

    processor = MockDataProcessor()

    # Test processing through the main interface
    try:
        file_content = SAMPLE_SLG_V2_CONTENT.encode('utf-8')
        processed_data = await processor.process_time_series_data(file_content, "slg_v2")

        print(f"✅ Main pipeline processed {len(processed_data)} records")

        if processed_data:
            # Analyze the data
            sensor_ids = set(record.get("sensor_id") for record in processed_data)
            value_types = set(record.get("value_type") for record in processed_data if record.get("value_type"))

            print(f"📊 Found {len(sensor_ids)} unique sensors: {', '.join(sensor_ids)}")
            print(f"📈 Value types detected: {', '.join(value_types)}")

            # Show statistics
            values = [record.get("value", 0) for record in processed_data if record.get("value")]
            if values:
                print(f"📉 Value range: {min(values):.2f} - {max(values):.2f}")

    except Exception as e:
        print(f"❌ Integration test failed: {e}")
        import traceback
        traceback.print_exc()
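
# For reference: the record fields probed by the tests above (sensor_id,
# datetime, value, unit, value_type, file_format, metadata) suggest parsed
# output shaped roughly like this. The values below are illustrative
# placeholders, not actual output of _process_slg_v2_data().
EXAMPLE_RECORD_SHAPE = {
    "sensor_id": "SENSOR_001",           # from the sample CSV content above
    "datetime": "2024-01-15T10:00:00Z",  # matched against self.time_formats
    "value": 1234.5,
    "unit": "kWh",                       # e.g. inferred from an "energy_kwh" column
    "value_type": "energy",              # assumed label; real tags come from the processor
    "file_format": "slg_v2",
    "metadata": {},                      # contents depend on the processor
}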

def print_usage_info():
    """Print usage information for the SA4CPS FTP service"""
    print("""
🚀 SA4CPS FTP Service Implementation Complete!

📁 Key Files Created/Modified:
  • data-ingestion-service/sa4cps_config.py - SA4CPS configuration
  • data-ingestion-service/data_processor.py - Added .slg_v2 support
  • data-ingestion-service/startup_sa4cps.py - Auto-configuration script
  • data-ingestion-service/models.py - Added SLG_V2 format
  • docker-compose.yml - Added data-ingestion-service

🔧 To Deploy and Run:

1. Build and start the services:
   cd microservices
   docker-compose up -d data-ingestion-service

2. Configure SA4CPS connection:
   docker-compose exec data-ingestion-service python startup_sa4cps.py

3. Monitor the service:
   # Check health
   curl http://localhost:8008/health

   # View data sources
   curl http://localhost:8008/sources

   # Check processing stats
   curl http://localhost:8008/stats

4. Manual FTP credentials (if needed):
   # Update credentials via API
   curl -X POST http://localhost:8008/sources/{source_id}/credentials \\
     -H "Content-Type: application/json" \\
     -d '{"username": "your_user", "password": "your_pass"}'

📋 Environment Variables (in docker-compose.yml):
  • FTP_SA4CPS_HOST=ftp.sa4cps.pt
  • FTP_SA4CPS_USERNAME=anonymous
  • FTP_SA4CPS_PASSWORD=
  • FTP_SA4CPS_REMOTE_PATH=/

🔍 Features:
  ✅ Monitors ftp.sa4cps.pt for .slg_v2 files
  ✅ Processes multiple data formats (CSV, space-delimited, etc.)
  ✅ Auto-detects headers and data columns
  ✅ Intelligent unit inference
  ✅ Publishes to Redis topics: sa4cps_energy_data, sa4cps_sensor_metrics, sa4cps_raw_data
  ✅ Comprehensive error handling and monitoring
  ✅ Duplicate file detection
  ✅ Real-time processing status
""")

if __name__ == "__main__":
    # Run tests
    asyncio.run(test_slg_v2_processing())
    asyncio.run(test_integration())

    # Print usage info
    print_usage_info()