Files
sac4cps-backend/microservices/data-ingestion-service/test_slg_v2.py
rafaeldpsilva 5fdce00e5d Add data-ingestion-service for SA4CPS FTP integration
- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit inference support
- Publish parsed data to Redis topics for real-time dashboard simulation
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
2025-09-10 14:43:30 +01:00

215 lines
7.5 KiB
Python

#!/usr/bin/env python3
"""
Test script for .slg_v2 file processing
"""
import asyncio
import json
from datetime import datetime
from data_processor import DataProcessor
# Sample .slg_v2 content for testing.
# CSV-style variant: "#" comment header lines, a CSV header row, then
# comma-separated sensor readings (one row per timestamped measurement).
SAMPLE_SLG_V2_CONTENT = """# SA4CPS Energy Monitoring Data
# System: Smart Grid Monitoring
# Location: Research Facility
# Start Time: 2024-01-15T10:00:00Z
timestamp,sensor_id,energy_kwh,power_w,voltage_v,current_a
2024-01-15T10:00:00Z,SENSOR_001,1234.5,850.2,230.1,3.7
2024-01-15T10:01:00Z,SENSOR_001,1235.1,865.3,229.8,3.8
2024-01-15T10:02:00Z,SENSOR_001,1235.8,872.1,230.5,3.8
2024-01-15T10:03:00Z,SENSOR_002,987.3,654.2,228.9,2.9
2024-01-15T10:04:00Z,SENSOR_002,988.1,661.5,229.2,2.9
"""

# Space-delimited variant: comment header lines but NO column header row.
# Columns are presumably timestamp, sensor id, and two numeric readings
# (energy/power) — TODO confirm against data_processor's column inference.
SAMPLE_SLG_V2_SPACE_DELIMITED = """# Energy consumption data
# Facility: Lab Building A
2024-01-15T10:00:00 LAB_A_001 1500.23 750.5
2024-01-15T10:01:00 LAB_A_001 1501.85 780.2
2024-01-15T10:02:00 LAB_A_002 890.45 420.8
2024-01-15T10:03:00 LAB_A_002 891.20 435.1
"""
async def test_slg_v2_processing():
    """Exercise .slg_v2 parsing on both sample formats and unit inference.

    Prints a human-readable report; raises nothing — per-test failures are
    caught and reported inline.
    """
    print("🧪 Testing SA4CPS .slg_v2 file processing...")

    # Stand-in subclass: skips DataProcessor's normal setup so no database
    # connection is required, while keeping the parsing methods available.
    class MockDataProcessor(DataProcessor):
        def __init__(self):
            self.supported_formats = ["csv", "json", "txt", "xlsx", "slg_v2"]
            self.time_formats = [
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d %H:%M",
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%dT%H:%M:%SZ",
                "%d/%m/%Y %H:%M:%S",
                "%d-%m-%Y %H:%M:%S",
                "%Y/%m/%d %H:%M:%S",
            ]

    processor = MockDataProcessor()

    # Test 1: CSV-style .slg_v2 file (comment header + CSV header row).
    print("\n📋 Test 1: CSV-style .slg_v2 file")
    try:
        records = await processor._process_slg_v2_data(SAMPLE_SLG_V2_CONTENT)
        print(f"✅ Processed {len(records)} records")
        if records:
            first = records[0]
            print("Sample record:")
            summary = {
                "sensor_id": first.get("sensor_id"),
                "timestamp": first.get("datetime"),
                "value": first.get("value"),
                "unit": first.get("unit"),
                "value_type": first.get("value_type"),
                "file_format": first.get("file_format"),
            }
            print(json.dumps(summary, indent=2))
    except Exception as e:
        print(f"❌ Test 1 failed: {e}")

    # Test 2: space-delimited .slg_v2 file (no header row).
    print("\n📋 Test 2: Space-delimited .slg_v2 file")
    try:
        records = await processor._process_slg_v2_data(SAMPLE_SLG_V2_SPACE_DELIMITED)
        print(f"✅ Processed {len(records)} records")
        if records:
            first = records[0]
            print("Sample record:")
            summary = {
                "sensor_id": first.get("sensor_id"),
                "timestamp": first.get("datetime"),
                "value": first.get("value"),
                "unit": first.get("unit"),
                "metadata_keys": list(first.get("metadata", {}).keys()),
            }
            print(json.dumps(summary, indent=2))
    except Exception as e:
        print(f"❌ Test 2 failed: {e}")

    # Test 3: column-name/value based unit inference.
    print("\n📋 Test 3: Unit inference testing")
    probes = [
        ("energy_kwh", 1234.5),
        ("power_w", 850.2),
        ("voltage_v", 230.1),
        ("current_a", 3.7),
        ("temperature", 25.5),
        ("frequency", 50.0),
    ]
    for column, reading in probes:
        inferred = await processor._infer_slg_v2_unit(column, reading)
        print(f" {column} ({reading}) -> {inferred}")

    print("\n🎉 All tests completed!")
async def test_integration():
    """Feed sample .slg_v2 bytes through the main processing entry point.

    Verifies that process_time_series_data() accepts the "slg_v2" format
    and prints simple summary statistics of the parsed records.
    """
    print("\n🔗 Testing integration with main processing pipeline...")

    # Stand-in subclass: skips DataProcessor's normal setup so no database
    # connection is required, while keeping the parsing methods available.
    class MockDataProcessor(DataProcessor):
        def __init__(self):
            self.supported_formats = ["csv", "json", "txt", "xlsx", "slg_v2"]
            self.time_formats = [
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d %H:%M",
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%dT%H:%M:%SZ",
                "%d/%m/%Y %H:%M:%S",
                "%d-%m-%Y %H:%M:%S",
                "%Y/%m/%d %H:%M:%S",
            ]

    processor = MockDataProcessor()

    # Run the bytes through the public pipeline entry point.
    try:
        payload = SAMPLE_SLG_V2_CONTENT.encode('utf-8')
        records = await processor.process_time_series_data(payload, "slg_v2")
        print(f"✅ Main pipeline processed {len(records)} records")
        if records:
            # Summarize which sensors and value types were detected.
            sensor_ids = {rec.get("sensor_id") for rec in records}
            value_types = {rec.get("value_type") for rec in records if rec.get("value_type")}
            print(f"📊 Found {len(sensor_ids)} unique sensors: {', '.join(sensor_ids)}")
            print(f"📈 Value types detected: {', '.join(value_types)}")
            # Basic statistics over the numeric readings.
            readings = [rec.get("value", 0) for rec in records if rec.get("value")]
            if readings:
                print(f"📉 Value range: {min(readings):.2f} - {max(readings):.2f}")
    except Exception as e:
        print(f"❌ Integration test failed: {e}")
        import traceback
        traceback.print_exc()
def print_usage_info():
    """Print deployment and usage instructions for the SA4CPS FTP service."""
    # Keep the banner text in a local so it reads as one literal document.
    usage = """
🚀 SA4CPS FTP Service Implementation Complete!
📁 Key Files Created/Modified:
• data-ingestion-service/sa4cps_config.py - SA4CPS configuration
• data-ingestion-service/data_processor.py - Added .slg_v2 support
• data-ingestion-service/startup_sa4cps.py - Auto-configuration script
• data-ingestion-service/models.py - Added SLG_V2 format
• docker-compose.yml - Added data-ingestion-service
🔧 To Deploy and Run:
1. Build and start the services:
cd microservices
docker-compose up -d data-ingestion-service
2. Configure SA4CPS connection:
docker-compose exec data-ingestion-service python startup_sa4cps.py
3. Monitor the service:
# Check health
curl http://localhost:8008/health
# View data sources
curl http://localhost:8008/sources
# Check processing stats
curl http://localhost:8008/stats
4. Manual FTP credentials (if needed):
# Update credentials via API
curl -X POST http://localhost:8008/sources/{source_id}/credentials \\
-H "Content-Type: application/json" \\
-d '{"username": "your_user", "password": "your_pass"}'
📋 Environment Variables (in docker-compose.yml):
• FTP_SA4CPS_HOST=ftp.sa4cps.pt
• FTP_SA4CPS_USERNAME=anonymous
• FTP_SA4CPS_PASSWORD=
• FTP_SA4CPS_REMOTE_PATH=/
🔍 Features:
✅ Monitors ftp.sa4cps.pt for .slg_v2 files
✅ Processes multiple data formats (CSV, space-delimited, etc.)
✅ Auto-detects headers and data columns
✅ Intelligent unit inference
✅ Publishes to Redis topics: sa4cps_energy_data, sa4cps_sensor_metrics, sa4cps_raw_data
✅ Comprehensive error handling and monitoring
✅ Duplicate file detection
✅ Real-time processing status
"""
    print(usage)
if __name__ == "__main__":
# Run tests
asyncio.run(test_slg_v2_processing())
asyncio.run(test_integration())
# Print usage info
print_usage_info()