- Implement FTP monitoring and ingestion for SA4CPS .slg_v2 files
- Add robust data processor with multi-format and unit inference support
- Publish parsed data to Redis topics for real-time dashboard simulation
- Include validation, monitoring, and auto-configuration scripts
- Provide documentation and test scripts for SA4CPS integration
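For orientation, here is a minimal sketch of the Redis publishing step described above. It assumes the redis-py client and the three topic names listed in the usage notes at the end of the test script below; the service's actual publishing code is not shown here and may differ.

    import json

    import redis  # assumption: the redis-py package

    r = redis.Redis(host="localhost", port=6379)

    def publish_record(record: dict) -> None:
        # Fan one parsed .slg_v2 record out to the dashboard topics.
        payload = json.dumps(record)
        for topic in ("sa4cps_energy_data", "sa4cps_sensor_metrics", "sa4cps_raw_data"):
            r.publish(topic, payload)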
215 lines · 7.5 KiB · Python
#!/usr/bin/env python3
"""
Test script for .slg_v2 file processing
"""

import asyncio
import json
from datetime import datetime
from data_processor import DataProcessor

# Sample .slg_v2 content for testing
SAMPLE_SLG_V2_CONTENT = """# SA4CPS Energy Monitoring Data
# System: Smart Grid Monitoring
# Location: Research Facility
# Start Time: 2024-01-15T10:00:00Z
timestamp,sensor_id,energy_kwh,power_w,voltage_v,current_a
2024-01-15T10:00:00Z,SENSOR_001,1234.5,850.2,230.1,3.7
2024-01-15T10:01:00Z,SENSOR_001,1235.1,865.3,229.8,3.8
2024-01-15T10:02:00Z,SENSOR_001,1235.8,872.1,230.5,3.8
2024-01-15T10:03:00Z,SENSOR_002,987.3,654.2,228.9,2.9
2024-01-15T10:04:00Z,SENSOR_002,988.1,661.5,229.2,2.9
"""

SAMPLE_SLG_V2_SPACE_DELIMITED = """# Energy consumption data
# Facility: Lab Building A
2024-01-15T10:00:00 LAB_A_001 1500.23 750.5
2024-01-15T10:01:00 LAB_A_001 1501.85 780.2
2024-01-15T10:02:00 LAB_A_002 890.45 420.8
2024-01-15T10:03:00 LAB_A_002 891.20 435.1
"""


# Mock DataProcessor (without database dependencies), shared by both tests
class MockDataProcessor(DataProcessor):
    def __init__(self):
        # Intentionally no super().__init__(): only the format and timestamp
        # configuration is needed here, not live service connections.
        self.supported_formats = ["csv", "json", "txt", "xlsx", "slg_v2"]
        self.time_formats = [
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d %H:%M",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%dT%H:%M:%SZ",
            "%d/%m/%Y %H:%M:%S",
            "%d-%m-%Y %H:%M:%S",
            "%Y/%m/%d %H:%M:%S",
        ]


async def test_slg_v2_processing():
    """Test the .slg_v2 processing functionality."""
    print("🧪 Testing SA4CPS .slg_v2 file processing...")

    processor = MockDataProcessor()

    # Test 1: CSV-style .slg_v2 file
    print("\n📋 Test 1: CSV-style .slg_v2 file")
    try:
        result1 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_CONTENT)
        print(f"✅ Processed {len(result1)} records")

        if result1:
            sample_record = result1[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "value_type": sample_record.get("value_type"),
                "file_format": sample_record.get("file_format")
            }, indent=2))

    except Exception as e:
        print(f"❌ Test 1 failed: {e}")

    # Test 2: Space-delimited .slg_v2 file
    print("\n📋 Test 2: Space-delimited .slg_v2 file")
    try:
        result2 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_SPACE_DELIMITED)
        print(f"✅ Processed {len(result2)} records")

        if result2:
            sample_record = result2[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "metadata_keys": list(sample_record.get("metadata", {}).keys())
            }, indent=2))

    except Exception as e:
        print(f"❌ Test 2 failed: {e}")

    # Test 3: Unit inference
    print("\n📋 Test 3: Unit inference testing")
    test_units = [
        ("energy_kwh", 1234.5),
        ("power_w", 850.2),
        ("voltage_v", 230.1),
        ("current_a", 3.7),
        ("temperature", 25.5),
        ("frequency", 50.0)
    ]

    for col_name, value in test_units:
        unit = await processor._infer_slg_v2_unit(col_name, value)
        print(f"  {col_name} ({value}) -> {unit}")

    print("\n🎉 All tests completed!")
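
# Illustrative sketch only — an assumption about how column-name-based unit
# inference *could* work, not the actual DataProcessor._infer_slg_v2_unit
# implementation (which may also consult the value itself).
EXAMPLE_UNIT_HINTS = {
    "energy_kwh": "kWh",
    "power_w": "W",
    "voltage_v": "V",
    "current_a": "A",
    "frequency": "Hz",
}


def example_infer_unit(column_name: str) -> str:
    # Fall back to a sentinel when the column name gives no hint.
    return EXAMPLE_UNIT_HINTS.get(column_name.lower(), "unknown")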


async def test_integration():
    """Test integration with the main processing pipeline."""
    print("\n🔗 Testing integration with main processing pipeline...")

    processor = MockDataProcessor()

    # Test processing through the main interface
    try:
        file_content = SAMPLE_SLG_V2_CONTENT.encode("utf-8")
        processed_data = await processor.process_time_series_data(file_content, "slg_v2")

        print(f"✅ Main pipeline processed {len(processed_data)} records")

        if processed_data:
            # Analyze the data (skip records missing the field so join() gets strings)
            sensor_ids = set(record.get("sensor_id") for record in processed_data if record.get("sensor_id"))
            value_types = set(record.get("value_type") for record in processed_data if record.get("value_type"))

            print(f"📊 Found {len(sensor_ids)} unique sensors: {', '.join(sensor_ids)}")
            print(f"📈 Value types detected: {', '.join(value_types)}")

            # Show statistics ("is not None" keeps legitimate 0.0 readings)
            values = [record["value"] for record in processed_data if record.get("value") is not None]
            if values:
                print(f"📉 Value range: {min(values):.2f} - {max(values):.2f}")

    except Exception as e:
        print(f"❌ Integration test failed: {e}")
        import traceback
        traceback.print_exc()

def print_usage_info():
    """Print usage information for the SA4CPS FTP service."""
    print("""
🚀 SA4CPS FTP Service Implementation Complete!

📁 Key Files Created/Modified:
  • data-ingestion-service/sa4cps_config.py - SA4CPS configuration
  • data-ingestion-service/data_processor.py - Added .slg_v2 support
  • data-ingestion-service/startup_sa4cps.py - Auto-configuration script
  • data-ingestion-service/models.py - Added SLG_V2 format
  • docker-compose.yml - Added data-ingestion-service

🔧 To Deploy and Run:

1. Build and start the services:
   cd microservices
   docker-compose up -d data-ingestion-service

2. Configure SA4CPS connection:
   docker-compose exec data-ingestion-service python startup_sa4cps.py

3. Monitor the service:
   # Check health
   curl http://localhost:8008/health

   # View data sources
   curl http://localhost:8008/sources

   # Check processing stats
   curl http://localhost:8008/stats

4. Manual FTP credentials (if needed):
   # Update credentials via API
   curl -X POST http://localhost:8008/sources/{source_id}/credentials \\
        -H "Content-Type: application/json" \\
        -d '{"username": "your_user", "password": "your_pass"}'

📋 Environment Variables (in docker-compose.yml):
  • FTP_SA4CPS_HOST=ftp.sa4cps.pt
  • FTP_SA4CPS_USERNAME=anonymous
  • FTP_SA4CPS_PASSWORD=
  • FTP_SA4CPS_REMOTE_PATH=/

🔍 Features:
  ✅ Monitors ftp.sa4cps.pt for .slg_v2 files
  ✅ Processes multiple data formats (CSV, space-delimited, etc.)
  ✅ Auto-detects headers and data columns
  ✅ Intelligent unit inference
  ✅ Publishes to Redis topics: sa4cps_energy_data, sa4cps_sensor_metrics, sa4cps_raw_data
  ✅ Comprehensive error handling and monitoring
  ✅ Duplicate file detection
  ✅ Real-time processing status
""")

if __name__ == "__main__":
|
|
# Run tests
|
|
asyncio.run(test_slg_v2_processing())
|
|
asyncio.run(test_integration())
|
|
|
|
# Print usage info
|
|
print_usage_info() |
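
# To run the tests locally (assuming this script sits next to data_processor.py;
# the filename below is a placeholder — the actual name is not shown here):
#   python test_slg_v2.py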