#!/usr/bin/env python3
"""
Test script for .slg_v2 file processing
"""

import asyncio
import json

from data_processor import DataProcessor

# Sample .slg_v2 content for testing
SAMPLE_SLG_V2_CONTENT = """# SA4CPS Energy Monitoring Data
# System: Smart Grid Monitoring
# Location: Research Facility
# Start Time: 2024-01-15T10:00:00Z
timestamp,sensor_id,energy_kwh,power_w,voltage_v,current_a
2024-01-15T10:00:00Z,SENSOR_001,1234.5,850.2,230.1,3.7
2024-01-15T10:01:00Z,SENSOR_001,1235.1,865.3,229.8,3.8
2024-01-15T10:02:00Z,SENSOR_001,1235.8,872.1,230.5,3.8
2024-01-15T10:03:00Z,SENSOR_002,987.3,654.2,228.9,2.9
2024-01-15T10:04:00Z,SENSOR_002,988.1,661.5,229.2,2.9
"""

SAMPLE_SLG_V2_SPACE_DELIMITED = """# Energy consumption data
# Facility: Lab Building A
2024-01-15T10:00:00 LAB_A_001 1500.23 750.5
2024-01-15T10:01:00 LAB_A_001 1501.85 780.2
2024-01-15T10:02:00 LAB_A_002 890.45 420.8
2024-01-15T10:03:00 LAB_A_002 891.20 435.1
"""


class MockDataProcessor(DataProcessor):
    """Mock DataProcessor that skips database setup so the tests run standalone."""

    def __init__(self):
        self.supported_formats = ["csv", "json", "txt", "xlsx", "slg_v2"]
        self.time_formats = [
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d %H:%M",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%dT%H:%M:%SZ",
            "%d/%m/%Y %H:%M:%S",
            "%d-%m-%Y %H:%M:%S",
            "%Y/%m/%d %H:%M:%S",
        ]


async def test_slg_v2_processing():
    """Test the .slg_v2 processing functionality"""
    print("🧪 Testing SA4CPS .slg_v2 file processing...")

    # Mock DataProcessor (without database dependencies)
    processor = MockDataProcessor()

    # Test 1: CSV-style .slg_v2 file
    print("\n📋 Test 1: CSV-style .slg_v2 file")
    try:
        result1 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_CONTENT)
        print(f"✅ Processed {len(result1)} records")
        if result1:
            sample_record = result1[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "value_type": sample_record.get("value_type"),
                "file_format": sample_record.get("file_format"),
            }, indent=2))
    except Exception as e:
        print(f"❌ Test 1 failed: {e}")

    # Test 2: Space-delimited .slg_v2 file
    print("\n📋 Test 2: Space-delimited .slg_v2 file")
    try:
        result2 = await processor._process_slg_v2_data(SAMPLE_SLG_V2_SPACE_DELIMITED)
        print(f"✅ Processed {len(result2)} records")
        if result2:
            sample_record = result2[0]
            print("Sample record:")
            print(json.dumps({
                "sensor_id": sample_record.get("sensor_id"),
                "timestamp": sample_record.get("datetime"),
                "value": sample_record.get("value"),
                "unit": sample_record.get("unit"),
                "metadata_keys": list(sample_record.get("metadata", {}).keys()),
            }, indent=2))
    except Exception as e:
        print(f"❌ Test 2 failed: {e}")

    # Test 3: Unit inference
    print("\n📋 Test 3: Unit inference testing")
    test_units = [
        ("energy_kwh", 1234.5),
        ("power_w", 850.2),
        ("voltage_v", 230.1),
        ("current_a", 3.7),
        ("temperature", 25.5),
        ("frequency", 50.0),
    ]
    for col_name, value in test_units:
        unit = await processor._infer_slg_v2_unit(col_name, value)
        print(f"  {col_name} ({value}) -> {unit}")

    print("\n🎉 All tests completed!")
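
# --- Illustrative sketch (assumption, not the real implementation) ---
# Test 3 above exercises DataProcessor._infer_slg_v2_unit, which lives in
# data_processor.py and is not shown in this file. The stand-in below only
# sketches the idea: map column-name suffixes from the sample data
# (energy_kwh, power_w, voltage_v, current_a) to units. The mapping and the
# helper name are assumptions for illustration.
UNIT_SUFFIX_MAP = {
    "kwh": "kWh",  # energy columns, e.g. energy_kwh
    "w": "W",      # power columns, e.g. power_w
    "v": "V",      # voltage columns, e.g. voltage_v
    "a": "A",      # current columns, e.g. current_a
}


def infer_unit_from_suffix(column_name: str) -> str:
    """Guess a unit from a column-name suffix, e.g. 'energy_kwh' -> 'kWh'."""
    suffix = column_name.rsplit("_", 1)[-1].lower()
    return UNIT_SUFFIX_MAP.get(suffix, "unknown")
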
%H:%M:%S", "%d-%m-%Y %H:%M:%S", "%Y/%m/%d %H:%M:%S" ] processor = MockDataProcessor() # Test processing through the main interface try: file_content = SAMPLE_SLG_V2_CONTENT.encode('utf-8') processed_data = await processor.process_time_series_data(file_content, "slg_v2") print(f"โœ… Main pipeline processed {len(processed_data)} records") if processed_data: # Analyze the data sensor_ids = set(record.get("sensor_id") for record in processed_data) value_types = set(record.get("value_type") for record in processed_data if record.get("value_type")) print(f"๐Ÿ“Š Found {len(sensor_ids)} unique sensors: {', '.join(sensor_ids)}") print(f"๐Ÿ“ˆ Value types detected: {', '.join(value_types)}") # Show statistics values = [record.get("value", 0) for record in processed_data if record.get("value")] if values: print(f"๐Ÿ“‰ Value range: {min(values):.2f} - {max(values):.2f}") except Exception as e: print(f"โŒ Integration test failed: {e}") import traceback traceback.print_exc() def print_usage_info(): """Print usage information for the SA4CPS FTP service""" print(""" ๐Ÿš€ SA4CPS FTP Service Implementation Complete! ๐Ÿ“ Key Files Created/Modified: โ€ข data-ingestion-service/sa4cps_config.py - SA4CPS configuration โ€ข data-ingestion-service/data_processor.py - Added .slg_v2 support โ€ข data-ingestion-service/startup_sa4cps.py - Auto-configuration script โ€ข data-ingestion-service/models.py - Added SLG_V2 format โ€ข docker-compose.yml - Added data-ingestion-service ๐Ÿ”ง To Deploy and Run: 1. Build and start the services: cd microservices docker-compose up -d data-ingestion-service 2. Configure SA4CPS connection: docker-compose exec data-ingestion-service python startup_sa4cps.py 3. Monitor the service: # Check health curl http://localhost:8008/health # View data sources curl http://localhost:8008/sources # Check processing stats curl http://localhost:8008/stats 4. Manual FTP credentials (if needed): # Update credentials via API curl -X POST http://localhost:8008/sources/{source_id}/credentials \\ -H "Content-Type: application/json" \\ -d '{"username": "your_user", "password": "your_pass"}' ๐Ÿ“‹ Environment Variables (in docker-compose.yml): โ€ข FTP_SA4CPS_HOST=ftp.sa4cps.pt โ€ข FTP_SA4CPS_USERNAME=anonymous โ€ข FTP_SA4CPS_PASSWORD= โ€ข FTP_SA4CPS_REMOTE_PATH=/ ๐Ÿ” Features: โœ… Monitors ftp.sa4cps.pt for .slg_v2 files โœ… Processes multiple data formats (CSV, space-delimited, etc.) โœ… Auto-detects headers and data columns โœ… Intelligent unit inference โœ… Publishes to Redis topics: sa4cps_energy_data, sa4cps_sensor_metrics, sa4cps_raw_data โœ… Comprehensive error handling and monitoring โœ… Duplicate file detection โœ… Real-time processing status """) if __name__ == "__main__": # Run tests asyncio.run(test_slg_v2_processing()) asyncio.run(test_integration()) # Print usage info print_usage_info()