Switch to PyMongo, update config and requirements, fix FTP extension typo

- Replace Motor (async) with PyMongo (sync) in the database manager
- Update environment variable names for the FTP and MongoDB config
- Remove unused dependencies from requirements.txt
- Fix file extension typo: .slg_v2 → .sgl_v2 throughout code and docs
- Add debug prints for MongoDB env vars in config
- Update FTP monitor to use the correct file extension and PyMongo
- Adjust FastAPI descriptions for the new extension
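The core of the change is mechanical: every awaited Motor call becomes a direct PyMongo call, and `async for` over cursors becomes a plain `for`. A minimal sketch of the pattern (illustrative only; the connection string and collection names mirror the diff below):

```
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure

# Motor (before):                        PyMongo (after):
#   client = AsyncIOMotorClient(uri)       client = MongoClient(uri)
#   await client.admin.command("ping")     client.admin.command("ping")
#   result = await coll.insert_many(r)     result = coll.insert_many(r)
#   async for doc in coll.find(q): ...     for doc in coll.find(q): ...

client = MongoClient("mongodb://localhost:27017", serverSelectionTimeoutMS=5000)
try:
    client.admin.command("ping")  # blocking round-trip; raises if unreachable
except ConnectionFailure as exc:
    raise SystemExit(f"MongoDB unreachable: {exc}")

coll = client["digitalmente_ingestion"]["sa4cps_energy_data"]
result = coll.insert_many([{"sensor_id": "S1", "energy_kwh": 1.2}])
print(len(result.inserted_ids), "records inserted")
for doc in coll.find({"sensor_id": "S1"}):  # plain for, not async for
    print(doc["_id"])
```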
@@ -9,10 +9,10 @@ The Data Ingestion Service provides comprehensive FTP monitoring and data processing
 ## Architecture
 
 ```
-ftp.sa4cps.pt (.slg_v2 files)
+ftp.sa4cps.pt (.sgl_v2 files)
         ↓
 FTP Monitor (polls every 5 minutes)
         ↓
 Data Processor (supports multiple formats)
         ↓
 Redis Publisher (3 topic channels)
@@ -84,9 +84,9 @@ Set these in the `docker-compose.yml`:
 environment:
   - FTP_SA4CPS_HOST=ftp.sa4cps.pt    # FTP server hostname
   - FTP_SA4CPS_PORT=21               # FTP port (default: 21)
-  - FTP_SA4CPS_USERNAME=anonymous    # FTP username
+  - FTP_SA4CPS_USERNAME=             # FTP username
   - FTP_SA4CPS_PASSWORD=             # FTP password (empty for anonymous)
   - FTP_SA4CPS_REMOTE_PATH=/         # Remote directory path
 ```
 
 ### Manual Configuration
@@ -101,7 +101,7 @@ configurator = SA4CPSConfigurator()
 # Create data source
 result = await configurator.create_sa4cps_data_source(
     username="your_username",
     password="your_password",
     remote_path="/data/energy"
 )
 
@@ -144,7 +144,7 @@ timestamp,sensor_id,energy_kwh,power_w,voltage_v
 2024-01-15T10:01:00Z,SENSOR_001,1235.1,865.3,229.8
 ```
 
 ### Space-Delimited Format
 ```
 # Energy consumption data
 # System: Smart Grid Monitor
@@ -191,7 +191,7 @@ All processed data is converted to a standardized sensor reading format:
 ### sa4cps_energy_data
 Primary energy consumption and power readings:
 - Energy consumption (kWh, MWh)
 - Power readings (W, kW, MW)
 - Efficiency metrics
 
 ### sa4cps_sensor_metrics
@@ -201,7 +201,7 @@ Sensor telemetry and environmental data:
 - Sensor status/diagnostics
 - System health metrics
 
 ### sa4cps_raw_data
 Raw unprocessed data for debugging:
 - Original file content
 - Processing metadata
@@ -278,10 +278,10 @@ class CustomSA4CPSProcessor(DataProcessor):
     async def _process_slg_v2_line(self, line, header, metadata, line_idx):
         # Custom line processing logic
        processed = await super()._process_slg_v2_line(line, header, metadata, line_idx)
 
         # Add custom fields
         processed['custom_field'] = 'custom_value'
 
         return processed
 ```
 
@@ -295,4 +295,4 @@ For issues or questions:
 
 ## License
 
 This implementation is part of the SA4CPS project energy monitoring dashboard.
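The README's CSV sample above maps directly onto the standardized sensor reading format. A hedged sketch of that parsing step (field names follow the sample header row; this helper itself is illustrative, not part of the codebase):

```
import csv
from datetime import datetime
from typing import Dict, Iterator

def parse_csv_readings(path: str) -> Iterator[Dict]:
    """Parse rows like: timestamp,sensor_id,energy_kwh,power_w,voltage_v."""
    with open(path, newline="") as fh:
        for row in csv.DictReader(fh):
            yield {
                # "Z" suffix normalized for datetime.fromisoformat on older Pythons
                "timestamp": datetime.fromisoformat(row["timestamp"].replace("Z", "+00:00")),
                "sensor_id": row["sensor_id"],
                "energy_kwh": float(row["energy_kwh"]),
                "power_w": float(row["power_w"]),
                "voltage_v": float(row["voltage_v"]),
            }
```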
@@ -3,26 +3,12 @@ fastapi==0.104.1
 uvicorn==0.24.0
 pydantic==2.5.0
 
-# Database dependencies
-motor==3.3.2
+# Database dependencies - using PyMongo (sync) instead of Motor (async)
 pymongo==4.6.0
-redis==5.0.1
 
 # FTP handling
 ftputil==5.0.4
 
-# Data processing
-pandas==2.1.4
-numpy==1.25.2
-openpyxl==3.1.2
-xlrd==2.0.1
-
-# Async HTTP client
-httpx==0.25.2
-
-# Logging and monitoring
-structlog==23.2.0
-
 # Date/time utilities
 python-dateutil==2.8.2
 
@@ -31,5 +17,4 @@ typing-extensions==4.8.0
 
 # Development dependencies (optional)
 pytest==7.4.3
 pytest-asyncio==0.21.1
-pytest-cov==4.1.0
@@ -9,20 +9,24 @@ from typing import Dict, Any
 
 # FTP Configuration for SA4CPS server
 FTP_CONFIG: Dict[str, Any] = {
-    "host": os.getenv("SA4CPS_FTP_HOST", "ftp.sa4cps.pt"),
-    "username": os.getenv("SA4CPS_FTP_USER", "curvascarga@sa4cps.pt"),
-    "password": os.getenv("SA4CPS_FTP_PASS", ""),  # Set via environment variable
-    "base_path": os.getenv("SA4CPS_FTP_PATH", "/SLGs/Faial/PT0010000000015181AA/"),
-    "check_interval": int(os.getenv("SA4CPS_CHECK_INTERVAL", "21600"))  # 6 hours default
+    "host": os.getenv("FTP_SA4CPS_HOST", "ftp.sa4cps.pt"),
+    "username": os.getenv("FTP_SA4CPS_USERNAME", "curvascarga@sa4cps.pt"),
+    "password": os.getenv("FTP_SA4CPS_PASSWORD", 'n$WFtz9+bleN'),  # Set via environment variable
+    "base_path": os.getenv("FTP_SA4CPS_REMOTE_PATH", "/SLGs/Faial/"),
+    "check_interval": int(os.getenv("FTP_CHECK_INTERVAL", "21600"))  # 6 hours default
 }
 
 # MongoDB Configuration
+# Debug environment variables
+print(f"DEBUG: MONGO_URL env var = {os.getenv('MONGO_URL', 'NOT SET')}")
+print(f"DEBUG: All env vars starting with MONGO: {[k for k in os.environ.keys() if k.startswith('MONGO')]}")
+
 MONGO_CONFIG: Dict[str, Any] = {
     "connection_string": os.getenv(
-        "MONGODB_URL",
-        "mongodb://admin:admin@localhost:27018/sa4cps_energy?authSource=admin"
+        "MONGO_URL",
+        "mongodb://admin:password123@localhost:27017/digitalmente_ingestion?authSource=admin"
     ),
-    "database_name": os.getenv("MONGODB_DATABASE", "sa4cps_energy")
+    "database_name": os.getenv("MONGODB_DATABASE", "digitalmente_ingestion")
 }
 
 # Logging Configuration
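Because `os.getenv` silently falls back to the hardcoded default when a variable name does not match, a renamed variable that is not also updated in `docker-compose.yml` goes unnoticed; the debug prints above exist for exactly this reason. A small startup check along these lines (an assumption, not part of the commit) would make the failure explicit:

```
import os

# Variable names introduced by this commit; extend as needed.
REQUIRED_VARS = [
    "FTP_SA4CPS_HOST",
    "FTP_SA4CPS_USERNAME",
    "FTP_SA4CPS_PASSWORD",
    "FTP_SA4CPS_REMOTE_PATH",
    "MONGO_URL",
]

missing = [name for name in REQUIRED_VARS if os.getenv(name) is None]
if missing:
    # The hardcoded defaults (including credentials) will be used for these.
    print(f"WARNING: env vars not set, falling back to defaults: {', '.join(missing)}")
```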
@@ -1,14 +1,12 @@
-#!/usr/bin/env python3
 """
 MongoDB Database Manager for SA4CPS Data Ingestion
-Simple async MongoDB operations for storing .slg_v2 file data
+Simple sync MongoDB operations for storing .sgl_v2 file data
 """
 
-import asyncio
 import logging
 from datetime import datetime
 from typing import List, Dict, Any, Optional
-from motor.motor_asyncio import AsyncIOMotorClient
+from pymongo import MongoClient
 from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
 
 from config import MONGO_CONFIG
@@ -18,26 +16,27 @@ logger = logging.getLogger(__name__)
 
 class DatabaseManager:
     """Manages MongoDB connections and operations for SA4CPS data"""
 
     def __init__(self):
-        self.client: Optional[AsyncIOMotorClient] = None
+        self.client: Optional[MongoClient] = None
         self.db = None
         self.collections = {}
 
         # MongoDB configuration
         self.connection_string = MONGO_CONFIG["connection_string"]
         self.database_name = MONGO_CONFIG["database_name"]
 
         logger.info(f"Database manager initialized for: {self.database_name}")
 
     async def connect(self):
         """Connect to MongoDB"""
         try:
-            self.client = AsyncIOMotorClient(self.connection_string)
+            logger.info(f"Connecting to MongoDB at: {self.connection_string}")
+            self.client = MongoClient(self.connection_string, serverSelectionTimeoutMS=5000)
 
             # Test connection
-            await self.client.admin.command('ping')
+            await self.ping()
 
             # Get database and collections
             self.db = self.client[self.database_name]
             self.collections = {
@@ -45,50 +44,58 @@ class DatabaseManager:
                 'energy_data': self.db.sa4cps_energy_data,
                 'metadata': self.db.sa4cps_metadata
             }
 
             # Create indexes for better performance
-            await self._create_indexes()
+            self._create_indexes()
 
-            logger.info(f"Connected to MongoDB: {self.database_name}")
+            logger.info(f"Connected to MongoDB database: {self.database_name}")
 
         except (ConnectionFailure, ServerSelectionTimeoutError) as e:
             logger.error(f"Failed to connect to MongoDB: {e}")
             raise
 
     async def close(self):
         """Close MongoDB connection"""
         if self.client:
             self.client.close()
             logger.info("MongoDB connection closed")
 
     async def ping(self):
         """Test database connection"""
         if not self.client:
             raise ConnectionFailure("No database connection")
 
-        await self.client.admin.command('ping')
-
-    async def _create_indexes(self):
+        try:
+            # The ping command is cheap and does not require auth.
+            self.client.admin.command('ping')
+            logger.info("MongoDB ping successful")
+        except ConnectionFailure as e:
+            logger.error(f"MongoDB ping failed - Server not available: {e}")
+            raise
+        except Exception as e:
+            logger.error(f"MongoDB ping failed with error: {e}")
+            raise ConnectionFailure(f"Ping failed: {e}")
+
+    def _create_indexes(self):
         """Create database indexes for efficient queries"""
         try:
             # Index on files collection
-            await self.collections['files'].create_index("filename", unique=True)
-            await self.collections['files'].create_index("processed_at")
+            self.collections['files'].create_index("filename", unique=True)
+            self.collections['files'].create_index("processed_at")
 
             # Index on energy data collection
-            await self.collections['energy_data'].create_index([("filename", 1), ("timestamp", 1)])
-            await self.collections['energy_data'].create_index("timestamp")
+            self.collections['energy_data'].create_index([("filename", 1), ("timestamp", 1)])
+            self.collections['energy_data'].create_index("timestamp")
 
             logger.info("Database indexes created successfully")
 
         except Exception as e:
             logger.warning(f"Failed to create indexes: {e}")
 
     async def store_file_data(self, filename: str, records: List[Dict[str, Any]]) -> bool:
-        """Store processed .slg_v2 file data in MongoDB"""
+        """Store processed .sgl_v2 file data in MongoDB"""
         try:
             current_time = datetime.now()
 
             # Store file metadata
             file_metadata = {
                 "filename": filename,
@@ -97,31 +104,31 @@ class DatabaseManager:
                 "file_size": sum(len(str(record)) for record in records),
                 "status": "processed"
             }
 
             # Insert or update file record
-            await self.collections['files'].replace_one(
+            self.collections['files'].replace_one(
                 {"filename": filename},
                 file_metadata,
                 upsert=True
             )
 
             # Add filename and processed timestamp to each record
             for record in records:
                 record["filename"] = filename
                 record["processed_at"] = current_time
 
             # Insert energy data records
             if records:
-                result = await self.collections['energy_data'].insert_many(records)
+                result = self.collections['energy_data'].insert_many(records)
                 inserted_count = len(result.inserted_ids)
                 logger.info(f"Stored {inserted_count} records from {filename}")
                 return True
 
             return False
 
         except Exception as e:
             logger.error(f"Error storing data for {filename}: {e}")
 
             # Store error metadata
             error_metadata = {
                 "filename": filename,
@@ -129,15 +136,15 @@ class DatabaseManager:
                 "status": "error",
                 "error_message": str(e)
             }
 
-            await self.collections['files'].replace_one(
+            self.collections['files'].replace_one(
                 {"filename": filename},
                 error_metadata,
                 upsert=True
             )
 
             return False
 
     async def get_processed_files(self) -> List[str]:
         """Get list of successfully processed files"""
         try:
@@ -145,25 +152,25 @@ class DatabaseManager:
                 {"status": "processed"},
                 {"filename": 1, "_id": 0}
             )
 
             files = []
-            async for doc in cursor:
+            for doc in cursor:
                 files.append(doc["filename"])
 
             return files
 
         except Exception as e:
             logger.error(f"Error getting processed files: {e}")
             return []
 
     async def get_file_info(self, filename: str) -> Optional[Dict[str, Any]]:
         """Get information about a specific file"""
         try:
-            return await self.collections['files'].find_one({"filename": filename})
+            return self.collections['files'].find_one({"filename": filename})
         except Exception as e:
             logger.error(f"Error getting file info for {filename}: {e}")
             return None
 
     async def get_stats(self) -> Dict[str, Any]:
         """Get database statistics"""
         try:
@@ -171,15 +178,15 @@ class DatabaseManager:
                 "database": self.database_name,
                 "timestamp": datetime.now().isoformat()
             }
 
             # Count documents in each collection
             for name, collection in self.collections.items():
                 try:
-                    count = await collection.count_documents({})
+                    count = collection.count_documents({})
                     stats[f"{name}_count"] = count
                 except Exception as e:
                     stats[f"{name}_count"] = f"error: {e}"
 
             # Get recent files
             try:
                 recent_files = []
@@ -187,24 +194,24 @@ class DatabaseManager:
                     {},
                     {"filename": 1, "processed_at": 1, "record_count": 1, "status": 1, "_id": 0}
                 ).sort("processed_at", -1).limit(5)
 
-                async for doc in cursor:
+                for doc in cursor:
                     if doc.get("processed_at"):
                         doc["processed_at"] = doc["processed_at"].isoformat()
                     recent_files.append(doc)
 
                 stats["recent_files"] = recent_files
 
             except Exception as e:
                 stats["recent_files"] = f"error: {e}"
 
             return stats
 
         except Exception as e:
             logger.error(f"Error getting database stats: {e}")
             return {"error": str(e), "timestamp": datetime.now().isoformat()}
 
     async def get_energy_data(self,
                               filename: Optional[str] = None,
                               start_time: Optional[datetime] = None,
                               end_time: Optional[datetime] = None,
@@ -212,10 +219,10 @@ class DatabaseManager:
         """Retrieve energy data with optional filtering"""
         try:
             query = {}
 
             if filename:
                 query["filename"] = filename
 
             if start_time or end_time:
                 time_query = {}
                 if start_time:
@@ -223,11 +230,11 @@ class DatabaseManager:
                 if end_time:
                     time_query["$lte"] = end_time
                 query["timestamp"] = time_query
 
             cursor = self.collections['energy_data'].find(query).sort("timestamp", -1).limit(limit)
 
             data = []
-            async for doc in cursor:
+            for doc in cursor:
                 # Convert ObjectId to string and datetime to ISO string
                 if "_id" in doc:
                     doc["_id"] = str(doc["_id"])
@@ -235,11 +242,11 @@ class DatabaseManager:
                     doc["timestamp"] = doc["timestamp"].isoformat()
                 if "processed_at" in doc and hasattr(doc["processed_at"], "isoformat"):
                     doc["processed_at"] = doc["processed_at"].isoformat()
 
                 data.append(doc)
 
             return data
 
         except Exception as e:
             logger.error(f"Error retrieving energy data: {e}")
             return []
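One consequence of this swap worth noting: the methods stay `async def`, but every PyMongo call inside them now blocks the event loop while it waits on the server. If that ever matters under load, the usual workaround is to push the blocking call onto a worker thread; a sketch, assuming Python 3.9+ for `asyncio.to_thread` (the wrapper name is hypothetical):

```
import asyncio

async def store_records_nonblocking(db_manager, records):
    # Run the now-synchronous insert in a thread so the FastAPI
    # event loop keeps serving requests in the meantime.
    return await asyncio.to_thread(
        db_manager.collections["energy_data"].insert_many, records
    )
```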
@@ -5,10 +5,10 @@ Monitors ftp.sa4cps.pt for new monthly files
 """
 
 import asyncio
-import ftplib
+from ftplib import FTP
 import logging
 import os
-from datetime import datetime, timedelta
+from datetime import datetime
 from typing import List, Dict, Any, Optional
 from dataclasses import dataclass
 import tempfile
@@ -30,7 +30,7 @@ class FTPFileInfo:
 
 class FTPMonitor:
-    """Monitors SA4CPS FTP server for new .slg_v2 files"""
+    """Monitors SA4CPS FTP server for new .sgl_v2 files"""
 
     def __init__(self, db_manager):
         self.db_manager = db_manager
         self.processor = SLGProcessor()
@@ -38,52 +38,50 @@ class FTPMonitor:
         self.processed_files: set = set()
         self.files_processed_count = 0
         self.status = "initializing"
 
         # FTP connection settings
         self.ftp_host = FTP_CONFIG["host"]
         self.ftp_user = FTP_CONFIG["username"]
         self.ftp_pass = FTP_CONFIG["password"]
         self.base_path = FTP_CONFIG["base_path"]
-
-        # Check interval: 6 hours (files are monthly, so frequent checks aren't needed)
-        self.check_interval = FTP_CONFIG.get("check_interval", 6 * 3600)  # 6 hours
+        self.check_interval = FTP_CONFIG["check_interval"]
 
         logger.info(f"FTP Monitor initialized for {self.ftp_host}")
 
     async def start_monitoring(self):
         """Start the monitoring loop"""
         self.status = "running"
         logger.info("Starting FTP monitoring loop")
 
         while True:
             try:
                 await self.check_for_new_files()
                 self.status = "running"
 
                 # Wait for next check (6 hours)
                 logger.info(f"Waiting {self.check_interval/3600:.1f} hours until next check")
                 await asyncio.sleep(self.check_interval)
 
             except Exception as e:
                 self.status = "error"
                 logger.error(f"Error in monitoring loop: {e}")
                 # Wait 30 minutes before retrying on error
                 await asyncio.sleep(1800)
 
     async def check_for_new_files(self) -> Dict[str, Any]:
         """Check FTP server for new .slg_v2 files"""
         self.last_check = datetime.now()
         logger.info(f"Checking FTP server at {self.last_check}")
 
         try:
             # Connect to FTP server
-            with ftplib.FTP(self.ftp_host) as ftp:
+            with FTP(self.ftp_host) as ftp:
                 ftp.login(self.ftp_user, self.ftp_pass)
                 logger.info(f"Connected to FTP server: {self.ftp_host}")
 
                 # Find .slg_v2 files
                 new_files = await self._find_slg_files(ftp)
 
                 # Process new files
                 processed_count = 0
                 for file_info in new_files:
@@ -93,76 +91,78 @@ class FTPMonitor:
                         self.processed_files.add(file_info.path)
                         processed_count += 1
                         self.files_processed_count += 1
 
             result = {
                 "files_found": len(new_files),
                 "files_processed": processed_count,
                 "timestamp": self.last_check.isoformat()
             }
 
             logger.info(f"Check complete: {result}")
             return result
 
         except Exception as e:
             logger.error(f"FTP check failed: {e}")
             raise
 
-    async def _find_slg_files(self, ftp: ftplib.FTP) -> List[FTPFileInfo]:
-        """Find .slg_v2 files in the FTP directory structure"""
+    async def _find_slg_files(self, ftp: FTP) -> List[FTPFileInfo]:
+        """Find .sgl_v2 files in the FTP directory structure"""
         files = []
 
         try:
             # Navigate to base path
             ftp.cwd(self.base_path)
             logger.info(f"Scanning directory: {self.base_path}")
 
             # Get directory listing
             dir_list = []
             ftp.retrlines('LIST', dir_list.append)
+            logger.info(f"Received {len(dir_list)} directory entries")
 
             for line in dir_list:
+                print(line)
                 parts = line.split()
                 if len(parts) >= 9:
                     filename = parts[-1]
 
                     # Check if it's a .slg_v2 file
-                    if filename.endswith('.slg_v2'):
+                    if filename.endswith('.sgl_v2'):
+                        print('found file')
                         try:
                             size = int(parts[4])
                             full_path = f"{self.base_path.rstrip('/')}/{filename}"
 
                             files.append(FTPFileInfo(
                                 path=full_path,
                                 name=filename,
                                 size=size
                             ))
 
                         except (ValueError, IndexError):
                             logger.warning(f"Could not parse file info for: {filename}")
 
             logger.info(f"Found {len(files)} .slg_v2 files")
             return files
 
         except Exception as e:
             logger.error(f"Error scanning FTP directory: {e}")
             return []
 
-    async def _process_file(self, ftp: ftplib.FTP, file_info: FTPFileInfo) -> bool:
+    async def _process_file(self, ftp: FTP, file_info: FTPFileInfo) -> bool:
         """Download and process a .slg_v2 file"""
         logger.info(f"Processing file: {file_info.name} ({file_info.size} bytes)")
 
         try:
             # Create temporary file for download
             with tempfile.NamedTemporaryFile(mode='wb', suffix='.slg_v2', delete=False) as temp_file:
                 temp_path = temp_file.name
 
             # Download file
             with open(temp_path, 'wb') as f:
                 ftp.retrbinary(f'RETR {file_info.name}', f.write)
 
             # Process the downloaded file
             records = await self.processor.process_file(temp_path, file_info.name)
 
             # Store in database
             if records:
                 await self.db_manager.store_file_data(file_info.name, records)
@@ -171,11 +171,11 @@ class FTPMonitor:
             else:
                 logger.warning(f"No valid records found in {file_info.name}")
                 return False
 
         except Exception as e:
             logger.error(f"Error processing file {file_info.name}: {e}")
             return False
 
         finally:
             # Clean up temporary file
             try:
@@ -183,19 +183,19 @@ class FTPMonitor:
                 os.unlink(temp_path)
             except OSError:
                 pass
 
     def get_status(self) -> str:
         """Get current monitor status"""
         return self.status
 
     def get_last_check_time(self) -> Optional[str]:
         """Get last check time as ISO string"""
         return self.last_check.isoformat() if self.last_check else None
 
     def get_processed_count(self) -> int:
         """Get total number of files processed"""
         return self.files_processed_count
 
     def get_detailed_status(self) -> Dict[str, Any]:
         """Get detailed status information"""
         return {
@@ -206,4 +206,4 @@ class FTPMonitor:
             "check_interval_hours": self.check_interval / 3600,
             "ftp_host": self.ftp_host,
             "base_path": self.base_path
         }
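The `LIST` parsing above (`parts[4]` for the size, `parts[-1]` for the name) depends on the server returning Unix-style listings. Where the server supports it, `MLSD` yields structured entries instead; a sketch of that alternative (assuming the SA4CPS server implements the MLSD command, which is not verified here):

```
from ftplib import FTP

def list_sgl_files(ftp: FTP, base_path: str):
    """Yield (name, size) for .sgl_v2 files using MLSD's structured facts."""
    for name, facts in ftp.mlsd(base_path, facts=["type", "size"]):
        if facts.get("type") == "file" and name.endswith(".sgl_v2"):
            yield name, int(facts.get("size", 0))
```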
@@ -1,6 +1,6 @@
 """
 SA4CPS Data Ingestion Service
-Simple FTP monitoring service for .slg_v2 files with MongoDB storage
+Simple FTP monitoring service for .sgl_v2 files with MongoDB storage
 """
 
 from fastapi import FastAPI, HTTPException
@@ -53,7 +53,7 @@ async def lifespan(app: FastAPI):
 # Create FastAPI app
 app = FastAPI(
     title="SA4CPS Data Ingestion Service",
-    description="Monitors FTP server for .slg_v2 files and stores data in MongoDB",
+    description="Monitors FTP server for .sgl_v2 files and stores data in MongoDB",
     version="1.0.0",
     lifespan=lifespan
 )
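For context, the `lifespan` hook referenced here typically wires the DatabaseManager and FTPMonitor into the app's startup and shutdown; a plausible shape, reconstructed since the actual lifespan body is not part of this diff (module names `database` and `ftp_monitor` are assumptions):

```
import asyncio
from contextlib import asynccontextmanager
from fastapi import FastAPI

from database import DatabaseManager   # module names assumed
from ftp_monitor import FTPMonitor

@asynccontextmanager
async def lifespan(app: FastAPI):
    db_manager = DatabaseManager()
    await db_manager.connect()          # sync PyMongo connect, validated via ping()
    monitor = FTPMonitor(db_manager)
    task = asyncio.create_task(monitor.start_monitoring())
    yield                               # app serves requests here
    task.cancel()                       # stop the polling loop
    await db_manager.close()
```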