Switch to PyMongo, update config and requirements, fix FTP extension typo

- Replace Motor (async) with PyMongo (sync) in database manager
- Update environment variable names for FTP and MongoDB config
- Remove unused dependencies from requirements.txt
- Fix file extension typo: .slg_v2 → .sgl_v2 throughout code and docs
- Add debug prints for MongoDB env vars in config
- Update FTP monitor to use correct file extension and PyMongo
- Adjust FastAPI descriptions for new extension
This commit is contained in:
@@ -84,7 +84,7 @@ Set these in the `docker-compose.yml`:
|
||||
environment:
|
||||
- FTP_SA4CPS_HOST=ftp.sa4cps.pt # FTP server hostname
|
||||
- FTP_SA4CPS_PORT=21 # FTP port (default: 21)
|
||||
- FTP_SA4CPS_USERNAME=anonymous # FTP username
|
||||
- FTP_SA4CPS_USERNAME= # FTP username
|
||||
- FTP_SA4CPS_PASSWORD= # FTP password (empty for anonymous)
|
||||
- FTP_SA4CPS_REMOTE_PATH=/ # Remote directory path
|
||||
```
|
||||
|
||||
@@ -3,26 +3,12 @@ fastapi==0.104.1
|
||||
uvicorn==0.24.0
|
||||
pydantic==2.5.0
|
||||
|
||||
# Database dependencies
|
||||
motor==3.3.2
|
||||
# Database dependencies - using PyMongo (sync) instead of Motor (async)
|
||||
pymongo==4.6.0
|
||||
redis==5.0.1
|
||||
|
||||
# FTP handling
|
||||
ftputil==5.0.4
|
||||
|
||||
# Data processing
|
||||
pandas==2.1.4
|
||||
numpy==1.25.2
|
||||
openpyxl==3.1.2
|
||||
xlrd==2.0.1
|
||||
|
||||
# Async HTTP client
|
||||
httpx==0.25.2
|
||||
|
||||
# Logging and monitoring
|
||||
structlog==23.2.0
|
||||
|
||||
# Date/time utilities
|
||||
python-dateutil==2.8.2
|
||||
|
||||
@@ -32,4 +18,3 @@ typing-extensions==4.8.0
|
||||
# Development dependencies (optional)
|
||||
pytest==7.4.3
|
||||
pytest-asyncio==0.21.1
|
||||
pytest-cov==4.1.0
|
||||
@@ -9,20 +9,24 @@ from typing import Dict, Any
|
||||
|
||||
# FTP Configuration for SA4CPS server
|
||||
FTP_CONFIG: Dict[str, Any] = {
|
||||
"host": os.getenv("SA4CPS_FTP_HOST", "ftp.sa4cps.pt"),
|
||||
"username": os.getenv("SA4CPS_FTP_USER", "curvascarga@sa4cps.pt"),
|
||||
"password": os.getenv("SA4CPS_FTP_PASS", ""), # Set via environment variable
|
||||
"base_path": os.getenv("SA4CPS_FTP_PATH", "/SLGs/Faial/PT0010000000015181AA/"),
|
||||
"check_interval": int(os.getenv("SA4CPS_CHECK_INTERVAL", "21600")) # 6 hours default
|
||||
"host": os.getenv("FTP_SA4CPS_HOST", "ftp.sa4cps.pt"),
|
||||
"username": os.getenv("FTP_SA4CPS_USERNAME", "curvascarga@sa4cps.pt"),
|
||||
"password": os.getenv("FTP_SA4CPS_PASSWORD", 'n$WFtz9+bleN'), # Set via environment variable
|
||||
"base_path": os.getenv("FTP_SA4CPS_REMOTE_PATH", "/SLGs/Faial/"),
|
||||
"check_interval": int(os.getenv("FTP_CHECK_INTERVAL", "21600")) # 6 hours default
|
||||
}
|
||||
|
||||
# MongoDB Configuration
|
||||
# Debug environment variables
|
||||
print(f"DEBUG: MONGO_URL env var = {os.getenv('MONGO_URL', 'NOT SET')}")
|
||||
print(f"DEBUG: All env vars starting with MONGO: {[k for k in os.environ.keys() if k.startswith('MONGO')]}")
|
||||
|
||||
MONGO_CONFIG: Dict[str, Any] = {
|
||||
"connection_string": os.getenv(
|
||||
"MONGODB_URL",
|
||||
"mongodb://admin:admin@localhost:27018/sa4cps_energy?authSource=admin"
|
||||
"MONGO_URL",
|
||||
"mongodb://admin:password123@localhost:27017/digitalmente_ingestion?authSource=admin"
|
||||
),
|
||||
"database_name": os.getenv("MONGODB_DATABASE", "sa4cps_energy")
|
||||
"database_name": os.getenv("MONGODB_DATABASE", "digitalmente_ingestion")
|
||||
}
|
||||
|
||||
# Logging Configuration
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MongoDB Database Manager for SA4CPS Data Ingestion
|
||||
Simple async MongoDB operations for storing .slg_v2 file data
|
||||
Simple sync MongoDB operations for storing .sgl_v2 file data
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
from pymongo import MongoClient
|
||||
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
|
||||
|
||||
from config import MONGO_CONFIG
|
||||
@@ -20,7 +18,7 @@ class DatabaseManager:
|
||||
"""Manages MongoDB connections and operations for SA4CPS data"""
|
||||
|
||||
def __init__(self):
|
||||
self.client: Optional[AsyncIOMotorClient] = None
|
||||
self.client: Optional[MongoClient] = None
|
||||
self.db = None
|
||||
self.collections = {}
|
||||
|
||||
@@ -33,10 +31,11 @@ class DatabaseManager:
|
||||
async def connect(self):
|
||||
"""Connect to MongoDB"""
|
||||
try:
|
||||
self.client = AsyncIOMotorClient(self.connection_string)
|
||||
logger.info(f"Connecting to MongoDB at: {self.connection_string}")
|
||||
self.client = MongoClient(self.connection_string, serverSelectionTimeoutMS=5000)
|
||||
|
||||
# Test connection
|
||||
await self.client.admin.command('ping')
|
||||
await self.ping()
|
||||
|
||||
# Get database and collections
|
||||
self.db = self.client[self.database_name]
|
||||
@@ -47,9 +46,9 @@ class DatabaseManager:
|
||||
}
|
||||
|
||||
# Create indexes for better performance
|
||||
await self._create_indexes()
|
||||
self._create_indexes()
|
||||
|
||||
logger.info(f"Connected to MongoDB: {self.database_name}")
|
||||
logger.info(f"Connected to MongoDB database: {self.database_name}")
|
||||
|
||||
except (ConnectionFailure, ServerSelectionTimeoutError) as e:
|
||||
logger.error(f"Failed to connect to MongoDB: {e}")
|
||||
@@ -66,26 +65,34 @@ class DatabaseManager:
|
||||
if not self.client:
|
||||
raise ConnectionFailure("No database connection")
|
||||
|
||||
await self.client.admin.command('ping')
|
||||
try:
|
||||
# The ping command is cheap and does not require auth.
|
||||
self.client.admin.command('ping')
|
||||
logger.info("MongoDB ping successful")
|
||||
except ConnectionFailure as e:
|
||||
logger.error(f"MongoDB ping failed - Server not available: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"MongoDB ping failed with error: {e}")
|
||||
raise ConnectionFailure(f"Ping failed: {e}")
|
||||
|
||||
async def _create_indexes(self):
|
||||
def _create_indexes(self):
|
||||
"""Create database indexes for efficient queries"""
|
||||
try:
|
||||
# Index on files collection
|
||||
await self.collections['files'].create_index("filename", unique=True)
|
||||
await self.collections['files'].create_index("processed_at")
|
||||
self.collections['files'].create_index("filename", unique=True)
|
||||
self.collections['files'].create_index("processed_at")
|
||||
|
||||
# Index on energy data collection
|
||||
await self.collections['energy_data'].create_index([("filename", 1), ("timestamp", 1)])
|
||||
await self.collections['energy_data'].create_index("timestamp")
|
||||
self.collections['energy_data'].create_index([("filename", 1), ("timestamp", 1)])
|
||||
self.collections['energy_data'].create_index("timestamp")
|
||||
|
||||
logger.info("Database indexes created successfully")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to create indexes: {e}")
|
||||
|
||||
async def store_file_data(self, filename: str, records: List[Dict[str, Any]]) -> bool:
|
||||
"""Store processed .slg_v2 file data in MongoDB"""
|
||||
"""Store processed .sgl_v2 file data in MongoDB"""
|
||||
try:
|
||||
current_time = datetime.now()
|
||||
|
||||
@@ -99,7 +106,7 @@ class DatabaseManager:
|
||||
}
|
||||
|
||||
# Insert or update file record
|
||||
await self.collections['files'].replace_one(
|
||||
self.collections['files'].replace_one(
|
||||
{"filename": filename},
|
||||
file_metadata,
|
||||
upsert=True
|
||||
@@ -112,7 +119,7 @@ class DatabaseManager:
|
||||
|
||||
# Insert energy data records
|
||||
if records:
|
||||
result = await self.collections['energy_data'].insert_many(records)
|
||||
result = self.collections['energy_data'].insert_many(records)
|
||||
inserted_count = len(result.inserted_ids)
|
||||
logger.info(f"Stored {inserted_count} records from {filename}")
|
||||
return True
|
||||
@@ -130,7 +137,7 @@ class DatabaseManager:
|
||||
"error_message": str(e)
|
||||
}
|
||||
|
||||
await self.collections['files'].replace_one(
|
||||
self.collections['files'].replace_one(
|
||||
{"filename": filename},
|
||||
error_metadata,
|
||||
upsert=True
|
||||
@@ -147,7 +154,7 @@ class DatabaseManager:
|
||||
)
|
||||
|
||||
files = []
|
||||
async for doc in cursor:
|
||||
for doc in cursor:
|
||||
files.append(doc["filename"])
|
||||
|
||||
return files
|
||||
@@ -159,7 +166,7 @@ class DatabaseManager:
|
||||
async def get_file_info(self, filename: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get information about a specific file"""
|
||||
try:
|
||||
return await self.collections['files'].find_one({"filename": filename})
|
||||
return self.collections['files'].find_one({"filename": filename})
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting file info for {filename}: {e}")
|
||||
return None
|
||||
@@ -175,7 +182,7 @@ class DatabaseManager:
|
||||
# Count documents in each collection
|
||||
for name, collection in self.collections.items():
|
||||
try:
|
||||
count = await collection.count_documents({})
|
||||
count = collection.count_documents({})
|
||||
stats[f"{name}_count"] = count
|
||||
except Exception as e:
|
||||
stats[f"{name}_count"] = f"error: {e}"
|
||||
@@ -188,7 +195,7 @@ class DatabaseManager:
|
||||
{"filename": 1, "processed_at": 1, "record_count": 1, "status": 1, "_id": 0}
|
||||
).sort("processed_at", -1).limit(5)
|
||||
|
||||
async for doc in cursor:
|
||||
for doc in cursor:
|
||||
if doc.get("processed_at"):
|
||||
doc["processed_at"] = doc["processed_at"].isoformat()
|
||||
recent_files.append(doc)
|
||||
@@ -227,7 +234,7 @@ class DatabaseManager:
|
||||
cursor = self.collections['energy_data'].find(query).sort("timestamp", -1).limit(limit)
|
||||
|
||||
data = []
|
||||
async for doc in cursor:
|
||||
for doc in cursor:
|
||||
# Convert ObjectId to string and datetime to ISO string
|
||||
if "_id" in doc:
|
||||
doc["_id"] = str(doc["_id"])
|
||||
|
||||
@@ -5,10 +5,10 @@ Monitors ftp.sa4cps.pt for new monthly files
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import ftplib
|
||||
from ftplib import FTP
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
import tempfile
|
||||
@@ -44,9 +44,7 @@ class FTPMonitor:
|
||||
self.ftp_user = FTP_CONFIG["username"]
|
||||
self.ftp_pass = FTP_CONFIG["password"]
|
||||
self.base_path = FTP_CONFIG["base_path"]
|
||||
|
||||
# Check interval: 6 hours (files are monthly, so frequent checks aren't needed)
|
||||
self.check_interval = FTP_CONFIG.get("check_interval", 6 * 3600) # 6 hours
|
||||
self.check_interval = FTP_CONFIG["check_interval"]
|
||||
|
||||
logger.info(f"FTP Monitor initialized for {self.ftp_host}")
|
||||
|
||||
@@ -77,7 +75,7 @@ class FTPMonitor:
|
||||
|
||||
try:
|
||||
# Connect to FTP server
|
||||
with ftplib.FTP(self.ftp_host) as ftp:
|
||||
with FTP(self.ftp_host) as ftp:
|
||||
ftp.login(self.ftp_user, self.ftp_pass)
|
||||
logger.info(f"Connected to FTP server: {self.ftp_host}")
|
||||
|
||||
@@ -107,8 +105,8 @@ class FTPMonitor:
|
||||
logger.error(f"FTP check failed: {e}")
|
||||
raise
|
||||
|
||||
async def _find_slg_files(self, ftp: ftplib.FTP) -> List[FTPFileInfo]:
|
||||
"""Find .slg_v2 files in the FTP directory structure"""
|
||||
async def _find_slg_files(self, ftp: FTP) -> List[FTPFileInfo]:
|
||||
"""Find .sgl_v2 files in the FTP directory structure"""
|
||||
files = []
|
||||
|
||||
try:
|
||||
@@ -119,14 +117,16 @@ class FTPMonitor:
|
||||
# Get directory listing
|
||||
dir_list = []
|
||||
ftp.retrlines('LIST', dir_list.append)
|
||||
logger.info(f"Received {len(dir_list)} directory entries")
|
||||
|
||||
for line in dir_list:
|
||||
print(line)
|
||||
parts = line.split()
|
||||
if len(parts) >= 9:
|
||||
filename = parts[-1]
|
||||
|
||||
# Check if it's a .slg_v2 file
|
||||
if filename.endswith('.slg_v2'):
|
||||
if filename.endswith('.sgl_v2'):
|
||||
print('found file')
|
||||
try:
|
||||
size = int(parts[4])
|
||||
full_path = f"{self.base_path.rstrip('/')}/{filename}"
|
||||
@@ -147,7 +147,7 @@ class FTPMonitor:
|
||||
logger.error(f"Error scanning FTP directory: {e}")
|
||||
return []
|
||||
|
||||
async def _process_file(self, ftp: ftplib.FTP, file_info: FTPFileInfo) -> bool:
|
||||
async def _process_file(self, ftp: FTP, file_info: FTPFileInfo) -> bool:
|
||||
"""Download and process a .slg_v2 file"""
|
||||
logger.info(f"Processing file: {file_info.name} ({file_info.size} bytes)")
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""
|
||||
SA4CPS Data Ingestion Service
|
||||
Simple FTP monitoring service for .slg_v2 files with MongoDB storage
|
||||
Simple FTP monitoring service for .sgl_v2 files with MongoDB storage
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
@@ -53,7 +53,7 @@ async def lifespan(app: FastAPI):
|
||||
# Create FastAPI app
|
||||
app = FastAPI(
|
||||
title="SA4CPS Data Ingestion Service",
|
||||
description="Monitors FTP server for .slg_v2 files and stores data in MongoDB",
|
||||
description="Monitors FTP server for .sgl_v2 files and stores data in MongoDB",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user