209 lines
7.5 KiB
Python
209 lines
7.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
FTP Monitor for SA4CPS .slg_v2 files
|
|
Monitors ftp.sa4cps.pt for new monthly files
|
|
"""
|
|
|
|
import asyncio
|
|
import ftplib
|
|
import logging
|
|
import os
|
|
from datetime import datetime, timedelta
|
|
from typing import List, Dict, Any, Optional
|
|
from dataclasses import dataclass
|
|
import tempfile
|
|
|
|
from config import FTP_CONFIG
|
|
from slg_processor import SLGProcessor
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class FTPFileInfo:
    """Descriptor for a single file found on the FTP server."""

    # Remote path of the file (base directory joined with the file name).
    path: str
    # Bare file name as it appears in the directory listing.
    name: str
    # File size in bytes, as reported by the server listing.
    size: int
    # Last-modified timestamp; defaults to None when not supplied.
    modified_time: Optional[datetime] = None
|
|
|
|
|
|
class FTPMonitor:
    """Monitors SA4CPS FTP server for new .slg_v2 files.

    The monitor periodically connects to the configured FTP host, lists the
    configured base directory (non-recursively), downloads every ``.slg_v2``
    file it has not yet processed successfully, hands it to ``SLGProcessor``
    and stores the resulting records through ``db_manager``.

    All blocking ``ftplib`` calls are executed in the event loop's default
    executor so that the monitoring coroutine never stalls the event loop.

    NOTE: the set of processed file paths is held in memory on this object
    only; nothing in this class persists it across restarts.
    """

    def __init__(self, db_manager):
        """Create a monitor.

        Args:
            db_manager: object exposing an awaitable
                ``store_file_data(file_name, records)`` method.
        """
        self.db_manager = db_manager
        self.processor = SLGProcessor()
        self.last_check: Optional[datetime] = None
        self.processed_files: set = set()
        self.files_processed_count = 0
        self.status = "initializing"

        # FTP connection settings
        self.ftp_host = FTP_CONFIG["host"]
        self.ftp_user = FTP_CONFIG["username"]
        self.ftp_pass = FTP_CONFIG["password"]
        self.base_path = FTP_CONFIG["base_path"]
        # Per-socket-operation timeout (seconds); without it a dead server
        # would block a check forever. Optional key, defaults to 60 s.
        self.ftp_timeout = FTP_CONFIG.get("timeout", 60)

        # Check interval: 6 hours by default (files are monthly, so frequent
        # checks aren't needed).
        self.check_interval = FTP_CONFIG.get("check_interval", 6 * 3600)
        # Back-off after a failed check: 30 minutes by default.
        self.retry_interval = FTP_CONFIG.get("retry_interval", 1800)

        logger.info(f"FTP Monitor initialized for {self.ftp_host}")

    async def start_monitoring(self):
        """Run the monitoring loop until the surrounding task is cancelled.

        After a successful check the loop sleeps ``check_interval`` seconds;
        after a failed check it sets ``status`` to ``"error"`` and sleeps
        ``retry_interval`` seconds before trying again.

        Raises:
            asyncio.CancelledError: re-raised on cancellation, after
                ``status`` has been set to ``"stopped"``.
        """
        self.status = "running"
        logger.info("Starting FTP monitoring loop")

        try:
            while True:
                try:
                    await self.check_for_new_files()
                    self.status = "running"

                    logger.info(f"Waiting {self.check_interval/3600:.1f} hours until next check")
                    await asyncio.sleep(self.check_interval)

                except Exception as e:
                    self.status = "error"
                    logger.error(f"Error in monitoring loop: {e}")
                    # Back off before retrying on error
                    await asyncio.sleep(self.retry_interval)
        except asyncio.CancelledError:
            # Do not keep reporting "running" once the task has been cancelled.
            self.status = "stopped"
            logger.info("FTP monitoring loop cancelled")
            raise

    async def check_for_new_files(self) -> Dict[str, Any]:
        """Check the FTP server once and process every not-yet-processed file.

        Returns:
            A dict with ``files_found`` (number of .slg_v2 files listed),
            ``files_processed`` (number processed successfully during this
            call) and ``timestamp`` (ISO string of the check start time).

        Raises:
            Exception: any connection/login error is logged and re-raised.
        """
        check_time = datetime.now()
        self.last_check = check_time
        logger.info(f"Checking FTP server at {check_time}")

        try:
            ftp = await self._run_blocking(self._connect)
            try:
                logger.info(f"Connected to FTP server: {self.ftp_host}")

                # Find .slg_v2 files
                new_files = await self._find_slg_files(ftp)

                # Process files that have not been handled successfully yet
                processed_count = 0
                for file_info in new_files:
                    if file_info.path in self.processed_files:
                        continue
                    if await self._process_file(ftp, file_info):
                        self.processed_files.add(file_info.path)
                        processed_count += 1
                        self.files_processed_count += 1
            finally:
                await self._run_blocking(self._disconnect, ftp)

            result = {
                "files_found": len(new_files),
                "files_processed": processed_count,
                "timestamp": check_time.isoformat()
            }

            logger.info(f"Check complete: {result}")
            return result

        except Exception as e:
            logger.error(f"FTP check failed: {e}")
            raise

    async def _find_slg_files(self, ftp: ftplib.FTP) -> List["FTPFileInfo"]:
        """List ``base_path`` and return an entry for each .slg_v2 file.

        Only the base directory itself is listed (no recursion). As a side
        effect the FTP session's working directory becomes ``base_path``,
        which ``_process_file`` relies on when it retrieves files by name.

        Returns:
            The files found; an empty list if the listing fails (the error
            is logged, not raised).
        """
        try:
            dir_list = await self._run_blocking(self._list_base_dir, ftp)
        except Exception as e:
            logger.error(f"Error scanning FTP directory: {e}")
            return []

        files = []
        base = self.base_path.rstrip('/')
        for line in dir_list:
            parsed = self._parse_list_line(line)
            if parsed is None:
                continue
            filename, size = parsed
            if size is None:
                logger.warning(f"Could not parse file info for: {filename}")
                continue
            files.append(FTPFileInfo(
                path=f"{base}/{filename}",
                name=filename,
                size=size
            ))

        logger.info(f"Found {len(files)} .slg_v2 files")
        return files

    async def _process_file(self, ftp: ftplib.FTP, file_info: "FTPFileInfo") -> bool:
        """Download one file, parse it and store its records.

        Returns:
            True when records were parsed and stored; False when the file
            yielded no records or any step failed (the failure is logged).
            The temporary download is removed in every case.
        """
        logger.info(f"Processing file: {file_info.name} ({file_info.size} bytes)")

        temp_path = None
        try:
            # Retrieved by bare name: the session's cwd is already base_path.
            temp_path = await self._run_blocking(
                self._download_to_temp, ftp, file_info.name
            )

            # Process the downloaded file
            records = await self.processor.process_file(temp_path, file_info.name)

            # Store in database
            if records:
                await self.db_manager.store_file_data(file_info.name, records)
                logger.info(f"Stored {len(records)} records from {file_info.name}")
                return True

            logger.warning(f"No valid records found in {file_info.name}")
            return False

        except Exception as e:
            logger.error(f"Error processing file {file_info.name}: {e}")
            return False

        finally:
            # Clean up temporary file (best effort)
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass

    # ------------------------------------------------------------------
    # Blocking helpers (executed in a worker thread)
    # ------------------------------------------------------------------

    @staticmethod
    async def _run_blocking(func, *args):
        """Run a blocking callable in the loop's default executor."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)

    def _connect(self) -> ftplib.FTP:
        """Open and authenticate an FTP session; close the socket on failure."""
        ftp = ftplib.FTP(timeout=self.ftp_timeout)
        try:
            ftp.connect(self.ftp_host)
            ftp.login(self.ftp_user, self.ftp_pass)
        except Exception:
            ftp.close()
            raise
        return ftp

    @staticmethod
    def _disconnect(ftp: ftplib.FTP) -> None:
        """Send QUIT; fall back to closing the socket if that fails."""
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()

    def _list_base_dir(self, ftp: ftplib.FTP) -> list:
        """Change into ``base_path`` and return the raw LIST lines."""
        ftp.cwd(self.base_path)
        logger.info(f"Scanning directory: {self.base_path}")
        lines = []
        ftp.retrlines('LIST', lines.append)
        return lines

    @staticmethod
    def _parse_list_line(line: str) -> Optional[tuple]:
        """Extract ``(filename, size)`` from one Unix-style LIST line.

        Returns None when the line has fewer than nine fields, describes a
        directory, or does not name a ``.slg_v2`` file. ``size`` is None when
        the size field is not an integer.
        """
        stripped = line.strip()
        # Split at most 8 times so a file name containing spaces stays whole.
        parts = stripped.split(None, 8)
        if len(parts) < 9 or stripped.startswith('d'):
            return None

        filename = parts[8]
        if not filename.endswith('.slg_v2'):
            return None

        try:
            size = int(parts[4])
        except ValueError:
            size = None
        return filename, size

    @staticmethod
    def _download_to_temp(ftp: ftplib.FTP, remote_name: str) -> str:
        """Download ``remote_name`` into a new temp file and return its path.

        The caller owns the returned file and must delete it. If the
        transfer fails the partial file is removed before re-raising.
        """
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.slg_v2', delete=False) as temp_file:
            temp_path = temp_file.name
            try:
                ftp.retrbinary(f'RETR {remote_name}', temp_file.write)
            except BaseException:
                # Close first so the unlink also works on platforms that
                # refuse to delete open files.
                temp_file.close()
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass
                raise
        return temp_path

    # ------------------------------------------------------------------
    # Status accessors
    # ------------------------------------------------------------------

    def get_status(self) -> str:
        """Get current monitor status"""
        return self.status

    def get_last_check_time(self) -> Optional[str]:
        """Get last check time as ISO string, or None if no check has run"""
        return self.last_check.isoformat() if self.last_check else None

    def get_processed_count(self) -> int:
        """Get total number of files processed"""
        return self.files_processed_count

    def get_detailed_status(self) -> Dict[str, Any]:
        """Get detailed status information"""
        return {
            "status": self.status,
            "last_check": self.get_last_check_time(),
            "files_processed": self.files_processed_count,
            "processed_files_count": len(self.processed_files),
            "check_interval_hours": self.check_interval / 3600,
            "ftp_host": self.ftp_host,
            "base_path": self.base_path
        }