#!/usr/bin/env python3
"""
FTP Monitor for SA4CPS .slg_v2 files
Monitors ftp.sa4cps.pt for new monthly files
"""

import asyncio
import ftplib
import logging
import os
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import tempfile

from config import FTP_CONFIG
from slg_processor import SLGProcessor

logger = logging.getLogger(__name__)


@dataclass
class FTPFileInfo:
    """Information about an FTP file"""
    path: str                                # full remote path (base_path + filename)
    name: str                                # bare filename, e.g. "2024-01.slg_v2"
    size: int                                # size in bytes, as reported by LIST
    modified_time: Optional[datetime] = None # not currently populated by the scanner


class FTPMonitor:
    """Monitors SA4CPS FTP server for new .slg_v2 files.

    Periodically connects to the configured FTP host, scans ``base_path``
    for ``.slg_v2`` files, downloads any not yet seen, parses them with
    ``SLGProcessor`` and stores the records via ``db_manager``.

    NOTE(review): ftplib calls are blocking; running them inside async
    methods stalls the event loop for the duration of each transfer.
    Consider loop.run_in_executor if this becomes a problem — confirm
    with the service owner before restructuring.
    """

    def __init__(self, db_manager):
        """
        Args:
            db_manager: object exposing ``store_file_data(filename, records)``
                (awaitable) used to persist parsed records.
        """
        self.db_manager = db_manager
        self.processor = SLGProcessor()
        self.last_check: Optional[datetime] = None
        # Remote paths already handled this process lifetime; prevents
        # re-downloading the same file on every check.
        self.processed_files: set = set()
        self.files_processed_count = 0
        self.status = "initializing"

        # FTP connection settings
        self.ftp_host = FTP_CONFIG["host"]
        self.ftp_user = FTP_CONFIG["username"]
        self.ftp_pass = FTP_CONFIG["password"]
        self.base_path = FTP_CONFIG["base_path"]

        # Check interval: 6 hours (files are monthly, so frequent checks aren't needed)
        self.check_interval = FTP_CONFIG.get("check_interval", 6 * 3600)  # 6 hours

        logger.info(f"FTP Monitor initialized for {self.ftp_host}")

    async def start_monitoring(self):
        """Run the monitoring loop forever.

        On success, sleeps ``check_interval`` seconds between checks; on
        any error, sets status to "error" and retries after 30 minutes.
        """
        self.status = "running"
        logger.info("Starting FTP monitoring loop")

        while True:
            try:
                await self.check_for_new_files()
                self.status = "running"

                # Wait for next check (6 hours)
                logger.info(f"Waiting {self.check_interval/3600:.1f} hours until next check")
                await asyncio.sleep(self.check_interval)

            except Exception as e:
                self.status = "error"
                logger.error(f"Error in monitoring loop: {e}")
                # Wait 30 minutes before retrying on error
                await asyncio.sleep(1800)

    async def check_for_new_files(self) -> Dict[str, Any]:
        """Check FTP server for new .slg_v2 files.

        Returns:
            dict with ``files_found``, ``files_processed`` and ``timestamp``.

        Raises:
            Exception: re-raised on any FTP/processing failure so the
                caller's loop can enter its error backoff.
        """
        self.last_check = datetime.now()
        logger.info(f"Checking FTP server at {self.last_check}")

        try:
            # Connect to FTP server; context manager closes the connection.
            with ftplib.FTP(self.ftp_host) as ftp:
                ftp.login(self.ftp_user, self.ftp_pass)
                logger.info(f"Connected to FTP server: {self.ftp_host}")

                # Find .slg_v2 files
                new_files = await self._find_slg_files(ftp)

                # Process files we have not seen before
                processed_count = 0
                for file_info in new_files:
                    if file_info.path not in self.processed_files:
                        success = await self._process_file(ftp, file_info)
                        if success:
                            self.processed_files.add(file_info.path)
                            processed_count += 1
                            self.files_processed_count += 1

                result = {
                    "files_found": len(new_files),
                    "files_processed": processed_count,
                    "timestamp": self.last_check.isoformat()
                }
                logger.info(f"Check complete: {result}")
                return result

        except Exception as e:
            logger.error(f"FTP check failed: {e}")
            raise

    async def _find_slg_files(self, ftp: ftplib.FTP) -> List[FTPFileInfo]:
        """Find .slg_v2 files in the FTP directory structure.

        Parses a Unix-style ``LIST`` response (>= 9 whitespace-separated
        fields: permissions, links, owner, group, size, date..., name).
        Returns an empty list on any scanning error.
        """
        files = []
        try:
            # Navigate to base path
            ftp.cwd(self.base_path)
            logger.info(f"Scanning directory: {self.base_path}")

            # Get directory listing
            dir_list = []
            ftp.retrlines('LIST', dir_list.append)

            for line in dir_list:
                parts = line.split()
                if len(parts) >= 9:
                    filename = parts[-1]
                    # Check if it's a .slg_v2 file
                    if filename.endswith('.slg_v2'):
                        try:
                            size = int(parts[4])  # size field of a Unix LIST line
                            # BUGFIX: path was built with a literal placeholder
                            # instead of the filename; interpolate filename.
                            full_path = f"{self.base_path.rstrip('/')}/{filename}"
                            files.append(FTPFileInfo(
                                path=full_path,
                                name=filename,
                                size=size
                            ))
                        except (ValueError, IndexError):
                            # BUGFIX: warning previously omitted the filename.
                            logger.warning(f"Could not parse file info for: {filename}")

            logger.info(f"Found {len(files)} .slg_v2 files")
            return files

        except Exception as e:
            logger.error(f"Error scanning FTP directory: {e}")
            return []

    async def _process_file(self, ftp: ftplib.FTP, file_info: FTPFileInfo) -> bool:
        """Download and process a .slg_v2 file.

        Downloads to a temporary file, parses it with SLGProcessor, and
        stores any records via db_manager. The temp file is always
        removed in ``finally``.

        Returns:
            True if records were parsed and stored, False otherwise.
        """
        logger.info(f"Processing file: {file_info.name} ({file_info.size} bytes)")

        try:
            # Download straight into a named temp file (previously the file
            # was created and then re-opened a second time — redundant).
            with tempfile.NamedTemporaryFile(mode='wb', suffix='.slg_v2', delete=False) as temp_file:
                temp_path = temp_file.name
                ftp.retrbinary(f'RETR {file_info.name}', temp_file.write)

            # Process the downloaded file
            records = await self.processor.process_file(temp_path, file_info.name)

            # Store in database
            if records:
                await self.db_manager.store_file_data(file_info.name, records)
                logger.info(f"Stored {len(records)} records from {file_info.name}")
                return True
            else:
                logger.warning(f"No valid records found in {file_info.name}")
                return False

        except Exception as e:
            logger.error(f"Error processing file {file_info.name}: {e}")
            return False

        finally:
            # Clean up temporary file; temp_path may be unbound if
            # NamedTemporaryFile itself failed.
            try:
                if 'temp_path' in locals():
                    os.unlink(temp_path)
            except OSError:
                pass

    def get_status(self) -> str:
        """Get current monitor status"""
        return self.status

    def get_last_check_time(self) -> Optional[str]:
        """Get last check time as ISO string"""
        return self.last_check.isoformat() if self.last_check else None

    def get_processed_count(self) -> int:
        """Get total number of files processed"""
        return self.files_processed_count

    def get_detailed_status(self) -> Dict[str, Any]:
        """Get detailed status information"""
        return {
            "status": self.status,
            "last_check": self.get_last_check_time(),
            "files_processed": self.files_processed_count,
            "processed_files_count": len(self.processed_files),
            "check_interval_hours": self.check_interval / 3600,
            "ftp_host": self.ftp_host,
            "base_path": self.base_path
        }