Implement iterative FTP scan and skip logic with processed file cache

- Add iterative directory scanning to prevent infinite recursion
- Cache processed files in memory to avoid redundant database lookups
- Skip already-processed files using the cache, with a database fallback
- Add tests for skip logic and iterative scan behavior
- Change logging for MongoDB connection and file storage to debug level
- Clean up FastAPI app and remove redundant docstrings
This commit is contained in:
rafaeldpsilva
2025-09-12 13:43:21 +01:00
parent a703240b27
commit aa07347604
8 changed files with 906 additions and 136 deletions

View File

@@ -1,56 +1,43 @@
"""
SA4CPS Data Ingestion Service
Simple FTP monitoring service for .sgl_v2 files with MongoDB storage
"""
from fastapi import FastAPI, HTTPException
from contextlib import asynccontextmanager
import asyncio
import logging
from datetime import datetime
from typing import Dict, Any
from typing import Any
from ftp_monitor import FTPMonitor
from database import DatabaseManager
# Configure logging: root logger at INFO; this module logs under its own name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Global services: module-level handles that stay None until lifespan()
# assigns them during application startup.
ftp_monitor = None
db_manager = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage startup and shutdown of the ingestion service.

    Startup: connect the DatabaseManager, build the FTPMonitor on top of it,
    and launch ``ftp_monitor.start_monitoring()`` as a background task.

    Shutdown: cancel the background task, wait for it to finish unwinding,
    and only then close the database connection. Cleanup runs in a
    ``finally`` so it also happens if an exception is thrown into the
    generator while the application is running.
    """
    global ftp_monitor, db_manager

    logger.info("Starting SA4CPS Data Ingestion Service...")

    # Initialize database connection
    db_manager = DatabaseManager()
    await db_manager.connect()

    # Initialize FTP monitor
    ftp_monitor = FTPMonitor(db_manager)

    # Start background monitoring task
    monitoring_task = asyncio.create_task(ftp_monitor.start_monitoring())

    logger.info("Service started successfully")

    try:
        yield
    finally:
        # Cleanup on shutdown
        logger.info("Shutting down service...")
        monitoring_task.cancel()
        try:
            # cancel() only *requests* cancellation; await the task so it has
            # actually stopped using the database before we close it.
            await monitoring_task
        except asyncio.CancelledError:
            # Expected outcome: the task acknowledged the cancellation.
            pass
        except Exception:
            # The task had already crashed; record it but keep shutting down.
            logger.exception("FTP monitoring task failed before shutdown")
        finally:
            await db_manager.close()
        logger.info("Service shutdown complete")
# Create FastAPI app
app = FastAPI(
title="SA4CPS Data Ingestion Service",
description="Monitors FTP server for .sgl_v2 files and stores data in MongoDB",
@@ -61,7 +48,6 @@ app = FastAPI(
@app.get("/")
async def root():
"""Root endpoint"""
return {
"service": "SA4CPS Data Ingestion Service",
"status": "running",
@@ -71,7 +57,6 @@ async def root():
@app.get("/health")
async def health_check():
"""Health check endpoint"""
global ftp_monitor, db_manager
health_status = {
@@ -101,7 +86,6 @@ async def health_check():
@app.get("/status")
async def get_status():
"""Detailed status endpoint"""
global ftp_monitor, db_manager
if not ftp_monitor:
@@ -116,7 +100,6 @@ async def get_status():
@app.post("/trigger-check")
async def trigger_manual_check():
"""Manually trigger FTP check"""
global ftp_monitor
if not ftp_monitor: