sac4cps-backend/microservices/data-ingestion-service/src/main.py
rafaeldpsilva 41b8753a92 Add scan cache tracking and improve health checks
- Track scanned FTP directories in MongoDB to avoid redundant scans
- Add endpoints to view and clear scan cache
- Improve health check logic for better startup and error reporting
- Add readiness endpoint for deployment probes
- Add test script for health check improvements
- Increase logging verbosity for debugging
2025-09-22 15:12:40 +01:00
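
The scan-cache endpoints in main.py lean on helpers from database.py (connect, ping, close, get_stats, get_scanned_directories, and a collections mapping) that are not shown in this listing. Below is a minimal sketch of that surface, assuming the Motor async MongoDB driver and a scanned_directories collection; only the method names actually called from main.py are taken from the source, everything else is illustrative.

# Sketch only - not the repository's database.py. Assumes the Motor async MongoDB
# driver and a "scanned_directories" collection; method names mirror the calls made
# from main.py, everything else is illustrative.
from motor.motor_asyncio import AsyncIOMotorClient


class DatabaseManager:
    def __init__(self, uri: str = "mongodb://localhost:27017", db_name: str = "sa4cps"):
        self.uri = uri
        self.db_name = db_name
        self.client = None
        self.collections = {}

    async def connect(self):
        # Open the client and expose the collections that main.py indexes into
        self.client = AsyncIOMotorClient(self.uri)
        db = self.client[self.db_name]
        self.collections = {"scanned_directories": db["scanned_directories"]}

    async def ping(self):
        # Used by /health and /readiness to verify connectivity
        await self.client.admin.command("ping")

    async def get_scanned_directories(self) -> list:
        # Backs GET /scan-cache: directories already recorded as scanned
        cursor = self.collections["scanned_directories"].find({}, {"_id": 0})
        return await cursor.to_list(length=None)

    async def get_stats(self) -> dict:
        # Backs /status: lightweight collection counts
        count = await self.collections["scanned_directories"].count_documents({})
        return {"scanned_directories": count}

    async def close(self):
        if self.client:
            self.client.close()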


from fastapi import FastAPI, HTTPException
from contextlib import asynccontextmanager
import asyncio
import logging
from datetime import datetime
from typing import Any
from ftp_monitor import FTPMonitor
from database import DatabaseManager
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
ftp_monitor = None
db_manager = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    global ftp_monitor, db_manager

    logger.info("Starting SA4CPS Data Ingestion Service...")

    db_manager = DatabaseManager()
    await db_manager.connect()
    logger.info("Database connection established")

    ftp_monitor = FTPMonitor(db_manager)
    logger.info("FTP monitor created")

    monitoring_task = asyncio.create_task(ftp_monitor.start_monitoring())
    logger.info("FTP monitoring task started in background")
    logger.info("Service startup complete - HTTP server ready to accept requests")

    yield

    logger.info("Shutting down service...")

    # Cancel monitoring task and wait for graceful shutdown
    if not monitoring_task.done():
        monitoring_task.cancel()
        try:
            await asyncio.wait_for(monitoring_task, timeout=5.0)
            logger.info("Monitoring task stopped gracefully")
        except asyncio.TimeoutError:
            logger.warning("Monitoring task shutdown timeout - forcing termination")
        except asyncio.CancelledError:
            logger.info("Monitoring task cancelled successfully")

    # Close database connection
    if db_manager:
        await db_manager.close()
        logger.info("Database connection closed")

    logger.info("Service shutdown complete")

app = FastAPI(
    title="SA4CPS Data Ingestion Service",
    description="Monitors FTP server for .sgl_v2 files and stores data in MongoDB",
    version="1.0.0",
    lifespan=lifespan
)

@app.get("/")
async def root():
    return {
        "service": "SA4CPS Data Ingestion Service",
        "status": "running",
        "timestamp": datetime.now().isoformat()
    }

@app.get("/health")
async def health_check():
    global ftp_monitor, db_manager

    health_status = {
        "service": "healthy",
        "timestamp": datetime.now().isoformat(),
        "database": "unknown",
        "ftp_monitor": "unknown"
    }
    service_issues = []

    # Check database connection
    if db_manager:
        try:
            await db_manager.ping()
            health_status["database"] = "connected"
        except Exception as e:
            health_status["database"] = "disconnected"
            service_issues.append("database_disconnected")
            logger.warning(f"Database health check failed: {e}")
    else:
        health_status["database"] = "not_initialized"
        health_status["service"] = "starting"

    # Check FTP monitor status
    if ftp_monitor:
        ftp_status = ftp_monitor.get_status()
        health_status["ftp_monitor"] = ftp_status
        try:
            health_status["last_check"] = ftp_monitor.get_last_check_time()
            health_status["files_processed"] = ftp_monitor.get_processed_count()
        except Exception:
            # Don't fail health check if optional status fields fail
            pass

        # Improved service status logic - be more tolerant during startup
        if ftp_status == "initializing":
            # Service is initializing but can still be considered healthy for basic operations
            if health_status["database"] == "connected":
                health_status["service"] = "healthy"  # Database is ready, FTP is starting
            else:
                health_status["service"] = "starting"
        elif ftp_status == "error":
            service_issues.append("ftp_monitor_error")
        elif ftp_status == "running":
            pass  # Keep healthy status
    else:
        health_status["ftp_monitor"] = "not_initialized"
        # Don't mark as starting if database is connected - service can be functional
        if health_status["database"] != "connected":
            health_status["service"] = "starting"

    # Determine final service status
    if service_issues:
        health_status["service"] = "degraded"
        health_status["issues"] = service_issues
    elif health_status["service"] != "starting":
        health_status["service"] = "healthy"

    return health_status

@app.get("/readiness")
async def readiness_check():
    global ftp_monitor, db_manager

    if not db_manager or not ftp_monitor:
        raise HTTPException(status_code=503, detail="Service not ready - components not initialized")

    # Check database connectivity
    try:
        await db_manager.ping()
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"Service not ready - database issue: {str(e)}")

    # FTP monitor should be at least initializing
    ftp_status = ftp_monitor.get_status()
    if ftp_status == "error":
        raise HTTPException(status_code=503, detail="Service not ready - FTP monitor in error state")

    return {
        "ready": True,
        "timestamp": datetime.now().isoformat(),
        "ftp_monitor_status": ftp_status
    }

@app.get("/status")
async def get_status():
    global ftp_monitor, db_manager

    if not ftp_monitor:
        raise HTTPException(status_code=503, detail="FTP monitor not initialized")

    return {
        "ftp_monitor": ftp_monitor.get_detailed_status(),
        "database": await db_manager.get_stats() if db_manager else None,
        "timestamp": datetime.now().isoformat()
    }

@app.post("/trigger-check")
async def trigger_manual_check():
    global ftp_monitor

    if not ftp_monitor:
        raise HTTPException(status_code=503, detail="FTP monitor not initialized")

    try:
        result = await ftp_monitor.check_for_new_files()
        return {
            "message": "Manual check completed",
            "result": result,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Manual check failed: {e}")
        raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")

@app.get("/scan-cache")
async def get_scan_cache():
    global db_manager

    if not db_manager:
        raise HTTPException(status_code=503, detail="Database not initialized")

    try:
        scanned_dirs = await db_manager.get_scanned_directories()
        return {
            "scanned_directories": scanned_dirs,
            "total_directories": len(scanned_dirs),
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Error getting scan cache: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scan cache: {str(e)}")

@app.delete("/scan-cache")
async def clear_scan_cache():
    global db_manager

    if not db_manager:
        raise HTTPException(status_code=503, detail="Database not initialized")

    try:
        # delete_many is awaited here: db_manager is used asynchronously everywhere
        # else in this module (assuming an async driver such as Motor)
        result = await db_manager.collections['scanned_directories'].delete_many({})
        return {
            "message": "Scan cache cleared successfully",
            "deleted_count": result.deleted_count,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Error clearing scan cache: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to clear scan cache: {str(e)}")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=8008, reload=True)
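
The commit message also mentions a test script for the health-check improvements; that script is not part of this file. Below is a hypothetical stand-in that exercises the /health and /readiness endpoints served above on port 8008, using only the standard library.

# Hypothetical probe script - not the repository's test script. It exercises the
# /health and /readiness endpoints that main.py serves on port 8008.
import json
import sys
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

BASE_URL = "http://localhost:8008"  # assumes a local deployment


def fetch(path: str) -> dict:
    """GET a JSON endpoint and decode the response body."""
    with urlopen(f"{BASE_URL}{path}", timeout=5) as response:
        return json.loads(response.read())


if __name__ == "__main__":
    try:
        health = fetch("/health")
        ready = fetch("/readiness")
    except HTTPError as exc:
        # /readiness answers 503 until the database and FTP monitor are initialized
        print(f"Service not ready: HTTP {exc.code}")
        sys.exit(1)
    except URLError as exc:
        print(f"Service unreachable: {exc.reason}")
        sys.exit(1)

    print(f"health: {health.get('service')} (database={health.get('database')})")
    print(f"readiness: ready={ready.get('ready')} ftp_monitor={ready.get('ftp_monitor_status')}")
    sys.exit(0 if health.get("service") == "healthy" else 1)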