Add scan cache tracking and improve health checks
- Track scanned FTP directories in MongoDB to avoid redundant scans
- Add endpoints to view and clear scan cache
- Improve health check logic for better startup and error reporting
- Add readiness endpoint for deployment probes
- Add test script for health check improvements
- Increase logging verbosity for debugging
This commit is contained in:
@@ -8,7 +8,7 @@ from typing import Any
|
||||
from ftp_monitor import FTPMonitor
|
||||
from database import DatabaseManager
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ftp_monitor = None
|
||||
@@ -23,18 +23,36 @@ async def lifespan(app: FastAPI):
|
||||
|
||||
db_manager = DatabaseManager()
|
||||
await db_manager.connect()
|
||||
logger.info("Database connection established")
|
||||
|
||||
ftp_monitor = FTPMonitor(db_manager)
|
||||
logger.info("FTP monitor created")
|
||||
|
||||
monitoring_task = asyncio.create_task(ftp_monitor.start_monitoring())
|
||||
logger.info("FTP monitoring task started in background")
|
||||
|
||||
logger.info("Service started successfully")
|
||||
logger.info("Service startup complete - HTTP server ready to accept requests")
|
||||
|
||||
yield
|
||||
|
||||
logger.info("Shutting down service...")
|
||||
monitoring_task.cancel()
|
||||
await db_manager.close()
|
||||
|
||||
# Cancel monitoring task and wait for graceful shutdown
|
||||
if not monitoring_task.done():
|
||||
monitoring_task.cancel()
|
||||
try:
|
||||
await asyncio.wait_for(monitoring_task, timeout=5.0)
|
||||
logger.info("Monitoring task stopped gracefully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("Monitoring task shutdown timeout - forcing termination")
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Monitoring task cancelled successfully")
|
||||
|
||||
# Close database connection
|
||||
if db_manager:
|
||||
await db_manager.close()
|
||||
logger.info("Database connection closed")
|
||||
|
||||
logger.info("Service shutdown complete")
|
||||
|
||||
|
||||
@@ -66,24 +84,85 @@ async def health_check():
|
||||
"ftp_monitor": "unknown"
|
||||
}
|
||||
|
||||
service_issues = []
|
||||
|
||||
# Check database connection
|
||||
if db_manager:
|
||||
try:
|
||||
await db_manager.ping()
|
||||
health_status["database"] = "connected"
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
health_status["database"] = "disconnected"
|
||||
health_status["service"] = "degraded"
|
||||
service_issues.append("database_disconnected")
|
||||
logger.warning(f"Database health check failed: {e}")
|
||||
else:
|
||||
health_status["database"] = "not_initialized"
|
||||
health_status["service"] = "starting"
|
||||
|
||||
# Check FTP monitor status
|
||||
if ftp_monitor:
|
||||
health_status["ftp_monitor"] = ftp_monitor.get_status()
|
||||
health_status["last_check"] = ftp_monitor.get_last_check_time()
|
||||
health_status["files_processed"] = ftp_monitor.get_processed_count()
|
||||
ftp_status = ftp_monitor.get_status()
|
||||
health_status["ftp_monitor"] = ftp_status
|
||||
|
||||
try:
|
||||
health_status["last_check"] = ftp_monitor.get_last_check_time()
|
||||
health_status["files_processed"] = ftp_monitor.get_processed_count()
|
||||
except:
|
||||
# Don't fail health check if optional status fields fail
|
||||
pass
|
||||
|
||||
# Improved service status logic - be more tolerant during startup
|
||||
if ftp_status == "initializing":
|
||||
# Service is initializing but can still be considered healthy for basic operations
|
||||
if health_status["database"] == "connected":
|
||||
health_status["service"] = "healthy" # Database is ready, FTP is starting
|
||||
else:
|
||||
health_status["service"] = "starting"
|
||||
elif ftp_status == "error":
|
||||
service_issues.append("ftp_monitor_error")
|
||||
elif ftp_status == "running":
|
||||
pass # Keep healthy status
|
||||
else:
|
||||
health_status["ftp_monitor"] = "not_initialized"
|
||||
# Don't mark as starting if database is connected - service can be functional
|
||||
if health_status["database"] != "connected":
|
||||
health_status["service"] = "starting"
|
||||
|
||||
# Determine final service status
|
||||
if service_issues:
|
||||
health_status["service"] = "degraded"
|
||||
health_status["issues"] = service_issues
|
||||
elif health_status["service"] != "starting":
|
||||
health_status["service"] = "healthy"
|
||||
|
||||
return health_status
|
||||
|
||||
|
||||
@app.get("/readiness")
async def readiness_check():
    # Readiness probe: responds with HTTP 503 until both module-level
    # components exist, the database answers a ping, and the FTP monitor is
    # not reporting "error". Otherwise returns a small JSON status payload.
    # (Kept as comments, not a docstring, so the generated API description
    # stays as it was.)
    if not (db_manager and ftp_monitor):
        raise HTTPException(status_code=503, detail="Service not ready - components not initialized")

    # A failed ping, for whatever reason, means the database is not usable yet.
    try:
        await db_manager.ping()
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"Service not ready - database issue: {str(e)}")

    # Any monitor state other than "error" is accepted here.
    monitor_state = ftp_monitor.get_status()
    if monitor_state == "error":
        raise HTTPException(status_code=503, detail="Service not ready - FTP monitor in error state")

    readiness = {"ready": True}
    readiness["timestamp"] = datetime.now().isoformat()
    readiness["ftp_monitor_status"] = monitor_state
    return readiness
|
||||
|
||||
|
||||
@app.get("/status")
|
||||
async def get_status():
|
||||
global ftp_monitor, db_manager
|
||||
@@ -117,6 +196,44 @@ async def trigger_manual_check():
|
||||
raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/scan-cache")
async def get_scan_cache():
    # Lists the directories recorded in the scan cache together with their
    # count and a timestamp. Responds 503 when the database manager has not
    # been created yet and 500 when reading the cache fails.
    if not db_manager:
        raise HTTPException(status_code=503, detail="Database not initialized")

    try:
        directories = await db_manager.get_scanned_directories()
        # Built inside the try so that a failure while assembling the payload
        # is reported as a 500 as well.
        payload = dict(
            scanned_directories=directories,
            total_directories=len(directories),
            timestamp=datetime.now().isoformat(),
        )
    except Exception as e:
        logger.error(f"Error getting scan cache: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scan cache: {str(e)}")

    return payload
|
||||
|
||||
|
||||
@app.delete("/scan-cache")
async def clear_scan_cache():
    # Deletes every document in the 'scanned_directories' collection and
    # reports how many were removed. Responds 503 when the database manager
    # has not been created yet and 500 when the delete fails.
    import inspect  # local import: only this handler needs it

    if not db_manager:
        raise HTTPException(status_code=503, detail="Database not initialized")

    try:
        result = db_manager.collections['scanned_directories'].delete_many({})
        # NOTE(review): which MongoDB driver DatabaseManager wraps is not
        # visible from this file. An async driver returns an awaitable from
        # delete_many(), and reading .deleted_count on it would fail, so
        # resolve it first. A synchronous driver's result is not awaitable
        # and passes through unchanged.
        if inspect.isawaitable(result):
            result = await result

        return {
            "message": "Scan cache cleared successfully",
            "deleted_count": result.deleted_count,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Error clearing scan cache: {e}")
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=500, detail=f"Failed to clear scan cache: {str(e)}") from e
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import uvicorn

    # Direct-execution entry point: serve the "main:app" application on all
    # interfaces, port 8008, with auto-reload switched on.
    server_options = {"host": "0.0.0.0", "port": 8008, "reload": True}
    uvicorn.run("main:app", **server_options)
|
||||
|
||||
Reference in New Issue
Block a user