sac4cps-backend/microservices/data-ingestion-service/src/main.py
rafaeldpsilva 41b8753a92 Add scan cache tracking and improve health checks
- Track scanned FTP directories in MongoDB to avoid redundant scans
- Add endpoints to view and clear scan cache
- Improve health check logic for better startup and error reporting
- Add readiness endpoint for deployment probes
- Add test script for health check improvements
- Increase logging verbosity for debugging
2025-09-22 15:12:40 +01:00
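
The scan-cache endpoints in main.py lean on helpers from database.py (connect, ping, close, get_stats, get_scanned_directories, and a collections mapping) that are not shown in this listing. Below is a minimal sketch of that surface, assuming the Motor async MongoDB driver and a scanned_directories collection; only the method names actually called from main.py are taken from the source, everything else is illustrative.

# Sketch only - not the repository's database.py. Assumes the Motor async MongoDB
# driver and a "scanned_directories" collection; method names mirror the calls made
# from main.py, everything else is illustrative.
from motor.motor_asyncio import AsyncIOMotorClient


class DatabaseManager:
    def __init__(self, uri: str = "mongodb://localhost:27017", db_name: str = "sa4cps"):
        self.uri = uri
        self.db_name = db_name
        self.client = None
        self.collections = {}

    async def connect(self):
        # Open the client and expose the collections that main.py indexes into
        self.client = AsyncIOMotorClient(self.uri)
        db = self.client[self.db_name]
        self.collections = {"scanned_directories": db["scanned_directories"]}

    async def ping(self):
        # Used by /health and /readiness to verify connectivity
        await self.client.admin.command("ping")

    async def get_scanned_directories(self) -> list:
        # Backs GET /scan-cache: directories already recorded as scanned
        cursor = self.collections["scanned_directories"].find({}, {"_id": 0})
        return await cursor.to_list(length=None)

    async def get_stats(self) -> dict:
        # Backs /status: lightweight collection counts
        count = await self.collections["scanned_directories"].count_documents({})
        return {"scanned_directories": count}

    async def close(self):
        if self.client:
            self.client.close()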


from fastapi import FastAPI, HTTPException
from contextlib import asynccontextmanager
import asyncio
import logging
from datetime import datetime
from typing import Any
from ftp_monitor import FTPMonitor
from database import DatabaseManager
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
ftp_monitor = None
db_manager = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    global ftp_monitor, db_manager

    logger.info("Starting SA4CPS Data Ingestion Service...")

    db_manager = DatabaseManager()
    await db_manager.connect()
    logger.info("Database connection established")

    ftp_monitor = FTPMonitor(db_manager)
    logger.info("FTP monitor created")

    monitoring_task = asyncio.create_task(ftp_monitor.start_monitoring())
    logger.info("FTP monitoring task started in background")
    logger.info("Service startup complete - HTTP server ready to accept requests")

    yield

    logger.info("Shutting down service...")

    # Cancel monitoring task and wait for graceful shutdown
    if not monitoring_task.done():
        monitoring_task.cancel()
        try:
            await asyncio.wait_for(monitoring_task, timeout=5.0)
            logger.info("Monitoring task stopped gracefully")
        except asyncio.TimeoutError:
            logger.warning("Monitoring task shutdown timeout - forcing termination")
        except asyncio.CancelledError:
            logger.info("Monitoring task cancelled successfully")

    # Close database connection
    if db_manager:
        await db_manager.close()
        logger.info("Database connection closed")

    logger.info("Service shutdown complete")

app = FastAPI(
    title="SA4CPS Data Ingestion Service",
    description="Monitors FTP server for .sgl_v2 files and stores data in MongoDB",
    version="1.0.0",
    lifespan=lifespan
)

@app.get("/")
async def root():
    return {
        "service": "SA4CPS Data Ingestion Service",
        "status": "running",
        "timestamp": datetime.now().isoformat()
    }

@app.get("/health")
async def health_check():
    global ftp_monitor, db_manager

    health_status = {
        "service": "healthy",
        "timestamp": datetime.now().isoformat(),
        "database": "unknown",
        "ftp_monitor": "unknown"
    }
    service_issues = []

    # Check database connection
    if db_manager:
        try:
            await db_manager.ping()
            health_status["database"] = "connected"
        except Exception as e:
            health_status["database"] = "disconnected"
            service_issues.append("database_disconnected")
            logger.warning(f"Database health check failed: {e}")
    else:
        health_status["database"] = "not_initialized"
        health_status["service"] = "starting"

    # Check FTP monitor status
    if ftp_monitor:
        ftp_status = ftp_monitor.get_status()
        health_status["ftp_monitor"] = ftp_status
        try:
            health_status["last_check"] = ftp_monitor.get_last_check_time()
            health_status["files_processed"] = ftp_monitor.get_processed_count()
        except Exception:
            # Don't fail health check if optional status fields fail
            pass

        # Improved service status logic - be more tolerant during startup
        if ftp_status == "initializing":
            # Service is initializing but can still be considered healthy for basic operations
            if health_status["database"] == "connected":
                health_status["service"] = "healthy"  # Database is ready, FTP is starting
            else:
                health_status["service"] = "starting"
        elif ftp_status == "error":
            service_issues.append("ftp_monitor_error")
        elif ftp_status == "running":
            pass  # Keep healthy status
    else:
        health_status["ftp_monitor"] = "not_initialized"
        # Don't mark as starting if database is connected - service can be functional
        if health_status["database"] != "connected":
            health_status["service"] = "starting"

    # Determine final service status
    if service_issues:
        health_status["service"] = "degraded"
        health_status["issues"] = service_issues
    elif health_status["service"] != "starting":
        health_status["service"] = "healthy"

    return health_status

@app.get("/readiness")
async def readiness_check():
    global ftp_monitor, db_manager

    if not db_manager or not ftp_monitor:
        raise HTTPException(status_code=503, detail="Service not ready - components not initialized")

    # Check database connectivity
    try:
        await db_manager.ping()
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"Service not ready - database issue: {str(e)}")

    # FTP monitor should be at least initializing
    ftp_status = ftp_monitor.get_status()
    if ftp_status == "error":
        raise HTTPException(status_code=503, detail="Service not ready - FTP monitor in error state")

    return {
        "ready": True,
        "timestamp": datetime.now().isoformat(),
        "ftp_monitor_status": ftp_status
    }

@app.get("/status")
async def get_status():
    global ftp_monitor, db_manager

    if not ftp_monitor:
        raise HTTPException(status_code=503, detail="FTP monitor not initialized")

    return {
        "ftp_monitor": ftp_monitor.get_detailed_status(),
        "database": await db_manager.get_stats() if db_manager else None,
        "timestamp": datetime.now().isoformat()
    }

@app.post("/trigger-check")
async def trigger_manual_check():
    global ftp_monitor

    if not ftp_monitor:
        raise HTTPException(status_code=503, detail="FTP monitor not initialized")

    try:
        result = await ftp_monitor.check_for_new_files()
        return {
            "message": "Manual check completed",
            "result": result,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Manual check failed: {e}")
        raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}")

@app.get("/scan-cache")
async def get_scan_cache():
    global db_manager

    if not db_manager:
        raise HTTPException(status_code=503, detail="Database not initialized")

    try:
        scanned_dirs = await db_manager.get_scanned_directories()
        return {
            "scanned_directories": scanned_dirs,
            "total_directories": len(scanned_dirs),
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Error getting scan cache: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scan cache: {str(e)}")

@app.delete("/scan-cache")
async def clear_scan_cache():
    global db_manager

    if not db_manager:
        raise HTTPException(status_code=503, detail="Database not initialized")

    try:
        # delete_many is awaited here: db_manager is used asynchronously everywhere
        # else in this module (assuming an async driver such as Motor)
        result = await db_manager.collections['scanned_directories'].delete_many({})
        return {
            "message": "Scan cache cleared successfully",
            "deleted_count": result.deleted_count,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Error clearing scan cache: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to clear scan cache: {str(e)}")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=8008, reload=True)
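
The commit message also mentions a test script for the health-check improvements; that script is not part of this file. Below is a hypothetical stand-in that exercises the /health and /readiness endpoints served above on port 8008, using only the standard library.

# Hypothetical probe script - not the repository's test script. It exercises the
# /health and /readiness endpoints that main.py serves on port 8008.
import json
import sys
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

BASE_URL = "http://localhost:8008"  # assumes a local deployment


def fetch(path: str) -> dict:
    """GET a JSON endpoint and decode the response body."""
    with urlopen(f"{BASE_URL}{path}", timeout=5) as response:
        return json.loads(response.read())


if __name__ == "__main__":
    try:
        health = fetch("/health")
        ready = fetch("/readiness")
    except HTTPError as exc:
        # /readiness answers 503 until the database and FTP monitor are initialized
        print(f"Service not ready: HTTP {exc.code}")
        sys.exit(1)
    except URLError as exc:
        print(f"Service unreachable: {exc.reason}")
        sys.exit(1)

    print(f"health: {health.get('service')} (database={health.get('database')})")
    print(f"readiness: ready={ready.get('ready')} ftp_monitor={ready.get('ftp_monitor_status')}")
    sys.exit(0 if health.get("service") == "healthy" else 1)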