Files
sac4cps-backend/microservices/data-ingestion-service/tests/test_iterative_scan.py
rafaeldpsilva aa07347604 Implement iterative FTP scan and skip logic with processed file cache
- Add iterative directory scanning to prevent infinite recursion - Cache
processed files in memory to avoid redundant database lookups - Skip
already processed files using cache and database fallback - Add tests
for skip logic and iterative scan behavior - Change logging for MongoDB
connection and file storage to debug level - Clean up FastAPI app and
remove redundant docstrings
2025-09-12 13:43:21 +01:00

330 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Test FTP Monitor iterative directory scanning
Tests the new queue-based approach that prevents infinite loops
"""
import asyncio
import sys
import os
from pathlib import Path
from unittest.mock import MagicMock, patch
from typing import List
# Add src directory to path
sys.path.append(str(Path(__file__).parent.parent / "src"))
from ftp_monitor import FTPMonitor, FTPFileInfo
class MockFTP:
    """In-memory stand-in for an FTP client, used by the scanning tests.

    ``directory_structure`` maps a path string to a dict that may carry a
    ``'files'`` entry (iterable of file names) and a ``'directories'`` entry
    (iterable of sub-directory names). Only ``pwd``, ``cwd`` and
    ``retrlines`` are provided.
    """

    def __init__(self, directory_structure):
        self.directory_structure = directory_structure
        self.current_dir = '/'
        # Chronological record of every CWD/LIST request; the tests use its
        # length to detect runaway scanning.
        self.operations_log = []

    def pwd(self):
        """Return the current working directory."""
        return self.current_dir

    def cwd(self, path):
        """Switch to ``path``; raise ``Exception`` if it is not a known key.

        The attempt is logged before validation, so failed changes also
        appear in ``operations_log``.
        """
        self.operations_log.append(f"CWD: {path}")
        if path not in self.directory_structure:
            raise Exception(f"Directory not found: {path}")
        self.current_dir = path

    def retrlines(self, command, callback):
        """Mock LIST command.

        Calls ``callback`` once per entry of the current directory with a
        Unix-style listing line: sub-directories first, then files. Any
        command that does not start with ``LIST`` raises ``Exception``.
        """
        is_list_command = command.startswith('LIST')
        if not is_list_command:
            raise Exception(f"Unsupported command: {command}")

        self.operations_log.append(f"LIST: {self.current_dir}")
        listing = self.directory_structure.get(self.current_dir, {})

        # Directory entries are emitted before file entries.
        for subdir in listing.get('directories', {}):
            callback(f"drwxr-xr-x 2 user group 4096 Jan 01 12:00 {subdir}")
        for entry in listing.get('files', []):
            callback(f"-rw-r--r-- 1 user group 1024 Jan 01 12:00 {entry}")
async def test_simple_directory_structure():
    """Scan a small nested tree and check that every file is collected.

    The mock tree has four directories ('/', '/level1', '/level1/nested'
    and '/level2') holding one file each. The scan must return all four
    files and log fewer than 20 FTP operations.
    """
    print("🧪 Testing simple directory structure")
    print("-" * 40)

    tree = {
        '/': {
            'files': ['root.sgl_v2'],
            'directories': {'level1': {}, 'level2': {}},
        },
        '/level1': {
            'files': ['file1.sgl_v2'],
            'directories': {'nested': {}},
        },
        '/level1/nested': {
            'files': ['nested.sgl_v2'],
            'directories': {},
        },
        '/level2': {
            'files': ['file2.sgl_v2'],
            'directories': {},
        },
    }
    ftp_config = {
        'host': 'test.example.com',
        'username': 'testuser',
        'password': 'testpass',
        'base_path': '/',
        'check_interval': 3600,
        'recursive_scan': True,
        'max_recursion_depth': 10,
    }
    database = MagicMock()
    found = []

    # Only monitor construction and the scan need the patched configuration.
    with patch('ftp_monitor.FTP_CONFIG', ftp_config):
        scanner = FTPMonitor(database)
        ftp_client = MockFTP(tree)
        await scanner._scan_directories_iterative(ftp_client, '/', found)

    print(f"✅ Found {len(found)} files")
    print(f" Operations: {len(ftp_client.operations_log)}")

    # Verify all files were found
    found_names = [info.name for info in found]
    wanted = ['root.sgl_v2', 'file1.sgl_v2', 'nested.sgl_v2', 'file2.sgl_v2']
    assert len(found) == 4, f"Expected 4 files, got {len(found)}"
    for expected_file in wanted:
        assert expected_file in found_names, f"Missing file: {expected_file}"

    # A bounded operation count shows the scan terminated without looping.
    assert len(ftp_client.operations_log) < 20, f"Too many operations: {len(ftp_client.operations_log)}"

    print("✅ Simple structure test passed")
async def test_circular_references():
    """Check that a directory re-listing an ancestor's name does not hang the scan.

    '/dirA/dirB' lists a child called 'dirA'. The scan must still return
    exactly the three files in '/', '/dirA' and '/dirA/dirB' and log fewer
    than 15 FTP operations.

    NOTE(review): the mock structure has no key for a nested
    '/dirA/dirB/dirA', so ``MockFTP.cwd`` raises for that path. This test
    presumably relies on the scanner tolerating that failure; confirm
    against FTPMonitor.
    """
    print("\n🧪 Testing circular references")
    print("-" * 40)

    # Tree whose deepest level names its own ancestor as a child.
    tree = {
        '/': {
            'files': ['root.sgl_v2'],
            'directories': {'dirA': {}},
        },
        '/dirA': {
            'files': ['fileA.sgl_v2'],
            'directories': {'dirB': {}},
        },
        '/dirA/dirB': {
            'files': ['fileB.sgl_v2'],
            # This would create A -> B -> A loop in recursive approach
            'directories': {'dirA': {}},
        },
    }
    ftp_config = {
        'host': 'test.example.com',
        'username': 'testuser',
        'password': 'testpass',
        'base_path': '/',
        'check_interval': 3600,
        'recursive_scan': True,
        'max_recursion_depth': 5,
    }
    database = MagicMock()
    found = []

    # Only monitor construction and the scan need the patched configuration.
    with patch('ftp_monitor.FTP_CONFIG', ftp_config):
        scanner = FTPMonitor(database)
        ftp_client = MockFTP(tree)
        await scanner._scan_directories_iterative(ftp_client, '/', found)

    print("✅ Handled circular references")
    print(f" Files found: {len(found)}")
    print(f" Operations: {len(ftp_client.operations_log)}")

    # Should find all files without getting stuck
    found_names = [info.name for info in found]
    wanted = ['root.sgl_v2', 'fileA.sgl_v2', 'fileB.sgl_v2']
    assert len(found) == 3, f"Expected 3 files, got {len(found)}"
    for expected_file in wanted:
        assert expected_file in found_names, f"Missing file: {expected_file}"

    # Should not have excessive operations (indicating no infinite loop)
    assert len(ftp_client.operations_log) < 15, f"Too many operations: {len(ftp_client.operations_log)}"

    print("✅ Circular references test passed")
async def test_deep_structure_with_limit():
    """Check that the scan honours the configured depth limit.

    The tree is a single chain three levels deep with one file per level.
    With 'max_recursion_depth' patched to 2, the files at depths 0, 1 and 2
    must be returned and the file at depth 3 must not.
    """
    print("\n🧪 Testing deep structure with depth limit")
    print("-" * 45)

    # One directory per level, each with a single file.
    tree = {
        '/': {
            'files': ['root.sgl_v2'],
            'directories': {'level1': {}},
        },
        '/level1': {
            'files': ['file1.sgl_v2'],
            'directories': {'level2': {}},
        },
        '/level1/level2': {
            'files': ['file2.sgl_v2'],
            'directories': {'level3': {}},
        },
        '/level1/level2/level3': {
            # Should not be found due to depth limit
            'files': ['deep_file.sgl_v2'],
            'directories': {},
        },
    }
    ftp_config = {
        'host': 'test.example.com',
        'username': 'testuser',
        'password': 'testpass',
        'base_path': '/',
        'check_interval': 3600,
        'recursive_scan': True,
        # Low limit: should stop at level 2
        'max_recursion_depth': 2,
    }
    database = MagicMock()
    found = []

    # Only monitor construction and the scan need the patched configuration.
    with patch('ftp_monitor.FTP_CONFIG', ftp_config):
        scanner = FTPMonitor(database)
        ftp_client = MockFTP(tree)
        await scanner._scan_directories_iterative(ftp_client, '/', found)

    print("✅ Depth limit respected")
    print(f" Files found: {len(found)}")

    # Should find files up to depth 2, but not deeper
    found_names = [info.name for info in found]
    reachable = (
        ('root.sgl_v2', "Should find root file (depth 0)"),
        ('file1.sgl_v2', "Should find level 1 file (depth 1)"),
        ('file2.sgl_v2', "Should find level 2 file (depth 2)"),
    )
    for name, failure_message in reachable:
        assert name in found_names, failure_message
    assert 'deep_file.sgl_v2' not in found_names, "Should NOT find deep file (depth 3)"

    print("✅ Depth limit test passed")
async def test_queue_behavior():
    """Scan three sibling directories and check that each one's file is returned.

    The root holds no files and three sub-directories ('first', 'second',
    'third') with one file each. The test asserts that exactly three files
    come back and that each expected name is present. It does not assert
    the order in which directories are visited.
    """
    print("\n🧪 Testing queue FIFO behavior")
    print("-" * 35)

    tree = {
        '/': {
            'files': [],
            'directories': {'first': {}, 'second': {}, 'third': {}},
        },
        '/first': {
            'files': ['first.sgl_v2'],
            'directories': {},
        },
        '/second': {
            'files': ['second.sgl_v2'],
            'directories': {},
        },
        '/third': {
            'files': ['third.sgl_v2'],
            'directories': {},
        },
    }
    ftp_config = {
        'host': 'test.example.com',
        'username': 'testuser',
        'password': 'testpass',
        'base_path': '/',
        'check_interval': 3600,
        'recursive_scan': True,
        'max_recursion_depth': 5,
    }
    database = MagicMock()
    found = []

    # Only monitor construction and the scan need the patched configuration.
    with patch('ftp_monitor.FTP_CONFIG', ftp_config):
        scanner = FTPMonitor(database)
        ftp_client = MockFTP(tree)
        await scanner._scan_directories_iterative(ftp_client, '/', found)

    print("✅ Queue behavior test completed")
    print(f" Files found: {len(found)}")

    # Should find all files
    assert len(found) == 3, f"Expected 3 files, got {len(found)}"
    found_names = [info.name for info in found]
    wanted = ['first.sgl_v2', 'second.sgl_v2', 'third.sgl_v2']
    for expected_file in wanted:
        assert expected_file in found_names, f"Missing file: {expected_file}"

    print("✅ Queue behavior test passed")
async def main():
    """Run every scanning scenario in sequence and report the outcome.

    The scenarios are awaited one after another. If any of them raises
    (including a failed ``assert``), the error and its traceback are printed
    and the process exits with status 1.
    """
    banner = "=" * 55
    print("🚀 FTP Monitor Iterative Scanning Test Suite")
    print(banner)

    try:
        # Execution order matches the order the scenarios are listed in.
        scenarios = (
            test_simple_directory_structure,
            test_circular_references,
            test_deep_structure_with_limit,
            test_queue_behavior,
        )
        for scenario in scenarios:
            await scenario()

        print("\n" + banner)
        print("✅ All iterative scanning tests passed!")
        print("🔄 Queue-based approach is working correctly")
    except Exception as error:
        print(f"\n❌ Test failed: {error}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())