# Import and Initialization

In [1]:
# Create a refactored file monitoring system for the Ki2 Alerts Python Package
# that accounts for the YYYY/MM directory structure with Data, Data_Burst, and Data_Burst_Symbols subfolders

import os
from pathlib import Path
import datetime
from typing import Dict, List, Tuple, Optional
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import polars as pl
import logging
import time
import re
import pickle

from watchdog.observers import Observer

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

Operation of the monitoring system is based on the HFTA Burst Monitor Application being placed in the YYYY/MM Folder under the in the Data/Burst_Data Folder under the Project Root Directory.  This causes the Busrts Datat to be loaded into each of appropriate subfolders.  At the end of each month, the Burst Monitor program and it two support files (HFTAIP.txt and HFTUser.txt) are moved to the new monthly folder so that new daily data captures will go into the appropriate subfolders.  Using this approach segments the raw captured data by month making it available to packages such as DuckDB and packages that expect a Partitioned Parquet Data Lake or Partition Table like structure.
To initiate daily monitoring the Burst Monitor executable must be turend on and the "Connect" Button push to connect to the remote server.  There is no automated way to do this at this time although PlayWrite or other browser automation tools migh be implemented in the Future.
As part of this project an n8n method needs to be developed to execute the startup of the executable file aon a time basis.  In order to catach early morning "Flyer" activity intiated by social media promotersm the current thinking is that this should take place at 6 AM New York time.
When the monitor is operational the Ki2AlertsFileMonitor Object must be created. The code for this is as follows:

    ```# Initialize the monitor - Set base_path to project root + "Data/Burst_Data"

    project_root = Path.cwd().parent
    print(f"Project root: {project_root}")
    base_path = project_root / "Data" / "Burst_Data"
    base_path.mkdir(parents=True, exist_ok=True)
    monitor = Ki2AlertsFileMonitor(str(base_path))```

Next the get_current_year_month_path method is executed to make sure the directory is created and available

    ```# Directory creation
    current_path = monitor.get_current_year_month_path()```

This verifies that the directories for the current yyyy/mm for the current data are created and availble.  A list of Monitored directories can then be generated

```    # List monitored directories
    directories = monitor.list_all_monitored_directories()```

This code will check the monitored directories and retrun only the files associated with the target date.  This date must be within the YYYY/MM that was previously selected.  This will alos print the files that are found and make three separate file list parsed_files_data, parsed_files_burst, and parsed_files_burst_symbols

```target_date = date.today()
    # or target_date = target_date = date(2025, 8, 29)

    # Iterate through all subfolder types and scan for files in each, select only files from target date
    # target_date = date(2025, 8, 28)
    target_date = date.today()
    subfolder_types = ['data', 'data_burst', 'data_burst_symbols']

    parsed_files_data = [monitor.parse_filename(f, subfolder_types[0]) for f in parsed_files_data]
    parsed_files_data = [info for info in parsed_files_data if info]
    for info in parsed_files_data:
        print(info)

    parsed_files_burst = monitor.scan_for_files(subfolder_types[1], target_date, filter_by_date=True)
    parsed_files_burst = [monitor.parse_filename(f, subfolder_types[1]) for f in parsed_files_burst]
    parsed_files_burst = [info for info in parsed_files_burst if info]
    for info in parsed_files_burst:
        print(info)

    parsed_files_burst_symbols = monitor.scan_for_files(subfolder_types[2], target_date, filter_by_date=True)
    parsed_files_burst_symbols = [monitor.parse_filename(f, subfolder_types[2]) for f in parsed_files_burst_symbols]
    parsed_files_burst_symbols = [info for info in parsed_files_burst_symbols if info]

    for info in parsed_files_burst_symbols:
        print(info) ```

    Once this setup is done there will be three dictionaries with files that need to be process for a specifc target day and each dictionary will have full file path and date in both datetime and date_string form


# Date File Monitoring Class

In [2]:
class Ki2AlertsFileMonitor:
    """
    File monitoring system that handles the YYYY/MM directory structure
    with Data, Data_Burst, and Data_Burst_Symbols subfolders.
    """
    
    def __init__(self, base_path: str):
        self.base_path = Path(base_path)
        self.observers = {}
        self.file_handlers = {}
        
        # Define the expected subfolder structure
        self.subfolders = {
            'data': 'Data',
            'data_burst': 'Data_Burst', 
            'data_burst_symbols': 'Data_Burst_Symbols',
            'processed_data': 'Processed_Data'
        }
        
        # File patterns for different folder types
        self.file_patterns = {
            'data': r'^(\d{8})_hft([123])\.csv$',  # YYYYMMDDhft1.csv, etc.
            'data_burst': r'^(\d{8})\.csv$',       # YYYYMMDD.csv
            'data_burst_symbols': r'^Burst_(\d{8})_(\d{6})\.csv$'  # BurstYYYYMMDDhhmmss.csv
        }
        
        self._validate_base_path()
    
    def _validate_base_path(self):
        """Validate that the base path exists and create if necessary."""
        if not self.base_path.exists():
            self.base_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created base directory: {self.base_path}")
    
    def get_year_month_directories(self) -> List[Tuple[str, str, Path]]:
        """
        Scan the base path for existing YYYY/MM directory structures.
        Returns: List of (year, month, path) tuples
        """
        directories = []
        
        for year_dir in self.base_path.iterdir():
            if year_dir.is_dir() and re.match(r'^\d{4}$', year_dir.name):
                year = year_dir.name
                
                for month_dir in year_dir.iterdir():
                    if month_dir.is_dir() and re.match(r'^\d{2}$', month_dir.name):
                        month = month_dir.name
                        directories.append((year, month, month_dir))
        
        return sorted(directories)
    
    def create_directory_structure(self, year: str, month: str) -> Path:
        """
        Create the complete directory structure for a given year/month. If one exists, it will be verified.

        Structure:
        base_path/
        └── YYYY/
            └── MM/
                ├── Data/
                ├── Data_Burst/
                ├── Data_Burst_Symbols/
                └── ProcessedData/
        """
        year_month_path = self.base_path / year / month
        created = False
        for subfolder in self.subfolders.values():
            subfolder_path = year_month_path / subfolder
            if not subfolder_path.exists():
                subfolder_path.mkdir(parents=True, exist_ok=True)
                created = True
        if created:
            logger.info(f"Created directory structure for {year}/{month}")
        else:
            logger.info(f"Verified directory structure for {year}/{month}")
        return year_month_path
    
    def get_current_year_month_path(self) -> Path:
        """Get or create the directory path for the current year/month."""
        current_date = datetime.date.today()
        year = str(current_date.year)
        month = f"{current_date.month:02d}"
        
        return self.create_directory_structure(year, month)
    
    def get_paths_for_date(self, target_date: datetime.date) -> Dict[str, Path]:
        """
        Get all subfolder paths for a specific date.
        
        Args:
            target_date: The date for which to get paths
            
        Returns:
            Dictionary with subfolder names as keys and Path objects as values
        """
        
        year = str(target_date.year)
        month = f"{target_date.month:02d}"
        
        base_path = self.create_directory_structure(year, month)
        
        paths = {}
        for key, subfolder in self.subfolders.items():
            paths[key] = base_path / subfolder
        
        return paths

    def scan_for_files(self, subfolder_type: str, target_date: 'Optional[datetime.date]' = None, filter_by_date: bool = True) -> List[Path]:
        """
        Scan a specific subfolder type for files matching the expected pattern.

        Args:
            subfolder_type: One of 'data', 'data_burst', 'data_burst_symbols'
            target_date: Specific date to scan. If None, returns all files in the folder.
            filter_by_date: If True, only return files matching the target_date. If False, return all files.

        Returns:
            List of Path objects for matching files
        """
    
        if target_date is None:
            # If no date, scan all year/month folders for all files
            matching_files = []
            for year, month, month_dir in self.get_year_month_directories():
                folder_path = month_dir / self.subfolders[subfolder_type]
                if not folder_path.exists():
                    continue
                pattern = self.file_patterns[subfolder_type]
                for file_path in folder_path.iterdir():
                    if file_path.is_file() and re.match(pattern, file_path.name):
                        matching_files.append(file_path)
            return sorted(matching_files)
        
        # If date is given, scan only that year/month folder
        print(f"Scanning {subfolder_type} for date {target_date}")
        paths = self.get_paths_for_date(target_date)
        folder_path = paths[subfolder_type]
        if not folder_path.exists():
            return []
        pattern = self.file_patterns[subfolder_type]
        matching_files = []
        for file_path in folder_path.iterdir():
            if file_path.is_file() and re.match(pattern, file_path.name):
                if filter_by_date:
                    parsed = self.parse_filename(file_path, subfolder_type)
                    if parsed.get('date') == target_date:
                        matching_files.append(file_path)
                else:
                    matching_files.append(file_path)
        return sorted(matching_files)
    
    def parse_filename(self, file_path: Path, subfolder_type: str) -> Dict:
        """
        Parse filename to extract date and other information.
        
        Args:
            file_path: Path to the file
            subfolder_type: Type of subfolder ('data', 'data_burst', 'data_burst_symbols')
            
        Returns:
            Dictionary with parsed information
        """
        filename = file_path.name
        pattern = self.file_patterns[subfolder_type]
        match = re.match(pattern, filename)
        
        if not match:
            return {}
        
        result = {
            'filename': filename,
            'full_path': str(file_path),
            'subfolder_type': subfolder_type
        }
        
        if subfolder_type == 'data':
            date_str, file_num = match.groups()
            result.update({
                'date': datetime.datetime.strptime(date_str, '%Y%m%d').date(),
                'file_number': int(file_num),
                'date_string': date_str
            })
        elif subfolder_type == 'data_burst':
            date_str = match.group(1)
            result.update({
                'date': datetime.datetime.strptime(date_str, '%Y%m%d').date(),
                'date_string': date_str
            })
        elif subfolder_type == 'data_burst_symbols':
            date_str, time_str = match.groups()
            datetime_str = date_str + time_str
            result.update({
                'datetime': datetime.datetime.strptime(datetime_str, '%Y%m%d%H%M%S'),
                'date': datetime.datetime.strptime(date_str, '%Y%m%d').date(),
                'date_string': date_str,
                'time_string': time_str
            })
        
        return result
    
    def get_processed_data_path(self, target_date: 'Optional[datetime.date]' = None) -> Path:
        """Get the ProcessedData path for a specific date."""
        if target_date is None:
            target_date = datetime.date.today()

        paths = self.get_paths_for_date(target_date)
        return paths['processed_data']
    
    def list_all_monitored_directories(self) -> List[Dict]:
        """
        List all existing YYYY/MM directories that should be monitored.
        
        Returns:
            List of dictionaries with directory information
        """
        directories = []
        
        for year, month, path in self.get_year_month_directories():
            dir_info = {
                'year': year,
                'month': month,
                'path': str(path),
                'subfolders': {}
            }
            
            # Check which subfolders exist
            for key, subfolder in self.subfolders.items():
                subfolder_path = path / subfolder
                dir_info['subfolders'][key] = {
                    'exists': subfolder_path.exists(),
                    'path': str(subfolder_path)
                }
            
            directories.append(dir_info)
        
        return directories
    
    def get_last_position_path(self, file_path: Path) -> Path:
        """
        Get the path for the .pkl file that stores the last processed file pointer for a given file.
        """
        # Try to parse the date from the filename
        # You need to know the subfolder_type; here is a safe guess based on parent folder name
        parent_folder = file_path.parent.name.lower()
        if "burst_symbol" in parent_folder:
            subfolder_type = "data_burst_symbols"
        elif "burst" in parent_folder:
            subfolder_type = "data_burst"
        elif "data" in parent_folder:
            subfolder_type = "data"
        else:
            subfolder_type = "data"

        parsed = self.parse_filename(file_path, subfolder_type)
        target_date = parsed.get("date", None)

        processed_dir = self.get_processed_data_path(target_date)
        last_pos_filename = f"{file_path.stem}_lastpos.pkl"
        return processed_dir / last_pos_filename

    def load_last_position(self, file_path: Path) -> int:
        """
        Load the last processed file pointer position from a .pkl file.
        """
        last_pos_path = self.get_last_position_path(file_path)
        if last_pos_path.exists():
            with open(last_pos_path, "rb") as f:
                return pickle.load(f)
        return 0

    def save_last_position(self, file_path: Path, position: int):
        """
        Save the last processed file pointer position to a .pkl file.
        """
        last_pos_path = self.get_last_position_path(file_path)
        last_pos_path.parent.mkdir(parents=True, exist_ok=True)
        with open(last_pos_path, "wb") as f:
            pickle.dump(position, f)

# File Event Handling Class

In [3]:
# Create an event handler for file monitoring
class BurstFileEventHandler(FileSystemEventHandler):
    """Event handler for monitoring file changes in the burst monitoring system."""
    
    def __init__(self, monitor: Ki2AlertsFileMonitor, subfolder_type: str):
        self.monitor = monitor
        self.subfolder_type = subfolder_type
        super().__init__()
    
    def on_created(self, event):
        if not event.is_directory:
            self._handle_file_event(event.src_path, "created")
    
    def on_modified(self, event):
        if not event.is_directory:
            self._handle_file_event(event.src_path, "modified")
    
    def _handle_file_event(self, file_path: str, event_type: str):
        file_path = Path(file_path)
        parsed_info = self.monitor.parse_filename(file_path, self.subfolder_type)

        if parsed_info:
            logger.info(f"File {event_type}: {file_path.name} in {self.subfolder_type}")
            logger.info(f"Parsed info: {parsed_info}")
            # Only process new information since last pointer
            last_pos = self.monitor.load_last_position(file_path)
            logger.info(f"Last processed position: {last_pos}")
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    f.seek(last_pos)
                    new_lines = f.readlines()
                    if new_lines:
                        logger.info(f"Processing {len(new_lines)} new lines in {file_path.name}")
                        # Here you would process new_lines as needed
                        # For demonstration, just log the first line
                        logger.info(f"First new line: {new_lines[0].strip() if new_lines else 'None'}")
                        # Save new file pointer position
                        self.monitor.save_last_position(file_path, f.tell())
            except Exception as e:
                logger.error(f"Error processing file {file_path}: {e}")
            # Trigger appropriate processing based on subfolder type
            if self.subfolder_type == 'data_burst_symbols':
                self._process_burst_symbols_file(file_path, parsed_info)
            elif self.subfolder_type == 'data_burst':
                self._process_data_burst_file(file_path, parsed_info)
            elif self.subfolder_type == 'data':
                self._process_data_file(file_path, parsed_info)
    
    def _process_burst_symbols_file(self, file_path: Path, parsed_info: Dict):
        """Process BurstYYYYMMDDhhmmss.csv files."""
        logger.info(f"Processing burst symbols file: {file_path.name}")
        # This would trigger the main analysis pipeline
        
    def _process_data_burst_file(self, file_path: Path, parsed_info: Dict):
        """Process YYYYMMDD.csv files from Data_Burst folder."""
        logger.info(f"Processing data burst file: {file_path.name}")
        
    def _process_data_file(self, file_path: Path, parsed_info: Dict):
        """Process YYYYMMDDhft*.csv files from Data folder."""
        logger.info(f"Processing data file: {file_path.name}")

# Daily Usage Script

In [4]:
# Initialize the monitor - Set base_path to project root + "Data/Burst_Data"
project_root = Path.cwd().parent
print(f"Project root: {project_root}")
base_path = project_root / "Data" / "Burst_Data"
base_path.mkdir(parents=True, exist_ok=True)
monitor = Ki2AlertsFileMonitor(str(base_path))

# Directory creation / Verification
current_path = monitor.get_current_year_month_path()

# List monitored directories
directories = monitor.list_all_monitored_directories()

# Iterate through all subfolder types and scan for files in each, select only files from target date
target_date = datetime.date(2025, 8, 28)
# target_date = date.today()
subfolder_types = ['data', 'data_burst', 'data_burst_symbols']

parsed_files_data = monitor.scan_for_files(subfolder_types[0], target_date, filter_by_date=True)
parsed_files_data = [monitor.parse_filename(f, subfolder_types[0]) for f in parsed_files_data]
parsed_files_data = [info for info in parsed_files_data if info]
for info in parsed_files_data:
    print(info)

parsed_files_burst = monitor.scan_for_files(subfolder_types[1], target_date, filter_by_date=True)
parsed_files_burst = [monitor.parse_filename(f, subfolder_types[1]) for f in parsed_files_burst]
parsed_files_burst = [info for info in parsed_files_burst if info]
for info in parsed_files_burst:
    print(info)

parsed_files_burst_symbols = monitor.scan_for_files(subfolder_types[2], target_date, filter_by_date=True)
parsed_files_burst_symbols = [monitor.parse_filename(f, subfolder_types[2]) for f in parsed_files_burst_symbols]
parsed_files_burst_symbols = [info for info in parsed_files_burst_symbols if info]
for info in parsed_files_burst_symbols:
    print(info)

parsed_files_data

2025-08-31 15:56:31,653 - INFO - Verified directory structure for 2025/08
2025-08-31 15:56:31,656 - INFO - Verified directory structure for 2025/08
2025-08-31 15:56:31,660 - INFO - Verified directory structure for 2025/08
2025-08-31 15:56:31,664 - INFO - Verified directory structure for 2025/08


Project root: c:\Users\vande\Desktop\Projects\Ki2_Alerts
Scanning data for date 2025-08-28
{'filename': '20250828_hft1.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250828_hft1.csv', 'subfolder_type': 'data', 'date': datetime.date(2025, 8, 28), 'file_number': 1, 'date_string': '20250828'}
{'filename': '20250828_hft2.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250828_hft2.csv', 'subfolder_type': 'data', 'date': datetime.date(2025, 8, 28), 'file_number': 2, 'date_string': '20250828'}
{'filename': '20250828_hft3.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250828_hft3.csv', 'subfolder_type': 'data', 'date': datetime.date(2025, 8, 28), 'file_number': 3, 'date_string': '20250828'}
Scanning data_burst for date 2025-08-28
{'filename': '20250828.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Dat

[{'filename': '20250828_hft1.csv',
  'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250828_hft1.csv',
  'subfolder_type': 'data',
  'date': datetime.date(2025, 8, 28),
  'file_number': 1,
  'date_string': '20250828'},
 {'filename': '20250828_hft2.csv',
  'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250828_hft2.csv',
  'subfolder_type': 'data',
  'date': datetime.date(2025, 8, 28),
  'file_number': 2,
  'date_string': '20250828'},
 {'filename': '20250828_hft3.csv',
  'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250828_hft3.csv',
  'subfolder_type': 'data',
  'date': datetime.date(2025, 8, 28),
  'file_number': 3,
  'date_string': '20250828'}]

In [None]:
from watchdog.observers import Observer

# Assume monitor is your Ki2AlertsFileMonitor instance
observer = Observer()

# For each subfolder type you want to monitor
for subfolder_type in ['data', 'data_burst', 'data_burst_symbols']:
    # Get the directory path for the current date (or any date you want)
    paths = monitor.get_paths_for_date(target_date)
    print(f"Monitoring paths: {paths}")
    folder_path = paths[subfolder_type]
    print(f"Monitoring folder_path: {folder_path}")
    # Create and schedule the event handler
    event_handler = BurstFileEventHandler(monitor, subfolder_type)
    observer.schedule(event_handler, str(folder_path), recursive=False)

# Start monitoring (this runs in the background)
observer.start()

# Keep the script running (or use try/finally to stop observer on exit)
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    observer.stop()
observer.join()

2025-08-31 15:56:42,954 - INFO - Verified directory structure for 2025/08
2025-08-31 15:56:42,957 - INFO - Verified directory structure for 2025/08
2025-08-31 15:56:42,958 - INFO - Verified directory structure for 2025/08


Monitoring paths: {'data': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025/08/Data'), 'data_burst': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025/08/Data_Burst'), 'data_burst_symbols': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025/08/Data_Burst_Symbols'), 'processed_data': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025/08/Processed_Data')}
Monitoring folder_path: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data\2025\08\Data
Monitoring paths: {'data': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025/08/Data'), 'data_burst': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025/08/Data_Burst'), 'data_burst_symbols': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025/08/Data_Burst_Symbols'), 'processed_data': WindowsPath('c:/Users/vande/Desktop/Projects/Ki2_Alerts/Data/Burst_Data/2025

2025-08-31 15:56:58,971 - INFO - File modified: 20250827.csv in data_burst
2025-08-31 15:56:58,975 - INFO - Parsed info: {'filename': '20250827.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data_Burst\\20250827.csv', 'subfolder_type': 'data_burst', 'date': datetime.date(2025, 8, 27), 'date_string': '20250827'}
2025-08-31 15:56:58,977 - INFO - Verified directory structure for 2025/08
2025-08-31 15:56:58,979 - INFO - Last processed position: 0
2025-08-31 15:56:58,979 - INFO - Processing 93 new lines in 20250827.csv
2025-08-31 15:56:58,981 - INFO - First new line: 093001,3,0,0,87,288,1,0,0
2025-08-31 15:56:58,986 - INFO - Verified directory structure for 2025/08
2025-08-31 15:56:58,988 - INFO - Processing data burst file: 20250827.csv
2025-08-31 15:57:04,150 - INFO - File modified: 20250827.csv in data_burst
2025-08-31 15:57:04,155 - INFO - Parsed info: {'filename': '20250827.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Al

In [None]:
datetime.date(2025, 8, 25)

datetime.date(2025, 8, 28)

# Implementation Testing

In [None]:
def test_monitor_system():
    """Test the monitoring system."""

    # Initialize the monitor
    # Set base_path to project root + "Data/Burst_Data"

    project_root = Path.cwd().parent
    print(f"Project root: {project_root}")
    base_path = project_root / "Data" / "Burst_Data"
    base_path.mkdir(parents=True, exist_ok=True)
    monitor = Ki2AlertsFileMonitor(str(base_path))

    print("=== Ki2 Alerts File Monitor Test ===")
    print(f"Base path: {monitor.base_path}")
    print()

    # Test directory creation
    current_path = monitor.get_current_year_month_path()
    print(f"Current year/month path: {current_path}")
    print()

    # Test paths for specific date
    test_date = date(2025, 8, 29)
    paths = monitor.get_paths_for_date(test_date)
    print(f"Paths for {test_date}:")
    for key, path in paths.items():
        print(f"  {key}: {path}")
    print()

    # Test filename parsing
    # test_files = [
    #     "20250829_hft1.csv",
    #     "20250829.csv", 
    #     "Burst_20250829_143022.csv"
    # ]

    # subfolder_types = ['data', 'data_burst', 'data_burst_symbols']

    # List monitored directories
    directories = monitor.list_all_monitored_directories()
    print(f"Monitored directories: {len(directories)}")
    for dir_info in directories:
        print(f"  {dir_info['year']}/{dir_info['month']}: {dir_info['path']}")
    return monitor

# Run the test
monitor = test_monitor_system()

2025-08-29 17:16:28,816 - INFO - Verified directory structure for 2025/08
2025-08-29 17:16:28,818 - INFO - Verified directory structure for 2025/08


Project root: c:\Users\vande\Desktop\Projects\Ki2_Alerts
=== Ki2 Alerts File Monitor Test ===
Base path: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data

Current year/month path: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data\2025\08

Paths for 2025-08-29:
  data: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data\2025\08\Data
  data_burst: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data\2025\08\Data_Burst
  data_burst_symbols: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data\2025\08\Data_Burst_Symbols
  processed_data: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data\2025\08\Processed_Data

Monitored directories: 1
  2025/08: c:\Users\vande\Desktop\Projects\Ki2_Alerts\Data\Burst_Data\2025\08


In [18]:
# Iterate through all subfolder types and scan for files in each, select only files from target date
target_date = date(2025, 8, 29)
subfolder_types = ['data', 'data_burst', 'data_burst_symbols']

for subfolder_type in subfolder_types:
    print(f"\n--- Scanning '{subfolder_type}' for {target_date} ---")
    files = monitor.scan_for_files(subfolder_type, target_date)
    filtered_files = [
        f for f in files
        if monitor.parse_filename(f, subfolder_type).get('date') == target_date
    ]
    parsed_files = [monitor.parse_filename(f, subfolder_type) for f in filtered_files]
    parsed_files = [info for info in parsed_files if info]
    for info in parsed_files:
        print(info)

2025-08-29 17:17:01,177 - INFO - Verified directory structure for 2025/08
2025-08-29 17:17:01,182 - INFO - Verified directory structure for 2025/08
2025-08-29 17:17:01,184 - INFO - Verified directory structure for 2025/08



--- Scanning 'data' for 2025-08-29 ---
{'filename': '20250829_hft1.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250829_hft1.csv', 'subfolder_type': 'data', 'date': datetime.date(2025, 8, 29), 'file_number': 1, 'date_string': '20250829'}
{'filename': '20250829_hft2.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250829_hft2.csv', 'subfolder_type': 'data', 'date': datetime.date(2025, 8, 29), 'file_number': 2, 'date_string': '20250829'}
{'filename': '20250829_hft3.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data\\20250829_hft3.csv', 'subfolder_type': 'data', 'date': datetime.date(2025, 8, 29), 'file_number': 3, 'date_string': '20250829'}

--- Scanning 'data_burst' for 2025-08-29 ---
{'filename': '20250829.csv', 'full_path': 'c:\\Users\\vande\\Desktop\\Projects\\Ki2_Alerts\\Data\\Burst_Data\\2025\\08\\Data_Burst\\20250829