In [1]:
import os
import csv
from pathlib import Path

In [2]:

def process_specific_m_folders(root_directory, m_folders, output_csv):
    """
    Scan selected M-folders for session directories that hold a paired
    ``.t`` and ``.ntt`` file, and write one CSV row per such directory.

    A directory qualifies when any underscore-separated part of its name
    starts with the M-folder name and contains ``-20``
    (e.g. ``M016-2021-03-04_run1``). Inside a qualifying directory only files
    whose names start with the FIRST underscore-separated part of the
    directory name are considered; both a ``.t`` and a ``.ntt`` file must be
    present for the directory to be reported.

    Traversal is sorted (M-folders, sub-directories and file names), so the
    CSV is reproducible across machines. If several files match the same
    extension, the alphabetically last one is reported.

    Args:
        root_directory (str): Path to the root directory
        m_folders (list): List of M-folder names to process (e.g., ['M016', 'M017'])
        output_csv (str): Path to the output CSV file; missing parent
            directories are created.

    Returns:
        None. The CSV (header plus one row per matching directory) is the
        only output; a warning is printed for each M-folder that is missing.

    Raises:
        OSError: If the output directory cannot be created or the CSV file
            cannot be written.
    """
    results = []

    # Process each specified M-directory
    for m_dir in sorted(m_folders):
        m_path = os.path.join(root_directory, m_dir)

        # Skip anything that is not an existing directory (a plain file with
        # the same name would otherwise be walked silently with no result).
        if not os.path.isdir(m_path):
            print(f"Warning: Directory {m_path} does not exist. Skipping...")
            continue

        # Walk through all subdirectories in this M-directory
        for dirpath, dirnames, filenames in os.walk(m_path):
            # Sorting in place steers os.walk (top-down) into a deterministic
            # traversal order instead of the OS listing order.
            dirnames.sort()

            # Check if directory matches the pattern (contains MXXX-20YY-MM-DD)
            name_parts = os.path.basename(dirpath).split('_')
            if not any(part.startswith(m_dir) and '-20' in part for part in name_parts):
                continue

            # NOTE: the prefix is always the first underscore-separated part,
            # even when a later part is the one that matched above.
            prefix = name_parts[0]

            # Look for the two required files
            opto_file = None
            ntt_file = None

            # Sorted so that the pick among several candidates is stable.
            for file in sorted(filenames):
                if not file.startswith(prefix):
                    continue
                if file.endswith('.t'):
                    opto_file = file
                elif file.endswith('.ntt'):
                    ntt_file = file

            # If both files are found, add to results
            if opto_file and ntt_file:
                results.append([
                    os.path.abspath(dirpath),  # absolute folder path
                    opto_file,                 # opto cluster file
                    ntt_file,                  # ntt file
                    m_dir                      # M-directory identifier
                ])

    # Make sure the destination folder exists; open() does not create it.
    output_parent = os.path.dirname(output_csv)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    # Write results to CSV
    with open(output_csv, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Folder Path', 'Opto Cluster', 'NTT File', 'M Directory'])
        writer.writerows(results)

In [6]:
# Input root that contains the M-folders (relative to the notebook's working directory).
root_dir = "data"
# Build the output path with os.path.join so the separator is correct on every
# platform; a hard-coded backslash only works as a separator on Windows.
output_file = os.path.join("output", "folder_analysis.csv")

# Specify which M-folders to process
m_folders_to_process = ['M016', 'M017', 'M018', 'M019', 'M020']

process_specific_m_folders(root_dir, m_folders_to_process, output_file)