In [None]:
import os
import pandas as pd
from tkinter import Tk, filedialog

# Ask the user which folder holds the output_roi CSVs.
root = Tk()
root.withdraw()  # Hide the root window
try:
    input_dir = filedialog.askdirectory(title="Select folder with output_roi CSVs")
finally:
    # Tear down the hidden root so no Tk instance lingers after the dialog closes.
    root.destroy()

# The dialog hands back an empty value when it is cancelled; fail with a clear
# message instead of letting os.listdir('') raise an opaque error.
if not input_dir:
    raise FileNotFoundError("No folder selected; re-run this cell and choose a folder.")

# Collect only .csv files with 'output_roi' in the filename.
# Sorted so the load order (and the previewed example) is reproducible;
# os.listdir returns entries in arbitrary order.
csv_files = sorted(
    f for f in os.listdir(input_dir) if f.endswith(".csv") and "output_roi" in f
)

# Read each CSV into a dictionary keyed by filename.
# header=None: the first row of each file is read as data, not as column names.
csv_data = {
    filename: pd.read_csv(os.path.join(input_dir, filename), header=None)
    for filename in csv_files
}

# Report status and preview one file
print(f"{len(csv_data)} CSV files loaded from: {input_dir}")
if csv_data:
    example_key = next(iter(csv_data))
    print(f"Example file: {example_key}")
    display(csv_data[example_key].head())


In [None]:
# Number of loaded CSVs. Printed explicitly: a bare expression that is not the
# last line of a cell is evaluated but never shown.
print(len(csv_data))
# Inspect one trace by filename (KeyError if that file was not loaded).
csv_data['output_roi2_frame1_Z-38.7.csv']

In [40]:
# Alternative for batch via HiperGator etc
import os
import pandas as pd
import pickle
import re

# Step 1: Load directory list from text file.
# The first non-blank line is the master directory; every remaining non-blank
# line is a subdirectory (joined onto the master directory) to process.
with open("dir_list.txt", "r") as f:
    lines = [line.strip() for line in f if line.strip()]

if not lines:
    raise ValueError(
        "dir_list.txt is empty: expected a master directory line followed by subdirectory names"
    )

master_dir = lines[0]
subdirs = lines[1:]

# Z position embedded in the filename stem, e.g. 'Z-38.7' or 'Z85.4'.
# The minus sign is inside the capture group so negative positions keep their
# sign (previously 'Z-38.7' was recorded as 38.7).
z_pattern = re.compile(r'Z(-?\d+(?:\.\d+)?)')

# Directories for which outputs were actually written (drives the final summary).
processed_dirs = []

# Step 2: Loop through each subdirectory
for subdir in subdirs:
    input_dir = os.path.join(master_dir, subdir)
    if not os.path.isdir(input_dir):
        print(f"⚠️ Skipping missing directory: {input_dir}")
        continue

    # Step 3: Load all matching CSV files.
    # The directory is listed once; all_csvs is reused below for the skipped-file log.
    all_csvs = [name for name in os.listdir(input_dir) if name.endswith(".csv")]
    # Sorted so the pickle contents and metadata rows have a reproducible order.
    csv_files = sorted(
        name for name in all_csvs
        if (
            name.startswith("output_roi")
            and "frame" in name
            and "Z" in name
            and "_valley_" not in name
        )
    )
    if not csv_files:
        print(f"⚠️ No matching CSVs found in: {input_dir}")
        continue

    # header=None: the first row of each file is read as data, not as column names.
    csv_data = {
        filename: pd.read_csv(os.path.join(input_dir, filename), header=None)
        for filename in csv_files
    }

    # Step 4: Save the dictionary to a pickle file named after the subdirectory
    pickle_filename = f"{subdir}_roi_signals.pkl"
    pickle_path = os.path.join(input_dir, pickle_filename)
    with open(pickle_path, "wb") as f:
        pickle.dump(csv_data, f)

    print(f"✅ Saved: {pickle_path}")

    # Step 5: Create metadata with file info and Z (one record per CSV)
    meta = []
    for fname, df in csv_data.items():
        # Remove the extension first so '.csv' cannot interfere with the Z match.
        fname_no_ext = os.path.splitext(fname)[0]
        z_match = z_pattern.search(fname_no_ext)
        z_pos = float(z_match.group(1)) if z_match else None

        # The reductions run per column; .iloc[0] keeps the first column's value.
        meta.append({
            "filename": fname,
            "rows": len(df),
            "min": df.min().iloc[0],
            "max": df.max().iloc[0],
            "std": df.std().iloc[0],
            "n_unique": df.nunique().iloc[0],
            "z_motor_microns": z_pos
        })

    # Build the table and the file logs once per subdirectory, after the loop
    # over files (this used to be repeated for every file).
    meta_df = pd.DataFrame(meta)

    # Step 6: Work out which CSVs in the folder were processed vs. skipped (for logging)
    skipped_files = sorted(set(all_csvs) - set(csv_files))
    processed_files = sorted(csv_files)

    # Save metadata to TXT (formatted string)
    meta_txt_path = os.path.join(input_dir, f"csv_metadata_{subdir}.txt")
    with open(meta_txt_path, "w") as f:
        f.write(meta_df.to_string(index=False))

    # Write logs
    # TODO: add a directory header to each log file
    with open(os.path.join(input_dir, f"processed_files_{subdir}.txt"), "w") as f:
        f.write("\n".join(processed_files))

    with open(os.path.join(input_dir, f"skipped_files_{subdir}.txt"), "w") as f:
        f.write("\n".join(skipped_files))

    processed_dirs.append(input_dir)

# Final summary. Only reference output paths when something was actually
# written; otherwise input_dir / meta_txt_path would be undefined or stale.
print("   ***   ")
if processed_dirs:
    outdir = os.path.split(processed_dirs[-1])
    metaoutdir = os.path.split(meta_txt_path)
    print(f"📝 Metadata TXT saved to respective subdirectories (eg: {metaoutdir[0]})")
    print(f"📄 Logged processed and skipped files in subdirectories of {outdir[0]}")
else:
    print("⚠️ No subdirectories were processed; no files were written.")


✅ Saved: /Users/jcoleman/Documents/--LARGE DATA--/#Pizzi/TBI-sham rbc velocity/results/070825_pointscan_csvFiles/mht01_roiA/mht01_roiA_roi_signals.pkl
✅ Saved: /Users/jcoleman/Documents/--LARGE DATA--/#Pizzi/TBI-sham rbc velocity/results/070825_pointscan_csvFiles/mht03_roiA/mht03_roiA_roi_signals.pkl
✅ Saved: /Users/jcoleman/Documents/--LARGE DATA--/#Pizzi/TBI-sham rbc velocity/results/070825_pointscan_csvFiles/mht03_roiB/mht03_roiB_roi_signals.pkl
✅ Saved: /Users/jcoleman/Documents/--LARGE DATA--/#Pizzi/TBI-sham rbc velocity/results/070825_pointscan_csvFiles/mht05_roiA/mht05_roiA_roi_signals.pkl
✅ Saved: /Users/jcoleman/Documents/--LARGE DATA--/#Pizzi/TBI-sham rbc velocity/results/070825_pointscan_csvFiles/mht05_roiB/mht05_roiB_roi_signals.pkl
✅ Saved: /Users/jcoleman/Documents/--LARGE DATA--/#Pizzi/TBI-sham rbc velocity/results/070825_pointscan_csvFiles/mht07_roiA/mht07_roiA_roi_signals.pkl
✅ Saved: /Users/jcoleman/Documents/--LARGE DATA--/#Pizzi/TBI-sham rbc velocity/results/070825_

In [43]:
# LOAD PICKLE FILE

import os
import pandas as pd
import pickle
from tkinter import Tk, filedialog

# Ask the user which pickle file to load.
root = Tk()
root.withdraw()  # Hide the root window
try:
    pickle_path = filedialog.askopenfilename(title="Select a PKL file")
finally:
    # Tear down the hidden root so no Tk instance lingers after the dialog closes.
    root.destroy()

# The dialog hands back an empty value when it is cancelled; fail with a clear
# message instead of letting open('') raise an opaque error.
if not pickle_path:
    raise FileNotFoundError("No PKL file selected; re-run this cell and choose a file.")

# LOAD Pickle file
# WARNING: unpickling can execute arbitrary code. Only open pickle files that
# were produced by this notebook or come from a trusted source.
# The result is stored in `csv_data_tmp`, so an existing `csv_data` is left untouched.
with open(pickle_path, "rb") as f:
    csv_data_tmp = pickle.load(f)

In [45]:
# NOTE(review): the preceding cell loads the selected pickle into `csv_data_tmp`,
# but this cell reads `csv_data`, i.e. whatever an earlier cell left in the
# kernel. Confirm which of the two is meant to be inspected here.
# List the available filenames. Printed explicitly: a bare expression that is
# not the last line of a cell is evaluated but never shown.
print(list(csv_data.keys()))
# Inspect one trace by filename (KeyError if that file is not present).
csv_data['output_roi4_frame3_Z85.4.csv']

Unnamed: 0,0
0,559790.0
1,553180.0
2,583450.0
3,572700.0
4,587820.0
...,...
3602,198110.0
3603,219010.0
3604,187640.0
3605,201780.0


In [46]:
# csv_data.items()



In [53]:
# Column names for the per-file summary table (one row per CSV file).
headers_main = [
    'filename',
    'rows',
    'min',
    'max',
    'std',
    'n_unique',
    'z_motor_microns',
]

# Column names for the extracted data.
# 'valley_durations' holds the estimated velocity measures for each valley/RBC.
headers_data = [
    'filename',
    'roiNum',
    'frameNum',
    'valley_counts',
    'flux',
    'mean_velocity',
    'std_velocity',
    'valley_durations',
    'datapoint_rows',
    'datapoint_seconds',
    'threshold',
    'prominence',
    'SNR',
    'min',
    'max',
    'z_motor_microns',
    'peak_counting_algorithmID',
]

# Example row of the summary table, as recorded:
#                      filename  rows  min      max           std  n_unique  z_motor_microns
# output_roi2_frame1_Z-38.7.csv  3607  0.0  68115.0  12052.769498      3461             38.7
# NOTE(review): the filename reads 'Z-38.7' while the recorded value is 38.7 —
# the sign looks dropped; confirm whether that is intended.

In [57]:
# Echo the list so its contents can be checked in the cell output.
headers_data

['filename',
 'roiNum',
 'frameNum',
 'valley_counts',
 'flux',
 'mean_velocity',
 'std_velocity',
 'valley_durations',
 'datapoint_rows',
 'datapoint_seconds',
 'threshold',
 'prominence',
 'SNR',
 'min',
 'max',
 'z_motor_microns',
 'peak_counting_algorithmID']