In [34]:
# Import system modules
import sys
import os
sys.path.append('/Users/Volkan/Repos/somnotate-vlkuzun/src/somnotate_pipeline') # Adjust the path as necessary to your somnotate_pipeline directory

# Import required libraries for MAT to CSV conversion
import numpy as np
import pandas as pd
import h5py
import pyedflib

# Import functions from pipeline modules
from mat_to_csv import mat_to_csv
# Removed the import of generate_edf_and_visbrain_formats as we'll implement it directly in the notebook

In [35]:
# Implementation of the generate_edf_and_visbrain_formats function directly in the notebook

# Numeric codes read from the CSV "sleepStage" column -> label written to the Visbrain file.
# Any value that is not a key of this table is written as "Undefined".
_VISBRAIN_STAGE_LABELS = {1: "awake", 2: "non-REM", 3: "REM", 4: 'ambiguous', 5: 'doubt'}


def _stage_run_ends(stages, sample_frequency):
    '''
    Collapse a per-sample stage sequence into one (end_time_sec, stage) pair per run of equal consecutive labels.

    Inputs:
    stages: 1-D array-like of per-sample stage labels
    sample_frequency: float, the sampling frequency in Hz used to convert sample indices to seconds

    Outputs:
    list of (end_time_sec, stage) tuples in chronological order; the last entry ends at len(stages) / sample_frequency.
    '''
    stages = np.asarray(stages)
    n_samples = len(stages)
    total_duration = n_samples / sample_frequency
    if n_samples == 0:
        return [(total_duration, None)]

    # Index of the first sample of every new run, found with a single vectorised comparison
    # instead of a Python-level loop over every sample. NaN never compares equal to NaN,
    # so each NaN sample forms its own run.
    boundaries = np.flatnonzero(stages[1:] != stages[:-1]) + 1

    # .tolist() converts to plain Python scalars so the values format as ordinary ints/floats.
    end_times = (boundaries / sample_frequency).tolist()
    run_stages = stages[boundaries - 1].tolist()

    runs = list(zip(end_times, run_stages))
    runs.append((total_duration, stages[-1:].tolist()[0]))
    return runs


def _write_edf(edf_file, labels, signals, sample_frequency):
    '''
    Write one EDF+ file holding `signals` (one row per channel, labelled by `labels`).

    The writer is always closed. If anything fails before all samples are written, the
    incomplete file is removed so that a later run does not mistake it for a finished EDF.
    '''
    writer = pyedflib.EdfWriter(edf_file, len(signals), file_type=pyedflib.FILETYPE_EDFPLUS)
    completed = False
    try:
        for channel, label in enumerate(labels):
            writer.setSignalHeader(channel, {
                'label': label,
                'dimension': 'uV',
                'sample_frequency': sample_frequency,
                'physical_min': np.min(signals[channel]),
                'physical_max': np.max(signals[channel]),
                'digital_min': -32768,
                'digital_max': 32767,
                'transducer': '',
                'prefilter': ''
            })
        writer.writeSamples(signals)
        completed = True
    finally:
        try:
            writer.close()
        finally:
            # The caller only gets here when edf_file did not exist beforehand,
            # so anything at that path was created by this call.
            if not completed and os.path.exists(edf_file):
                os.remove(edf_file)


def generate_edf_and_visbrain_formats(mouse_ids, sessions, recordings, extra_info, test_train_or_to_score, base_directory, sample_frequency):
    '''
    Generate EDF and Visbrain stage duration format files from CSV files, respectively for EEG and EMG data and sleep stage annotations.

    Every combination of mouse ID, session and recording is looked up as
    '{mouse_id}_{session}_{recording}[_{extra_info}].csv'. Combinations whose CSV is missing,
    or whose EDF file already exists, are reported and skipped.

    Inputs:
    mouse_ids: list of str, mouse IDs
    sessions: list of str, session IDs
    recordings: list of str, recording IDs
    extra_info: str, additional information to include in the output filenames for differentiation (optional)
    test_train_or_to_score: str, 'test', 'train' or 'to_score' to specify which dataset to process
    base_directory: str, path to the base directory where the CSV files are stored and where the output EDF and annotations files should be saved
    sample_frequency: float, the sampling frequency in Hz for the data

    Outputs:
    EDF files and annotations in Visbrain stage duration format are saved in the 'edfs' and '{test_train_or_to_score}_manual_annotation' directories, respectively.
    If writing an EDF file fails, the incomplete file is deleted and the error is re-raised.
    '''

    # Define input and output directories
    dataset_dir = os.path.join(base_directory, f"{test_train_or_to_score}_set")
    csv_input_dir = os.path.join(dataset_dir, f"{test_train_or_to_score}_csv_files")
    edf_output_dir = os.path.join(dataset_dir, 'edfs')
    annotations_output_dir = os.path.join(dataset_dir, f"{test_train_or_to_score}_manual_annotation")

    os.makedirs(edf_output_dir, exist_ok=True)
    os.makedirs(annotations_output_dir, exist_ok=True)

    channel_labels = ["EEG1", "EEG2", "EMG"]

    # Process each CSV file and generate output
    for mouse_id in mouse_ids:
        for session in sessions:
            for recording in recordings:
                # The same stem (with the optional extra_info suffix) names the CSV, the EDF and the annotation file
                file_stem = f"{mouse_id}_{session}_{recording}"
                if extra_info:
                    file_stem = f"{file_stem}_{extra_info}"

                csv_file = os.path.join(csv_input_dir, f"{file_stem}.csv")
                edf_file = os.path.join(edf_output_dir, f"output_{file_stem}.edf")
                visbrain_file = os.path.join(annotations_output_dir, f"annotations_visbrain_{file_stem}.txt")

                if not os.path.isfile(csv_file):
                    print(f"File not found: {csv_file}")
                    continue
                if os.path.exists(edf_file):
                    print(f"EDF file already exists: {edf_file}")
                    continue

                print(f"Processing file: {csv_file}")
                df = pd.read_csv(csv_file)

                # Stack the EEG and EMG columns as one row per channel, in channel_labels order
                all_data = np.array([df[label].to_numpy() for label in channel_labels])
                _write_edf(edf_file, channel_labels, all_data, sample_frequency)

                # Prepare annotations in Visbrain stage duration format: one "label  end_time" line per run.
                # NOTE(review): the leading "Undefined 10" entry is reproduced unchanged from the
                # previous implementation - confirm that downstream readers rely on it.
                annotations = [(10, "Undefined")]
                annotations.extend(_stage_run_ends(df["sleepStage"].to_numpy(), sample_frequency))

                # Write annotations to a text file
                last_time_value = annotations[-1][0]
                with open(visbrain_file, "w") as annotation_file:
                    annotation_file.write(f"*Duration_sec    {last_time_value}\n")
                    annotation_file.write("*Datafile\tUnspecified\n")
                    for end, stage in annotations:
                        stage_label = _VISBRAIN_STAGE_LABELS.get(stage, "Undefined")
                        annotation_file.write(f"{stage_label}    {end}\n")

                print(f"EDF file and annotations created successfully for {mouse_id}, {session}, {recording} with extra info '{extra_info}'.")

# Somnotate Pipeline Notebook

This notebook provides a streamlined interface for running the somnotate pipeline:
1. Convert MAT files to CSV format
2. Generate EDF and Visbrain format files for visualization and analysis

Run each section in sequence, providing the requested inputs when prompted.

In [20]:
# STEP 1: MAT to CSV Conversion
# ---------------------------
# Collects the settings consumed by the conversion cell that follows:
# train_test_or_to_score, output_directory_path, sampling_rate,
# sleep_stage_resolution and file_paths.
print("STEP 1: MAT to CSV Conversion")
print("-----------------------------")

# Request user inputs for MAT to CSV conversion.
# Free-text answers are stripped: pasted values often carry a trailing space or newline,
# which would otherwise end up inside directory names and fail the file checks below.
train_test_or_to_score = input("Enter dataset type ('train', 'test', or 'to_score'): ").strip()
output_directory_path = input(f"Enter the output directory path for {train_test_or_to_score} CSV files, without quotes (e.g., Z:/somnotate/to_score_set/to_score_csv_files): ").strip()
sampling_rate = int(input("Enter the sampling rate in Hz (e.g., 512): "))
sleep_stage_resolution = int(input("Enter the sleep stage resolution in seconds (e.g., 10): "))

# Allow user to enter file paths one by one
file_paths = []
print("\nEnter the full paths of .mat files to convert (press Enter on an empty line to finish):")
while True:
    file_path = input("Enter file path: ").strip()
    if not file_path:
        # An empty (or whitespace-only) line ends the list
        break
    if os.path.isfile(file_path) and file_path.endswith('.mat'):
        file_paths.append(file_path)
    else:
        print("Invalid file path. Please enter a valid .mat file path.")

# Print summary of inputs
print("\nInput Summary for MAT to CSV Conversion:")
print(f"Dataset type: {train_test_or_to_score}")
print(f"Output directory: {output_directory_path}")
print(f"Sampling rate: {sampling_rate} Hz")
print(f"Sleep stage resolution: {sleep_stage_resolution} seconds")
print(f"Files to process: {len(file_paths)}")
for position, mat_path in enumerate(file_paths, start=1):
    print(f"  {position}. {mat_path}")

STEP 1: MAT to CSV Conversion
-----------------------------

Enter the full paths of .mat files to convert (press Enter on an empty line to finish):

Enter the full paths of .mat files to convert (press Enter on an empty line to finish):

Input Summary for MAT to CSV Conversion:
Dataset type: to_score
Output directory: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_csv_files
Sampling rate: 512 Hz
Sleep stage resolution: 10 seconds
Files to process: 1
  1. /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_mat_files/sub-015_ses-01_recording-01_time-0-20h.mat

Input Summary for MAT to CSV Conversion:
Dataset type: to_score
Output directory: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_csv_files
Sampling rate: 512 Hz
Sleep stage resolution: 10 seconds
Files to process: 1
  1. /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_mat_files/sub-015_ses-01_recording-01_time-0-20h.mat


In [21]:
# Execute the MAT to CSV conversion if inputs are valid.
# Relies on file_paths, output_directory_path, sampling_rate and sleep_stage_resolution
# from the input cell above.
if file_paths and output_directory_path:
    try:
        print("Starting MAT to CSV conversion process...")

        # Create the output directory if it doesn't exist
        if not os.path.exists(output_directory_path):
            os.makedirs(output_directory_path)
            print(f"Created output directory: {output_directory_path}")

        # Remember the modification time of every CSV already present, so the report below
        # lists only files written by this run rather than everything in the directory.
        csv_mtimes_before = {
            name: os.path.getmtime(os.path.join(output_directory_path, name))
            for name in os.listdir(output_directory_path)
            if name.endswith('.csv')
        }

        # Execute the conversion
        mat_to_csv(file_paths, output_directory_path, sampling_rate, sleep_stage_resolution)

        print("\nMAT to CSV conversion completed successfully!")
        print(f"CSV files are saved to: {output_directory_path}")

        # List the output files that are new or were rewritten by this run
        csv_mtimes_after = {
            name: os.path.getmtime(os.path.join(output_directory_path, name))
            for name in os.listdir(output_directory_path)
            if name.endswith('.csv')
        }
        output_files = sorted(
            name for name, modified in csv_mtimes_after.items()
            if csv_mtimes_before.get(name) != modified
        )
        print(f"\nCreated {len(output_files)} CSV files:")
        for position, csv_name in enumerate(output_files, start=1):
            print(f"  {position}. {csv_name}")

    except Exception as e:
        print(f"Error during conversion: {str(e)}")
else:
    print("Error: Missing required inputs. Please provide valid file paths and output directory.")

Starting MAT to CSV conversion process...
Processing file: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_mat_files/sub-015_ses-01_recording-01_time-0-20h.mat
EEG1 data extracted successfully.
EEG2 data extracted successfully.
EMG data extracted successfully.
Length of upsampled sleep stages (5120) does not match length of EEG data (37319680)
EEG1 data extracted successfully.
EEG2 data extracted successfully.
EMG data extracted successfully.
Length of upsampled sleep stages (5120) does not match length of EEG data (37319680)
Saved CSV to: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_csv_files/sub-015_ses-01_recording-01_time-0-20h.csv

MAT to CSV conversion completed successfully!
CSV files are saved to: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_csv_files

Created 1 CSV files:
  1. sub-015_ses-01_recording-01_time-0-20h.csv
Saved CSV to: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_

# EDF and Visbrain Format Generation

After converting MAT files to CSV, the next step is to generate EDF files for the EEG/EMG data and Visbrain format files for the sleep stage annotations. This allows for visualization and further analysis of the data.

In [36]:
# STEP 2: Generate EDF and Visbrain Format Files
# ----------------------------------------------
# Collects the arguments for generate_edf_and_visbrain_formats, shows a summary,
# and runs the generation once the user confirms.
print("STEP 2: Generate EDF and Visbrain Format Files")
print("-----------------------------------------")

# Suggest defaults from the MAT to CSV step when its variables exist in this kernel.
# The base directory is two levels above the CSV output directory.
suggested_base_dir = os.path.dirname(os.path.dirname(output_directory_path)) if 'output_directory_path' in locals() else ""
suggested_dataset = train_test_or_to_score if 'train_test_or_to_score' in locals() else ""
suggested_sample_frequency = sampling_rate if 'sampling_rate' in locals() else ""

# Request inputs for EDF and Visbrain format generation
print("\nEnter information for EDF and Visbrain format generation:")
print("(Leave input blank to use values from previous step where applicable)")

# Get dataset type (reuse from previous step if available)
dataset_input = input(f"Enter dataset type ('train', 'test', or 'to_score') [{suggested_dataset}]: ").strip()
test_train_or_to_score = dataset_input if dataset_input else suggested_dataset

# Get base directory (extract from previous output directory if possible)
base_dir_input = input(f"Enter the base somnotate directory path [{suggested_base_dir}]: ").strip()
base_directory = base_dir_input if base_dir_input else suggested_base_dir

# Use correct variable name (sample_frequency) for pyedflib compatibility.
# A blank answer falls back to the sampling rate entered in the previous step, as the
# banner above promises; float() still raises ValueError if no value is available.
sample_frequency_input = input(f"Enter the sampling rate in Hz (e.g., 512.0) [{suggested_sample_frequency}]: ").strip()
sample_frequency = float(sample_frequency_input if sample_frequency_input else suggested_sample_frequency)

# Get subject and recording information.
# Each entry is stripped and empty entries are dropped, so "sub-001, sub-002" or a
# trailing comma does not produce filenames containing stray spaces or empty parts.
print("\nEnter subject and recording information (format: comma-separated values without spaces)")
mouse_ids_input = input("Enter mouse IDs (e.g., sub-001,sub-002): ")
mouse_ids = [entry.strip() for entry in mouse_ids_input.split(',') if entry.strip()]

sessions_input = input("Enter session IDs (e.g., ses-01,ses-02): ")
sessions = [entry.strip() for entry in sessions_input.split(',') if entry.strip()]

recordings_input = input("Enter recording IDs (e.g., recording-01,recording-02): ")
recordings = [entry.strip() for entry in recordings_input.split(',') if entry.strip()]

extra_info = input("Enter any extra details about the recording (leave blank if not applicable): ").strip()

# Print summary of inputs
print("\nInput Summary for EDF and Visbrain Format Generation:")
print(f"Dataset type: {test_train_or_to_score}")
print(f"Base directory: {base_directory}")
print(f"Sampling rate: {sample_frequency} Hz")
print(f"Mouse IDs: {', '.join(mouse_ids)}")
print(f"Session IDs: {', '.join(sessions)}")
print(f"Recording IDs: {', '.join(recordings)}")
print(f"Extra info: {extra_info if extra_info else 'None'}")

# Confirm before proceeding
proceed = input("\nProceed with EDF and Visbrain format generation? (y/n): ").strip().lower()
if proceed != 'y':
    print("EDF and Visbrain format generation cancelled.")
elif not (mouse_ids and sessions and recordings):
    # With an empty list the generation loop would do nothing and still report success
    print("Error: Missing required inputs. Please provide at least one mouse ID, session ID and recording ID.")
else:
    try:
        print("Starting EDF and Visbrain format generation...")

        # Calculate expected output paths for verification
        edf_output_dir = os.path.join(base_directory, f"{test_train_or_to_score}_set", 'edfs')
        annotations_output_dir = os.path.join(base_directory, f"{test_train_or_to_score}_set", f"{test_train_or_to_score}_manual_annotation")

        # Create directories if they don't exist
        os.makedirs(edf_output_dir, exist_ok=True)
        os.makedirs(annotations_output_dir, exist_ok=True)

        # Call the function defined directly in the notebook
        generate_edf_and_visbrain_formats(
            mouse_ids,
            sessions,
            recordings,
            extra_info,
            test_train_or_to_score,
            base_directory,
            sample_frequency
        )

        print("\nEDF and Visbrain format generation completed!")
        print(f"EDF files saved to: {edf_output_dir}")
        print(f"Visbrain annotations saved to: {annotations_output_dir}")

    except Exception as e:
        print(f"Error during EDF and Visbrain format generation: {str(e)}")

STEP 2: Generate EDF and Visbrain Format Files
-----------------------------------------

Enter information for EDF and Visbrain format generation:
(Leave input blank to use values from previous step where applicable)

Enter subject and recording information (format: comma-separated values without spaces)

Enter subject and recording information (format: comma-separated values without spaces)

Input Summary for EDF and Visbrain Format Generation:
Dataset type: to_score
Base directory: /Volumes/harris/volkan/somnotate-vlkuzun-testing
Sampling rate: 512.0 Hz
Mouse IDs: sub-015
Session IDs: ses-01
Recording IDs: recording-01
Extra info: time-0-20h

Input Summary for EDF and Visbrain Format Generation:
Dataset type: to_score
Base directory: /Volumes/harris/volkan/somnotate-vlkuzun-testing
Sampling rate: 512.0 Hz
Mouse IDs: sub-015
Session IDs: ses-01
Recording IDs: recording-01
Extra info: time-0-20h
Starting EDF and Visbrain format generation...
Processing file: /Volumes/harris/volkan/som



EDF file and annotations created successfully for sub-015, ses-01, recording-01 with extra info 'time-0-20h'.

EDF and Visbrain format generation completed!
EDF files saved to: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/edfs
Visbrain annotations saved to: /Volumes/harris/volkan/somnotate-vlkuzun-testing/to_score_set/to_score_manual_annotation


# Pipeline Summary

If all steps completed successfully, you should now have:

1. CSV files containing the extracted data from your MAT files
2. EDF files containing the EEG and EMG signal data for visualization
3. Visbrain format annotation files for sleep stage analysis

These files can now be used for further analysis or visualization using tools like Visbrain.