In [1]:
import os
import pandas as pd
import numpy as np
from scipy.signal import butter, filtfilt, resample 

# Location of the per-subject feature folders, plus the raw directory listing.
# NOTE: `dir` shadows the builtin dir(); the name is kept because the loading
# code below builds its file paths from it.
dir = '../Consolidated_Features'
Subject = os.listdir(dir)
NumSub = len(Subject)

# Subject IDs (as strings) that take part in the analysis
subj_list = [
    "3128", "3129", "3130", "3131", "3132", "3133", "3136", "3137",
    "3138", "3139", "3140", "3141", "3142", "3143", "3147", "3148",
    "3149", "3150", "3151", "3152", "3153", "3154", "3155", "3156",
    "3158", "3159", "3160", "3162",
    "6037", "6038", "6043", "6044", "6045", "6046", "6047", "6048", "6049",
]

# Stimulus names and feature names, together with their counts
Stimulus = ['Rest', 'Reading', 'SpeechPrep', 'Speech', 'Recovery']
NumStim = len(Stimulus)
Feature = ['HR', 'PAT', 'PEP', 'PPGamp', 'PTTrecip']
NumFeat = len(Feature)

# Sort subject IDs
def _parse_subject_id(entry):
    """Return the integer that follows the 3-character prefix of `entry`.

    Returns None when the remainder is not an integer, so that stray
    directory entries (e.g. '.DS_Store', '.ipynb_checkpoints') are skipped
    instead of aborting the cell with a ValueError.
    """
    try:
        return int(entry[3:])
    except ValueError:
        return None


def _load_subject_features(subject_id):
    """Read every stimulus/feature CSV of one subject.

    Files are read from <dir>/sub<subject_id>/stim<stim>_<feature>.csv.
    Returns a nested list indexed [stimulus][feature] whose items are the
    `.values` arrays of the corresponding CSV files.
    """
    subject_dir = os.path.join(dir, 'sub' + str(subject_id))
    return [
        [
            pd.read_csv(
                os.path.join(subject_dir, 'stim' + str(stim) + '_' + Feature[feat] + '.csv')
            ).values
            for feat in range(NumFeat)
        ]
        for stim in range(NumStim)
    ]


_parsed_ids = [_parse_subject_id(entry) for entry in Subject]
# Kept as a sorted float array, matching the dtype this variable always had.
Subj = np.array(sorted(sid for sid in _parsed_ids if sid is not None), dtype=float)

# Initialize dataframes for different groups
dataframes_MI = []   # subjects whose ID starts with '3' (patients)
dataframes_Ht = []   # subjects whose ID starts with '6' (healthy controls)
dataframes_all = []  # every selected subject, in ascending ID order

# Load data for each selected subject. Each CSV is read from disk only once;
# the group lists receive their own array copies so that modifying one
# container in place never affects the other.
for subject_id in (int(value) for value in Subj):
    subject_key = str(subject_id)

    # Check if the subject is in the list of subjects to process
    if subject_key not in subj_list:
        continue

    subject_data = _load_subject_features(subject_id)
    dataframes_all.append(subject_data)

    # Subject ID starts with '3', indicating a patient
    if subject_key.startswith('3'):
        dataframes_MI.append([[arr.copy() for arr in per_stim] for per_stim in subject_data])

    # Subject ID starts with '6', indicating a healthy control
    if subject_key.startswith('6'):
        dataframes_Ht.append([[arr.copy() for arr in per_stim] for per_stim in subject_data])

# Index of the last subject stored in each container (-1 when it is empty)
sub_MI = len(dataframes_MI) - 1
sub_Ht = len(dataframes_Ht) - 1
sub_all = len(dataframes_all) - 1

In [13]:
# Subjects regarded as healthy; every other entry of subj_list is non-healthy
healthy_subjects = ["6037", "6038", "6043", "6044", "6045", "6046", "6047", "6048", "6049"]
non_healthy_subjects = [subj for subj in subj_list if subj not in healthy_subjects]

# data_labels[subject][stimulus][feature] holds the subject's health label:
# 1 = healthy, 0 = non-healthy. The same label is repeated for every
# stimulus/feature combination of a subject.
data_labels = []

for subj_id in subj_list:
    health_status = 1 if subj_id in healthy_subjects else 0
    # A fresh inner list per stimulus, so no two rows share the same object
    data_labels.append([[health_status] * NumFeat for _ in range(NumStim)])

In [2]:
def access_df(df, participant_index, stimulus_index, feature_index):
    """Return the first two columns of one participant/stimulus/feature entry.

    `df` is indexed as df[participant][stimulus][feature] and each entry is a
    2-D array. Column 0 is returned as `time`, column 1 as `feat_values`.
    """
    samples = df[participant_index][stimulus_index][feature_index]
    return samples[:, 0], samples[:, 1]

def join_stimulus(df, participant_index, feature_index, num_stim=None):
    """Concatenate one feature of one participant across consecutive stimuli.

    Parameters
    ----------
    df : nested sequence indexed as df[participant][stimulus][feature]; each
        entry is a 2-D array whose column 0 is returned as time and column 1
        as the feature value.
    participant_index, feature_index : indices into `df`.
    num_stim : number of leading stimuli (0 .. num_stim-1) to join. When
        omitted, the notebook-level constant `NumStim` is used, which is the
        behaviour this function always had.

    Returns
    -------
    (time, feat_values) : two 1-D arrays, the per-stimulus columns joined in
        stimulus order.
    """
    if num_stim is None:
        num_stim = NumStim

    segments = [
        df[participant_index][stimulus_index][feature_index]
        for stimulus_index in range(num_stim)
    ]
    time = np.concatenate([segment[:, 0] for segment in segments])
    feat_values = np.concatenate([segment[:, 1] for segment in segments])

    return time, feat_values

In [3]:
# %% Baseline correction (normalization)
mean_all = []
data_all = []
data_y_all = []
num_data = 80                # number of samples kept from the middle of each signal
dp = round(num_data / 2)     # half-width of that window


def _middle_window(signal, half_width):
    """Return up to 2*half_width samples centred on the middle of `signal`.

    The start index is clamped at 0. Without the clamp, a signal shorter than
    the window produces a negative start, which Python slicing interprets as
    "count from the end" and silently selects the wrong samples. For signals
    long enough to hold the full window the result is unchanged.
    """
    mid = round(len(signal) / 2)
    return signal[max(mid - half_width, 0):mid + half_width]


# data_all[subj][stim][feat]   : baseline-normalized window
#                                stim 0 -> stimulus 0 (baseline), stim 1 -> stimulus 2
# data_y_all[subj][stim][feat] : column vector of class labels (0 for stim 0, 1 for stim 1)
for subj in range(len(subj_list)):
    data_all.append([])
    data_y_all.append([])

    for stim in range(2):
        data_all[subj].append([])
        data_y_all[subj].append([])

        for feat in range(NumFeat):
            baseline = dataframes_all[subj][0][feat][:, 1]
            baseline_mid = _middle_window(baseline, dp)
            feature = dataframes_all[subj][2][feat][:, 1]
            feature_mid = _middle_window(feature, dp)
            baseline_mean = np.mean(baseline_mid)

            # NOTE(review): this value does not depend on `stim`, so it is
            # appended once per stim iteration and every (subj, feat) pair
            # appears twice in mean_all. Kept as is because later cells may
            # rely on that length -- confirm before changing.
            mean_all.append((np.mean(feature_mid) - baseline_mean) / baseline_mean)

            if stim == 0:
                normalized = (baseline_mid - baseline_mean) / baseline_mean
                labels = np.zeros([len(normalized), 1])
            else:
                normalized = (feature_mid - baseline_mean) / baseline_mean
                labels = np.ones([len(normalized), 1])

            data_all[subj][stim].append(normalized)
            data_y_all[subj][stim].append(labels)

# Flatten the nested containers into one sample list and one label list per
# feature. Within a feature the order is: all subjects for stim 0, followed
# by all subjects for stim 1.
data_all_1 = []
data_y_all_1 = []
n_subjects = len(subj_list)

for feat in range(NumFeat):
    feat_samples = []
    feat_labels = []

    for stim in (0, 1):
        for subj in range(n_subjects):
            feat_samples.extend(data_all[subj][stim][feat])
            feat_labels.extend(data_y_all[subj][stim][feat])

    data_all_1.append(feat_samples)
    data_y_all_1.append(feat_labels)

In [16]:
# Show the complete label structure, then a single label
# (first subject, first stimulus, first feature) on its own line.
print(data_labels, data_labels[0][0][0], sep="\n")

[[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 