In [56]:
import os
import pandas as pd

# Constants
DIR_PATH = '../Consolidated_Features'
SUBJECT_LIST = [
    "3128", "3129", "3130", "3131", "3132", "3133", "3136", "3137", "3138", "3139", 
    "3140", "3141", "3142", "3143", "3147", "3148", "3149", "3150", "3151", "3152", 
    "3153", "3154", "3155", "3156", "3158", "3159", "3160", "3162", "6037", "6038", 
    "6043", "6044", "6045", "6046", "6047", "6048", "6049"
]
NUM_STIMULI = 5
STIMULUS_TYPES = ['Rest', 'Reading', 'SpeechPrep', 'Speech', 'Recovery']
NUM_FEATURES = 5
FEATURE_TYPES = ['HR', 'PAT', 'PEP', 'PPGamp', 'PTTrecip']

# List to store dataframes for MI subjects
dataframes_mi = []

# List to store dataframes for healthy control subjects
dataframes_ht = []

# List to store dataframes for all subjects
dataframes_all = []

# Variables to track index of MI, healthy control, and all subjects
sub_mi = -1
sub_ht = -1
sub_all = -1

# Loop through all subjects
for sub in os.listdir(DIR_PATH):
    subject_id = int(sub[3:])  # Extract subject ID from directory name

    if str(subject_id) in SUBJECT_LIST:

        # Check if the subject ID belongs to MI subjects
        if str(subject_id).startswith('3'):
            sub_mi += 1
            dataframes_mi.append([])

            # Loop through stimuli
            for stim in range(NUM_STIMULI):
                dataframes_mi[sub_mi].append([])

                # Loop through features
                for feat in range(NUM_FEATURES):
                    dataframes_mi[sub_mi][stim].append([])
                    feat_load = os.path.join(DIR_PATH, 'sub' + str(subject_id), 'stim' + str(stim) + '_' + FEATURE_TYPES[feat] + '.csv')
                    data = pd.read_csv(feat_load)
                    dataframes_mi[sub_mi][stim][feat] = data.values
        
        # Check if the subject ID belongs to healthy control subjects
        elif str(subject_id).startswith('6'):
            sub_ht += 1
            dataframes_ht.append([])

            # Loop through stimuli
            for stim in range(NUM_STIMULI):
                dataframes_ht[sub_ht].append([])

                # Loop through features
                for feat in range(NUM_FEATURES):
                    dataframes_ht[sub_ht][stim].append([])
                    feat_load = os.path.join(DIR_PATH, 'sub' + str(subject_id), 'stim' + str(stim) + '_' + FEATURE_TYPES[feat] + '.csv')
                    data = pd.read_csv(feat_load)
                    dataframes_ht[sub_ht][stim][feat] = data.values
        
        # For all subjects
        sub_all += 1
        dataframes_all.append([])

        # Loop through stimuli
        for stim in range(NUM_STIMULI):
            dataframes_all[sub_all].append([])

            # Loop through features
            for feat in range(NUM_FEATURES):
                dataframes_all[sub_all][stim].append([])
                feat_load = os.path.join(DIR_PATH, 'sub' + str(subject_id), 'stim' + str(stim) + '_' + FEATURE_TYPES[feat] + '.csv')
                data = pd.read_csv(feat_load)
                dataframes_all[sub_all][stim][feat] = data.values

print("Data loading completed.")

Data loading completed.


In [57]:
# Determine MI (Myocardial Infarction) vs. HT (Healthy Control) status based on subject identifier
mi_subjects = ["3128", "3129", "3130", "3131", "3132", "3133", "3136", "3137", "3138", "3139", 
               "3140", "3141", "3142", "3143", "3147", "3148", "3149", "3150", "3151", "3152", 
               "3153", "3154", "3155", "3156", "3158", "3159", "3160", "3162"]

# Healthy Control subjects
ht_subjects = ["6037", "6038", "6043", "6044", "6045", "6046", "6047", "6048", "6049"]

# Initialization of lists to store health condition labels
health_condition_labels = []

# Modify the loop to assign labels based on health condition
for subj_index, subj_id in enumerate(SUBJECT_LIST):

    if subj_id in mi_subjects:
        health_condition = 0  # Myocardial Infarction

    elif subj_id in ht_subjects:
        health_condition = 1  # Healthy Control
        
    else:
        health_condition = "Unknown"
    
    # Create labels based on health condition for all stimuli and features
    subj_labels = []

    for _ in range(NUM_STIMULI):
        stim_labels = []

        for _ in range(NUM_FEATURES):
            stim_labels.append(health_condition)

        subj_labels.append(stim_labels)
        
    health_condition_labels.append(subj_labels)

# Now, health_condition_labels contains labels for each subject, stimulus, and feature based on health condition

In [58]:
# Initialize a list to store labels for the "SpeechPrep" stimulus for all subjects and all features
speech_prep_labels_all = []

# Index of the "SpeechPrep" stimulus
stimulus_index = STIMULUS_TYPES.index('SpeechPrep')

# Loop through each subject
for subject_labels in health_condition_labels:
    # Access labels for the "SpeechPrep" stimulus for all features for the current subject
    speech_prep_labels = subject_labels[stimulus_index]
    # Append to the list
    speech_prep_labels_all.append(speech_prep_labels)

# Now, speech_prep_labels_all_subjects contains the labels for the "SpeechPrep" stimulus for all subjects and all features

In [62]:
# %% Baseline correction (normalization)

import numpy as np

# Initialize lists to store mean difference, normalized data, and labels
mean_difference_all = []  # Stores the mean difference between feature and baseline for all subjects, stimuli, and features
normalized_data_all = []  # Stores the normalized data for all subjects, stimuli, and features
labels_all = []  # Stores the labels for all subjects, stimuli, and features

# Number of data points and half of it
num_data_points = 80  # Total number of data points
data_points_half = round(num_data_points / 2)  # Half of the total number of data points

# Loop through each subject
for subject_index, subject_id in enumerate(SUBJECT_LIST):
    normalized_data_subject = []  # Stores the normalized data for the current subject
    labels_subject = []  # Stores the labels for the current subject

    # Loop through each stimulus
    for stimulus_index in range(NUM_STIMULI):
        normalized_data_stimulus = []  # Stores the normalized data for the current stimulus
        labels_stimulus = []  # Stores the labels for the current stimulus

        # Loop through each feature
        for feature_index in range(NUM_FEATURES):
            baseline_data = dataframes_all[subject_index][0][feature_index][:, 1]  # Baseline data
            baseline_midpoint = baseline_data[len(baseline_data) // 2 - data_points_half:len(baseline_data) // 2 + data_points_half]  # Midpoint of baseline data
            feature_data = dataframes_all[subject_index][2][feature_index][:, 1]  # Feature data
            feature_midpoint = feature_data[len(feature_data) // 2 - data_points_half:len(feature_data) // 2 + data_points_half]  # Midpoint of feature data

            # Calculate mean difference between feature and baseline
            mean_difference = (np.mean(feature_midpoint) - np.mean(baseline_midpoint)) / np.mean(baseline_midpoint)
            mean_difference_all.append(mean_difference)

            # Normalize the data and assign labels
            if stimulus_index == 0:
                # Baseline data
                normalized_data = (baseline_midpoint - np.mean(baseline_midpoint)) / np.mean(baseline_midpoint)
                labels = np.zeros_like(normalized_data)  # Assign label 0

            elif stimulus_index == 1:
                # Feature data
                normalized_data = (feature_midpoint - np.mean(baseline_midpoint)) / np.mean(baseline_midpoint)
                labels = np.ones_like(normalized_data)  # Assign label 1

            normalized_data_stimulus.append(normalized_data)
            labels_stimulus.append(labels)

        normalized_data_subject.append(normalized_data_stimulus)
        labels_subject.append(labels_stimulus)

    normalized_data_all.append(normalized_data_subject)
    labels_all.append(labels_subject)

# Aggregate data and labels for all subjects and stimuli
aggregated_data_all = []  # Stores the aggregated normalized data for all features
aggregated_labels_all = []  # Stores the aggregated labels for all features

for feature_index in range(NUM_FEATURES):
    aggregated_data = []  # Temporary list to aggregate normalized data for the current feature
    aggregated_labels = []  # Temporary list to aggregate labels for the current feature

    # Loop through each stimulus
    for stimulus_index in range(2):
        
        # Loop through each subject
        for subject_index in range(len(SUBJECT_LIST)):
            # Extend the temporary lists with data and labels for the current feature, stimulus, and subject
            aggregated_data.extend(normalized_data_all[subject_index][stimulus_index][feature_index])
            aggregated_labels.extend(labels_all[subject_index][stimulus_index][feature_index])

    aggregated_data_all.append(aggregated_data)
    aggregated_labels_all.append(aggregated_labels)


In [66]:
# Initialize lists to store data for healthy and MI groups
healthy_data_normalized = []
mi_data_normalized = []

# Loop through each subject
for subject_index, subject_id in enumerate(SUBJECT_LIST):
    normalized_data_subject = []  # Stores the normalized data for the current subject

    # Check if the subject is healthy or MI
    if subject_id in ht_subjects:
        data_group = healthy_data_normalized
    else:
        data_group = mi_data_normalized

    # Loop through each stimulus
    for stimulus_index in range(2):
        normalized_data_stimulus = []  # Stores the normalized data for the current stimulus

        # Loop through each feature
        for feature_index in range(NUM_FEATURES):
            # Get normalized data for the current feature, stimulus, and subject
            normalized_data = normalized_data_all[subject_index][stimulus_index][feature_index]
            normalized_data_stimulus.append(normalized_data)

        normalized_data_subject.append(normalized_data_stimulus)

    data_group.append(normalized_data_subject)

# Now, healthy_data_all contains data for healthy subjects and mi_data_all contains data for MI subjects

In [64]:
print(healthy_data_normalized)

[('Healthy_Subj_6037', [[array([ 0.04520254, -0.05157547,  0.01616914, -0.02756472, -0.0150976 ,
        0.05092185, -0.02385785,  0.03673937,  0.00159566,  0.00684649,
       -0.06428274,  0.02841214,  0.06402198, -0.02012261,  0.11175669,
        0.01751506,  0.02841214, -0.05040313,  0.0002915 ,  0.04095375,
       -0.05970151,  0.05525257, -0.03246364,  0.00552863,  0.02703726,
        0.01616914,  0.12477873,  0.02293458,  0.02703726,  0.03673937,
        0.06108269,  0.03673937, -0.03610556,  0.11175669, -0.05854918,
        0.02429849,  0.02703726, -0.03368067, -0.01002079, -0.0710715 ,
        0.03673937,  0.0002915 , -0.03489464,  0.01886455, -0.01761653,
        0.09903272, -0.03731345, -0.01129489, -0.02261594, -0.05623603,
       -0.01383329, -0.05040313,  0.02021762, -0.05623603,  0.00816781,
        0.0094926 , -0.00489136,  0.01616914, -0.00360069,  0.02841214,
       -0.07997141,  0.04948616, -0.00874339, -0.01129489,  0.05380503,
       -0.04211487,  0.06549774, -0.086