In [1]:
#import libraries

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
plt.switch_backend('agg')
from mpl_toolkits.mplot3d import Axes3D
import neurokit2 as nk
import random
path = "/scratch/alim/overnight_validation/MUSE-PSG/"
import os
import random
import pickle

In [6]:


# Sampling rate of the MUSE PPG signal (presumably in Hz — TODO confirm against the device documentation).
MUSE_SAMPLING_RATE = 64
# Event names that count as apnea; compared against the 'name' column of each events table.
apnea_labels = ["Mixed Apnea", "Central Apnea", "Obstructive Apnea"]

'''
THE REST OF THIS CELL LOADS THE MUSE-PSG FILES INTO A USABLE patient_data_dictionary
'''
def read_files_in_directory(directory_path, start=30, stop=50):
    """List the PPG / events file names found directly inside a directory.

    Only regular files whose name contains ``'ppg'`` or ``'events'`` are kept.
    The names are sorted and then sliced with ``[start:stop]``; the defaults
    reproduce the ``[30:50]`` subset this notebook has always worked on.

    Parameters
    ----------
    directory_path : str or path-like
        Directory to scan (not recursive).
    start, stop : int or None, optional
        Bounds of the slice applied to the sorted names. Pass ``None`` for
        both to get every matching file.

    Returns
    -------
    list of str
        The selected file names (without the directory). An empty list is
        returned, after printing the error, when the directory cannot be read,
        so callers can always iterate over the result.
    """
    keywords = ('ppg', 'events')
    try:
        # The context manager closes the scandir iterator even if filtering fails.
        with os.scandir(directory_path) as entries:
            matching_names = [
                entry.name
                for entry in entries
                if entry.is_file() and any(keyword in entry.name for keyword in keywords)
            ]
    except OSError as e:
        print(f"Error reading files: {e}")
        return []

    return sorted(matching_names)[start:stop]

# Example usage:
directory_path = "/scratch/alim/overnight_validation/MUSE-PSG"
# Fall back to an empty list when the listing produced nothing usable
# (e.g. None after a read error), so the grouping loop below never iterates None.
file_name_list = read_files_in_directory(directory_path) or []

patient_dictionary = {}

#print("Organizing into dictionary")

# Group the file names per patient: everything before the last underscore
# is used as the patient key, the remainder identifies the file kind.
for filename in file_name_list:
    patient_name = filename.rsplit('_',1)[0]
    patient_dictionary.setdefault(patient_name, []).append(filename)

#print("Finished organizing into dictionary")

#print("Short Preview of the Dictionary:")

def read_csv_to_dataframe(file_path):
    """Load a CSV file with ``pd.read_csv`` on a best-effort basis.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when reading raised an exception
        (the error is printed rather than propagated).
    """
    try:
        return pd.read_csv(file_path)
    except Exception as error:
        # Report the problem and let the caller decide what to do with None.
        print(f"Error reading CSV file: {error}")
    return None
    
patient_data_dictionary = {}
# Read the events and PPG CSVs of every patient that has exactly two files.
for patient, files in patient_dictionary.items():
    if len(files) != 2:
        continue

    # The file names were sorted when listed and share the patient prefix, so
    # the first one is taken as the events file and the second as the PPG file
    # (assumes the suffixes start with "events" / "ppg" — TODO confirm).
    events_file, ppg_file = files
    events_df = read_csv_to_dataframe(os.path.join(path, events_file))
    ppg_df = read_csv_to_dataframe(os.path.join(path, ppg_file))

    # read_csv_to_dataframe returns None on failure; storing None would crash
    # later when the timestamp columns are added, so skip the patient instead.
    if events_df is None or ppg_df is None:
        print(f"Skipping patient {patient}: could not load both CSV files")
        continue

    patient_data_dictionary[patient] = {'events': events_df, 'ppg': ppg_df}



# Add datetime columns derived from the Unix-second timestamps.
for patient_frames in patient_data_dictionary.values():
    ppg_table = patient_frames['ppg']
    events_table = patient_frames['events']

    # PPG samples: 'ts' (seconds since epoch) -> 'ts-datetime'
    ppg_table['ts-datetime'] = pd.to_datetime(ppg_table['ts'], unit='s')

    # Events: start time, and end time computed as start + duration (seconds)
    events_table['start-datetime'] = pd.to_datetime(events_table['start'], unit='s')
    event_durations = pd.to_timedelta(events_table['duration'], unit='s')
    events_table['end-datetime'] = events_table['start-datetime'] + event_durations



In [7]:
def getIndices(df):
    """Return the index labels of ``df`` as an array (``df.index.values``)."""
    frame_index = df.index
    return frame_index.values

def flattenedList(list_of_lists):
    """Concatenate the items of every inner iterable into one flat list.

    Only one level of nesting is removed; the order of the items is kept.
    """
    merged = []
    for inner in list_of_lists:
        merged.extend(inner)
    return merged

def consecutive_lists(sorted_list):
    """Split a sequence of numbers into runs of consecutive values.

    A value joins the current run when it equals the previous value plus one;
    otherwise it opens a new run.

    Parameters
    ----------
    sorted_list : iterable of numbers
        Values to split, expected in ascending order.

    Returns
    -------
    list of list
        The runs in order of appearance; empty when the input is empty.
    """
    runs = []
    for value in sorted_list:
        if runs and value == runs[-1][-1] + 1:
            # Still contiguous with the run that is currently open.
            runs[-1].append(value)
        else:
            # First value, or a gap: start a fresh run.
            runs.append([value])
    return runs

def randomlySampleList(lst,k):
    """Draw ``k`` distinct items from ``lst`` at random, if that is possible.

    Parameters
    ----------
    lst : sequence
        Population to sample from.
    k : int
        Number of items to draw.

    Returns
    -------
    list
        A new list of ``k`` sampled items. When ``random.sample`` rejects the
        request (``k`` larger than the population or negative, or ``lst`` not
        being a sequence), ``lst`` itself is returned unchanged.
    """
    try:
        return random.sample(lst,k=k)
    except (ValueError, TypeError):
        # Only the documented sampling failures trigger the fallback; anything
        # else (e.g. KeyboardInterrupt) must propagate instead of being hidden.
        return lst
    
def randomlySamplePpg_Df(df:pd.DataFrame,time:int) -> pd.DataFrame:
    """Cut a randomly placed window of ``time`` seconds out of a PPG frame.

    The window start is drawn uniformly between the first timestamp and
    ``last timestamp - time``; all rows whose ``'ts-datetime'`` falls inside
    ``[start, start + time]`` (both ends inclusive) are returned. When the
    frame spans less than ``time`` seconds, the window covers every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime ``'ts-datetime'`` column; the first and last
        rows are taken as the time bounds.
    time : int
        Window length in seconds.

    Returns
    -------
    pandas.DataFrame
        The rows inside the window. ``df`` itself is returned unchanged when
        no window can be computed (empty frame, missing or non-datetime
        ``'ts-datetime'`` column).
    """
    try:
        timestamps = df['ts-datetime']
        first = timestamps.iloc[0]
        last = timestamps.iloc[-1]

        # Use the requested length for both the start range and the window
        # itself (the window used to be fixed at 10 s regardless of `time`).
        window_length = pd.to_timedelta(time,unit = 's')
        latest_start = last - window_length
        random_start_time = first + (latest_start - first) * random.random()

        in_window = (timestamps >= random_start_time) & (timestamps <= random_start_time + window_length)
        return df[in_window]

    except (KeyError, IndexError, TypeError, ValueError):
        # Best-effort fallback: hand the frame back untouched.
        return df
    


    

In [20]:
# Length, in seconds, of the window cut out of each sampled non-event segment.
sample_time = 10
for patient in patient_data_dictionary:
    print(f"Processing Patient {patient}")
    allApneaPpg_dfs = []
    allEventPpg_dfs = []
    allNonEventPpg_dfs = []

    dictionary = patient_data_dictionary[patient]
    ppg = dictionary['ppg']
    # Min-max normalize the ch2 channel in place (writes back into the stored frame).
    ch2_min = ppg['ch2'].min()
    ch2_max = ppg['ch2'].max()
    ppg['ch2'] = (ppg['ch2'] - ch2_min)/(ch2_max - ch2_min)
    events = dictionary['events']

    # Collect the PPG rows recorded during each event; apnea events are also kept separately.
    for index,event in events.iterrows():
        inEventCondition = (ppg['ts-datetime'] <= event['end-datetime']) & (ppg['ts-datetime'] >= event['start-datetime'])
        eventPpg = ppg[inEventCondition]

        if event['name'] in apnea_labels:
            allApneaPpg_dfs.append(eventPpg)
        #eventPpg = randomlySamplePpg_Df(eventPpg,sample_time)
        allEventPpg_dfs.append(eventPpg)

    # Index labels covered by any event (deduplicated, ascending).
    allEventPpg_Indices = sorted(set(flattenedList(list(map(getIndices,allEventPpg_dfs)))))

    # Index labels of the rows that belong to no event.
    nonEventPpg_Indices = ppg.index[~ppg.index.isin(allEventPpg_Indices)].values

    # Draw as many contiguous non-event segments as there are apnea events.
    sample_size = len(allApneaPpg_dfs)
    list_Of_NonEventPpgIndices_Lists = consecutive_lists(nonEventPpg_Indices)

    randomlySampledNonEvents = randomlySampleList(list_Of_NonEventPpgIndices_Lists,sample_size)
    for indices in randomlySampledNonEvents:
        # `indices` holds index labels (taken from ppg.index), so select by label
        # with .loc; .iloc would treat them as positions.
        nonEventPpg = ppg.loc[indices]
        nonEventPpg = randomlySamplePpg_Df(nonEventPpg,sample_time)
        allNonEventPpg_dfs.append(nonEventPpg)
    print(f"Processed {len(allApneaPpg_dfs)} Apnea Events")
    print(f"Processed {len(randomlySampledNonEvents)} non-Events")

    # Dedicated loop variable so the patient-level `ppg` frame is not rebound.
    for apneaPpg in allApneaPpg_dfs:
        # signals, info = nk.ppg_process(timestamps = ppg['ts'].values,ppg_signal = ppg['ch2'].values,sampling_rate = MUSE_SAMPLING_RATE)
        # analyze_df = nk.ppg_analyze(signals, sampling_rate = MUSE_SAMPLING_RATE)
        print(apneaPpg.head())
        print(apneaPpg['ch2'].values)
        print(len(apneaPpg['ch2'].values))
        ppg_signal = apneaPpg['ch2'].values
        # NOTE(review): the saved output of this cell ends with
        # "ValueError: NeuroKit error: the window cannot contain more data points
        # than the time series. Decrease 'scale'." The output does not show which
        # of the NeuroKit calls below raised it — TODO confirm and address.
        ppg_signal = nk.ppg_clean(ppg_signal =ppg_signal,sampling_rate = MUSE_SAMPLING_RATE,method = 'elgendi')
        # NOTE(review): the cleaned event signal is not used below; a simulated
        # signal is processed and analyzed instead — presumably a debugging stand-in.
        data = nk.ppg_simulate(duration = 10,sampling_rate = MUSE_SAMPLING_RATE)


        df,info = nk.ppg_process(data,sampling_rate =MUSE_SAMPLING_RATE)
        results = nk.ppg_analyze(data = df, sampling_rate = MUSE_SAMPLING_RATE)
        '''
        df, info = nk.ppg_process(data,sampling_rate = 100)
        nk.ppg_analyze(data = df,sampling_rate = 100)
        '''
        # Only the first apnea event is inspected for now.
        break
    # Only the first patient is processed for now; the export below is disabled.
    break

    '''
    apnea_fileName = 'apnea_ppg.pkl'
    nonEvent_fileName = 'nonEvent_ppg.pkl'
    os.makedirs(f"./samples/{patient}/",exist_ok=True)
    with open(f"./samples/{patient}/{apnea_fileName}",'wb') as pickle_file:
        pickle.dump(allApneaPpg_dfs,pickle_file)

    with open(f"./samples/{patient}/{nonEvent_fileName}",'wb') as pickle_file:
        pickle.dump(allNonEventPpg_dfs,pickle_file)

    '''

    




    



    

    



    
        



Processing Patient 2020-09-09T204008-0400_5007-ELYP-1F41
Processed 6 Apnea Events
Processed 6 non-Events
                   ts        ch1       ch2  ch3                   ts-datetime
1064271  1.599715e+09  6340513.0  0.297998  0.0 2020-09-10 05:17:14.257947136
1064272  1.599715e+09  6340513.0  0.298182  0.0 2020-09-10 05:17:14.273570304
1064273  1.599715e+09  6340513.0  0.298407  0.0 2020-09-10 05:17:14.289193472
1064274  1.599715e+09  6340513.0  0.298553  0.0 2020-09-10 05:17:14.304816384
1064275  1.599715e+09  6340513.0  0.298735  0.0 2020-09-10 05:17:14.320439552
[0.29799772 0.29818196 0.29840656 0.29855292 0.29873463 0.29871444
 0.29880278 0.29889109 0.29895167 0.29921666 0.29933526 0.29942106
 0.29959015 0.29955229 0.2995296  0.29951193 0.2995296  0.29948164
 0.29959774 0.29969362 0.29990058 0.29991571 0.30004947 0.30011762
 0.30015294 0.30019333 0.30035735 0.3005441  0.30070309 0.30080405
 0.30091509 0.30104885 0.30117251 0.30135169 0.30145263 0.30148292
 0.30150561 0.30145263

ValueError: NeuroKit error: the window cannot contain more data points than the time series. Decrease 'scale'.