In [18]:
#import libraries
import pandas as pd
import numpy as np
from scipy.signal import welch
from scipy.stats import entropy
import nolds
import glob
import mne
import multiprocessing as mp 
import os
from datetime import datetime
# Capture the wall-clock time at which this run began and log it.
start = datetime.now()
print(f"Started job at {start}")



Started job at 2024-02-14 11:27:09.867504


In [3]:
# Directory whose sub-directories hold the per-recording files and the metadata CSV.
ANNE_PATH = '/scratch/alim/overnight_validation/ANNE-PSG231215'

# Numeric coding of respiratory events:
#   1 - "RERA"; 2 - "Hypopnea"; 3 - "Obstructive Apnea"; 4 - "Central Apnea"; 5 - "Mixed Apnea"

SAMPLING_RATE = 256  # presumably samples per second (Hz) -- TODO confirm; not used in the cells shown

# Channels for which the mean / std / var features are extracted.
DATA_COLUMNS = [
    'zeroCrossingRate',
    'ecgRaw', 'ecgProcessed', 'ecgSQI',
    'ppgRaw', 'ppgFiltered', 'ppgSQI',
    'x', 'y', 'z',
    'chestTemp', 'limbTemp',
    'x_detrend', 'z_detrend', 'y_detrend',
    'PAT', 'PATmedian', 'PATdetrend',
    'footpeak', 'footpeakmedian',
    'HRmedian', 'HR',
    'SpO2',
    'RR', 'RRsqi',
    'PPGamp', 'PPGampmedian',
    'sleepstage',
]

# Empty placeholder frame; the cells shown accumulate into `features` instead.
features_Df = pd.DataFrame()



In [4]:
# Load the metadata table stored next to the recordings.
metadata_file_name = ANNE_PATH + "/ANNE-PSG_metadata.csv"
metadata_df = pd.read_csv(metadata_file_name)

# Spot-check: print the metadata row of one specific recording.
# (A bare `.head()` in the middle of a cell renders nothing, so none is issued here.)
is_reference_recording = metadata_df['file'] == '23-11-01-19_35_59.C3884.L3802.696'
print(metadata_df.loc[is_reference_recording])

                                  file        age   sex  AHI       TST  \
283  23-11-01-19_35_59.C3884.L3802.696  65.111567  Male  0.0  3.508333   

     n.channels       TRT       ECG      PPG      Both  align.pass  
283          32  10.07296  0.852927  0.92212  0.809934        True  


In [13]:
def separate_Data_by_Respiratory_Event(df):
  """Split a recording into contiguous runs that share one respiratory-event label.

  The label of a row is the integer value of the FIRST character of
  ``str(row['resp_events'])`` (so ``3`` and ``3.0`` both map to 3, and a value
  such as ``12`` maps to 1). Consecutive rows with the same label form one
  segment.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame with a ``resp_events`` column; row order defines contiguity.

  Returns
  -------
  dict[int, list[pandas.DataFrame]]
      Maps each label to the list of its segments (``df.iloc`` slices, in
      order of appearance). Keys 0-5 are always present, possibly with an
      empty list; any other label encountered is added on demand.

  Raises
  ------
  ValueError
      If the first character of a value's string form is not a digit
      (for example a NaN or a negative number).
  """
  returnDict = {
    0:[],1:[],2:[],3:[],4:[],5:[]
  }

  length = len(df)
  if length == 0:
    # Nothing to segment; the original code crashed here on an unbound label.
    return returnDict

  # Decode every label once instead of doing two `df.iloc[...]` row lookups
  # per loop iteration.
  labels = [int(str(value)[0]) for value in df['resp_events'].tolist()]

  segment_start = 0
  for position in range(1, length):
    if labels[position] != labels[segment_start]:
      returnDict.setdefault(labels[segment_start], []).append(
        df.iloc[segment_start:position]
      )
      segment_start = position

  # Flush the trailing segment under ITS OWN label. Previously a stale label
  # was used when the label changed on the very last row.
  returnDict.setdefault(labels[segment_start], []).append(
    df.iloc[segment_start:length]
  )

  return returnDict
  
def extract_Mean_Std_Var(df, desired_columns = DATA_COLUMNS):
  """Summarise each requested column of ``df`` by its mean, std and variance.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame containing every column named in ``desired_columns``.
  desired_columns : iterable of str
      Columns to summarise; defaults to ``DATA_COLUMNS``.

  Returns
  -------
  dict
      For every column, the keys ``"<column>_mean"``, ``"<column>_std"`` and
      ``"<column>_var"``, each mapped to a ONE-element list holding the
      statistic (computed with numpy's ``mean`` / ``std`` / ``var``).
  """
  statistics = (("mean", np.mean), ("std", np.std), ("var", np.var))

  feature_columns = {}
  for channel in desired_columns:
    samples = df[channel].values
    for suffix, reducer in statistics:
      # Wrapped in a list so the dict can be fed straight to pd.DataFrame as one row.
      feature_columns[f"{channel}_{suffix}"] = [reducer(samples)]

  return feature_columns




In [14]:
from pathlib import Path
import mne
parent_dir = Path(ANNE_PATH)

# Number of worker processes: taken from the SLURM allocation when present,
# otherwise a single process.
ncpus = max(1, int(os.environ.get('SLURM_CPUS_PER_TASK', default = 1)))


def _extract_Features_From_Edf(edf_path):
    """Build the feature rows for one EDF recording.

    Reads the file with mne, keeps at most the first 3000 seconds, splits the
    samples by respiratory event and summarises every segment.

    Parameters
    ----------
    edf_path : str
        Path of the EDF file; its parent directory name is stored in the
        ``name`` column.

    Returns
    -------
    pandas.DataFrame
        One row per segment: the columns produced by ``extract_Mean_Std_Var``
        plus ``event`` (segment label) and ``name``. Empty if the recording
        yields no segments.
    """
    raw = mne.io.read_raw_edf(edf_path)
    # Clamp the crop window: mne raises if tmax exceeds the recording length.
    raw.crop(0, min(3000, raw.times[-1]))
    df = raw.to_data_frame()

    recording_name = Path(edf_path).parent.name
    rows = []
    for label, segments in separate_Data_by_Respiratory_Event(df).items():
        for data in segments:
            feature_row = {}
            feature_row.update(extract_Mean_Std_Var(df = data))
            feature_row['event'] = label
            feature_row['name'] = recording_name
            rows.append(pd.DataFrame(feature_row))

    # Concatenate once per file instead of growing a frame row by row.
    return pd.concat(rows, ignore_index = True) if rows else pd.DataFrame()


# Every EDF file of every patient directory in ANNE-PSG231215. Other files are
# skipped because read_raw_edf only accepts the .edf extension.
edf_paths = [
    str(entry)
    for subDir in parent_dir.iterdir() if subDir.is_dir()
    for entry in subDir.iterdir() if entry.suffix.lower() == '.edf'
]

if ncpus > 1 and len(edf_paths) > 1:
    # The original called `np.Pool`, which does not exist (AttributeError);
    # the pool lives in multiprocessing. `map` keeps the input order, and the
    # with-block shuts the workers down.
    # NOTE(review): a worker function defined inside a notebook is only visible
    # to child processes under the "fork" start method -- confirm on the target host.
    with mp.Pool(processes = min(ncpus, len(edf_paths))) as pool:
        per_file_features = pool.map(_extract_Features_From_Edf, edf_paths)
else:
    per_file_features = [_extract_Features_From_Edf(path) for path in edf_paths]

non_empty_features = [frame for frame in per_file_features if not frame.empty]
features = (
    pd.concat(non_empty_features, ignore_index = True)
    if non_empty_features
    else pd.DataFrame()
)

end = datetime.now()

features.to_csv(f"ANNE_FEATURES_{end}_.csv")

                #print(separate_Data_by_Respiratory_Event(df))





Directory: 23-11-01-19_35_59.C3884.L3802.696
Directory: 21-12-21-22_46_35.C3882.L3562.344
Directory: 23-09-28-19_35_07.C4298.L4033.676
Directory: 22-06-08-21_22_12.C4179.L3806.409
Directory: 23-02-22-22_02_14.C4359.L3786.553
Directory: 21-09-07-21_39_42.C1390.L3562.312
Directory: 22-12-07-22_29_59.C4359.L3786.515
Directory: 22-12-21-21_08_38.C4181.L3786.523
Directory: 22-07-07-21_33_44.C4179.L3806.424
Directory: 23-08-01-20_54_29.C4408.L4087.643
Directory: 22-07-26-22_06_00.C4368.L3805.436
Directory: 20-08-20-21_33_30.C1442.L1215.185
Entry: /scratch/alim/overnight_validation/ANNE-PSG231215/20-08-20-21_33_30.C1442.L1215.185/20-08-20-21_33_30.C1442.L1215.185-features.edf
Extracting EDF parameters from /scratch/alim/overnight_validation/ANNE-PSG231215/20-08-20-21_33_30.C1442.L1215.185/20-08-20-21_33_30.C1442.L1215.185-features.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw = mne.io.read_raw_edf(absolute_entry_path)
PLM_events
  raw = mne.io.read_raw_edf(absolute_entry_path)


Directory: 21-05-19-21_46_54.C1425.L1205.279
Directory: 20-11-09-21_34_42.C1425.L1205.218
Directory: 22-06-01-21_58_28.C4179.L3806.404
Directory: 21-11-03-21_20_01.C3884.L3562.330
Directory: 22-07-19-21_27_55.C4179.L3806.431
Directory: 23-03-08-21_32_13.C4181.L3766.561
Directory: 21-01-11-21_39_04.C1425.L1205.237
Directory: 21-05-31-22_11_50.C1390.L1215.282
Directory: 21-08-18-21_37_37.C1425.L1205.303
Directory: 22-08-24-21_37_23.C4368.L3805.456
Directory: 22-11-08-21_23_46.C4337.L3766.497
Directory: 20-11-11-22_10_54.C1442.L1215.220
Directory: 21-05-17-21_15_06.C1442.L1215.277
Directory: 22-05-04-21_14_35.C4179.L3806.389
Directory: 23-09-25-19_34_45.C4408.L4087.673
Directory: 22-06-20-21_33_21.C4179.L3806.416
Directory: 20-12-15-22_02_08.C1425.L1205.231
Directory: 21-02-01-21_36_56.C1425.L1205.244
Directory: 23-03-09-21_34_36.C4181.L3766.562
Directory: 23-06-20-21_18_51.C4384.L4041.618
Directory: 20-09-04-22_30_34.C1459.L1205.192
Directory: 20-01-14-20_23_27.C823.L775.4
Directory: 22-