In [1]:
# To work with files
import os

# Used in funct def stats(match, state, df): 
import numpy as np
# To use DataFrames
import pandas as pd

# To parse time
from datetime import datetime, timedelta 

# Used in funct def comp_angle(row):
import math

# To plot graphs
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib nbagg

# To allow more outputs in Jupyter notebook
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Path to left wrist, right wrist Actigraphy and Polysomnography data
path = "C:\\Users\\sigmu\\Desktop\\dataset\\"
path_ACG = path + "Aktigrafie\\"
path_PSG = path + "Polysomnografie\\"
files_ACG = os.listdir(path_ACG)
files_PSG = os.listdir(path_PSG)

# thresholds 10 min & 5 min respectively
thr_0 = (10 * 60) / 5
thr_1 = (5 * 60) / 5
state_5_10_5 = 'State 5s_10,5'
df_stats = pd.DataFrame(columns=['Participant', 
                                 'TP_cnt', 'TN_cnt', 'FP_cnt', 'FN_cnt',
                                 'TST', 'WASO', 'SWR', 
                                 'specificity', 'sensitivity', 'accuracy'],
                        index=[state_5_10_5])

df_all_stats = pd.DataFrame(columns=['Participant', 
                                 'TP_cnt', 'TN_cnt', 'FP_cnt', 'FN_cnt',
                                 'TST', 'WASO', 'SWR', 
                                 'specificity', 'sensitivity', 'accuracy'],
                        index=[state_5_10_5])

returns_path_csv = path + "\\Results\\" + "_Stats_" + '.csv'
# if file doesn't exist, create new
if ~os.path.exists(returns_path_csv):
    df_stats.to_csv(returns_path_csv)

In [2]:
# function to cut start of the DataFrame to match PSG time stamp

def cutStartToMatch(df, df_PSG):
    df_PSG_len = len(df_PSG.index)-1
    df_len = len(df.index)-1

    # Match start
    # If df and PSG day is the same    
    if(df['time stamp'].dt.day[0] == df_PSG['Time [hh:mm:ss]'].dt.day[0]):        
        # If df starts earlier than df_PSG -> cut start of df        
        if(df['time stamp'].dt.hour[0] < df_PSG['Time [hh:mm:ss]'].dt.hour[0]):            
            # Find df index where time matches            
            idx_start = df[(df['time stamp'].dt.hour == df_PSG['Time [hh:mm:ss]'].dt.hour[0]) &
                           (df['time stamp'].dt.minute == df_PSG['Time [hh:mm:ss]'].dt.minute[0]) &
                           (df['time stamp'].dt.second == df_PSG['Time [hh:mm:ss]'].dt.second[0])].index[0]
            # Drop df from 0 to idx_start            
            df.drop(df.index[0:(idx_start)], inplace=True)
        # Else cut start of df_PSG        
        else:
            # Find df_PSG index where time matches            
            idx_start = df_PSG[(df['time stamp'].dt.hour[0] == df_PSG['Time [hh:mm:ss]'].dt.hour) & 
                               (df['time stamp'].dt.minute[0] == df_PSG['Time [hh:mm:ss]'].dt.minute)].index[0]
            # Drop df_PSG from 0 to idx_start            
            df_PSG.drop(df_PSG.index[0:(idx_start)], inplace=True) 
    # Else if df starts day earlier (happens in #45) -> cut start of df    
    elif(df['time stamp'].dt.day[0] < df_PSG['Time [hh:mm:ss]'].dt.day[0]):
        # Find df index where time matches            
        idx_start = df[(df['time stamp'].dt.hour == df_PSG['Time [hh:mm:ss]'].dt.hour[0]) &
                       (df['time stamp'].dt.minute == df_PSG['Time [hh:mm:ss]'].dt.minute[0]) &
                       (df['time stamp'].dt.second == df_PSG['Time [hh:mm:ss]'].dt.second[0])].index[0]
        # Drop df from 0 to idx_start
        df.drop(df.index[0:(idx_start)], inplace=True)
    # Else - not expected (no ACG b4 PSG in current dataset)    
    else:
        print("Something's wrong in cutStartToMatch.")
 
    return

def cutEndToMatch(df, df_PSG): 
    df.reset_index(inplace=True, drop=True)
    df_PSG.reset_index(inplace=True, drop=True)
    df_PSG_len = len(df_PSG.index)-1
    df_len = len(df.index)-1
    
    # Match end
    # If df ends earlier than df_PSG -> cut end of df_PSG
    if(df['time stamp'].dt.hour[df_len] < df_PSG['Time [hh:mm:ss]'].dt.hour[df_PSG_len]):
        idx_end = df_PSG[(df['time stamp'].dt.hour[df_len] == df_PSG['Time [hh:mm:ss]'].dt.hour) &
                         (df['time stamp'].dt.minute[df_len]+1 == df_PSG['Time [hh:mm:ss]'].dt.minute)].index[0]  # + 1 min  
        # Drop df_PSG from df end to end
        df_PSG.drop(df_PSG.index[(idx_end):(len(df_PSG.index))], inplace=True)
    # Else cut end of df
    else:
        idx_end = df[(df['time stamp'].dt.hour == df_PSG['Time [hh:mm:ss]'].dt.hour[df_PSG_len]) &
                     (df['time stamp'].dt.minute == df_PSG['Time [hh:mm:ss]'].dt.minute[df_PSG_len]) &
                     (df['time stamp'].dt.second == df_PSG['Time [hh:mm:ss]'].dt.second[df_PSG_len])].index[0]
        # Drop df from df_PSG end to end
        df.drop(df.index[(idx_end):(len(df.index))], inplace=True)
        
    return
        
# ----------------------------------------------------------------------------------------------------------- 

def comp_angle(row):
    return math.degrees(math.atan(row['z axis [g]'] / (row['x axis [g]']**2 + row['y axis [g]']**2)))

# -----------------------------------------------------------------------------------------------------------   

def inactiv(thr_1st, thr, clmn_name, df):
    counter = 1
    first_S = False
    for index, value in df['abs angle change'].items():
        counter += 1

        # Angle change > 5
        if (value > 5):
            counter = 0
            df.loc[index, clmn_name] = "W"
        # After first sleep - shorter threshold    
        elif (counter > thr_1) & first_S:
            counter += 1
            df.loc[index, clmn_name] = "S"
            SE = index
        # First sleep - longer threshold 
        elif (counter > thr_0):
            counter += 1
            df.loc[index, clmn_name] = "S"
            first_S = True
            # Sleep Onset (start of sleep)
            SO = index
    return
 
# -----------------------------------------------------------------------------------------------------------
    
def stats(match, state, df):    
    # Count Total Sleep Time and Wake After Sleep Onset in minutes... so far all 'S' and 'W' states
    TST = (len(df[df[state]=='S'].index.value_counts()) * 30 ) / 60
    df_stats['TST'][state] = TST

    WASO = (len(df[df[state]=='W'].index.value_counts()) * 30 ) / 60
    df_stats['WASO'][state] = WASO

    # Sleep Wake Ratio ... the bigger the better sleeper 
    SWR = round(TST / WASO, 4)
    df_stats['SWR'][state] = SWR

    #sensitivity (actigraphy = sleep when PSG = sleep), 
    #specificity (actigraphy = wake when PSG = wake), 
    #and accuracy (total proportion correct)

    # Put all states where: into an array
    TP = np.where((df[state] == 'W') & (df['State PSG'] == 'W'))
    FP = np.where((df[state] == 'W') & (df['State PSG'] == 'S'))
    TN = np.where((df[state] == 'S') & (df['State PSG'] == 'S'))
    FN = np.where((df[state] == 'S') & (df['State PSG'] == 'W'))

    # Return array size
    TP_cnt = np.size(TP)
    FP_cnt = np.size(FP)
    TN_cnt = np.size(TN)
    FN_cnt = np.size(FN)

    # Add to df
    df_stats['TP_cnt'][state] = TP_cnt
    df_stats['FP_cnt'][state] = FP_cnt
    df_stats['TN_cnt'][state] = TN_cnt
    df_stats['FN_cnt'][state] = FN_cnt

    # Calculate sensitivity, specificity and accuracy
    sens = round((TP_cnt / (TP_cnt + FN_cnt) )*100, 4)
    spec = round((TN_cnt / (TN_cnt + FP_cnt) )*100, 4)
    acc = round(((TP_cnt + TN_cnt) / (TP_cnt + TN_cnt + FP_cnt + FN_cnt) )*100, 4)

    # Add to df
    df_stats['Participant'][state] = participant+'_'+arm
    df_stats['sensitivity'][state] = sens
    df_stats['specificity'][state] = spec    
    df_stats['accuracy'][state] = acc

    return

# -----------------------------------------------------------------------------------------------------------

def datasetStats():
    return
#    acc, sens, spec
#    bias (mean ± standard deviation between patients) in estimated sleep duration

In [3]:
%%time

for file_name_PSG in files_PSG:
    participant = file_name_PSG.split("_")[0].upper() #uppercase
    for file_name_ACG in files_ACG:     
        arm = file_name_ACG.split("_")[1]
        if(file_name_ACG.startswith(participant+"_"+arm)):
            print(file_name_ACG)
            df = pd.read_csv(path_ACG+file_name_ACG, names=[
                 'time stamp', 'x axis [g]', 'y axis [g]', 'z axis [g]', 'light level [lux]', 'button [1/0]','temperature [°C]'], 
                 skiprows=100)
            df['time stamp'] = pd.to_datetime(df['time stamp'], format='%Y-%m-%d %H:%M:%S:%f')

            # 1st PSG read -> get recording date
            df_PSG = pd.read_csv(path_PSG + file_name_PSG)
            PSGdate = df_PSG["RemLogic Event Export"][2].split("\t")[1]
            # 2nd PSG read -> parse to datetime
            df_PSG = pd.read_csv(path_PSG + file_name_PSG, sep='\t', skiprows=17)
            df_PSG['Time [hh:mm:ss]'] = PSGdate + " " + df_PSG['Time [hh:mm:ss]']
            df_PSG['Time [hh:mm:ss]'] = pd.to_datetime(df_PSG['Time [hh:mm:ss]'], format='%d/%m/%Y %H:%M:%S')

            df.drop(columns=['light level [lux]', 'button [1/0]', 'temperature [°C]'], inplace=True, axis=1)

            # Commented because 'Position' column missing in 34               
            #df_PSG.drop(columns=['Position', 'Event', 'Duration[s]'], inplace=True, axis=1) 

            #-----------------------------------------------------------------------------------------------

            # Drop values so that the ACG and PSG recording is starting and ending the same
            cutStartToMatch(df, df_PSG)
            cutEndToMatch(df, df_PSG)

            #-----------------------------------------------------------------------------------------------

            # Set origin from PSG to start resampling
            orig = (df_PSG['Time [hh:mm:ss]'].dt.hour[0]*3600 + 
                    (df_PSG['Time [hh:mm:ss]'].dt.minute[0]*60) +
                    (df_PSG['Time [hh:mm:ss]'].dt.second[0]))
            orig  = pd.Timestamp(orig, unit='s')

            # Resample by 5 second epoch and compute median of x,y,z
            df = df.resample('5S', on='time stamp', kind='timestamp', origin=orig).median().round(decimals=4)

            #-----------------------------------------------------------------------------------------------  

            # Apply func comp_angle
            df['angle'] = df.apply(comp_angle, axis=1).round(decimals=4)

            # Average the angle per 5 sec epoch
            #df['angle'] = df.resample('5S').mean().round(decimals=4)

            # Return absolute difference in angle per column
            df['abs angle change'] = df['angle'].diff().round(decimals=4).abs()

            # New column with all "W" values
            df[state_5_10_5] = "W"

            #-----------------------------------------------------------------------------------------------    

            # Decide inactivity based on the angle
            inactiv(thr_0, thr_1, state_5_10_5, df)

            #-----------------------------------------------------------------------------------------------

            # Resample ACG to have same number of columns as PSG (30s epochs)
            df = df.resample('30S', origin=orig).interpolate()

            # If PSG has extra value at the end
            if(len(df_PSG) > len(df)):
                df_PSG.drop(df_PSG.index[len(df_PSG)-1], inplace=True)

            # Overwrite State PSG to bi-state 
            df['State PSG'] = np.where((df_PSG['Sleep Stage'] == 'N1')|(df_PSG['Sleep Stage'] == 'N2')|
                                               (df_PSG['Sleep Stage'] == 'N3')|(df_PSG['Sleep Stage'] == 'R'), 'S', 'W')

            # Create boolean column and compare
            match = 'match_' + state_5_10_5
            df[match] = np.where(df[state_5_10_5] == df['State PSG'], True, False)

            #-----------------------------------------------------------------------------------------------

            # Participant according to file name
            df_stats['Participant'][0] = participant+'_'+arm

            # Compute statistics
            match = 'match_' + state_5_10_5
            stats(match, state_5_10_5, df)

            #-----------------------------------------------------------------------------------------------

            # Return to files
            df.to_csv(path + "\\Results\\" + "df__" + file_name_ACG)
            df_stats.to_csv(returns_path_csv, mode='a', header=False)

            #print("df", df_stats)
            #df_all_stats.append(df_stats)
            #print("df", df_all_stats)

            # Delete existing dataframes?
            del df
            del df_PSG
            
# name=['Participant', 
#         'TP_cnt', 'TN_cnt', 'FP_cnt', 'FN_cnt',
#         'TST', 'WASO', 'SWR', 
#         'specificity', 'sensitivity', 'accuracy']            
# df = pd.read_csv(returns_path_csv, names=[
#                  'time stamp', 'x axis [g]', 'y axis [g]', 'z axis [g]', 'light level [lux]', 'button [1/0]','temperature [°C]'], 
#                  skiprows=100)            

MECSLEEP01_left_wrist_012870_2013-06-12_11-40-37.csv
MECSLEEP01_right_wrist_012855_2013-06-11_12-08-25.csv
MECSLEEP02_left_wrist_012859_2013-06-12_12-05-48.csv
MECSLEEP02_right_wrist_012869_2013-06-12_12-02-20.csv
MECSLEEP10_left_wrist_012859_2013-11-01_14-03-32.csv
MECSLEEP14_left_wrist_012870_2013-12-09_10-53-34.csv
MECSLEEP14_right_wrist_012855_2013-12-09_11-07-22.csv
MECSLEEP17_left_wrist_012854_2013-12-09_11-37-24.csv
MECSLEEP17_right_wrist_012932_2013-12-09_11-30-44.csv
MECSLEEP21_left_wrist_012854_2014-01-23_14-13-59.csv
MECSLEEP21_right_wrist_012932_2014-01-23_14-07-25.csv
MECSLEEP23_left_wrist_016283_2014-02-03_13-24-04.csv
MECSLEEP23_right_wrist_014883_2014-02-03_13-19-40.csv
MECSLEEP27_left_wrist_012867_2014-02-06_12-49-07.csv
MECSLEEP27_right_wrist_012851_2014-02-06_12-42-54.csv
MECSLEEP28_left_wrist_012856_2014-02-13_11-13-26.csv
MECSLEEP28_right_wrist_012854_2014-02-13_11-10-56.csv
MECSLEEP29_left_wrist_014883_2014-02-13_11-21-35.csv
MECSLEEP29_right_wrist_012865_2014-02-

In [7]:
stats = pd.read_csv(returns_path_csv)
stats

Unnamed: 0.1,Unnamed: 0,Participant,TP_cnt,TN_cnt,FP_cnt,FN_cnt,TST,WASO,SWR,specificity,sensitivity,accuracy
0,"State 5s_10,5",,,,,,,,,,,
1,"State 5s_10,5",MECSLEEP01_left,190.0,687.0,227.0,110.0,398.5,208.5,1.9113,75.1641,63.3333,72.2405
2,"State 5s_10,5",MECSLEEP01_right,193.0,665.0,249.0,107.0,386.0,221.0,1.7466,72.7571,64.3333,70.6755
3,"State 5s_10,5",MECSLEEP02_left,197.0,516.0,151.0,71.0,293.5,174.0,1.6868,77.3613,73.5075,76.2567
4,"State 5s_10,5",MECSLEEP02_right,205.0,551.0,116.0,63.0,307.0,160.5,1.9128,82.6087,76.4925,80.8556
5,"State 5s_10,5",MECSLEEP10_left,125.0,627.0,237.0,65.0,346.0,181.0,1.9116,72.5694,65.7895,71.3472
6,"State 5s_10,5",MECSLEEP14_left,686.0,4.0,216.0,13.0,8.5,451.0,0.0188,1.8182,98.1402,75.0816
7,"State 5s_10,5",MECSLEEP14_right,699.0,0.0,220.0,0.0,0.0,459.5,0.0,0.0,100.0,76.0609
8,"State 5s_10,5",MECSLEEP17_left,193.0,474.0,231.0,77.0,275.5,212.0,1.2995,67.234,71.4815,68.4103
9,"State 5s_10,5",MECSLEEP17_right,192.0,453.0,252.0,78.0,265.5,222.0,1.1959,64.2553,71.1111,66.1538
