# Init

In [None]:
# To work with files
import os

# To use DataFrames
import pandas as pd

# To parse time
#from datetime import datetime, timedelta 

# Used by comp_angle(row) for the trigonometric functions
import math

# To plot graphs
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib nbagg

# To allow more outputs in Jupyter notebook
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Root of the dataset and the folder holding the per-day actigraphy files
path = "C:\\Users\\sigmu\\Desktop\\dataset\\"
path_ACG = f"{path}vlastni\\Aktigrafie po dnech\\"
files_ACG = os.listdir(path_ACG)

# Detection parameters. The two windows are expressed in 5-second epochs
# (seconds / 5); the angle threshold is compared against the absolute
# change of the angle returned by comp_angle (degrees).
# Good accuracy parameters: first_threshold = 10 minutes, time_window = 3 minutes, angle = 10
angle = 10
time_window = (3 * 60) / 5
first_threshold = (10 * 60) / 5

# Empty table that receives one row of sleep statistics per recording
stat_columns = ['Participant',
                'TIB', 'SOL', 'TST', 'WASO', 'SWR', 'SFI', 'SE%']
df_stats = pd.DataFrame(columns=stat_columns)

# Functions

In [None]:
# Function to compute angle according to van Hees method

def comp_angle(row):
    """Return the angle (in degrees) of the z axis relative to the x-y plane.

    Implements the arm-angle formula ``atan(z / sqrt(x**2 + y**2))`` used by
    the van Hees method.

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys ``'x axis [g]'``, ``'y axis [g]'`` and
        ``'z axis [g]'`` (e.g. a DataFrame row).

    Returns
    -------
    float
        Angle in degrees, in the range [-90, 90].
    """
    # atan2 gives the same result as atan(z / h) for h > 0 but stays defined
    # when x == y == 0, where the plain division would fail.
    horizontal = math.hypot(row['x axis [g]'], row['y axis [g]'])
    return math.degrees(math.atan2(row['z axis [g]'], horizontal))

# -----------------------------------------------------------------------------------------------------------  

# Function to decide sleep & wake epochs, first sleep time and sleep end time, and if the accelerometer has been worn
# parameters: window of inactivity to decide SO, window to decide sleep after SO (both in seconds / 5 ... 5s epochs), 
# angle threshold and ACG dataframe

def inactiv(first_threshold, time_window, angle, df,
            non_wear_threshold=0.5, non_wear_epochs=720):
    """Mark sleep epochs in ``df`` and locate sleep onset and sleep end.

    Walks through ``df['abs angle change']`` epoch by epoch while counting
    the epochs elapsed since the last angle change larger than ``angle``.
    ``df['ACG_State']`` is modified in place: epochs scored as sleep are set
    to ``"S"``; if non-wear is detected the whole column is set to ``"N"``.

    Parameters
    ----------
    first_threshold : number
        Quiet epochs that must be exceeded before the first sleep epoch
        (sleep onset) is scored.
    time_window : number
        Quiet epochs that must be exceeded to score sleep after sleep onset.
    angle : number
        Angle-change threshold; a larger change counts as movement.
    df : pandas.DataFrame
        Must contain the columns ``'abs angle change'`` and ``'ACG_State'``.
    non_wear_threshold : number, optional
        Angle changes below this value count towards non-wear (default 0.5).
    non_wear_epochs : int, optional
        Number of consecutive such epochs that flags the recording as
        non-wear (default 720, i.e. one hour of 5-second epochs).

    Returns
    -------
    list
        ``[sleep_onset, sleep_end, fragmentation]``. Onset and end are index
        labels of ``df`` (``0`` if never set); ``fragmentation`` is the
        number of awakenings counted after sleep onset.
    """
    sleep_onset = 0
    sleep_end = 0
    fragmentation = 0
    quiet_epochs = 0      # epochs since the last movement above `angle`
    still_epochs = 0      # consecutive epochs below the non-wear threshold
    first_sleep = False
    woke_up = False

    for index, value in df['abs angle change'].items():
        quiet_epochs += 1

        # Non-wear heuristic: (almost) no angle change for a long stretch
        if value < non_wear_threshold:
            still_epochs += 1
            if still_epochs == non_wear_epochs:
                df['ACG_State'] = "N"
                print("Accelerometr is not being used.")
                break
        else:
            still_epochs = 0

        if value > angle:
            # Movement: restart the quiet counter
            quiet_epochs = 0
            # After sleep onset, count each awakening only once
            if first_sleep and not woke_up:
                fragmentation += 1
                woke_up = True
        elif first_sleep and quiet_epochs > time_window:
            # Sleep after onset; remember the latest sleep epoch as sleep end
            df.loc[index, 'ACG_State'] = "S"
            sleep_end = index
            woke_up = False
        elif not first_sleep and quiet_epochs > first_threshold:
            # First sleep epoch of the recording = sleep onset
            df.loc[index, 'ACG_State'] = "S"
            first_sleep = True
            sleep_onset = index

    return [sleep_onset, sleep_end, fragmentation]
 
# -----------------------------------------------------------------------------------------------------------

# Function to compute sleep parameters
# parameters: ACG dataframe, result dataframe to write statistics in
def stats(df, result, latency=None, participant=None, epoch_seconds=30):
    """Compute sleep parameters for one recording and add them to ``result``.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with a datetime index and an ``'ACG_State'`` column holding
        ``"S"`` (sleep), ``"W"`` (wake) or ``"N"`` (non-wear).
    result : pandas.DataFrame
        Table of statistics collected so far; it is not modified.
    latency : sequence, optional
        ``[sleep_onset, sleep_end, fragmentation_count]`` as returned by
        ``inactiv``. When omitted, the module-level ``latency`` is used.
    participant : optional
        Participant identifier written to the ``'Participant'`` column. When
        omitted, the module-level ``participant`` is used.
    epoch_seconds : number, optional
        Length in seconds of one row of ``df`` (default 30).

    Returns
    -------
    pandas.DataFrame
        A new frame: ``result`` plus one row with TIB, SOL, TST, WASO (all in
        minutes), SWR, SE% and SFI. For an empty or non-wear recording
        ``result`` is returned unchanged.
    """
    # Backward compatibility: earlier callers relied on notebook globals
    if latency is None:
        latency = globals()["latency"]
    if participant is None:
        participant = globals()["participant"]

    if df.empty:
        return result

    # If non-wear: nothing to compute, keep the table as it is
    if df['ACG_State'].iloc[0] == 'N':
        print("Accelerometr is not being used.")
        return result

    # Sleep Onset Latency: sleep onset - start of recording (minutes).
    # NOTE(review): inactiv returns 0 for the onset when no sleep was found;
    # the subtraction below then fails - decide how such nights are scored.
    SOL = round((latency[0] - df.index[0]).total_seconds() / 60, 2)

    # TIB is presumed according to sleep diaries - the recording start and
    # end is cut accordingly. total_seconds() also covers spans over 24 h.
    TIB = (df.index[-1] - df.index[0]).total_seconds() / 60

    states = df['ACG_State']

    # TST: duration in minutes of all epochs scored as sleep
    TST = (df.loc[states == 'S'].index.nunique() * epoch_seconds) / 60

    # WASO: duration in minutes of all wake epochs, minus the wake time
    # before sleep onset (SOL)
    WASO = (df.loc[states == 'W'].index.nunique() * epoch_seconds) / 60 - SOL

    # Sleep Wake Ratio
    SWR = round(TST / WASO, 2) if WASO != 0 else 0

    # Sleep Efficiency (percent of time in bed spent asleep)
    SE = round((TST / TIB) * 100, 2) if TIB != 0 else 0

    # Sleep Fragmentation Index: awakenings after sleep onset per hour of
    # total sleep time
    SFI = round(latency[2] / (TST / 60), 2) if TST != 0 else 0

    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    new_row = pd.DataFrame([{'Participant': participant,
                             'TIB': TIB,
                             'SOL': SOL,
                             'TST': TST,
                             'WASO': WASO,
                             'SWR': SWR,
                             'SE%': SE,
                             'SFI': SFI}])
    return pd.concat([result, new_row], ignore_index=True)

# Iterate through files

In [None]:
%%time

# go through files
# Go through all actigraphy files and collect one row of statistics per file
for file_name_ACG in files_ACG:
    # Participant ID is the part of the file name before the first "_"
    participant = file_name_ACG.split("_")[0]
    print(participant)

    # The first 100 lines are skipped (presumably the device header - TODO confirm).
    # infer_datetime_format was dropped: it is deprecated in pandas 2 and had
    # no effect here because the timestamps are parsed explicitly below.
    df = pd.read_csv(path_ACG + file_name_ACG,
                     names=['time stamp', 'x axis [g]', 'y axis [g]', 'z axis [g]',
                            'light level [lux]', 'button [1/0]', 'temperature [°C]'],
                     skiprows=100,
                     memory_map=True)
    df['time stamp'] = pd.to_datetime(df['time stamp'], format='%Y-%m-%d %H:%M:%S:%f')

    # Drop the columns that are not used for the angle computation
    df.drop(columns=['light level [lux]', 'button [1/0]', 'temperature [°C]'], inplace=True)

    #-----------------------------------------------------------------------------------------------

    # Resample to 5 second epochs and take the median of x, y, z
    df = df.resample('5s', on='time stamp').median()

    #-----------------------------------------------------------------------------------------------

    # Angle per epoch and its absolute change between consecutive epochs
    df['angle'] = df.apply(comp_angle, axis=1)
    df['abs angle change'] = df['angle'].diff().abs()

    # Every epoch starts as wake ("W"); inactiv overwrites sleep / non-wear
    df['ACG_State'] = "W"

    #-----------------------------------------------------------------------------------------------

    # Decide inactivity based on the angle.
    # Returns [sleep onset, sleep end, fragmentation count]; kept in the
    # module-level name `latency` because stats() reads it from there.
    latency = inactiv(first_threshold, time_window, angle, df)

    # Non-wear recording: skip it, but keep the statistics collected so far.
    # (Deleting df_stats here would break every following iteration and the
    # final write.)
    if df['ACG_State'].iloc[0] == 'N':
        print("Accelerometr is not being used.")
        continue

    #-----------------------------------------------------------------------------------------------

    # Resample ACG to 30 s epochs (the epoch length stats() assumes; the
    # original note says this is to match the PSG epochs)
    df = df.resample('30s').interpolate()

    #-----------------------------------------------------------------------------------------------

    # Compute statistics
    df_stats = stats(df, df_stats)

# Append the collected rows to the results file.
# The path is defined here so that this cell also runs on a fresh kernel.
returns_path_csv = path + "Results\\" + "STATS.csv"
df_stats.to_csv(returns_path_csv, mode='a', header=False)

# Write to file

In [None]:
# Destination of the statistics table; the file is (re)written with its header
returns_path_csv = "".join((path, "Results\\", "STATS.csv"))
df_stats.to_csv(path_or_buf=returns_path_csv)

# Read from file

In [None]:
# Load the saved statistics. The frame is named all_stats (the name the
# correlation cell reads) so that it does not overwrite the stats() function.
read_path_csv = path + "Results\\" + "STATS.csv"
all_stats = pd.read_csv(read_path_csv)
all_stats

In [None]:
# Correlation matrix of the sleep statistics
## Shows how strongly the numeric variables of the data set depend on each other,
## which helps to pick important and non-redundant variables.
## Only applicable to numeric/continuous variables.
import seaborn as sn

# Numeric columns of the data set
numeric_col = ['TIB', 'SOL', 'TST', 'WASO', 'SWR', 'SE%', 'SFI']

# Pairwise correlation of the numeric columns
corr_matrix = all_stats[numeric_col].corr()
print(corr_matrix)

# Visualize the matrix as an annotated heatmap
corr = sn.heatmap(corr_matrix, annot=True)

In [None]:
# corr.get_figure().savefig(path + "\\Results\\" + "corr_matrix.pdf")