In [1]:
import pandas as pd
import numpy as np
import os
import re
import pickle
import locale
import glob
import time
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import scipy.signal as sg
import statsmodels.api as sm
import matplotlib.dates as mdates
from datetime import datetime, timedelta
from scipy.signal import butter, lfilter, fftconvolve, get_window, firwin2, firwin, freqz
from scipy import stats
from matplotlib.mlab import find
from statsmodels.graphics.api import qqplot
from __future__ import division
from functools import reduce

pd.set_option('max_column', 100)
pd.set_option('max_row', 4000)

%matplotlib inline

# <font color='violet'> Functions </font>

###Load Data

- load csv file (filename format: nameYYYY_HHMM_NightNo)
- computed for timedelta and converted to datetime
- computed for magnitude
- labeled the dataframe with username (format: NameNightno)
- did bandpass filtering on z axis

In [2]:
def load_data(file, samplingRate=50.0, lowcut=1.0, highcut=18.0): #1.0, 18.0
    """
    Loads the csv, computes for the corresponding datetime of the collected data and performs bandpass filtering on
    the z axis.
    
    Parameters
    ----------
    file: string
        csv file name
    samplingRate: float
        (default = 50.0)
    lowcutOakley: float
        low cut value for bandpass filtering (default=1.0)
    highcutOakley: float
        high cut value for bandpass filtering (default=18.0)
        
    Return
    ----------
    df : DataFrame
    """
    os.chdir('/Users/User/Documents/Sleep/gravitys8/SleepExperiment/')
    sname, sdate, stime, snight = [value for value in re.split('(\d+)',file) if value not in ('_','.csv')]
    df = pd.read_csv(file, names=['x', 'y', 'z', 'timestamp'], low_memory=False).convert_objects(convert_numeric=True)
    df['username'] = sname+snight
    df['magnitude'] = df[['x', 'y', 'z']].apply(lambda r: np.sqrt((r**2).sum()), axis=1)
#     df['timedelta'] = pd.to_timedelta(df.timestamp, unit = 'ns')
#     df['timedelta'] -= df.timedelta[0]
#     datetime_ = datetime.strptime(sdate + '-' + stime, "%Y%m%d-%H%M")
#     df['dtime'] = datetime_ + df.timedelta
    df['timedelta'] = pd.to_timedelta(df.timestamp, unit='ns') \
                        - (pd.to_timedelta(pd.to_datetime(df['timestamp'][0]).hour, unit='h') \
                        + pd.to_timedelta(pd.to_datetime(df['timestamp'][0]).minute, unit='m') \
                        + pd.to_timedelta(pd.to_datetime(df['timestamp'][0]).second, unit='s')) \
                        + pd.to_timedelta(pd.to_datetime(df['timestamp'][0]).nanosecond, unit='ns') \
                        + datetime.strptime("%s-%s-%s" %(sdate[:4], sdate[4:6], sdate[6:]), "%Y-%m-%d")
    df['dtime']= df['timedelta'].apply(lambda x: x + pd.to_timedelta((int(stime[:2])*60)+int(stime[2:]), unit='m'))
    df['z_bp'] = bandpass_filter(df.z, lowcut, highcut, samplingRate, 1, False)
    #print ("Total number of rows (raw data): {:,d}".format(len(df)))
    return df

In [3]:
def bandpass_filter(x, lowcut=10.0, highcut=25.0, samplingRate=100.0, order = 5, Plot=True):
    """
    Performs bandpass filtering on the given data.
    
    Parameters
    ----------
    x: Series
        axis from the accelerometer data
    lowcut: float
    highcut: float
    samplingRate: float
    order: int
    Plot: boolean
    
    Return
    ----------
    y: Series
    """
    nyq = 0.5 * samplingRate
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    y = lfilter(b, a, x)
    #y = butter_bandpass_filter(x, lowcut, highcut, samplingRate, order=1)
    if Plot:
        plt.figure(1, figsize=(20, 2))
        plt.plot(x, 'c-', label='z_raw',)
        plt.grid(True)
        plt.axis('tight')
        plt.legend(loc=1)

        plt.figure(2, figsize=(20, 2))
        plt.plot(y, label='Filtered signal (%g Hz)' % samplingRate)
        plt.xlabel('time (seconds)')
        plt.grid(True)
        plt.axis('tight')
        plt.legend(loc=1)
        plt.show()
        
    return y

In [4]:
def consecutive(data, n, stepsize=1):
    """Returns a list of lists of consecutive integers."""
    lst_ = []
    lst = np.split(data, np.where(np.diff(data) != stepsize)[0]+1)
    for i in lst:
        if len(i) <= n:
            lst_.append(i)
        else:
            pass
    return lst_

###Activity Counts

- maximum absolute value within 1second epoch
- accumulate over epochs of 30 seconds

In [5]:
def counter(df_, Plot = True):
    """ 
    Computes for the activity counts within 1minute epochs.
    
    Parameters
    ----------
    df_: DataFrame
        DataFrame from load_data()
    Plot: boolean
        display the graph of the result(default=True)
        
    Return
    ----------
    df_1min: DataFrame
        activity counts per minute
    """
    df = df_[['dtime', 'z_bp']].copy()
    df['dtime'] = pd.to_datetime(df['dtime'])
    df['z_bp'] = abs(df['z_bp'])
    df_zo = df.set_index(['dtime']).resample("1S", how="max").fillna(0).reset_index()
    df_30 = pd.DataFrame((pd.rolling_sum(df_zo.set_index(['dtime']).z_bp
                        , window = 30).fillna(0)), columns=['Counts']).reset_index()
    df_30a = df_zo.set_index(['dtime']).resample("30S", how="sum").reset_index()
    df_30a.columns = ['dtime', 'Counts']
    df_1min = df_30.set_index(['dtime']).resample("1Min").reset_index()
    
    if Plot:
        plt.figure(figsize=(20, 2))
        plt.plot(df_30.dtime, df_30.Counts)
        plt.grid(True)
        plt.axis('tight')
        plt.title('Oakley Activity Count')
        
        plt.show()
        
    return df_1min

###Sadeh's Algorithm

- average number of activity counts during the scored epoch and the window of 5 epochs preceding and following it
- standard deviation of the activity counts during the scored epoch and the five epochs preceding it
- number of epochs with activity level equal to or higher than a certain threshold
- natural logarithm of the number of activity counts during the scored epoch + 1

In [6]:
def sadeh_scoring(elems):
    """
    Sadeh's scoring algorithm coefficients for the four most predictive measures.
    
    Parameter
    ----------
    elems: array_like, length (4)
    
    Return
    ----------
    probability of sleep
    """
    coeffs = [-0.065, -1.08, -0.056, -0.703]
    return 7.601 + (np.dot(elems, coeffs)) 


def sadeh(df_, Plot = True):
    """scores 0 (sleep) if PS (probability of sleep) is greater than or equal to 0, 1 (awake) otherwise
    
    Parameter:
    ----------
    df_: DataFrame
        DataFrame from load_data()
    Plot
        display the graphs of the results(default=True)
        
    Return
    ----------
    df: DataFrame
        PS - probability of sleep
        Score - 0 as sleep, 1 as wake
    """
    df = counter(df_, False)
    lst = [np.nan, np.nan, np.nan, np.nan, np.nan]
    df['PS'] = np.nan
    for i in range(len(df)-10):
        mean_w5 = np.mean(df['Counts'][i:i+11])
        sd_w6 = np.std(df['Counts'][i:i+6])
        nat = len([1 for x in df['Counts'][i:i+11] if x >= 25 and x < 100])
        log_act = np.log(df['Counts'][i+5]) + 1
        z = sadeh_scoring([mean_w5, nat, sd_w6, log_act])
        lst.append(z)
    df['PS'].__setitem__(slice(0, len(lst)), lst)
    df['Score'] = df['PS'].apply(lambda x: 1 if ((x < 0) or (x is np.inf) or pd.isnull(x)) else 0)
    if Plot:
        plt.figure(1, figsize=(20, 3))
        plt.plot_date(df_.dtime, df_.x, 'r-', label='x')
        plt.plot_date(df_.dtime, df_.y, 'b-', label='y')
        plt.plot_date(df_.dtime, df_.z, 'y-', label='z')
        plt.legend(loc=1 , bbox_to_anchor=(1, 1.2), ncol=3)
        plt.grid(True)
        plt.axis('tight')
        plt.title('Raw Accelerometer Data')
        
        plt.figure(2, figsize=(20, 2))
        plt.plot(df.dtime, df.Counts)
        plt.grid(True)
        plt.axis('tight')
        plt.title('Activity Count')
        
        plt.figure(3, figsize=(20, 2))
        ax = plt.gca()
        plt.plot(range(len(df.dtime)), df.Score)
        plt.fill_between(range(len(df.dtime)), df.Score, 1, facecolor='yellow')
        plt.axis('tight')
        plt.title("Sadeh's Sleep Detection")
        ax.grid(True)
        ax.get_yaxis().set_ticks([])
        a = ax.get_xticks().tolist()
        l = [df['dtime'].loc[int(i)].strftime('%H:%M:%S') for i in a[:-1]]
        ax.set_xticklabels(l)
        
        plt.show()
        
    return df

### Oakley's algorithm

In [7]:
def oakley_scoring(elems):
    coeffs = [0.04, 0.2, 2, 0.2, 0.04]
    return (np.dot(elems, coeffs))


def oakley(df_, threshold = 20, Plot=True):
    """Scores 0 (sleep) if OakleyCountsPerMin is greater than or equal to the threshold, 1 (awake) otherwise.
    
     Parameter:
     ----------
     df_: DataFrame
         DataFrame from load_data()
     threshold: int
         possible values: 20 (low sensitivity), 40 (medium sensitivity), 80 (high sensitivity)
         (default=20)
     Plot
         display the graphs of the results(default=True)
         
     Return
     ----------
     df: DataFrame
         OakleyCountsPerMin  - probability of sleep
         OakleyScore - 0 as sleep, 1 as wake
     """
    df = counter(df_, False)
    df_oakley = pd.DataFrame((pd.rolling_apply(df.set_index(['dtime']).Counts
                        , window = 5
                        , func = oakley_scoring)), columns=['OakleyCountsPerMin']).reset_index()
    df_oakley['Score'] = df_oakley['OakleyCountsPerMin'].apply(lambda x: 1 if ((x > threshold) | (pd.isnull(x)))
                                                                     else 0)
    if Plot:  
        plt.figure(1, figsize=(20, 3))
        plt.plot_date(df_.dtime, df_.x, 'r-', label='x')
        plt.plot_date(df_.dtime, df_.y, 'b-', label='y')
        plt.plot_date(df_.dtime, df_.z, 'y-', label='z')
        plt.legend(loc=1 , bbox_to_anchor=(1, 1.2), ncol=3)
        plt.grid(True)
        plt.axis('tight')
        plt.title('Raw Accelerometer Data')
        
        plt.figure(2, figsize=(20, 2))
        plt.plot(df.dtime, df.Counts)
        plt.grid(True)
        plt.axis('tight')
        plt.title('Oakley Activity Count')
        
        plt.figure(3, figsize=(20, 2))
        ax = plt.gca()
        plt.plot(range(len(df_oakley.dtime)), df_oakley.Score)
        plt.fill_between(range(len(df_oakley.dtime)), df_oakley.Score, 1, facecolor='yellow')
        plt.axis('tight')
        plt.title("Oakley's Sleep Detection")
        ax.grid(True)
        ax.get_yaxis().set_ticks([])
        a = ax.get_xticks().tolist()
        l = [df_oakley['dtime'].loc[int(i)].strftime('%H:%M:%S') for i in a[:-1]]
        ax.set_xticklabels(l)
        
        plt.show()
        
    return df_oakley

### Computing for Sleep-Wake time and Sleep Efficiency

In [8]:
def sleep_time(data_):
    """
    Computes the number of minutes the subject is asleep, awake and sleep efficiency.
    
    Parameter
    ----------
    data_: array_like 
    
    Returns
    ----------
    asleep_mins: int
        number of minutes the subject is asleep
    awake_mins: int
        number of minutes the subject is awake
    rec_mins: int
        total number of recorded minutes
    bed_mins: int
        number of minutes in bed
    sleep_efficiency: float
        ratio of total sleep time and bed time ((asleep_mins/bed_mins)*100)
    """
    data = data_[5:-5]
    #awake_mins = np.count_nonzero(data)
    rec_mins = len(data_)
    first_ = data_.argmin()
    bed_mins = len(data_[first_:-5]) #len(data)
    awake_mins = np.count_nonzero(data_[first_:-5])
    asleep_mins = bed_mins - awake_mins
    sleephrs, sleepmins = divmod(asleep_mins, 60)
    awakehrs, awakemins = divmod(awake_mins, 60)
    bedhr, bedmin = divmod(bed_mins, 60)
    rechr, recmin = divmod(rec_mins, 60)
    sleep_efficiency = (asleep_mins/bed_mins)*100

    print ("Total Sleep Time: %s hours and %s minutes" %(sleephrs, sleepmins))
    print ("Awake Duration: %s hours and %s minutes" %(awakehrs, awakemins))
    print ("Total Bed Time: %s hours and %s minutes" %(bedhr, bedmin))
    print ("Total Recording Time: %s hours and %s minutes" %(rechr, recmin))
    print ("Sleep Efficiency: %s%%" %np.round(sleep_efficiency, 2))
    return asleep_mins, awake_mins, rec_mins, bed_mins, sleep_efficiency

###Rescoring Rules by Webster et al.

In [9]:
def rescore1(data_):
    """After at least 4 minutes scored as wake, the next 1 minute scored as sleep is rescored wake."""
    data = data_.copy()
    zero_lst = np.flatnonzero(np.array(data)==0)
    if zero_lst[0] > 10:
        data.loc[zero_lst[0]] = 1
    for i in range(len(zero_lst)-1):
        if (zero_lst[i+1] - zero_lst[i]) >= 4:
            pd.Series(data).loc[zero_lst[i+1]] = 1
        else:
            pass
    return data

def rescore2(data_):
    """After at least 10 minutes scored as wake, the next 3 minutes scored as sleep are recorded wake."""
    data = data_.copy()
    zero_lst = np.flatnonzero(np.array(data)==0)
    for i in range(len(zero_lst)-1):
        if (zero_lst[i+1] - zero_lst[i]) >= 10 and (zero_lst[i+3] - zero_lst[i+1]) == 2 :
            for j in range(1, 4):
                data.loc[zero_lst[i+j]] = 1
        else:
            pass
    return data

def rescore3(data_):
    """After at least 15 minutes scored as wake, the next 4 minutes scored as sleep are rescored wake."""
    data = data_.copy()
    zero_lst = np.flatnonzero(np.array(data)==0)
    for i in range(len(zero_lst)-1):
        if (zero_lst[i+1] - zero_lst[i]) >= 15 and (zero_lst[i+4] - zero_lst[i+1]) == 3 :
            for j in range(1, 5):
                data.loc[zero_lst[i+j]] = 1
        else:
            pass
    return data

def rescore4(data_):
    """6 minutes or less scored as sleep surrounded by at least 10 minutes (before and after) 
        scored as wake are rescored wake"""
    data = data_.copy()
    zero_lst = np.flatnonzero(np.array(data)==0)
    for i in range(len(zero_lst)-1):
        if (zero_lst[i+1] - zero_lst[i]) >= 10: 
            consec = consecutive(zero_lst[i+1:], 6)[0]
            conseclen = len(consec)
            lst_consec = consec[-1]
            if zero_lst[i+conseclen+1] - zero_lst[i+conseclen] >= 10:
                for j in range(1, conseclen+1):
                    data.loc[zero_lst[i+j]] = 1
            else:
                pass
        pass
    return data

def rescore5(data_):
    """10 minutes or less scored as sleep surrounded by at least 20 minutes (before and after) 
        scored as wake are rescored wake"""
    data = data_.copy()
    zero_lst = np.flatnonzero(np.array(data)==0)
    for i in range(len(zero_lst)-1):
        if (zero_lst[i+1] - zero_lst[i]) >= 20:
            consec = consecutive(zero_lst[i+1:], 10)[0]
            conseclen = len(consec)
            lst_consec = consec[-1]
            if zero_lst[i+conseclen+1] - zero_lst[i+conseclen] >= 20:
                for j in range(1, conseclen+1):
                    data.loc[zero_lst[i+j]] = 1
            else:
                pass
        pass
    return data

def rescore(data_):
    """Rescoring using all 5 Webster et al.'s rules."""
    data = data_.copy()
    return rescore5(rescore4(rescore3(rescore2(rescore1(data)))))

In [10]:
def graph_rescored(df_orig, rescored):
    """
    Compares the original graph versus the rescored one.
    
    Parameters
    ----------
    df_orig: DataFrame
        DataFrame from load_data()
    rescored: Series
        Series or DataFrame column to be rescored
    """
    plt.figure(1, figsize=(20, 2))
    ax = plt.gca()
    plt.plot(range(len(df_orig.dtime)), df_orig.Score)
    plt.fill_between(range(len(df_orig.dtime)), df_orig.Score, 1, facecolor='yellow')
    plt.axis('tight')
    plt.title("Original Sleep-Wake Detection")
    ax.grid(True)
    ax.get_yaxis().set_ticks([])
    a = ax.get_xticks().tolist()
    l = [df_orig['dtime'].loc[int(i)].strftime('%H:%M:%S') for i in a[:-1]]
    ax.set_xticklabels(l)
    
    plt.figure(2, figsize=(20, 2))
    ax = plt.gca()
    plt.plot(range(len(df_orig['dtime'])), rescored )
    plt.fill_between(range(len(df_orig['dtime'])), rescored, 1, facecolor='yellow')
    plt.axis('tight')
    plt.title("Rescored Sleep-Wake Detection")
    ax.grid(True)
    ax.get_yaxis().set_ticks([])
    a = ax.get_xticks().tolist()
    l = [df_orig['dtime'].loc[int(i)].strftime('%H:%M:%S') for i in a[:-1]]
    ax.set_xticklabels(l)

    plt.show()

In [11]:
def oakley_vs_sadeh(df_, df_oakley, df_sadeh):
    """Compares Oakley and Sadeh's results.
    
    Parameters
    ----------
    df_: DataFrame
        DataFrame from load_data()
    df_oakley: DataFrame
        DataFrame from oakley()
    df_sadeh: DataFrame
        DataFrame from sadeh()
    """
    plt.figure(1, figsize=(20, 3))
    plt.plot_date(df_.dtime, df_.x, 'r-', label='x')
    plt.plot_date(df_.dtime, df_.y, 'b-', label='y')
    plt.plot_date(df_.dtime, df_.z, 'y-', label='z')
    plt.legend(loc=1 , bbox_to_anchor=(1, 1.2), ncol=3)
    plt.grid(True)
    plt.axis('tight')
    plt.title('Raw Accelerometer Data')
        
    plt.figure(2, figsize=(20, 2))
    ax = plt.gca()
    plt.plot(range(len(df_oakley.dtime)), df_oakley.Score)
    plt.fill_between(range(len(df_oakley.dtime)), df_oakley.Score, 1, facecolor='yellow')
    plt.axis('tight')
    plt.title("Oakley's Sleep-Wake Detection")
    ax.grid(True)
    ax.get_yaxis().set_ticks([])
    a = ax.get_xticks().tolist()
    l = [df_oakley['dtime'].loc[int(i)].strftime('%H:%M:%S') for i in a[:-1]]
    ax.set_xticklabels(l)
    
    plt.figure(3, figsize=(20, 2))
    ax = plt.gca()
    plt.plot(range(len(df_sadeh['dtime'])), df_sadeh.Score )
    plt.fill_between(range(len(df_sadeh['dtime'])),  df_sadeh.Score, 1, facecolor='yellow')
    plt.axis('tight')
    plt.title("Sadeh' Sleep-Wake Detection")
    ax.grid(True)
    ax.get_yaxis().set_ticks([])
    a = ax.get_xticks().tolist()
    l = [df_sadeh['dtime'].loc[int(i)].strftime('%H:%M:%S') for i in a[:-1]]
    ax.set_xticklabels(l)

    plt.show()

In [12]:
def rescored_wake(data_):
    """Rescore wake values of the first hour.
    
    Parameter
    ----------
    data_ : array_like
    
    Return
    ----------
    data : array_like
        rescored sleep values to wake if there exist at least 15 minutes of wake values
    """
    data = data_.copy()
    dat_ = data_[:60].copy()
    #lst_ = []
    ones_lst = np.flatnonzero(np.array(dat_)==1)
    for i in range(len(ones_lst)-1):
        if (ones_lst[i+1] - ones_lst[i]) >= 15:
            #lst_.append((zero_lst[i], zero_lst[i+1]))
            for j in range(ones_lst[i]+1, ones_lst[i+1]):
                data.loc[j] = 1
    return data

def rescored_wake2(data_):
    """
    Rescore wake values of the last 30 minutes of the data.
    
    Parameter
    ----------
    data_ : array_like
    
    Return
    ----------
    data : array_like
        rescored sleep values to wake if there exist at least 15 minutes of wake values
    """
    data = data_.copy()
    dat1_ = data[-15:-4].copy()
    ones_lst = np.flatnonzero(np.array(dat1_)==1)
    for i in range(len(ones_lst)-1):
        if (ones_lst[i+1] - ones_lst[i]) >= 7:
            for j in range(ones_lst[i]+1, ones_lst[i+1]):
                data.loc[dat1_.index[j]] = 1
    return data

In [13]:
def waso(data_):
    """
    Count the number of wake(s) after sleep onset.
    
    Parameter
    ----------
    data_ : array_like
    
    Return
    ----------
    data : array_like
        number of wake(s) after sleep onset
    """
    data = data_.copy()
    zero_lst = np.flatnonzero(np.array(data_)==0)
    cnt = 0
    for i in range(len(zero_lst)-1):
        if (zero_lst[i+1] - zero_lst[i]) >= 2:
            cnt += 1
    return cnt

In [17]:
def sleep_wake(file):
    """
    Sleep-Wake identification based from Oakley and Sadeh's algorithms.
    
    Parameter
    ----------
    file: string
        csv file name
        
    Returns
    ----------
    df_: DataFrame
        DataFrame from load_data()
    df_oakley: DataFrame
        DataFrame from oakley()
    df_sadeh: DataFrame
        DataFrame from sadeh()
    rs: array_like
        rescored df_sadeh from rescored_wake()
    waso_cnt: int
        number of wake(s) after sleep onset from waso()
    asleep_mins: int
        number of minutes the subject is asleep
    awake_mins: int
        number of minutes the subject is awake
    rec_mins: int
        total number of recorded minutes
    bed_mins: int
        number of minutes in bed
    sleep_efficiency: float
        ratio of total sleep time and bed time ((asleep_mins/bed_mins)*100)
    """
    df = load_data(file)
    df_sadeh = sadeh(df, False)
    df_oakley = oakley(df, 40, False)
    oakley_vs_sadeh(df, df_oakley, df_sadeh)
    rs = rescore1(rescored_wake2(rescored_wake(df_sadeh.Score)))
    graph_rescored(df_sadeh, rs)
    waso_cnt = waso(rs)
    tst, ad, rm, tbt, se = sleep_time(rs)
    return df, df_oakley, df_sadeh, rs, waso_cnt, tst, ad, rm, tbt, se

####List of csv files

In [15]:
names = [(i, v) for i, v in enumerate(os.listdir('/Users/User/Documents/Sleep/gravitys8/SleepExperiment/')) 
         if v not in ('bmarquez20151018_0657_2.csv','jmondejar20151020_0159_21.csv', 'ntelan20151027_0227_3.csv',
                      '.DS_Store', '.Rapp.history')]
print ("Number of files: %s" %len(names))
names

Number of files: 57


[(2, 'aabellera20151003_0551_1.csv'),
 (3, 'aabellera20151004_1224_2.csv'),
 (4, 'aabellera20151005_0250_3.csv'),
 (5, 'aquerouz20151016_0035_1.csv'),
 (6, 'aremirata20151021_0216_1.csv'),
 (7, 'aremirata20151022_0336_2.csv'),
 (8, 'avicente20151014_0219_1.csv'),
 (9, 'bmarquez20151017_0508_1.csv'),
 (11, 'bmarquez20151019_0759_3.csv'),
 (12, 'ccabiling20151015_0543_1.csv'),
 (13, 'cdelacuadra20151002_0112_1.csv'),
 (14, 'cmetoda20151014_0103_1.csv'),
 (15, 'fgalupo20151016_0133_1.csv'),
 (16, 'fsurilla20151001_0130_1.csv'),
 (17, 'gabatol20151006_0330_1.csv'),
 (18, 'gibo20151008_0054_1.csv'),
 (19, 'gmurillo20151013_0438_1.csv'),
 (20, 'jjungao20151009_0247_1.csv'),
 (21, 'jlimotlimot20151002_0134_1.csv'),
 (22, 'jmondejar20151018_0515_1.csv'),
 (23, 'jmondejar20151019_0303_2.csv'),
 (25, 'jmondejar20151021_0342_3.csv'),
 (26, 'jroxas20151017_0127_1.csv'),
 (27, 'jroxas20151019_0102_2.csv'),
 (28, 'jroxas20151020_0211_3.csv'),
 (29, 'jyu20151015_0334_1.csv'),
 (30, 'kbulan20151009_01

# <font color='violet'> Sleep Survey </font>

In [16]:
os.chdir('/Users/User/Documents/Sleep/gravitys8/SleepSurveynDiary/')
ss_data = pd.read_csv("SleepSurvey_data.csv", encoding='latin-1')
ss_data

Unnamed: 0,ID,LastName,FirstName,SurveyDate,Age,Sex,Height(cm),Weight(lb),Medications,MedicalConditions,SleepDuration,SleepTime,WakeTime,SleepTrouble,SleepTroubleNo,NightWakeNo,ResleepDuration,Parasomnia,Snore,Familiarity,AlarmClock,DomArm,Exercise,ExerciseTime,ExerciseDuration,Nap,NapDuration
0,1,GOMEZ,VANILYN,9/18/2015,22,F,156.0,111.0,,,6.0,2:00,11:30,1,2,1,40,,0,1,1,1,1,E,45,1,60
1,2,LIM,MARY ANN,9/18/2015,22,F,152.4,108.0,,,7.0,3:00,11:00,1,2,0,0,,0,0,0,1,1,E,60,0,0
2,3,SURILLA,FLORABELLE,9/30/2015,26,F,150.0,92.5,,,6.0,3:00,9:00,0,0,0,0,SLEEPWALKING | SLEEPTALKING,1,1,0,1,0,,0,1,180
3,4,FELICILDA,LOVEME,9/30/2015,22,F,157.5,123.5,,,4.0,3:00,7:00,0,0,0,0,,0,1,1,1,0,,0,1,30
4,5,DELA CUADRA,CATHERINE,10/1/15,22,F,147.0,99.2,,,7.0,2:00,9:00,0,0,0,0,,0,0,0,0,1,A,60,0,0
5,6,LIMOTLIMOT,JAMES MICHAEL,10/1/15,24,M,180.0,176.0,,,7.0,1:00,9:00,0,0,2,2,,1,1,0,1,3,M,60,0,0
6,7,ABELLERA,AR-JAY,10/1/15,24,M,165.0,136.24,,,7.0,3:00,10:00,0,0,1,10,SLEEPTALKING | TEETH GRINDING,0,1,0,1,1,M,180,1,60
7,8,DOLOR,ROSALIE,10/1/15,23,F,152.4,94.8,,,8.0,4:00,12:00,1,2,3,5,,1,1,0,1,2,A,10,1,10
8,9,ABATOL,GERARDO RAY,10/5/15,22,M,157.5,110.0,,,6.0,2:00,8:00,1,3,1,20,,0,1,0,1,2,A,120,1,30
9,10,AGOR,MARVIN JOSEPH,10/5/15,22,M,165.0,158.0,LORATADINE,,7.0,1:30,8:30,0,0,0,0,SLEEPTALKING,0,1,1,1,6,M,30,1,30


# <font color='violet'> Sleep Diary </font>

In [98]:
os.chdir('/Users/User/Documents/Sleep/gravitys8/SleepSurveynDiary/')
sd_data = pd.read_csv("SleepDiary_data.csv", encoding='latin-1')
sd_data

Unnamed: 0,id,name,day,on_bed,off_bed,sleep_onset_cat,alarm_flag,WASO,sleep_period_time,total_bed_time,sleep_disrupt_factors,wake_state,device_loc,device_comf,notes,workshift,caffeine,exercise,medication,nap_flag,nap_duration,doze_off,mood,consumption_before_sleep,bedtime_routine,Researcher's Notes
0,1,vgomez,1,09/19/2015 11:00pm,09/20/2015 07:00am,A,Y,,3.0,8:00,discomfort,F,L,N,,,,,,N,0,M,P,,Using electronics,WASO data not available. Used old version of s...
1,1,vgomez,2,09/21/2015 01:00am,09/21/2015 11:30am,E,Y,,10.5,10:30,,R,L,Y,,,1M|1E,,,N,0,S,P,,Using electronics,
2,1,vgomez,3,09/24/2015 06:00am,09/24/2015 10:30am,E,Y,,4.5,4:30,,S,L,Y,,05:20pm-02:30am,,,Biogesic,N,0,S,P,,Using electronics,
3,2,mlim,1,09/26/2015 03:55am,09/26/2015 09:29am,E,Y,,5.0,5:34,noise,S,L,Y,After 5 minutes of filling up the form I fell...,03:48pm-12:50am,1M|1E,,,N,0,N,P,A heavy meal|Caffeine|Water,Reading|Using electronics,
4,2,mlim,2,09/27/2015 04:23am,09/27/2015 12:57pm,E,,,8.0,8:34,noise,R,L,Y,,,1A|1E,E,,N,0,N,P,Caffeine|Water,Reading|Using electronics|Taking a bath,
5,2,mlim,3,09/28/2015 04:05am,09/28/2015 09:32am,A,N,,5.0,5:27,,S,L,Y,,,1A|1E,E,,N,0,N,P,,Reading|Using electronics,
6,3,fsurilla,1,10/01/2015 01:30am,10/01/2015 08:00am,E,,,6.5,6:30,noise|pets,F,L,N,,02:25pm-11:35pm,,,,N,0,S,P,Water,Using electronics,
7,4,lfelicilda,1,10/01/2015 03:00am,10/01/2015 07:00am,E,Y,,4.0,4:00,,S,L,Y,,08:30am-02:30am,1A,,,Y,30,N,P,Water,Using electronics,
8,5,cdelacuadra,1,10/02/2015 01:15am,10/02/2015 07:50am,E,Y,,6.0,6:35,,S,R,Y,The device did not fit my wrist properly kind ...,01:30pm-12:00am,,,,N,0,N,P,Water,Reading|Using electronics,
9,6,jlimotlimot,1,10/02/2015 01:33am,10/02/2015 10:20am,E,Y,08:00am-5,8.0,8:47,discomfort,R,R,N,No alarm. I don't use one. Discomfort because ...,01:20pm-11:45pm,,,,N,0,S,P,Water,,


In [100]:
sd_data['uname'] = sd_data.name + (sd_data.day).astype(str)
sd_data = sd_data.drop(sd_data.index[[8, 43]])
sd_data = sd_data.sort('uname').reset_index(drop=True)
sd_data 

Unnamed: 0,id,name,day,on_bed,off_bed,sleep_onset_cat,alarm_flag,WASO,sleep_period_time,total_bed_time,sleep_disrupt_factors,wake_state,device_loc,device_comf,notes,workshift,caffeine,exercise,medication,nap_flag,nap_duration,doze_off,mood,consumption_before_sleep,bedtime_routine,Researcher's Notes,uname
0,7,aabellera,1,10/03/2015 05:51am,10/03/2015 12:57pm,E,,,7.0,7:06,,R,L,Y,,04:30pm-01:30am,,,,Y,15,S,P,Alcohol,Using electronics,,aabellera1
1,7,aabellera,2,10/04/2015 12:25am,10/04/2015 07:20am,A,Y,,7.0,6:55,,S,R,Y,Hotel,,1A,,,N,0,S,P,Water,Reading|Using electronics,,aabellera2
2,7,aabellera,3,10/05/2015 03:15am,10/05/2015 08:50am,E,,,6.0,5:35,,S,L,Y,,,1M,,,Y,45,S,VP,Water,Reading|Using electronics|Taking a bath,,aabellera3
3,23,aquerouz,1,10/16/2015 12:30am,10/16/2015 05:10am,E,Y,,5.0,4:40,noise,S,L,Y,,02:00pm-11:00pm,3E,M,,Y,20,S,U,,Using electronics,,aquerouz1
4,33,aremirata,1,10/21/2015 02:20am,10/21/2015 07:12am,A,N,,4.5,4:52,,S,R,Y,,10:00am-02:00am,,,,N,0,S,P,A heavy meal,Reading|Using electronics,,aremirata1
5,33,aremirata,2,10/22/2015 03:00am,10/22/2015 09:00am,E,N,,5.67,6:00,,S,R,Y,,11:00am-03:00am,,,,Y,60,S,P,A heavy meal,Reading|Using electronics,,aremirata2
6,19,avicente,1,10/14/2015 02:00am,10/14/2015 07:00am,A,N,,5.0,5:00,,R,L,Y,,03:04pm-01:05am,,,Betahistine,N,0,S,P,Water,Reading,,avicente1
7,27,bmarquez,1,10/17/2015 05:00am,10/17/2015 01:15pm,A,,07:00am-5|09:00am-5,8.0,8:15,,R,L,Y,,04:30pm-03:00am,,,,N,0,N,P,Water,,,bmarquez1
8,27,bmarquez,3,10/19/2015 08:00am,10/19/2015 01:00pm,E,,,5.0,5:00,,S,L,Y,,,,,,N,0,N,P,Water,Using electronics,,bmarquez3
9,20,ccabiling,1,10/15/2015 05:45am,10/15/2015 10:57am,E,,08:35am-5|10:30am-7,7.0,5:12,,S,L,Y,,02:00pm-11:00pm,,,,N,0,N,P,Alcohol|Water,Taking a bath,,ccabiling1


In [101]:
sd_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57 entries, 0 to 56
Data columns (total 27 columns):
id                          57 non-null int64
name                        57 non-null object
day                         57 non-null int64
on_bed                      57 non-null object
off_bed                     57 non-null object
sleep_onset_cat             57 non-null object
alarm_flag                  32 non-null object
WASO                        24 non-null object
sleep_period_time           57 non-null float64
total_bed_time              57 non-null object
sleep_disrupt_factors       22 non-null object
wake_state                  57 non-null object
device_loc                  57 non-null object
device_comf                 57 non-null object
notes                       18 non-null object
workshift                   38 non-null object
caffeine                    23 non-null object
exercise                    20 non-null object
medication                  4 non-null object
nap_fl

# <font color='violet'> Gravity App </font>

### <font color='orange'> run the models to all the data collected </font>

In [26]:
stimes = []
for i in range(len(names)):
    print ('\n%s' %names[i][1])
    sdf, sdf_oakley, sdf_sadeh, srs, swaso, ssmin, samin, srmin, sbmin, sse = sleep_wake(names[i][1])
    stimes.append((sdf['username'][0], ssmin, samin, swaso, srmin, sbmin, sse))
    stimedf = pd.DataFrame(stimes, columns=['username', 'sleep_duration', 'awake_duration', 'waso', 
                                            'recording_duration', 'bedtime_duration', 'sleep_efficiency'])

stimedf


aabellera20151003_0551_1.csv
Total Sleep Time: 6 hours and 38 minutes
Awake Duration: 0 hours and 11 minutes
Total Bed Time: 6 hours and 49 minutes
Total Recording Time: 6 hours and 59 minutes
Sleep Efficiency: 97.31%

aabellera20151004_1224_2.csv
Total Sleep Time: 6 hours and 39 minutes
Awake Duration: 0 hours and 9 minutes
Total Bed Time: 6 hours and 48 minutes
Total Recording Time: 6 hours and 58 minutes
Sleep Efficiency: 97.79%

aabellera20151005_0250_3.csv
Total Sleep Time: 5 hours and 27 minutes
Awake Duration: 0 hours and 8 minutes
Total Bed Time: 5 hours and 35 minutes
Total Recording Time: 5 hours and 47 minutes
Sleep Efficiency: 97.61%

aquerouz20151016_0035_1.csv
Total Sleep Time: 4 hours and 18 minutes
Awake Duration: 0 hours and 6 minutes
Total Bed Time: 4 hours and 24 minutes
Total Recording Time: 4 hours and 34 minutes
Sleep Efficiency: 97.73%

aremirata20151021_0216_1.csv
Total Sleep Time: 3 hours and 22 minutes
Awake Duration: 0 hours and 23 minutes
Total Bed Time: 3 

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,username,sleep_duration,awake_duration,waso,recording_duration,bedtime_duration,sleep_efficiency
0,aabellera1,398,11,4,419,409,97.310513
1,aabellera2,399,9,2,418,408,97.794118
2,aabellera3,327,8,3,347,335,97.61194
3,aquerouz1,258,6,2,274,264,97.727273
4,aremirata1,202,23,2,235,225,89.777778
5,aremirata2,314,2,2,326,316,99.367089
6,avicente1,276,0,0,286,276,100.0
7,bmarquez1,478,0,0,488,478,100.0
8,bmarquez3,279,12,4,301,291,95.876289
9,ccabiling1,286,18,6,315,304,94.078947


In [102]:
concat_df = pd.concat((stimedf, sd_data[['WASO', 'sleep_period_time', 'total_bed_time']]), axis=1)
concat_df

Unnamed: 0,username,sleep_duration,awake_duration,waso,recording_duration,bedtime_duration,sleep_efficiency,WASO,sleep_period_time,total_bed_time
0,aabellera1,398,11,4,419,409,97.310513,,7.0,7:06
1,aabellera2,399,9,2,418,408,97.794118,,7.0,6:55
2,aabellera3,327,8,3,347,335,97.61194,,6.0,5:35
3,aquerouz1,258,6,2,274,264,97.727273,,5.0,4:40
4,aremirata1,202,23,2,235,225,89.777778,,4.5,4:52
5,aremirata2,314,2,2,326,316,99.367089,,5.67,6:00
6,avicente1,276,0,0,286,276,100.0,,5.0,5:00
7,bmarquez1,478,0,0,488,478,100.0,07:00am-5|09:00am-5,8.0,8:15
8,bmarquez3,279,12,4,301,291,95.876289,,5.0,5:00
9,ccabiling1,286,18,6,315,304,94.078947,08:35am-5|10:30am-7,7.0,5:12


In [104]:
concat_df['WASO_cnt'] = concat_df['WASO'].apply(lambda x: len(re.split('-', str(x)))-1)
concat_df = concat_df.drop('WASO', axis=1)
concat_df

Unnamed: 0,username,sleep_duration,awake_duration,waso,recording_duration,bedtime_duration,sleep_efficiency,sleep_period_time,total_bed_time,WASO_cnt
0,aabellera1,398,11,4,419,409,97.310513,7.0,7:06,0
1,aabellera2,399,9,2,418,408,97.794118,7.0,6:55,0
2,aabellera3,327,8,3,347,335,97.61194,6.0,5:35,0
3,aquerouz1,258,6,2,274,264,97.727273,5.0,4:40,0
4,aremirata1,202,23,2,235,225,89.777778,4.5,4:52,0
5,aremirata2,314,2,2,326,316,99.367089,5.67,6:00,0
6,avicente1,276,0,0,286,276,100.0,5.0,5:00,0
7,bmarquez1,478,0,0,488,478,100.0,8.0,8:15,2
8,bmarquez3,279,12,4,301,291,95.876289,5.0,5:00,0
9,ccabiling1,286,18,6,315,304,94.078947,7.0,5:12,2
