## Import Libraries

In [122]:
import os, errno
import argparse
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from scipy.stats import kurtosis
from scipy.stats.mstats import moment
from scipy import stats
from scipy.stats import ttest_ind, f_oneway

sys.path.append(os.path.join(os.path.curdir, '../../', 'util'))
from load_data_basic import *

# Timestamp formats (strftime/strptime patterns):
#   date_time_format            - full timestamp with fractional seconds
#   date_only_date_time_format  - calendar date only
date_time_format = '%Y-%m-%dT%H:%M:%S.%f'
date_only_date_time_format = '%Y-%m-%d'

# Thresholds for classifying a sleep that follows work / follows another sleep.
# NOTE(review): units are not stated here (presumably hours) - TODO confirm.
sleep_after_work_duration_threshold = 12
sleep_after_sleep_duration_threshold = 2

# Data folders. All paths are relative, so they resolve against the directory
# the notebook kernel is started from.
main_data_directory = '../../../data/'
recording_timeline_directory = '../../output/recording_timeline'
sleep_timeline_directory = '../../output/sleep_timeline'
individual_timeline_directory = '../../output/individual_timeline'




## Read Basic Information

In [123]:
# --- Load inputs -----------------------------------------------------------
# Per-sleep survey table covering all participants.
sleep_data = pd.read_csv(os.path.join('../../output', 'sleep_survey_full.csv'))

# Participant-level sources, all read from the main data directory through the
# project loaders. participant_info keeps 'MitreID' as a regular column because
# the merge further down joins on it.
participant_info = getParticipantInfo(main_data_directory)
MGT = read_MGT(main_data_directory)
PreStudyInfo = read_pre_study_info(main_data_directory)
IGTB = read_IGTB(main_data_directory)
Demographic = read_Demographic(main_data_directory)


def _split_sleep_by_day_kind(shift_sleep):
    """Split one shift's sleep rows into (workday, off-day, transition-day) frames.

    workday    : flagged as both sleep-before-work and sleep-after-work
    off-day    : flagged as neither
    transition : flagged as a transition sleep before or after work
    """
    before_work = shift_sleep['is_sleep_before_work']
    after_work = shift_sleep['is_sleep_after_work']
    is_workday = (before_work == 1) & (after_work == 1)
    is_off_day = (before_work != 1) & (after_work != 1)
    is_transition = (
        (shift_sleep['is_sleep_transition_before_work'] == 1)
        | (shift_sleep['is_sleep_transition_after_work'] == 1)
    )
    return (shift_sleep.loc[is_workday],
            shift_sleep.loc[is_off_day],
            shift_sleep.loc[is_transition])


# --- Partition sleep records by shift (1 = day, 2 = night) and day kind ----
day_data = sleep_data.loc[sleep_data['shift_type'] == 1]
day_workday_data, day_off_day_data, day_transition_day_data = _split_sleep_by_day_kind(day_data)

night_data = sleep_data.loc[sleep_data['shift_type'] == 2]
night_workday_data, night_off_day_data, night_transition_day_data = _split_sleep_by_day_kind(night_data)

# Frames and their display labels, kept in matching order.
data_array = [
    day_data, night_data,
    day_workday_data, day_off_day_data,
    night_workday_data, night_off_day_data,
]

data_type = [
    'day-shift all', 'night-shift all',
    'day-shift workday', 'day-shift off day',
    'night-shift workday', 'night-shift off day',
]

# Sleep columns of interest in the survey table.
colunm_type = [
    'duration_in_seconds',
    'SleepEfficiency',
    'SleepMinutesStageDeep',
    'SleepMinutesStageLight',
    'SleepMinutesStageRem',
    'sleep_heart_rate_mean',
    'sleep_heart_rate_std',
    'sleep_heart_rate_percentile_10',
    'sleep_heart_rate_percentile_90',
]

# --- One row per participant: outer-join every participant-level source ----
UserInfo = (
    IGTB
    .merge(PreStudyInfo, left_on='uid', right_on='uid', how='outer')
    .merge(participant_info, left_on='uid', right_on='MitreID', how='outer')
    .merge(Demographic, left_on='uid', right_on='uid', how='outer')
    .set_index('uid')
)
# Keep every participant whose 'Wave' value is not 3.
UserInfo = UserInfo.loc[UserInfo['Wave'] != 3]



## Raw IGTB - PSQI and GATS

In [124]:
# Raw survey columns to keep: IPAQ items ipaq1 .. ipaq27, the 'ipaq_break'
# item, and the participant identifier column 'Name'.
raw_IGTB_col = ['ipaq%d' % item_number for item_number in range(1, 28)] + ['ipaq_break', 'Name']

# Three groups of derived physical-activity column names.
# NOTE(review): the grouping criterion is not documented here; judging by the
# names, group 2 holds "days per week" style items - confirm against the
# scoring code that consumes these lists.
ipaq_col1 = [
    'work_vigorous_activity',
    'work_moderate_activity',
    'walk_time_on_work',
    'sitting_weekday',
    'sitting_weekend',
]

ipaq_col2 = [
    'leisure_vigorous_activity_day',
    'leisure_moderate_activity_day',
    'housework_vigorous_activity_day',
    'housework_moderate_activity_day',
    'days_in_trans',
    'days_in_bike',
]

# The '*_day' leisure columns and the 'days_in_*' columns are deliberately
# left out of this group.
ipaq_col3 = [
    'housework_vigorous_activity',
    'housework_moderate_activity',
    'leisure_vigorous_activity',
    'leisure_moderate_activity',
    'walk_time_on_work',
    'walk_time_leisure',
    'time_in_trans',
    'time_in_bike',
]

# Load the raw IGTB survey through the project loader and keep only the
# columns listed in raw_IGTB_col (IPAQ items plus the 'Name' identifier).
IGTB_RAW = read_IGTB_Raw(main_data_directory)[raw_IGTB_col]
# Restrict to participants still present in UserInfo (which is indexed by uid).
IGTB_RAW = IGTB_RAW.loc[IGTB_RAW['Name'].isin(UserInfo.index.values)]
# Accumulator that receives one row per participant in the loop below.
IGTB_RAW_Score = pd.DataFrame()

for user_id in UserInfo.index.values:
    # Raw IPAQ row(s) for this participant; not referenced again in the
    # statements that survive in this loop body.
    IGTB_RAW_participant = IGTB_RAW.loc[IGTB_RAW['Name'] == user_id]
    # Merged participant-level record for this uid.
    IGTB_participant = UserInfo.loc[user_id]
    
    # One-row frame keyed by the participant id.
    frame = pd.DataFrame(index=[user_id])
    # Shift is coded 1 for 'Day shift' and 2 for any other value.
    frame['Shift'] = 1 if IGTB_participant['Shift'] == 'Day shift' else 2
    frame['sleep_score'] = IGTB_participant['psqi_igtb']
    
    # NOTE(review): the next line is a lone '-', which is not valid Python. The
    # recorded output of this cell shows activity columns (for example
    # work_vigorous_activity) that no statement here creates, so the code that
    # fills them from IGTB_RAW_participant appears to have been lost in this
    # export - restore it from the original notebook before running.
    -    
    # NOTE(review): DataFrame.append was removed in pandas 2.0; on current
    # pandas, collect the frames in a list and call pd.concat once after the loop.
    IGTB_RAW_Score = IGTB_RAW_Score.append(frame)
    
# print(IGTB_RAW_Score)


        Shift  sleep_score  work_vigorous_activity  work_moderate_activity  \
SD1001      1          6.0                   180.0                    60.0   
SD1002      1          6.0                     NaN                     NaN   
SD1003      1          7.0                    45.0                    60.0   
SD1004      1          8.0                    30.0                    15.0   
SD1005      1          8.0                   420.0                   180.0   
SD1006      1          5.0                    60.0                   120.0   
SD1008      1          7.0                     NaN                   180.0   
SD1009      1         10.0                     NaN                     NaN   
SD1010      1          8.0                   400.0                   300.0   
SD1011      1          6.0                    20.0                   600.0   
SD1012      1          8.0                     NaN                     NaN   
SD1013      1          8.0                   600.0              

## IGTB (Physical activity) - Day, Night shift

In [125]:
# Physical-activity columns compared between shifts. Relative to the full set
# of derived columns, the '*_day' leisure columns, 'days_in_trans',
# 'days_in_bike' and 'time_in_bike' are intentionally excluded.
ipaq_col = [
    'work_vigorous_activity', 'work_moderate_activity',
    'housework_vigorous_activity', 'housework_moderate_activity',
    'leisure_vigorous_activity', 'leisure_moderate_activity',
    'walk_time_on_work', 'walk_time_leisure', 'time_in_trans',
    'sitting_weekday', 'sitting_weekend',
]


def _summary_stats(values):
    """Return (mean, std, min, max) of `values`, each computed with NumPy defaults."""
    return (np.mean(values), np.std(values), np.min(values), np.max(values))


# Participants split by shift code (1 = day shift, 2 = night shift).
day_sleep_stats = IGTB_RAW_Score.loc[IGTB_RAW_Score['Shift'] == 1]
night_sleep_stats = IGTB_RAW_Score.loc[IGTB_RAW_Score['Shift'] == 2]

data_type = ['day-shift', 'night-shift']

for col in ipaq_col:

    # Keep only the participants who have a value for this column.
    overall = IGTB_RAW_Score.dropna(subset=[col])
    day_data = day_sleep_stats.dropna(subset=[col])
    night_data = night_sleep_stats.dropna(subset=[col])

    print(col + '\n')
    print('Number of valid participant: day: %i; night: %i\n' % (len(day_data), len(night_data)))

    # Descriptive statistics: everyone, then each shift separately.
    print('Total: mean = %.1f, std = %.1f, range is %.3f - %.3f' % _summary_stats(overall[col]))
    print('Day shift: mean = %.1f, std = %.1f, range is %.3f - %.3f' % _summary_stats(day_data[col]))
    print('Night shift: mean = %.1f, std = %.1f, range is %.3f - %.3f \n' % _summary_stats(night_data[col]))

    # Independent two-sample t-test, day shift vs night shift.
    day_values = day_data[col].dropna()
    night_values = night_data[col].dropna()
    stat, p = ttest_ind(day_values, night_values)
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    

work_vigorous_activity

Number of valid participant: day: 58; night: 36

Total: mean = 170.7, std = 171.1, range is 15.000 - 600.000
Day shift: mean = 154.2, std = 163.0, range is 15.000 - 600.000
Night shift: mean = 197.2, std = 180.2, range is 15.000 - 600.000 

Statistics = -1.181, p = 0.241

work_moderate_activity

Number of valid participant: day: 73; night: 40

Total: mean = 174.8, std = 155.3, range is 15.000 - 600.000
Day shift: mean = 163.9, std = 154.1, range is 15.000 - 600.000
Night shift: mean = 194.6, std = 155.7, range is 15.000 - 600.000 

Statistics = -1.000, p = 0.319

housework_vigorous_activity

Number of valid participant: day: 49; night: 25

Total: mean = 276.3, std = 421.9, range is 15.000 - 2520.000
Day shift: mean = 214.6, std = 253.7, range is 20.000 - 1080.000
Night shift: mean = 397.2, std = 615.4, range is 15.000 - 2520.000 

Statistics = -1.775, p = 0.080

housework_moderate_activity

Number of valid participant: day: 94; night: 45

Total: mean = 318.1, st

## Output LaTeX

In [128]:
# Build and print a LaTeX `tabular` comparing each physical-activity column
# between all participants, day shift and night shift (mean +/- SD and the
# t-test p-value, with p < 0.05 set in bold).
#
# All LaTeX fragments are written as raw strings. The previous version used
# normal string literals containing sequences such as '\m', '\h', '\p', '\c'
# and '\e'; Python does not recognise those escapes, which only worked because
# the backslash was kept, and which raise DeprecationWarning (3.6+) /
# SyntaxWarning (3.12+). The emitted text is unchanged.

# Row labels, in the same order as `ipaq_col`.
latex_col = ['Vigorous activity per work', 'Moderate activity per work',
             'Overall vigorous housework', 'Overall moderate housework',
             'Overall vigorous leisure', 'Overall moderate leisure',
             'Walk time per work', 'Walk time per leisure', 'Overall transportation',
             'Sitting on weekday', 'Sitting on weekend']

# Every fragment after the opening line starts on a new, tab-indented line.
latex_row_prefix = '\n\t'

# Table cell holding "mean +/- SD" with one decimal place each.
latex_mean_sd_cell = r'\multicolumn{1}{c}{$%.1f$ $\pm$ $%.1f$} & '

start = r'\begin{tabular}{p{3cm}p{2cm}p{2cm}p{2cm}p{2cm}}'

# Empty first row that fixes the column widths.
latex_output = start + latex_row_prefix + r'\multicolumn{1}{p{3cm}}{} & '
latex_output += latex_row_prefix + r'\multicolumn{1}{p{2.5cm}}{} & '
latex_output += latex_row_prefix + r'\multicolumn{1}{p{2.5cm}}{} & '
latex_output += latex_row_prefix + r'\multicolumn{1}{p{2.5cm}}{} & '
latex_output += latex_row_prefix + r'\multicolumn{1}{p{2.5cm}}{}\\'

# Header: group sizes on the first line, "Mean +/- SD" on the second.
latex_output += '\n' + latex_row_prefix + r'\hline'
latex_output += latex_row_prefix + r'\rule{0pt}{2ex}'
latex_output += latex_row_prefix + r'\multirow{2}{*}{{Item (minutes)}} & '
latex_output += latex_row_prefix + r'\multicolumn{1}{c}{{Total (n = $%d$)}} & ' % (len(day_sleep_stats) + len(night_sleep_stats))
latex_output += latex_row_prefix + r'\multicolumn{1}{c}{{Day shift (n = $%d$)}} & ' % (len(day_sleep_stats))
latex_output += latex_row_prefix + r'\multicolumn{1}{c}{{Night shift (n = $%d$)}} & ' % (len(night_sleep_stats))
latex_output += latex_row_prefix + r'\multicolumn{1}{c}{\multirow{2}{*}{{\centering P-value}}} \\ & '
latex_output += latex_row_prefix + r'\multicolumn{1}{c}{{Mean $\pm$ SD}} & '
latex_output += latex_row_prefix + r'\multicolumn{1}{c}{{Mean $\pm$ SD}} & '
latex_output += latex_row_prefix + r'\multicolumn{1}{c}{{Mean $\pm$ SD}} &'
latex_output += latex_row_prefix + r'\rule{0pt}{2ex}\\'
latex_output += latex_row_prefix + r'\hline'

# One table row per activity column.
for idx, col in enumerate(ipaq_col):

    # Only participants with a value for this column contribute.
    overall    = IGTB_RAW_Score.dropna(subset=[col])
    day_data   = day_sleep_stats.dropna(subset=[col])
    night_data = night_sleep_stats.dropna(subset=[col])

    # Independent two-sample t-test, day shift vs night shift.
    stat, p = ttest_ind(day_data[col], night_data[col])

    latex_output += '\n'

    latex_output += latex_row_prefix + r'\multicolumn{1}{l}{%s} &' % (latex_col[idx])
    latex_output += latex_row_prefix + latex_mean_sd_cell % (np.mean(overall[col]), np.std(overall[col]))
    latex_output += latex_row_prefix + latex_mean_sd_cell % (np.mean(day_data[col]), np.std(day_data[col]))
    latex_output += latex_row_prefix + latex_mean_sd_cell % (np.mean(night_data[col]), np.std(night_data[col]))

    # Significant p-values are typeset in bold.
    if p < 0.05:
        latex_output += latex_row_prefix + r'\multicolumn{1}{c}{$\mathbf{%.3f}$} \rule{0pt}{3ex} \\' % (p)
    else:
        latex_output += latex_row_prefix + r'\multicolumn{1}{c}{$%.3f$} \rule{0pt}{3ex} \\' % (p)

# '[1mm]' attaches to the final row's '\\' and adds a little space before the rule.
latex_output += '[1mm]\n'
latex_output += latex_row_prefix + r'\hline'
latex_output += '\n\n'
latex_output += r'\end{tabular}'

print(latex_output)




\begin{tabular}{p{3cm}p{2cm}p{2cm}p{2cm}p{2cm}}
	\multicolumn{1}{p{3cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{}\\

	\hline
	\rule{0pt}{2ex}
	\multirow{2}{*}{{Item (minutes)}} & 
	\multicolumn{1}{c}{{Total (n = $168$)}} & 
	\multicolumn{1}{c}{{Day shift (n = $110$)}} & 
	\multicolumn{1}{c}{{Night shift (n = $58$)}} & 
	\multicolumn{1}{c}{\multirow{2}{*}{{\centering P-value}}} \\ & 
	\multicolumn{1}{c}{{Mean $\pm$ SD}} & 
	\multicolumn{1}{c}{{Mean $\pm$ SD}} & 
	\multicolumn{1}{c}{{Mean $\pm$ SD}} &
	\rule{0pt}{2ex}\\
	\hline

	\multicolumn{1}{l}{Vigorous activity per work} &
	\multicolumn{1}{c}{$170.7$ $\pm$ $171.1$} & 
	\multicolumn{1}{c}{$154.2$ $\pm$ $163.0$} & 
	\multicolumn{1}{c}{$197.2$ $\pm$ $180.2$} & 
	\multicolumn{1}{c}{$0.241$} \rule{0pt}{3ex} \\

	\multicolumn{1}{l}{Moderate activity per work} &
	\multicolumn{1}{c}{$174.8$ $\pm$ $155.3$} & 
	\multicolumn{1}{c}{$163.9$ $\pm$ $154.1$} & 