## Import Library

In [39]:
import os, errno
import argparse
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from scipy.stats import kurtosis
from scipy.stats.mstats import moment
from scipy import stats
from scipy.stats import ttest_ind, f_oneway

sys.path.append(os.path.join(os.path.curdir, '../../', 'util'))
from load_data_basic import *

# Timestamp format strings (strftime/strptime syntax): a full timestamp with
# fractional seconds, and a date-only variant.
date_time_format = '%Y-%m-%dT%H:%M:%S.%f'
date_only_date_time_format = '%Y-%m-%d'

# Sleep-after-work and sleep-after-sleep duration thresholds.
# NOTE(review): units are not stated anywhere in this notebook (presumably
# hours) and neither value is used in the cells shown here - TODO confirm.
sleep_after_work_duration_threshold = 12
sleep_after_sleep_duration_threshold = 2

# Data folders, given relative to the notebook's working directory.
main_data_directory = '../../../data/'
recording_timeline_directory = '../../output/recording_timeline'
sleep_timeline_directory = '../../output/sleep_timeline'
individual_timeline_directory = '../../output/individual_timeline'



## Read Basic Information

In [40]:
# ---------------------------------------------------------------------------
# Load the sleep records and every participant-level table.
# ---------------------------------------------------------------------------
sleep_data = pd.read_csv(os.path.join('../../output', 'sleep_survey_full.csv'))

participant_info = getParticipantInfo(main_data_directory)   # participant information
MGT = read_MGT(main_data_directory)                          # MGT
PreStudyInfo = read_pre_study_info(main_data_directory)      # pre-study info
IGTB = read_IGTB(main_data_directory)                        # IGTB info
Demographic = read_Demographic(main_data_directory)          # demographics


def _split_by_day_kind(shift_records):
    """Return the (workday, off-day, transition-day) subsets of one shift group.

    A workday record has both 'is_sleep_before_work' and 'is_sleep_after_work'
    equal to 1, an off-day record has neither equal to 1, and a transition
    record has at least one of the two 'is_sleep_transition_*' flags equal to 1.
    """
    is_workday = ((shift_records['is_sleep_before_work'] == 1)
                  & (shift_records['is_sleep_after_work'] == 1))
    is_off_day = ((shift_records['is_sleep_before_work'] != 1)
                  & (shift_records['is_sleep_after_work'] != 1))
    is_transition = ((shift_records['is_sleep_transition_before_work'] == 1)
                     | (shift_records['is_sleep_transition_after_work'] == 1))
    return (shift_records.loc[is_workday],
            shift_records.loc[is_off_day],
            shift_records.loc[is_transition])


# shift_type == 1 selects the day-shift records, shift_type == 2 the night-shift ones.
day_data = sleep_data.loc[sleep_data['shift_type'] == 1]
day_workday_data, day_off_day_data, day_transition_day_data = _split_by_day_kind(day_data)

night_data = sleep_data.loc[sleep_data['shift_type'] == 2]
night_workday_data, night_off_day_data, night_transition_day_data = _split_by_day_kind(night_data)

# Parallel lists: data_type[i] is the label for data_array[i].
data_array = [
    day_data, night_data,
    day_workday_data, day_off_day_data,
    night_workday_data, night_off_day_data,
]

data_type = [
    'day-shift all', 'night-shift all',
    'day-shift workday', 'day-shift off day',
    'night-shift workday', 'night-shift off day',
]

# Sleep columns of interest (the variable name keeps its original spelling).
colunm_type = [
    'duration_in_seconds', 'SleepEfficiency',
    'SleepMinutesStageDeep', 'SleepMinutesStageLight', 'SleepMinutesStageRem',
    'sleep_heart_rate_mean', 'sleep_heart_rate_std',
    'sleep_heart_rate_percentile_10', 'sleep_heart_rate_percentile_90',
]

# One row per participant: outer-join all participant-level tables on the
# participant id, index by it, and keep only the rows whose Wave is not 3.
UserInfo = (
    IGTB
    .merge(PreStudyInfo, left_on='uid', right_on='uid', how='outer')
    .merge(participant_info, left_on='uid', right_on='MitreID', how='outer')
    .merge(Demographic, left_on='uid', right_on='uid', how='outer')
    .set_index('uid')
)
UserInfo = UserInfo.loc[UserInfo['Wave'] != 3]


## Raw IGTB - IPAQ (work-related physical activity)

In [41]:
# Raw IPAQ item columns, plus 'Name', which holds the participant id that is
# matched against the UserInfo index below.
raw_IGTB_col = ['ipaq1', 'ipaq2', 'ipaq3', 'ipaq4', 'ipaq5', 'ipaq6',
                'ipaq7', 'ipaq8', 'ipaq9', 'ipaq10', 'ipaq11', 'ipaq12', 
                'ipaq13', 'ipaq14', 'ipaq15', 'ipaq16', 'ipaq17', 'ipaq18', 
                'ipaq19', 'ipaq20', 'ipaq21', 'ipaq22', 'ipaq23', 'ipaq24', 
                'ipaq25', 'ipaq26', 'ipaq27', 'ipaq_break', 'Name']

# Keep only the raw rows that belong to participants present in UserInfo.
IGTB_RAW = read_IGTB_Raw(main_data_directory)[raw_IGTB_col]
IGTB_RAW = IGTB_RAW.loc[IGTB_RAW['Name'].isin(UserInfo.index.values)]

# Collect one record per participant and build the frame once at the end.
# DataFrame.append inside the loop is quadratic and was removed in pandas 2.0.
score_columns = ['Shift', 'work_vigorous_activity', 'work_moderate_activity']
score_rows = []

for user_id in UserInfo.index.values:
    IGTB_RAW_participant = IGTB_RAW.loc[IGTB_RAW['Name'] == user_id]
    IGTB_participant = UserInfo.loc[user_id]

    # UserInfo comes from outer joins, so a participant may have no raw IGTB
    # row; record NaN for them instead of failing on `.values[0]`.
    has_raw_row = len(IGTB_RAW_participant) > 0

    score_rows.append({
        # 1 = 'Day shift'; every other value (including missing) is coded 2.
        'Shift': 1 if IGTB_participant['Shift'] == 'Day shift' else 2,
        'work_vigorous_activity': IGTB_RAW_participant['ipaq3'].values[0] if has_raw_row else np.nan,
        'work_moderate_activity': IGTB_RAW_participant['ipaq5'].values[0] if has_raw_row else np.nan,
    })

# Indexed by participant id, in UserInfo order.
IGTB_RAW_Score = pd.DataFrame(score_rows, index=UserInfo.index.values, columns=score_columns)

# valid_sleep_stats = pd.concat([valid_sleep_stats, IGTB_RAW_Score], axis=1)
    

## IGTB (BFI, personality) - Day, Night shift

In [42]:
# Personality score columns compared between the two shifts.
BFI_col = [
    'neu_igtb', 'con_igtb', 'ext_igtb',
    'agr_igtb', 'ope_igtb',
]

# Participants grouped by the value of UserInfo['Shift'].
day_stats = UserInfo.loc[UserInfo['Shift'] == 'Day shift']
night_stats = UserInfo.loc[UserInfo['Shift'] == 'Night shift']

data_type = ['day-shift', 'night-shift']

# Applied in this order to fill the "mean, std, min, max" slots of each line.
summary_fns = (np.mean, np.std, np.min, np.max)

for col in BFI_col:

    print(col + '\n')
    # NOTE(review): these counts are the group sizes, not the number of
    # non-missing values in `col`, whereas the t-test below drops missing values.
    print('Number of valid participant: day: %i; night: %i\n' % (len(day_stats), len(night_stats)))

    all_scores, day_scores, night_scores = UserInfo[col], day_stats[col], night_stats[col]

    # Descriptive statistics: everyone, then each shift.
    print('Total: mean = %.2f, std = %.2f, range is %.3f - %.3f'
          % tuple(fn(all_scores) for fn in summary_fns))
    print('Day shift: mean = %.2f, std = %.2f, range is %.3f - %.3f'
          % tuple(fn(day_scores) for fn in summary_fns))
    print('Night shift: mean = %.2f, std = %.2f, range is %.3f - %.3f \n'
          % tuple(fn(night_scores) for fn in summary_fns))

    # Independent two-sample t-test (scipy defaults), missing values removed.
    stat, p = ttest_ind(day_scores.dropna(), night_scores.dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    

neu_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 2.28, std = 0.72, range is 1.000 - 4.750
Day shift: mean = 2.24, std = 0.71, range is 1.000 - 4.083
Night shift: mean = 2.36, std = 0.72, range is 1.000 - 4.750 

Statistics = -1.049, p = 0.296

con_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 4.13, std = 0.62, range is 2.333 - 5.000
Day shift: mean = 4.14, std = 0.63, range is 2.333 - 5.000
Night shift: mean = 4.11, std = 0.61, range is 2.833 - 5.000 

Statistics = 0.254, p = 0.799

ext_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 3.58, std = 0.65, range is 1.833 - 5.000
Day shift: mean = 3.62, std = 0.61, range is 1.833 - 4.750
Night shift: mean = 3.50, std = 0.72, range is 1.833 - 5.000 

Statistics = 1.086, p = 0.279

agr_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 4.15, std = 0.47, range is 2.583 - 5.000
Day shift: mean = 4.16, std = 0.48, range is 2.583 - 5.000
Night shift: mean = 4

## IGTB (anxiety, affect, Shipley) - Day, Night shift

In [43]:
# Score columns compared between the two shifts in this cell: STAI, positive
# and negative affect, and the two Shipley columns.
BFI_col = [
    'stai_igtb', 'pos_af_igtb', 'neg_af_igtb',
    'shipley_abs_igtb', 'shipley_voc_igtb',
]

# Participants grouped by the value of UserInfo['Shift'].
day_stats = UserInfo.loc[UserInfo['Shift'] == 'Day shift']
night_stats = UserInfo.loc[UserInfo['Shift'] == 'Night shift']

data_type = ['day-shift', 'night-shift']

# Applied in this order to fill the "mean, std, min, max" slots of each line.
summary_fns = (np.mean, np.std, np.min, np.max)

for col in BFI_col:

    print(col + '\n')
    # NOTE(review): these counts are the group sizes, not the number of
    # non-missing values in `col`, whereas the t-test below drops missing values.
    print('Number of valid participant: day: %i; night: %i\n' % (len(day_stats), len(night_stats)))

    all_scores, day_scores, night_scores = UserInfo[col], day_stats[col], night_stats[col]

    # Descriptive statistics: everyone, then each shift.
    print('Total: mean = %.2f, std = %.2f, range is %.3f - %.3f'
          % tuple(fn(all_scores) for fn in summary_fns))
    print('Day shift: mean = %.2f, std = %.2f, range is %.3f - %.3f'
          % tuple(fn(day_scores) for fn in summary_fns))
    print('Night shift: mean = %.2f, std = %.2f, range is %.3f - %.3f \n'
          % tuple(fn(night_scores) for fn in summary_fns))

    # Independent two-sample t-test (scipy defaults), missing values removed.
    stat, p = ttest_ind(day_scores.dropna(), night_scores.dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    

stai_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 34.86, std = 8.86, range is 20.000 - 71.000
Day shift: mean = 34.23, std = 7.89, range is 20.000 - 54.000
Night shift: mean = 36.07, std = 10.35, range is 20.000 - 71.000 

Statistics = -1.279, p = 0.203

pos_af_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 36.13, std = 6.62, range is 16.000 - 50.000
Day shift: mean = 35.83, std = 6.50, range is 16.000 - 49.000
Night shift: mean = 36.71, std = 6.80, range is 18.000 - 50.000 

Statistics = -0.816, p = 0.416

neg_af_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 16.11, std = 5.24, range is 10.000 - 38.000
Day shift: mean = 15.25, std = 4.14, range is 10.000 - 32.000
Night shift: mean = 17.74, std = 6.57, range is 10.000 - 38.000 

Statistics = -2.982, p = 0.003

shipley_abs_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 13.35, std = 3.75, range is 2.000 - 21.000
Day shift: mean = 13.25, std = 3.

## IGTB (Work) - Day, Night shift

In [45]:
# Work-related IGTB score columns compared between the two shifts
# (the list keeps the name BFI_col used by the earlier cells).
BFI_col = [
    'itp_igtb', 'irb_igtb',
    'iod_id_igtb', 'iod_od_igtb', 'ocb_igtb',
]

# Participants grouped by the value of UserInfo['Shift'].
day_stats = UserInfo.loc[UserInfo['Shift'] == 'Day shift']
night_stats = UserInfo.loc[UserInfo['Shift'] == 'Night shift']

data_type = ['day-shift', 'night-shift']

# Applied in this order to fill the "mean, std, min, max" slots of each line.
summary_fns = (np.mean, np.std, np.min, np.max)

for col in BFI_col:

    print(col + '\n')
    # NOTE(review): these counts are the group sizes, not the number of
    # non-missing values in `col`, whereas the t-test below drops missing values.
    print('Number of valid participant: day: %i; night: %i\n' % (len(day_stats), len(night_stats)))

    all_scores, day_scores, night_scores = UserInfo[col], day_stats[col], night_stats[col]

    # Descriptive statistics: everyone, then each shift.
    print('Total: mean = %.2f, std = %.2f, range is %.3f - %.3f'
          % tuple(fn(all_scores) for fn in summary_fns))
    print('Day shift: mean = %.2f, std = %.2f, range is %.3f - %.3f'
          % tuple(fn(day_scores) for fn in summary_fns))
    print('Night shift: mean = %.2f, std = %.2f, range is %.3f - %.3f \n'
          % tuple(fn(night_scores) for fn in summary_fns))

    # Independent two-sample t-test (scipy defaults), missing values removed.
    stat, p = ttest_ind(day_scores.dropna(), night_scores.dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))

itp_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 4.69, std = 0.52, range is 1.000 - 5.000
Day shift: mean = 4.66, std = 0.57, range is 1.000 - 5.000
Night shift: mean = 4.74, std = 0.42, range is 3.333 - 5.000 

Statistics = -0.911, p = 0.364

irb_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 45.03, std = 4.33, range is 31.000 - 49.000
Day shift: mean = 44.85, std = 4.35, range is 32.000 - 49.000
Night shift: mean = 45.38, std = 4.27, range is 31.000 - 49.000 

Statistics = -0.757, p = 0.450

iod_id_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 11.05, std = 5.36, range is 7.000 - 32.000
Day shift: mean = 11.35, std = 5.43, range is 7.000 - 32.000
Night shift: mean = 10.50, std = 5.16, range is 7.000 - 29.000 

Statistics = 0.970, p = 0.334

iod_od_igtb

Number of valid participant: day: 110; night: 58

Total: mean = 17.15, std = 6.58, range is 12.000 - 54.000
Day shift: mean = 17.89, std = 7.49, range is 12.000 - 

## Output latex

In [None]:
# Build a LaTeX `tabular` comparing anxiety and affect scores between day- and
# night-shift participants (mean +/- SD per group, plus a t-test p-value).
#
# Every LaTeX backslash is written as '\\' so that no string relies on invalid
# escape sequences such as '\m' or '\h' (a SyntaxWarning on recent Python
# versions); '\n' and '\t' remain real newlines and tabs. The generated text
# is unchanged.
anx_col = ['stai_igtb', 'pos_af_igtb', 'neg_af_igtb']

# Row labels: latex_col is parallel to anx_col; affect_col holds the indented
# sub-row labels placed under the shared 'Affect' heading.
latex_col = ['Anxiety', 'Affect', 'Affect']
affect_col = ['Positive Affect', 'Negative Affect']

start = '\\begin{tabular}{p{3cm}p{2cm}p{2cm}p{2cm}p{2cm}}'

# The table is accumulated as a list of pieces and joined once at the end.
table_parts = [
    start,
    # Empty first row (five blank cells).
    '\n\t\\multicolumn{1}{p{3cm}}{} & ',
    '\n\t\\multicolumn{1}{p{2.5cm}}{} & ',
    '\n\t\\multicolumn{1}{p{2.5cm}}{} & ',
    '\n\t\\multicolumn{1}{p{2.5cm}}{} & ',
    '\n\t\\multicolumn{1}{p{2.5cm}}{}\\\\',
    # Header
    '\n\n\t\\hline',
    '\n\t\\rule{0pt}{2ex}',
    '\n\t\\multirow{2}{*}{{Item}} & ',
    '\n\t\\multicolumn{1}{c}{{Total (n = $%d$)}} & ' % (len(day_stats) + len(night_stats)),
    '\n\t\\multicolumn{1}{c}{{Day shift (n = $%d$)}} & ' % (len(day_stats)),
    '\n\t\\multicolumn{1}{c}{{Night shift (n = $%d$)}} & ' % (len(night_stats)),
    '\n\t\\multicolumn{1}{c}{\\multirow{2}{*}{{\\centering P-value}}} \\\\ & ',
    '\n\t\\multicolumn{1}{c}{{Mean $\\pm$ SD}} & ',
    '\n\t\\multicolumn{1}{c}{{Mean $\\pm$ SD}} & ',
    '\n\t\\multicolumn{1}{c}{{Mean $\\pm$ SD}} &',
    '\n\t\\rule{0pt}{2ex}\\\\',
    '\n\t\\hline',
]

mean_sd_cell = '\n\t\\multicolumn{1}{c}{$%.1f$ $\\pm$ $%.1f$} & '

# Score
for idx, col in enumerate(anx_col):

    # Missing values are removed per column. Cell-specific names are used so
    # the notebook-level `day_data` / `night_data` sleep frames defined in the
    # "Read Basic Information" cell are not overwritten.
    overall_scores = UserInfo.dropna(subset=[col])[col]
    day_scores = day_stats.dropna(subset=[col])[col]
    night_scores = night_stats.dropna(subset=[col])[col]

    stat, p = ttest_ind(day_scores, night_scores)

    table_parts.append('\n')

    # Row label. 'pos_af_igtb' first emits the shared 'Affect' heading row with
    # empty cells, then starts its own indented sub-row.
    if col == 'stai_igtb':
        table_parts.append('\n\t\\multicolumn{1}{l}{%s} &' % (latex_col[idx]))
    elif col == 'pos_af_igtb':
        table_parts.append('\n\t\\multicolumn{1}{l}{%s} &' % (latex_col[idx]))
        table_parts.append(' & & & \\rule{0pt}{3ex} \\\\')
        table_parts.append('\n\t\\multicolumn{1}{l}{\\hspace{0.5cm}%s} &' % (affect_col[0]))
    elif col == 'neg_af_igtb':
        table_parts.append('\n\t\\multicolumn{1}{l}{\\hspace{0.5cm}%s} &' % (affect_col[1]))

    # Mean and SD for everyone, day shift and night shift
    # (np.std is called with its default ddof).
    table_parts.append(mean_sd_cell % (np.mean(overall_scores), np.std(overall_scores)))
    table_parts.append(mean_sd_cell % (np.mean(day_scores), np.std(day_scores)))
    table_parts.append(mean_sd_cell % (np.mean(night_scores), np.std(night_scores)))

    # p-values below 0.05 are set in bold.
    if p < 0.05:
        table_parts.append('\n\t\\multicolumn{1}{c}{$\\mathbf{%.3f}$} \\rule{0pt}{3ex} \\\\' % (p))
    else:
        table_parts.append('\n\t\\multicolumn{1}{c}{$%.3f$} \\rule{0pt}{3ex} \\\\' % (p))

# '[1mm]' attaches to the final row's '\\' as extra vertical space.
table_parts.append('[1mm]\n')
table_parts.append('\n\t\\hline')
table_parts.append('\n\n')
table_parts.append('\\end{tabular}')

latex_output = ''.join(table_parts)

print(latex_output)



