## Import Library

In [88]:
import os, errno
import argparse
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from scipy.stats import kurtosis
from scipy.stats.mstats import moment
from scipy import stats
from scipy.stats import ttest_ind, f_oneway

sys.path.append(os.path.join(os.path.curdir, '../../', 'util'))
from load_data_basic import *

# strftime/strptime-style format strings: full timestamp with fractional
# seconds, and a date-only variant
date_time_format = '%Y-%m-%dT%H:%M:%S.%f'
date_only_date_time_format = '%Y-%m-%d'

# Duration thresholds for "sleep after work" and "sleep after sleep".
# NOTE(review): the names suggest hours, but nothing visible in this section
# reads these values -- confirm the unit where they are consumed.
sleep_after_work_duration_threshold = 12
sleep_after_sleep_duration_threshold = 2

# Data and output folders, given relative to the notebook's working directory
main_data_directory = '../../../data/'
recording_timeline_directory = '../../output/recording_timeline'
sleep_timeline_directory = '../../output/sleep_timeline'
individual_timeline_directory = '../../output/individual_timeline'



## Read Basic Information

In [89]:
# Sleep survey records for all participants
sleep_data = pd.read_csv(os.path.join('../../output', 'sleep_survey_full.csv'))

# Participant-level tables, each loaded from the main data directory
participant_info = getParticipantInfo(main_data_directory)
MGT = read_MGT(main_data_directory)
PreStudyInfo = read_pre_study_info(main_data_directory)
IGTB = read_IGTB(main_data_directory)
Demographic = read_Demographic(main_data_directory)


def _split_shift_sleep(shift_sleep):
    """Split one shift's sleep records by their relation to work.

    Returns a tuple ``(workday, off_day, transition_day)`` of row subsets:

    * workday        -- both 'is_sleep_before_work' and 'is_sleep_after_work' equal 1
    * off_day        -- neither of those two flags equals 1
    * transition_day -- either transition flag (before or after work) equals 1
    """
    before_work = shift_sleep['is_sleep_before_work'] == 1
    after_work = shift_sleep['is_sleep_after_work'] == 1
    in_transition = (
        (shift_sleep['is_sleep_transition_before_work'] == 1)
        | (shift_sleep['is_sleep_transition_after_work'] == 1)
    )
    return (
        shift_sleep.loc[before_work & after_work],
        shift_sleep.loc[~before_work & ~after_work],
        shift_sleep.loc[in_transition],
    )


# Day shift records (shift_type == 1) and their workday / off-day / transition subsets
day_data = sleep_data.loc[sleep_data['shift_type'] == 1]
day_workday_data, day_off_day_data, day_transition_day_data = _split_shift_sleep(day_data)

# Night shift records (shift_type == 2) and the same three subsets
night_data = sleep_data.loc[sleep_data['shift_type'] == 2]
night_workday_data, night_off_day_data, night_transition_day_data = _split_shift_sleep(night_data)

# Frames to compare, listed in the same order as their labels in data_type
data_array = [
    day_data, night_data,
    day_workday_data, day_off_day_data,
    night_workday_data, night_off_day_data,
]

data_type = [
    'day-shift all', 'night-shift all',
    'day-shift workday', 'day-shift off day',
    'night-shift workday', 'night-shift off day',
]

# Sleep summary columns of interest
colunm_type = [
    'duration_in_seconds', 'SleepEfficiency',
    'SleepMinutesStageDeep', 'SleepMinutesStageLight', 'SleepMinutesStageRem',
    'sleep_heart_rate_mean', 'sleep_heart_rate_std',
    'sleep_heart_rate_percentile_10', 'sleep_heart_rate_percentile_90',
]

# One row per participant: outer-join every participant-level table on the
# participant id ('uid'; called 'MitreID' in participant_info), index by that
# id, then keep only rows whose 'Wave' is not 3.
UserInfo = (
    IGTB
    .merge(PreStudyInfo, left_on='uid', right_on='uid', how='outer')
    .merge(participant_info, left_on='uid', right_on='MitreID', how='outer')
    .merge(Demographic, left_on='uid', right_on='uid', how='outer')
    .set_index('uid')
    .loc[lambda merged: merged['Wave'] != 3]
)


## Raw IGTB - Alcohol

In [90]:
# Raw IGTB survey columns to load: the PSQI, GATS and AUDIT items, plus
# 'Name', which holds the participant id that is matched against UserInfo's index.
raw_IGTB_col = ['psqi_inst', 'psqi1', 'psqi1ampm', 'psqi2', 'psqi3', 'psqi3ampm', 'psqi4',
                'psqi5a', 'psqi5b', 'psqi5c', 'psqi5d', 'psqi5e', 'psqi5f', 'psqi5g', 'psqi5h',
                'psqi5i', 'psqi5ja', 'psqi5jb', 'psqi6', 'psqi7', 'psqi8', 'psqi9', 'psqi_complete', 
                'gats1', 'gats2', 'gats3Week_1', 'gats3Week_2', 'gats3Week_3', 
                'gats3Week_4', 'gats3Week_5', 'gats3Week_6', 'gats3Week_7', 
                'audit1', 'audit2', 'audit3', 'audit4', 'audit5', 
                'audit6', 'audit7', 'audit8', 'audit9', 'audit10', 'Name']

# Keep only the raw rows of participants that are present in UserInfo
IGTB_RAW = read_IGTB_Raw(main_data_directory)[raw_IGTB_col]
IGTB_RAW = IGTB_RAW.loc[IGTB_RAW['Name'].isin(UserInfo.index.values)]

# Raw AUDIT item -> column name in the score table. The column names
# (including the 'driking' spelling) are kept because later cells use them.
audit_item_to_score_col = {
    'audit1': 'driking_frequency',
    'audit2': 'number_of_drink_per_day',
    'audit3': 'more_than_six_drink_frequency',
}

# Collect one record per participant and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0, and appending row by row copies
# the whole frame on every iteration.
score_index = []
score_rows = []

for user_id in UserInfo.index.values:
    IGTB_RAW_participant = IGTB_RAW.loc[IGTB_RAW['Name'] == user_id]
    IGTB_participant = UserInfo.loc[user_id]

    # Shift is encoded as 1 for 'Day shift' and 2 for anything else
    row = {'Shift': 1 if IGTB_participant['Shift'] == 'Day shift' else 2}

    # AUDIT answers: take the participant's first raw row. A participant with
    # no raw IGTB row gets NaN instead of raising IndexError; the next cell
    # drops rows whose 'driking_frequency' is NaN.
    for audit_item, score_col in audit_item_to_score_col.items():
        answers = IGTB_RAW_participant[audit_item].values
        row[score_col] = answers[0] if len(answers) > 0 else np.nan

    score_index.append(user_id)
    score_rows.append(row)

# Passing columns explicitly keeps the schema stable even with no participants
IGTB_RAW_Score = pd.DataFrame(
    score_rows,
    index=score_index,
    columns=['Shift'] + list(audit_item_to_score_col.values()),
)


## IGTB (Alcohol) - Day, Night shift

In [91]:
audit_col = ['driking_frequency', 'number_of_drink_per_day', 'more_than_six_drink_frequency']

# Answer labels per AUDIT item; answer code k corresponds to the (k - 1)-th label
driking_frequency = ['Never', 'Monthly', '2-4 times per month', '2-3 times per week', '4 or more times a week']
number_of_drink_per_day = ['1-2', '3-4', '5-6', '7-8', '9-10']
more_than_six_drink_frequency = ['Never', 'Less than Monthly', 'Monthly', 'Weekly', 'Daily']

answer_labels = {
    'driking_frequency': driking_frequency,
    'number_of_drink_per_day': number_of_drink_per_day,
    'more_than_six_drink_frequency': more_than_six_drink_frequency,
}

# Valid participants are those who answered the drinking-frequency item,
# split by shift (1 = day, 2 = night)
valid_scores = IGTB_RAW_Score.dropna(subset=['driking_frequency'])
day_sleep_stats = valid_scores.loc[valid_scores['Shift'] == 1]
night_sleep_stats = valid_scores.loc[valid_scores['Shift'] == 2]
data_type = ['day-shift', 'night-shift']

n_day = len(day_sleep_stats)
n_night = len(night_sleep_stats)

for col in audit_col:

    print(col + '\n')
    print('Number of valid participant: day: %i; night: %i\n' % (n_day, n_night))

    answer_type = answer_labels[col]

    # Count and share of participants per answer code (1..5): overall and per shift
    for i, answer_label in enumerate(answer_type, start=1):
        overall = IGTB_RAW_Score.loc[IGTB_RAW_Score[col] == i]
        day_data = day_sleep_stats.loc[day_sleep_stats[col] == i]
        night_data = night_sleep_stats.loc[night_sleep_stats[col] == i]

        print(answer_label + '\n')
        print('Total: n = %i, %.3f ' % (len(overall), len(overall) / (n_day + n_night)))
        print('Day shift: n = %i, %.3f' % (len(day_data), len(day_data) / n_day))
        print('Night shift: n = %i, %.3f\n' % (len(night_data), len(night_data) / n_night))

    # Independent two-sample t-test on the answer codes, day vs. night shift
    day_answers = day_sleep_stats[col].dropna()
    night_answers = night_sleep_stats[col].dropna()
    stat, p = ttest_ind(day_answers, night_answers)
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    

driking_frequency

Number of valid participant: day: 110; night: 58

Never

Total: n = 33, 0.196 
Day shift: n = 22, 0.200
Night shift: n = 11, 0.190

Monthly

Total: n = 69, 0.411 
Day shift: n = 43, 0.391
Night shift: n = 26, 0.448

2-4 times per month

Total: n = 45, 0.268 
Day shift: n = 31, 0.282
Night shift: n = 14, 0.241

2-3 times per week

Total: n = 20, 0.119 
Day shift: n = 14, 0.127
Night shift: n = 6, 0.103

4 or more times a week

Total: n = 1, 0.006 
Day shift: n = 0, 0.000
Night shift: n = 1, 0.017

Statistics = 0.169, p = 0.866

number_of_drink_per_day

Number of valid participant: day: 110; night: 58

1-2

Total: n = 117, 0.696 
Day shift: n = 86, 0.782
Night shift: n = 31, 0.534

3-4

Total: n = 25, 0.149 
Day shift: n = 12, 0.109
Night shift: n = 13, 0.224

5-6

Total: n = 10, 0.060 
Day shift: n = 3, 0.027
Night shift: n = 7, 0.121

7-8

Total: n = 1, 0.006 
Day shift: n = 1, 0.009
Night shift: n = 0, 0.000

9-10

Total: n = 1, 0.006 
Day shift: n = 0, 0.000
Night 

## Convert to LaTeX

In [93]:
# Render the AUDIT day/night comparison as a LaTeX tabular. Reads
# IGTB_RAW_Score, day_sleep_stats and night_sleep_stats from the cells above.
audit_col = ['driking_frequency', 'number_of_drink_per_day']
latex_col = ['Alcohol drinking frequency', 'Number of alcohol drinks per time']

driking_frequency = ['Never', 'Monthly', '2-4 times per month', '2-3 times per week', '4 or more times a week']
number_of_drink_per_day = ['1-2', '3-4', '5-6', '7-8', '9-10']
more_than_six_drink_frequency = ['Never', 'Less than Monthly', 'Monthly', 'Weekly', 'Daily']

# Answer labels per AUDIT column; answer code k corresponds to labels[k - 1].
# A dict lookup replaces the previous `col is '...'` chain: `is` tests object
# identity, which only matches equal strings when they happen to be interned.
latex_options = {
    'driking_frequency': driking_frequency,
    'number_of_drink_per_day': number_of_drink_per_day,
    'more_than_six_drink_frequency': more_than_six_drink_frequency,
}

# Denominator for the "Total" column: participants with a valid answer, i.e.
# the same population as the per-shift columns and the summary cell above.
n_valid_participants = len(day_sleep_stats) + len(night_sleep_stats)

# All LaTeX backslashes are written as '\\' so that no string relies on
# invalid escape sequences such as '\m' or '\h'.
start = '\\begin{tabular}{p{3cm}p{2cm}p{2cm}p{2cm}p{2cm}}' 

# Empty first row that fixes the column widths
latex_output = start + '\n\t\\multicolumn{1}{p{3cm}}{} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{p{2.5cm}}{} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{p{2.5cm}}{} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{p{2.5cm}}{} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{p{2.5cm}}{}\\\\'

# Header
latex_output = latex_output + '\n\n\t\\hline'
latex_output = latex_output + '\n\t\\multicolumn{1}{c}{Items} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{c}{Total n (\\%)} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{c}{Day shift n (\\%)} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{c}{Night shift n (\\%)} & '
latex_output = latex_output + '\n\t\\multicolumn{1}{c}{P-value} \\rule{0pt}{2ex} \\\\'

latex_output = latex_output + '\n\t\\hline'

# One title row per AUDIT item (carrying the p-value), then one row per answer option
for idx, col in enumerate(audit_col):

    # Independent two-sample t-test on the answer codes, day vs. night shift
    stat, p = ttest_ind(day_sleep_stats[col].dropna(), night_sleep_stats[col].dropna())

    latex_output = latex_output + '\n'

    latex_option = latex_options[col]

    latex_output = latex_output + '\n\t\\multicolumn{1}{l}{%s} & & & & ' % (latex_col[idx])

    # p-values that are not above 0.05 are typeset in bold
    if p > 0.05:
        latex_output = latex_output + '\n\t\\multicolumn{1}{c}{$%.3f$} \\rule{0pt}{2ex} \\\\' % (p)
    else:
        latex_output = latex_output + '\n\t\\multicolumn{1}{c}{$\\mathbf{%.3f}$} \\rule{0pt}{2ex} \\\\' % (p)

    for option_idx, option in enumerate(latex_option):
        answer_code = option_idx + 1
        overall = IGTB_RAW_Score.loc[IGTB_RAW_Score[col] == answer_code]
        day_data = day_sleep_stats.loc[day_sleep_stats[col] == answer_code]
        night_data = night_sleep_stats.loc[night_sleep_stats[col] == answer_code]

        latex_output = latex_output + '\n\t\\multicolumn{1}{l}{\\hspace{0.5cm}%s} & ' % (option)
        latex_output = latex_output + '\n\t\\multicolumn{1}{c}{$%d$ ($%.2f$)} & ' % (len(overall), len(overall) / n_valid_participants)
        latex_output = latex_output + '\n\t\\multicolumn{1}{c}{$%d$ ($%.2f$)} & ' % (len(day_data), len(day_data) / len(day_sleep_stats))
        latex_output = latex_output + '\n\t\\multicolumn{1}{c}{$%d$ ($%.2f$)} & ' % (len(night_data), len(night_data) / len(night_sleep_stats))
        latex_output = latex_output + '\n\t\\multicolumn{1}{c}{} \\rule{0pt}{2ex} \\\\'

latex_output = latex_output + '\n'
latex_output = latex_output + '\n\t\\hline'
latex_output = latex_output + '\n\n'
latex_output = latex_output + '\\end{tabular}'

print(latex_output)



\begin{tabular}{p{3cm}p{2cm}p{2cm}p{2cm}p{2cm}}
	\multicolumn{1}{p{3cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{} & 
	\multicolumn{1}{p{2.5cm}}{}\\

	\hline
	\multicolumn{1}{c}{Items} & 
	\multicolumn{1}{c}{Total n (\%)} & 
	\multicolumn{1}{c}{Day shift n (\%)} & 
	\multicolumn{1}{c}{Night shift n (\%)} & 
	\multicolumn{1}{c}{P-value} \rule{0pt}{2ex} \\
	\hline

	\multicolumn{1}{l}{Alcohol drinking frequency} & & & & 
	\multicolumn{1}{c}{$0.866$} \rule{0pt}{2ex} \\
	\multicolumn{1}{l}{\hspace{0.5cm}Never} & 
	\multicolumn{1}{c}{$33$ ($0.20$)} & 
	\multicolumn{1}{c}{$22$ ($0.20$)} & 
	\multicolumn{1}{c}{$11$ ($0.19$)} & 
	\multicolumn{1}{c}{} \rule{0pt}{2ex} \\
	\multicolumn{1}{l}{\hspace{0.5cm}Monthly} & 
	\multicolumn{1}{c}{$69$ ($0.41$)} & 
	\multicolumn{1}{c}{$43$ ($0.39$)} & 
	\multicolumn{1}{c}{$26$ ($0.45$)} & 
	\multicolumn{1}{c}{} \rule{0pt}{2ex} \\
	\multicolumn{1}{l}{\hspace{0.5cm}2-4 times per month} & 
	\multicolumn{1