## Import Libraries

In [1]:
import os, errno
import argparse
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from scipy.stats import kurtosis
from scipy.stats.mstats import moment
from scipy import stats
from scipy.stats import ttest_ind, f_oneway

from util.load_data_basic import *

# Timestamp formats: full ISO-like timestamp with microseconds, and date only.
date_time_format = '%Y-%m-%dT%H:%M:%S.%f'
date_only_date_time_format = '%Y-%m-%d'

# Sleep-after-work / sleep-after-sleep duration thresholds.
# NOTE(review): units are not stated anywhere in this cell (presumably hours) — confirm.
sleep_after_work_duration_threshold = 12
sleep_after_sleep_duration_threshold = 2

# Data folders (paths are relative to the notebook's working directory).
main_data_directory = '../data/keck_wave1/2_preprocessed_data'
recording_timeline_directory = '../output/recording_timeline'
sleep_timeline_directory = '../output/sleep_timeline'
# NOTE(review): unlike the two output folders above, this path has no '../' prefix — confirm it is intentional.
individual_timeline_directory = 'output/individual_timeline'


## Read Basic Information

In [2]:
# ---------------------------------------------------------------------------
# Load the per-night sleep table and the participant-level tables.
# ---------------------------------------------------------------------------
sleep_data = pd.read_csv(os.path.join('output', 'sleep_survey_full.csv'))

IDs = getParticipantID(main_data_directory, index=False)    # participant IDs
MGT = read_MGT(main_data_directory)                         # MGT records
PreStudyInfo = read_pre_study_info(main_data_directory)     # pre-study info
IGTB = read_IGTB(main_data_directory)                       # IGTB scores
Demographic = read_Demographic(main_data_directory)         # demographics


def _split_sleep_by_day_kind(shift_sleep):
    """Split one shift group's sleep rows into workday / off-day / transition-day.

    Args:
        shift_sleep: sleep rows carrying the 'is_sleep_before_work',
            'is_sleep_after_work', 'is_sleep_transition_before_work' and
            'is_sleep_transition_after_work' flag columns.

    Returns:
        Tuple ``(workday, off_day, transition_day)`` of row subsets:
        workday rows have both work flags equal to 1, off-day rows have
        neither flag equal to 1, and transition rows have at least one of
        the two transition flags equal to 1.
    """
    before_work = shift_sleep['is_sleep_before_work'] == 1
    after_work = shift_sleep['is_sleep_after_work'] == 1
    in_transition = (
        (shift_sleep['is_sleep_transition_before_work'] == 1)
        | (shift_sleep['is_sleep_transition_after_work'] == 1)
    )

    workday = shift_sleep.loc[before_work & after_work]
    off_day = shift_sleep.loc[~before_work & ~after_work]
    transition_day = shift_sleep.loc[in_transition]
    return workday, off_day, transition_day


# shift_type == 1 selects the day-shift rows, shift_type == 2 the night-shift rows.
day_data = sleep_data.loc[sleep_data['shift_type'] == 1]
day_workday_data, day_off_day_data, day_transition_day_data = _split_sleep_by_day_kind(day_data)

night_data = sleep_data.loc[sleep_data['shift_type'] == 2]
night_workday_data, night_off_day_data, night_transition_day_data = _split_sleep_by_day_kind(night_data)

# Parallel lists: data_array[i] is described by data_type[i].
data_array = [
    day_data, night_data,
    day_workday_data, day_off_day_data,
    night_workday_data, night_off_day_data,
]

data_type = [
    'day-shift all', 'night-shift all',
    'day-shift workday', 'day-shift off day',
    'night-shift workday', 'night-shift off day',
]

# Sleep-table columns of interest (variable name kept as-is for any later cells).
colunm_type = [
    'duration_in_seconds', 'SleepEfficiency',
    'SleepMinutesStageDeep', 'SleepMinutesStageLight', 'SleepMinutesStageRem',
    'sleep_heart_rate_mean', 'sleep_heart_rate_std',
    'sleep_heart_rate_percentile_10', 'sleep_heart_rate_percentile_90',
]

# One row per uid: outer-join every participant-level table on 'uid' so that a
# participant missing from one table is still kept.
UserInfo = IGTB
for participant_table in (PreStudyInfo, IDs, Demographic):
    UserInfo = pd.merge(UserInfo, participant_table, on='uid', how='outer')
UserInfo = UserInfo.set_index('uid')


## Get participants with valid sleep data

In [3]:
# Per-participant sleep counts and per-day rates over the participant's MGT
# survey window, joined with the participant-level UserInfo table.
frame_col = ['number_of_sleep', 'number_of_long_sleep', 'number_of_short_sleep',
             'sleep_rate', 'short_sleep_rate', 'long_sleep_rate']

select_column = ['participant_id', 'shift_pre-study',
                 'life_satisfaction_pre-study', 'wellbeing_pre-study',
                 'social_functioning_pre-study', 'pain_pre-study', 'general_health_pre-study',
                 'neu_igtb', 'con_igtb', 'ext_igtb', 'agr_igtb', 'ope_igtb']

# Sleep recordings lasting at most this many seconds count as "short",
# anything longer counts as "long".
short_sleep_max_seconds = 3600 * 4
# A participant is kept only with strictly more than this many recordings.
min_sleep_recordings = 20

# Collect one record per valid participant and build the frame once at the
# end; growing a DataFrame with .append() inside the loop is quadratic and
# DataFrame.append no longer exists in pandas >= 2.0.
sleep_stat_rows = []
sleep_stat_index = []

# UserInfo is indexed by uid, so iterating the column yields (uid,
# participant_id) pairs directly. This avoids the reverse lookup by
# participant_id, which fails on a missing id and picks the wrong uid when an
# id is duplicated.
for user_id, participant_id in UserInfo['participant_id'].items():

    participant_MGT = MGT.loc[MGT['uid'] == user_id]

    # The survey window needs at least two MGT records. Skip the participant
    # explicitly: falling through would reuse the sleep data and day count
    # left over from the previous loop iteration (or raise NameError on the
    # first one).
    if len(participant_MGT) <= 1:
        continue

    # Survey window at day resolution, both end days counted.
    start_date = np.datetime64(participant_MGT.index.values[0], 'D')
    end_date = np.datetime64(participant_MGT.index.values[-1], 'D')
    days_of_survey = (end_date - start_date) / np.timedelta64(1, 'D') + 1

    participant_sleep_data = sleep_data.loc[sleep_data['participant_id'] == participant_id]
    participant_sleep_data = participant_sleep_data.set_index('start_recording_time')

    # Restrict to recordings that start inside the survey window; the index
    # is sliced by label using timestamps rendered with date_time_format.
    start_label = start_date.astype(datetime).strftime(date_time_format)
    end_label = end_date.astype(datetime).strftime(date_time_format)
    participant_sleep_data = participant_sleep_data[start_label:end_label]

    if len(participant_sleep_data) <= min_sleep_recordings:
        continue

    is_short_sleep = participant_sleep_data['duration_in_seconds'] <= short_sleep_max_seconds
    is_long_sleep = participant_sleep_data['duration_in_seconds'] > short_sleep_max_seconds

    number_of_sleep = len(participant_sleep_data)
    number_of_short_sleep = int(is_short_sleep.sum())
    number_of_long_sleep = int(is_long_sleep.sum())

    sleep_stat_index.append(user_id)
    sleep_stat_rows.append({
        'number_of_sleep': number_of_sleep,
        'number_of_long_sleep': number_of_long_sleep,
        'number_of_short_sleep': number_of_short_sleep,
        'sleep_rate': number_of_sleep / days_of_survey,
        'short_sleep_rate': number_of_short_sleep / days_of_survey,
        'long_sleep_rate': number_of_long_sleep / days_of_survey,
    })

# Passing columns explicitly keeps the frame well-formed (and the dropna
# below valid) even when no participant qualifies.
valid_sleep_stats = pd.DataFrame(sleep_stat_rows, index=sleep_stat_index, columns=frame_col)

# sleep rate and IGTB: attach the participant-level info, then keep only
# participants that have sleep statistics.
temp = UserInfo.loc[:, :].copy()
valid_sleep_stats = pd.concat([valid_sleep_stats, temp], axis=1)
valid_sleep_stats = valid_sleep_stats.dropna(subset=['number_of_sleep'])

## Raw IGTB - IPAQ (work-related physical activity)

In [4]:
# Raw IGTB columns to load: the ipaq1..ipaq27 items, 'ipaq_break', and 'Name',
# which is matched against the uid index of valid_sleep_stats below.
raw_IGTB_col = ['ipaq1', 'ipaq2', 'ipaq3', 'ipaq4', 'ipaq5', 'ipaq6',
                'ipaq7', 'ipaq8', 'ipaq9', 'ipaq10', 'ipaq11', 'ipaq12',
                'ipaq13', 'ipaq14', 'ipaq15', 'ipaq16', 'ipaq17', 'ipaq18',
                'ipaq19', 'ipaq20', 'ipaq21', 'ipaq22', 'ipaq23', 'ipaq24',
                'ipaq25', 'ipaq26', 'ipaq27', 'ipaq_break', 'Name']

IGTB_RAW = read_IGTB_Raw(main_data_directory)[raw_IGTB_col]

# Raw item -> name of the derived column added to valid_sleep_stats.
activity_columns = {'ipaq3': 'work_vigorous_activity',
                    'ipaq5': 'work_moderate_activity'}

# Vectorised lookup instead of appending one single-row frame per participant
# (DataFrame.append no longer exists in pandas >= 2.0):
#   * keep the first raw row per participant, as the per-row lookup did;
#   * reindex onto the valid participants, so a participant without a raw
#     IGTB row gets NaN instead of raising IndexError.
IGTB_RAW_Score = (
    IGTB_RAW
    .drop_duplicates(subset='Name', keep='first')
    .set_index('Name')[list(activity_columns)]
    .rename(columns=activity_columns)
    .reindex(valid_sleep_stats.index)
)

# Drop columns left by a previous run of this cell so that re-running it does
# not create duplicate column names.
valid_sleep_stats = valid_sleep_stats.drop(columns=list(activity_columns.values()), errors='ignore')
valid_sleep_stats = pd.concat([valid_sleep_stats, IGTB_RAW_Score], axis=1)
    

## IGTB (Job performance) - Day, Night shift

In [6]:
# IGTB columns compared between the day-shift and night-shift groups.
BFI_col = ['itp_igtb', 'irb_igtb',
           'iod_id_igtb', 'iod_od_igtb', 'ocb_igtb']

# Group assignment from the two shift columns (1 selects the day group,
# 2 the night group). Parentheses spell out the evaluation order explicitly:
# `&` binds tighter than `|`.
pre_study_shift = valid_sleep_stats['shift_pre-study']
shift_value = valid_sleep_stats['shift']

is_day_shift = (pre_study_shift == 1) | ((shift_value == 1) & (pre_study_shift != 2))
is_night_shift = ((pre_study_shift == 2) | (shift_value == 2)) & (pre_study_shift != 1)

day_sleep_stats = valid_sleep_stats.loc[is_day_shift]
night_sleep_stats = valid_sleep_stats.loc[is_night_shift]
data_type = ['day-shift', 'night-shift']

# One (print template, data frame) pair per summary line, in print order.
summary_lines = [
    ('Total: mean = %.2f, std = %.2f, range is %.3f - %.3f', valid_sleep_stats),
    ('Day shift: mean = %.2f, std = %.2f, range is %.3f - %.3f', day_sleep_stats),
    ('Night shift: mean = %.2f, std = %.2f, range is %.3f - %.3f \n', night_sleep_stats),
]

for col in BFI_col:

    print(col + '\n')
    print('Number of valid participant: day: %i; night: %i\n' % (len(day_sleep_stats), len(night_sleep_stats)))

    # Descriptive statistics for all valid participants and for each group.
    for template, group_frame in summary_lines:
        scores = group_frame[col]
        print(template % (np.mean(scores), np.std(scores), np.min(scores), np.max(scores)))

    # Independent two-sample t-test (day vs night) on the non-missing scores.
    day_scores = day_sleep_stats[col].dropna()
    night_scores = night_sleep_stats[col].dropna()
    stat, p = ttest_ind(day_scores, night_scores)
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    

itp_igtb

Number of valid participant: day: 93; night: 58

Total: mean = 4.71, std = 0.49, range is 2.000 - 5.000
Day shift: mean = 4.66, std = 0.54, range is 2.000 - 5.000
Night shift: mean = 4.80, std = 0.39, range is 3.333 - 5.000 

Statistics = -1.664, p = 0.098

irb_igtb

Number of valid participant: day: 93; night: 58

Total: mean = 45.09, std = 4.35, range is 26.000 - 49.000
Day shift: mean = 44.88, std = 4.44, range is 26.000 - 49.000
Night shift: mean = 45.36, std = 4.20, range is 31.000 - 49.000 

Statistics = -0.656, p = 0.513

iod_id_igtb

Number of valid participant: day: 93; night: 58

Total: mean = 11.22, std = 5.24, range is 7.000 - 32.000
Day shift: mean = 11.47, std = 5.09, range is 7.000 - 32.000
Night shift: mean = 10.86, std = 5.47, range is 7.000 - 30.000 

Statistics = 0.693, p = 0.490

iod_od_igtb

Number of valid participant: day: 93; night: 58

Total: mean = 16.99, std = 6.01, range is 12.000 - 42.000
Day shift: mean = 17.87, std = 6.57, range is 12.000 - 42.0