## Import Library

In [277]:
import os, errno
import argparse
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from scipy.stats import kurtosis
from scipy.stats.mstats import moment
from scipy import stats
from scipy.stats import ttest_ind, f_oneway

from util.load_data_basic import *

# date_time format
date_time_format = '%Y-%m-%dT%H:%M:%S.%f'
date_only_date_time_format = '%Y-%m-%d'

# sleep after work duration thereshold
sleep_after_work_duration_threshold = 12
sleep_after_sleep_duration_threshold = 2

# data folder
main_data_directory = '../data/keck_wave1/2_preprocessed_data'
recording_timeline_directory = '../output/recording_timeline'
sleep_timeline_directory = '../output/sleep_timeline'
individual_timeline_directory = 'output/individual_timeline'


## Read Basic Information

In [278]:
# Read sleep data for all participant
sleep_data = pd.read_csv(os.path.join('output', 'sleep_survey_full.csv'))

# Read ID
IDs = getParticipantID(main_data_directory, index=False)

# Read MGT
MGT = read_MGT(main_data_directory)

# Read Pre-Study info
PreStudyInfo = read_pre_study_info(main_data_directory)

# Read IGTB info
IGTB = read_IGTB(main_data_directory)

# Demographic
Demographic = read_Demographic(main_data_directory)

# Day shift data
day_data = sleep_data.loc[sleep_data['shift_type'] == 1]
day_workday_data = day_data.loc[(day_data['is_sleep_before_work'] == 1) & (day_data['is_sleep_after_work'] == 1)]
day_off_day_data = day_data.loc[(day_data['is_sleep_before_work'] != 1) & (day_data['is_sleep_after_work'] != 1)]
day_transition_day_data = day_data.loc[(day_data['is_sleep_transition_before_work'] == 1) | (day_data['is_sleep_transition_after_work'] == 1)]

# Night shift data
night_data = sleep_data.loc[sleep_data['shift_type'] == 2]
night_workday_data = night_data.loc[(night_data['is_sleep_before_work'] == 1) & (night_data['is_sleep_after_work'] == 1)]
night_off_day_data = night_data.loc[(night_data['is_sleep_before_work'] != 1) & (night_data['is_sleep_after_work'] != 1)]
night_transition_day_data = night_data.loc[(night_data['is_sleep_transition_before_work'] == 1) | (night_data['is_sleep_transition_after_work'] == 1)]

# Data array
data_array = [day_data, night_data, 
              day_workday_data, day_off_day_data, 
              night_workday_data, night_off_day_data]

data_type = ['day-shift all', 'night-shift all',
             'day-shift workday', 'day-shift off day', 
             'night-shift workday', 'night-shift off day']

colunm_type = ['duration_in_seconds', 'SleepEfficiency',
               'SleepMinutesStageDeep', 'SleepMinutesStageLight', 'SleepMinutesStageRem',
               'sleep_heart_rate_mean', 'sleep_heart_rate_std',
               'sleep_heart_rate_percentile_10', 'sleep_heart_rate_percentile_90']

UserInfo = pd.merge(IGTB, PreStudyInfo, left_on='uid', right_on='uid', how='outer')
UserInfo = pd.merge(UserInfo, IDs, left_on='uid', right_on='uid', how='outer')
UserInfo = pd.merge(UserInfo, Demographic, left_on='uid', right_on='uid', how='outer')
UserInfo = UserInfo.set_index('uid')


## Get participant with valid sleep data

In [279]:
from sklearn.mixture import GaussianMixture

valid_sleep_stats = pd.DataFrame()

valid_long_sleep_stats = pd.DataFrame()
valid_short_sleep_stats = pd.DataFrame()

frame_col = ['number_of_sleep', 'number_of_long_sleep', 'number_of_short_sleep', 
             'sleep_rate', 'short_sleep_rate', 'long_sleep_rate']

select_column = ['participant_id', 'shift_pre-study', 
                 'life_satisfaction_pre-study', 'wellbeing_pre-study',
                 'social_functioning_pre-study', 'pain_pre-study', 'general_health_pre-study',
                 'neu_igtb', 'con_igtb', 'ext_igtb', 'agr_igtb', 'ope_igtb']

for participant_id in UserInfo['participant_id']:
    
    user_id = UserInfo.loc[UserInfo['participant_id'] == participant_id].index.values[0]
    participant_MGT = MGT.loc[MGT['uid'] == user_id]
    
    if len(participant_MGT) > 1:
        start_date = np.datetime64(participant_MGT.index.values[0], 'D')
        end_date = np.datetime64(participant_MGT.index.values[-1], 'D')
        days_of_survey = (end_date - start_date) / np.timedelta64(1, 'D') + 1
        
        participant_sleep_data = sleep_data.loc[sleep_data['participant_id'] == participant_id]
        participant_sleep_data = participant_sleep_data.set_index('start_recording_time')
        
        start_date = np.datetime64(start_date).astype(datetime).strftime(date_time_format)
        end_date = np.datetime64(end_date).astype(datetime).strftime(date_time_format)
        
        participant_sleep_data = participant_sleep_data[start_date:end_date]
        
    if len(participant_sleep_data) > 20:
        frame = pd.DataFrame(index=[user_id])
        frame['number_of_sleep'] = len(participant_sleep_data)
        frame['number_of_short_sleep'] = len(participant_sleep_data.loc[participant_sleep_data['duration_in_seconds'] <= 3600 * 4])
        frame['number_of_long_sleep'] = len(participant_sleep_data.loc[participant_sleep_data['duration_in_seconds'] > 3600 * 4])
        frame['sleep_rate'] = len(participant_sleep_data) / days_of_survey
        frame['short_sleep_rate'] = len(participant_sleep_data.loc[participant_sleep_data['duration_in_seconds'] <= 3600 * 4]) / days_of_survey
        frame['long_sleep_rate'] = len(participant_sleep_data.loc[participant_sleep_data['duration_in_seconds'] > 3600 * 4]) / days_of_survey
        
        # GMM
        gmm = GaussianMixture(n_components=2, max_iter=300).fit(participant_sleep_data['duration_in_seconds'].values.reshape(-1, 1))
        labels = gmm.predict(participant_sleep_data['duration_in_seconds'].values.reshape(-1, 1))
        
        participant_sleep_data_with_label = pd.DataFrame()
        data_index = 0
        
        # iterate data
        for index, data in participant_sleep_data.iterrows():
            data = data.to_frame().transpose()
            data['sleep_type'] = labels[data_index]
            participant_sleep_data_with_label = participant_sleep_data_with_label.append(data)
            data_index += 1
        
        # Group comparison
        if (np.max(gmm.means_) - np.min(gmm.means_)) > 15000:
            long_sleep_df = pd.DataFrame()
            long_sleep_df  = participant_sleep_data_with_label.loc[participant_sleep_data_with_label['sleep_type'] == np.argmax(gmm.means_)]

            valid_long_sleep_df = participant_sleep_data_with_label.loc[participant_sleep_data_with_label['sleep_type'] == np.argmax(gmm.means_)]
            valid_long_sleep_df['shift_pre-study'] = UserInfo.loc[UserInfo['participant_id'] == participant_id]['shift_pre-study'].values[0]
            valid_long_sleep_df['shift'] = UserInfo.loc[UserInfo['participant_id'] == participant_id]['shift'].values[0]
            valid_long_sleep_stats = valid_long_sleep_stats.append(valid_long_sleep_df)

            short_sleep_df = pd.DataFrame()
            short_sleep_df = participant_sleep_data_with_label.loc[participant_sleep_data_with_label['sleep_type'] == np.argmin(gmm.means_)]

            valid_short_sleep_df = participant_sleep_data_with_label.loc[participant_sleep_data_with_label['sleep_type'] == np.argmin(gmm.means_)]
            valid_short_sleep_df['shift_pre-study'] = UserInfo.loc[UserInfo['participant_id'] == participant_id]['shift_pre-study'].values[0]
            valid_short_sleep_df['shift'] = UserInfo.loc[UserInfo['participant_id'] == participant_id]['shift'].values[0]
            valid_short_sleep_stats = valid_short_sleep_stats.append(valid_short_sleep_df)
        else:
            long_sleep_df = pd.DataFrame()
            long_sleep_df  = participant_sleep_data_with_label.copy()

            valid_long_sleep_df = participant_sleep_data_with_label.copy()
            valid_long_sleep_df['shift_pre-study'] = UserInfo.loc[UserInfo['participant_id'] == participant_id]['shift_pre-study'].values[0]
            valid_long_sleep_df['shift'] = UserInfo.loc[UserInfo['participant_id'] == participant_id]['shift'].values[0]
            valid_long_sleep_stats = valid_long_sleep_stats.append(valid_long_sleep_df)
        
        
        if (np.max(gmm.means_) - np.min(gmm.means_)) > 15000:
            
            long_sleep_workday_df = long_sleep_df.loc[(long_sleep_df['is_sleep_before_work'] == 1) & (long_sleep_df['is_sleep_after_work'] == 1)]
            short_sleep_workday_df = short_sleep_df.loc[(short_sleep_df['is_sleep_before_work'] == 1) & (short_sleep_df['is_sleep_after_work'] == 1)]

            long_sleep_offday_df = long_sleep_df.loc[(long_sleep_df['is_sleep_before_work'] != 1) & (long_sleep_df['is_sleep_after_work'] != 1)]
            short_sleep_offday_df = short_sleep_df.loc[(short_sleep_df['is_sleep_before_work'] != 1) & (short_sleep_df['is_sleep_after_work'] != 1)]

            # Long sleep
            frame['long_sleep_duration_mean'] = np.max(gmm.means_) / 60
            frame['long_sleep_duration_std'] = np.sqrt(gmm.covariances_[np.argmax(gmm.means_)]) / 60
            
            frame['long_sleep_duration_deep_mean'] = np.mean(long_sleep_df['SleepMinutesStageDeep'].dropna().values)
            frame['long_sleep_duration_deep_std'] = np.std(long_sleep_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['long_sleep_duration_light_mean'] = np.mean(long_sleep_df['SleepMinutesStageLight'].dropna().values)
            frame['long_sleep_duration_light_std'] = np.std(long_sleep_df['SleepMinutesStageLight'].dropna().values)
            
            frame['long_sleep_duration_rem_mean'] = np.mean(long_sleep_df['SleepMinutesStageRem'].dropna().values)
            frame['long_sleep_duration_rem_std'] = np.std(long_sleep_df['SleepMinutesStageRem'].dropna().values)
            
            frame['long_sleep_duration_wake_mean'] = np.mean(long_sleep_df['SleepMinutesStageWake'].dropna().values)
            frame['long_sleep_duration_wake_std'] = np.std(long_sleep_df['SleepMinutesStageWake'].dropna().values)
            
            frame['long_sleep_efficiency_mean'] = np.mean(long_sleep_df['SleepEfficiency'].dropna().values)
            frame['long_sleep_efficiency_std'] = np.std(long_sleep_df['SleepEfficiency'].dropna().values)
            
            # Long sleep workday
            frame['workday_long_sleep_heart_rate_mean'] = np.mean(long_sleep_workday_df['sleep_heart_rate_mean'].dropna().values)
            
            frame['workday_long_sleep_duration_mean'] = np.mean(long_sleep_workday_df['duration_in_seconds'].dropna().values) / 60
            frame['workday_long_sleep_duration_std'] = np.std(long_sleep_workday_df['duration_in_seconds'].dropna().values) / 60
            
            frame['workday_long_sleep_duration_deep_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageDeep'].dropna().values)
            frame['workday_long_sleep_duration_deep_std'] = np.std(long_sleep_workday_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['workday_long_sleep_duration_light_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageLight'].dropna().values)
            frame['workday_long_sleep_duration_light_std'] = np.std(long_sleep_workday_df['SleepMinutesStageLight'].dropna().values)
            
            frame['workday_long_sleep_duration_rem_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageRem'].dropna().values)
            frame['workday_long_sleep_duration_rem_std'] = np.std(long_sleep_workday_df['SleepMinutesStageRem'].dropna().values)
            
            frame['workday_long_sleep_duration_wake_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageWake'].dropna().values)
            frame['workday_long_sleep_duration_wake_std'] = np.std(long_sleep_workday_df['SleepMinutesStageWake'].dropna().values)
            
            frame['workday_long_sleep_efficiency_mean'] = np.mean(long_sleep_workday_df['SleepEfficiency'].dropna().values)
            frame['workday_long_sleep_efficiency_std'] = np.std(long_sleep_workday_df['SleepEfficiency'].dropna().values)
                     
            # Long sleep offday
            frame['offday_long_sleep_heart_rate_mean'] = np.mean(long_sleep_offday_df['sleep_heart_rate_mean'].dropna().values)
            
            frame['offday_long_sleep_duration_mean'] = np.mean(long_sleep_offday_df['duration_in_seconds'].dropna().values) / 60
            frame['offday_long_sleep_duration_std'] = np.std(long_sleep_offday_df['duration_in_seconds'].dropna().values) / 60
            
            frame['offday_long_sleep_duration_deep_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageDeep'].dropna().values)
            frame['offday_long_sleep_duration_deep_std'] = np.std(long_sleep_offday_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['offday_long_sleep_duration_light_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageLight'].dropna().values)
            frame['offday_long_sleep_duration_light_std'] = np.std(long_sleep_offday_df['SleepMinutesStageLight'].dropna().values)
            
            frame['offday_long_sleep_duration_rem_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageRem'].dropna().values)
            frame['offday_long_sleep_duration_rem_std'] = np.std(long_sleep_offday_df['SleepMinutesStageRem'].dropna().values)
            
            frame['offday_long_sleep_duration_wake_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageWake'].dropna().values)
            frame['offday_long_sleep_duration_wake_std'] = np.std(long_sleep_offday_df['SleepMinutesStageWake'].dropna().values)
            
            frame['offday_long_sleep_efficiency_mean'] = np.mean(long_sleep_offday_df['SleepEfficiency'].dropna().values)
            frame['offday_long_sleep_efficiency_std'] = np.std(long_sleep_offday_df['SleepEfficiency'].dropna().values)
            
            
            # Short sleep
            frame['short_sleep_duration_mean'] = np.min(gmm.means_) / 60
            frame['short_sleep_duration_std'] = np.sqrt(gmm.covariances_[np.argmin(gmm.means_)]) / 60
            
            frame['short_sleep_duration_deep_mean'] = np.mean(short_sleep_df['SleepMinutesStageDeep'].dropna().values)
            frame['short_sleep_duration_deep_std'] = np.std(short_sleep_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['short_sleep_duration_light_mean'] = np.mean(short_sleep_df['SleepMinutesStageLight'].dropna().values)
            frame['short_sleep_duration_light_std'] = np.std(short_sleep_df['SleepMinutesStageLight'].dropna().values)
            
            frame['short_sleep_duration_rem_mean'] = np.mean(short_sleep_df['SleepMinutesStageRem'].dropna().values)
            frame['short_sleep_duration_rem_std'] = np.std(short_sleep_df['SleepMinutesStageRem'].dropna().values)
            
            frame['short_sleep_duration_wake_mean'] = np.mean(short_sleep_df['SleepMinutesStageWake'].dropna().values)
            frame['short_sleep_duration_wake_std'] = np.std(short_sleep_df['SleepMinutesStageWake'].dropna().values)
            
            frame['short_sleep_efficiency_mean'] = np.mean(short_sleep_df['SleepEfficiency'].dropna().values)
            frame['short_sleep_efficiency_std'] = np.std(short_sleep_df['SleepEfficiency'].dropna().values)
            
            frame['short_sleep_heart_rate_mean'] = np.mean(short_sleep_df['sleep_heart_rate_mean'].dropna().values)
            
            
            # workday Short sleep
            frame['workday_short_sleep_duration_mean'] = np.mean(short_sleep_workday_df['duration_in_seconds'].dropna().values) / 60
            frame['workday_short_sleep_duration_std'] = np.std(short_sleep_workday_df['duration_in_seconds'].dropna().values) / 60
            
            frame['workday_short_sleep_duration_deep_mean'] = np.mean(short_sleep_workday_df['SleepMinutesStageDeep'].dropna().values)
            frame['workday_short_sleep_duration_deep_std'] = np.std(short_sleep_workday_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['workday_short_sleep_duration_light_mean'] = np.mean(short_sleep_workday_df['SleepMinutesStageLight'].dropna().values)
            frame['workday_short_sleep_duration_light_std'] = np.std(short_sleep_workday_df['SleepMinutesStageLight'].dropna().values)
            
            frame['workday_short_sleep_duration_rem_mean'] = np.mean(short_sleep_workday_df['SleepMinutesStageRem'].dropna().values)
            frame['workday_short_sleep_duration_rem_std'] = np.std(short_sleep_workday_df['SleepMinutesStageRem'].dropna().values)
            
            frame['workday_short_sleep_duration_wake_mean'] = np.mean(short_sleep_workday_df['SleepMinutesStageWake'].dropna().values)
            frame['workday_short_sleep_duration_wake_std'] = np.std(short_sleep_workday_df['SleepMinutesStageWake'].dropna().values)
            
            frame['workday_short_sleep_efficiency_mean'] = np.mean(short_sleep_workday_df['SleepEfficiency'].dropna().values)
            frame['workday_short_sleep_efficiency_std'] = np.std(short_sleep_workday_df['SleepEfficiency'].dropna().values)
            
            frame['workday_short_sleep_heart_rate_mean'] = np.mean(short_sleep_workday_df['sleep_heart_rate_mean'].dropna().values)
            
            # offday Short sleep
            frame['offday_short_sleep_duration_mean'] = np.mean(short_sleep_offday_df['duration_in_seconds'].dropna().values) / 60
            frame['offday_short_sleep_duration_std'] = np.std(short_sleep_offday_df['duration_in_seconds'].dropna().values) / 60
            
            frame['offday_short_sleep_duration_deep_mean'] = np.mean(short_sleep_offday_df['SleepMinutesStageDeep'].dropna().values)
            frame['offday_short_sleep_duration_deep_std'] = np.std(short_sleep_offday_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['offday_short_sleep_duration_light_mean'] = np.mean(short_sleep_offday_df['SleepMinutesStageLight'].dropna().values)
            frame['offday_short_sleep_duration_light_std'] = np.std(short_sleep_offday_df['SleepMinutesStageLight'].dropna().values)
            
            frame['offday_short_sleep_duration_rem_mean'] = np.mean(short_sleep_offday_df['SleepMinutesStageRem'].dropna().values)
            frame['offday_short_sleep_duration_rem_std'] = np.std(short_sleep_offday_df['SleepMinutesStageRem'].dropna().values)
            
            frame['offday_short_sleep_duration_wake_mean'] = np.mean(short_sleep_offday_df['SleepMinutesStageWake'].dropna().values)
            frame['offday_short_sleep_duration_wake_std'] = np.std(short_sleep_offday_df['SleepMinutesStageWake'].dropna().values)
            
            frame['offday_short_sleep_efficiency_mean'] = np.mean(short_sleep_offday_df['SleepEfficiency'].dropna().values)
            frame['offday_short_sleep_efficiency_std'] = np.std(short_sleep_offday_df['SleepEfficiency'].dropna().values)
            
            frame['offday_short_sleep_heart_rate_mean'] = np.mean(short_sleep_offday_df['sleep_heart_rate_mean'].dropna().values)

        else:
            
            # Long sleep
            frame['long_sleep_duration_mean'] = np.mean(participant_sleep_data['duration_in_seconds'].dropna().values) / 60
            frame['long_sleep_duration_std'] = np.std(participant_sleep_data['duration_in_seconds'].dropna().values) / 60
            
            frame['long_sleep_duration_deep_mean'] = np.mean(participant_sleep_data['SleepMinutesStageDeep'].dropna().values)
            frame['long_sleep_duration_deep_std'] = np.std(participant_sleep_data['SleepMinutesStageDeep'].dropna().values)
            
            frame['long_sleep_duration_light_mean'] = np.mean(participant_sleep_data['SleepMinutesStageLight'].dropna().values)
            frame['long_sleep_duration_light_std'] = np.mean(participant_sleep_data['SleepMinutesStageLight'].dropna().values)
            
            frame['long_sleep_duration_rem_mean'] = np.mean(participant_sleep_data['SleepMinutesStageRem'].dropna().values)
            frame['long_sleep_duration_rem_std'] = np.mean(participant_sleep_data['SleepMinutesStageRem'].dropna().values)
            
            frame['long_sleep_duration_wake_mean'] = np.mean(participant_sleep_data['SleepMinutesStageWake'].dropna().values)
            frame['long_sleep_duration_wake_std'] = np.mean(participant_sleep_data['SleepMinutesStageWake'].dropna().values)
            
            frame['long_sleep_efficiency_mean'] = np.mean(participant_sleep_data['SleepEfficiency'].dropna().values)
            frame['long_sleep_efficiency_std'] = np.mean(participant_sleep_data['SleepEfficiency'].dropna().values)
            
            frame['long_sleep_heart_rate_mean'] = np.mean(participant_sleep_data['sleep_heart_rate_mean'].dropna().values)
            
            
            long_sleep_workday_df = participant_sleep_data.loc[(participant_sleep_data['is_sleep_before_work'] == 1) & (participant_sleep_data['is_sleep_after_work'] == 1)]
            long_sleep_offday_df = participant_sleep_data.loc[(participant_sleep_data['is_sleep_before_work'] != 1) & (participant_sleep_data['is_sleep_after_work'] != 1)]
            
            
            # Long sleep workday
            frame['workday_long_sleep_heart_rate_mean'] = np.mean(long_sleep_workday_df['sleep_heart_rate_mean'].dropna().values)
            
            frame['workday_long_sleep_duration_mean'] = np.mean(long_sleep_workday_df['duration_in_seconds'].dropna().values) / 60
            frame['workday_long_sleep_duration_std'] = np.std(long_sleep_workday_df['duration_in_seconds'].dropna().values) / 60
            
            frame['workday_long_sleep_duration_deep_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageDeep'].dropna().values)
            frame['workday_long_sleep_duration_deep_std'] = np.std(long_sleep_workday_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['workday_long_sleep_duration_light_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageLight'].dropna().values)
            frame['workday_long_sleep_duration_light_std'] = np.std(long_sleep_workday_df['SleepMinutesStageLight'].dropna().values)
            
            frame['workday_long_sleep_duration_rem_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageRem'].dropna().values)
            frame['workday_long_sleep_duration_rem_std'] = np.std(long_sleep_workday_df['SleepMinutesStageRem'].dropna().values)
            
            frame['workday_long_sleep_duration_wake_mean'] = np.mean(long_sleep_workday_df['SleepMinutesStageWake'].dropna().values)
            frame['workday_long_sleep_duration_wake_std'] = np.std(long_sleep_workday_df['SleepMinutesStageWake'].dropna().values)
            
            frame['workday_long_sleep_efficiency_mean'] = np.mean(long_sleep_workday_df['SleepEfficiency'].dropna().values)
            frame['workday_long_sleep_efficiency_std'] = np.std(long_sleep_workday_df['SleepEfficiency'].dropna().values)
                     
            # Long sleep offday
            frame['offday_long_sleep_heart_rate_mean'] = np.mean(long_sleep_offday_df['sleep_heart_rate_mean'].dropna().values)
            
            frame['offday_long_sleep_duration_mean'] = np.mean(long_sleep_offday_df['duration_in_seconds'].dropna().values) / 60
            frame['offday_long_sleep_duration_std'] = np.std(long_sleep_offday_df['duration_in_seconds'].dropna().values) / 60
            
            frame['offday_long_sleep_duration_deep_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageDeep'].dropna().values)
            frame['offday_long_sleep_duration_deep_std'] = np.std(long_sleep_offday_df['SleepMinutesStageDeep'].dropna().values)
            
            frame['offday_long_sleep_duration_light_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageLight'].dropna().values)
            frame['offday_long_sleep_duration_light_std'] = np.std(long_sleep_offday_df['SleepMinutesStageLight'].dropna().values)
            
            frame['offday_long_sleep_duration_rem_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageRem'].dropna().values)
            frame['offday_long_sleep_duration_rem_std'] = np.std(long_sleep_offday_df['SleepMinutesStageRem'].dropna().values)
            
            frame['offday_long_sleep_duration_wake_mean'] = np.mean(long_sleep_offday_df['SleepMinutesStageWake'].dropna().values)
            frame['offday_long_sleep_duration_wake_std'] = np.std(long_sleep_offday_df['SleepMinutesStageWake'].dropna().values)
            
            frame['offday_long_sleep_efficiency_mean'] = np.mean(long_sleep_offday_df['SleepEfficiency'].dropna().values)
            frame['offday_long_sleep_efficiency_std'] = np.std(long_sleep_offday_df['SleepEfficiency'].dropna().values)


            # Short sleep
            # Just set is as np.nan
            frame['short_sleep_duration_mean'] = np.nan
            frame['short_sleep_duration_std'] = np.nan
            
            frame['short_sleep_duration_deep_mean'] = np.nan
            frame['short_sleep_duration_deep_std'] = np.nan
            
            frame['short_sleep_duration_light_mean'] = np.nan
            frame['short_sleep_duration_light_std'] = np.nan
            
            frame['short_sleep_duration_rem_mean'] = np.nan
            frame['short_sleep_duration_rem_std'] = np.nan
            
            frame['short_sleep_efficiency_mean'] = np.nan
            frame['short_sleep_efficiency_std'] = np.nan
            
            frame['short_sleep_heart_rate_mean'] = np.nan
            
        
        frame['sleep_duration_mean'] = np.mean(participant_sleep_data['duration_in_seconds'].dropna().values) / 60
        frame['sleep_duration_std'] = np.std(participant_sleep_data['duration_in_seconds'].dropna().values) / 60
        
        workday_df = long_sleep_workday_df.append(short_sleep_workday_df)
        offday_df = long_sleep_offday_df.append(short_sleep_offday_df)
        
        frame['workday_sleep_duration_mean'] = np.mean(workday_df['duration_in_seconds'].dropna().values) / 60
        frame['workday_sleep_duration_std'] = np.std(workday_df['duration_in_seconds'].dropna().values) / 60
        
        frame['off_sleep_duration_mean'] = np.mean(offday_df['duration_in_seconds'].dropna().values) / 60
        frame['off_sleep_duration_std'] = np.std(offday_df['duration_in_seconds'].dropna().values) / 60


        # print(participant_sleep_data_with_label['sleep_type'])
        
        # print('Mean: %i, std: %i' % (np.max(gmm.means_), np.sqrt(gmm.covariances_[np.argmax(gmm.means_)])))
        # print('Mean: %i, std: %i \n' % (np.min(gmm.means_), np.sqrt(gmm.covariances_[np.argmin(gmm.means_)])))
        
        valid_sleep_stats = valid_sleep_stats.append(frame)

# sleep rate and IGTB
temp = UserInfo.loc[:,:].copy()
valid_sleep_stats = pd.concat([valid_sleep_stats, temp], axis=1)
valid_sleep_stats = valid_sleep_stats.dropna(subset=['number_of_sleep'])
        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out=out, **kwa

## Raw IGTB - PSQI and GATS

In [280]:
raw_IGTB_col = ['psqi_inst', 'psqi1', 'psqi1ampm', 'psqi2', 'psqi3', 'psqi3ampm', 'psqi4',
                'psqi5a', 'psqi5b', 'psqi5c', 'psqi5d', 'psqi5e', 'psqi5f', 'psqi5g', 'psqi5h',
                'psqi5i', 'psqi5ja', 'psqi5jb', 'psqi6', 'psqi7', 'psqi8', 'psqi9', 'psqi_complete', 
                'gats1', 'gats2', 'gats3Week_1', 'gats3Week_2', 'gats3Week_3', 
                'gats3Week_4', 'gats3Week_5', 'gats3Week_6', 'gats3Week_7', 
                'audit1', 'audit2', 'audit3', 'audit4', 'audit5', 
                'audit6', 'audit7', 'audit8', 'audit9', 'audit10', 'Name']

IGTB_RAW = read_IGTB_Raw(main_data_directory)[raw_IGTB_col]

IGTB_RAW_Score = pd.DataFrame()

for user_id in valid_sleep_stats.index.values:
    IGTB_RAW_participant = IGTB_RAW.loc[IGTB_RAW['Name'] == user_id]
    
    # PSQI
    # Contains 7 scores, the lower the score, the better the performance
    frame = pd.DataFrame(index=[user_id])
    # 1st score
    frame['subjective_sleep_quality_psqi'] = IGTB_RAW_participant['psqi6'].values[0]
    
    # 2nd score
    if IGTB_RAW_participant['psqi2'].values[0] <= 15:
        response = 0
    elif 15 < IGTB_RAW_participant['psqi2'].values[0] <= 30:
        response = 1
    elif 30 < IGTB_RAW_participant['psqi2'].values[0] <= 60:
        response = 2
    elif IGTB_RAW_participant['psqi2'].values[0] > 60:
        response = 3
     
    frame['sleep_latency_psqi'] = int((response + IGTB_RAW_participant['psqi5a'].values[0] + 1) / 2)
    
    # 3rd score
    if IGTB_RAW_participant['psqi4'].values[0] >= 7:
        response = 0
    elif 6 <= IGTB_RAW_participant['psqi4'].values[0] < 7:
        response = 1
    elif 5 <= IGTB_RAW_participant['psqi4'].values[0] < 6:
        response = 2
    elif IGTB_RAW_participant['psqi4'].values[0] < 5:
        response = 3
    frame['sleep_duration_psqi'] = response
    
    # 4th score
    in_bed_time = int(IGTB_RAW_participant['psqi1'].values[0] / 100)
    in_bed_time = in_bed_time + int(IGTB_RAW_participant['psqi1'].values[0] % 100) / 60
    
    get_up_time = int(IGTB_RAW_participant['psqi3'].values[0] / 100)
    get_up_time = get_up_time + int(IGTB_RAW_participant['psqi3'].values[0] % 100) / 60
    
    if in_bed_time >= 12:
        in_bed_time = in_bed_time - 12
    
    if get_up_time >= 12:
        get_up_time = get_up_time - 12
    
    if IGTB_RAW_participant['psqi1ampm'].values[0] == 1200 and IGTB_RAW_participant['psqi3ampm'].values[0] == 0:
        time_in_bed = 12 - in_bed_time + get_up_time
    elif IGTB_RAW_participant['psqi1ampm'].values[0] == 0 and IGTB_RAW_participant['psqi3ampm'].values[0] == 1200:
        time_in_bed = 12 - in_bed_time + get_up_time
    else:
        time_in_bed = get_up_time - in_bed_time
    
    efficiency = 100 * IGTB_RAW_participant['psqi4'].values[0] / time_in_bed
    
    if efficiency < 0:
        efficiency = 100
    
    if efficiency >= 85:
        response = 0
    elif 75 <= efficiency < 85:
        response = 1
    elif 65 <= efficiency < 75:
        response = 2
    elif efficiency < 65:
        response = 3
    frame['sleep_efficiency_psqi'] = response
    
    # 5th score
    response = IGTB_RAW_participant['psqi5b'].values[0] + IGTB_RAW_participant['psqi5c'].values[0] + IGTB_RAW_participant['psqi5d'].values[0] + IGTB_RAW_participant['psqi5e'].values[0] + IGTB_RAW_participant['psqi5f'].fillna(0).values[0] + IGTB_RAW_participant['psqi5g'].values[0] + IGTB_RAW_participant['psqi5h'].values[0] + IGTB_RAW_participant['psqi5i'].values[0] 
    response = response + IGTB_RAW_participant['psqi5jb'].fillna(0)
    frame['sleep_distrubance_psqi'] = int((response + 8) / 9)
    
    # 6th score
    frame['sleep_medication_psqi'] = IGTB_RAW_participant['psqi7'].values[0]
    
    # 7th score
    response = IGTB_RAW_participant['psqi8'].values[0] + IGTB_RAW_participant['psqi9'].values[0]
    frame['daytime_dysfunction_psqi'] = int((response + 1) / 2)
    
    # GATS
    frame['current_tobacco_gats'] = IGTB_RAW_participant['gats1'].values[0]
    frame['past_tobacco_gats'] = IGTB_RAW_participant['gats2'].values[0]
    frame['individual_cigarettes'] = IGTB_RAW_participant['gats3Week_1'].fillna(0).values[0]
    frame['individual_clove_cigarettes'] = IGTB_RAW_participant['gats3Week_2'].fillna(0).values[0]
    frame['individual_cigars'] = IGTB_RAW_participant['gats3Week_3'].fillna(0).values[0]
    frame['e_cigarette'] = IGTB_RAW_participant['gats3Week_4'].fillna(0).values[0]
    frame['pipe_session'] = IGTB_RAW_participant['gats3Week_5'].fillna(0).values[0]
    frame['smokeless_session'] = IGTB_RAW_participant['gats3Week_6'].fillna(0).values[0]
    frame['other_cigarettes'] = IGTB_RAW_participant['gats3Week_7'].fillna(0).values[0]
    
    # AUDIT
    frame['driking_frequency'] = IGTB_RAW_participant['audit1'].values[0]
    frame['number_of_drink_per_day'] = IGTB_RAW_participant['audit2'].values[0]
    frame['more_than_six_drink_frequency'] = IGTB_RAW_participant['audit3'].values[0]
    
    IGTB_RAW_Score = IGTB_RAW_Score.append(frame)
    
valid_sleep_stats = pd.concat([valid_sleep_stats, IGTB_RAW_Score], axis=1)
    

## IGTB (Sleep) - Day, Night shift

In [281]:
sleep_col = ['subjective_sleep_quality_psqi', 'sleep_latency_psqi', 'sleep_duration_psqi',
             'sleep_efficiency_psqi', 'sleep_distrubance_psqi', 'sleep_medication_psqi', 'daytime_dysfunction_psqi']

# shift_pre-study
day_sleep_stats = valid_sleep_stats.loc[(valid_sleep_stats['shift_pre-study'] == 1) | (valid_sleep_stats['shift'] == 1) & (valid_sleep_stats['shift_pre-study'] != 2)]
night_sleep_stats = valid_sleep_stats.loc[((valid_sleep_stats['shift_pre-study'] == 2) | (valid_sleep_stats['shift'] == 2)) & (valid_sleep_stats['shift_pre-study'] != 1)]

overall_data = [day_sleep_stats, night_sleep_stats]
data_type = ['day-shift', 'night-shift']

for col in sleep_col:

    response0 = overall_data[0][col]
    response1 = overall_data[1][col]
    
    print(col + '\n')
    print('Number of valid participant: day: %i; night: %i\n' % (len(response0), len(response1)))
    
    print('Total: mean = %.2f, std = %.2f, range is %.3f - %.3f' % (np.mean(valid_sleep_stats[col]), np.std(valid_sleep_stats[col]), np.min(valid_sleep_stats[col]), np.max(valid_sleep_stats[col])))
    print('Day shift: mean = %.2f, std = %.2f, range is %.3f - %.3f' % (np.mean(response0), np.std(response0), np.min(response0), np.max(response0)))
    print('Night shift: mean = %.2f, std = %.2f, range is %.3f - %.3f \n' % (np.mean(response1), np.std(response1), np.min(response1), np.max(response1)))
    
    stat, p = ttest_ind(overall_data[0][col].dropna(), overall_data[1][col].dropna())
    # print('Type: ' + col)
    print('Statistics = %.3f, p = %.3f' % (stat, p))
    print('\n')

subjective_sleep_quality_psqi

Number of valid participant: day: 93; night: 58

Total: mean = 0.54, std = 1.01, range is 0.000 - 3.000
Day shift: mean = 0.48, std = 0.98, range is 0.000 - 3.000
Night shift: mean = 0.59, std = 1.02, range is 0.000 - 3.000 

Statistics = -0.611, p = 0.542


sleep_latency_psqi

Number of valid participant: day: 93; night: 58

Total: mean = 1.34, std = 0.93, range is 0.000 - 3.000
Day shift: mean = 1.26, std = 0.88, range is 0.000 - 3.000
Night shift: mean = 1.45, std = 0.99, range is 0.000 - 3.000 

Statistics = -1.226, p = 0.222


sleep_duration_psqi

Number of valid participant: day: 93; night: 58

Total: mean = 0.97, std = 0.94, range is 0.000 - 3.000
Day shift: mean = 0.84, std = 0.88, range is 0.000 - 3.000
Night shift: mean = 1.14, std = 0.97, range is 0.000 - 3.000 

Statistics = -1.933, p = 0.055


sleep_efficiency_psqi

Number of valid participant: day: 93; night: 58

Total: mean = 0.62, std = 0.93, range is 0.000 - 3.000
Day shift: mean = 0.35, 

## IGTB (Tobacco) - Day, Night shift

In [282]:
gats_col = ['current_tobacco_gats', 'past_tobacco_gats',]
            #'individual_cigarettes',
            #'individual_clove_cigarettes', 'individual_cigars', 'e_cigarette', 'pipe_session',
            #'smokeless_session', 'other_cigarettes']

# shift_pre-study
day_sleep_stats = valid_sleep_stats.loc[(valid_sleep_stats['shift_pre-study'] == 1) | (valid_sleep_stats['shift'] == 1) & (valid_sleep_stats['shift_pre-study'] != 2)]
night_sleep_stats = valid_sleep_stats.loc[((valid_sleep_stats['shift_pre-study'] == 2) | (valid_sleep_stats['shift'] == 2)) & (valid_sleep_stats['shift_pre-study'] != 1)]

overall_data = [day_sleep_stats, night_sleep_stats]
data_type = ['day-shift', 'night-shift']

for col in gats_col:

    response0 = overall_data[0][col]
    response1 = overall_data[1][col]
    
    print(col + '\n')
    print('Number of valid participant: day: %i; night: %i\n' % (len(response0), len(response1)))
    
    # Status
    if col == 'current_tobacco_gats' or col == 'past_tobacco_gats':
        
        daily = valid_sleep_stats.loc[valid_sleep_stats[col] == 1]
        day_daily = day_sleep_stats.loc[day_sleep_stats[col] == 1]
        night_daily = night_sleep_stats.loc[night_sleep_stats[col] == 1]
        
        less_than_daily = valid_sleep_stats.loc[valid_sleep_stats[col] == 2]
        day_less_than_daily = day_sleep_stats.loc[day_sleep_stats[col] == 2]
        night_less_than_daily = night_sleep_stats.loc[night_sleep_stats[col] == 2]
        
        not_at_all = valid_sleep_stats.loc[valid_sleep_stats[col] == 3]
        day_not_at_all = day_sleep_stats.loc[day_sleep_stats[col] == 3]
        night_not_at_all = night_sleep_stats.loc[night_sleep_stats[col] == 3]
        
        # Daily
        print('Daily \n')
        print('Total: n = %i, %.3f ' % (len(daily), len(daily) / len(valid_sleep_stats)))
        print('Day shift: n = %i, %.3f' % (len(day_daily), len(day_daily) / len(day_sleep_stats)))
        print('Night shift: n = %i, %.3f\n' % (len(night_daily), len(night_daily) / len(night_sleep_stats)))
        
        # Less than daily
        print('Less than daily \n')
        print('Total: n = %i, %.3f ' % (len(less_than_daily), len(less_than_daily) / len(valid_sleep_stats)))
        print('Day shift: n = %i, %.3f' % (len(day_less_than_daily), len(day_less_than_daily) / len(day_sleep_stats)))
        print('Night shift: n = %i, %.3f\n' % (len(night_less_than_daily), len(night_less_than_daily) / len(night_sleep_stats)))
        
        # Not at all
        print('Not at all \n')
        print('Total: n = %i, %.3f ' % (len(not_at_all), len(not_at_all) / len(valid_sleep_stats)))
        print('Day shift: n = %i, %.3f' % (len(day_not_at_all), len(day_not_at_all) / len(day_sleep_stats)))
        print('Night shift: n = %i, %.3f\n' % (len(night_not_at_all), len(night_not_at_all) / len(night_sleep_stats)))
        
        stat, p = ttest_ind(day_sleep_stats[col].dropna(), night_sleep_stats[col].dropna())
        print('Statistics = %.3f, p = %.3f\n' % (stat, p))
        
    else:
        
        print()
    

current_tobacco_gats

Number of valid participant: day: 93; night: 58

Daily 

Total: n = 5, 0.033 
Day shift: n = 2, 0.022
Night shift: n = 2, 0.034

Less than daily 

Total: n = 11, 0.072 
Day shift: n = 6, 0.065
Night shift: n = 5, 0.086

Not at all 

Total: n = 135, 0.888 
Day shift: n = 84, 0.903
Night shift: n = 51, 0.879

Statistics = 0.681, p = 0.497

past_tobacco_gats

Number of valid participant: day: 93; night: 58

Daily 

Total: n = 13, 0.086 
Day shift: n = 9, 0.097
Night shift: n = 3, 0.052

Less than daily 

Total: n = 24, 0.158 
Day shift: n = 15, 0.161
Night shift: n = 9, 0.155

Not at all 

Total: n = 114, 0.750 
Day shift: n = 68, 0.731
Night shift: n = 46, 0.793

Statistics = -0.968, p = 0.335



## IGTB - Alcohol usage, Day, Night shift

In [283]:
audit_col = ['driking_frequency', 'number_of_drink_per_day', 'more_than_six_drink_frequency']

driking_frequency = ['Never', 'Monthly', '2-4 times per month', '2-3 times per week', '4 or more times a week']
number_of_drink_per_day = ['1-2', '3-4', '5-6', '7-8', '9-10']
more_than_six_drink_frequency = ['Never', 'Less than Monthly', 'Monthly', 'Weekly', 'Daily']

# shift_pre-study
day_sleep_stats = valid_sleep_stats.loc[(valid_sleep_stats['shift_pre-study'] == 1) | (valid_sleep_stats['shift'] == 1) & (valid_sleep_stats['shift_pre-study'] != 2)]
night_sleep_stats = valid_sleep_stats.loc[((valid_sleep_stats['shift_pre-study'] == 2) | (valid_sleep_stats['shift'] == 2)) & (valid_sleep_stats['shift_pre-study'] != 1)]
data_type = ['day-shift', 'night-shift']

for col in audit_col:

    print(col + '\n')
    print('Number of valid participant: day: %i; night: %i\n' % (len(response0), len(response1)))
    
    # Status
    if col == 'driking_frequency':
        answer_type = driking_frequency
    elif col == 'number_of_drink_per_day':
        answer_type = number_of_drink_per_day
    elif col == 'more_than_six_drink_frequency':
        answer_type = more_than_six_drink_frequency
        
    for i in range(1, 6, 1):
        overall = valid_sleep_stats.loc[valid_sleep_stats[col] == i]
        day_data = day_sleep_stats.loc[day_sleep_stats[col] == i]
        night_data = night_sleep_stats.loc[night_sleep_stats[col] == i]

        # Print
        print(answer_type[i-1] + '\n')
        print('Total: n = %i, %.3f ' % (len(overall), len(overall) / len(valid_sleep_stats)))
        print('Day shift: n = %i, %.3f' % (len(day_data), len(day_data) / len(day_sleep_stats)))
        print('Night shift: n = %i, %.3f\n' % (len(night_data), len(night_data) / len(night_sleep_stats)))

    stat, p = ttest_ind(day_sleep_stats[col].dropna(), night_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    

driking_frequency

Number of valid participant: day: 93; night: 58

Never

Total: n = 32, 0.211 
Day shift: n = 20, 0.215
Night shift: n = 11, 0.190

Monthly

Total: n = 66, 0.434 
Day shift: n = 38, 0.409
Night shift: n = 28, 0.483

2-4 times per month

Total: n = 38, 0.250 
Day shift: n = 23, 0.247
Night shift: n = 15, 0.259

2-3 times per week

Total: n = 14, 0.092 
Day shift: n = 11, 0.118
Night shift: n = 3, 0.052

4 or more times a week

Total: n = 2, 0.013 
Day shift: n = 1, 0.011
Night shift: n = 1, 0.017

Statistics = 0.489, p = 0.625

number_of_drink_per_day

Number of valid participant: day: 93; night: 58

1-2

Total: n = 107, 0.704 
Day shift: n = 73, 0.785
Night shift: n = 33, 0.569

3-4

Total: n = 20, 0.132 
Day shift: n = 10, 0.108
Night shift: n = 10, 0.172

5-6

Total: n = 8, 0.053 
Day shift: n = 2, 0.022
Night shift: n = 6, 0.103

7-8

Total: n = 1, 0.007 
Day shift: n = 1, 0.011
Night shift: n = 0, 0.000

9-10

Total: n = 1, 0.007 
Day shift: n = 0, 0.000
Night shi

## Sleep stats

In [284]:
sleep_col = ['long_sleep_duration_mean', # 'long_sleep_duration_std', 
             'long_sleep_duration_deep_mean', # 'long_sleep_duration_deep_std',
             'long_sleep_duration_light_mean', # 'long_sleep_duration_light_std',
             'long_sleep_duration_rem_mean', # 'long_sleep_duration_rem_std',
             'long_sleep_duration_wake_mean', # 'long_sleep_duration_wake_std',
             'long_sleep_efficiency_mean', # 'long_sleep_efficiency_std',
             'long_sleep_heart_rate_mean',
             'short_sleep_duration_mean', # 'short_sleep_duration_std',
             'short_sleep_duration_deep_mean', # 'short_sleep_duration_deep_std',
             'short_sleep_duration_light_mean', # 'short_sleep_duration_light_std',
             'short_sleep_duration_rem_mean', # 'short_sleep_duration_rem_std',
             'short_sleep_duration_wake_mean', # 'short_sleep_duration_wake_std',
             'short_sleep_efficiency_mean', # 'short_sleep_efficiency_std',
             'short_sleep_heart_rate_mean',
             'sleep_duration_mean', 'sleep_duration_std']

# shift_pre-study
day_sleep_stats = valid_sleep_stats.loc[(valid_sleep_stats['shift_pre-study'] == 1) | (valid_sleep_stats['shift'] == 1) & (valid_sleep_stats['shift_pre-study'] != 2)]
night_sleep_stats = valid_sleep_stats.loc[((valid_sleep_stats['shift_pre-study'] == 2) | (valid_sleep_stats['shift'] == 2)) & (valid_sleep_stats['shift_pre-study'] != 1)]
data_type = ['day-shift', 'night-shift']

for col in sleep_col:
    
    print(col + '\n')
    
    print('Number of valid participant: %i' % (len(valid_sleep_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(valid_sleep_stats[col]), np.std(valid_sleep_stats[col])))
    
    print('Number of valid participant: %i' % (len(day_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_sleep_stats[col]), np.std(day_sleep_stats[col])))
    
    print('Number of valid participant: %i' % (len(night_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_sleep_stats[col]), np.std(night_sleep_stats[col])))

    stat, p = ttest_ind(day_sleep_stats[col].dropna(), night_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))


long_sleep_duration_mean

Number of valid participant: 152
Overall, mean: 435.09, std: 104.44 

Number of valid participant: 93
Day shift, mean: 430.92, std: 70.09 

Number of valid participant: 58
Night shift, mean: 445.23, std: 141.01 

Statistics = -0.823, p = 0.412

long_sleep_duration_deep_mean

Number of valid participant: 152
Overall, mean: 65.37, std: 16.29 

Number of valid participant: 93
Day shift, mean: 65.12, std: 13.61 

Number of valid participant: 58
Night shift, mean: 65.85, std: 19.95 

Statistics = -0.264, p = 0.792

long_sleep_duration_light_mean

Number of valid participant: 152
Overall, mean: 242.98, std: 58.68 

Number of valid participant: 93
Day shift, mean: 232.96, std: 39.10 

Number of valid participant: 58
Night shift, mean: 260.42, std: 77.44 

Statistics = -2.862, p = 0.005

long_sleep_duration_rem_mean

Number of valid participant: 152
Overall, mean: 83.74, std: 22.85 

Number of valid participant: 93
Day shift, mean: 84.40, std: 19.84 

Number of valid 

## Sleep stats, workday

In [285]:
# Workday
sleep_col = ['workday_long_sleep_duration_mean', # 'long_sleep_duration_std', 
             'workday_long_sleep_duration_deep_mean', # 'long_sleep_duration_deep_std',
             'workday_long_sleep_duration_light_mean', # 'long_sleep_duration_light_std',
             'workday_long_sleep_duration_rem_mean', # 'long_sleep_duration_rem_std',
             'workday_long_sleep_duration_wake_mean', # 'long_sleep_duration_wake_std',
             'workday_long_sleep_efficiency_mean', # 'long_sleep_efficiency_std',
             'workday_long_sleep_heart_rate_mean',
             'workday_short_sleep_duration_mean', # 'short_sleep_duration_std',
             'workday_short_sleep_duration_deep_mean', # 'short_sleep_duration_deep_std',
             'workday_short_sleep_duration_light_mean', # 'short_sleep_duration_light_std',
             'workday_short_sleep_duration_rem_mean', # 'short_sleep_duration_rem_std',
             'workday_short_sleep_duration_wake_mean', # 'short_sleep_duration_wake_std',
             'workday_short_sleep_efficiency_mean', # 'short_sleep_efficiency_std',
             'workday_short_sleep_heart_rate_mean',
             'workday_sleep_duration_mean', 'workday_sleep_duration_std']

# shift_pre-study
day_sleep_stats = valid_sleep_stats.loc[(valid_sleep_stats['shift_pre-study'] == 1) | (valid_sleep_stats['shift'] == 1) & (valid_sleep_stats['shift_pre-study'] != 2)]
night_sleep_stats = valid_sleep_stats.loc[((valid_sleep_stats['shift_pre-study'] == 2) | (valid_sleep_stats['shift'] == 2)) & (valid_sleep_stats['shift_pre-study'] != 1)]
data_type = ['day-shift', 'night-shift']

for col in sleep_col:
    
    print(col + '\n')
    
    print('Number of valid participant: %i' % (len(valid_sleep_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(valid_sleep_stats[col]), np.std(valid_sleep_stats[col])))
    
    print('Number of valid participant: %i' % (len(day_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_sleep_stats[col]), np.std(day_sleep_stats[col])))
    
    print('Number of valid participant: %i' % (len(night_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_sleep_stats[col]), np.std(night_sleep_stats[col])))

    stat, p = ttest_ind(day_sleep_stats[col].dropna(), night_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))


workday_long_sleep_duration_mean

Number of valid participant: 152
Overall, mean: 382.23, std: 124.81 

Number of valid participant: 91
Day shift, mean: 391.30, std: 87.45 

Number of valid participant: 58
Night shift, mean: 368.00, std: 166.40 

Statistics = 1.108, p = 0.270

workday_long_sleep_duration_deep_mean

Number of valid participant: 152
Overall, mean: 58.22, std: 22.20 

Number of valid participant: 91
Day shift, mean: 61.01, std: 18.08 

Number of valid participant: 57
Night shift, mean: 53.77, std: 26.94 

Statistics = 1.944, p = 0.054

workday_long_sleep_duration_light_mean

Number of valid participant: 152
Overall, mean: 203.02, std: 71.05 

Number of valid participant: 91
Day shift, mean: 203.37, std: 49.06 

Number of valid participant: 57
Night shift, mean: 202.46, std: 96.26 

Statistics = 0.075, p = 0.940

workday_long_sleep_duration_rem_mean

Number of valid participant: 152
Overall, mean: 71.05, std: 28.64 

Number of valid participant: 91
Day shift, mean: 76.04, 

## Sleep stats, offday

In [286]:
# Workday
sleep_col = ['offday_long_sleep_duration_mean', # 'long_sleep_duration_std', 
             'offday_long_sleep_duration_deep_mean', # 'long_sleep_duration_deep_std',
             'offday_long_sleep_duration_light_mean', # 'long_sleep_duration_light_std',
             'offday_long_sleep_duration_rem_mean', # 'long_sleep_duration_rem_std',
             'offday_long_sleep_duration_wake_mean', # 'long_sleep_duration_wake_std',
             'offday_long_sleep_efficiency_mean', # 'long_sleep_efficiency_std',
             'offday_long_sleep_heart_rate_mean',
             'offday_short_sleep_duration_mean', # 'short_sleep_duration_std',
             'offday_short_sleep_duration_deep_mean', # 'short_sleep_duration_deep_std',
             'offday_short_sleep_duration_light_mean', # 'short_sleep_duration_light_std',
             'offday_short_sleep_duration_rem_mean', # 'short_sleep_duration_rem_std',
             'offday_short_sleep_duration_wake_mean', # 'short_sleep_duration_wake_std',
             'offday_short_sleep_efficiency_mean', # 'short_sleep_efficiency_std',
             'offday_short_sleep_heart_rate_mean',
             'offday_sleep_duration_mean', 'offday_sleep_duration_std']

# shift_pre-study
day_sleep_stats = valid_sleep_stats.loc[(valid_sleep_stats['shift_pre-study'] == 1) | (valid_sleep_stats['shift'] == 1) & (valid_sleep_stats['shift_pre-study'] != 2)]
night_sleep_stats = valid_sleep_stats.loc[((valid_sleep_stats['shift_pre-study'] == 2) | (valid_sleep_stats['shift'] == 2)) & (valid_sleep_stats['shift_pre-study'] != 1)]
data_type = ['day-shift', 'night-shift']

for col in sleep_col:
    
    print(col + '\n')
    
    print('Number of valid participant: %i' % (len(valid_sleep_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(valid_sleep_stats[col]), np.std(valid_sleep_stats[col])))
    
    print('Number of valid participant: %i' % (len(day_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_sleep_stats[col]), np.std(day_sleep_stats[col])))
    
    print('Number of valid participant: %i' % (len(night_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_sleep_stats[col]), np.std(night_sleep_stats[col])))

    stat, p = ttest_ind(day_sleep_stats[col].dropna(), night_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))



offday_long_sleep_duration_mean

Number of valid participant: 152
Overall, mean: 436.06, std: 117.09 

Number of valid participant: 93
Day shift, mean: 433.40, std: 95.10 

Number of valid participant: 58
Night shift, mean: 443.80, std: 143.73 

Statistics = -0.531, p = 0.596

offday_long_sleep_duration_deep_mean

Number of valid participant: 152
Overall, mean: 65.38, std: 18.07 

Number of valid participant: 93
Day shift, mean: 65.87, std: 16.11 

Number of valid participant: 58
Night shift, mean: 64.69, std: 20.93 

Statistics = 0.387, p = 0.700

offday_long_sleep_duration_light_mean

Number of valid participant: 152
Overall, mean: 246.39, std: 59.24 

Number of valid participant: 93
Day shift, mean: 240.33, std: 45.94 

Number of valid participant: 58
Night shift, mean: 257.54, std: 74.23 

Statistics = -1.748, p = 0.082

offday_long_sleep_duration_rem_mean

Number of valid participant: 152
Overall, mean: 84.97, std: 25.13 

Number of valid participant: 93
Day shift, mean: 86.70, st

KeyError: 'offday_sleep_duration_mean'

## Group comparison

In [None]:
sleep_col = ['duration_in_seconds', 'SleepEfficiency',
             'SleepMinutesStageDeep', 'SleepMinutesStageLight',
             'SleepMinutesStageRem', 'SleepMinutesStageWake']    

day_long_sleep_stats = valid_long_sleep_stats.loc[(valid_long_sleep_stats['shift_pre-study'] == 1) | (valid_long_sleep_stats['shift'] == 1) & (valid_long_sleep_stats['shift_pre-study'] != 2)]
day_long_sleep_stats = day_long_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

night_long_sleep_stats = valid_long_sleep_stats.loc[((valid_long_sleep_stats['shift_pre-study'] == 2) | (valid_long_sleep_stats['shift'] == 2)) & (valid_long_sleep_stats['shift_pre-study'] != 1)]
night_long_sleep_stats = night_long_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

day_short_sleep_stats = valid_short_sleep_stats.loc[(valid_short_sleep_stats['shift_pre-study'] == 1) | (valid_short_sleep_stats['shift'] == 1) & (valid_short_sleep_stats['shift_pre-study'] != 2)]
day_short_sleep_stats = day_short_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

night_short_sleep_stats = valid_short_sleep_stats.loc[((valid_short_sleep_stats['shift_pre-study'] == 2) | (valid_short_sleep_stats['shift'] == 2)) & (valid_short_sleep_stats['shift_pre-study'] != 1)]
night_short_sleep_stats = night_short_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

overall_long_stats = day_long_sleep_stats.append(night_long_sleep_stats)
overall_short_stats = day_short_sleep_stats.append(night_short_sleep_stats)
    

print('Long sleep' + '\n\n')
    
for col in sleep_col:
    
    print(col + '\n')
    
    print('Number of valid participant: %i' % (len(overall_long_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(overall_long_stats[col]), np.std(overall_long_stats[col])))
    
    print('Number of valid participant: %i' % (len(day_long_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_long_sleep_stats[col]), np.std(day_long_sleep_stats[col])))
    
    print('Number of valid participant: %i' % (len(night_long_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_long_sleep_stats[col]), np.std(night_long_sleep_stats[col])))

    stat, p = ttest_ind(day_long_sleep_stats[col].dropna(), night_long_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    
print('Short sleep' + '\n\n')
    
for col in sleep_col:
    
    print(col + '\n')
    
    print('Number of valid participant: %i' % (len(overall_short_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(overall_short_stats[col].dropna()), np.std(overall_short_stats[col].dropna())))
    
    print('Number of valid participant: %i' % (len(day_short_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_short_sleep_stats[col].dropna()), np.std(day_short_sleep_stats[col].dropna())))
    
    print('Number of valid participant: %i' % (len(night_short_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_short_sleep_stats[col].dropna()), np.std(night_short_sleep_stats[col].dropna())))

    stat, p = ttest_ind(day_short_sleep_stats[col].dropna(), night_short_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))


# valid_long_sleep_stats = valid_long_sleep_stats.append(long_sleep_df)
        
# short_sleep_df = pd.DataFrame()
# short_sleep_df = participant_sleep_data_with_label.loc[participant_sleep_data_with_label['sleep_type'] == np.argmin(gmm.means_)]
# valid_short_sleep_stats = valid_short_sleep_stats.append(short_sleep_df)

## Sleep on workday - All

In [None]:
sleep_col = ['duration_in_seconds', 'SleepEfficiency',
             'SleepMinutesStageDeep', 'SleepMinutesStageLight',
             'SleepMinutesStageRem', 'SleepMinutesStageWake']    

day_long_sleep_stats = valid_long_sleep_stats.loc[(valid_long_sleep_stats['shift_pre-study'] == 1) | (valid_long_sleep_stats['shift'] == 1) & (valid_long_sleep_stats['shift_pre-study'] != 2)]
day_long_sleep_stats = day_long_sleep_stats.loc[(day_long_sleep_stats['is_sleep_before_work'] == 1) & (day_long_sleep_stats['is_sleep_after_work'] == 1)]
day_long_sleep_stats = day_long_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

night_long_sleep_stats = valid_long_sleep_stats.loc[((valid_long_sleep_stats['shift_pre-study'] == 2) | (valid_long_sleep_stats['shift'] == 2)) & (valid_long_sleep_stats['shift_pre-study'] != 1)]
night_long_sleep_stats = night_long_sleep_stats.loc[(night_long_sleep_stats['is_sleep_before_work'] == 1) & (night_long_sleep_stats['is_sleep_after_work'] == 1)]
night_long_sleep_stats = night_long_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

day_short_sleep_stats = valid_short_sleep_stats.loc[(valid_short_sleep_stats['shift_pre-study'] == 1) | (valid_short_sleep_stats['shift'] == 1) & (valid_short_sleep_stats['shift_pre-study'] != 2)]
day_short_sleep_stats = day_short_sleep_stats.loc[(day_short_sleep_stats['is_sleep_before_work'] == 1) & (day_short_sleep_stats['is_sleep_after_work'] == 1)]
day_short_sleep_stats = day_short_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

night_short_sleep_stats = valid_short_sleep_stats.loc[((valid_short_sleep_stats['shift_pre-study'] == 2) | (valid_short_sleep_stats['shift'] == 2)) & (valid_short_sleep_stats['shift_pre-study'] != 1)]
night_short_sleep_stats = night_short_sleep_stats.loc[(night_short_sleep_stats['is_sleep_before_work'] == 1) & (night_short_sleep_stats['is_sleep_after_work'] == 1)]
night_short_sleep_stats = night_short_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

overall_long_stats = day_long_sleep_stats.append(night_long_sleep_stats)
overall_short_stats = day_short_sleep_stats.append(night_short_sleep_stats)
    

print('Long sleep' + '\n\n')
    
for col in sleep_col:
    
    print(col + '\n')
    
    if 'duration_in_seconds' in col:
        divide = 60
    else:
        divide = 1
    
    print('Number of valid participant: %i' % (len(overall_long_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(overall_long_stats[col]) / divide, np.std(overall_long_stats[col]) / divide))
    
    print('Number of valid participant: %i' % (len(day_long_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_long_sleep_stats[col]) / divide, np.std(day_long_sleep_stats[col]) / divide))
    
    print('Number of valid participant: %i' % (len(night_long_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_long_sleep_stats[col]) / divide, np.std(night_long_sleep_stats[col]) / divide))

    stat, p = ttest_ind(day_long_sleep_stats[col].dropna(), night_long_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    
print('Short sleep' + '\n\n')
    
for col in sleep_col:
    
    print(col + '\n')
    
    if 'duration_in_seconds' in col:
        divide = 60
    else:
        divide = 1
    
    print('Number of valid participant: %i' % (len(overall_short_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(overall_short_stats[col].dropna()), np.std(overall_short_stats[col].dropna())))
    
    print('Number of valid participant: %i' % (len(day_short_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_short_sleep_stats[col].dropna()) / divide, np.std(day_short_sleep_stats[col].dropna()) / divide))
    
    print('Number of valid participant: %i' % (len(night_short_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_short_sleep_stats[col].dropna()) / divide, np.std(night_short_sleep_stats[col].dropna()) / divide))

    stat, p = ttest_ind(day_short_sleep_stats[col].dropna(), night_short_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))


# valid_long_sleep_stats = valid_long_sleep_stats.append(long_sleep_df)
        
# short_sleep_df = pd.DataFrame()
# short_sleep_df = participant_sleep_data_with_label.loc[participant_sleep_data_with_label['sleep_type'] == np.argmin(gmm.means_)]
# valid_short_sleep_stats = valid_short_sleep_stats.append(short_sleep_df)

## Sleep on offday - All

In [None]:
sleep_col = ['duration_in_seconds', 'SleepEfficiency',
             'SleepMinutesStageDeep', 'SleepMinutesStageLight',
             'SleepMinutesStageRem', 'SleepMinutesStageWake']    

day_long_sleep_stats = valid_long_sleep_stats.loc[(valid_long_sleep_stats['shift_pre-study'] == 1) | (valid_long_sleep_stats['shift'] == 1) & (valid_long_sleep_stats['shift_pre-study'] != 2)]
day_long_sleep_stats = day_long_sleep_stats.loc[(day_long_sleep_stats['is_sleep_before_work'] != 1) & (day_long_sleep_stats['is_sleep_after_work'] != 1)]
day_long_sleep_stats = day_long_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

night_long_sleep_stats = valid_long_sleep_stats.loc[((valid_long_sleep_stats['shift_pre-study'] == 2) | (valid_long_sleep_stats['shift'] == 2)) & (valid_long_sleep_stats['shift_pre-study'] != 1)]
night_long_sleep_stats = night_long_sleep_stats.loc[(night_long_sleep_stats['is_sleep_before_work'] != 1) & (night_long_sleep_stats['is_sleep_after_work'] != 1)]
night_long_sleep_stats = night_long_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

day_short_sleep_stats = valid_short_sleep_stats.loc[(valid_short_sleep_stats['shift_pre-study'] == 1) | (valid_short_sleep_stats['shift'] == 1) & (valid_short_sleep_stats['shift_pre-study'] != 2)]
day_short_sleep_stats = day_short_sleep_stats.loc[(day_short_sleep_stats['is_sleep_before_work'] != 1) & (day_short_sleep_stats['is_sleep_after_work'] != 1)]
day_short_sleep_stats = day_short_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

night_short_sleep_stats = valid_short_sleep_stats.loc[((valid_short_sleep_stats['shift_pre-study'] == 2) | (valid_short_sleep_stats['shift'] == 2)) & (valid_short_sleep_stats['shift_pre-study'] != 1)]
night_short_sleep_stats = night_short_sleep_stats.loc[(night_short_sleep_stats['is_sleep_before_work'] != 1) & (night_short_sleep_stats['is_sleep_after_work'] != 1)]
night_short_sleep_stats = night_short_sleep_stats.dropna(subset=['SleepMinutesStageDeep'])

overall_long_stats = day_long_sleep_stats.append(night_long_sleep_stats)
overall_short_stats = day_short_sleep_stats.append(night_short_sleep_stats)
    

print('Long sleep' + '\n\n')
    
for col in sleep_col:
    
    print(col + '\n')
    
    if 'duration_in_seconds' in col:
        divide = 60
    else:
        divide = 1
    
    print('Number of valid participant: %i' % (len(overall_long_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(overall_long_stats[col]) / divide, np.std(overall_long_stats[col]) / divide))
    
    print('Number of valid participant: %i' % (len(day_long_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_long_sleep_stats[col]) / divide, np.std(day_long_sleep_stats[col]) / divide))
    
    print('Number of valid participant: %i' % (len(night_long_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_long_sleep_stats[col]) / divide, np.std(night_long_sleep_stats[col]) / divide))

    stat, p = ttest_ind(day_long_sleep_stats[col].dropna(), night_long_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))
    
print('Short sleep' + '\n\n')
    
for col in sleep_col:
    
    print(col + '\n')
    
    if 'duration_in_seconds' in col:
        divide = 60
    else:
        divide = 1
    
    print('Number of valid participant: %i' % (len(overall_short_stats[col])))
    print('Overall, mean: %.2f, std: %.2f \n' % (np.mean(overall_short_stats[col].dropna()) / divide, np.std(overall_short_stats[col].dropna()) / divide))
    
    print('Number of valid participant: %i' % (len(day_short_sleep_stats[col].dropna())))
    print('Day shift, mean: %.2f, std: %.2f \n' % (np.mean(day_short_sleep_stats[col].dropna()) / divide, np.std(day_short_sleep_stats[col].dropna()) / divide))
    
    print('Number of valid participant: %i' % (len(night_short_sleep_stats[col].dropna())))
    print('Night shift, mean: %.2f, std: %.2f \n' % (np.mean(night_short_sleep_stats[col].dropna()) / divide, np.std(night_short_sleep_stats[col].dropna()) / divide))

    stat, p = ttest_ind(day_short_sleep_stats[col].dropna(), night_short_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))


# valid_long_sleep_stats = valid_long_sleep_stats.append(long_sleep_df)
        
# short_sleep_df = pd.DataFrame()
# short_sleep_df = participant_sleep_data_with_label.loc[participant_sleep_data_with_label['sleep_type'] == np.argmin(gmm.means_)]
# valid_short_sleep_stats = valid_short_sleep_stats.append(short_sleep_df)

## Sleep Strategy

In [None]:
# frame_col = ['nurse_year', 'well_being', 'general_health', 'life_satisfaction', 'perceived_stress', 'overtime']
less_adaptive_data = sleep_data.loc[(sleep_data['night_shift_type'] == 2) | (sleep_data['night_shift_type'] == 4)]
more_adaptive_data = sleep_data.loc[(sleep_data['night_shift_type'] == 3) | (sleep_data['night_shift_type'] == 5)]

less_adaptive_sleep_stats = pd.DataFrame()
more_adaptive_sleep_stats = pd.DataFrame()

for participant_id in less_adaptive_data['participant_id'].unique():
    frame = valid_sleep_stats.loc[valid_sleep_stats['participant_id'] == participant_id]
    if len(frame) > 0:
        less_adaptive_sleep_stats = less_adaptive_sleep_stats.append(frame)

for participant_id in more_adaptive_data['participant_id'].unique():
    frame = valid_sleep_stats.loc[valid_sleep_stats['participant_id'] == participant_id]
    if len(frame) > 0:
        more_adaptive_sleep_stats = more_adaptive_sleep_stats.append(frame)
    
print('Number of valid less_adaptive participant: %i' % (len(less_adaptive_sleep_stats)))
print('Number of valid more_adaptive participant: %i' % (len(more_adaptive_sleep_stats)))

for col in ana_col:
    print(col + '\n')
    # print('less_adaptive')
    # print('Number of valid participant: %i' % (len(less_adaptive_sleep_stats)))
    print('Less_adaptive, Average: %.3f, Std: %.3f' % (np.mean(less_adaptive_sleep_stats[col]), np.std(less_adaptive_sleep_stats[col])))
    
    # print('more_adaptive')
    # print('Number of valid participant: %i' % (len(more_adaptive_sleep_stats)))
    print('More_adaptive, Average: %.3f, Std: %.3f \n' % (np.mean(more_adaptive_sleep_stats[col]), np.std(more_adaptive_sleep_stats[col])))
    
    stat, p = ttest_ind(less_adaptive_sleep_stats[col].dropna(), more_adaptive_sleep_stats[col].dropna())
    print('Statistics = %.3f, p = %.3f\n' % (stat, p))