# Figure 2
- Python kernel with rpy2 integration, so R code can be called from Python

In [1]:
import pandas as pd
import sys
sys.path.insert(1, '/Users/brinkley97/Documents/development/')
import utils

# Load Data

In [2]:
# Dataset directory and the gzip-compressed work-day / off-day CSV files for Figure 2.
path_to_data = "lab-kcad/datasets/tiles_dataset/"
work_data = "figure_2/physical/diurnal_work_lm_6.csv.gz"
off_data = "figure_2/physical/diurnal_off_lm_6.csv.gz"

In [3]:
# Load the work-day frame first, then the off-day frame, from the same dataset directory.
work_df, off_df = (
    utils.load_gzip_csv_data(path_to_data, file_name)
    for file_name in (work_data, off_data)
)

In [30]:
# Show the column labels together with the work-day frame itself.
work_df.keys(), work_df

(Index(['Unnamed: 0', 'work', 'time', 'rest', 'step_ratio', 'run_ratio', 'id',
        'age', 'gender', 'shift'],
       dtype='object'),
     Unnamed: 0  work time      rest  ...  \
 0         work  work  3rd  0.870900  ...   
 1         work  work  5th  0.628179  ...   
 2         work  work  6th  0.901501  ...   
 3         work  work  1st  0.948894  ...   
 4         work  work  4th  0.764789  ...   
 ..         ...   ...  ...       ...  ...   
 607       work  work  5th  0.572392  ...   
 608       work  work  6th  0.788603  ...   
 609       work  work  1st  0.986683  ...   
 610       work  work  4th  0.610316  ...   
 611       work  work  2nd  0.932886  ...   
 
                                        id          age  gender  shift  
 0    02581754-36cd-4b23-85ea-bf995c6dec83   < 40 Years    Male  night  
 1    02581754-36cd-4b23-85ea-bf995c6dec83   < 40 Years    Male  night  
 2    02581754-36cd-4b23-85ea-bf995c6dec83   < 40 Years    Male  night  
 3    02581754-36cd-4b23-85e

In [5]:
# off_df

# Modify Dataframes

In [6]:
def modify_df(work_df, off_df):
    '''Relabel the time and shift values so they line up with the specific questions

    The ordinal time labels ('1st' ... '6th') are replaced by four-hour window
    strings and the shift values 'day' / 'night' become 'day-shift' /
    'night-shift'. The frames passed in are not modified; relabelled frames
    are returned instead.

    Arguments:
    work_df -- Dataframe
    off_df -- Dataframe

    Return: tuple
    relabelled work_df -- Dataframe
    relabelled off_df -- Dataframe
    '''
    # One ordinal-slot -> clock-window labelling is shared by both frames.
    window_by_slot = {
        '1st': '3:00–7:00 h',
        '2nd': '7:00–11:00 h',
        '3rd': '11:00–15:00 h',
        '4th': '15:00–19:00 h',
        '5th': '19:00–23:00 h',
        '6th': '23:00–3:00 h',
    }
    shift_labels = {'day': 'day-shift', 'night': 'night-shift'}

    def _relabel(frame):
        # replace() returns a new Dataframe, so the caller's frame stays as it was.
        return frame.replace({"time": window_by_slot}).replace({"shift": shift_labels})

    return _relabel(work_df), _relabel(off_df)

In [7]:
# Build relabelled frames; work_df and off_df themselves are left as loaded.
modified_work_df, modified_off_df = modify_df(work_df, off_df)

In [31]:
# modified_work_df

Unnamed: 0.1,Unnamed: 0,work,time,rest,...,id,age,gender,shift
0,work,work,11:00–15:00 h,0.870900,...,02581754-36cd-4b23-85ea-bf995c6dec83,< 40 Years,Male,night-shift
1,work,work,19:00–23:00 h,0.628179,...,02581754-36cd-4b23-85ea-bf995c6dec83,< 40 Years,Male,night-shift
2,work,work,23:00–3:00 h,0.901501,...,02581754-36cd-4b23-85ea-bf995c6dec83,< 40 Years,Male,night-shift
3,work,work,3:00–7:00 h,0.948894,...,02581754-36cd-4b23-85ea-bf995c6dec83,< 40 Years,Male,night-shift
4,work,work,15:00–19:00 h,0.764789,...,02581754-36cd-4b23-85ea-bf995c6dec83,< 40 Years,Male,night-shift
...,...,...,...,...,...,...,...,...,...
607,work,work,19:00–23:00 h,0.572392,...,fcb14a4c-1ffa-4315-872a-a38833459885,>= 40 Years,Female,day-shift
608,work,work,23:00–3:00 h,0.788603,...,fcb14a4c-1ffa-4315-872a-a38833459885,>= 40 Years,Female,day-shift
609,work,work,3:00–7:00 h,0.986683,...,fcb14a4c-1ffa-4315-872a-a38833459885,>= 40 Years,Female,day-shift
610,work,work,15:00–19:00 h,0.610316,...,fcb14a4c-1ffa-4315-872a-a38833459885,>= 40 Years,Female,day-shift


# Load Generated Specific Questions

In [9]:
# Location of the notebook that generates the specific questions.
base = "/Users/brinkley97/Documents/development/lab-kcad/"
path_to_questions = "tiles-day-night/my_code"
name_of_questions = "generateSpecificQuestions.ipynb"
# The directory and the file name need a "/" between them; without it the
# result was ".../my_codegenerateSpecificQuestions.ipynb".
questions = base + path_to_questions + "/" + name_of_questions

In [10]:
# %load questions
/Users/brinkley97/Documents/development/lab-kcad/tiles-day-night/my_codegenerateSpecificQuestions.ipynb

In [11]:
%run "generateSpecificQuestions.ipynb"

In [12]:
# fig_2

# Py + R Code Integration

In [13]:
%load_ext rpy2.ipython

In [14]:
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages

# pandas2ri must be imported and activated; otherwise passing a DataFrame to R fails with
# "Conversion 'py2rpy' not defined for objects of type '<class 'pandas.core.frame.DataFrame'>'"
from rpy2.robjects import pandas2ri
pandas2ri.activate()

In [15]:
# Import the R package 'report' through rpy2 and keep a handle to it.
report = rpackages.importr('report')

In [16]:
# Import the R packages 'devtools' and 'ez' through rpy2.
# NOTE(review): presumably required by figure_2.R (the R output mentions ezANOVA) -- confirm.
rpackages.importr('devtools')
rpackages.importr('ez')

rpy2.robjects.packages.Package as a <module 'ez'>

In [17]:
# see all cols; run here, then go back to cell w/ data to run
pd.set_option('display.max_columns', 9)

In [18]:
# def separate_data(df, specific_question):
    

In [81]:
def figure_2(generated_specific_questions, original_work_df, original_off_df, work_df, off_df):
    '''Run the Figure 2 R analyses selected by the generated specific questions

    Every question is split on "*" and the resulting keywords are matched
    against the columns of work_df plus the six time-window labels. When a
    question selects exactly four columns besides "time", the question is
    printed and, if its last two selected columns identify a known analysis,
    the corresponding function from figure_2.R is called and its result is
    printed.

    Arguments:
    generated_specific_questions -- list of question strings with keywords wrapped in "*"
    original_work_df -- Dataframe handed to the R "*_work" functions
    original_off_df -- Dataframe handed to the R "*_off" functions
    work_df -- Dataframe whose columns are matched against each question
    off_df -- Dataframe (not used by this function; kept so existing calls still work)

    Return: None -- the results are printed, nothing is returned
    '''
    time_windows = ['23:00–3:00 h', '3:00–7:00 h', '7:00–11:00 h', '11:00–15:00 h', '15:00–19:00 h', '19:00–23:00 h']
    # Time windows come after the real columns, so when a question names a
    # window it is always the last match.
    candidate_columns = list(work_df.keys()) + time_windows

    # (last selected column, second-to-last column is "work") -> function in figure_2.R
    r_function_names = {
        ("rest", True): "diurnal_rest_work",
        ("rest", False): "diurnal_rest_off",
        ("step_ratio", True): "diurnal_step_ratio_work",
        ("step_ratio", False): "diurnal_step_ratio_off",
    }

    # figure_2.R is sourced once, the first time an R call is actually needed,
    # instead of on every loop iteration.
    r_objects = None

    for specific_question in generated_specific_questions:
        keywords = specific_question.split("*")
        matched_columns = [column for column in candidate_columns if column in keywords]

        if not matched_columns:
            # No keyword names a known column or time window; skip the question
            # (indexing the empty match list used to raise IndexError here).
            continue

        # The last match is left out of the selection: for a question that names
        # a time window this is the window label, which is not a Dataframe column.
        data_cols = ["time", "id", "shift"] + matched_columns[:-1]
        specific_work_df = work_df.loc[0:, data_cols].set_index("time")

        if len(specific_work_df.keys()) != 4:
            continue

        selected_columns = list(specific_work_df.reset_index().keys())

        if r_objects is None:
            r_objects = robjects.r
            r_objects.source("figure_2.R")

        print("\nspecific_question : ", specific_question, "*****")

        is_work_question = selected_columns[-2] == "work"
        r_function_name = r_function_names.get((selected_columns[-1], is_work_question))
        if r_function_name is None:
            # Neither "rest" nor "step_ratio": no matching analysis in this function.
            continue

        r_input_df = original_work_df if is_work_question else original_off_df
        print(getattr(r_objects, r_function_name)(r_input_df))
        print("\n=========================\n")
                    

In [82]:
# The frames as loaded go to the R functions; the relabelled frames are used for column matching.
figure_2(fig_2, work_df, off_df, modified_work_df, modified_off_df)

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().




specific_question :  on both *work* day and *off* day, what is the effect of primarily *day-shift* nurses and primarily *night-shift* nurses on *rest* between the hours of *23:00–3:00 h* ? *****
$ANOVA
      Effect DFn DFd         F            p p<.05        ges
2      shift   1 100  8.685575 3.991586e-03     * 0.05774823
3       time   5 500 54.478574 3.848140e-45     * 0.13820742
4 shift:time   5 500 52.571610 8.297243e-44     * 0.13401797

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.1283365 3.096158e-35     *
4 shift:time 0.1283365 3.096158e-35     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.5250303 6.034008e-25         * 0.5404687 1.326925e-25         *
4 shift:time 0.5250303 3.059101e-24         * 0.5404687 7.051349e-25         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                diff     lwr

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd         F            p p<.05        ges
2      shift   1 100  8.685575 3.991586e-03     * 0.05774823
3       time   5 500 54.478574 3.848140e-45     * 0.13820742
4 shift:time   5 500 52.571610 8.297243e-44     * 0.13401797

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.1283365 3.096158e-35     *
4 shift:time 0.1283365 3.096158e-35     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.5250303 6.034008e-25         * 0.5404687 1.326925e-25         *
4 shift:time 0.5250303 3.059101e-24         * 0.5404687 7.051349e-25         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                diff     lwr.ci    upr.ci    pval    
night-day 0.07810084 0.05280674 0.1033949 2.3e-09 ***

$time
                diff       lwr.ci       upr.ci    pval    
2nd-1st -0.014693458 -0.057472943  0.028086028  0.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd         F            p p<.05        ges
2      shift   1 100  8.685575 3.991586e-03     * 0.05774823
3       time   5 500 54.478574 3.848140e-45     * 0.13820742
4 shift:time   5 500 52.571610 8.297243e-44     * 0.13401797

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.1283365 3.096158e-35     *
4 shift:time 0.1283365 3.096158e-35     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.5250303 6.034008e-25         * 0.5404687 1.326925e-25         *
4 shift:time 0.5250303 3.059101e-24         * 0.5404687 7.051349e-25         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                diff     lwr.ci    upr.ci    pval    
night-day 0.07810084 0.05280674 0.1033949 2.3e-09 ***

$time
                diff       lwr.ci       upr.ci    pval    
2nd-1st -0.014693458 -0.057472943  0.028086028  0.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd         F            p p<.05        ges
2      shift   1 100  8.685575 3.991586e-03     * 0.05774823
3       time   5 500 54.478574 3.848140e-45     * 0.13820742
4 shift:time   5 500 52.571610 8.297243e-44     * 0.13401797

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.1283365 3.096158e-35     *
4 shift:time 0.1283365 3.096158e-35     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.5250303 6.034008e-25         * 0.5404687 1.326925e-25         *
4 shift:time 0.5250303 3.059101e-24         * 0.5404687 7.051349e-25         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                diff     lwr.ci    upr.ci    pval    
night-day 0.07810084 0.05280674 0.1033949 2.3e-09 ***

$time
                diff       lwr.ci       upr.ci    pval    
2nd-1st -0.014693458 -0.057472943  0.028086028  0.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd         F            p p<.05        ges
2      shift   1 100  8.685575 3.991586e-03     * 0.05774823
3       time   5 500 54.478574 3.848140e-45     * 0.13820742
4 shift:time   5 500 52.571610 8.297243e-44     * 0.13401797

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.1283365 3.096158e-35     *
4 shift:time 0.1283365 3.096158e-35     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.5250303 6.034008e-25         * 0.5404687 1.326925e-25         *
4 shift:time 0.5250303 3.059101e-24         * 0.5404687 7.051349e-25         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                diff     lwr.ci    upr.ci    pval    
night-day 0.07810084 0.05280674 0.1033949 2.3e-09 ***

$time
                diff       lwr.ci       upr.ci    pval    
2nd-1st -0.014693458 -0.057472943  0.028086028  0.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd         F            p p<.05        ges
2      shift   1 100  8.685575 3.991586e-03     * 0.05774823
3       time   5 500 54.478574 3.848140e-45     * 0.13820742
4 shift:time   5 500 52.571610 8.297243e-44     * 0.13401797

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.1283365 3.096158e-35     *
4 shift:time 0.1283365 3.096158e-35     *

$`Sphericity Corrections`
      Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
3       time 0.5250303 6.034008e-25         * 0.5404687 1.326925e-25         *
4 shift:time 0.5250303 3.059101e-24         * 0.5404687 7.051349e-25         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                diff     lwr.ci    upr.ci    pval    
night-day 0.07810084 0.05280674 0.1033949 2.3e-09 ***

$time
                diff       lwr.ci       upr.ci    pval    
2nd-1st -0.014693458 -0.057472943  0.028086028  0.

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd          F             p p<.05        ges
2      shift   1 100   2.059939  1.543367e-01       0.01035423
3       time   5 500 132.657969  2.710301e-89     * 0.39496662
4 shift:time   5 500 368.173691 5.235337e-165     * 0.64435076

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2531097 8.023141e-22     *
4 shift:time 0.2531097 8.023141e-22     *

$`Sphericity Corrections`
      Effect       GGe         p[GG] p[GG]<.05       HFe         p[HF]
3       time 0.6506126  4.311754e-59         * 0.6750795  3.299798e-61
4 shift:time 0.6506126 2.115201e-108         * 0.6750795 2.291382e-112
  p[HF]<.05
3         *
4         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01570992 -0.02802406 -0.003395771 0.0125 *  

$time
                diff      lwr.ci       upr.ci    pval    
2nd-1st  0.053885795  0

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd          F             p p<.05        ges
2      shift   1 100   2.059939  1.543367e-01       0.01035423
3       time   5 500 132.657969  2.710301e-89     * 0.39496662
4 shift:time   5 500 368.173691 5.235337e-165     * 0.64435076

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2531097 8.023141e-22     *
4 shift:time 0.2531097 8.023141e-22     *

$`Sphericity Corrections`
      Effect       GGe         p[GG] p[GG]<.05       HFe         p[HF]
3       time 0.6506126  4.311754e-59         * 0.6750795  3.299798e-61
4 shift:time 0.6506126 2.115201e-108         * 0.6750795 2.291382e-112
  p[HF]<.05
3         *
4         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01570992 -0.02802406 -0.003395771 0.0125 *  

$time
                diff      lwr.ci       upr.ci    pval    
2nd-1st  0.053885795  0

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd          F             p p<.05        ges
2      shift   1 100   2.059939  1.543367e-01       0.01035423
3       time   5 500 132.657969  2.710301e-89     * 0.39496662
4 shift:time   5 500 368.173691 5.235337e-165     * 0.64435076

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2531097 8.023141e-22     *
4 shift:time 0.2531097 8.023141e-22     *

$`Sphericity Corrections`
      Effect       GGe         p[GG] p[GG]<.05       HFe         p[HF]
3       time 0.6506126  4.311754e-59         * 0.6750795  3.299798e-61
4 shift:time 0.6506126 2.115201e-108         * 0.6750795 2.291382e-112
  p[HF]<.05
3         *
4         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01570992 -0.02802406 -0.003395771 0.0125 *  

$time
                diff      lwr.ci       upr.ci    pval    
2nd-1st  0.053885795  0

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd          F             p p<.05        ges
2      shift   1 100   2.059939  1.543367e-01       0.01035423
3       time   5 500 132.657969  2.710301e-89     * 0.39496662
4 shift:time   5 500 368.173691 5.235337e-165     * 0.64435076

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2531097 8.023141e-22     *
4 shift:time 0.2531097 8.023141e-22     *

$`Sphericity Corrections`
      Effect       GGe         p[GG] p[GG]<.05       HFe         p[HF]
3       time 0.6506126  4.311754e-59         * 0.6750795  3.299798e-61
4 shift:time 0.6506126 2.115201e-108         * 0.6750795 2.291382e-112
  p[HF]<.05
3         *
4         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01570992 -0.02802406 -0.003395771 0.0125 *  

$time
                diff      lwr.ci       upr.ci    pval    
2nd-1st  0.053885795  0

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd          F             p p<.05        ges
2      shift   1 100   2.059939  1.543367e-01       0.01035423
3       time   5 500 132.657969  2.710301e-89     * 0.39496662
4 shift:time   5 500 368.173691 5.235337e-165     * 0.64435076

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2531097 8.023141e-22     *
4 shift:time 0.2531097 8.023141e-22     *

$`Sphericity Corrections`
      Effect       GGe         p[GG] p[GG]<.05       HFe         p[HF]
3       time 0.6506126  4.311754e-59         * 0.6750795  3.299798e-61
4 shift:time 0.6506126 2.115201e-108         * 0.6750795 2.291382e-112
  p[HF]<.05
3         *
4         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01570992 -0.02802406 -0.003395771 0.0125 *  

$time
                diff      lwr.ci       upr.ci    pval    
2nd-1st  0.053885795  0

R[write to console]:  Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().



$ANOVA
      Effect DFn DFd          F             p p<.05        ges
2      shift   1 100   2.059939  1.543367e-01       0.01035423
3       time   5 500 132.657969  2.710301e-89     * 0.39496662
4 shift:time   5 500 368.173691 5.235337e-165     * 0.64435076

$`Mauchly's Test for Sphericity`
      Effect         W            p p<.05
3       time 0.2531097 8.023141e-22     *
4 shift:time 0.2531097 8.023141e-22     *

$`Sphericity Corrections`
      Effect       GGe         p[GG] p[GG]<.05       HFe         p[HF]
3       time 0.6506126  4.311754e-59         * 0.6750795  3.299798e-61
4 shift:time 0.6506126 2.115201e-108         * 0.6750795 2.291382e-112
  p[HF]<.05
3         *
4         *


  Posthoc multiple comparisons of means : Fisher LSD 
    95% family-wise confidence level

$shift
                 diff      lwr.ci       upr.ci   pval    
night-day -0.01570992 -0.02802406 -0.003395771 0.0125 *  

$time
                diff      lwr.ci       upr.ci    pval    
2nd-1st  0.053885795  0