In [1]:
%reload_ext autoreload
%autoreload 2

import json
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy import stats
import statsmodels.api as sm
from lib import utils


# Default font family and size applied to every matplotlib figure created below.
mpl.rcParams.update({
    'font.family': 'times new roman',
    'font.size': 16,
})

In [2]:
# Load the sequence data; the first CSV column is used as the index.
df = pd.read_csv('valid_sequence_data.csv', index_col=0)

# Flag every row whose value_surplus is exactly 60.
df['total_money_heuristic'] = df['value_surplus'].eq(60)

# Number of workers with at least 12 flagged rows.
flagged_per_worker = df.groupby('worker_id')['total_money_heuristic'].sum()
(flagged_per_worker >= 12).sum()

62

In [80]:
# Restore the pickled result object for the "fe / label" specification.
# pickle.load can execute arbitrary code: only load files produced by this project.
with open('bootstrap_rlm_fe_label_k.pkl', 'rb') as pkl_file:
    bootstrap_rlm_fe_label_k = pickle.load(pkl_file)

# Show the confidence-interval table followed by the Muller-Welsh criterion.
print(bootstrap_rlm_fe_label_k.conf_int())
print('M-W Criterion:', bootstrap_rlm_fe_label_k.muller_welsh_criterion())

                    median_coef        se       mad   ci_lower   ci_upper
const                 52.030609  0.619995  0.381180  50.921611  53.337420
front_amount_6m_0     -0.016674  0.002263  0.001520  -0.021366  -0.012207
front_amount_12m_0    -0.021383  0.002305  0.001625  -0.025773  -0.017041
front_amount_6m_1     -0.000709  0.000584  0.000390  -0.001880   0.000391
front_amount_12m_1    -0.000920  0.000589  0.000416  -0.001986   0.000200
choice_peli            2.452066  0.658568  0.406990   1.091811   3.717946
M-W Criterion: 119.0131305639573


In [81]:
# Restore the pickled result object for the "fe / baseline" specification.
# pickle.load can execute arbitrary code: only load files produced by this project.
with open('bootstrap_rlm_fe_baseline_k.pkl', 'rb') as pkl_file:
    bootstrap_rlm_fe_baseline_k = pickle.load(pkl_file)

# Show the confidence-interval table followed by the Muller-Welsh criterion.
print(bootstrap_rlm_fe_baseline_k.conf_int())
print('M-W Criterion:', bootstrap_rlm_fe_baseline_k.muller_welsh_criterion())

                  median_coef        se       mad   ci_lower   ci_upper
const               51.639343  0.610820  0.362385  50.581226  52.996600
front_amount_6m     -0.004421  0.000764  0.000521  -0.005860  -0.002932
front_amount_12m    -0.005719  0.000790  0.000519  -0.007393  -0.004259
choice_peli          2.378662  0.649683  0.404103   0.944536   3.554102
M-W Criterion: 125.09652410843766


In [82]:
# Restore the pickled result object for the "pool / label" specification.
# pickle.load can execute arbitrary code: only load files produced by this project.
with open('bootstrap_rlm_pool_label_k.pkl', 'rb') as pkl_file:
    bootstrap_rlm_pool_label_k = pickle.load(pkl_file)

# Show the confidence-interval table followed by the Muller-Welsh criterion.
print(bootstrap_rlm_pool_label_k.conf_int())
print('M-W Criterion:', bootstrap_rlm_pool_label_k.muller_welsh_criterion())

                    median_coef        se       mad   ci_lower   ci_upper
const                 57.477850  0.614153  0.415818  56.234533  58.644906
front_amount_6m_0     -0.065290  0.003210  0.002201  -0.071599  -0.059017
front_amount_12m_0    -0.068168  0.003257  0.002077  -0.075164  -0.062138
front_amount_6m_1      0.009703  0.001449  0.000959   0.006622   0.012413
front_amount_12m_1     0.008944  0.001473  0.000946   0.006106   0.011916
choice_peli           -1.341249  0.524019  0.371619  -2.298808  -0.311319
M-W Criterion: 214.3676680443063


In [83]:
# Restore the pickled result object for the "pool / baseline" specification.
# pickle.load can execute arbitrary code: only load files produced by this project.
with open('bootstrap_rlm_pool_baseline_k.pkl', 'rb') as pkl_file:
    bootstrap_rlm_pool_baseline_k = pickle.load(pkl_file)

# Show the confidence-interval table followed by the Muller-Welsh criterion.
print(bootstrap_rlm_pool_baseline_k.conf_int())
print('M-W Criterion:', bootstrap_rlm_pool_baseline_k.muller_welsh_criterion())

                  median_coef        se       mad   ci_lower   ci_upper
const               57.125512  0.861522  0.554130  55.320198  58.804736
front_amount_6m     -0.002642  0.002016  0.001390  -0.006710   0.001183
front_amount_12m    -0.003970  0.002159  0.001426  -0.008334   0.000134
choice_peli         -2.261167  0.835444  0.551689  -3.815411  -0.476396
M-W Criterion: 412.64036514663803


In [115]:
# Load the stored regression results, keyed by model name.
with open('model_reg_result_k.json', 'r') as json_file:
    reg_result = json.load(json_file)

# Use one label ('const') for the intercept in every model so that a single
# table row can hold it: the first 'Intercept' entry, if any, is renamed in place.
for model_result in reg_result.values():
    names = model_result['coef_name']
    if 'Intercept' in names:
        names[names.index('Intercept')] = 'const'

# Model-level statistics appended below the coefficient rows.
indicators = ['nobs', 'rsquared_adj']

# Row order of the table: a 'b_<coef>' row followed by its 'se_<coef>' row.
row_names = [
    'b_front_amount_6m', 'se_front_amount_6m',
    'b_front_amount_12m', 'se_front_amount_12m',
    'b_front_amount_6m_0', 'se_front_amount_6m_0',
    'b_front_amount_12m_0', 'se_front_amount_12m_0',
    'b_front_amount_6m_1', 'se_front_amount_6m_1',
    'b_front_amount_12m_1', 'se_front_amount_12m_1',
    'b_choice_peli', 'se_choice_peli',
    'b_const', 'se_const',
] + indicators

# One (initially empty) list per row, plus the list of model names, which is
# inserted last and therefore ends up as the final column.
result_table = {name: [] for name in row_names}
result_table['model'] = list(reg_result)

# Number of decimals used when rounding values for display.
digit = 3
def get_star(p):
    """Return the significance marker for a p-value.

    Parameters
    ----------
    p : float or None
        p-value of a coefficient. ``None`` and NaN are treated as
        "no p-value available".

    Returns
    -------
    str
        ``''``    if ``p > 0.05`` or the p-value is undefined,
        ``'*'``   if ``0.01 < p <= 0.05``,
        ``'**'``  if ``0.005 < p <= 0.01``,
        ``'***'`` if ``p <= 0.005``.
    """
    # NaN compares False against every threshold below, so without this guard
    # an undefined p-value would fall through to '***'. `p != p` is True only
    # for NaN and needs no extra import.
    if p is None or p != p:
        return ''

    # NOTE(review): the '***' cut-off is 0.005 here; 0.001 is the more common
    # convention. Thresholds are left unchanged; confirm they match the table note.
    if p > 0.05:
        return ''
    if p > 0.01:
        return '*'
    if p > 0.005:
        return '**'
    return '***'


# Fill the table row by row. Every row receives exactly one entry per model
# (in the key order of reg_result) so that all columns have the same length
# and line up with result_table['model'].
for r in row_names:
    # Classify the row by its *prefix*. Splitting on 'b_' / 'se_' anywhere in
    # the name would misclassify any row name that merely contains the marker.
    if r.startswith('b_'):
        row_kind, target = 'b', r[len('b_'):]
    elif r.startswith('se_'):
        row_kind, target = 'se', r[len('se_'):]
    else:
        row_kind, target = None, None

    for m in reg_result:
        model_result = reg_result[m]
        coef_name = model_result['coef_name']

        if row_kind is not None and target in coef_name:
            pos = coef_name.index(target)

            if row_kind == 'b':
                _b = model_result['params'][pos]
                if target == 'const':
                    # The reported constant is the estimated one plus the
                    # model's stored 'contrast_mean'.
                    # NOTE(review): presumably undoes a centering step - confirm.
                    _b = _b + model_result['contrast_mean']
                # Fixed-point formatting keeps trailing zeros ('-0.060', not
                # '-0.06'), so every cell shows the same number of decimals.
                cell = '{:.{prec}f}'.format(_b, prec=digit) \
                    + get_star(model_result['pvalues'][pos])
            else:
                _se = model_result['bse'][pos]
                cell = '(' + '{:.{prec}f}'.format(_se, prec=digit) + ')'

        elif r in indicators:
            # Model-level statistics are stored unformatted.
            cell = model_result[r]

        else:
            # The coefficient does not appear in this model.
            cell = ''

        result_table[r].append(cell)

# One column per table row plus the trailing 'model' column.
result_table = pd.DataFrame(result_table)

In [120]:
# Transpose result_table so that each model becomes a column and each
# statistic a row; the trailing 'model' column supplies the column labels.
reg_table = pd.DataFrame(
    result_table.iloc[:, :-1].values.T,
    index=row_names,
    columns=result_table['model'],
)

# Drop the axis labels so they do not show up in the rendered table.
reg_table.index.name = None
reg_table.columns.name = None

# Blank the last row (the final entry of row_names) for columns 4 onward.
reg_table.iloc[-1, 4:] = np.repeat('', 4)

# First four model columns.
reg_table_1 = reg_table.iloc[:, :4]
reg_table_1

Unnamed: 0,ols_pool_baseline,ols_pool_label,ols_fe_baseline,ols_fe_label
b_front_amount_6m,-0.005**,,-0.005*,
se_front_amount_6m,(0.002),,(0.002),
b_front_amount_12m,-0.006***,,-0.005**,
se_front_amount_12m,(0.002),,(0.002),
b_front_amount_6m_0,,-0.059***,,-0.019***
se_front_amount_6m_0,,(0.005),,(0.004)
b_front_amount_12m_0,,-0.06***,,-0.02***
se_front_amount_12m_0,,(0.005),,(0.004)
b_front_amount_6m_1,,0.023***,,0.002
se_front_amount_6m_1,,(0.004),,(0.002)


In [119]:
# Columns 4 onward of reg_table, without its last row.
# Take an explicit copy: adding a row through .loc on a frame obtained by
# slicing another frame triggers pandas' SettingWithCopyWarning, and it is
# ambiguous whether the parent frame is affected.
reg_table_2 = reg_table.iloc[:-1, 4:].copy()

# Append the Muller-Welsh criterion of each model; the list order must match
# the column order (pool_baseline, pool_label, fe_baseline, fe_label).
reg_table_2.loc['muller_welsh'] = [
    bootstrap_rlm_pool_baseline_k.muller_welsh_criterion(),
    bootstrap_rlm_pool_label_k.muller_welsh_criterion(),
    bootstrap_rlm_fe_baseline_k.muller_welsh_criterion(),
    bootstrap_rlm_fe_label_k.muller_welsh_criterion(),
]
reg_table_2

Unnamed: 0,rlm_pool_baseline,rlm_pool_label,rlm_fe_baseline,rlm_fe_label
b_front_amount_6m,-0.005,,-0.005***,
se_front_amount_6m,(0.003),,(0.001),
b_front_amount_12m,-0.006*,,-0.006***,
se_front_amount_12m,(0.003),,(0.001),
b_front_amount_6m_0,,-0.059***,,-0.017***
se_front_amount_6m_0,,(0.003),,(0.002)
b_front_amount_12m_0,,-0.064***,,-0.021***
se_front_amount_12m_0,,(0.003),,(0.002)
b_front_amount_6m_1,,0.015***,,0.0
se_front_amount_6m_1,,(0.002),,(0.001)


In [144]:
# Export reg_table_2 as LaTeX source.
with open('tables/tab_rlm.tex', 'w') as tex_file:
    latex_source = reg_table_2.to_latex()
    tex_file.write(latex_source)

In [140]:
# Coefficient rows of the confidence-interval table, in display order.
row_names_ci = [
    'front_amount_6m',
    'front_amount_12m',
    'front_amount_6m_0',
    'front_amount_12m_0',
    'front_amount_6m_1',
    'front_amount_12m_1',
    'choice_peli',
    'const',
]

def get_ci(model, rows=None, digits=None):
    """Format a model's confidence intervals as '[lower, upper]' strings.

    Parameters
    ----------
    model : object
        Anything exposing ``conf_int()`` that returns a DataFrame indexed by
        coefficient name with ``'ci_lower'`` and ``'ci_upper'`` columns.
    rows : sequence of str, optional
        Coefficient names to report, in output order. Defaults to the
        notebook-level ``row_names_ci``.
    digits : int, optional
        Number of decimals to show. Defaults to the notebook-level ``digit``.

    Returns
    -------
    list of str
        One entry per name in ``rows``: ``'[lower, upper]'`` when the model
        has that coefficient, ``''`` otherwise.
    """
    if rows is None:
        rows = row_names_ci
    if digits is None:
        digits = digit

    intervals = model.conf_int()
    formatted = []
    for name in rows:
        if name in intervals.index:
            # Fixed-point formatting keeps trailing zeros ('55.320', not
            # '55.32'), so every bound shows the same number of decimals.
            lower = '{:.{prec}f}'.format(intervals.loc[name, 'ci_lower'], prec=digits)
            upper = '{:.{prec}f}'.format(intervals.loc[name, 'ci_upper'], prec=digits)
            formatted.append('[' + lower + ', ' + upper + ']')
        else:
            # Coefficient not present in this model: leave the cell empty.
            formatted.append('')
    return formatted


# Column labels and the corresponding model objects, in matching order.
model_names = ['rlm_pool_baseline', 'rlm_pool_label', 'rlm_fe_baseline', 'rlm_fe_label']
model_list = [
    bootstrap_rlm_pool_baseline_k,
    bootstrap_rlm_pool_label_k,
    bootstrap_rlm_fe_baseline_k,
    bootstrap_rlm_fe_label_k,
]

# One column of formatted intervals per model, one row per coefficient.
ci_table = pd.DataFrame(
    {label: get_ci(fitted) for label, fitted in zip(model_names, model_list)},
    index=row_names_ci,
)
ci_table

Unnamed: 0,rlm_pool_baseline,rlm_pool_label,rlm_fe_baseline,rlm_fe_label
front_amount_6m,"[-0.007, 0.001]",,"[-0.006, -0.003]",
front_amount_12m,"[-0.008, 0.0]",,"[-0.007, -0.004]",
front_amount_6m_0,,"[-0.072, -0.059]",,"[-0.021, -0.012]"
front_amount_12m_0,,"[-0.075, -0.062]",,"[-0.026, -0.017]"
front_amount_6m_1,,"[0.007, 0.012]",,"[-0.002, 0.0]"
front_amount_12m_1,,"[0.006, 0.012]",,"[-0.002, 0.0]"
choice_peli,"[-3.815, -0.476]","[-2.299, -0.311]","[0.945, 3.554]","[1.092, 3.718]"
const,"[55.32, 58.805]","[56.235, 58.645]","[50.581, 52.997]","[50.922, 53.337]"
