In [1]:
import pandas as pd
import numpy as np 
import seaborn as sns 

# Load the pretrial dataset (path is relative to the notebook's working directory).
df = pd.read_csv('./EDA/data/pretrial_data.csv')
# Work on a random half of the rows. The fixed random_state makes the draw,
# and therefore every regression fitted on it below, reproducible after a
# kernel restart; without it each run samples different rows.
df = df.sample(frac=.5, random_state=0)

print(df.columns)

df.head()

Index(['Unnamed: 0', 'case_type', 'age', 'sex', 'race', 'is_poor', 'bond',
       'bond_type', 'prior_F', 'prior_M', 'gini', 'released', 'sentence_type',
       'sentence', 'bond_NA', 'held_wo_bail', 'sentence_NA'],
      dtype='object')


Unnamed: 0.1,Unnamed: 0,case_type,age,sex,race,is_poor,bond,bond_type,prior_F,prior_M,gini,released,sentence_type,sentence,bond_NA,held_wo_bail,sentence_NA
2816,2816,M,34.0,F,B,0.0,1500.0,3,0.0,0.0,0.44,1.0,4.0,0.0,False,False,True
9917,9917,F,32.0,F,B,1.0,1500.0,2,0.0,2.0,0.46,1.0,4.0,0.0,False,False,True
5495,5495,M,24.0,M,B,0.0,0.0,4,0.0,0.0,0.42,1.0,0.0,0.0,False,False,False
21284,21284,M,34.0,M,B,0.0,0.0,6,,,0.45,1.0,0.0,0.0,False,False,False
7439,7439,S,20.0,M,W,1.0,1500.0,3,0.0,1.0,0.47,1.0,0.0,0.328542,False,False,False


In [2]:
# Regressors: the columns listed in contvars, selected as a DataFrame.
contvars = ['age', 'prior_F', 'prior_M', 'gini']
x = df[contvars]

# Outcome variable for the regressions.
y = df['sentence']

# Column passed later as cluster_var when requesting cluster-robust standard errors.
cluster_var = df['race']

In [3]:
from mast.lm import LM

# Fit the same specification (outcome y on regressors x) three times; the only
# difference between the fits is the standard-error option passed to run().
# A separate LM object is built for each fit so the three results can be
# compared side by side afterwards.
# NOTE(review): the cell output shows a missing-value message and a coefficient
# table per fit; whether LM(...) or run() prints them is not visible here, so
# keep the construct-then-run order as is.

# Default standard errors (se_type left at run()'s default).
reg1 = LM(x,y)
reg1.run()
# Robust standard errors.
reg2 = LM(x,y)
reg2.run(se_type='robust')
# Cluster-robust standard errors, clustered on cluster_var.
reg3 = LM(x,y)
reg3.run(se_type='cluster-robust',cluster_var=cluster_var)

Due to missing values, 319 observations out of 11493 were dropped. 


     Variable  Coefficient    Std.Err  t-Statistic       p-Value Significance
0        age    -0.084394   0.038976    -2.165269  3.038818e-02           **
1    prior_F     1.901870   0.159010    11.960668  9.050207e-33          ***
2    prior_M     0.164327   0.116622     1.409054  1.588470e-01             
3       gini     3.834871  11.266541     0.340377  7.335791e-01             
4  Intercept    10.308603   5.124274     2.011720  4.427347e-02           ** 

Due to missing values, 319 observations out of 11493 were dropped. 

Robust standard errors

     Variable  Coefficient    Std.Err  t-Statistic       p-Value Significance
0        age    -0.084394   0.047558    -1.774560  7.599786e-02            *
1    prior_F     1.901870   0.243255     7.818433  5.829098e-15          ***
2    prior_M     0.164327   0.141231     1.163531  2.446390e-01             
3       gini     3.834871  14.036982     0.273198  7.847063e-01

In [4]:
# Collect the three fitted models; each one becomes a column of the LaTeX table built below.
reg_list = [reg1, reg2, reg3]

['age' 'prior_F' 'prior_M' 'gini' 'Intercept']


In [12]:
# Table dimensions: J models (one column each) and K variables (one
# estimate row plus one standard-error row each).
# NOTE: only the first model's variable list is used, so every model in
# reg_list is assumed to expose the same variables in the same order.
# TODO: compute a real intersection of variables across models.
J = len(reg_list)
keep = 'intersection'  # intended variable-selection mode; not used yet
vars_list = reg_list[0].vars
K = len(vars_list)

# Create formatters
real_formatter = "{:,.3f}".format
int_formatter = "{:,.0f}".format

# Summary rows appended below the coefficients: (row label, formatter, attribute name).
summary_rows = [
    ('Mean, Dep.Var.', real_formatter, 'ybar'),
    ('N Obs.', int_formatter, 'n'),
    ('R$^2$', real_formatter, 'rsq'),
]

## Make the latex table
# The context manager closes the file even if a write or an attribute lookup fails.
with open("regression.tex", "w") as file:
    # Begin threeparttable
    file.write('\\begin{threeparttable}[h] \n')
    file.write('\\centering \n')
    file.write('\\caption{} \n')
    file.write('\\label{} \n \n')

    # Begin tabular: one label column plus one column per model (J + 1 in
    # total). The column count must follow the number of models, not the
    # number of variables, and no row break may follow the column spec.
    file.write('\\begin{tabular}{' + (J + 1) * 'c' + '} \n')
    file.write('\\toprule \n')  # \toprule needs the booktabs package

    # Point estimates and standard errors
    for k in range(K):
        est_cells = ''.join(' & ' + real_formatter(reg.beta[k]) for reg in reg_list)
        se_cells = ''.join(' & (' + real_formatter(reg.se[k]) + ')' for reg in reg_list)
        # Names are wrapped in $...$ (as threeparttable() does) because a raw
        # underscore such as the one in prior_F is an error in LaTeX text mode.
        file.write('$' + vars_list[k] + '$' + est_cells + '\\\\ \n')
        file.write(se_cells + '\\\\ \n')
    file.write('\\hline \n')

    ## Additional table rows: mean of the dependent variable, sample size, R^2
    for row_label, formatter, attr in summary_rows:
        cells = ''.join(' & ' + formatter(getattr(reg, attr)) for reg in reg_list)
        file.write(row_label + cells + '\\\\ \n')

    file.write('\\hline \n')
    # End tabular section:
    file.write('\\end{tabular} \n \n')
    # End notes:
    file.write('\\begin{tablenotes} \n')
    file.write('\\end{tablenotes} \n \n')
    # The tabular environment was already closed above, so the only thing
    # left to close is the threeparttable itself.
    file.write('\\end{threeparttable} \n')

In [62]:
def _tex_row(label, cells):
    """Return one tabular row: the label, each cell preceded by ' & ', then a LaTeX row break."""
    return label + ''.join(' & ' + cell for cell in cells) + '\\\\ \n'


def threeparttable(reg_list, 
    filename = 'reg.tex', 
    path = './',
    caption = 'Insights',
    label = 'tab:',
    stretch = 1,
    notes = 'Standard Errors are great.'):
    """Write a LaTeX regression table (table + threeparttable) comparing several models.

    Each model in ``reg_list`` becomes one column. Every variable contributes
    two rows: the point estimates and, beneath them, the standard errors in
    parentheses. Three summary rows follow: mean of the dependent variable,
    number of observations and R-squared.

    Parameters
    ----------
    reg_list : sequence
        Fitted model objects. Each must expose ``vars``, ``beta``, ``se``
        (indexable by variable position), ``ybar``, ``n`` and ``rsq``.
        Only the first model's ``vars`` is used for the row labels, so all
        models are assumed to share the same variables in the same order.
    filename : str
        Name of the .tex file to write.
    path : str
        Directory to write into; a trailing separator is optional.
    caption : str
        Text placed inside the caption command.
    label : str
        Text placed inside the label command.
    stretch : number
        Value used for the arraystretch setting.
    notes : str
        Text of the single item written in the tablenotes environment.

    Returns
    -------
    None
        The table is written to ``path``/``filename``, overwriting any
        existing file.

    Raises
    ------
    IndexError
        If ``reg_list`` is empty.
    """
    import os  # local import so this cell does not depend on another cell's imports

    J = len(reg_list)
    vars_list = reg_list[0].vars
    K = len(vars_list)

    # Create formatters
    real_formatter = "{:,.3f}".format
    int_formatter = "{:,.0f}".format

    # The whole document is assembled in memory first, so a model that lacks
    # an attribute raises before the output file is opened and truncated.
    tex = []

    # Begin float and threeparttable
    tex.append('\\begin{table} \n')
    tex.append('\\centering \n')
    tex.append('\\begin{threeparttable}[t] \n')
    tex.append('\\caption{'+caption+'} \n')
    tex.append('\\label{'+label+'} \n')

    # Begin tabular: one label column plus one column per model. The count
    # follows the number of models (J), not the number of variables (K).
    tex.append('\\renewcommand{\\arraystretch}{'+str(stretch)+'} \n')
    tex.append('\\begin{tabular}{'+(J+1)*'c'+'} ')
    tex.append('\\hline \\hline \n')

    # Point estimates and standard errors
    # NOTE(review): names are emitted in math mode, so an underscore such as
    # the one in prior_F typesets as a subscript -- confirm that is intended.
    for k in range(K):
        tex.append(_tex_row('$'+vars_list[k]+'$',
                            [real_formatter(reg.beta[k]) for reg in reg_list]))
        tex.append(_tex_row('',
                            ['('+real_formatter(reg.se[k])+')' for reg in reg_list]))
    tex.append('\\hline \n')

    ## Additional table rows
    tex.append(_tex_row('Mean, Dep.Var.', [real_formatter(reg.ybar) for reg in reg_list]))
    tex.append(_tex_row('N Obs.', [int_formatter(reg.n) for reg in reg_list]))
    tex.append(_tex_row('R$^2$', [real_formatter(reg.rsq) for reg in reg_list]))

    # The closing double rule has to be inside the tabular: a rule command
    # placed after the environment has ended is a LaTeX error.
    tex.append('\\hline \\hline \n')
    # End tabular section:
    tex.append('\\end{tabular} \n \n')
    # End notes:
    tex.append('\\begin{tablenotes} \n')
    tex.append('\\small \n')
    tex.append('\\item')
    tex.append(' ' + notes+'\n')
    tex.append('\\end{tablenotes} \n \n')
    tex.append('\\end{threeparttable} \n')
    tex.append('\\end{table} \n')

    # os.path.join accepts the directory with or without a trailing separator;
    # the context manager guarantees the file is closed.
    with open(os.path.join(path, filename), "w") as file:
        file.write(''.join(tex))

In [63]:
# Write the table for the three models using the defaults from threeparttable's
# signature (output file 'reg.tex' under './').
threeparttable(reg_list)