In [1]:
import numpy as np
import pandas as pd

df = pd.read_csv('law_data.csv')
df = df.drop(columns = ['Unnamed: 0'])
df = df[df['region_first'] != 'PO']

In [2]:
race_coded = pd.get_dummies(df['race'])
df = pd.concat([df,race_coded],axis=1)

In [3]:
gender_coded = pd.get_dummies(df['sex'])
gender_coded.columns = ['female', 'male']
df = pd.concat([df, gender_coded],axis=1)

In [4]:
df = df.drop(columns = ['race', 'sex'])

In [5]:
sense_cols = ['Amerindian', 'Asian', 'Black', 'Hispanic', 'Mexican', 'Other',
       'Puertorican', 'White', 'female', 'male']

In [6]:
from sklearn.model_selection import train_test_split

X = df.loc[:, df.columns !='ZFYA']
y = df['ZFYA']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [7]:
n = len(X_train)
ne = len(X_test)
K = len(sense_cols)

In [8]:
X_train['LSAT'] = np.round(X_train['LSAT'], 0)
X_test['LSAT'] = np.round(X_test['LSAT'], 0)

In [9]:
np.array(X_train['UGPA'])

array([3.4, 3.1, 3.5, ..., 3.8, 2.6, 3.4])

In [11]:
law_stan_data = {'N' : n, 
        'K' : K, 
        'a' : np.array(X_train[sense_cols]), 
        'ugpa' : np.array(X_train['UGPA']), 
       'lsat' : np.array(X_train['LSAT']),
       'zfya' : np.array(y_train)
       }

In [12]:
model= """
data {
  int<lower = 0> N; // number of observations
  int<lower = 0> K; // number of covariates
  matrix[N, K]   a; // sensitive variables
  real           ugpa[N]; // UGPA
  int            lsat[N]; // LSAT
  real           zfya[N]; // ZFYA
  
}

transformed data {
  
 vector[K] zero_K;
 vector[K] one_K;
 
 zero_K = rep_vector(0,K);
 one_K = rep_vector(1,K);

}

parameters {

  vector[N] u;

  real ugpa0;
  real eta_u_ugpa;
  real lsat0;
  real eta_u_lsat;
  real eta_u_zfya;
  
  vector[K] eta_a_ugpa;
  vector[K] eta_a_lsat;
  vector[K] eta_a_zfya;
  
  
  real<lower=0> sigma_g_Sq;
}

transformed parameters  {
 // Population standard deviation (a positive real number)
 real<lower=0> sigma_g;
 // Standard deviation (derived from variance)
 sigma_g = sqrt(sigma_g_Sq);
}

model {
  
  // don't have data about this
  u ~ normal(0, 1);
  
  ugpa0      ~ normal(0, 1);
  eta_u_ugpa ~ normal(0, 1);
  lsat0     ~ normal(0, 1);
  eta_u_lsat ~ normal(0, 1);
  eta_u_zfya ~ normal(0, 1);

  eta_a_ugpa ~ normal(zero_K, one_K);
  eta_a_lsat ~ normal(zero_K, one_K);
  eta_a_zfya ~ normal(zero_K, one_K);

  sigma_g_Sq ~ inv_gamma(1, 1);

  // have data about these
  ugpa ~ normal(ugpa0 + eta_u_ugpa * u + a * eta_a_ugpa, sigma_g);
  lsat ~ poisson(exp(lsat0 + eta_u_lsat * u + a * eta_a_lsat));
  zfya ~ normal(eta_u_zfya * u + a * eta_a_zfya, 1);

}
"""

In [15]:
import pystan

ModuleNotFoundError: No module named 'pystan'

In [14]:
fit = stan.build(model, data = law_stan_data)

RuntimeError: asyncio.run() cannot be called from a running event loop