In [1]:
import pandas as pd
import numpy as np
import random

import tensorflow as tf
import keras

from keras.layers import Dense, Input
from tensorflow.keras import Model

from sklearn.metrics import confusion_matrix


In [2]:
raw = pd.read_csv("./compas-scores-two-years.csv")

# biased feature
name_list = [ 'sex','age', 'race', 'juv_fel_count', 'decile_score','juv_misd_count', 'juv_other_count', 'priors_count',
             'c_days_from_compas','c_charge_degree', 'c_charge_desc', 'v_decile_score',
             'start', 'end', 'event', 'two_year_recid'] # 16

# remove decile_score, v_decile_score... those features already contain prejudice
name_list1 = [ 'sex','age', 'race', 'priors_count',
             'c_charge_degree', 'c_charge_desc',
             'start', 'end', 'event', 'two_year_recid'] # 11


raw_data_for_train = raw.loc[:,name_list1]

print(raw_data_for_train.shape) 

# dropna 7214 -> 7185
raw_data_for_train = raw_data_for_train.dropna()
print(raw_data_for_train.shape)

(7214, 10)
(7185, 10)


In [3]:
## Reorder training data
African_American_index = raw_data_for_train.race == 'African-American'
Caucasian_index = raw_data_for_train.race == 'Caucasian'
print(raw_data_for_train[African_American_index].shape,raw_data_for_train[Caucasian_index].shape)

AA_train = raw_data_for_train[African_American_index]
CA_train = raw_data_for_train[Caucasian_index]
train_data = pd.concat( (AA_train, CA_train) ) 

'''
note
train_data is a df with dimension (6129 rows × 10 columns) 
first (3684,10) is z/s = 0
last (2445,10) is z/s = 1
now race is still in categorical form
'''

(3684, 10) (2445, 10)


'\nnote\ntrain_data is a df with dimension (6129 rows × 10 columns) \nfirst (3684,10) is z/s = 0\nlast (2445,10) is z/s = 1\nnow race is still in categorical form\n'

## label and feature

In [4]:
## label
train_label = train_data.two_year_recid
del train_data["two_year_recid"]  
print(train_label.shape)

## feature
# 1. normalizing numerical features
numeric_features = train_data.dtypes[train_data.dtypes != 'object'].index
train_data[numeric_features] = train_data[numeric_features].apply(lambda x: (x - x.mean()) / (x.std()))

# 2. convert sensitive feature to numerical feature 
# race = "African-American"/"Caucasian"  -->  race_num = 0/1
train_data = train_data.assign(race_num = (train_data['race']!= 'African-American') | (train_data['race']== 'Caucasian') ) # warning! + -> |
train_data.race_num = train_data.race_num.astype('int64')
del train_data["race"]
print(train_data.race_num[:10],"\n",train_data.race_num[-10:], train_data.race_num[:3684].mean(),train_data.race_num[3684:].mean())

# 3. convert other categorical features to dummy variable (one_hot)
all_features = pd.get_dummies(train_data, dummy_na=True) 

'''
note
print(all_features.shape) will return (6129, 423)
'''

(6129,)
1     0
2     0
3     0
11    0
13    0
15    0
17    0
20    0
21    0
27    0
Name: race_num, dtype: int64 
 7184    1
7185    1
7187    1
7188    1
7191    1
7192    1
7194    1
7199    1
7205    1
7206    1
Name: race_num, dtype: int64 0.0 1.0


'\nnote\nprint(all_features.shape) will return (6129, 423)\n'

In [5]:
## Put sensitive variable i.e. race_num to last column
reorder_column_name = list(all_features.keys())
# print(len(reorder_column_name))
race_num_name = reorder_column_name.pop(5)
# print(race_num_name,len(reorder_column_name))
reorder_column_name.append(race_num_name)
# print(reorder_column_name[-3:],len(reorder_column_name))
all_features = all_features[reorder_column_name]
print("test")
print(all_features.iloc[:,-2:])

'''
note

3 print will return
423
race_num 422
['c_charge_desc_arrest case no charge', 'c_charge_desc_nan', 'race_num'] 423

'''

test
      c_charge_desc_nan  race_num
1                     0         0
2                     0         0
3                     0         0
11                    0         0
13                    0         0
...                 ...       ...
7192                  0         1
7194                  0         1
7199                  0         1
7205                  0         1
7206                  0         1

[6129 rows x 2 columns]


"\nnote\n\n3 print will return\n423\nrace_num 422\n['c_charge_desc_arrest case no charge', 'c_charge_desc_nan', 'race_num'] 423\n\n"

In [6]:
## create and shuffle train test validation (5:1:1)
num_examples = all_features.shape[0]
# print(num_examples)
indices = list(range(num_examples))
random.shuffle(indices)
# print(np.max(indices))

all_features_shuffled = all_features.values[indices]
train_label_shuffled = train_label.values[indices]

x_train = all_features_shuffled[:4378]
y_train = train_label_shuffled[:4378]

x_val = all_features_shuffled[4378:5254]
y_val = train_label_shuffled[4378:5254]

x_test = all_features_shuffled[5254:]
y_test = train_label_shuffled[5254:]

#split out white testors
x_test_white_ind = x_test[:,-1] == 1
x_test_white = x_test[x_test_white_ind]
y_test_white = y_test[x_test_white_ind]

#black testors
x_test_black_ind = x_test[:,-1] == 0
x_test_black = x_test[x_test_black_ind]
y_test_black = y_test[x_test_black_ind]


print(x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape,y_test.shape)

print("#"*100)
print("#"*100)
########################################
########################################

## base model 
def model_1():
    feature_input = Input(all_features_shuffled.shape[1],)
    y = Dense(2,"softmax")(feature_input)
    
    model = Model(feature_input,y )
    
    return model


adam = tf.keras.optimizers.Adam(0.001)
loss = keras.losses.BinaryCrossentropy(from_logits=False)
metric = tf.keras.metrics.BinaryAccuracy()

model_1_1 = model_1()

model_1_1.compile(optimizer=adam,loss=loss,metrics=metric,run_eagerly=True)

model_1_1.fit(x_train,tf.one_hot(y_train,2),epochs=5,batch_size = 128,validation_data=(x_val,tf.one_hot(y_val,2)))

model_1_1.evaluate(x_test,tf.one_hot(y_test,2))

(4378, 423) (4378,) (876, 423) (876,) (875, 423) (875,)
####################################################################################################
####################################################################################################
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.5541599988937378, 0.8617143034934998]

In [7]:
# In test set:

def evaluation(model, test_labels, x_test):
    y_true = test_labels
    y_pred = np.argmax(model.predict(x_test), axis=1)
    CM = confusion_matrix(y_true, y_pred)/len(test_labels)
    # print('False positive rate is: ', CM[0][1], 'False negative rate is: ', CM[1][0])
    FPR =  CM[0][1]
    FNR =  CM[1][0]
    return FPR, FNR

#split out white testors
x_test_white_ind = x_test[:,-1] == 1
x_test_white = x_test[x_test_white_ind]
y_test_white = y_test[x_test_white_ind]

#black testors
x_test_black_ind = x_test[:,-1] == 0
x_test_black = x_test[x_test_black_ind]
y_test_black = y_test[x_test_black_ind]

#black accuracy
print("black")
print("False positive rate/","False negative rate ",evaluation(model_1_1, y_test_black, x_test_black))

#white accuracy
print("white")
print("False positive rate/","False negative rate ",evaluation(model_1_1, y_test_white, x_test_white))

black
False positive rate/ False negative rate  (0.059813084112149535, 0.04672897196261682)
white
False positive rate/ False negative rate  (0.0058823529411764705, 0.18235294117647058)


False positive means the model falsley make a positive prediction. Criminal will not recidivate, but model think he/she will.

False negative means the model falsley make a negative prediction. Criminal will recidivate, but model think he/she will not.

FPr of African American criminals is 6.0%, comparing to Caucasian's 0.58%

Criminals who are African American, are more likely to be predicted as recidivating in two years.

FNr of African American criminals is 4.7%, comparing to Caucasian's 18.23%

Criminals who are Caucasian, are much more likely to be predicted as not recidivating in two years.

In [8]:
print("Difference of FPR between black and white" ,
      evaluation(model_1_1, y_test_black, x_test_black)[0] - evaluation(model_1_1, y_test_white, x_test_white)[0])
print("Difference of FNR between black and white" ,
      evaluation(model_1_1, y_test_black, x_test_black)[1] - evaluation(model_1_1, y_test_white, x_test_white)[1])

Difference of FPR between black and white 0.053930731170973065
Difference of FNR between black and white -0.13562396921385375


In [9]:
# # In Train set
# #white 
# x_train_white_ind = x_train[:,-1] == 1
# x_train_white = x_train[x_train_white_ind]
# y_train_white = y_train[x_train_white_ind]

# #black 
# x_train_black_ind = x_train[:,-1] == 0
# x_train_black = x_train[x_train_black_ind]
# y_train_black = y_train[x_train_black_ind]

# #black accuracy
# print("black")
# print("False positive rate/","False negative rate ",evaluation(model_1_1, y_train_black, x_train_black))

# #white accuracy
# print("white")
# print("False positive rate/","False negative rate ",evaluation(model_1_1, y_train_white, x_train_white))

## Prejudice Remover

Add a prejudice remover to previous binary cross entropy loss (and also a L2 regularizer)

$$ -\mathcal{L}(D;\theta) + \eta \mathcal{R}( \mathcal{D}, \theta) + \frac{\lambda}{2} \| \theta\|_{2}^{2} $$

$ \mathcal{M}(Y|X,;\theta) = h_{\theta}(x^{T}\theta)^{y} * (1-h_{\theta}(x^{T}\theta))^{1-y} $
```
model_1_1()
```
log likelihood:
$ L(D;\theta) = \sum_{(y_i,x_i,s_i) \in \mathcal{D}} \ln \mathcal{M}[y_i|x_i,s_i ; \theta ] $ 

```
BinaryCrossentropy()

```

$\mathcal{R}( \mathcal{D}, \theta)$

$$ PI = \sum_{X,S} \tilde{Pr}[X,S] \sum_{Y} \mathcal{M}[Y|X,S;\theta] ln \frac{\hat{Pr}[Y,S]} {\hat{Pr}[S] \hat{Pr}[Y] } $$

which can be rewritten as:


$$ PI = \sum_{(x_i,s_i) \in \mathcal{D}} \sum_{y \in {\{0,1\}}} \mathcal{M}[Y|x_i,s_i;\theta] ln \frac{\hat{Pr}[y|s_i]} {\hat{Pr}[y] } $$


We approximate $\hat{Pr}[y|s_i]$ and $\hat{Pr}[y] $ by sample and model:

 $$  \hat{Pr}[y|s = k] = \frac{ \sum_{(x_i,s_i = k)} \mathcal{M}[y|x_i,s_i=k; \theta]}{| {(x_i,s_i=k) \in \mathcal{D}}|} $$
 
 $$\hat{Pr}[y]  = \frac{ \sum_{(x_i,s_i)} \mathcal{M}[y|x_i,s_i; \theta] }{|\mathcal{D}|} $$
 
 where $ |\mathcal{D}| $ is size of training set. Here $ s_i \in {\{0,1\}}$ representing African American and Caucasion, i.e. sensitive feature.



In [10]:
# subset for sensitive features for calculate approximate probabilities

X_s0 = all_features.values[:3684]
X_s1 = all_features.values[3684:]

X_full = all_features.values

X_s0[:,-1],X_s1[:,-1], X_s0.shape, X_s1.shape,X_full.shape

(array([0., 0., 0., ..., 0., 0., 0.]),
 array([1., 1., 1., ..., 1., 1., 1.]),
 (3684, 423),
 (2445, 423),
 (6129, 423))

In [11]:
def custom_loss_1(y_true, y_pred):
    
    '''
    X       n*d
    M(X)    n*2
    y       n,
    
    X_s0    n0*d
    
    X_s1    n1*d
    
    X_full  (n0+n1)*d
    
    '''
    eta =1
    
    M = model_1_1

    loss_vector = tf.keras.losses.binary_crossentropy( tf.one_hot(y_train,2), M(x_train))

    L = tf.reduce_sum(loss_vector)
    
    Pys0 = tf.reduce_sum( M(X_s0),axis=0 )/X_s0.shape[0]
    
    Pys1 = tf.reduce_sum( M(X_s1),axis=0 )/X_s1.shape[0]
    
    Py = tf.reduce_sum( M(X_full),axis=0 )/X_full.shape[0] 
    
    R = tf.reduce_sum(M(X_s0)* tf.math.log(Pys0 /Py))  + tf.reduce_sum(M(X_s1)*tf.math.log(Pys1/Py)) 
    

    loss =  L + eta* R
    
    return loss

In [12]:
from keras import regularizers

def model_1():
    feature_input = Input(all_features_shuffled.shape[1],)
    y = Dense(2,"softmax",kernel_regularizer=regularizers.l2(0.01))(feature_input)
    
    model = Model(feature_input,y )
    
    return model


adam = tf.keras.optimizers.Adam(0.001)
# loss = keras.losses.BinaryCrossentropy(from_logits=False) 
loss = custom_loss_1
metric = tf.keras.metrics.BinaryAccuracy()

model_1_1 = model_1()

model_1_1.compile(optimizer=adam,
               loss=loss,
               metrics=metric 
               )

model_1_1.fit(x_train,tf.one_hot(y_train,2),epochs=5,validation_data=(x_val,tf.one_hot(y_val,2)))

model_1_1.evaluate(x_test,tf.one_hot(y_test,2))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[1439.778564453125, 0.9062857031822205]

Model accuracy on test set improves from 86% to 90% by adding prejudice remover and regularizers

In [13]:
# In test set:

def evaluation(model, test_labels, x_test):
    y_true = test_labels
    y_pred = np.argmax(model.predict(x_test), axis=1)
    CM = confusion_matrix(y_true, y_pred)/len(test_labels)
    # print('False positive rate is: ', CM[0][1], 'False negative rate is: ', CM[1][0])
    FPR =  CM[0][1]
    FNR =  CM[1][0]
    return FPR, FNR

#white testors
x_test_white_ind = x_test[:,-1] == 1
x_test_white = x_test[x_test_white_ind]
y_test_white = y_test[x_test_white_ind]

#black testors
x_test_black_ind = x_test[:,-1] == 0
x_test_black = x_test[x_test_black_ind]
y_test_black = y_test[x_test_black_ind]

#black accuracy
print("black")
print("False positive rate/","False negative rate ",evaluation(model_1_1, y_test_black, x_test_black))

#white accuracy
print("white")
print("False positive rate/","False negative rate ",evaluation(model_1_1, y_test_white, x_test_white))

black
False positive rate/ False negative rate  (0.04299065420560748, 0.05794392523364486)
white
False positive rate/ False negative rate  (0.03823529411764706, 0.04411764705882353)


In test set:

FPr for African American decreases 6.0% -> 4.2%. FNr increases 4.7% -> 5.8%. 

FPr for Caucasion increases 0.58% -> 3.8%. FNr decreases 18.2% -> 4.4%

Conclusion: prejudice remover does reduce bias.


In [14]:
print("Difference of FPR between black and white" ,
      evaluation(model_1_1, y_test_black, x_test_black)[0] - evaluation(model_1_1, y_test_white, x_test_white)[0])
print("Difference of FNR between black and white" ,
      evaluation(model_1_1, y_test_black, x_test_black)[1] - evaluation(model_1_1, y_test_white, x_test_white)[1])

Difference of FPR between black and white 0.004755360087960417
Difference of FNR between black and white 0.013826278174821328


Comparing to base model without bias remover

Difference of FPR between black and white 0.053930731170973065 \
Difference of FNR between black and white -0.13562396921385375

We reduce the absolute difference of FPR and FNR between races. Prejudice remover works.