## Implementation of $DM_{sen}$ Algorithm

Only focusing on reducing difference of false positive rate or false negative rates beween sensitive categories.

Try implementing A4 12/4

In [None]:
import pandas as pd
import numpy as np
import random

import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split

from keras.layers import Dense, Input
from tensorflow.keras import Model

from sklearn.metrics import classification_report, confusion_matrix

raw = pd.read_csv("./compas-scores-two-years.csv")

## 0. 选取 feature
name_list1 = [ 'sex','age', 'race', 'priors_count',
             'c_charge_degree', 'c_charge_desc',
             'start', 'end', 'event', 'two_year_recid'] # 11


raw_data = raw.loc[:,name_list1]

#print(raw_data.shape) 

# dropna 7214 -> 7185
raw_data = raw_data[raw_data[[ 'sex','age', 'race', 'priors_count','c_charge_degree', 'c_charge_desc',
             'start', 'end', 'event', 'two_year_recid']].notna()]
print("dataset size after dropping NAs: ", raw_data.shape)

########################################
########################################
print('\n',"#"*100, '\n', ' '*30, " Data Preprocessing ",'\n',"#"*100, '\n')


## only include African-American and Caucasians data
African_American_index = raw_data.race == 'African-American'
Caucasian_index = raw_data.race == 'Caucasian'

AA = raw_data[African_American_index]
CA = raw_data[Caucasian_index]

raw_data = pd.concat( (AA, CA) )

print("number of white ppl: ",raw_data[Caucasian_index].shape[0], ", number of black ppl: ", raw_data[African_American_index].shape[0])

# extracting labels as train_labels
label = raw_data.two_year_recid
del raw_data["two_year_recid"]  

# standardize numerical values 
numeric_features = raw_data.dtypes[raw_data.dtypes != 'object'].index
raw_data[numeric_features] = raw_data[numeric_features].apply(lambda x: (x - x.mean()) / (x.std()))

#turn binary categorical features into 0,1 
cleanups = {"race": {"African-American": 0, "Caucasian": 1},
            "sex": {"Male": 0, "Female": 1 },
            "c_charge_degree": {"F": 0, "M": 1}}

raw_data = raw_data.replace(cleanups)

#multivariate categorical values into dummies
raw_data = pd.get_dummies(raw_data, dummy_na=True) 
cleaned_data = raw_data.astype('float32')

## place sensitive features at last
reorder_column_name = list(cleaned_data.keys())
race_colname = reorder_column_name.pop(2)
reorder_column_name.append(race_colname)
print("new colnames: ",reorder_column_name)
cleaned_data = cleaned_data[reorder_column_name]

## look at the last two columns
print("Cleaned Dataset")
print(cleaned_data.iloc[:,:])


dataset size after dropping NAs:  (7214, 10)

 #################################################################################################### 
                                 Data Preprocessing  
 #################################################################################################### 

number of white ppl:  2454 , number of black ppl:  3696
new colnames:  ['sex', 'age', 'priors_count', 'c_charge_degree', 'start', 'end', 'event', 'c_charge_desc_Abuse Without Great Harm', 'c_charge_desc_Accessory After the Fact', 'c_charge_desc_Agg Assault W/int Com Fel Dome', 'c_charge_desc_Agg Battery Grt/Bod/Harm', 'c_charge_desc_Agg Fleeing and Eluding', 'c_charge_desc_Agg Fleeing/Eluding High Speed', 'c_charge_desc_Aggrav Battery w/Deadly Weapon', 'c_charge_desc_Aggrav Child Abuse-Agg Battery', 'c_charge_desc_Aggrav Child Abuse-Causes Harm', 'c_charge_desc_Aggrav Stalking After Injunctn', 'c_charge_desc_Aggravated Assault', 'c_charge_desc_Aggravated Assault W/Dead Weap', 'c_charg



In [None]:
########################################
########################################
print('\n',"#"*100, '\n', ' '*20, " Split up Train Validation Test sets ",'\n',"#"*100, '\n')


## 3. shuffle 创建 train test validation
x_train, x_val, y_train, y_val = train_test_split(cleaned_data, label, test_size=0.20, random_state=42)
x_val,x_test,y_val,y_test = train_test_split(x_val, y_val, test_size=0.50, random_state=42)

if type(x_train).__module__ != "numpy":
  x_train, x_val, y_train, y_val = x_train.to_numpy(), x_val.to_numpy(), y_train.to_numpy(), y_val.to_numpy()
  
#split out white testors
x_test_white_ind = np.array(x_test['race'] == 1.0)
x_test_white = x_test[x_test_white_ind]
y_test_white = y_test[x_test_white_ind]

#black testors
x_test_black_ind = np.array(x_test['race'] == 0.0)
x_test_black = x_test[x_test_black_ind]
y_test_black = y_test[x_test_black_ind]

if type(x_test).__module__ != "numpy":
  x_test, y_test, x_test_white, y_test_white, x_test_black, y_test_black = x_test.to_numpy(), y_test.to_numpy(), x_test_white.to_numpy(), y_test_white.to_numpy(), x_test_black.to_numpy(), y_test_black.to_numpy()

print("X_train size: ", x_train.shape, ", y_train size: ", y_train.shape, '\n',
      "X_validation size: ", x_val.shape, ", y_validation size: ", y_val.shape, '\n',
      "X_test sizes for African Americans and Caucasians respectively: ", x_test_white.shape,x_test_black.shape)


########################################
########################################
print('\n',"#"*100, '\n', ' '*20, " Training the Original Model without Prejudice Control ",'\n',"#"*100, '\n')

## 4. 创建 base 模型
def model_1():
    feature_input = Input(raw_data.shape[1],)
    y = Dense(2,"softmax")(feature_input)
    
    model = Model(feature_input,y )
    
    return model


adam = tf.keras.optimizers.Adam(0.001)
loss = keras.losses.BinaryCrossentropy(from_logits=False)
metric = tf.keras.metrics.BinaryAccuracy()

model_1_1 = model_1()

model_1_1.compile(optimizer=adam,
               loss=loss,
               metrics=metric 
               )

model_1_1.fit(x_train,tf.one_hot(y_train,2),epochs=7,batch_size = 128,validation_data=(x_val,tf.one_hot(y_val,2)))

model_1_1.evaluate(x_test,tf.one_hot(y_test,2))


 #################################################################################################### 
                       Split up Train Validation Test sets  
 #################################################################################################### 

X_train size:  (4920, 419) , y_train size:  (4920,) 
 X_validation size:  (615, 419) , y_validation size:  (615,) 
 X_test sizes for African Americans and Caucasians respectively:  (241, 419) (374, 419)

 #################################################################################################### 
                       Training the Original Model without Prejudice Control  
 #################################################################################################### 

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


[0.4981095492839813, 0.8796747922897339]

## Verification for Prejudice: FPR & FNR - Original Model

In [None]:
def evaluation(model, test_labels, x_test):
  exist = int(np.unique(x_test[:,-1]))== 0
  if exist:
    race = "African-Americans"
  else:
    race = "Caucasians"

  y_true = test_labels
  y_pred = model.predict(x_test)
  y_pred = np.argmax(y_pred,axis = 1)
  CM = confusion_matrix(y_true, y_pred)/len(test_labels)

  print('For', race, ', False positive rate is: ', CM[0][1], ',False negative rate is: ', CM[1][0])

#black accuracy
evaluation(model_1_1,y_test_black,x_test_black)

#white accuracy
evaluation(model_1_1,y_test_white,x_test_white)

def dfr(model, test_labels_0, x_test_0, test_labels_1, x_test_1, df):

  y_pred_0 = model.predict(x_test_0)
  y_pred_0 = np.argmax(y_pred_0,axis = 1)
  y_pred_1 = model.predict(x_test_1)
  y_pred_1 = np.argmax(y_pred_1,axis = 1)
  CM_0 = confusion_matrix(test_labels_0, y_pred_0)/len(test_labels_0)
  CM_1 = confusion_matrix(test_labels_1, y_pred_1)/len(test_labels_1)

  #print('Disparate Mistreatment of False positive rate is: ', CM_0[0][1]-CM_1[0][1], '\nDisparate Mistreatment of False negative rate is: ', CM_0[1][0]-CM_1[1][0])
  if df == "dfpr":
    return CM_0[0][1]-CM_1[0][1]
  elif df == "dfnr":
    return CM_0[1][0]-CM_1[1][0]
  else:
    return None

dfpr = dfr(model_1_1,y_test_black,x_test_black,y_test_white,x_test_white, df=  "dfpr")
dfnr = dfr(model_1_1,y_test_black,x_test_black,y_test_white,x_test_white, df = "dfnr")
print(dfpr, dfnr)

For African-Americans , False positive rate is:  0.053475935828877004 ,False negative rate is:  0.058823529411764705
For Caucasians , False positive rate is:  0.029045643153526972 ,False negative rate is:  0.1037344398340249
0.024430292675350032 -0.0449109104222602


#Baseline model

In [None]:
def model_1():
    feature_input = Input(cleaned_data.shape[1],)
    y = Dense(2,"softmax")(feature_input)
    
    model = Model(feature_input,y )
    
    return model

#Custom Loss for Baseline model

In [None]:
#define universal penalty factor c

def new_training_groups(M, x_train, y_train):
    '''
    X     n*d
    M(X)    n*2
    y       n,
    delta   c,
    dfr   "dfpr","dfnr"
    
    '''
    #split training sets according to sensitive variable
    x_train_0 = x_train[np.array(x_train[:,-1] == 0.0)]
    y_train_0 = (y_train[np.array(x_train[:,-1] == 0.0)])
    x_train_1 = x_train[np.array(x_train[:,-1] == 1.0)]
    y_train_1 = (y_train[np.array(x_train[:,-1] == 1.0)])
    
    #get the ones with wrong prediction in discriminated group
    dp = dfr(M,y_train_0,x_train_0,y_train_1,x_train_1, df = "dfpr")
    dn = dfr(M,y_train_0,x_train_0,y_train_1,x_train_1, df = "dfnr")
 
    if dp > 0 or dn< 0: d = 0
    else: d = 1

    if d == 0:
      #take penalized trainers
      y0_pred = tf.argmax(M.predict(x_train_0),axis = 1)
      y0_train = tf.constant(y_train_0)
      y0_diff = (y0_train-y0_pred).numpy()
      x_train_penalized = x_train_0[np.array(y0_diff != 0.0)]
      y_train_penalized = y_train_0[np.array(y0_diff != 0.0)]
      # safe trainers
      x_train_safe = x_train_0[np.array(y0_diff == 0.0)]
      y_train_safe = y_train_0[np.array(y0_diff == 0.0)]
      #make new
      x_train_safe = tf.concat((x_train_safe,x_train_1),0)
      y_train_safe = tf.concat((y_train_safe,y_train_1),0)
    
    else:
      #reverse the steps above for train set 1
      y1_pred = tf.argmax(M.predict(x_train_1),axis = 1)
      y1_train = tf.constant(y_train_1)
      y1_diff = (y1_train-y1_pred).numpy()
      x_train_penalized = x_train_1[np.array(y1_diff != 0.0)]
      y_train_penalized = y_train_1[np.array(y1_diff != 0.0)]
      x_train_penalized = tf.convert_to_tensor(x_train_penalized, dtype=tf.float32)
      y_train_penalized = tf.convert_to_tensor(y_train_penalized, dtype=tf.int64)
      # safe trainers
      x_train_safe = x_train_1[np.array(y1_diff == 0.0)]
      y_train_safe = y_train_1[np.array(y1_diff == 0.0)]
      #make new
      x_train_safe = tf.concat((x_train_safe,x_train_0),0)
      y_train_safe = tf.concat((y_train_safe,y_train_0),0)

    return x_train_safe , y_train_safe , x_train_penalized , y_train_penalized, dp, dn

      # #calculate two losses
      # 

      # L_p = np.sum(penalized_loss)
      # L_s = np.sum(safe_loss)
      
      # loss =  L_p*c + L_s

      # return loss

##Constraining on only Dfpr or Dfnr

In [None]:
## initialization 
model =  model_1()
adam = tf.keras.optimizers.Adam(0.001)
loss = keras.losses.BinaryCrossentropy(from_logits=False)
metric = tf.keras.metrics.BinaryAccuracy()

model.compile(optimizer=adam,loss=loss,metrics=metric)
model.fit(x_train,tf.one_hot(y_train,2),epochs=5,batch_size = 128,validation_data=(x_val,tf.one_hot(y_val,2)))

#initialized C and delta
C = 1
delta = 0.2

# new training groups
x_ts , y_ts, x_tp , y_tp , dp, dn= new_training_groups(model, x_train, y_train)
model_in_loop = model_1()

def penalized_loss(y_true,y_pred):
  return tf.keras.losses.binary_crossentropy( tf.one_hot(y_tp,2), model_in_loop(x_tp))

def safe_loss(y_true,y_pred):  
  return tf.keras.losses.binary_crossentropy( tf.one_hot(y_ts,2), model_in_loop(x_ts))

#start while loop
while tf.abs(dp) > 0.01: #或者 dn（控制Dfnr）
  #model_in_loop = model_1()

  C = C + delta # 增大 unfair那波人的权重 C

  model_in_loop.compile(optimizer=adam,loss=[penalized_loss, safe_loss],loss_weights=[C,1],metrics=metric )

  model_in_loop.fit(x_train, tf.one_hot(y_train,2), epochs=10, validation_data=(x_val,tf.one_hot(y_val,2)))

  x_ts , y_ts, x_tp , y_tp , dp, dn= new_training_groups(model_in_loop, x_train, y_train)      
      
        

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
dp, dn

(0.00023296297092261116, 0.017866798478433726)

In [None]:
print(dfr(model_in_loop,y_test_black,x_test_black,y_test_white,x_test_white, df = "dfpr"))

0.009219606363858254


In [None]:
evaluation(model_in_loop,y_test_black,x_test_black)
evaluation(model_in_loop,y_test_white,x_test_white)

For African-Americans , False positive rate is:  0.013368983957219251 ,False negative rate is:  0.14171122994652408
For Caucasians , False positive rate is:  0.004149377593360996 ,False negative rate is:  0.11203319502074689
