In [486]:
from dataset_readers import *
from sklearn.preprocessing import StandardScaler
from aif360.metrics import ClassificationMetric
from util import mathew_correlation_coefficient, f1_score
from models import FairTransitionLossMLP

import numpy as np
from scipy.stats import spearmanr, pearsonr
from tensorflow import broadcast_to, expand_dims
from tensorflow.keras import Sequential
from tensorflow.keras.regularizers import Regularizer
from tensorflow.keras.layers import InputLayer, Dropout, Dense
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping
from aif360.algorithms import Transformer
from tensorflow.keras import backend as K
from xicorrelation import xicorr


In [487]:
class FeaturewiseRegularizer(Regularizer):
    def __init__(self, lambdas, l2):
        # lambdas should be an array of regularization coefficients, one per feature
        self.lambdas = K.constant(lambdas)
        self.lambdas = expand_dims(self.lambdas, axis=1)
        self.l2 = l2

    def __call__(self, x):
        # Custom regularization: l2 * sum of lambda[i] * sqrt(x[i]) across all features
        return self.l2 * K.sum(broadcast_to(self.lambdas, x.shape) * K.square(x))

    def get_config(self):
        # This method enables the regularizer to be serialized
        return {'lambdas': self.lambdas.numpy().tolist(),
                'l2': self.l2}

In [488]:
class SimpleMLP(Transformer):

    def __init__(self, sensitive_attr='',
                 hidden_sizes=[32, 64, 32], dropout=0.1,
                 num_epochs=20, batch_size=64, patience=5,
                 corr_type=None, l2=0.0):

        self.model = None
        self.hidden_sizes = hidden_sizes
        self.input_shape = None
        self.num_classes = 2
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.dropout = dropout
        self.corr_type = corr_type

        if corr_type == 'spearman':
            self.corr_fn = spearmanr
        elif corr_type == 'pearson':
            self.corr_fn = pearsonr
        elif corr_type == 'xi':
            self.corr_fn = xicorr
        else:
            self.corr_fn = None
            self.corr_type = None
        self.corr = None
        self.l2 = l2
        self.patience = patience
        self.sensitive_attr = sensitive_attr
        self.classes_ = None
        self.history = None

    def _calculate_corr(self, features, sensitive_feature):
        self.corr = np.array([abs(self.corr_fn(features[:, i], sensitive_feature)[0])
                     for i in range(features.shape[1])])
        self.corr[np.isnan(self.corr)] = 1.0
        self.corr[self.corr == 1.0] = 0.0
        self.corr[self.corr > 1.0] = 0.0
        self.corr = self.corr.tolist()

    def _compile_model(self):
        self.model = Sequential()
        self.model.add(InputLayer(input_shape=self.input_shape))
        if self.corr is not None:
            regularizer = FeaturewiseRegularizer(l2=self.l2, lambdas=self.corr)
        else:
            regularizer = None
        for i, hidden_size in enumerate(self.hidden_sizes):
            if i == 0:
                self.model.add(Dense(units=hidden_size, activation='relu',
                                     kernel_regularizer=regularizer))
            else:
                self.model.add(Dense(hidden_size, activation='relu'))
                self.model.add(Dropout(self.dropout))

        self.model.add(Dense(self.num_classes, activation="softmax"))
        self.model.compile(optimizer=Adam(learning_rate=3e-4),
                           loss='categorical_crossentropy')
    def fit(self, dataset, verbose=False):

        callback = EarlyStopping(monitor='val_loss', patience=self.patience, restore_best_weights=True)
        dataset_cp = dataset.copy()
        X = dataset_cp.features
        y = np.zeros(shape=(X.shape[0], 2))

        y[:, 0] = (dataset_cp.labels == dataset_cp.unfavorable_label).reshape(X.shape[0]).astype(int)
        y[:, 1] = (dataset_cp.labels == dataset_cp.favorable_label).reshape(X.shape[0]).astype(int)

        sensitive_index = dataset.protected_attribute_names.index(self.sensitive_attr)
        X_protected = (dataset.protected_attributes[:, sensitive_index] == dataset.unprivileged_protected_attributes[
            sensitive_index]).astype(int)

        if self.model is None:
            self.input_shape = dataset.features.shape[1]
            if self.corr_type is not None:
                self._calculate_corr(dataset.features, X_protected)
            self._compile_model()
            self.classes_ = np.array([dataset.unfavorable_label, dataset.favorable_label])

        self.history = self.model.fit(X, y, epochs=self.num_epochs,
                                      batch_size=self.batch_size, callbacks=[callback],
                                      verbose=verbose, validation_split=0.1)

        return self

    def predict_proba(self, X):
        return self.model.predict(X, verbose=False)

    def predict(self, X):
        logits = self.predict_proba(X)
        return np.argmax(logits, axis=1)

In [489]:
def get_counterfactual_dataset(dataset, sens_attr):
    # Create a counterfactual dataset, flipping the sensitive feature value
    counterfactual_dataset = dataset.copy(deepcopy=True)

    sens_index = dataset.feature_names.index(sens_attr)

    unprivileged_rows = dataset.features[:,sens_index] == float(dataset.unprivileged_protected_attributes[0])
    privileged_rows = dataset.features[:,sens_index] == float(dataset.privileged_protected_attributes[0])

    counterfactual_dataset.features[unprivileged_rows,sens_index] = dataset.privileged_protected_attributes[0]
    counterfactual_dataset.features[privileged_rows,sens_index] = dataset.unprivileged_protected_attributes[0]

    counterfactual_dataset.protected_attributes[unprivileged_rows, 0] = float(dataset.privileged_protected_attributes[0])
    counterfactual_dataset.protected_attributes[privileged_rows, 0] = float(dataset.unprivileged_protected_attributes[0])

    return counterfactual_dataset

In [490]:
def simple_mlp_initializer(sens_attr, unprivileged_groups, privileged_groups):
    hidden_sizes = [100, 100]
    corr_type = None
    l2 = 0.0
    dropout = 0.2

    model = SimpleMLP(sensitive_attr=sens_attr,
                    hidden_sizes=hidden_sizes,
                    dropout=dropout,
                    batch_size=64,
                    corr_type=corr_type,
                    l2=l2)
    return model

In [491]:
def ftl_mlp_initializer(sens_attr, unprivileged_groups, privileged_groups,
                        privileged_demotion=0.0, privileged_promotion=0.0, protected_demotion=0.0, protected_promotion=0.0):
    hidden_sizes = [100,100]
    corr_type = 'xi'
    l2 = 0.1
    dropout = 0.2

    model = FairTransitionLossMLP(sensitive_attr=sens_attr,
                                  hidden_sizes=hidden_sizes,
                                  dropout=dropout,
                                  batch_size=64,
                                  privileged_demotion=privileged_demotion,
                                  privileged_promotion=privileged_promotion,
                                  protected_demotion=protected_demotion,
                                  protected_promotion=protected_promotion,
                                  corr_type=corr_type, l2=l2)

    return model

In [492]:
def eval(model, dataset, unprivileged_groups, privileged_groups):
    
    try:
        # sklearn classifier
        y_pred_prob = model.predict_proba(dataset.features)
        pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]
        y_pred = (y_pred_prob[:, 1] > 0.5).astype(np.float64)
        


        y_pred_mapped = y_pred.copy()
        # Map the dataset labels to back to their original values.
        y_pred_mapped[y_pred == 0] = dataset.unfavorable_label
        y_pred_mapped[y_pred == 1] = dataset.favorable_label

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_pred_mapped

    except AttributeError:
        # aif360 inprocessing algorithm
        y_pred = model.predict(dataset).labels
        y_pred_prob = model.predict_proba(dataset)

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_pred

        # Map the dataset labels to back to their original values.
        temp_labels = dataset_pred.labels.copy()

        temp_labels[(dataset_pred.labels == 1.0).ravel(), 0] = dataset.favorable_label
        temp_labels[(dataset_pred.labels == 0.0).ravel(), 0] = dataset.unfavorable_label

        dataset_pred.labels = temp_labels.copy()
    metric = ClassificationMetric(
            dataset, dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

    metrics = dict()
    metrics['overall_acc'] = abs(metric.accuracy())
    metrics['bal_acc'] = abs((metric.true_positive_rate()
                                 + metric.true_negative_rate()) / 2)
    metrics['avg_odds_diff'] = metric.average_abs_odds_difference()
    metrics['disp_imp'] = abs(metric.disparate_impact())
    metrics['stat_par_diff'] = abs(metric.statistical_parity_difference())
    metrics['eq_opp_diff'] = abs(metric.equal_opportunity_difference())
    metrics['theil_ind'] = abs(metric.theil_index())
    metrics.update(metric.performance_measures())
    metrics['MCC'] = mathew_correlation_coefficient(metrics)
    metrics['f1_score'] = f1_score(metrics)

    return metrics, dataset_pred.labels, y_pred_prob

In [493]:
def get_T(model, X):
    x_prob = model.predict_proba(X)
    x_hat_0 = np.argmax(x_prob[:, 0])
    x_hat_1 = np.argmax(x_prob[:, 1])
    T_11 = x_prob[x_hat_1, 1]
    T_10 = x_prob[x_hat_1, 0]
    T_00 = x_prob[x_hat_0, 0]
    T_01 = x_prob[x_hat_0, 1]
    T = np.array([[T_00, T_01], [T_10, T_11]])
    return T

In [494]:
def train_model(dataset_reader, model_initializer, **kwargs):
    (dataset_expanded_train, dataset_train, dataset_val, dataset_test,
     unprivileged_groups, privileged_groups, sens_attr) = dataset_reader(shuffle=True)

    counterfactual_dataset_test = get_counterfactual_dataset(dataset_test, sens_attr)

    scaler = StandardScaler()
    dataset_expanded_train.features = scaler.fit_transform(dataset_expanded_train.features)
    dataset_test.features = scaler.transform(dataset_test.features)
    counterfactual_dataset_test.features = scaler.transform(counterfactual_dataset_test.features)

    model = model_initializer(sens_attr, unprivileged_groups, privileged_groups, **kwargs)

    model = model.fit(dataset_expanded_train)
    result, prediction, logits = eval(model, dataset_test, unprivileged_groups, privileged_groups)
    counterfactual_result, counterfactual_prediction, counterfactual_logits = eval(model, counterfactual_dataset_test, unprivileged_groups, privileged_groups)

    return result, prediction, logits, counterfactual_result, counterfactual_prediction, counterfactual_logits

In [495]:
result, prediction, logits, counterfactual_result, counterfactual_prediction, counterfactual_logits  = train_model(adult_dataset_reader, simple_mlp_initializer)



#### Training Dataset shape
(28941, 102)
#### Validation Dataset shape
(7236, 102)
#### Test Dataset shape
(9045, 102)
#### Favorable and unfavorable labels
1.0 0.0
#### Protected attribute names
['sex']
#### Privileged and unprivileged protected attribute values
[array([1.])] [array([0.])]
#### Dataset feature names
['age', 'education-num', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'workclass=Federal-gov', 'workclass=Local-gov', 'workclass=Private', 'workclass=Self-emp-inc', 'workclass=Self-emp-not-inc', 'workclass=State-gov', 'workclass=Without-pay', 'education=10th', 'education=11th', 'education=12th', 'education=1st-4th', 'education=5th-6th', 'education=7th-8th', 'education=9th', 'education=Assoc-acdm', 'education=Assoc-voc', 'education=Bachelors', 'education=Doctorate', 'education=HS-grad', 'education=Masters', 'education=Preschool', 'education=Prof-school', 'education=Some-college', 'marital-status=Divorced', 'marital-status=Married-AF-spouse', 'marital-status=Marr

  unprivileged_rows = dataset.features[:,sens_index] == float(dataset.unprivileged_protected_attributes[0])
  privileged_rows = dataset.features[:,sens_index] == float(dataset.privileged_protected_attributes[0])
  counterfactual_dataset.protected_attributes[unprivileged_rows, 0] = float(dataset.privileged_protected_attributes[0])
  counterfactual_dataset.protected_attributes[privileged_rows, 0] = float(dataset.unprivileged_protected_attributes[0])


In [496]:
result

{'overall_acc': 0.8528468767274737,
 'bal_acc': 0.7817830134594259,
 'avg_odds_diff': 0.07965192876393784,
 'disp_imp': 0.3086443656727532,
 'stat_par_diff': 0.19362494959747503,
 'eq_opp_diff': 0.07853293288571983,
 'theil_ind': 0.11273346983432551,
 'TPR': 0.6433439345751931,
 'TNR': 0.9202220923436587,
 'FPR': 0.07977790765634132,
 'FNR': 0.3566560654248069,
 'GTPR': 1.0,
 'GTNR': 1.0,
 'GFPR': 0.0,
 'GFNR': 0.0,
 'PPV': 0.7217125382262997,
 'NPV': 0.8891712551178879,
 'FDR': 0.2782874617737003,
 'FOR': 0.1108287448821121,
 'ACC': 0.8528468767274737,
 'MCC': 0.586748116591865,
 'f1_score': 0.6802786452077829}

In [497]:
logits

array([[0.9932836 , 0.00671642],
       [0.5716578 , 0.4283422 ],
       [0.5642539 , 0.4357461 ],
       ...,
       [0.999     , 0.001     ],
       [0.9237688 , 0.07623114],
       [0.9024113 , 0.09758865]], dtype=float32)

In [498]:
prediction

array([0., 0., 0., ..., 0., 0., 0.])

In [499]:
counterfactual_result

{'overall_acc': 0.8470978441127694,
 'bal_acc': 0.7597987578569967,
 'avg_odds_diff': 0.06724973854162426,
 'disp_imp': 2.0031580894033296,
 'stat_par_diff': 0.11765854201137359,
 'eq_opp_diff': 0.10580244430263852,
 'theil_ind': 0.12507218162432762,
 'TPR': 0.5897319400272604,
 'TNR': 0.9298655756867329,
 'FPR': 0.0701344243132671,
 'FNR': 0.4102680599727397,
 'GTPR': 1.0,
 'GTNR': 1.0,
 'GFPR': 0.0,
 'GFNR': 0.0,
 'PPV': 0.7300337457817773,
 'NPV': 0.8757396449704142,
 'FDR': 0.26996625421822273,
 'FOR': 0.1242603550295858,
 'ACC': 0.8470978441127694,
 'MCC': 0.5610332868203817,
 'f1_score': 0.6524252324704699}

In [500]:
counterfactual_logits

array([[0.9917326 , 0.00826739],
       [0.6880872 , 0.31191283],
       [0.61839855, 0.38160142],
       ...,
       [0.9973992 , 0.0026008 ],
       [0.9568373 , 0.0431627 ],
       [0.9485195 , 0.05148051]], dtype=float32)

In [501]:
counterfactual_prediction

array([0., 0., 0., ..., 0., 0., 0.])

In [502]:
counterfactually_consistent = prediction == counterfactual_prediction
counterfactually_consistent

array([ True,  True,  True, ...,  True,  True,  True])

In [503]:
np.sum(counterfactually_consistent)/prediction.shape[0]

0.9579878385848535

In [504]:
sum(logits[:, 1] > 0.7)/logits.shape[0]

0.11531232725262576

In [507]:
np.sort(logits[:,1] - counterfactual_logits[:,1])

array([-0.310433  , -0.29484716, -0.29242566, ...,  0.25849572,
        0.26054797,  0.31403267], dtype=float32)