source: https://archive.ics.uci.edu/dataset/891/cdc+diabetes+health+indicators

In [1]:
# Uncomment the command below if ucimlrepo is not installed yet
#pip install ucimlrepo

In [2]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict, RepeatedStratifiedKFold
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, f1_score
from sklearn.model_selection import train_test_split

## Load Dataset

In [3]:
def load_dataset():
    """Fetch UCI dataset 891 and return preprocessed train/test splits.

    The numerical columns are standardised and the categorical columns are
    one-hot encoded. The transformer is fitted on the training split only and
    then applied to the test split.

    Returns:
        A 6-tuple ``(X_train, X_test, y_train, y_test, gender_train, gender_test)``
        where the ``X_*`` entries are the transformed feature matrices, the
        ``y_*`` entries are the 'Diabetes_binary' target for each split and the
        ``gender_*`` entries are the raw 'Sex' column values (as arrays) for
        the same rows.
    """
    dataset = fetch_ucirepo(id=891)
    features = dataset.data.features
    target = dataset.data.targets['Diabetes_binary']

    # Columns that get standard scaling
    numeric_columns = ['BMI', 'MentHlth', 'PhysHlth']
    # Columns that get one-hot encoding (order determines encoded column order)
    categorical_columns = [
        'HighBP', 'HighChol', 'CholCheck', 'Smoker', 'Stroke', 'HeartDiseaseorAttack',
        'PhysActivity', 'Fruits', 'Veggies', 'HvyAlcoholConsump', 'AnyHealthcare',
        'NoDocbcCost', 'GenHlth', 'DiffWalk', 'Sex', 'Age', 'Education', 'Income',
    ]

    column_transformer = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_columns),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
        ]
    )

    # 80/20 split with a fixed seed; 'Sex' is split alongside so it stays row-aligned
    split = train_test_split(features, target, features['Sex'], test_size=0.2, random_state=42)
    X_train_full, X_test_full, y_train, y_test, gender_train, gender_test = split

    # Fit on the training rows only, then reuse the fitted transformer on the test rows
    X_train_processed = column_transformer.fit_transform(X_train_full)
    X_test_processed = column_transformer.transform(X_test_full)

    return (
        X_train_processed,
        X_test_processed,
        y_train,
        y_test,
        gender_train.values,
        gender_test.values,
    )

In [4]:
def determine_confusion_matrix(df):
    """Label one row as 'TP', 'FP', 'TN' or 'FN'.

    ``df`` is a single record (e.g. a row passed by ``DataFrame.apply(axis=1)``)
    exposing 'y_true' and 'y_pred'. Anything that is not a TP, FP or TN is
    reported as 'FN'.
    """
    actual = df['y_true']
    predicted = df['y_pred']

    if predicted == 1:
        # Positive prediction: correct -> TP, otherwise FP
        return 'TP' if actual == predicted else 'FP'
    if actual == predicted and predicted == 0:
        return 'TN'
    return 'FN'

In [5]:
# Fetch the dataset and build the preprocessed train/test splits.
# gender_train / gender_test carry the 'Sex' values of the rows in each split.
X_train, X_test, y_train, y_test, gender_train, gender_test = load_dataset()

## Train Model and Generate Predictions

### Select Model Based on Performance

In [6]:
# Candidate 1: logistic regression (large max_iter, fixed seed)
model1 = LogisticRegression(max_iter=10000, random_state=0)

# Train the Logistic Regression model
model1.fit(X_train, y_train)

# Predict hard labels and class probabilities on the testing set
y_pred1 = model1.predict(X_test)
y_pred_proba1 = model1.predict_proba(X_test)

# Evaluate the model; AUC uses the second probability column as the positive-class score
accuracy1 = accuracy_score(y_test, y_pred1)
auc1 = roc_auc_score(y_test, y_pred_proba1[:, 1])
f1_1 = f1_score(y_test, y_pred1)

print('Accuracy:', accuracy1, '\nAUC:', auc1, '\nf1:', f1_1)

Accuracy: 0.8676087984862819 
AUC: 0.8288682758066404 
f1: 0.2510870777121195


In [7]:
# Candidate 2: neural network with a single hidden layer of 21 units
model2 = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(21,), random_state=1, max_iter=100)

# Train the MLP model
model2.fit(X_train, y_train)

# Predict hard labels and class probabilities on the testing set
y_pred2 = model2.predict(X_test)
y_pred_proba2 = model2.predict_proba(X_test)

# Evaluate the model; AUC uses the second probability column as the positive-class score
accuracy2 = accuracy_score(y_test, y_pred2)
auc2 = roc_auc_score(y_test, y_pred_proba2[:, 1])
f1_2 = f1_score(y_test, y_pred2)

print('Accuracy:', accuracy2, '\nAUC:', auc2, '\nf1:', f1_2)

Accuracy: 0.8676876379690949 
AUC: 0.8329001174326579 
f1: 0.23323814962878356


In [8]:
# Tried an SVM as well, but training ran unexpectedly long, so this model was abandoned
# from sklearn import svm

# clf2 = svm.SVC()
# clf2.fit(X_train, y_train)

# y_pred2 = clf2.predict(X_test)
# y_pred_proba2 = clf2.predict_proba(X_test)[:, 1]
# accuracy = accuracy_score(y_test, y_pred)
# auc = roc_auc_score(y_test, y_pred_proba)

# print('Accuracy:', accuracy, '\nAUC:', auc)

### Train Logistic Regression Model

In [9]:
# Continue with the logistic regression outputs (model1) for the fairness analysis
y_pred = y_pred1
y_pred_proba = y_pred_proba1

In [10]:
# Per-row evaluation frame for the test set. In the 'sex' column, Female = 0 and Male = 1.
fair_df = pd.DataFrame({'sex': gender_test, 'y_true': y_test, 'y_pred': y_pred})
# Tag every row with its confusion-matrix outcome ('TP', 'FP', 'TN' or 'FN')
fair_df['confusion_matrix'] = fair_df[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)
fair_df.head()

Unnamed: 0,sex,y_true,y_pred,confusion_matrix
219620,0,0,0,TN
132821,0,0,0,TN
151862,1,0,0,TN
139717,1,0,0,TN
239235,0,0,0,TN


In [11]:
# Inspect the predicted class probabilities (one row per test sample, one column per class)
y_pred_proba

array([[0.94368063, 0.05631937],
       [0.77816319, 0.22183681],
       [0.99647526, 0.00352474],
       ...,
       [0.91102181, 0.08897819],
       [0.8648969 , 0.1351031 ],
       [0.9863914 , 0.0136086 ]])

In [12]:
# Group sizes in the test set: 0 stands for female and 1 for male
fair_df['sex'].value_counts()

sex
0    28412
1    22324
Name: count, dtype: int64

## Fairness Evaluation

In [13]:
# Tolerance shared by the fairness checks below: two group metrics count as
# equal when their absolute difference is smaller than this value.
threshold = 0.001

In [14]:
def statistical_parity(df):
    """Print each sex's share of positive predictions and whether the shares match.

    Reads the 'sex' (0 = female, 1 = male) and 'y_pred' columns. The shares are
    considered equal when their absolute difference is below the notebook-level
    ``threshold``. Prints the result; returns nothing.
    """
    predicted_positive = df['y_pred'] == 1

    def positive_share(sex_code):
        # Fraction of this group's rows that were predicted as class 1
        members = df['sex'] == sex_code
        return len(df[members & predicted_positive]) / len(df[members])

    female_positive_prob = positive_share(0)
    male_positive_prob = positive_share(1)

    print('Female Probability of Positive Predictions: %.3f' % female_positive_prob)
    print('Male Probability of Positive Predictions: %.3f' % male_positive_prob)

    gap = abs(female_positive_prob - male_positive_prob)
    print('Achieves Statistical Parity: %r' % (gap < threshold))

In [15]:
def predictive_parity(df):
    """Print each sex's positive predictive value and whether the two match.

    Reads the 'sex' (0 = female, 1 = male), 'y_pred' and 'confusion_matrix'
    columns. PPV is the number of 'TP' rows divided by the number of rows
    predicted as class 1. Values are considered equal when their absolute
    difference is below the notebook-level ``threshold``.
    """
    predicted_positive = df['y_pred'] == 1
    true_positive = df['confusion_matrix'] == 'TP'

    def ppv(sex_code):
        # TP count over positive-prediction count within one group
        members = df['sex'] == sex_code
        return len(df[members & true_positive]) / len(df[members & predicted_positive])

    PPV_female = ppv(0)
    PPV_male = ppv(1)

    print('Female Probability of True Positive Predictions: %.3f' % PPV_female)
    print('Male Probability of True Positive Predictions: %.3f' % PPV_male)

    gap = abs(PPV_female - PPV_male)
    print('Achieves Predictive Parity: %r' % (gap < threshold))

In [16]:
def equalized_odds(df):
    """Print per-sex false-negative and false-positive rates and whether each is equal.

    Reads the 'sex' (0 = female, 1 = male), 'y_true' and 'confusion_matrix'
    ('TP'/'FP'/'TN'/'FN') columns. Two rates are considered equal when their
    absolute difference is below the notebook-level ``threshold``. The FNR and
    FPR comparisons are evaluated independently. Prints the result; returns
    nothing.

    Raises:
        ZeroDivisionError: if a sex has no rows with the true label that a
            rate is conditioned on.
    """
    def error_rate(sex_code, outcome, true_label):
        # Share of this group's rows with `true_label` that ended up as `outcome`
        members = df['sex'] == sex_code
        errors = df[members & (df['confusion_matrix'] == outcome)].shape[0]
        return errors / df[members & (df['y_true'] == true_label)].shape[0]

    # FNR = FN/(FN+TP) = FN/(all-positive-true-label)
    fnr_female = error_rate(0, 'FN', 1)
    fnr_male = error_rate(1, 'FN', 1)

    # FPR = FP/(FP+TN) = FP/(all-negative-true-label)
    fpr_female = error_rate(0, 'FP', 0)
    fpr_male = error_rate(1, 'FP', 0)

    print('Probability of Female with Diabetes Predicted No Diabetes: %.3f' % fnr_female)
    print('Probability of Male with Diabetes Predicted No Diabetes: %.3f' % fnr_male)

    abs_difference_fnr = abs(fnr_female - fnr_male)
    print('Achieves Equality of No Diabetes Prediction: %r' % (abs_difference_fnr < threshold))

    print('Probability of Female with No Diabetes Predicted Having Diabetes: %.3f' % fpr_female)
    print('Probability of Male with No Diabetes Predicted Having Diabetes: %.3f' % fpr_male)

    # Compare the false-positive rates here (not the false-negative rates again)
    abs_difference_fpr = abs(fpr_female - fpr_male)
    print('Achieves Equality of Having Diabetes Prediction: %r' % (abs_difference_fpr < threshold))

In [17]:
def accuracy_equality(df):
    """Print each sex's accuracy and whether the two accuracies match.

    Reads the 'sex' (0 = female, 1 = male) and 'confusion_matrix' columns.
    Accuracy = (TP+TN)/all-samples within the group. Values are considered
    equal when their absolute difference is below the notebook-level
    ``threshold``.
    """
    correct = df['confusion_matrix'].isin(['TP', 'TN'])

    per_group = []
    for sex_code in (0, 1):
        members = df['sex'] == sex_code
        per_group.append(len(df[members & correct]) / len(df[members]))
    accuracy_female, accuracy_male = per_group

    print('Female Accuracy: %.3f' % accuracy_female)
    print('Male Accuracy: %.3f' % accuracy_male)

    gap = abs(accuracy_female - accuracy_male)
    print('Equality of Accuracy: %r' % (gap < threshold))

In [18]:
def treatment_equality(df):
    """Print each sex's FN-to-FP ratio and whether the two ratios match.

    Reads the 'sex' (0 = female, 1 = male) and 'confusion_matrix' columns.
    Ratios are considered equal when their absolute difference is below the
    notebook-level ``threshold``.
    """
    def fn_over_fp(sex_code):
        # Number of false negatives divided by number of false positives in one group
        outcomes = df[df['sex'] == sex_code]['confusion_matrix']
        return len(outcomes[outcomes == 'FN']) / len(outcomes[outcomes == 'FP'])

    ratio_female = fn_over_fp(0)
    ratio_male = fn_over_fp(1)

    print('Female Ratio of Errors: %.3f' % ratio_female)
    print('Male Ratio of Errors: %.3f' % ratio_male)

    gap = abs(ratio_female - ratio_male)
    print('Achieves Treatment Equality: %r' % (gap < threshold))

In [19]:
# Baseline fairness report for the unmitigated predictions (each call prints its own result)
statistical_parity(fair_df)
predictive_parity(fair_df)
equalized_odds(fair_df)
accuracy_equality(fair_df)
treatment_equality(fair_df)

Female Probability of Positive Predictions: 0.036
Male Probability of Positive Predictions: 0.043
Achieves Statistical Parity: False
Female Probability of True Positive Predictions: 0.570
Male Probability of True Positive Predictions: 0.572
Achieves Predictive Parity: False
Probability of Female with Diabetes Predicted No Diabetes: 0.842
Probability of Male with Diabetes Predicted No Diabetes: 0.835
Achieves Equality of No Diabetes Prediction: False
Probability of Female with No Diabetes Predicted Having Diabetes: 0.018
Probability of Male with No Diabetes Predicted Having Diabetes: 0.022
Achieves Equality of Having Diabetes Prediction: False
Female Accuracy: 0.876
Male Accuracy: 0.857
Equality of Accuracy: False
Female Ratio of Errors: 7.092
Male Ratio of Errors: 6.779
Achieves Treatment Equality: False


## Mitigation through Post-Processing

On Fairness and Calibration: https://arxiv.org/pdf/1709.02012.pdf

In [20]:
def trivial_pred(df):
    """Build the constant ("trivial") predictor for a group.

    Every row of a copy of ``df`` gets ``y_prob_1`` set to the mean of
    'y_true' and ``y_prob_0`` set to its complement; 'y_pred' is then
    recomputed with ``reclassify``. The input frame is left untouched.

    Returns:
        ``(trivial_df, base_rate)`` — the modified copy and the mean of 'y_true'.
    """
    base_rate = df['y_true'].mean()

    constant_df = df.copy()
    constant_df['y_prob_1'] = base_rate
    constant_df['y_prob_0'] = 1.0 - constant_df['y_prob_1']

    return reclassify(constant_df), base_rate

In [21]:
# reassign prediction based on the adjusted y_prob(args_max)
def reclassify(df):
    """Recompute 'y_pred' from the class probabilities (arg-max, ties go to class 0).

    Writes the new 'y_pred' column into ``df`` itself and returns that same frame.
    """
    prob_pairs = zip(df['y_prob_0'], df['y_prob_1'])
    # Class 0 wins whenever its probability is at least as large as class 1's
    df['y_pred'] = [0 if prob_0 >= prob_1 else 1 for prob_0, prob_1 in prob_pairs]

    return df

In [22]:
# compute fpr and fnr given y_probs and true labels
def compute_errors(df):
    """Return ``(fpr, fnr)`` computed from the 'y_true' and 'y_pred' columns.

    Works on a copy of ``df``; each row is labelled with
    ``determine_confusion_matrix`` first. FPR is the FP count over the number
    of rows with ``y_true == 0``; FNR is the FN count over the number of rows
    with ``y_true == 1``.
    """
    labelled = df.copy()
    labelled['confusion_matrix'] = labelled[['y_true', 'y_pred']].apply(
        determine_confusion_matrix, axis=1
    )
    outcomes = labelled['confusion_matrix']

    false_positives = labelled[outcomes == 'FP'].shape[0]
    actual_negatives = labelled[labelled['y_true'] == 0].shape[0]
    fpr = false_positives / actual_negatives

    false_negatives = labelled[outcomes == 'FN'].shape[0]
    actual_positives = labelled[labelled['y_true'] == 1].shape[0]
    fnr = false_negatives / actual_positives

    return fpr, fnr

In [31]:
# calculate mix rate and calibrated
# calculate mix rate and calibrated
def calibrate_eq_odds(g1, g2, fnr=1, fpr=1):
    """Post-process two groups' probabilities by mixing in each group's base rate.

    A cost is computed for each group from its generalized false-positive and
    false-negative rates. The group with the lower cost has a fraction
    (the "mix rate") of its rows overwritten with that group's base rate, and
    'y_pred' is then recomputed with ``reclassify``; the other group gets a mix
    rate of 0.

    Args:
        g1, g2: DataFrames with at least 'y_true', 'y_prob_1' and 'y_prob_0'
            columns. They are copied, not modified.
        fnr, fpr: truthiness flags selecting the cost. Both truthy -> equally
            weighted FP/FN cost; only ``fpr`` truthy -> generalized FP rate;
            otherwise (including both falsy) -> generalized FN rate.
            Note that ``fnr`` comes before ``fpr`` in the positional order.

    Returns:
        ``(calibrated_g1, calibrated_g2)`` — copies of the inputs with updated
        'y_prob_1', 'y_prob_0', 'y_pred' and 'confusion_matrix' columns.

    Side effects:
        Prints the two group costs when both flags are truthy.

    NOTE(review): rows are drawn with ``np.random.choice`` and no seed is set
    inside this function, so results vary between runs unless the caller seeds
    NumPy's global RNG.
    NOTE(review): there is no guard for a trivial cost equal to the group cost
    (zero denominator in the mix rate) — confirm this cannot happen for the
    inputs used.
    """
    # create trivial classifier
    g1_trivial, g1_trivial_base_rate = trivial_pred(g1)
    g2_trivial, g2_trivial_base_rate = trivial_pred(g2)
    
    # compute generalized fpr and generalized fnr for all 4 sets of output
    # g1_fp_cost = mean y_pred=1 prob for rows where y_true=0
    g1_g_fp = g1.loc[g1['y_true'] == 0, 'y_prob_1'].mean() 
    g2_g_fp = g2.loc[g2['y_true'] == 0, 'y_prob_1'].mean()
    g1_trivial_g_fp = g1_trivial.loc[g1_trivial['y_true'] == 0, 'y_prob_1'].mean()
    g2_trivial_g_fp = g2_trivial.loc[g2_trivial['y_true'] == 0, 'y_prob_1'].mean()
    
    # g1_fn_cost = 1- mean y_pred=1 prob for rows where y_true=1
    g1_g_fn = 1 - g1.loc[g1['y_true'] == 1, 'y_prob_1'].mean()
    g2_g_fn = 1 - g2.loc[g2['y_true'] == 1, 'y_prob_1'].mean()
    g1_trivial_g_fn = 1 - g1_trivial.loc[g1_trivial['y_true'] == 1, 'y_prob_1'].mean()
    g2_trivial_g_fn = 1 - g2_trivial.loc[g2_trivial['y_true'] == 1, 'y_prob_1'].mean()

    # Base rate = share of rows whose true label is 1 (mean of 'y_true')
    g1_base_rate = g1['y_true'].mean()
    g2_base_rate = g2['y_true'].mean()

    # calibrate by considering both rates, each weighted 1/2 and scaled by the class share
    if fpr and fnr:
        g1_cost = 1 / 2.0 * g1_g_fp * (1 - g1_base_rate) + 1 / 2.0 * g1_g_fn * g1_base_rate
        g2_cost = 1 / 2.0 * g2_g_fp * (1 - g2_base_rate) + 1 / 2.0 * g2_g_fn * g2_base_rate
        print(g1_cost, g2_cost)
        g1_trivial_cost = (1 / 2.0 * g1_trivial_g_fp * (1 - g1_trivial_base_rate) 
                           + 1 / 2.0 * g1_trivial_g_fn * g1_trivial_base_rate)
        g2_trivial_cost = (1 / 2.0 * g2_trivial_g_fp * (1 - g2_trivial_base_rate) 
                           + 1 / 2.0 * g2_trivial_g_fn * g2_trivial_base_rate)
    # calibrate FP rate
    elif fpr:
        g1_cost = g1_g_fp
        g2_cost = g2_g_fp
        g1_trivial_cost = g1_trivial_g_fp
        g2_trivial_cost = g2_trivial_g_fp
    # calibrate FN rate
    else:
        g1_cost = g1_g_fn
        g2_cost = g2_g_fn
        g1_trivial_cost = g1_trivial_g_fn
        g2_trivial_cost = g2_trivial_g_fn

    # Only the lower-cost group is mixed with its trivial predictor; the other keeps mix rate 0
    g2_cost_more = g2_cost > g1_cost
    g1_mix_rate = (g2_cost - g1_cost) / (g1_trivial_cost - g1_cost) if g2_cost_more else 0
    g2_mix_rate = 0 if g2_cost_more else (g1_cost - g2_cost) / (g2_trivial_cost - g2_cost)
    
    # Randomly select mix_rate% of elements from the prediction
    # (sampling is by index label, without replacement; the count is truncated by int())
    g1_copy = g1.copy(deep=True)
    g1_random_indices = np.random.choice(g1_copy['y_prob_1'].index, 
                                         size=int(g1_mix_rate*g1_copy.shape[0]), replace=False)
    g2_copy = g2.copy(deep=True)
    g2_random_indices = np.random.choice(g2_copy['y_prob_1'].index, 
                                         size=int(g2_mix_rate*g2_copy.shape[0]), replace=False)
    
    # Set those to base_rate
    g1_copy.loc[g1_random_indices, 'y_prob_1'] = g1_base_rate
    g2_copy.loc[g2_random_indices, 'y_prob_1'] = g2_base_rate
    g1_copy.loc[g1_random_indices, 'y_prob_0'] = 1 - g1_base_rate
    g2_copy.loc[g2_random_indices, 'y_prob_0'] = 1 - g2_base_rate
    
    # reclassify: recompute 'y_pred' from the (partly overwritten) probabilities
    caibrated_g1 = reclassify(g1_copy)
    caibrated_g2 = reclassify(g2_copy)
    
    # Update confusion matrix
    caibrated_g1['confusion_matrix'] = caibrated_g1[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)
    caibrated_g2['confusion_matrix'] = caibrated_g2[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)

    
    return caibrated_g1, caibrated_g2

In [32]:
# Evaluation frame for post-processing: labels, hard predictions and both class probabilities
post_processiong_df = pd.DataFrame({'sex': gender_test, 'y_true': y_test, 'y_pred': y_pred, 
                                    'y_prob_1': y_pred_proba[:, 1], 'y_prob_0': y_pred_proba[:, 0]})

In [33]:
# Split the evaluation frame by sex (1 = male, 0 = female)
male_data = post_processiong_df[post_processiong_df['sex'] == 1]
female_data = post_processiong_df[post_processiong_df['sex'] == 0]

In [34]:
# g1 = female, g2 = male; both flags set, so the combined FP/FN cost is used
calibrated_g1, calibrated_g2 = calibrate_eq_odds(female_data, male_data, 1, 1)

0.09112231284032365 0.10493468441308659


In [35]:
# Prediction counts and mean positive-class probability per group after post-processing
# (calibrated_g1 = female, calibrated_g2 = male)
print(calibrated_g1['y_pred'].value_counts(), calibrated_g1['y_prob_1'].mean())
print(calibrated_g2['y_pred'].value_counts(), calibrated_g2['y_prob_1'].mean())

y_pred
0    28055
1      357
Name: count, dtype: int64 0.12959424212166823
y_pred
0    21364
1      960
Name: count, dtype: int64 0.1521268316742733


In [36]:
# Recombine both groups into one frame (original row index is discarded)
calibrated = pd.concat([calibrated_g1, calibrated_g2], ignore_index=True)

In [37]:
# Fairness report after post-processing, for comparison with the baseline report above
statistical_parity(calibrated)
predictive_parity(calibrated)
equalized_odds(calibrated)
accuracy_equality(calibrated)
treatment_equality(calibrated)

Female Probability of Positive Predictions: 0.013
Male Probability of Positive Predictions: 0.043
Achieves Statistical Parity: False
Female Probability of True Positive Predictions: 0.594
Male Probability of True Positive Predictions: 0.572
Achieves Predictive Parity: False
Probability of Female with Diabetes Predicted No Diabetes: 0.942
Probability of Male with Diabetes Predicted No Diabetes: 0.835
Achieves Equality of No Diabetes Prediction: False
Probability of Female with No Diabetes Predicted Having Diabetes: 0.006
Probability of Male with No Diabetes Predicted Having Diabetes: 0.022
Achieves Equality of Having Diabetes Prediction: False
Female Accuracy: 0.873
Male Accuracy: 0.857
Equality of Accuracy: False
Female Ratio of Errors: 23.793
Male Ratio of Errors: 6.779
Achieves Treatment Equality: False
