source: https://archive.ics.uci.edu/dataset/891/cdc+diabetes+health+indicators

In [1]:
#pip install ucimlrepo

In [2]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict, RepeatedStratifiedKFold
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, f1_score
from sklearn.model_selection import train_test_split

## Load Dataset

In [3]:
def load_dataset():
    # Load the dataset
    cdc_diabetes_health_indicators = fetch_ucirepo(id=891) 

    # Define categorical and numerical features
    categorical_features = ['HighBP', 'HighChol','CholCheck','Smoker','Stroke','HeartDiseaseorAttack',
                            'PhysActivity','Fruits','Veggies','HvyAlcoholConsump','AnyHealthcare','NoDocbcCost','GenHlth',
                            'DiffWalk','Sex','Age','Education','Income']
    numerical_features = ['BMI','MentHlth','PhysHlth',]

    # Get features and target variable
    X = cdc_diabetes_health_indicators.data.features
    y = cdc_diabetes_health_indicators.data.targets['Diabetes_binary'] 

    # Preprocessing: One-hot encoding for categorical variables and scaling for numerical variables
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ])

    # Split the dataset into training and testing sets with a fixed random state for reproducibility
    X_train_full, X_test_full, y_train, y_test, gender_train, gender_test = train_test_split(X, y, 
                                                                                             cdc_diabetes_health_indicators.data.features['Sex'],
                                                                                             test_size=0.2, random_state=42)
    # Apply preprocessing to training and testing set separately
    X_train_processed = preprocessor.fit_transform(X_train_full)
    X_test_processed = preprocessor.transform(X_test_full)

    # Return processed training and testing sets along with gender attributes
    return X_train_processed, X_test_processed, y_train, y_test, gender_train.values, gender_test.values

In [4]:
# cdc_diabetes_health_indicators.metadata
# cdc_diabetes_health_indicators.variables

## Train Model and Generate Predictions

In [5]:
def train_and_predict_model(X_train, X_test, y_train, weights=None):

    # Initialize the Logistic Regression model
    model = LogisticRegression(max_iter=10000, random_state=0)

    # Train the Logistic Regression model
    model.fit(X_train, y_train, sample_weight=weights)

    # Predict on the testing set
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)

    return y_pred, y_pred_proba

In [6]:
def determine_confusion_matrix(df):
    if df['y_true'] == df['y_pred'] == 1:
        return 'TP'
    elif df['y_pred'] == 1 and df['y_true'] != df['y_pred']:
        return 'FP'
    elif df['y_true'] == df['y_pred'] == 0:
        return 'TN'
    else:
        return 'FN'

In [8]:
# preprocess and load the data
X_train, X_test, y_train, y_test, gender_train, gender_test = load_dataset()

### Select Model Based on Performance

In [9]:
# try logistic regression
model1 = LogisticRegression(max_iter=10000, random_state=0)

# Train the Logistic Regression model
model1.fit(X_train, y_train)

# Predict on the testing set
y_pred1 = model1.predict(X_test)
y_pred_proba1 = model1.predict_proba(X_test)

# Evaluate the model
accuracy1 = accuracy_score(y_test, y_pred1)
auc1 = roc_auc_score(y_test, y_pred_proba1[:, 1])
f1_1 = f1_score(y_test, y_pred1)

print('Accuracy:', accuracy1, '\nAUC:', auc1, '\nf1:', f1_1)

Accuracy: 0.8676285083569851 
AUC: 0.8288682104560867 
f1: 0.25111507582515613


In [10]:
# try nn
model2 = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(21,), random_state=1, max_iter=100)

# Train the Logistic Regression model
model2.fit(X_train, y_train)

# Predict on the testing set
y_pred2 = model2.predict(X_test)
y_pred_proba2 = model2.predict_proba(X_test)

# Evaluate the model
accuracy2 = accuracy_score(y_test, y_pred2)
auc2 = roc_auc_score(y_test, y_pred_proba2[:, 1])
f1_2 = f1_score(y_test, y_pred2)

print('Accuracy:', accuracy2, '\nAUC:', auc2, '\nf1:', f1_2)

Accuracy: 0.8676876379690949 
AUC: 0.8329001174326579 
f1: 0.23323814962878356


In [11]:
y_pred = y_pred1
y_pred_proba = y_pred_proba1

In [12]:
# Female = 1 and Male = 0
fair_df = pd.DataFrame({'sex': gender_test, 'y_true': y_test, 'y_pred': y_pred})
fair_df['confusion_matrix'] = fair_df[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)
fair_df.head()

Unnamed: 0,sex,y_true,y_pred,confusion_matrix
219620,0,0,0,TN
132821,0,0,0,TN
151862,1,0,0,TN
139717,1,0,0,TN
239235,0,0,0,TN


In [13]:
y_pred_proba

array([[0.94368563, 0.05631437],
       [0.77818335, 0.22181665],
       [0.99647578, 0.00352422],
       ...,
       [0.91103087, 0.08896913],
       [0.86490401, 0.13509599],
       [0.98639281, 0.01360719]])

In [14]:
# 1 stand for female and 0 is male
fair_df['sex'].value_counts()

sex
0    28412
1    22324
Name: count, dtype: int64

## Fairness Evaluation

In [15]:
threshold = 0.001

In [16]:
def statistical_parity(df):
    """
    TODO: Add your code here
    """
    female_positive = df[(df['sex'] == 1) & (df['y_pred'] == 1)].shape[0]
    female_positive_prob = female_positive / (df[(df['sex'] == 1)].shape[0])
    
    male_positive = df[(df['sex'] == 0) & (df['y_pred'] == 1)].shape[0]
    male_positive_prob = male_positive / (df[(df['sex'] == 0)].shape[0])

    print('Female Probability of Positive Predictions: %.3f' % female_positive_prob)
    print('Male Probability of Positive Predictions: %.3f' % male_positive_prob)
    
    abs_difference = abs(female_positive_prob - male_positive_prob)
    print('Achieves Statistical Parity: %r' % (abs_difference < threshold))

In [17]:
def predictive_parity(df):
    """
    TODO: Add your code here
    """
    female_TP = df[(df['sex'] == 1) & (df['confusion_matrix'] == 'TP')].shape[0]
    PPV_female = female_TP / (df[(df['sex'] == 1) & (df['y_pred'] == 1)].shape[0])
    
    male_TP = df[(df['sex'] == 0) & (df['confusion_matrix'] == 'TP')].shape[0]
    PPV_male = male_TP / (df[(df['sex'] == 0) & (df['y_pred'] == 1)].shape[0])

    print('Female Probability of True Positive Predictions: %.3f' % PPV_female)
    print('Male Probability of True Positive Predictions: %.3f' % PPV_male)
    
    abs_difference = abs(PPV_female - PPV_male)
    print('Achieves Statistical Parity: %r' % (abs_difference < threshold))

In [18]:
def equalized_odds(df):
    """
    TODO: Add your code here
    """
    # FNR = FN/(FN+TP) = FN/(all-positive-true-label)
    female_fn = df[(df['sex'] == 1) & (df['confusion_matrix'] == 'FN')].shape[0]
    fnr_female = female_fn / (df[(df['sex'] == 1) & (df['y_true'] == 1)].shape[0])
    male_fn = df[(df['sex'] == 0) & (df['confusion_matrix'] == 'FN')].shape[0]
    fnr_male = male_fn / (df[(df['sex'] == 0) & (df['y_true'] == 1)].shape[0])
    
    # FPR = FP/(FP+TN) = FN/(all-negative-true-label)
    female_fp = df[(df['sex'] == 1) & (df['confusion_matrix'] == 'FP')].shape[0]
    fpr_female = female_fp / (df[(df['sex'] == 1) & (df['y_true'] == 0)].shape[0])
    male_fp = df[(df['sex'] == 0) & (df['confusion_matrix'] == 'FP')].shape[0]
    fpr_male = male_fp / (df[(df['sex'] == 0) & (df['y_true'] == 0)].shape[0])

    print('Probability of Credit-Worthy Female Predicted Not Credit-Worthy: %.3f' % fnr_female)
    print('Probability of Credit-Worthy Male Predicted Not Credit-Worthy: %.3f' % fnr_male)
    
    abs_difference_fnr = abs(fnr_female - fnr_male)
    print('Achieves Equality of Non Credit Worthy Prediction: %r' % (abs_difference_fnr < threshold))
    
    print('Probability of Non Credit-Worthy Female Predicted Credit-Worthy: %.3f' % fpr_female)
    print('Probability of Non Credit-Worthy Male Predicted Credit-Worthy: %.3f' % fpr_male)
    
    abs_difference_fpr = abs(fnr_female - fnr_male)
    print('Achieves Equality of Credit Worthy Prediction: %r' % (abs_difference_fpr < threshold))

In [19]:
def accuracy_equality(df):
    """
    TODO: Add your code here
    """

    # Accuracy = (TP+TN)/all-samples
    female_t = df[(df['sex'] == 1) & (df['confusion_matrix'].isin(['TP', 'TN']))].shape[0]
    accuracy_female = female_t / (df[(df['sex'] == 1)].shape[0])
    male_t = df[(df['sex'] == 0) & (df['confusion_matrix'].isin(['TP', 'TN']))].shape[0]
    accuracy_male = male_t / (df[(df['sex'] == 0)].shape[0])
    
    print('Female Accuracy: %.3f' % accuracy_female)
    print('Male Accuracy: %.3f' % accuracy_male)
    
    abs_difference = abs(accuracy_female - accuracy_male)
    print('Equality of Accuracy: %r' % (abs_difference < threshold))

In [20]:
def treatment_equality(df):
    """
    TODO: Add your code here
    """
    
    female = df[(df['sex'] == 1)]
    ratio_female = (female[female['confusion_matrix'] == 'FN'].shape[0] / 
        female[female['confusion_matrix'] == 'FP'].shape[0])

    male = df[(df['sex'] == 0)]
    ratio_male = (male[male['confusion_matrix'] == 'FN'].shape[0] / 
        male[male['confusion_matrix'] == 'FP'].shape[0])

    print('Female Ratio of Errors: %.3f' % ratio_female)
    print('Male Ratio of Errors: %.3f' % ratio_male)
    
    abs_difference = abs(ratio_female - ratio_male)
    print('Achieves Treatment Equality: %r' % (abs_difference < threshold))

In [21]:
statistical_parity(fair_df)
predictive_parity(fair_df)
equalized_odds(fair_df)
accuracy_equality(fair_df)
treatment_equality(fair_df)

Female Probability of Positive Predictions: 0.043
Male Probability of Positive Predictions: 0.036
Achieves Statistical Parity: False
Female Probability of True Positive Predictions: 0.572
Male Probability of True Positive Predictions: 0.570
Achieves Statistical Parity: False
Probability of Credit-Worthy Female Predicted Not Credit-Worthy: 0.835
Probability of Credit-Worthy Male Predicted Not Credit-Worthy: 0.842
Achieves Equality of Non Credit Worthy Prediction: False
Probability of Non Credit-Worthy Female Predicted Credit-Worthy: 0.022
Probability of Non Credit-Worthy Male Predicted Credit-Worthy: 0.018
Achieves Equality of Credit Worthy Prediction: False
Female Accuracy: 0.857
Male Accuracy: 0.876
Equality of Accuracy: False
Female Ratio of Errors: 6.795
Male Ratio of Errors: 7.092
Achieves Treatment Equality: False


## Mitigation through Post-Processiong

On Fairness and Calibration: https://arxiv.org/pdf/1709.02012.pdf

In [22]:
# simulate trivial classifier output
def get_trivial_pred(df):
    trivial_pred = df['y_true'].mean()
    trivial_df = df.assign(y_prob_1=[trivial_pred]*df.shape[0])
    trivial_df = reclassify(trivial_df)
    
    return trivial_df, trivial_pred

In [23]:
# reassign prediction based on the adjusted y_prob(args_max)
def reclassify(df): 
    new_y_pred = []
    for _, row in df.iterrows():
        if row['y_prob_0'] >=  row['y_prob_1']:
            new_y_pred .append(0)
        else:
            new_y_pred .append(1)
    df['y_pred'] = new_y_pred 
    
    return df

In [24]:
# compute fpr and fnr given y_probs and true labels
def compute_errors(df):
    df['confusion_matrix'] = df[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)
    fpr = df[df['confusion_matrix'] == 'FP'].shape[0]/df[df['y_true'] == 0].shape[0]
    fnr = df[df['confusion_matrix'] == 'FN'].shape[0]/df[df['y_true'] == 1].shape[0]
    return fpr, fnr
    

In [36]:
# calibrate 
def calib_eq_odds(g1_val, g2_val, g1, g2, fnr=1, fpr=1):
    g1_val_prob = g1_val.drop(columns=['y_pred'])
    g2_val_prob = g2_val.drop(columns=['y_pred'])

    # simluate trivial classifier with holdout data
    g1_trivial, g1_trivial_pred = get_trivial_pred(g1_val_prob)
    g2_trivial, g2_trivial_pred  = get_trivial_pred(g2_val_prob)

    # compute fpr and fnr for all 4 sets of output
    g1_fpr, g1_fnr = compute_errors(g1)
    g1_trivial_fpr, g1_trivial_fnr  = compute_errors(g1_trivial)
    g2_fpr, g2_fnr = compute_errors(g2)
    g2_trivial_fpr, g2_trivial_fnr  = compute_errors(g2_trivial)

    # compute generalized fpr and generalized fnr for all 4 sets of output
    g1_g_fp = g1.loc[g1['y_true'] == 0, 'y_prob_1'].mean() # g1_fp_cost = mean y_pred=1 prob for rows where y_true=0
    g2_g_fp = g2.loc[g2['y_true'] == 0, 'y_prob_1'].mean()
    g1_trivial_g_fp = g1_trivial.loc[g1_trivial['y_true'] == 0, 'y_prob_1'].mean()
    g2_trivial_g_fp = g2_trivial.loc[g2_trivial['y_true'] == 0, 'y_prob_1'].mean()
    g1_g_fn = g1.loc[g1['y_true'] == 0, 'y_prob_1'].mean() # g1_fp_cost = mean y_pred=1 prob for rows where y_true=0
    g2_g_fn = g2.loc[g2['y_true'] == 0, 'y_prob_1'].mean()
    g1_trivial_g_fn = g1_trivial.loc[g1_trivial['y_true'] == 0, 'y_prob_1'].mean()
    g2_trivial_g_fn = g2_trivial.loc[g2_trivial['y_true'] == 0, 'y_prob_1'].mean()

    # calibrate FP rate
    if fpr:
        g1_cost = g1_g_fp
        g2_cost = g2_g_fp
        g1_trivial_cost = g1_trivial_g_fp
        g2_trivial_cost = g2_trivial_g_fp
    # calibrate FN rate
    elif fnr:
        g1_cost = g1_g_fn
        g2_cost = g2_g_fn
        g1_trivial_cost = g1_trivial_g_fn
        g2_trivial_cost = g2_trivial_g_fn
    # consider both
    else:
        g1_cost = g1_fpr / 2.0 * g1_g_fp * (1 - g1_trivial_pred) + g1_fnr / 2.0 * g1_g_fn * g1_trivial_pred
        g2_cost = g2_fpr / 2.0 * g2_g_fp * (1 - g2_trivial_pred) + g2_fnr / 2.0 * g2_g_fn * g1_trivial_pred
        g1_trivial_cost = g1_trivial_fpr / 2.0 * g1_trivial_g_fp * (1 - g2_trivial_pred) + g2_fnr / 2.0 * g1_trivial_g_fn * g1_trivial_pred
        g2_trivial_cost = g2_trivial_fpr / 2.0 * g2_trivial_g_fp * (1 - g2_trivial_pred) + g2_fnr / 2.0 * g2_trivial_g_fn * g1_trivial_pred

    # determine what % of preditions need to be calibrated
    g1_mix_rate = (g2_cost - g1_cost) / (g1_trivial_cost - g1_cost) if g2_cost > g1_cost else 0
    g2_mix_rate = (g1_cost - g2_cost) / (g2_trivial_cost - g2_cost) if g1_cost > g2_cost else 0
    
    # Randomly select mix_rate% of elements from the prediction
    g1_copy = g1.copy(deep=True)
    g1_random_indices = np.random.choice(g1_copy['y_prob_1'].index, size=int(g1_mix_rate*g1_copy.shape[0]), replace=False)
    g2_copy = g2.copy(deep=True)
    g2_random_indices = np.random.choice(g2_copy['y_prob_1'].index, size=int(g2_mix_rate*g2_copy.shape[0]), replace=False)
    # Set those to base_rate
    g1_copy.loc[g1_random_indices, 'y_prob_1'] = g1_trivial_pred
    g2_copy.loc[g2_random_indices, 'y_prob_1'] = g2_trivial_pred
    # reclassify
    caibrated_g1 = reclassify(g1_copy)
    caibrated_g2 = reclassify(g2_copy)
    # Update confusion matrix
    caibrated_g1['confusion_matrix'] = caibrated_g1[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)
    caibrated_g2['confusion_matrix'] = caibrated_g2[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)

    return caibrated_g1, caibrated_g2

In [37]:
post_processiong_df = pd.DataFrame({'sex': gender_test, 'y_true': y_test, 'y_pred': y_pred, 'y_prob_1': y_pred_proba[:, 1], 'y_prob_0': y_pred_proba[:, 0]})

In [38]:
val_data, test_data = train_test_split(post_processiong_df, test_size=0.5, random_state=42)

# Create model objects - one for each group, validation and test
male_val_data = val_data[val_data['sex'] == 0]
female_val_data = val_data[val_data['sex'] == 1]
male_test_data = test_data[test_data['sex'] == 0]
print(male_test_data['y_pred'].value_counts(), male_test_data['y_prob_1'].mean())
female_test_data = test_data[test_data['sex'] == 1]
print(female_test_data['y_pred'].value_counts(), female_test_data['y_prob_1'].mean())

y_pred
0    13727
1      471
Name: count, dtype: int64 0.13092837124167578
y_pred
0    10688
1      482
Name: count, dtype: int64 0.15296408167591213


In [47]:
calibrated_g1, calibrated_g2 = calib_eq_odds(male_val_data, female_val_data, male_test_data, female_test_data, 1, 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['confusion_matrix'] = df[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['confusion_matrix'] = df[['y_true','y_pred']].apply(determine_confusion_matrix, axis=1)


In [48]:
print(calibrated_g1['y_pred'].value_counts(), calibrated_g1['y_prob_1'].mean())
print(calibrated_g2['y_pred'].value_counts(), calibrated_g2['y_prob_1'].mean())

y_pred
0    14071
1      127
Name: count, dtype: int64 0.13161768014699238
y_pred
0    10688
1      482
Name: count, dtype: int64 0.15296408167591213


In [49]:
calibrated = pd.concat([calibrated_g1, calibrated_g2], ignore_index=True)

In [50]:
statistical_parity(calibrated)
predictive_parity(calibrated)
equalized_odds(calibrated)
accuracy_equality(calibrated)
treatment_equality(calibrated)

Female Probability of Positive Predictions: 0.043
Male Probability of Positive Predictions: 0.009
Achieves Statistical Parity: False
Female Probability of True Positive Predictions: 0.591
Male Probability of True Positive Predictions: 0.528
Achieves Statistical Parity: False
Probability of Credit-Worthy Female Predicted Not Credit-Worthy: 0.831
Probability of Credit-Worthy Male Predicted Not Credit-Worthy: 0.963
Achieves Equality of Non Credit Worthy Prediction: False
Probability of Non Credit-Worthy Female Predicted Credit-Worthy: 0.021
Probability of Non Credit-Worthy Male Predicted Credit-Worthy: 0.005
Achieves Equality of Credit Worthy Prediction: False
Female Accuracy: 0.857
Male Accuracy: 0.874
Equality of Accuracy: False
Female Ratio of Errors: 7.091
Male Ratio of Errors: 28.917
Achieves Treatment Equality: False
