In [1]:
import numpy as np
import pandas as pd
from aif360.datasets import CompasDataset
from aif360.algorithms.preprocessing import Reweighing
from aif360.metrics import BinaryLabelDatasetMetric
import random

2024-01-04 12:14:00.792581: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-04 12:14:00.792612: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-04 12:14:00.793257: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-04 12:14:00.797104: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
%matplotlib inline
# Load all necessary packages
# import sys
# sys.path.append("../")
import numpy as np
from tqdm import tqdm

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display
import matplotlib.pyplot as plt

from common_utils import compute_metrics

In [3]:
import itertools

import numpy as np
import pandas as pd

from aif360.metrics import BinaryLabelDatasetMetric, MDSSClassificationMetric
from aif360.detectors import bias_scan

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas

In [4]:
# # Load the dataset with 'sex' and 'race' as protected attributes
# compas_data = CompasDataset(
#     protected_attribute_names=['sex', 'race'],
#     privileged_classes=[['Male'], ['Caucasian']],  # 'Male' for 'sex' and 'Caucasian' for 'race'
#     features_to_drop=[]  # No features to drop initially
# )

# # Filter the dataset for individuals aged less than 25
# compas_data = compas_data.subset(compas_data.features[:, compas_data.feature_names.index('age')] < 25)

# # Define privileged and unprivileged groups for both 'sex' and 'race'
# privileged_groups = [{'sex': 1, 'race': 1}]
# unprivileged_groups = [{'sex': 0, 'race': 0}]

In [5]:
def custom_preprocessing(df):
    sex_mapping = {'Male': 1, 'Female': 0, 'Other': 2}
    df['sex'] = df['sex'].map(sex_mapping)

    if 'race' in df.columns:
        race_mapping = {'African-American': 0, 'Caucasian': 1, 'Hispanic': 2, 'Other': 3, 'Asian': 4, 'Native American': 5}
        df['race'] = df['race'].map(race_mapping)

    return df

compas_data = CompasDataset(
    protected_attribute_names=[],
    privileged_classes=[],  
    custom_preprocessing=custom_preprocessing
)

privileged_groups = []
unprivileged_groups = []



In [6]:
# Get the dataset and split into train and test
dataset_orig_train, dataset_orig_vt = compas_data.split([0.7], shuffle=True)
dataset_orig_valid, dataset_orig_test = compas_data.split([0.3], shuffle=True)

In [7]:
# Logistic regression classifier and predictions
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()
w_train = dataset_orig_train.instance_weights.ravel()

lmod = LogisticRegression()
lmod.fit(X_train, y_train, 
         sample_weight=dataset_orig_train.instance_weights)
y_train_pred = lmod.predict(X_train)

# positive class index
pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

dataset_orig_train_pred = dataset_orig_train.copy()
dataset_orig_train_pred.labels = y_train_pred

In [8]:
dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
X_valid = scale_orig.transform(dataset_orig_valid_pred.features)
y_valid = dataset_orig_valid_pred.labels
dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)
print(dataset_orig_valid_pred.scores[:10])

dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
X_test = scale_orig.transform(dataset_orig_test_pred.features)
y_test = dataset_orig_test_pred.labels
dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)

[[0.64901428]
 [0.3686909 ]
 [0.43872729]
 [0.38599773]
 [0.16714846]
 [0.46130743]
 [0.77796401]
 [0.99837841]
 [0.34652783]
 [0.01865967]]


In [9]:
num_thresh = 100
ba_arr = np.zeros(num_thresh)
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
for idx, class_thresh in enumerate(class_thresh_arr):
    
    fav_inds = dataset_orig_valid_pred.scores > class_thresh
    dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label
    dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label
    
    classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,
                                             dataset_orig_valid_pred)
    
    ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\
                       +classified_metric_orig_valid.true_negative_rate())

best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
best_class_thresh = class_thresh_arr[best_ind]

print("Best balanced accuracy (no reweighing) = %.4f" % np.max(ba_arr))
print("Optimal classification threshold (no reweighing) = %.4f" % best_class_thresh)

Best balanced accuracy (no reweighing) = 0.6765
Optimal classification threshold (no reweighing) = 0.5544


In [10]:
display(Markdown("#### Predictions from original testing data"))
bal_acc_arr_orig = []
disp_imp_arr_orig = []
avg_odds_diff_arr_orig = []

print("Classification threshold used = %.4f" % best_class_thresh)
for thresh in tqdm(class_thresh_arr):
    
    if thresh == best_class_thresh:
        disp = True
    else:
        disp = False
    
    fav_inds = dataset_orig_test_pred.scores > thresh
    dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label
    dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label
    
    metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, 
                                      unprivileged_groups, privileged_groups,
                                      disp = disp)

    bal_acc_arr_orig.append(metric_test_bef["Balanced accuracy"])
    avg_odds_diff_arr_orig.append(metric_test_bef["Average odds difference"])
    disp_imp_arr_orig.append(metric_test_bef["Disparate impact"])

#### Predictions from original testing data

Classification threshold used = 0.5544


  0%|          | 0/100 [00:00<?, ?it/s]invalid value encountered in double_scalars
invalid value encountered in double_scalars
invalid value encountered in double_scalars
 33%|███▎      | 33/100 [00:00<00:00, 329.83it/s]

Balanced accuracy = 0.6947
Statistical parity difference = nan
Disparate impact = nan
Average odds difference = nan
Equal opportunity difference = nan
Theil index = 0.2224


100%|██████████| 100/100 [00:00<00:00, 336.57it/s]


## Bias Scan

In [11]:
# Convert the test dataset to DataFrame and add actual observed labels
df_test, _ = dataset_orig_test.convert_to_dataframe()
df_test['observed'] = df_test['two_year_recid']

# Use the scores from dataset_orig_test_pred as probabilities for the test dataset
df_test['probabilities'] = dataset_orig_test_pred.scores

# Perform bias scan on the test dataset
privileged_subset = bias_scan(df_test[['sex', 'age', 'race']], df_test['observed'], df_test['probabilities'],
                              favorable_value=dataset_orig_test_pred.favorable_label,
                              penalty=0.5, overpredicted=True)
unprivileged_subset = bias_scan(df_test[['sex', 'age', 'race']], df_test['observed'], df_test['probabilities'],
                                favorable_value=dataset_orig_test_pred.favorable_label,
                                penalty=0.5, overpredicted=False)

# Display results
print("Privileged Subset:", privileged_subset)
print("Unprivileged Subset:", unprivileged_subset)

Privileged Subset: ({'age': [19.0, 20.0, 33.0, 43.0, 48.0, 52.0, 55.0, 65.0, 96.0], 'sex': [1.0], 'race': [0.0, 2.0, 5.0]}, 16.798)
Unprivileged Subset: ({'age': [22.0, 23.0, 24.0, 25.0, 28.0, 30.0, 32.0, 35.0, 37.0, 40.0, 49.0, 54.0], 'sex': [0.0]}, 10.6331)


## Measuring and Comparing Bias

Compare subset sex : 1 and sex : 0, based on Privileged Subset: ({'age': [19.0, 20.0, 33.0, 43.0, 48.0, 52.0, 55.0, 65.0, 96.0], 'sex': [1.0], 'race': [0.0, 2.0, 5.0]}, 16.798)

In [18]:
from aif360.datasets import CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
import numpy as np

# Load the COMPAS dataset
compas_data = CompasDataset()

# Get indices for age, sex, and race features
age_index = compas_data.feature_names.index('age')
sex_index = compas_data.feature_names.index('sex')
race_index = compas_data.feature_names.index('race')

# Define interested age and race values
ages_of_interest = [19, 20, 33, 43, 48, 52, 55, 65, 96]
races_of_interest = [0, 2, 5]

# Create subgroups based on sex, age, and race
male_subgroup = compas_data.subset(np.logical_and(
    np.isin(compas_data.features[:, age_index], ages_of_interest),
    np.logical_and(compas_data.features[:, sex_index] == 1,
                   np.isin(compas_data.features[:, race_index], races_of_interest))))

female_subgroup = compas_data.subset(np.logical_and(
    np.isin(compas_data.features[:, age_index], ages_of_interest),
    np.logical_and(compas_data.features[:, sex_index] == 0,
                   np.isin(compas_data.features[:, race_index], races_of_interest))))

# Print number of instances and label distribution in each subgroup
print("Number of instances in male subgroup:", male_subgroup.features.shape[0])
print("Number of instances in female subgroup:", female_subgroup.features.shape[0])
print("Label distribution in male subgroup:", np.unique(male_subgroup.labels, return_counts=True))
print("Label distribution in female subgroup:", np.unique(female_subgroup.labels, return_counts=True))

# Calculate and display simple mean difference
def calculate_simple_mean_difference(subgroup):
    labels = subgroup.labels.ravel()  # Flatten the label array
    mean_label = np.mean(labels)     # Calculate the mean of labels
    return mean_label

mean_diff_male = calculate_simple_mean_difference(male_subgroup)
mean_diff_female = calculate_simple_mean_difference(female_subgroup)

print("Simple Mean Difference (Male Subgroup): ", mean_diff_male)
print("Simple Mean Difference (Female Subgroup): ", mean_diff_female)



Number of instances in male subgroup: 82
Number of instances in female subgroup: 366
Label distribution in male subgroup: (array([0., 1.]), array([56, 26]))
Label distribution in female subgroup: (array([0., 1.]), array([139, 227]))
Simple Mean Difference (Male Subgroup):  0.3170731707317073
Simple Mean Difference (Female Subgroup):  0.6202185792349727


Compare subset age, Unprivileged Subset: ({'age': [22.0, 23.0, 24.0, 25.0, 28.0, 30.0, 32.0, 35.0, 37.0, 40.0, 49.0, 54.0], 'sex': [0.0]}, 10.6331)

In [19]:
from aif360.datasets import CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
import numpy as np

# Load the COMPAS dataset
compas_data = CompasDataset()

# Get indices for age, sex, and race features
age_index = compas_data.feature_names.index('age')
sex_index = compas_data.feature_names.index('sex')

# Define interested age values
ages_of_interest = [19, 20, 33, 43, 48, 52, 55, 65, 96]

# Create subgroups for female individuals based on age
female_in_age_subgroup = compas_data.subset(np.logical_and(
    np.isin(compas_data.features[:, age_index], ages_of_interest),
    compas_data.features[:, sex_index] == 0))

female_outside_age_subgroup = compas_data.subset(np.logical_and(
    ~np.isin(compas_data.features[:, age_index], ages_of_interest),
    compas_data.features[:, sex_index] == 0))

# Print number of instances and label distribution in each female subgroup
print("Number of instances in female in age subgroup:", female_in_age_subgroup.features.shape[0])
print("Number of instances in female outside age subgroup:", female_outside_age_subgroup.features.shape[0])
print("Label distribution in female in age subgroup:", np.unique(female_in_age_subgroup.labels, return_counts=True))
print("Label distribution in female outside age subgroup:", np.unique(female_outside_age_subgroup.labels, return_counts=True))

# Calculate and display simple mean difference for female subgroups
mean_diff_female_in_age = calculate_simple_mean_difference(female_in_age_subgroup)
mean_diff_female_outside_age = calculate_simple_mean_difference(female_outside_age_subgroup)

print("Simple Mean Difference (Female In Age Subgroup): ", mean_diff_female_in_age)
print("Simple Mean Difference (Female Outside Age Subgroup): ", mean_diff_female_outside_age)



Number of instances in female in age subgroup: 551
Number of instances in female outside age subgroup: 4443
Label distribution in female in age subgroup: (array([0., 1.]), array([251, 300]))
Label distribution in female outside age subgroup: (array([0., 1.]), array([2347, 2096]))
Simple Mean Difference (Female In Age Subgroup):  0.5444646098003629
Simple Mean Difference (Female Outside Age Subgroup):  0.4717533198289444
