In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import torch

import metrics
import models
# Suppress FutureWarning and UserWarning output for the rest of the session.
# NOTE(review): these filters are global, so they also hide such warnings
# raised by the project modules and libraries used below — confirm intended.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [None]:
# Train the models on each of the 3 datasets.
# `results_data` holds one `models.train_models` result per dataset, in the
# same order as `datasets`.

datasets = ['law_school_edited', 'crime_edited', 'insurance_edited']
results_data = [models.train_models(dataset_name) for dataset_name in datasets]


In [None]:
# Measuring fairness using a set of fairness metrics.
#
# For every entry of `results_data`:
#   * the second-to-last column is used as the protected attribute,
#   * the 'y_test' column is used as the ground truth,
#   * every column before the last two is treated as one model's predictions.
# One summary DataFrame (one row per model column, one column per metric) is
# appended to `fairness_results` per dataset, in the order of `results_data`.

fairness_results = []
for result_data in results_data:
    model_cols = result_data.columns[:-2]
    protected_list = result_data[result_data.columns[-2]].values
    y_true = result_data['y_test'].values

    # Loop-invariant tensor conversions: done once per dataset rather than
    # once per model column.
    # NOTE(review): sharing these tensors across calls assumes metrics.hgr and
    # metrics.hgr_cond do not modify their inputs in place — confirm.
    protected_tensor = torch.Tensor(protected_list)
    y_true_tensor = torch.Tensor(y_true)

    parity_list = []
    indep_list = []
    sep_list = []
    dpd_list = []
    hgr_ind_list = []
    hgr_sep_list = []

    for col in model_cols:
        predictions = result_data[col].values
        predictions_tensor = torch.Tensor(predictions)

        # 'DP1': demographic parity disparity, rounded to 2 decimals.
        parity = metrics.calc_demographic_parity_disparity(predictions, protected_list)
        parity_list.append(round(parity, 2))

        # 'Independence' / 'Separation': first value of the respective columns
        # returned by calculate_regression_measures, rounded to 2 decimals.
        # NOTE(review): the meaning of the trailing `1` argument is not visible
        # in this notebook — check metrics.calculate_regression_measures.
        regression_measures = metrics.calculate_regression_measures(y_true, predictions, protected_list, 1)
        indep_list.append(round(regression_measures['independence'].values[0], 2))
        sep_list.append(round(regression_measures['separation'].values[0], 2))

        # 'DP2': result of metrics.optimized_f_fai, rounded to 2 decimals.
        f = metrics.optimized_f_fai(predictions, protected_list)
        dpd_list.append(round(f, 2))

        # 'DP3': metrics.hgr between predictions and protected attribute (not rounded).
        hgr_ind_list.append(float(metrics.hgr(predictions_tensor, protected_tensor)))

        # 'Equalized Odds': maximum of the values returned by metrics.hgr_cond
        # for (predictions, protected attribute, y_test) (not rounded).
        hgr_sep = metrics.hgr_cond(predictions_tensor, protected_tensor, y_true_tensor)
        hgr_sep_list.append(np.max(hgr_sep))

    fairness_results.append(pd.DataFrame({
        'Model': model_cols,
        'DP1': parity_list,
        'DP2': dpd_list,
        'Independence': indep_list,
        'DP3': hgr_ind_list,
        'Separation': sep_list,
        'Equalized Odds': hgr_sep_list,
    }))


In [None]:
# Calculating Pearson's correlation coefficient and Spearman's rank correlation
# between two fairness metrics (`col1`, `col2`) of one dataset, and reporting
# whether each correlation is statistically significant at level `alpha`.

col1 = 'DP1'
col2 = 'DP2'

# Index into `fairness_results`: 0 selects the first trained dataset.
dataset = fairness_results[0]

# NOTE(review): DP1/DP2 were rounded to 2 decimals when `fairness_results` was
# built, which can introduce ties in the ranks used by Spearman's correlation.
spearman_corr, spearman_p_value = scipy.stats.spearmanr(dataset[col1], dataset[col2])
pearson_corr, pearson_p_value = scipy.stats.pearsonr(dataset[col1], dataset[col2])

alpha = 0.05

# bool(...) so the report prints plain "True"/"False"; a NaN p-value compares
# as not significant.
spearman_significant = bool(spearman_p_value < alpha)
pearson_significant = bool(pearson_p_value < alpha)

# The booleans are formatted directly so the report carries no stray padding
# around "True"/"False".
result = f"""Pearson's Correlation Coefficient: {pearson_corr:.5f}
- Statistically Significant: {pearson_significant}

Spearman's Correlation Coefficient: {spearman_corr:.5f}
- Statistically Significant: {spearman_significant}"""

print(result)

In [None]:
# Plotting fairness values for a pair of metrics: one scatter series per
# dataset, with `col2` on the x-axis and `col1` on the y-axis.

col1 = 'Separation'
col2 = 'Equalized Odds'

# (label, color, marker) for each dataset, matched by position with
# `fairness_results` (law school, crime, insurance — the order of `datasets`).
series_styles = [
    ('Law Dataset', 'blue', 'o'),
    ('Crime Dataset', 'red', '^'),
    ('Insurance Dataset', 'green', 'd'),
]

fig, ax = plt.subplots()
for dataset_idx, (label, color, marker) in enumerate(series_styles):
    # Explicit indexing (rather than zip) so a missing dataset still raises
    # IndexError instead of being silently left out of the figure.
    dataset_fairness = fairness_results[dataset_idx]
    ax.scatter(
        dataset_fairness[col2],
        dataset_fairness[col1],
        color=color,
        alpha=0.8,
        label=label,
        marker=marker,
    )

ax.set_xlabel(col2)
ax.set_ylabel(col1)
ax.set_title(f'{col1} vs. {col2}')
ax.legend()
ax.grid(True)
plt.show()