In [53]:
import pandas as pd
import json
import os
import deepsig
from IPython.display import display

In [54]:
cols = ['dataset', 'method', 'fitness_rule', 'fitness', 'ACC', 'MCC', 'f1_score', 'avg_odds_diff', 'stat_par_diff', 'eq_opp_diff']

In [55]:
mlp_baseline_results = pd.read_csv('simple_mlp_results.csv')
mlp_baseline_results.replace({'simple_mlp_initializer': r'MLP'}, inplace=True)

mlp_standard_l2_results = pd.read_csv('mlp_standard_l2_results.csv')
mlp_standard_l2_results.replace({'mlp_standard_l2_initializer': r'MLP+L2'}, inplace=True)

mlp_preg_results = pd.read_csv('mlp_preg_results.csv')
mlp_preg_results.replace({'mlp_preg_initializer': r'MLP+RPR$_{\rho}$'}, inplace=True)

mlp_sreg_results = pd.read_csv('mlp_sreg_results.csv')
mlp_sreg_results.replace({'mlp_sreg_initializer': r'MLP+RPR$_{\rho_s}$'}, inplace=True)

mlp_kreg_results = pd.read_csv('mlp_kreg_results.csv')
mlp_kreg_results.replace({'mlp_kreg_initializer': r'MLP+RPR$_{\tau}$'}, inplace=True)

mlp_xi_reg_results = pd.read_csv('mlp_xi_reg_results.csv')
mlp_xi_reg_results.replace({'mlp_xi_reg_initializer': r'MLP+RPR$_{\xi}$'}, inplace=True)


results = pd.concat([mlp_baseline_results, mlp_standard_l2_results, mlp_preg_results, mlp_sreg_results, mlp_kreg_results, mlp_xi_reg_results], ignore_index=True)


In [56]:
results.replace({'adult_dataset_reader': 'Adult Income', 'compas_dataset_reader': 'Compas Recidivism', 'german_dataset_reader': 'German Credit', 'bank_dataset_reader': 'Bank Marketing'}, inplace=True)
results.rename(columns={'avg_odds_diff': 'Equalized Odds', 'stat_par_diff': 'Statistical Parity', 'eq_opp_diff': 'Equal Opportunity', 'MCC': 'Mathew Correlation', 'ACC': 'Accuracy'}, inplace=True)

In [57]:
fitness_rules_target_metrics = {
    'mcc_parity': {'performance': 'Mathew Correlation', 'fairness': 'Statistical Parity'},
    'mcc_opportunity': {'performance': 'Mathew Correlation', 'fairness': 'Equal Opportunity'},
    'mcc_odds': {'performance': 'Mathew Correlation', 'fairness': 'Equalized Odds'},
    'acc_parity': {'performance': 'Accuracy', 'fairness': 'Statistical Parity'},
    'acc_opportunity': {'performance': 'Accuracy', 'fairness': 'Equal Opportunity'},
    'acc_odds': {'performance': 'Accuracy', 'fairness': 'Equalized Odds'}
}

fitness_rules_target_metrics = {
    'mcc_parity': ('Mathew Correlation', 'Statistical Parity'),
    'mcc_opportunity': ('Mathew Correlation', 'Equal Opportunity'),
    'mcc_odds': ('Mathew Correlation', 'Equalized Odds'),
    'acc_parity': ('Accuracy', 'Statistical Parity'),
    'acc_opportunity': ('Accuracy', 'Equal Opportunity'),
    'acc_odds': ('Accuracy', 'Equalized Odds')
}
fitness_rules_abvr = {
    'mcc_parity': 'Max(MCC - Stat. Parity)',
    'mcc_opportunity': 'Max(MCC - Eq. Odds)',
    'mcc_odds': 'Max(MCC - Eq. Opp.)',
    'acc_parity': 'Max(Acc - Stat. Parity)',
    'acc_opportunity': 'Max(Acc - Eq. Odds)',
    'acc_odds': 'Max(Acc - Eq. Opp.)'
}

results['Performance'] = 0
results['Fairness'] = 0
results['Fitness Rule'] = ''
for fitness_rule, (performance_metric, fairness_metric) in fitness_rules_target_metrics.items():
    results.loc[results.fitness_rule == fitness_rule,'Performance'] = results.loc[results.fitness_rule == fitness_rule,performance_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fairness'] = results.loc[results.fitness_rule == fitness_rule,fairness_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule Abvr'] = fitness_rules_abvr[fitness_rule]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule'] = 'Max(%s - %s)' % fitness_rules_target_metrics[fitness_rule]

 0.58075151 0.57811729 0.57147862 0.55847176 0.56402881 0.58191431
 0.58343253 0.57740222 0.51579307 0.30958237 0.37962947 0.55899189
 0.53558872 0.2598879  0.57717035 0.50086739 0.33275221 0.37929293
 0.47729109 0.29196869 0.29037704 0.52303609 0.27480633 0.38668781
 0.50626951 0.29960979 0.4042848  0.52119218 0.2936422  0.33522388
 0.49730721 0.25112662 0.35119067 0.510052   0.26843855 0.36081008
 0.52157495 0.30114722 0.42995147 0.48267704 0.29056691 0.23139881
 0.54223416 0.23653437 0.27041017 0.52121263 0.25759348 0.23912165
 0.5223892  0.27406652 0.22303564 0.54094974 0.27699558 0.30939251
 0.51731345 0.30339828 0.37825089 0.52113734 0.26323857 0.33963196
 0.5538225  0.27850228 0.30234984 0.51877126 0.29494101 0.2648939
 0.52742877 0.24492927 0.13407928 0.53530394 0.29087766 0.26820576
 0.55204731 0.29318377 0.14015216 0.51983069 0.3145689  0.47077449
 0.50770986 0.28880123 0.29866465 0.5180664  0.29093567 0.3636368
 0.53289704 0.29737768 0.38084556 0.54328387 0.28002914 0.282325

In [58]:
datasets = ['Adult Income', 'Bank Marketing', 'Compas Recidivism','German Credit']
datasets

['Adult Income', 'Bank Marketing', 'Compas Recidivism', 'German Credit']

In [59]:
fitness_rules = ['mcc_parity', 'mcc_opportunity', 'mcc_odds', 'acc_parity', 'acc_opportunity', 'acc_odds']
fitness_rules

['mcc_parity',
 'mcc_opportunity',
 'mcc_odds',
 'acc_parity',
 'acc_opportunity',
 'acc_odds']

In [60]:
methods = [r'MLP', r'MLP+L2', r'MLP+RPR$_{\rho}$', r'MLP+RPR$_{\rho_s}$', r'MLP+RPR$_{\tau}$', r'MLP+RPR$_{\xi}$']
significances = []
grouped_results_list = []

In [61]:
path = 'mlp_multi_aso_data_list.json'

if os.path.exists(path):
    with open(path) as file:
        multi_aso_data_list = json.load(file)
else:    
    multi_aso_data_list = []
    for d in datasets:
        multi_aso_data = []
        for f in fitness_rules:
            methods_results = []
            for m in methods:
                r = results.loc[ (results['dataset'] == d) &
                                     (results['fitness_rule'] == f) &
                                     (results['method'] == m) ]\
                            .fitness.tolist()
                if len(r) == 0:
                    r = [-1]
                methods_results.append(r)
            min_eps = deepsig.multi_aso(methods_results, confidence_level=0.95)
            multi_aso_data_list.append({'fitness_rule': f, 'dataset': d, 'min_eps': min_eps.tolist()})
    with open(path, 'w') as file:
        json.dump(multi_aso_data_list, file)

aso_df_resume = []
for aso_result in sorted(multi_aso_data_list, key=lambda x: x['dataset']):
    fitness_rule = aso_result['fitness_rule']
    dataset = aso_result['dataset']

    aso_df = pd.DataFrame(aso_result['min_eps'], columns=methods)
    aso_df['method'] = methods
    aso_df['dataset'] = dataset
    aso_df['fitness_rule'] = fitness_rule
    aso_df_resume.append(aso_df)

print('Significance Testing')
significance = pd.concat(aso_df_resume)
significance.replace(fitness_rules_abvr, inplace=True)
significance = significance.set_index(['fitness_rule', 'dataset'])
significance = significance.sort_values(by=['fitness_rule', 'dataset'], ascending=[False, True])
significances.append(significance)

grouped_results = results\
    .groupby(['Fitness Rule Abvr', 'dataset', 'method'])\
    .agg({'fitness': ['mean', 'std', 'count'], 'Performance': ['mean', 'std'], 'Fairness': ['mean', 'std']})\
    .sort_values(by=['Fitness Rule Abvr', 'dataset', ('fitness','mean')], ascending=False)
grouped_results['formatted_fitness'] = grouped_results.apply(lambda row: f"${row[('fitness', 'mean')]:.3f} (\pm{row[('fitness', 'std')]:.2f})$", axis=1)
grouped_results['formatted_performance'] = grouped_results.apply(lambda row: f"${row[('Performance', 'mean')]:.3f} (\pm{row[('Performance', 'std')]:.2f})$", axis=1)
grouped_results['formatted_fairness'] = grouped_results.apply(lambda row: f"${row[('Fairness', 'mean')]:.3f} (\pm{row[('Fairness', 'std')]:.2f})$", axis=1)
grouped_results = grouped_results.sort_values(by=['Fitness Rule Abvr', 'dataset'])
grouped_results_list.append(grouped_results)

Model comparisons: 100%|█████████▉| 14985/15000 [00:36<00:00, 415.76it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:41<00:00, 361.07it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:36<00:00, 411.79it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:34<00:00, 432.06it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:35<00:00, 424.86it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:39<00:00, 380.58it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:41<00:00, 361.05it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:40<00:00, 368.91it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:41<00:00, 363.62it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:42<00:00, 353.09it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:40<00:00, 373.47it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:40<00:00, 370.29it/s]
Model comparisons: 100%|█████████▉| 14985/15000 [00:39<00:00, 376.84it/s]
Model comparisons: 100%|█████████▉| 14

Significance Testing


In [62]:
grouped_results = results\
    .groupby(['fitness_rule', 'dataset', 'method'])\
    .agg({'fitness': ['mean', 'std', 'count'], 'Performance': ['mean', 'std'], 'Fairness': ['mean', 'std']})\
    .sort_values(by=['fitness_rule', 'dataset', ('fitness','mean')], ascending=[False, True, False])
grouped_results['formatted_fitness'] = grouped_results.apply(lambda row: f"${row[('fitness', 'mean')]:.3f} (\pm{row[('fitness', 'std')]:.2f})$", axis=1)
grouped_results['formatted_performance'] = grouped_results.apply(lambda row: f"${row[('Performance', 'mean')]:.3f} (\pm{row[('Performance', 'std')]:.2f})$", axis=1)
grouped_results['formatted_fairness'] = grouped_results.apply(lambda row: f"${row[('Fairness', 'mean')]:.3f} (\pm{row[('Fairness', 'std')]:.2f})$", axis=1)
grouped_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fitness,fitness,fitness,Performance,Performance,Fairness,Fairness,formatted_fitness,formatted_performance,formatted_fairness
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,mean,std,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
fitness_rule,dataset,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
mcc_parity,Adult Income,MLP+RPR$_{\rho}$,0.393134,0.012324,18,0.580085,0.010404,0.186951,0.013465,$0.393 (\pm0.01)$,$0.580 (\pm0.01)$,$0.187 (\pm0.01)$
mcc_parity,Adult Income,MLP+RPR$_{\xi}$,0.387579,0.010264,15,0.578472,0.010924,0.190892,0.012732,$0.388 (\pm0.01)$,$0.578 (\pm0.01)$,$0.191 (\pm0.01)$
mcc_parity,Adult Income,MLP,0.385655,0.010666,15,0.576385,0.009136,0.190730,0.008002,$0.386 (\pm0.01)$,$0.576 (\pm0.01)$,$0.191 (\pm0.01)$
mcc_parity,Adult Income,MLP+RPR$_{\tau}$,0.385619,0.012184,15,0.576804,0.008372,0.191185,0.010923,$0.386 (\pm0.01)$,$0.577 (\pm0.01)$,$0.191 (\pm0.01)$
mcc_parity,Adult Income,MLP+L2,0.385462,0.014493,15,0.575642,0.010278,0.190180,0.011652,$0.385 (\pm0.01)$,$0.576 (\pm0.01)$,$0.190 (\pm0.01)$
...,...,...,...,...,...,...,...,...,...,...,...,...
acc_odds,German Credit,MLP+RPR$_{\tau}$,0.646668,0.074699,14,0.747857,0.033668,0.101189,0.067014,$0.647 (\pm0.07)$,$0.748 (\pm0.03)$,$0.101 (\pm0.07)$
acc_odds,German Credit,MLP+RPR$_{\rho_s}$,0.646608,0.063586,16,0.742500,0.028107,0.095892,0.049303,$0.647 (\pm0.06)$,$0.742 (\pm0.03)$,$0.096 (\pm0.05)$
acc_odds,German Credit,MLP+RPR$_{\rho}$,0.641057,0.038716,16,0.734375,0.028976,0.093318,0.044720,$0.641 (\pm0.04)$,$0.734 (\pm0.03)$,$0.093 (\pm0.04)$
acc_odds,German Credit,MLP+RPR$_{\xi}$,0.640351,0.062612,15,0.747667,0.020430,0.107316,0.058156,$0.640 (\pm0.06)$,$0.748 (\pm0.02)$,$0.107 (\pm0.06)$


In [70]:
selected_columns = ['formatted_fitness', 'formatted_performance', 'formatted_fairness']
for fitness_rule in fitness_rules:
    grouped_results.loc[fitness_rule][selected_columns].to_latex(f'tables/grouped_results_{fitness_rule}_crp.tex')
     #.to_latex(f'tables/grouped_results_{fitness_rule}_crp.tex', columns=selected_columns))

In [72]:
significances = significance[significance.method == r'MLP+RPR$_{\xi}$'][[r'MLP', r'MLP+L2', r'MLP+RPR$_{\rho}$', r'MLP+RPR$_{\rho_s}$', r'MLP+RPR$_{\tau}$']]
significances.to_latex(f'tables/aso_rpr_variations.tex')
significances

Unnamed: 0_level_0,Unnamed: 1_level_0,MLP,MLP+L2,MLP+RPR$_{\rho}$,MLP+RPR$_{\rho_s}$,MLP+RPR$_{\tau}$
fitness_rule,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Max(MCC - Stat. Parity),Adult Income,0.801043,0.678098,1.0,0.6147,0.652057
Max(MCC - Stat. Parity),Bank Marketing,1.0,0.46102,0.799061,0.735043,0.589013
Max(MCC - Stat. Parity),Compas Recidivism,0.015765,0.172591,0.184003,0.1469,0.087308
Max(MCC - Stat. Parity),German Credit,1.0,1.0,1.0,0.244125,1.0
Max(MCC - Eq. Opp.),Adult Income,0.085117,0.030187,0.256104,0.458334,0.281296
Max(MCC - Eq. Opp.),Bank Marketing,1.0,1.0,0.791482,1.0,0.551217
Max(MCC - Eq. Opp.),Compas Recidivism,0.190255,0.23099,0.600127,0.334401,0.887471
Max(MCC - Eq. Opp.),German Credit,1.0,0.707371,1.0,0.889051,1.0
Max(MCC - Eq. Odds),Adult Income,0.289463,0.238521,0.24674,0.627617,0.458722
Max(MCC - Eq. Odds),Bank Marketing,1.0,1.0,1.0,1.0,1.0
