In [66]:
import pandas as pd
import json
import os
import deepsig
from IPython.display import display

In [67]:
cols = ['dataset', 'method', 'fitness_rule', 'fitness', 'ACC', 'MCC', 'f1_score', 'avg_odds_diff', 'stat_par_diff', 'eq_opp_diff']

In [68]:
baseline_results = pd.read_csv('baseline_results.csv')
baseline_results.replace({'ftl_mlp_initializer': 'Standard FTL (baseline)'}, inplace=True)

pearson_results = pd.read_csv('pearson_results.csv')
pearson_results.replace({'ftl_mlp_initializer': 'Pearson regularized FTL'}, inplace=True)

spearman_results = pd.read_csv('spearman_results.csv')
spearman_results.replace({'ftl_mlp_initializer': 'Spearman regularized FTL'}, inplace=True)

results = pd.concat([baseline_results, pearson_results, spearman_results])

In [69]:
results.replace({'adult_dataset_reader': 'Adult Income', 'compas_dataset_reader': 'Compas Recidivism', 'german_dataset_reader': 'German Credit', 'bank_dataset_reader': 'Bank Marketing'}, inplace=True)
results.rename(columns={'avg_odds_diff': 'Equalized Odds', 'stat_par_diff': 'Statistical Parity', 'eq_opp_diff': 'Equal Opportunity', 'MCC': 'Mathew Correlation', 'ACC': 'Accuracy'}, inplace=True)

In [70]:
fitness_rules_target_metrics = {
    'mcc_parity': {'performance': 'Mathew Correlation', 'fairness': 'Statistical Parity'},
    'mcc_opportunity': {'performance': 'Mathew Correlation', 'fairness': 'Equal Opportunity'},
    'mcc_odds': {'performance': 'Mathew Correlation', 'fairness': 'Equalized Odds'},
    'acc_parity': {'performance': 'Accuracy', 'fairness': 'Statistical Parity'},
    'acc_opportunity': {'performance': 'Accuracy', 'fairness': 'Equal Opportunity'},
    'acc_odds': {'performance': 'Accuracy', 'fairness': 'Equalized Odds'}
}

fitness_rules_target_metrics = {
    'mcc_parity': ('Mathew Correlation', 'Statistical Parity'),
    'mcc_opportunity': ('Mathew Correlation', 'Equal Opportunity'),
    'mcc_odds': ('Mathew Correlation', 'Equalized Odds'),
    'acc_parity': ('Accuracy', 'Statistical Parity'),
    'acc_opportunity': ('Accuracy', 'Equal Opportunity'),
    'acc_odds': ('Accuracy', 'Equalized Odds')
}
fitness_rules_abvr = {
    'mcc_parity': 'Max(MCC - Stat. Parity)',
    'mcc_opportunity': 'Max(MCC - Eq. Odds)',
    'mcc_odds': 'Max(MCC - Eq. Opp.)',
    'acc_parity': 'Max(Acc - Stat. Parity)',
    'acc_opportunity': 'Max(Acc - Eq. Odds)',
    'acc_odds': 'Max(Acc - Eq. Opp.)'
}

results['Performance'] = 0
results['Fairness'] = 0
results['Fitness Rule'] = ''
for fitness_rule, (performance_metric, fairness_metric) in fitness_rules_target_metrics.items():
    results.loc[results.fitness_rule == fitness_rule,'Performance'] = results.loc[results.fitness_rule == fitness_rule,performance_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fairness'] = results.loc[results.fitness_rule == fitness_rule,fairness_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule Abvr'] = fitness_rules_abvr[fitness_rule]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule'] = 'Max(%s - %s)' % fitness_rules_target_metrics[fitness_rule]

 0.50936495 0.52610501 0.50501965 0.49517103 0.52255177 0.48277552
 0.49360563 0.49928347 0.49428572 0.50830079 0.5315851  0.49084885
 0.49727459 0.52602458 0.51498102 0.49446574 0.52981801 0.51333191
 0.51460142 0.49477946 0.4559726  0.49333343 0.53090834 0.49284321
 0.48727491 0.46760391 0.48532912 0.48816006 0.49060769 0.50033309
 0.54306751 0.52179051 0.4829124  0.51199556 0.52334945 0.50003926]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  results.loc[results.fitness_rule == fitness_rule,'Performance'] = results.loc[results.fitness_rule == fitness_rule,performance_metric]
 0.00711975 0.00623328 0.00497483 0.00291418 0.02981982 0.0109755
 0.02228438 0.00327132 0.02537383 0.00143259 0.01617422 0.01169424
 0.01656405 0.02579937 0.00583962 0.01503756 0.00376432 0.00064319
 0.00965313 0.02342952 0.01174198 0.00202581 0.01859382 0.00603458
 0.01774073 0.0244613  0.05969289 0.00814399 0.01136305 0.00834383
 0.0523313  0.02800188 0.01513682 0.01

In [71]:
display(results)

Unnamed: 0,dataset,method,fitness_rule,fitness,Accuracy,Mathew Correlation,Equalized Odds,Statistical Parity,Equal Opportunity,solution,Performance,Fairness,Fitness Rule,Fitness Rule Abvr
0,Adult Income,Standard FTL (baseline),mcc_parity,0.494246,0.834936,0.559881,0.105715,0.065635,0.192739,"{'corr_type': 'pearson', 'l2': 0.0, 'dropout':...",0.559881,0.065635,Max(Mathew Correlation - Statistical Parity),Max(MCC - Stat. Parity)
1,Adult Income,Standard FTL (baseline),mcc_odds,0.526651,0.844002,0.575709,0.049059,0.170094,0.018271,"{'corr_type': 'pearson', 'l2': 0.0, 'dropout':...",0.575709,0.049059,Max(Mathew Correlation - Equalized Odds),Max(MCC - Eq. Opp.)
2,Adult Income,Standard FTL (baseline),mcc_opportunity,0.528761,0.823328,0.590304,0.099124,0.259137,0.061543,"{'corr_type': 'pearson', 'l2': 0.0, 'dropout':...",0.590304,0.061543,Max(Mathew Correlation - Equal Opportunity),Max(MCC - Eq. Odds)
3,Adult Income,Standard FTL (baseline),acc_parity,0.728663,0.836263,0.530541,0.024246,0.107601,0.027139,"{'corr_type': 'pearson', 'l2': 0.0, 'dropout':...",0.836263,0.107601,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
4,Adult Income,Standard FTL (baseline),acc_odds,0.824289,0.839469,0.526192,0.015181,0.104461,0.002116,"{'corr_type': 'pearson', 'l2': 0.0, 'dropout':...",0.839469,0.015181,Max(Accuracy - Equalized Odds),Max(Acc - Eq. Opp.)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,Adult Income,Spearman regularized FTL,mcc_odds,0.539093,0.843449,0.580012,0.040919,0.166083,0.012468,"{'corr_type': 'spearman', 'l2': 0.001, 'dropou...",0.580012,0.040919,Max(Mathew Correlation - Equalized Odds),Max(MCC - Eq. Opp.)
68,Adult Income,Spearman regularized FTL,mcc_opportunity,0.585072,0.842233,0.589531,0.046258,0.188986,0.004459,"{'corr_type': 'spearman', 'l2': 0.0001, 'dropo...",0.589531,0.004459,Max(Mathew Correlation - Equal Opportunity),Max(MCC - Eq. Odds)
69,Adult Income,Spearman regularized FTL,acc_parity,0.791413,0.815810,0.436570,0.218489,0.024396,0.389870,"{'corr_type': 'spearman', 'l2': 0.0001, 'dropo...",0.815810,0.024396,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
70,Adult Income,Spearman regularized FTL,acc_odds,0.792800,0.844223,0.557665,0.051423,0.122700,0.063949,"{'corr_type': 'spearman', 'l2': 0.0001, 'dropo...",0.844223,0.051423,Max(Accuracy - Equalized Odds),Max(Acc - Eq. Opp.)


In [72]:
datasets = ['Adult Income']#, 'Bank Marketing', 'Compas Recidivism','German Credit']
datasets

['Adult Income']

In [73]:
fitness_rules = ['mcc_parity', 'mcc_opportunity', 'mcc_odds', 'acc_parity', 'acc_opportunity', 'acc_odds']
fitness_rules

['mcc_parity',
 'mcc_opportunity',
 'mcc_odds',
 'acc_parity',
 'acc_opportunity',
 'acc_odds']

In [74]:
methods = ['Standard FTL (baseline)', 'Pearson regularized FTL', 'Spearman regularized FTL']
methods

['Standard FTL (baseline)',
 'Pearson regularized FTL',
 'Spearman regularized FTL']

In [75]:
if os.path.exists('multi_aso_data_list.json'):
    with open('multi_aso_data_list.json') as file:
        multi_aso_data_list = json.load(file)
else:    
    multi_aso_data_list = []
    for d in datasets:
        multi_aso_data = []
        for f in fitness_rules:
            methods_results = []
            for m in methods:
                r = results.loc[ (results['dataset'] == d) &
                                     (results['fitness_rule'] == f) &
                                     (results['method'] == m) ]\
                            .fitness.tolist()
                if len(r) == 0:
                    r = [-1]
                methods_results.append(r)
            min_eps = deepsig.multi_aso(methods_results, confidence_level=0.95)
            multi_aso_data_list.append({'fitness_rule': f, 'dataset': d, 'min_eps': min_eps.tolist()})
    with open('multi_aso_data_list.json', 'w') as file:
        json.dump(multi_aso_data_list, file)

Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 418.70it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:06<00:00, 428.32it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 423.09it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 425.93it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 424.78it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 423.56it/s]


In [76]:
aso_df_resume = []
reverse_aso_df_resume = []
for aso_result in sorted(multi_aso_data_list, key=lambda x: x['dataset']):
    fitness_rule = aso_result['fitness_rule']
    dataset = aso_result['dataset']

    aso_df = pd.DataFrame(aso_result['min_eps'], columns=methods)
    aso_df['method'] = methods
    aso_df['dataset'] = dataset
    aso_df['fitness_rule'] = fitness_rule
    aso_df_resume.append(aso_df)


print('Significance Testing')
significance = pd.concat(aso_df_resume)
significance.replace(fitness_rules_abvr, inplace=True)
significance = significance.set_index(['fitness_rule', 'dataset'])
significance = significance.sort_values(by=['fitness_rule', 'dataset'], ascending=[False, True])
display(significance)


Significance Testing


Unnamed: 0_level_0,Unnamed: 1_level_0,Standard FTL (baseline),Pearson regularized FTL,Spearman regularized FTL,method
fitness_rule,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Max(MCC - Stat. Parity),Adult Income,1.0,1.0,0.376663,Standard FTL (baseline)
Max(MCC - Stat. Parity),Adult Income,0.590427,1.0,0.233961,Pearson regularized FTL
Max(MCC - Stat. Parity),Adult Income,1.0,1.0,1.0,Spearman regularized FTL
Max(MCC - Eq. Opp.),Adult Income,1.0,1.0,1.0,Standard FTL (baseline)
Max(MCC - Eq. Opp.),Adult Income,0.329814,1.0,0.692762,Pearson regularized FTL
Max(MCC - Eq. Opp.),Adult Income,0.43016,1.0,1.0,Spearman regularized FTL
Max(MCC - Eq. Odds),Adult Income,1.0,0.17221,0.607859,Standard FTL (baseline)
Max(MCC - Eq. Odds),Adult Income,1.0,1.0,1.0,Pearson regularized FTL
Max(MCC - Eq. Odds),Adult Income,1.0,0.478243,1.0,Spearman regularized FTL
Max(Acc - Stat. Parity),Adult Income,1.0,1.0,1.0,Standard FTL (baseline)


In [77]:
grouped_results = results\
    .groupby(['Fitness Rule Abvr', 'dataset', 'method'])\
    .agg({'fitness': ['mean', 'std', 'count'], 'Performance': ['mean', 'std'], 'Fairness': ['mean', 'std']})\
    .sort_values(by=['Fitness Rule Abvr', 'dataset', ('fitness','mean')], ascending=False)
grouped_results['formatted_fitness'] = grouped_results.apply(lambda row: f"${row[('fitness', 'mean')]:.3f} (\pm{row[('fitness', 'std')]:.2f})$", axis=1)
grouped_results['formatted_performance'] = grouped_results.apply(lambda row: f"${row[('Performance', 'mean')]:.3f} (\pm{row[('Performance', 'std')]:.2f})$", axis=1)
grouped_results['formatted_fairness'] = grouped_results.apply(lambda row: f"${row[('Fairness', 'mean')]:.3f} (\pm{row[('Fairness', 'std')]:.2f})$", axis=1)
grouped_results = grouped_results.sort_values(by=['Fitness Rule Abvr', 'dataset'])
grouped_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fitness,fitness,fitness,Performance,Performance,Fairness,Fairness,formatted_fitness,formatted_performance,formatted_fairness
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,mean,std,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Fitness Rule Abvr,dataset,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Max(Acc - Eq. Odds),Adult Income,Pearson regularized FTL,0.81644,0.024623,16,0.844002,0.00914,0.027562,0.022816,$0.816 (\pm0.02)$,$0.844 (\pm0.01)$,$0.028 (\pm0.02)$
Max(Acc - Eq. Odds),Adult Income,Standard FTL (baseline),0.8134,0.021986,13,0.847098,0.002867,0.033698,0.02106,$0.813 (\pm0.02)$,$0.847 (\pm0.00)$,$0.034 (\pm0.02)$
Max(Acc - Eq. Odds),Adult Income,Spearman regularized FTL,0.793284,0.036451,12,0.843265,0.003187,0.049981,0.037638,$0.793 (\pm0.04)$,$0.843 (\pm0.00)$,$0.050 (\pm0.04)$
Max(Acc - Eq. Opp.),Adult Income,Pearson regularized FTL,0.806716,0.015056,16,0.843954,0.007224,0.037238,0.01454,$0.807 (\pm0.02)$,$0.844 (\pm0.01)$,$0.037 (\pm0.01)$
Max(Acc - Eq. Opp.),Adult Income,Spearman regularized FTL,0.805827,0.011884,12,0.8425,0.003842,0.036674,0.011862,$0.806 (\pm0.01)$,$0.843 (\pm0.00)$,$0.037 (\pm0.01)$
Max(Acc - Eq. Opp.),Adult Income,Standard FTL (baseline),0.801817,0.015775,14,0.839035,0.008325,0.037218,0.017543,$0.802 (\pm0.02)$,$0.839 (\pm0.01)$,$0.037 (\pm0.02)$
Max(Acc - Stat. Parity),Adult Income,Pearson regularized FTL,0.81338,0.011659,16,0.828372,0.008529,0.014992,0.011506,$0.813 (\pm0.01)$,$0.828 (\pm0.01)$,$0.015 (\pm0.01)$
Max(Acc - Stat. Parity),Adult Income,Spearman regularized FTL,0.809098,0.012363,12,0.824912,0.009076,0.015814,0.008208,$0.809 (\pm0.01)$,$0.825 (\pm0.01)$,$0.016 (\pm0.01)$
Max(Acc - Stat. Parity),Adult Income,Standard FTL (baseline),0.799946,0.023751,14,0.827561,0.00875,0.027615,0.026391,$0.800 (\pm0.02)$,$0.828 (\pm0.01)$,$0.028 (\pm0.03)$
Max(MCC - Eq. Odds),Adult Income,Standard FTL (baseline),0.552515,0.029883,14,0.580809,0.010422,0.028294,0.02873,$0.553 (\pm0.03)$,$0.581 (\pm0.01)$,$0.028 (\pm0.03)$
