In [21]:
import pandas as pd
import json
import os
import deepsig
from IPython.display import display

In [22]:
cols = ['dataset', 'method', 'fitness_rule', 'fitness', 'ACC', 'MCC', 'f1_score', 'avg_odds_diff', 'stat_par_diff', 'eq_opp_diff']

In [23]:
mlp_baseline_results = pd.read_csv('mlp_baseline_results.csv')
mlp_baseline_results.replace({'simple_mlp_initializer': 'Standard MLP (baseline)'}, inplace=True)

mlp_pearson_results = pd.read_csv('mlp_pearson_results.csv')
mlp_pearson_results.replace({'mlp_preg_initializer': 'Pearson regularized MLP'}, inplace=True)

mlp_spearman_results = pd.read_csv('mlp_spearman_results.csv')
mlp_spearman_results.replace({'mlp_sreg_initializer': 'Spearman regularized MLP'}, inplace=True)

mlp_results = pd.concat([mlp_baseline_results, mlp_pearson_results, mlp_spearman_results])

ftl_baseline_results = pd.read_csv('ftl_baseline_results.csv')
ftl_baseline_results.replace({'ftl_mlp_initializer': 'Standard FTL (baseline)'}, inplace=True)

ftl_pearson_results = pd.read_csv('ftl_pearson_results.csv')
ftl_pearson_results.replace({'ftl_mlp_initializer': 'Pearson regularized FTL'}, inplace=True)

ftl_spearman_results = pd.read_csv('ftl_spearman_results.csv')
ftl_spearman_results.replace({'ftl_mlp_initializer': 'Spearman regularized FTL'}, inplace=True)

ftl_results = pd.concat([ftl_baseline_results, ftl_pearson_results, ftl_spearman_results])

In [24]:
for results in [mlp_results,ftl_results]:
    results.replace({'adult_dataset_reader': 'Adult Income', 'compas_dataset_reader': 'Compas Recidivism', 'german_dataset_reader': 'German Credit', 'bank_dataset_reader': 'Bank Marketing'}, inplace=True)
    results.rename(columns={'avg_odds_diff': 'Equalized Odds', 'stat_par_diff': 'Statistical Parity', 'eq_opp_diff': 'Equal Opportunity', 'MCC': 'Mathew Correlation', 'ACC': 'Accuracy'}, inplace=True)

In [25]:
fitness_rules_target_metrics = {
    'mcc_parity': {'performance': 'Mathew Correlation', 'fairness': 'Statistical Parity'},
    'mcc_opportunity': {'performance': 'Mathew Correlation', 'fairness': 'Equal Opportunity'},
    'mcc_odds': {'performance': 'Mathew Correlation', 'fairness': 'Equalized Odds'},
    'acc_parity': {'performance': 'Accuracy', 'fairness': 'Statistical Parity'},
    'acc_opportunity': {'performance': 'Accuracy', 'fairness': 'Equal Opportunity'},
    'acc_odds': {'performance': 'Accuracy', 'fairness': 'Equalized Odds'}
}

fitness_rules_target_metrics = {
    'mcc_parity': ('Mathew Correlation', 'Statistical Parity'),
    'mcc_opportunity': ('Mathew Correlation', 'Equal Opportunity'),
    'mcc_odds': ('Mathew Correlation', 'Equalized Odds'),
    'acc_parity': ('Accuracy', 'Statistical Parity'),
    'acc_opportunity': ('Accuracy', 'Equal Opportunity'),
    'acc_odds': ('Accuracy', 'Equalized Odds')
}
fitness_rules_abvr = {
    'mcc_parity': 'Max(MCC - Stat. Parity)',
    'mcc_opportunity': 'Max(MCC - Eq. Odds)',
    'mcc_odds': 'Max(MCC - Eq. Opp.)',
    'acc_parity': 'Max(Acc - Stat. Parity)',
    'acc_opportunity': 'Max(Acc - Eq. Odds)',
    'acc_odds': 'Max(Acc - Eq. Opp.)'
}

for results in [mlp_results,ftl_results]:
    results['Performance'] = 0
    results['Fairness'] = 0
    results['Fitness Rule'] = ''
    for fitness_rule, (performance_metric, fairness_metric) in fitness_rules_target_metrics.items():
        results.loc[results.fitness_rule == fitness_rule,'Performance'] = results.loc[results.fitness_rule == fitness_rule,performance_metric]
        results.loc[results.fitness_rule == fitness_rule,'Fairness'] = results.loc[results.fitness_rule == fitness_rule,fairness_metric]
        results.loc[results.fitness_rule == fitness_rule,'Fitness Rule Abvr'] = fitness_rules_abvr[fitness_rule]
        results.loc[results.fitness_rule == fitness_rule,'Fitness Rule'] = 'Max(%s - %s)' % fitness_rules_target_metrics[fitness_rule]

 0.58075151 0.57811729 0.57147862 0.55847176 0.56402881 0.56841327
 0.56481476 0.58141906 0.57136131 0.58334192 0.55288815 0.56058845
 0.5737951  0.57000708 0.57640052 0.58261133 0.57454722 0.57134873
 0.59039745 0.58475101 0.58230021 0.57479453 0.5876658  0.59199945
 0.59145802 0.5797924  0.59391819]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  results.loc[results.fitness_rule == fitness_rule,'Performance'] = results.loc[results.fitness_rule == fitness_rule,performance_metric]
 0.19442911 0.19755228 0.19564214 0.19166634 0.18299811 0.19053001
 0.16746315 0.16763191 0.19320045 0.18703865 0.18429954 0.1597833
 0.20815391 0.18244748 0.17584256 0.19747861 0.17104265 0.18574429
 0.19784707 0.17967529 0.18831391 0.17770337 0.20533483 0.19024169
 0.19313822 0.18508573 0.17207131]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  results.loc[results.fitness_rule == fitness_rule,'Fairness'] = results.loc[resul

In [27]:
display(mlp_results)

Unnamed: 0,dataset,method,fitness_rule,fitness,Accuracy,Mathew Correlation,Equalized Odds,Statistical Parity,Equal Opportunity,solution,Performance,Fairness,Fitness Rule,Fitness Rule Abvr
0,Adult Income,Standard MLP (baseline),mcc_parity,0.392537,0.851299,0.585170,0.113127,0.192633,0.142532,{'dropout': 0.16228954240968418},0.585170,0.192633,Max(Mathew Correlation - Statistical Parity),Max(MCC - Stat. Parity)
1,Adult Income,Standard MLP (baseline),mcc_odds,0.473935,0.851520,0.585500,0.111565,0.198915,0.132052,{'dropout': 0.1482543145261793},0.585500,0.111565,Max(Mathew Correlation - Equalized Odds),Max(MCC - Eq. Opp.)
2,Adult Income,Standard MLP (baseline),mcc_opportunity,0.460175,0.851410,0.585064,0.103235,0.191606,0.124889,{'dropout': 0.17861515209545933},0.585064,0.124889,Max(Mathew Correlation - Equal Opportunity),Max(MCC - Eq. Odds)
3,Adult Income,Standard MLP (baseline),acc_parity,0.661796,0.844887,0.565373,0.082386,0.183090,0.084174,{'dropout': 0.15655371267306142},0.844887,0.183090,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
4,Adult Income,Standard MLP (baseline),acc_odds,0.766170,0.850857,0.577932,0.084687,0.180366,0.093891,{'dropout': 0.1698638434734938},0.850857,0.084687,Max(Accuracy - Equalized Odds),Max(Acc - Eq. Opp.)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,Adult Income,Spearman regularized MLP,mcc_odds,0.474808,0.849862,0.584044,0.109236,0.197334,0.135481,"{'l2': 0.001, 'dropout': 0.1358582308261664}",0.584044,0.109236,Max(Mathew Correlation - Equalized Odds),Max(MCC - Eq. Opp.)
62,Adult Income,Spearman regularized MLP,mcc_opportunity,0.529244,0.852847,0.584568,0.068332,0.181148,0.055323,"{'l2': 0.001, 'dropout': 0.046815908519132345}",0.584568,0.055323,Max(Mathew Correlation - Equal Opportunity),Max(MCC - Eq. Odds)
63,Adult Income,Spearman regularized MLP,acc_parity,0.659911,0.846877,0.579077,0.088519,0.186966,0.094479,"{'l2': 0.001, 'dropout': 0.02420010661774878}",0.846877,0.186966,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
64,Adult Income,Spearman regularized MLP,acc_odds,0.806630,0.849530,0.575692,0.042900,0.164714,0.014491,"{'l2': 0.001, 'dropout': 0.07752729427158084}",0.849530,0.042900,Max(Accuracy - Equalized Odds),Max(Acc - Eq. Opp.)


In [28]:
datasets = ['Adult Income']#, 'Bank Marketing', 'Compas Recidivism','German Credit']
datasets

['Adult Income']

In [29]:
fitness_rules = ['mcc_parity', 'mcc_opportunity', 'mcc_odds', 'acc_parity', 'acc_opportunity', 'acc_odds']
fitness_rules

['mcc_parity',
 'mcc_opportunity',
 'mcc_odds',
 'acc_parity',
 'acc_opportunity',
 'acc_odds']

In [33]:
ftl_methods = ['Standard FTL (baseline)', 'Pearson regularized FTL', 'Spearman regularized FTL']
mlp_methods = ['Standard MLP (baseline)', 'Pearson regularized MLP', 'Spearman regularized MLP']

In [34]:
for path, methods, results in zip(['mlp_multi_aso_data_list.json', 'ftl_multi_aso_data_list.json'],
                                  [mlp_methods, ftl_methods],
                                  (mlp_results,ftl_results)):
    if os.path.exists(path):
        with open(path) as file:
            multi_aso_data_list = json.load(file)
    else:    
        multi_aso_data_list = []
        for d in datasets:
            multi_aso_data = []
            for f in fitness_rules:
                methods_results = []
                for m in methods:
                    r = results.loc[ (results['dataset'] == d) &
                                         (results['fitness_rule'] == f) &
                                         (results['method'] == m) ]\
                                .fitness.tolist()
                    if len(r) == 0:
                        r = [-1]
                    methods_results.append(r)
                min_eps = deepsig.multi_aso(methods_results, confidence_level=0.95)
                multi_aso_data_list.append({'fitness_rule': f, 'dataset': d, 'min_eps': min_eps.tolist()})
        with open(path, 'w') as file:
            json.dump(multi_aso_data_list, file)
    
    aso_df_resume = []
    for aso_result in sorted(multi_aso_data_list, key=lambda x: x['dataset']):
        fitness_rule = aso_result['fitness_rule']
        dataset = aso_result['dataset']
    
        aso_df = pd.DataFrame(aso_result['min_eps'], columns=methods)
        aso_df['method'] = methods
        aso_df['dataset'] = dataset
        aso_df['fitness_rule'] = fitness_rule
        aso_df_resume.append(aso_df)
    
    print('Significance Testing')
    significance = pd.concat(aso_df_resume)
    significance.replace(fitness_rules_abvr, inplace=True)
    significance = significance.set_index(['fitness_rule', 'dataset'])
    significance = significance.sort_values(by=['fitness_rule', 'dataset'], ascending=[False, True])
    display(significance)
    
    grouped_results = results\
        .groupby(['Fitness Rule Abvr', 'dataset', 'method'])\
        .agg({'fitness': ['mean', 'std', 'count'], 'Performance': ['mean', 'std'], 'Fairness': ['mean', 'std']})\
        .sort_values(by=['Fitness Rule Abvr', 'dataset', ('fitness','mean')], ascending=False)
    grouped_results['formatted_fitness'] = grouped_results.apply(lambda row: f"${row[('fitness', 'mean')]:.3f} (\pm{row[('fitness', 'std')]:.2f})$", axis=1)
    grouped_results['formatted_performance'] = grouped_results.apply(lambda row: f"${row[('Performance', 'mean')]:.3f} (\pm{row[('Performance', 'std')]:.2f})$", axis=1)
    grouped_results['formatted_fairness'] = grouped_results.apply(lambda row: f"${row[('Fairness', 'mean')]:.3f} (\pm{row[('Fairness', 'std')]:.2f})$", axis=1)
    grouped_results = grouped_results.sort_values(by=['Fitness Rule Abvr', 'dataset'])
    display(grouped_results)

Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 401.06it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 405.77it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 402.54it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 390.34it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 401.64it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 398.40it/s]

Significance Testing





Unnamed: 0_level_0,Unnamed: 1_level_0,Standard MLP (baseline),Pearson regularized MLP,Spearman regularized MLP,method
fitness_rule,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Max(MCC - Stat. Parity),Adult Income,1.0,1.0,1.0,Standard MLP (baseline)
Max(MCC - Stat. Parity),Adult Income,0.709022,1.0,1.0,Pearson regularized MLP
Max(MCC - Stat. Parity),Adult Income,0.039775,0.246732,1.0,Spearman regularized MLP
Max(MCC - Eq. Opp.),Adult Income,1.0,1.0,1.0,Standard MLP (baseline)
Max(MCC - Eq. Opp.),Adult Income,0.168983,1.0,0.512791,Pearson regularized MLP
Max(MCC - Eq. Opp.),Adult Income,0.54824,1.0,1.0,Spearman regularized MLP
Max(MCC - Eq. Odds),Adult Income,1.0,1.0,1.0,Standard MLP (baseline)
Max(MCC - Eq. Odds),Adult Income,1.0,1.0,1.0,Pearson regularized MLP
Max(MCC - Eq. Odds),Adult Income,0.589897,0.583747,1.0,Spearman regularized MLP
Max(Acc - Stat. Parity),Adult Income,1.0,0.213667,1.0,Standard MLP (baseline)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fitness,fitness,fitness,Performance,Performance,Fairness,Fairness,formatted_fitness,formatted_performance,formatted_fairness
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,mean,std,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Fitness Rule Abvr,dataset,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Max(Acc - Eq. Odds),Adult Income,Standard MLP (baseline),0.76167,0.036923,11,0.846495,0.002916,0.084825,0.035491,$0.762 (\pm0.04)$,$0.846 (\pm0.00)$,$0.085 (\pm0.04)$
Max(Acc - Eq. Odds),Adult Income,Spearman regularized MLP,0.760737,0.044149,11,0.847098,0.002654,0.086361,0.042965,$0.761 (\pm0.04)$,$0.847 (\pm0.00)$,$0.086 (\pm0.04)$
Max(Acc - Eq. Odds),Adult Income,Pearson regularized MLP,0.749159,0.015001,11,0.849319,0.003219,0.10016,0.014794,$0.749 (\pm0.02)$,$0.849 (\pm0.00)$,$0.100 (\pm0.01)$
Max(Acc - Eq. Opp.),Adult Income,Spearman regularized MLP,0.764593,0.02315,11,0.847359,0.004715,0.082766,0.02152,$0.765 (\pm0.02)$,$0.847 (\pm0.00)$,$0.083 (\pm0.02)$
Max(Acc - Eq. Opp.),Adult Income,Pearson regularized MLP,0.759218,0.021135,11,0.846746,0.003549,0.087528,0.020592,$0.759 (\pm0.02)$,$0.847 (\pm0.00)$,$0.088 (\pm0.02)$
Max(Acc - Eq. Opp.),Adult Income,Standard MLP (baseline),0.759144,0.020645,11,0.849379,0.004026,0.090235,0.020157,$0.759 (\pm0.02)$,$0.849 (\pm0.00)$,$0.090 (\pm0.02)$
Max(Acc - Stat. Parity),Adult Income,Standard MLP (baseline),0.663749,0.010845,11,0.849912,0.004575,0.186163,0.011011,$0.664 (\pm0.01)$,$0.850 (\pm0.00)$,$0.186 (\pm0.01)$
Max(Acc - Stat. Parity),Adult Income,Spearman regularized MLP,0.659905,0.015297,11,0.847098,0.003235,0.187193,0.014005,$0.660 (\pm0.02)$,$0.847 (\pm0.00)$,$0.187 (\pm0.01)$
Max(Acc - Stat. Parity),Adult Income,Pearson regularized MLP,0.652003,0.015825,11,0.84749,0.004005,0.195487,0.014863,$0.652 (\pm0.02)$,$0.847 (\pm0.00)$,$0.195 (\pm0.01)$
Max(MCC - Eq. Odds),Adult Income,Spearman regularized MLP,0.478951,0.046799,11,0.568905,0.014998,0.089954,0.040624,$0.479 (\pm0.05)$,$0.569 (\pm0.01)$,$0.090 (\pm0.04)$


Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 384.73it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 394.12it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 390.17it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 387.62it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 394.10it/s]
Model comparisons: 100%|█████████▉| 2997/3000 [00:07<00:00, 394.02it/s]

Significance Testing





Unnamed: 0_level_0,Unnamed: 1_level_0,Standard FTL (baseline),Pearson regularized FTL,Spearman regularized FTL,method
fitness_rule,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Max(MCC - Stat. Parity),Adult Income,1.0,1.0,0.371769,Standard FTL (baseline)
Max(MCC - Stat. Parity),Adult Income,0.579539,1.0,0.246382,Pearson regularized FTL
Max(MCC - Stat. Parity),Adult Income,1.0,1.0,1.0,Spearman regularized FTL
Max(MCC - Eq. Opp.),Adult Income,1.0,1.0,1.0,Standard FTL (baseline)
Max(MCC - Eq. Opp.),Adult Income,0.358964,1.0,0.665979,Pearson regularized FTL
Max(MCC - Eq. Opp.),Adult Income,0.453264,1.0,1.0,Spearman regularized FTL
Max(MCC - Eq. Odds),Adult Income,1.0,0.177844,0.602087,Standard FTL (baseline)
Max(MCC - Eq. Odds),Adult Income,1.0,1.0,1.0,Pearson regularized FTL
Max(MCC - Eq. Odds),Adult Income,1.0,0.47277,1.0,Spearman regularized FTL
Max(Acc - Stat. Parity),Adult Income,1.0,1.0,1.0,Standard FTL (baseline)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fitness,fitness,fitness,Performance,Performance,Fairness,Fairness,formatted_fitness,formatted_performance,formatted_fairness
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,mean,std,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Fitness Rule Abvr,dataset,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Max(Acc - Eq. Odds),Adult Income,Pearson regularized FTL,0.81644,0.024623,16,0.844002,0.00914,0.027562,0.022816,$0.816 (\pm0.02)$,$0.844 (\pm0.01)$,$0.028 (\pm0.02)$
Max(Acc - Eq. Odds),Adult Income,Standard FTL (baseline),0.8134,0.021986,13,0.847098,0.002867,0.033698,0.02106,$0.813 (\pm0.02)$,$0.847 (\pm0.00)$,$0.034 (\pm0.02)$
Max(Acc - Eq. Odds),Adult Income,Spearman regularized FTL,0.793284,0.036451,12,0.843265,0.003187,0.049981,0.037638,$0.793 (\pm0.04)$,$0.843 (\pm0.00)$,$0.050 (\pm0.04)$
Max(Acc - Eq. Opp.),Adult Income,Pearson regularized FTL,0.806716,0.015056,16,0.843954,0.007224,0.037238,0.01454,$0.807 (\pm0.02)$,$0.844 (\pm0.01)$,$0.037 (\pm0.01)$
Max(Acc - Eq. Opp.),Adult Income,Spearman regularized FTL,0.805827,0.011884,12,0.8425,0.003842,0.036674,0.011862,$0.806 (\pm0.01)$,$0.843 (\pm0.00)$,$0.037 (\pm0.01)$
Max(Acc - Eq. Opp.),Adult Income,Standard FTL (baseline),0.801817,0.015775,14,0.839035,0.008325,0.037218,0.017543,$0.802 (\pm0.02)$,$0.839 (\pm0.01)$,$0.037 (\pm0.02)$
Max(Acc - Stat. Parity),Adult Income,Pearson regularized FTL,0.81338,0.011659,16,0.828372,0.008529,0.014992,0.011506,$0.813 (\pm0.01)$,$0.828 (\pm0.01)$,$0.015 (\pm0.01)$
Max(Acc - Stat. Parity),Adult Income,Spearman regularized FTL,0.809098,0.012363,12,0.824912,0.009076,0.015814,0.008208,$0.809 (\pm0.01)$,$0.825 (\pm0.01)$,$0.016 (\pm0.01)$
Max(Acc - Stat. Parity),Adult Income,Standard FTL (baseline),0.799946,0.023751,14,0.827561,0.00875,0.027615,0.026391,$0.800 (\pm0.02)$,$0.828 (\pm0.01)$,$0.028 (\pm0.03)$
Max(MCC - Eq. Odds),Adult Income,Standard FTL (baseline),0.552515,0.029883,14,0.580809,0.010422,0.028294,0.02873,$0.553 (\pm0.03)$,$0.581 (\pm0.01)$,$0.028 (\pm0.03)$
