In [23]:
import pandas as pd
import json
import os
import deepsig
from IPython.display import display

In [24]:
# Column names of interest; the same names appear as columns of the result
# frames handled in the cells below.
# NOTE(review): `cols` is not referenced in the visible cells - confirm it is still needed.
cols = [
    # run identification and objective value
    'dataset', 'method', 'fitness_rule', 'fitness',
    # predictive performance
    'ACC', 'MCC', 'f1_score',
    # group-fairness differences
    'avg_odds_diff', 'stat_par_diff', 'eq_opp_diff',
]

In [42]:
def _read_labelled_results(csv_path, initializer_name, label):
    """Read one results CSV and replace the initializer identifier with its display label."""
    return pd.read_csv(csv_path).replace({initializer_name: label})


# One frame per (model family, variant); the xi-reg variants are labelled "+CRP".
mlp_baseline_results = _read_labelled_results(
    'simple_mlp_results.csv', 'simple_mlp_initializer', 'MLP')
mlp_xi_reg_results = _read_labelled_results(
    'mlp_xi_reg_results.csv', 'mlp_xi_reg_initializer', 'MLP+CRP')
ftl_baseline_results = _read_labelled_results(
    'ftl_mlp_results.csv', 'ftl_mlp_initializer', 'FTL')
ftl_xi_reg_results = _read_labelled_results(
    'ftl_mlp_xi_reg_results.csv', 'ftl_mlp_xi_reg_initializer', 'FTL+CRP')

# Per-family frames (baseline rows first), then everything stacked together.
mlp_results = pd.concat([mlp_baseline_results, mlp_xi_reg_results])
ftl_results = pd.concat([ftl_baseline_results, ftl_xi_reg_results])
full_results = pd.concat([mlp_results, ftl_results])

In [43]:
# Dataset reader identifiers -> human-readable dataset names.
DATASET_LABELS = {
    'adult_dataset_reader': 'Adult Income',
    'compas_dataset_reader': 'Compas Recidivism',
    'german_dataset_reader': 'German Credit',
    'bank_dataset_reader': 'Bank Marketing',
}

# Raw metric column names -> display names used by the cells below.
METRIC_LABELS = {
    'avg_odds_diff': 'Equalized Odds',
    'stat_par_diff': 'Statistical Parity',
    'eq_opp_diff': 'Equal Opportunity',
    'MCC': 'Mathew Correlation',
    'ACC': 'Accuracy',
}

# Each frame is relabelled in place: the three frames are separate objects and
# later cells keep using these exact variables.
for results in (mlp_results, ftl_results, full_results):
    results.replace(DATASET_LABELS, inplace=True)
    results.rename(columns=METRIC_LABELS, inplace=True)

In [44]:
# Maps each fitness rule to the (performance column, fairness column) pair it targets.
fitness_rules_target_metrics = {
    'mcc_parity': ('Mathew Correlation', 'Statistical Parity'),
    'mcc_opportunity': ('Mathew Correlation', 'Equal Opportunity'),
    'mcc_odds': ('Mathew Correlation', 'Equalized Odds'),
    'acc_parity': ('Accuracy', 'Statistical Parity'),
    'acc_opportunity': ('Accuracy', 'Equal Opportunity'),
    'acc_odds': ('Accuracy', 'Equalized Odds')
}

# Abbreviated display label per fitness rule. The fairness abbreviation follows
# the fairness metric paired with the rule in fitness_rules_target_metrics
# ("opportunity" -> Eq. Opp., "odds" -> Eq. Odds).
fitness_rules_abvr = {
    'mcc_parity': 'Max(MCC - Stat. Parity)',
    'mcc_opportunity': 'Max(MCC - Eq. Opp.)',
    'mcc_odds': 'Max(MCC - Eq. Odds)',
    'acc_parity': 'Max(Acc - Stat. Parity)',
    'acc_opportunity': 'Max(Acc - Eq. Opp.)',
    'acc_odds': 'Max(Acc - Eq. Odds)'
}

# Add per-row 'Performance' / 'Fairness' columns holding the metric each row's
# fitness rule targets, plus long and abbreviated labels for the rule itself.
for results in [mlp_results, ftl_results, full_results]:
    # Float placeholders: the metric values copied in below are floats, and
    # writing floats into an integer column triggers pandas' incompatible-dtype
    # FutureWarning.
    results['Performance'] = 0.0
    results['Fairness'] = 0.0
    results['Fitness Rule'] = ''
    for fitness_rule, (performance_metric, fairness_metric) in fitness_rules_target_metrics.items():
        # Rows produced under this fitness rule; computed once and reused.
        rule_rows = results.fitness_rule == fitness_rule
        results.loc[rule_rows, 'Performance'] = results.loc[rule_rows, performance_metric]
        results.loc[rule_rows, 'Fairness'] = results.loc[rule_rows, fairness_metric]
        results.loc[rule_rows, 'Fitness Rule Abvr'] = fitness_rules_abvr[fitness_rule]
        results.loc[rule_rows, 'Fitness Rule'] = 'Max(%s - %s)' % (performance_metric, fairness_metric)

 0.58075151 0.57811729 0.57147862 0.55847176 0.56402881 0.58191431
 0.58343253 0.57740222 0.51579307 0.30958237 0.37962947 0.55899189
 0.53558872 0.2598879  0.57717035 0.50086739 0.33275221 0.37929293
 0.47729109 0.29196869 0.29037704 0.52303609 0.27480633 0.38668781
 0.50626951 0.29960979 0.4042848  0.52119218 0.2936422  0.33522388
 0.49730721 0.25112662 0.35119067 0.510052   0.26843855 0.36081008
 0.52157495 0.30114722 0.42995147 0.48267704 0.29056691 0.23139881
 0.54223416 0.23653437 0.27041017 0.52121263 0.25759348 0.23912165
 0.5223892  0.27406652 0.22303564 0.54094974 0.27699558 0.30939251
 0.51731345 0.30339828 0.37825089 0.52113734 0.26323857 0.33963196
 0.5538225  0.27850228 0.30234984 0.51877126 0.29494101 0.2648939
 0.52742877 0.24492927 0.13407928 0.53530394 0.29087766 0.26820576
 0.55204731 0.29318377 0.14015216 0.51983069 0.3145689  0.47077449
 0.50770986 0.28880123 0.29866465 0.5180664  0.29093567 0.3636368
 0.53289704 0.29737768 0.38084556 0.54328387 0.28002914 0.282325

In [28]:
# Dataset display names iterated over by the significance analysis.
datasets = [
    'Adult Income',
    'Bank Marketing',
    'Compas Recidivism',
    'German Credit',
]
datasets

['Adult Income', 'Bank Marketing', 'Compas Recidivism', 'German Credit']

In [29]:
# Fitness rule identifiers, "<performance>_<fairness>": every performance
# metric combined with every fairness criterion, MCC rules first.
fitness_rules = [
    f'{performance}_{fairness}'
    for performance in ('mcc', 'acc')
    for fairness in ('parity', 'opportunity', 'odds')
]
fitness_rules

['mcc_parity',
 'mcc_opportunity',
 'mcc_odds',
 'acc_parity',
 'acc_opportunity',
 'acc_odds']

In [53]:
# Method labels per model family, ordered as [baseline, CRP variant].
mlp_methods = ['MLP', 'MLP+CRP']
ftl_methods = ['FTL', 'FTL+CRP']

# Empty accumulators (two distinct lists).
significances, grouped_results_list = [], []

In [69]:
# For each model family, compare the CRP variant against its baseline with
# deepsig's ASO test on every (dataset, fitness rule) pair. Results are cached
# as JSON next to the notebook so the test is only run when no cache exists.

# DataFrame.to_latex does not create missing parent directories, so make sure
# the output folder exists before any table is written.
os.makedirs('tables', exist_ok=True)

for path, methods, results in zip(['mlp_multi_aso_data_list.json', 'ftl_multi_aso_data_list.json'],
                                  [mlp_methods, ftl_methods],
                                  (mlp_results, ftl_results)):
    # methods is ordered [baseline, CRP variant]; the baseline label names the outputs.
    method = methods[0]
    if os.path.exists(path):
        # Reuse previously computed values instead of re-running the test.
        with open(path) as file:
            multi_aso_data_list = json.load(file)
    else:
        multi_aso_data_list = []
        for d in datasets:
            for f in fitness_rules:
                # Rows of this dataset / fitness-rule cell, shared by both methods.
                cell_rows = (results['dataset'] == d) & (results['fitness_rule'] == f)
                baseline = results.loc[cell_rows & (results['method'] == methods[0]), 'fitness'].tolist()
                crp = results.loc[cell_rows & (results['method'] == methods[1]), 'fitness'].tolist()

                # CRP scores are passed first, baseline second; see the deepsig
                # documentation for how to interpret the returned value.
                min_eps = deepsig.aso(crp, baseline, confidence_level=0.95)
                multi_aso_data_list.append({'fitness_rule': f, 'dataset': d, 'min_eps': min_eps})
        with open(path, 'w') as file:
            json.dump(multi_aso_data_list, file)

    significance = pd.DataFrame(multi_aso_data_list)

    # One row per fitness rule, one column per dataset, rules in descending name order.
    pivot_df = significance.pivot_table(index='fitness_rule', columns='dataset', values='min_eps').sort_values(by='fitness_rule', ascending=False)
    pivot_df.to_latex(f'tables/aso_results_{method.lower()}_crp.tex')
    print(method + ' CRP ASO Results')
    display(pivot_df)

MLP CRP ASO Results


dataset,Adult Income,Bank Marketing,Compas Recidivism,German Credit
fitness_rule,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
mcc_parity,0.785045,0.989348,0.033061,1.0
mcc_opportunity,0.264258,1.0,0.00521,1.0
mcc_odds,0.077738,1.0,0.201501,1.0
acc_parity,0.752638,1.0,0.393764,0.873975
acc_opportunity,1.0,0.177703,0.074456,0.2356
acc_odds,0.325908,1.0,0.008194,0.622009


FTL CRP ASO Results


dataset,Adult Income,Bank Marketing,Compas Recidivism,German Credit
fitness_rule,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
mcc_parity,0.338501,1.0,1.0,0.314726
mcc_opportunity,0.349227,1.0,0.194598,1.0
mcc_odds,0.298353,1.0,0.559465,1.0
acc_parity,0.442553,1.0,0.02159,1.0
acc_opportunity,0.580331,0.674064,1.0,1.0
acc_odds,0.397716,1.0,0.202845,1.0


In [76]:
def _format_mean_std(row, metric):
    """Render one aggregated metric of ``row`` as the LaTeX math string ``$mean (\\pm std)$``.

    ``row`` is a row of the aggregated frame and ``metric`` the top-level
    column name whose ``mean`` and ``std`` sub-columns are formatted (mean
    with three decimals, std with two).
    """
    # The backslash is escaped explicitly: a bare "\p" is an invalid escape
    # sequence in a non-raw string literal.
    return f"${row[(metric, 'mean')]:.3f} (\\pm{row[(metric, 'std')]:.2f})$"


# Mean / std (and run count for fitness) per fitness rule, dataset and method,
# sorted descending by rule, dataset and mean fitness.
grouped_results = full_results\
    .groupby(['fitness_rule', 'dataset', 'method'])\
    .agg({'fitness': ['mean', 'std', 'count'], 'Performance': ['mean', 'std'], 'Fairness': ['mean', 'std']})\
    .sort_values(by=['fitness_rule', 'dataset', ('fitness', 'mean')], ascending=False)

# Pre-formatted string columns for the LaTeX tables.
for metric, formatted_column in [('fitness', 'formatted_fitness'),
                                 ('Performance', 'formatted_performance'),
                                 ('Fairness', 'formatted_fairness')]:
    grouped_results[formatted_column] = grouped_results.apply(_format_mean_std, axis=1, metric=metric)
grouped_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fitness,fitness,fitness,Performance,Performance,Fairness,Fairness,formatted_fitness,formatted_performance,formatted_fairness
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,mean,std,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
fitness_rule,dataset,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
mcc_parity,German Credit,FTL+CRP,0.302067,0.062075,15,0.370891,0.053377,0.068824,0.049439,$0.302 (\pm0.06)$,$0.371 (\pm0.05)$,$0.069 (\pm0.05)$
mcc_parity,German Credit,MLP,0.265661,0.099801,30,0.329468,0.090834,0.063807,0.046639,$0.266 (\pm0.10)$,$0.329 (\pm0.09)$,$0.064 (\pm0.05)$
mcc_parity,German Credit,FTL,0.255934,0.122275,14,0.354889,0.077343,0.098955,0.062630,$0.256 (\pm0.12)$,$0.355 (\pm0.08)$,$0.099 (\pm0.06)$
mcc_parity,German Credit,MLP+CRP,0.245571,0.070406,15,0.329368,0.050637,0.083796,0.060963,$0.246 (\pm0.07)$,$0.329 (\pm0.05)$,$0.084 (\pm0.06)$
mcc_parity,Compas Recidivism,FTL,0.239206,0.031594,15,0.275636,0.025629,0.036430,0.026733,$0.239 (\pm0.03)$,$0.276 (\pm0.03)$,$0.036 (\pm0.03)$
...,...,...,...,...,...,...,...,...,...,...,...,...
acc_odds,Bank Marketing,MLP+CRP,0.830051,0.031753,15,0.902197,0.003271,0.072146,0.030545,$0.830 (\pm0.03)$,$0.902 (\pm0.00)$,$0.072 (\pm0.03)$
acc_odds,Adult Income,FTL+CRP,0.805417,0.018968,15,0.843169,0.005611,0.037752,0.017930,$0.805 (\pm0.02)$,$0.843 (\pm0.01)$,$0.038 (\pm0.02)$
acc_odds,Adult Income,FTL,0.797546,0.022527,25,0.840876,0.007559,0.043330,0.023855,$0.798 (\pm0.02)$,$0.841 (\pm0.01)$,$0.043 (\pm0.02)$
acc_odds,Adult Income,MLP+CRP,0.771922,0.023962,15,0.848719,0.003999,0.076798,0.023499,$0.772 (\pm0.02)$,$0.849 (\pm0.00)$,$0.077 (\pm0.02)$


In [97]:
# Pre-formatted "mean (+/- std)" columns to export.
selected_columns = ['formatted_fitness', 'formatted_performance', 'formatted_fairness']

# DataFrame.to_latex does not create missing parent directories, so make sure
# the output folder exists before writing.
os.makedirs('tables', exist_ok=True)

# One LaTeX table per fitness rule (the first level of the grouped index).
for fitness_rule in fitness_rules:
    rule_table = grouped_results.loc[fitness_rule][selected_columns]
    rule_table.to_latex(f'tables/grouped_results_{fitness_rule}_crp.tex')