In [48]:
import pandas as pd
import json
import os
import deepsig
from IPython.display import display

In [49]:
def read_csv_files_from_folder(folder_path):
    # Initialize an empty list to store DataFrames
    dfs = []

    # Get a list of files in the folder
    file_list = os.listdir(folder_path)

    # Iterate through the files in the folder
    for file_name in file_list:
        # Check if the file has a .csv extension
        if file_name.endswith(".csv"):
            file_path = os.path.join(folder_path, file_name)

            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)

            # Append the DataFrame to the list
            dfs.append(df)

    # Concatenate all DataFrames into a single DataFrame
    combined_df = pd.concat(dfs, ignore_index=True)

    return combined_df

In [50]:
cols = ['dataset', 'method', 'fitness_rule', 'fitness', 'ACC', 'MCC', 'f1_score', 'avg_odds_diff', 'stat_par_diff', 'eq_opp_diff']

In [51]:
results = read_csv_files_from_folder('results/')
results.replace({'ftl_mlp_initializer': 'Fair Transition Loss', 'adversarial_debiasing_initializer': 'Adversarial Debiasing', 'gerry_fair_classifier_initializer': 'Gerry Fair Classifier', 'prejudice_remover_initializer': 'Prejudice Remover', 'simple_mlp_initializer': 'Standard MLP (baseline)', 'adaptative_priority_reweighting_classifier_initializer': 'Adaptative Priority Reweighting'}, inplace=True)
results.replace({'adult_dataset_reader': 'Adult Income', 'compas_dataset_reader': 'Compas Recidivism', 'german_dataset_reader': 'German Credit', 'bank_dataset_reader': 'Bank Marketing'}, inplace=True)
results.rename(columns={'avg_odds_diff': 'Equalized Odds', 'stat_par_diff': 'Statistical Parity', 'eq_opp_diff': 'Equal Opportunity', 'MCC': 'Mathew Correlation', 'ACC': 'Accuracy'}, inplace=True)

In [52]:
fitness_rules_target_metrics = {
    'mcc_parity': {'performance': 'Mathew Correlation', 'fairness': 'Statistical Parity'},
    'mcc_opportunity': {'performance': 'Mathew Correlation', 'fairness': 'Equal Opportunity'},
    'mcc_odds': {'performance': 'Mathew Correlation', 'fairness': 'Equalized Odds'},
    'acc_parity': {'performance': 'Accuracy', 'fairness': 'Statistical Parity'},
    'acc_opportunity': {'performance': 'Accuracy', 'fairness': 'Equal Opportunity'},
    'acc_odds': {'performance': 'Accuracy', 'fairness': 'Equalized Odds'}
}

fitness_rules_target_metrics = {
    'mcc_parity': ('Mathew Correlation', 'Statistical Parity'),
    'mcc_opportunity': ('Mathew Correlation', 'Equal Opportunity'),
    'mcc_odds': ('Mathew Correlation', 'Equalized Odds'),
    'acc_parity': ('Accuracy', 'Statistical Parity'),
    'acc_opportunity': ('Accuracy', 'Equal Opportunity'),
    'acc_odds': ('Accuracy', 'Equalized Odds')
}
fitness_rules_abvr = {
    'mcc_parity': 'Max(MCC - Stat. Parity)',
    'mcc_opportunity': 'Max(MCC - Eq. Odds)',
    'mcc_odds': 'Max(MCC - Eq. Opp.)',
    'acc_parity': 'Max(Acc - Stat. Parity)',
    'acc_opportunity': 'Max(Acc - Eq. Odds)',
    'acc_odds': 'Max(Acc - Eq. Opp.)'
}

results['Performance'] = 0
results['Fairness'] = 0
results['Fitness Rule'] = ''
for fitness_rule, (performance_metric, fairness_metric) in fitness_rules_target_metrics.items():
    results.loc[results.fitness_rule == fitness_rule,'Performance'] = results.loc[results.fitness_rule == fitness_rule,performance_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fairness'] = results.loc[results.fitness_rule == fitness_rule,fairness_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule Abvr'] = fitness_rules_abvr[fitness_rule]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule'] = 'Max(%s - %s)' % fitness_rules_target_metrics[fitness_rule]

  0.49301295  0.51720445  0.51852522  0.52145169  0.51596673  0.51346999
  0.52671106  0.50214026  0.50858064  0.47112459  0.4656368   0.46740864
  0.46284612  0.47393742  0.47968969  0.49755254  0.48230318  0.49095602
  0.46405302  0.34519814  0.27823778  0.32962888  0.29366842  0.40050094
  0.27456744  0.32378989  0.29231173  0.39462664  0.33729196  0.30324149
  0.48812372  0.29976673  0.4298867   0.25687375  0.25946325  0.47067765
  0.52495866  0.49070029  0.49617108  0.25700904  0.23568871  0.18873327
  0.18067188  0.22875223  0.20710623  0.18684293  0.17710644  0.15556906
  0.14388121  0.08785858  0.32295801  0.19795178  0.06525125  0.27422803
  0.31593009  0.31000841  0.33131997  0.32331717  0.32900626  0.31235223
  0.28531143  0.31725419  0.31646536  0.34244247  0.3265771   0.30718997
  0.34741723  0.32744447  0.33941611  0.48462391  0.51246768  0.51909213
  0.49597118  0.51770609  0.479619    0.48794997  0.52038962  0.48669612
  0.50684677  0.52402237  0.54340012  0.46845419  0

In [53]:
display(results)

Unnamed: 0.1,Unnamed: 0,dataset,method,fitness_rule,fitness,Accuracy,Mathew Correlation,Equalized Odds,Statistical Parity,Equal Opportunity,Performance,Fairness,Fitness Rule,Fitness Rule Abvr
0,0,Adult Income,Adaptative Priority Reweighting,mcc_parity,0.479139,0.817137,0.492558,0.213101,0.013418,0.350850,0.492558,0.013418,Max(Mathew Correlation - Statistical Parity),Max(MCC - Stat. Parity)
1,1,Adult Income,Adaptative Priority Reweighting,acc_parity,0.824104,0.826645,0.525174,0.184241,0.002540,0.308934,0.826645,0.002540,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
2,2,Adult Income,Adaptative Priority Reweighting,mcc_parity,0.503263,0.823659,0.507789,0.177868,0.004526,0.299850,0.507789,0.004526,Max(Mathew Correlation - Statistical Parity),Max(MCC - Stat. Parity)
3,3,Adult Income,Adaptative Priority Reweighting,acc_parity,0.813385,0.822996,0.510322,0.203541,0.009611,0.344662,0.822996,0.009611,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
4,4,Adult Income,Adaptative Priority Reweighting,mcc_parity,0.499814,0.822222,0.512618,0.202541,0.012804,0.326498,0.512618,0.012804,Max(Mathew Correlation - Statistical Parity),Max(MCC - Stat. Parity)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194,341,German Credit,Standard MLP (baseline),mcc_odds,0.230144,0.755000,0.362899,0.132755,0.143382,0.071722,0.362899,0.132755,Max(Mathew Correlation - Equalized Odds),Max(MCC - Eq. Opp.)
2195,345,German Credit,Standard MLP (baseline),mcc_opportunity,0.317102,0.765000,0.393271,0.070136,0.009376,0.076169,0.393271,0.076169,Max(Mathew Correlation - Equal Opportunity),Max(MCC - Eq. Odds)
2196,349,German Credit,Standard MLP (baseline),acc_parity,0.594396,0.720000,0.266520,0.113263,0.125604,0.099487,0.720000,0.125604,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
2197,353,German Credit,Standard MLP (baseline),acc_odds,0.683429,0.760000,0.360217,0.076571,0.094083,0.082555,0.760000,0.076571,Max(Accuracy - Equalized Odds),Max(Acc - Eq. Opp.)


In [54]:
datasets = ['Adult Income', 'Bank Marketing', 'Compas Recidivism','German Credit']
datasets

['Adult Income', 'Bank Marketing', 'Compas Recidivism', 'German Credit']

In [55]:
fitness_rules = ['mcc_parity', 'mcc_opportunity', 'mcc_odds', 'acc_parity', 'acc_opportunity', 'acc_odds']
fitness_rules

['mcc_parity',
 'mcc_opportunity',
 'mcc_odds',
 'acc_parity',
 'acc_opportunity',
 'acc_odds']

In [56]:
methods = ['Standard MLP (baseline)',
 'Fair Transition Loss',
 'Adversarial Debiasing',
 'Prejudice Remover',
 'Gerry Fair Classifier',
 'Adaptative Priority Reweighting']
methods

['Standard MLP (baseline)',
 'Fair Transition Loss',
 'Adversarial Debiasing',
 'Prejudice Remover',
 'Gerry Fair Classifier',
 'Adaptative Priority Reweighting']

In [57]:
if os.path.exists('multi_aso_data_list.json'):
    with open('multi_aso_data_list.json') as file:
        multi_aso_data_list = json.load(file)
else:    
    multi_aso_data_list = []
    for d in datasets:
        multi_aso_data = []
        for f in fitness_rules:
            methods_results = []
            for m in methods:
                r = results.loc[ (results['dataset'] == d) &
                                     (results['fitness_rule'] == f) &
                                     (results['method'] == m) ]\
                            .fitness.tolist()
                if len(r) == 0:
                    r = [-1]
                methods_results.append(r)
            min_eps = deepsig.multi_aso(methods_results, confidence_level=0.95)
            multi_aso_data_list.append({'fitness_rule': f, 'dataset': d, 'min_eps': min_eps.tolist()})
    with open('multi_aso_data_list.json', 'w') as file:
        json.dump(multi_aso_data_list, file)

In [58]:
aso_df_resume = []
reverse_aso_df_resume = []
for aso_result in sorted(multi_aso_data_list, key=lambda x: x['dataset']):
    fitness_rule = aso_result['fitness_rule']
    dataset = aso_result['dataset']

    aso_df = pd.DataFrame(aso_result['min_eps'], columns=methods)
    aso_df['method'] = methods
    aso_df['dataset'] = dataset
    aso_df['fitness_rule'] = fitness_rule
    aso_df = aso_df[aso_df['method'] == 'Fair Transition Loss' ]
    aso_df = aso_df.drop(['Fair Transition Loss'], axis=1)
    aso_df = aso_df.drop(['method'], axis=1)
    aso_df_resume.append(aso_df)

    reverse_aso_df = pd.DataFrame(aso_result['min_eps'], columns=methods).transpose()
    mapping = dict()
    for i, m in enumerate(methods):
        mapping[reverse_aso_df.columns[i]] = m + ' (reverse)'
    reverse_aso_df = reverse_aso_df.rename(columns=mapping)
    reverse_aso_df['method'] = methods
    reverse_aso_df['dataset'] = dataset
    reverse_aso_df['fitness_rule'] = fitness_rule
    reverse_aso_df = reverse_aso_df[reverse_aso_df['method'] == 'Fair Transition Loss' ]
    reverse_aso_df = reverse_aso_df.drop(['Fair Transition Loss (reverse)'], axis=1)
    reverse_aso_df = reverse_aso_df.drop(['method'], axis=1)
    reverse_aso_df_resume.append(reverse_aso_df)

print('Significance Testing')
significance = pd.concat(aso_df_resume)
significance = significance.set_index(['fitness_rule', 'dataset'])
significance = significance.sort_values(by=['fitness_rule', 'dataset'], ascending=[False, True])
#display(pd.transpose(significance))
formatted_significance = significance.applymap(lambda x: '\\textbf{' + f'{x:.2f}' + '}' if x < 0.5 else f'{x:.2f}' )
formatted_significance.to_latex('tables/significance_resume.tex')

reverse_significance = pd.concat(reverse_aso_df_resume)
reverse_significance = reverse_significance.set_index(['fitness_rule', 'dataset'])
reverse_significance = reverse_significance.sort_values(by=['fitness_rule', 'dataset'])
reverse_formatted_significance = reverse_significance.applymap(lambda x: '\\textit{' + f'{x:.2f}' + '}' if x < 0.5 else f'{x:.2f}' )
reverse_formatted_significance.to_latex('tables/reverse_significance_resume.tex')

Significance Testing


  formatted_significance = significance.applymap(lambda x: '\\textbf{' + f'{x:.2f}' + '}' if x < 0.5 else f'{x:.2f}' )
  reverse_formatted_significance = reverse_significance.applymap(lambda x: '\\textit{' + f'{x:.2f}' + '}' if x < 0.5 else f'{x:.2f}' )


In [59]:
for col, rev_col in zip(formatted_significance.columns, reverse_formatted_significance.columns):
    formatted_significance[col] = formatted_significance[col] + (' (' + reverse_formatted_significance[rev_col] + ')')
formatted_significance.to_latex('tables/combined_significance_resume.tex')
formatted_significance

Unnamed: 0_level_0,Unnamed: 1_level_0,Standard MLP (baseline),Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier,Adaptative Priority Reweighting
fitness_rule,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mcc_parity,Adult Income,\textbf{0.00} (1.00),\textbf{0.15} (1.00),1.00 (1.00),\textbf{0.00} (1.00),1.00 (\textit{0.35})
mcc_parity,Bank Marketing,\textbf{0.01} (1.00),\textbf{0.00} (1.00),\textbf{0.00} (1.00),\textbf{0.00} (1.00),\textbf{0.00} (1.00)
mcc_parity,Compas Recidivism,\textbf{0.01} (1.00),\textbf{0.25} (1.00),\textbf{0.00} (1.00),\textbf{0.02} (1.00),\textbf{0.00} (1.00)
mcc_parity,German Credit,\textbf{0.28} (1.00),\textbf{0.30} (1.00),\textbf{0.39} (1.00),\textbf{0.21} (1.00),\textbf{0.28} (1.00)
mcc_opportunity,Adult Income,\textbf{0.01} (1.00),\textbf{0.00} (1.00),\textbf{0.05} (1.00),\textbf{0.00} (1.00),0.93 (0.98)
mcc_opportunity,Bank Marketing,0.81 (1.00),\textbf{0.18} (1.00),\textbf{0.24} (1.00),\textbf{0.09} (1.00),0.77 (1.00)
mcc_opportunity,Compas Recidivism,\textbf{0.00} (1.00),1.00 (\textit{0.19}),\textbf{0.00} (1.00),0.66 (1.00),1.00 (\textit{0.03})
mcc_opportunity,German Credit,1.00 (0.85),\textbf{0.23} (1.00),0.84 (1.00),0.78 (1.00),0.76 (1.00)
mcc_odds,Adult Income,\textbf{0.03} (1.00),\textbf{0.28} (1.00),\textbf{0.42} (1.00),\textbf{0.00} (1.00),\textbf{0.00} (1.00)
mcc_odds,Bank Marketing,\textbf{0.46} (1.00),\textbf{0.18} (1.00),\textbf{0.12} (1.00),\textbf{0.02} (1.00),\textbf{0.18} (1.00)


In [60]:
grouped_results = results\
    .groupby(['Fitness Rule Abvr', 'dataset', 'method'])\
    .agg({'fitness': ['mean', 'std'], 'Performance': ['mean', 'std'], 'Fairness': ['mean', 'std']})\
    .sort_values(by=['Fitness Rule Abvr', 'dataset', 'method'], ascending=True)
grouped_results['formatted_fitness'] = grouped_results.apply(lambda row: f"${row[('fitness', 'mean')]:.3f} (\pm{row[('fitness', 'std')]:.2f})$", axis=1)
grouped_results['formatted_performance'] = grouped_results.apply(lambda row: f"${row[('Performance', 'mean')]:.3f} (\pm{row[('Performance', 'std')]:.2f})$", axis=1)
grouped_results['formatted_fairness'] = grouped_results.apply(lambda row: f"${row[('Fairness', 'mean')]:.3f} (\pm{row[('Fairness', 'std')]:.2f})$", axis=1)
grouped_results = grouped_results.sort_values(by=['Fitness Rule Abvr', 'dataset'])
grouped_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fitness,fitness,Performance,Performance,Fairness,Fairness,formatted_fitness,formatted_performance,formatted_fairness
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Fitness Rule Abvr,dataset,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Max(Acc - Eq. Odds),Adult Income,Adaptative Priority Reweighting,0.828577,0.012771,0.846582,0.004174,0.018005,0.011826,$0.829 (\pm0.01)$,$0.847 (\pm0.00)$,$0.018 (\pm0.01)$
Max(Acc - Eq. Odds),Adult Income,Adversarial Debiasing,0.756054,0.028345,0.847730,0.002904,0.091676,0.027315,$0.756 (\pm0.03)$,$0.848 (\pm0.00)$,$0.092 (\pm0.03)$
Max(Acc - Eq. Odds),Adult Income,Fair Transition Loss,0.787013,0.083934,0.825826,0.069484,0.038813,0.037378,$0.787 (\pm0.08)$,$0.826 (\pm0.07)$,$0.039 (\pm0.04)$
Max(Acc - Eq. Odds),Adult Income,Gerry Fair Classifier,0.704668,0.074245,0.750809,0.085845,0.046141,0.054413,$0.705 (\pm0.07)$,$0.751 (\pm0.09)$,$0.046 (\pm0.05)$
Max(Acc - Eq. Odds),Adult Income,Prejudice Remover,0.809534,0.022701,0.845788,0.003691,0.036254,0.021244,$0.810 (\pm0.02)$,$0.846 (\pm0.00)$,$0.036 (\pm0.02)$
...,...,...,...,...,...,...,...,...,...,...,...
Max(MCC - Stat. Parity),German Credit,Adversarial Debiasing,0.200070,0.172675,0.367673,0.056485,0.167603,0.148864,$0.200 (\pm0.17)$,$0.368 (\pm0.06)$,$0.168 (\pm0.15)$
Max(MCC - Stat. Parity),German Credit,Fair Transition Loss,0.271735,0.082350,0.354444,0.068050,0.082709,0.044290,$0.272 (\pm0.08)$,$0.354 (\pm0.07)$,$0.083 (\pm0.04)$
Max(MCC - Stat. Parity),German Credit,Gerry Fair Classifier,0.220550,0.088103,0.291429,0.108332,0.070878,0.059346,$0.221 (\pm0.09)$,$0.291 (\pm0.11)$,$0.071 (\pm0.06)$
Max(MCC - Stat. Parity),German Credit,Prejudice Remover,0.233727,0.087339,0.328818,0.051833,0.095091,0.061822,$0.234 (\pm0.09)$,$0.329 (\pm0.05)$,$0.095 (\pm0.06)$


In [61]:
selected_columns = ['formatted_fitness', 'formatted_performance', 'formatted_fairness']
grouped_results[selected_columns].to_latex('tables/grouped_results.tex')