In [27]:
import pandas as pd
import json
import os
import deepsig
from IPython.display import display

In [28]:
def read_csv_files_from_folder(folder_path):
    # Initialize an empty list to store DataFrames
    dfs = []

    # Get a list of files in the folder
    file_list = os.listdir(folder_path)

    # Iterate through the files in the folder
    for file_name in file_list:
        # Check if the file has a .csv extension
        if file_name.endswith(".csv"):
            file_path = os.path.join(folder_path, file_name)

            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)

            # Append the DataFrame to the list
            dfs.append(df)

    # Concatenate all DataFrames into a single DataFrame
    combined_df = pd.concat(dfs, ignore_index=True)

    return combined_df

In [29]:
cols = ['dataset', 'method', 'fitness_rule', 'fitness', 'ACC', 'MCC', 'f1_score', 'avg_odds_diff', 'stat_par_diff', 'eq_opp_diff']

In [30]:
results = read_csv_files_from_folder('results/')
results.replace({'ftl_mlp_initializer': 'Fair Transition Loss', 'adversarial_debiasing_initializer': 'Adversarial Debiasing', 'gerry_fair_classifier_initializer': 'Gerry Fair Classifier', 'prejudice_remover_initializer': 'Prejudice Remover', 'simple_mlp_initializer': 'Standard MLP (baseline)'}, inplace=True)
results.replace({'adult_dataset_reader': 'Adult Income', 'compas_dataset_reader': 'Compas Recidivism', 'german_dataset_reader': 'German Credit', 'bank_dataset_reader': 'Bank Marketing'}, inplace=True)
results.rename(columns={'avg_odds_diff': 'Equalized Odds', 'stat_par_diff': 'Statistical Parity', 'eq_opp_diff': 'Equal Opportunity', 'MCC': 'Mathew Correlation', 'ACC': 'Accuracy'}, inplace=True)

In [31]:
fitness_rules_target_metrics = {
    'mcc_parity': {'performance': 'Mathew Correlation', 'fairness': 'Statistical Parity'},
    'mcc_opportunity': {'performance': 'Mathew Correlation', 'fairness': 'Equal Opportunity'},
    'mcc_odds': {'performance': 'Mathew Correlation', 'fairness': 'Equalized Odds'},
    'acc_parity': {'performance': 'Accuracy', 'fairness': 'Statistical Parity'},
    'acc_opportunity': {'performance': 'Accuracy', 'fairness': 'Equal Opportunity'},
    'acc_odds': {'performance': 'Accuracy', 'fairness': 'Equalized Odds'}
}

fitness_rules_target_metrics = {
    'mcc_parity': ('Mathew Correlation', 'Statistical Parity'),
    'mcc_opportunity': ('Mathew Correlation', 'Equal Opportunity'),
    'mcc_odds': ('Mathew Correlation', 'Equalized Odds'),
    'acc_parity': ('Accuracy', 'Statistical Parity'),
    'acc_opportunity': ('Accuracy', 'Equal Opportunity'),
    'acc_odds': ('Accuracy', 'Equalized Odds')
}
fitness_rules_abvr = {
    'mcc_parity': 'Max(MCC - Stat. Parity)',
    'mcc_opportunity': 'Max(MCC - Eq. Odds)',
    'mcc_odds': 'Max(MCC - Eq. Opp.)',
    'acc_parity': 'Max(Acc - Stat. Parity)',
    'acc_opportunity': 'Max(Acc - Eq. Odds)',
    'acc_odds': 'Max(Acc - Eq. Opp.)'
}

results['Performance'] = 0
results['Fairness'] = 0
results['Fitness Rule'] = ''
for fitness_rule, (performance_metric, fairness_metric) in fitness_rules_target_metrics.items():
    results.loc[results.fitness_rule == fitness_rule,'Performance'] = results.loc[results.fitness_rule == fitness_rule,performance_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fairness'] = results.loc[results.fitness_rule == fitness_rule,fairness_metric]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule Abvr'] = fitness_rules_abvr[fitness_rule]
    results.loc[results.fitness_rule == fitness_rule,'Fitness Rule'] = 'Max(%s - %s)' % fitness_rules_target_metrics[fitness_rule]

In [32]:
display(results)

Unnamed: 0.1,Unnamed: 0,dataset,method,fitness_rule,fitness,Accuracy,Mathew Correlation,Equalized Odds,Statistical Parity,Equal Opportunity,Performance,Fairness,Fitness Rule,Fitness Rule Abvr
0,1,German Credit,Fair Transition Loss,mcc_parity,-0.059217,0.645000,0.000000,0.073451,0.059217,0.045977,0.000000,0.059217,Max(Mathew Correlation - Statistical Parity),Max(MCC - Stat. Parity)
1,5,German Credit,Fair Transition Loss,mcc_odds,-0.232520,0.700000,0.000000,0.232520,0.167535,0.010739,0.000000,0.232520,Max(Mathew Correlation - Equalized Odds),Max(MCC - Eq. Opp.)
2,9,German Credit,Fair Transition Loss,mcc_opportunity,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,Max(Mathew Correlation - Equal Opportunity),Max(MCC - Eq. Odds)
3,13,German Credit,Fair Transition Loss,acc_parity,0.700000,0.700000,0.000000,0.000000,0.000000,0.000000,0.700000,0.000000,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
4,17,German Credit,Fair Transition Loss,acc_odds,0.649008,0.710000,0.000000,0.060992,0.012155,0.029126,0.710000,0.060992,Max(Accuracy - Equalized Odds),Max(Acc - Eq. Opp.)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1840,1733,Bank Marketing,Standard MLP (baseline),mcc_odds,0.479237,0.898491,0.518615,0.039378,0.080687,0.026026,0.518615,0.039378,Max(Mathew Correlation - Equalized Odds),Max(MCC - Eq. Opp.)
1841,1738,Bank Marketing,Standard MLP (baseline),mcc_opportunity,0.411159,0.905707,0.526744,0.082825,0.056617,0.115585,0.526744,0.115585,Max(Mathew Correlation - Equal Opportunity),Max(MCC - Eq. Odds)
1842,1743,Bank Marketing,Standard MLP (baseline),acc_parity,0.828723,0.892588,0.488114,0.036710,0.063865,0.031354,0.892588,0.063865,Max(Accuracy - Statistical Parity),Max(Acc - Stat. Parity)
1843,1748,Bank Marketing,Standard MLP (baseline),acc_odds,0.859941,0.902919,0.523506,0.042978,0.089112,0.036849,0.902919,0.042978,Max(Accuracy - Equalized Odds),Max(Acc - Eq. Opp.)


In [33]:
datasets = ['Adult Income', 'Bank Marketing', 'Compas Recidivism','German Credit']
datasets

['Adult Income', 'Bank Marketing', 'Compas Recidivism', 'German Credit']

In [34]:
fitness_rules = ['mcc_parity', 'mcc_opportunity', 'mcc_odds', 'acc_parity', 'acc_opportunity', 'acc_odds']
fitness_rules

['mcc_parity',
 'mcc_opportunity',
 'mcc_odds',
 'acc_parity',
 'acc_opportunity',
 'acc_odds']

In [64]:
methods = ['Standard MLP (baseline)',
 'Fair Transition Loss',
 'Adversarial Debiasing',
 'Prejudice Remover',
 'Gerry Fair Classifier']
methods

['Standard MLP (baseline)',
 'Fair Transition Loss',
 'Adversarial Debiasing',
 'Prejudice Remover',
 'Gerry Fair Classifier']

In [65]:
if os.path.exists('multi_aso_data_list.json'):
    with open('multi_aso_data_list.json') as file:
        multi_aso_data_list = json.load(file)
else:    
    multi_aso_data_list = []
    for d in datasets:
        multi_aso_data = []
        for f in fitness_rules:
            methods_results = []
            for m in methods:
                r = results.loc[ (results['dataset'] == d) &
                                     (results['fitness_rule'] == f) &
                                     (results['method'] == m) ]\
                            .fitness.tolist()
                if len(r) == 0:
                    r = [-1]
                methods_results.append(r)
            min_eps = deepsig.multi_aso(methods_results, confidence_level=0.95)
            multi_aso_data_list.append({'fitness_rule': f, 'dataset': d, 'min_eps': min_eps.tolist()})
    with open('multi_aso_data_list.json', 'w') as file:
        json.dump(multi_aso_data_list, file)

In [83]:
aso_df_resume = []
reverse_aso_df_resume = []
for aso_result in sorted(multi_aso_data_list, key=lambda x: x['dataset']):
    fitness_rule = aso_result['fitness_rule']
    dataset = aso_result['dataset']

    aso_df = pd.DataFrame(aso_result['min_eps'], columns=methods)
    aso_df['method'] = methods
    aso_df['dataset'] = dataset
    aso_df['fitness_rule'] = fitness_rule
    aso_df = aso_df[aso_df['method'] == 'Fair Transition Loss' ]
    aso_df = aso_df.drop(['Fair Transition Loss'], axis=1)
    aso_df = aso_df.drop(['method'], axis=1)
    aso_df_resume.append(aso_df)

    reverse_aso_df = pd.DataFrame(aso_result['min_eps'], columns=methods).transpose()
    mapping = dict()
    for i, m in enumerate(methods):
        mapping[reverse_aso_df.columns[i]] = m + ' (reverse)'
    reverse_aso_df = reverse_aso_df.rename(columns=mapping)
    reverse_aso_df['method'] = methods
    reverse_aso_df['dataset'] = dataset
    reverse_aso_df['fitness_rule'] = fitness_rule
    reverse_aso_df = reverse_aso_df[reverse_aso_df['method'] == 'Fair Transition Loss' ]
    reverse_aso_df = reverse_aso_df.drop(['Fair Transition Loss (reverse)'], axis=1)
    reverse_aso_df = reverse_aso_df.drop(['method'], axis=1)
    reverse_aso_df_resume.append(reverse_aso_df)

print('Significance Testing')
significance = pd.concat(aso_df_resume)
significance = significance.set_index(['fitness_rule', 'dataset'])
significance = significance.sort_values(by=['fitness_rule', 'dataset'])
display(significance)
formatted_significance = significance.applymap(lambda x: '\\textbf{' + f'{x:.2f}' + '}' if x < 0.5 else f'{x:.2f}' )
formatted_significance.to_latex('tables/significance_resume.tex')

reverse_significance = pd.concat(reverse_aso_df_resume)
reverse_significance = reverse_significance.set_index(['fitness_rule', 'dataset'])
reverse_significance = reverse_significance.sort_values(by=['fitness_rule', 'dataset'])
reverse_formatted_significance = reverse_significance.applymap(lambda x: '\\textit{' + f'{x:.2f}' + '}' if x < 0.5 else f'{x:.2f}' )
reverse_formatted_significance.to_latex('tables/reverse_significance_resume.tex')

Significance Testing


Unnamed: 0_level_0,Unnamed: 1_level_0,Standard MLP (baseline),Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
fitness_rule,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
acc_odds,Adult Income,0.008806,0.224148,0.189303,0.0
acc_odds,Bank Marketing,0.768386,0.406367,0.808664,1.0
acc_odds,Compas Recidivism,0.006591,0.462377,0.0,0.00579
acc_odds,German Credit,1.0,0.426571,0.957387,0.556245
acc_opportunity,Adult Income,0.889891,0.965546,1.0,0.224633
acc_opportunity,Bank Marketing,1.0,0.389006,0.810667,1.0
acc_opportunity,Compas Recidivism,0.009003,0.788252,0.0,0.092425
acc_opportunity,German Credit,0.013958,0.063457,0.0,0.0
acc_parity,Adult Income,0.006234,0.23969,0.303426,0.0
acc_parity,Bank Marketing,0.253591,1.0,1.0,0.762062


In [84]:
for col, rev_col in zip(formatted_significance.columns, reverse_formatted_significance.columns):
    formatted_significance[col] = formatted_significance[col] + (' (' + reverse_formatted_significance[rev_col] + ')')
formatted_significance.to_latex('tables/combined_significance_resume.tex')
formatted_significance

Unnamed: 0_level_0,Unnamed: 1_level_0,Standard MLP (baseline),Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
fitness_rule,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
acc_odds,Adult Income,\textbf{0.01} (1.00),\textbf{0.22} (1.00),\textbf{0.19} (1.00),\textbf{0.00} (1.00)
acc_odds,Bank Marketing,0.77 (1.00),\textbf{0.41} (1.00),0.81 (1.00),1.00 (0.80)
acc_odds,Compas Recidivism,\textbf{0.01} (1.00),\textbf{0.46} (1.00),\textbf{0.00} (1.00),\textbf{0.01} (1.00)
acc_odds,German Credit,1.00 (0.73),\textbf{0.43} (1.00),0.96 (0.84),0.56 (1.00)
acc_opportunity,Adult Income,0.89 (1.00),0.97 (1.00),1.00 (\textit{0.47}),\textbf{0.22} (1.00)
acc_opportunity,Bank Marketing,1.00 (0.76),\textbf{0.39} (1.00),0.81 (1.00),1.00 (\textit{0.39})
acc_opportunity,Compas Recidivism,\textbf{0.01} (0.99),0.79 (1.00),\textbf{0.00} (1.00),\textbf{0.09} (1.00)
acc_opportunity,German Credit,\textbf{0.01} (1.00),\textbf{0.06} (1.00),\textbf{0.00} (1.00),\textbf{0.00} (1.00)
acc_parity,Adult Income,\textbf{0.01} (1.00),\textbf{0.24} (1.00),\textbf{0.30} (1.00),\textbf{0.00} (1.00)
acc_parity,Bank Marketing,\textbf{0.25} (1.00),1.00 (0.55),1.00 (0.73),0.76 (1.00)


In [85]:
grouped_results = results\
    .groupby(['Fitness Rule Abvr', 'dataset', 'method'])\
    .agg({'fitness': ['mean', 'std'], 'Performance': ['mean', 'std'], 'Fairness': ['mean', 'std']})\
    .sort_values(by=['Fitness Rule Abvr', 'dataset', ('fitness','mean')], ascending=False)
grouped_results['formatted_fitness'] = grouped_results.apply(lambda row: f"${row[('fitness', 'mean')]:.3f} (\pm{row[('fitness', 'std')]:.2f})$", axis=1)
grouped_results['formatted_performance'] = grouped_results.apply(lambda row: f"${row[('Performance', 'mean')]:.3f} (\pm{row[('Performance', 'std')]:.2f})$", axis=1)
grouped_results['formatted_fairness'] = grouped_results.apply(lambda row: f"${row[('Fairness', 'mean')]:.3f} (\pm{row[('Fairness', 'std')]:.2f})$", axis=1)
grouped_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fitness,fitness,Performance,Performance,Fairness,Fairness,formatted_fitness,formatted_performance,formatted_fairness
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Fitness Rule Abvr,dataset,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Max(MCC - Stat. Parity),German Credit,Adversarial Debiasing,0.200070,0.172675,0.367673,0.056485,0.167603,0.148864,$0.200 (\pm0.17)$,$0.368 (\pm0.06)$,$0.168 (\pm0.15)$
Max(MCC - Stat. Parity),German Credit,Gerry Fair Classifier,-0.031208,0.052788,0.000000,0.000000,0.031208,0.052788,$-0.031 (\pm0.05)$,$0.000 (\pm0.00)$,$0.031 (\pm0.05)$
Max(MCC - Stat. Parity),German Credit,Fair Transition Loss,-0.034435,0.073668,0.000000,0.000000,0.034435,0.073668,$-0.034 (\pm0.07)$,$0.000 (\pm0.00)$,$0.034 (\pm0.07)$
Max(MCC - Stat. Parity),German Credit,Prejudice Remover,-0.082559,0.050746,0.000000,0.000000,0.082559,0.050746,$-0.083 (\pm0.05)$,$0.000 (\pm0.00)$,$0.083 (\pm0.05)$
Max(MCC - Stat. Parity),German Credit,Standard MLP (baseline),-0.090370,0.061469,0.000000,0.000000,0.090370,0.061469,$-0.090 (\pm0.06)$,$0.000 (\pm0.00)$,$0.090 (\pm0.06)$
...,...,...,...,...,...,...,...,...,...,...,...
Max(Acc - Eq. Odds),Adult Income,Prejudice Remover,0.809534,0.022701,0.845788,0.003691,0.036254,0.021244,$0.810 (\pm0.02)$,$0.846 (\pm0.00)$,$0.036 (\pm0.02)$
Max(Acc - Eq. Odds),Adult Income,Fair Transition Loss,0.787013,0.083934,0.825826,0.069484,0.038813,0.037378,$0.787 (\pm0.08)$,$0.826 (\pm0.07)$,$0.039 (\pm0.04)$
Max(Acc - Eq. Odds),Adult Income,Adversarial Debiasing,0.756054,0.028345,0.847730,0.002904,0.091676,0.027315,$0.756 (\pm0.03)$,$0.848 (\pm0.00)$,$0.092 (\pm0.03)$
Max(Acc - Eq. Odds),Adult Income,Standard MLP (baseline),0.752260,0.041790,0.849309,0.003078,0.097049,0.042438,$0.752 (\pm0.04)$,$0.849 (\pm0.00)$,$0.097 (\pm0.04)$


In [86]:
selected_columns = ['formatted_fitness', 'formatted_performance', 'formatted_fairness']
grouped_results[selected_columns].to_latex('tables/grouped_results.tex')