In [29]:
import matplotlib.pyplot as plt
import pandas
import pandas as pd
import seaborn as sns
from matplotlib.lines import Line2D
import json
import os
import deepsig
from IPython.display import display

In [3]:
def convert_to_json_of_arrays(array_of_jsons):
    """Transpose a sequence of dicts into a single dict of lists.

    Every key seen in any input dict maps to the list of values it had, in
    input order. A dict that lacks a key contributes nothing for that key, so
    the resulting lists may have different lengths.
    """
    columns = {}
    for record in array_of_jsons:
        for field, value in record.items():
            # setdefault creates the list the first time a key is encountered.
            columns.setdefault(field, []).append(value)
    return columns

In [4]:
def read_csv_files_from_folder(folder_path):
    """Load every ``.csv`` file directly inside ``folder_path`` into one DataFrame.

    Files are read in sorted filename order so that the row order of the
    result (and anything derived from it, such as ``unique()`` listings) does
    not depend on the arbitrary order in which ``os.listdir`` returns entries.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` files.
    """
    csv_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".csv")
    )
    if not csv_names:
        # pd.concat([]) would also raise ValueError, but without saying where.
        raise ValueError(f"No .csv files found in folder: {folder_path}")

    frames = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_names]
    return pd.concat(frames, ignore_index=True)

In [5]:
def read_json_files_from_folder(folder_path):
    """Parse every ``.json`` file directly inside ``folder_path``.

    Files are visited in sorted filename order so the returned list is
    deterministic (``os.listdir`` makes no ordering promise). Files that
    cannot be decoded or parsed are reported on stdout and skipped.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    list
        The parsed content of each readable file, one entry per file.
    """
    json_list = []

    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".json"):
            continue
        file_path = os.path.join(folder_path, file_name)

        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(file_path, "r", encoding="utf-8") as file:
            try:
                json_list.append(json.load(file))
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Best effort: report the bad file and keep going.
                print(f"Error parsing JSON from file: {file_name}")

    return json_list

In [6]:
# Identifier columns followed by the raw metric columns of a results CSV.
cols = [
    'dataset',
    'method',
    'fitness_rule',
    'fitness',
    'ACC',
    'MCC',
    'f1_score',
    'avg_odds_diff',
    'stat_par_diff',
    'eq_opp_diff',
]

In [7]:
# Load every run from the results folder, then swap the raw initializer and
# reader identifiers (cell values) and the metric column names for the
# labels used in the tables below.
results = (
    read_csv_files_from_folder('../2023-07-26/results')
    .replace({
        'ftl_mlp_initializer': 'Fair Transition Loss',
        'adversarial_debiasing_initializer': 'Adversarial Debiasing',
        'gerry_fair_classifier_initializer': 'Gerry Fair Classifier',
        'prejudice_remover_initializer': 'Prejudice Remover',
        'simple_mlp_initializer': 'Standard MLP (baseline)',
    })
    .replace({
        'adult_dataset_reader': 'Adult Income',
        'compas_dataset_reader': 'Compas Recidivism',
        'german_dataset_reader': 'German Credit',
        'bank_dataset_reader': 'Bank Market',
    })
    .rename(columns={
        'avg_odds_diff': 'Equalized Odds',
        'stat_par_diff': 'Statistical Parity',
        'eq_opp_diff': 'Equal Opportunity',
        'MCC': 'Mathew Correlation',
        'ACC': 'Accuracy',
    })
)

In [8]:
# Each fitness rule is named "<performance>_<fairness>"; map it to the display
# names of the performance column and the fairness column it combines.
fitness_rules_target_metrics = {
    '%s_%s' % (perf_key, fair_key): {'performance': perf_label, 'fairness': fair_label}
    for perf_key, perf_label in (
        ('mcc', 'Mathew Correlation'),
        ('acc', 'Accuracy'),
    )
    for fair_key, fair_label in (
        ('parity', 'Statistical Parity'),
        ('opportunity', 'Equal Opportunity'),
        ('odds', 'Equalized Odds'),
    )
}

In [9]:
# Rich-render the combined, relabelled results frame for a visual sanity check
# (the rendered view is truncated to the first/last rows and columns).
display(results)

Unnamed: 0.1,Unnamed: 0,overall_acc,bal_acc,Equalized Odds,disp_imp,Statistical Parity,Equal Opportunity,theil_ind,f1_score,TPR,...,FOR,Accuracy,Mathew Correlation,fitness,solution,tune_results_history,fitness_rule,method,dataset,best_solution_tf_history
0,0,0.650000,0.477941,0.022839,1.007030,0.006558,0.028241,0.087680,0.804954,0.955882,...,0.000000,0.650000,0.000000,-0.006558,{'dropout': 0.18124389809074126},"[{'overall_acc': 0.59375, 'bal_acc': 0.4481132...",mcc_parity,Standard MLP (baseline),German Credit,"{'loss': [0.7094872196515402, 0.64226885371738..."
1,1,0.645000,0.484962,0.073451,1.064832,0.059217,0.045977,0.077539,0.806250,0.969925,...,0.000000,0.645000,0.000000,-0.059217,"{'dropout': 0.022732599406043996, 'privileged_...","[{'overall_acc': 0.00625, 'bal_acc': 0.0043859...",mcc_parity,Fair Transition Loss,German Credit,"{'loss': [0.8503885070482889, 0.78124442100524..."
2,2,0.740000,0.659133,0.025781,0.954128,0.038462,0.017416,0.092947,0.823129,0.937984,...,0.228571,0.740000,0.400810,0.362348,{'adversary_loss_weight': 0.6148404367300289},"[{'overall_acc': 0.70625, 'bal_acc': 0.5580357...",mcc_parity,Adversarial Debiasing,German Credit,
3,3,0.545000,0.397810,0.070884,0.895425,0.077859,0.056402,0.199996,0.775801,0.795620,...,0.000000,0.545000,0.000000,-0.077859,{'eta': 49.471477392162484},"[{'overall_acc': 0.56875, 'bal_acc': 0.4136363...",mcc_parity,Prejudice Remover,German Credit,
4,4,0.615000,0.448905,0.066751,0.894180,0.087912,0.050342,0.121159,0.828283,0.897810,...,0.000000,0.615000,0.000000,-0.066751,{'dropout': 0.0037868792501197348},"[{'overall_acc': 0.56875, 'bal_acc': 0.4099099...",mcc_odds,Standard MLP (baseline),German Credit,"{'loss': [0.7087434199121263, 0.61068538427352..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1752,25,0.896851,0.669304,0.057653,2.118719,0.074544,0.087314,0.093049,0.472758,0.364341,...,0.086635,0.896851,0.445599,0.809537,"{'C': 10.459577021264977, 'gamma': 0.1}","[{'overall_acc': 0.8993439934399344, 'bal_acc'...",acc_opportunity,Gerry Fair Classifier,Bank Market,
1753,26,0.896359,0.753549,0.072641,2.153847,0.133189,0.120516,0.075922,0.584757,0.560453,...,0.064991,0.896359,0.526321,0.775843,{'dropout': 0.000321942353699714},"[{'overall_acc': 0.9093890938909389, 'bal_acc'...",acc_opportunity,Standard MLP (baseline),Bank Market,"{'loss': [0.2854798949427075, 0.22084286485976..."
1754,27,0.898655,0.696844,0.053642,1.420554,0.034509,0.099174,0.086198,0.514914,0.427083,...,0.078684,0.898655,0.473519,0.799482,"{'dropout': 0.014472416047403187, 'privileged_...","[{'overall_acc': 0.8511685116851169, 'bal_acc'...",acc_opportunity,Fair Transition Loss,Bank Market,"{'loss': [0.49211953761153904, 0.4700222834873..."
1755,28,0.901607,0.718345,0.145878,0.755483,0.022450,0.286523,0.081914,0.551570,0.471867,...,0.074522,0.901607,0.507223,0.615084,{'adversary_loss_weight': 0.8391213408528397},"[{'overall_acc': 0.8987289872898729, 'bal_acc'...",acc_opportunity,Adversarial Debiasing,Bank Market,


In [10]:
# Distinct dataset labels, in order of first appearance in `results`.
datasets = results.loc[:, 'dataset'].unique().tolist()
datasets

['German Credit', 'Bank Market', 'Adult Income', 'Compas Recidivism']

In [11]:
# Distinct fitness-rule identifiers, in order of first appearance in `results`.
fitness_rules = results.loc[:, 'fitness_rule'].unique().tolist()
fitness_rules

['mcc_parity',
 'mcc_odds',
 'mcc_opportunity',
 'acc_parity',
 'acc_odds',
 'acc_opportunity']

In [12]:
# Distinct method labels, in order of first appearance in `results`.
methods = results.loc[:, 'method'].unique().tolist()
methods

['Standard MLP (baseline)',
 'Fair Transition Loss',
 'Adversarial Debiasing',
 'Prejudice Remover',
 'Gerry Fair Classifier']

In [13]:
# Pairwise significance tests (deepsig.multi_aso) over the per-run `fitness`
# scores, one min_eps matrix per (dataset, fitness_rule). The result is cached
# in multi_aso_data_list.json; delete that file to force a recomputation.
if os.path.exists('multi_aso_data_list.json'):
    with open('multi_aso_data_list.json') as file:
        multi_aso_data_list = json.load(file)
    # Records written by this cell carry the method order their matrix was
    # computed with. Refuse a cache whose order differs from the current
    # `methods` list, because the matrix rows/columns are labelled positionally.
    # Records without the key (older caches) cannot be checked and are accepted.
    for cached_record in multi_aso_data_list:
        cached_methods = cached_record.get('methods', methods)
        if cached_methods != methods:
            raise ValueError(
                'multi_aso_data_list.json was computed for methods %r but the '
                'current method order is %r; delete the cache file to recompute.'
                % (cached_methods, methods)
            )
else:
    multi_aso_data_list = []
    for dataset_name in datasets:
        for rule_name in fitness_rules:
            # One list of fitness scores per method, in `methods` order.
            methods_results = []
            for method_name in methods:
                selected = (
                    (results['dataset'] == dataset_name)
                    & (results['fitness_rule'] == rule_name)
                    & (results['method'] == method_name)
                )
                scores = results.loc[selected, 'fitness'].tolist()
                if len(scores) == 0:
                    # No runs for this combination: substitute a single -1
                    # score so the method still occupies its slot.
                    scores = [-1]
                methods_results.append(scores)
            # NOTE(review): multi_aso appears to be resampling-based, so repeated
            # runs may differ slightly - consider fixing a seed if the installed
            # deepsig version supports one.
            min_eps = deepsig.multi_aso(methods_results, confidence_level=0.95)
            multi_aso_data_list.append({
                'fitness_rule': rule_name,
                'dataset': dataset_name,
                'min_eps': min_eps.tolist(),
                # Row/column order of `min_eps`, stored so the cache is self-describing.
                'methods': list(methods),
            })
    with open('multi_aso_data_list.json', 'w') as file:
        json.dump(multi_aso_data_list, file)

In [14]:
# Show the loaded (or freshly computed) significance-test records.
multi_aso_data_list

[{'fitness_rule': 'mcc_parity',
  'dataset': 'Adult Income',
  'min_eps': [[1.0,
    0.9988805908520648,
    0.9979750223790821,
    0.995877444722284,
    0.0],
   [0.004620261792774651, 1.0, 0.1528664058142362, 1.0, 0.0],
   [0.004940540275507269, 1.0, 1.0, 1.0, 0.0],
   [0.00595911008417298, 1.0, 0.1050132284790311, 1.0, 0.0],
   [1.0, 1.0, 1.0, 1.0, 1.0]]},
 {'fitness_rule': 'mcc_odds',
  'dataset': 'Adult Income',
  'min_eps': [[1.0, 1.0, 1.0, 1.0, 0.0],
   [0.027617628263128934, 1.0, 0.25458170757222737, 0.42157426878629345, 0.0],
   [0.251460472570234, 1.0, 1.0, 0.7790530472236021, 0.0],
   [0.8744194609335025, 1.0, 1.0, 1.0, 0.04857425621287411],
   [1.0, 1.0, 1.0, 1.0, 1.0]]},
 {'fitness_rule': 'mcc_opportunity',
  'dataset': 'Adult Income',
  'min_eps': [[1.0,
    0.9976536340274126,
    0.9789694086170047,
    0.8910534937566084,
    0.0],
   [0.008614325158317561,
    1.0,
    0.0032188112253179217,
    0.058227525767044014,
    0.0],
   [1.0, 1.0, 1.0, 0.9400209938187005, 

In [15]:
# For every significance record, ordered by dataset (the sort is stable, so the
# fitness-rule order within a dataset is kept), show the per-method summary
# statistics followed by the pairwise min_eps matrix.
for aso_result in sorted(multi_aso_data_list, key=lambda rec: rec['dataset']):
    fitness_rule, dataset = aso_result['fitness_rule'], aso_result['dataset']
    metrics = fitness_rules_target_metrics[fitness_rule]

    # Runs belonging to this (fitness rule, dataset) pair.
    in_subset = (results['fitness_rule'] == fitness_rule) & (results['dataset'] == dataset)
    subset_results = results[in_subset]

    # Mean/std of the fitness and of the two metrics the rule targets,
    # ranked by mean fitness (best first).
    aggregations = {
        'fitness': ['mean', 'std', 'count'],
        metrics['performance']: ['mean', 'std'],
        metrics['fairness']: ['mean', 'std'],
    }
    grouped_results = (
        subset_results
        .groupby(['method'])
        .agg(aggregations)
        .sort_values(by=('fitness', 'mean'), ascending=False)
    )

    # Label both axes of the min_eps matrix with the method names.
    aso_df = pd.DataFrame(
        aso_result['min_eps'],
        columns=methods,
        index=pd.Index(methods, name='methods'),
    )

    header = '%s | max(%s - %s)' % (dataset, metrics['performance'], metrics['fairness'])
    print(header)
    display(grouped_results)
    print('Significance Testing')
    display(aso_df)

Adult Income | max(Mathew Correlation - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.492021,0.016387,16,0.512027,0.010737,0.020006,0.010913
Prejudice Remover,0.490575,0.008865,16,0.500024,0.008227,0.009449,0.009857
Adversarial Debiasing,0.477586,0.014882,16,0.50143,0.020095,0.023844,0.022968
Standard MLP (baseline),0.395355,0.012556,17,0.580617,0.008613,0.185263,0.010419
Gerry Fair Classifier,0.304991,0.128019,16,0.453065,0.173689,0.148074,0.08077


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.998881,0.997975,0.995877,0.0
Fair Transition Loss,0.00462,1.0,0.152866,1.0,0.0
Adversarial Debiasing,0.004941,1.0,1.0,1.0,0.0
Prejudice Remover,0.005959,1.0,0.105013,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Adult Income | max(Mathew Correlation - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.523425,0.020328,16,0.57588,0.01923,0.052455,0.015316
Prejudice Remover,0.508792,0.049072,16,0.557913,0.020503,0.049121,0.029543
Adversarial Debiasing,0.508718,0.030129,16,0.564504,0.015855,0.055786,0.017017
Standard MLP (baseline),0.489029,0.031826,16,0.57579,0.010824,0.086761,0.025333
Gerry Fair Classifier,0.403599,0.068466,16,0.503477,0.039191,0.099878,0.034109


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,1.0,1.0,0.0
Fair Transition Loss,0.027618,1.0,0.254582,0.421574,0.0
Adversarial Debiasing,0.25146,1.0,1.0,0.779053,0.0
Prejudice Remover,0.874419,1.0,1.0,1.0,0.048574
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Adult Income | max(Mathew Correlation - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.555629,0.028296,16,0.584309,0.011226,0.02868,0.027219
Prejudice Remover,0.50488,0.090917,16,0.559953,0.019364,0.055074,0.075105
Adversarial Debiasing,0.492795,0.05221,16,0.572624,0.010513,0.079829,0.045796
Standard MLP (baseline),0.488503,0.02946,16,0.579698,0.010412,0.091195,0.029729
Gerry Fair Classifier,0.383114,0.103134,16,0.481545,0.127041,0.098431,0.041765


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.997654,0.978969,0.891053,0.0
Fair Transition Loss,0.008614,1.0,0.003219,0.058228,0.0
Adversarial Debiasing,1.0,1.0,1.0,0.940021,0.000262
Prejudice Remover,1.0,1.0,1.0,1.0,0.154515
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Adult Income | max(Accuracy - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.813556,0.010233,16,0.827785,0.00724,0.014229,0.009873
Adversarial Debiasing,0.807533,0.008959,16,0.829657,0.005835,0.022124,0.010258
Prejudice Remover,0.807014,0.014847,16,0.824848,0.00438,0.017834,0.013943
Standard MLP (baseline),0.666467,0.012829,16,0.850663,0.004163,0.184197,0.011589
Gerry Fair Classifier,0.526329,0.229352,16,0.600539,0.285115,0.07421,0.079207


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.995036,0.994911,0.996441,0.039455
Fair Transition Loss,0.006226,1.0,0.25293,0.305378,0.0
Adversarial Debiasing,0.006429,1.0,1.0,0.631691,0.0
Prejudice Remover,0.005606,1.0,1.0,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Adult Income | max(Accuracy - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.807825,0.017309,15,0.84227,0.006409,0.034445,0.017407
Adversarial Debiasing,0.796482,0.013597,15,0.848764,0.003349,0.052282,0.013394
Prejudice Remover,0.793973,0.017261,15,0.84496,0.00548,0.050987,0.014133
Standard MLP (baseline),0.765316,0.017537,16,0.849627,0.003754,0.084311,0.017112
Gerry Fair Classifier,0.654975,0.197797,15,0.713787,0.212745,0.058811,0.04148


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.999444,0.997529,1.0,0.030096
Fair Transition Loss,0.009395,1.0,0.225552,0.170713,0.0
Adversarial Debiasing,0.016999,1.0,1.0,0.673625,0.0
Prejudice Remover,0.011947,1.0,1.0,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Adult Income | max(Accuracy - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Prejudice Remover,0.809534,0.022701,13,0.845788,0.003691,0.036254,0.021244
Fair Transition Loss,0.787013,0.083934,15,0.825826,0.069484,0.038813,0.037378
Adversarial Debiasing,0.756054,0.028345,14,0.84773,0.002904,0.091676,0.027315
Standard MLP (baseline),0.75226,0.04179,15,0.849309,0.003078,0.097049,0.042438
Gerry Fair Classifier,0.655568,0.18591,13,0.720083,0.212767,0.064516,0.051427


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,0.875292,1.0,0.062699
Fair Transition Loss,0.898703,1.0,0.959536,1.0,0.204228
Adversarial Debiasing,1.0,1.0,1.0,1.0,0.042513
Prejudice Remover,0.032922,0.480779,0.009661,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Bank Market | max(Mathew Correlation - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.538637,0.030203,16,0.578629,0.010484,0.039992,0.032659
Adversarial Debiasing,0.45905,0.026773,16,0.505485,0.020981,0.046435,0.023318
Prejudice Remover,0.453999,0.028865,16,0.486893,0.019122,0.032895,0.017302
Standard MLP (baseline),0.419313,0.035082,16,0.521521,0.018585,0.102208,0.029541
Gerry Fair Classifier,0.364122,0.023438,16,0.427766,0.023644,0.063644,0.014344


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.998198,1.0,1.0,0.000946
Fair Transition Loss,0.008002,1.0,0.0,0.0,0.0
Adversarial Debiasing,0.042412,1.0,1.0,0.857366,0.0
Prejudice Remover,0.104345,1.0,1.0,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Bank Market | max(Mathew Correlation - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.485232,0.057766,16,0.568878,0.011656,0.083646,0.057749
Standard MLP (baseline),0.4391,0.033272,16,0.513842,0.019228,0.074742,0.026389
Adversarial Debiasing,0.425859,0.059593,16,0.511591,0.019124,0.085732,0.052456
Prejudice Remover,0.412904,0.038026,16,0.484515,0.019109,0.071611,0.041584
Gerry Fair Classifier,0.379921,0.039809,16,0.430305,0.017406,0.050384,0.032472


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,0.458967,0.186403,2e-06
Fair Transition Loss,0.471688,1.0,0.189642,0.108138,0.02729
Adversarial Debiasing,1.0,1.0,1.0,0.909613,0.167464
Prejudice Remover,1.0,1.0,1.0,1.0,0.104496
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Bank Market | max(Mathew Correlation - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.466943,0.106056,16,0.560218,0.028665,0.093275,0.09544
Standard MLP (baseline),0.432485,0.055535,16,0.519779,0.015186,0.087294,0.058595
Prejudice Remover,0.391568,0.086487,16,0.489974,0.016599,0.098406,0.077769
Adversarial Debiasing,0.372598,0.093544,16,0.508244,0.018888,0.135646,0.087756
Gerry Fair Classifier,0.360622,0.056931,16,0.432291,0.020158,0.071669,0.052382


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,0.115116,0.251015,0.033529
Fair Transition Loss,0.820683,1.0,0.180293,0.23648,0.152576
Adversarial Debiasing,1.0,1.0,1.0,1.0,0.862072
Prejudice Remover,1.0,1.0,0.654186,1.0,0.526717
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


Bank Market | max(Accuracy - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Adversarial Debiasing,0.869188,0.026198,16,0.900674,0.004901,0.031487,0.024422
Prejudice Remover,0.860453,0.024102,16,0.898204,0.003222,0.037751,0.023137
Fair Transition Loss,0.85396,0.051311,16,0.888939,0.012644,0.034979,0.051374
Gerry Fair Classifier,0.827902,0.024396,16,0.895253,0.003863,0.06735,0.022572
Standard MLP (baseline),0.798856,0.035529,16,0.901689,0.004638,0.102833,0.033983


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,1.0,0.997085,1.0
Fair Transition Loss,0.235047,1.0,1.0,1.0,0.7633
Adversarial Debiasing,0.00844,0.552059,1.0,0.611625,0.01823
Prejudice Remover,0.013736,0.745856,1.0,1.0,0.004089
Gerry Fair Classifier,0.200796,1.0,1.0,1.0,1.0


Bank Market | max(Accuracy - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Gerry Fair Classifier,0.847492,0.033414,16,0.895622,0.003053,0.048129,0.035106
Fair Transition Loss,0.83285,0.046099,16,0.891881,0.012613,0.05903,0.050259
Prejudice Remover,0.82658,0.03657,16,0.897579,0.003235,0.070999,0.036964
Standard MLP (baseline),0.82566,0.041732,16,0.900951,0.004575,0.075291,0.041982
Adversarial Debiasing,0.807064,0.068668,16,0.902038,0.003512,0.094973,0.068159


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,0.454819,1.0,1.0
Fair Transition Loss,0.77553,1.0,0.38631,0.814048,1.0
Adversarial Debiasing,1.0,1.0,1.0,1.0,1.0
Prejudice Remover,0.887988,1.0,0.408139,1.0,1.0
Gerry Fair Classifier,0.364233,0.47928,0.17268,0.336805,1.0


Bank Market | max(Accuracy - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Gerry Fair Classifier,0.801388,0.055304,16,0.895601,0.00305,0.094213,0.056372
Standard MLP (baseline),0.799991,0.060918,16,0.902222,0.004254,0.102231,0.062028
Fair Transition Loss,0.79942,0.097449,16,0.891471,0.010348,0.092051,0.098067
Prejudice Remover,0.780926,0.071711,16,0.899414,0.003267,0.118488,0.072614
Adversarial Debiasing,0.749968,0.085135,16,0.900203,0.002987,0.150235,0.085263


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.783754,0.209436,0.539397,1.0
Fair Transition Loss,1.0,1.0,0.369527,0.819028,1.0
Adversarial Debiasing,1.0,1.0,1.0,1.0,1.0
Prejudice Remover,1.0,1.0,0.490342,1.0,1.0
Gerry Fair Classifier,1.0,0.736151,0.171768,0.526315,1.0


Compas Recidivism | max(Mathew Correlation - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.219648,0.060505,15,0.276319,0.027969,0.056671,0.045211
Adversarial Debiasing,0.156659,0.143556,15,0.322097,0.015693,0.165438,0.135798
Gerry Fair Classifier,0.114192,0.047612,15,0.245784,0.09305,0.131592,0.057941
Prejudice Remover,-0.318189,0.052017,15,-0.275734,0.029641,0.042455,0.027919
Standard MLP (baseline),-0.511081,0.048197,15,-0.299095,0.029404,0.211986,0.039521


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.996345,1.0,0.997154,0.996968
Fair Transition Loss,0.005095,1.0,0.211472,0.0,0.001872
Adversarial Debiasing,0.003193,1.0,1.0,0.0,0.785546
Prejudice Remover,0.006301,1.0,1.0,1.0,0.997638
Gerry Fair Classifier,0.005356,1.0,1.0,0.005636,1.0


Compas Recidivism | max(Mathew Correlation - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.208479,0.058105,15,0.282652,0.024903,0.074173,0.046962
Adversarial Debiasing,0.191345,0.110034,15,0.324271,0.034796,0.132926,0.096137
Gerry Fair Classifier,0.150205,0.059268,15,0.26914,0.073869,0.118935,0.042625
Prejudice Remover,-0.351727,0.032756,15,-0.278298,0.020505,0.073429,0.02564
Standard MLP (baseline),-0.470693,0.048406,15,-0.294316,0.016584,0.176377,0.04259


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.995841,0.997852,0.998365,0.996993
Fair Transition Loss,0.005317,1.0,0.58261,0.0,0.056215
Adversarial Debiasing,0.003936,1.0,1.0,0.0,0.570301
Prejudice Remover,0.01006,1.0,1.0,1.0,0.997713
Gerry Fair Classifier,0.00494,1.0,1.0,0.004811,1.0


Compas Recidivism | max(Mathew Correlation - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Adversarial Debiasing,0.258418,0.052592,15,0.328905,0.025563,0.070487,0.050916
Fair Transition Loss,0.213496,0.058243,15,0.263713,0.060744,0.050218,0.029355
Gerry Fair Classifier,0.166407,0.052726,15,0.264071,0.053928,0.097664,0.038141
Prejudice Remover,-0.319029,0.031,15,-0.289144,0.027507,0.029885,0.02192
Standard MLP (baseline),-0.435498,0.034788,15,-0.292278,0.021478,0.14322,0.031957


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.996901,0.996192,0.995628,0.996766
Fair Transition Loss,0.004837,1.0,1.0,0.0,0.183791
Adversarial Debiasing,0.004914,0.157117,1.0,0.0,0.002297
Prejudice Remover,0.008905,1.0,1.0,1.0,0.997417
Gerry Fair Classifier,0.004776,1.0,1.0,0.00434,1.0


Compas Recidivism | max(Accuracy - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Adversarial Debiasing,0.537701,0.072416,15,0.67007,0.01598,0.13237,0.08159
Fair Transition Loss,0.501417,0.151259,15,0.6,0.047615,0.098583,0.143214
Gerry Fair Classifier,0.482351,0.038079,15,0.595624,0.085296,0.113273,0.072172
Prejudice Remover,0.30763,0.028754,15,0.359265,0.01323,0.051635,0.023966
Standard MLP (baseline),0.145627,0.032556,15,0.354025,0.017277,0.208397,0.024309


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,0.997684,0.996784,0.996411
Fair Transition Loss,0.002008,1.0,1.0,0.107223,1.0
Adversarial Debiasing,0.003972,0.499794,1.0,0.0,0.091172
Prejudice Remover,0.005943,1.0,1.0,1.0,0.997301
Gerry Fair Classifier,0.004969,0.799772,1.0,0.005285,1.0


Compas Recidivism | max(Accuracy - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.571804,0.028527,15,0.630632,0.04014,0.058828,0.033938
Adversarial Debiasing,0.55261,0.094035,15,0.668504,0.013186,0.115893,0.090752
Gerry Fair Classifier,0.518907,0.0374,15,0.624203,0.054904,0.105296,0.053737
Prejudice Remover,0.264117,0.027071,15,0.356888,0.013023,0.092771,0.020648
Standard MLP (baseline),0.154593,0.04394,15,0.349919,0.015081,0.195326,0.044006


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.995439,1.0,0.994923,0.996265
Fair Transition Loss,0.006612,1.0,0.471443,0.0,0.001646
Adversarial Debiasing,0.003564,1.0,1.0,0.0,0.703765
Prejudice Remover,0.01326,1.0,1.0,1.0,0.997684
Gerry Fair Classifier,0.006098,1.0,1.0,0.004526,1.0


Compas Recidivism | max(Accuracy - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.594341,0.036989,15,0.64792,0.013005,0.053579,0.03286
Adversarial Debiasing,0.594049,0.065326,15,0.671907,0.017399,0.077858,0.064096
Gerry Fair Classifier,0.551639,0.061398,15,0.616532,0.073965,0.064892,0.037954
Prejudice Remover,0.286772,0.032081,15,0.342247,0.011975,0.055476,0.034565
Standard MLP (baseline),0.217957,0.051457,15,0.352998,0.013016,0.135041,0.04874


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.994911,0.998008,0.999586,0.996648
Fair Transition Loss,0.009027,1.0,0.792935,0.0,0.107667
Adversarial Debiasing,0.006306,1.0,1.0,0.0,0.289234
Prejudice Remover,0.037253,1.0,1.0,1.0,0.998079
Gerry Fair Classifier,0.007543,1.0,1.0,0.004032,1.0


German Credit | max(Mathew Correlation - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Adversarial Debiasing,0.20007,0.172675,15,0.367673,0.056485,0.167603,0.148864
Fair Transition Loss,-0.034435,0.073668,15,0.0,0.0,0.034435,0.073668
Prejudice Remover,-0.082559,0.050746,15,0.0,0.0,0.082559,0.050746
Standard MLP (baseline),-0.09037,0.061469,15,0.0,0.0,0.09037,0.061469


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,1.0,1.0,1.0,0.0
Fair Transition Loss,0.36242,1.0,1.0,0.475996,0.0
Adversarial Debiasing,0.018517,0.058075,1.0,0.024615,0.0
Prejudice Remover,0.686937,1.0,1.0,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


German Credit | max(Mathew Correlation - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Adversarial Debiasing,0.03977,0.414069,15,0.300535,0.129892,0.260765,0.296572
Prejudice Remover,-0.087176,0.046848,15,0.0,0.0,0.087176,0.046848
Standard MLP (baseline),-0.097294,0.051877,15,0.0,0.0,0.097294,0.051877
Fair Transition Loss,-0.148704,0.211169,15,0.0,0.0,0.148704,0.211169


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.328722,0.875142,1.0,0.0
Fair Transition Loss,1.0,1.0,1.0,1.0,0.0
Adversarial Debiasing,0.755221,0.501261,1.0,0.769375,0.0
Prejudice Remover,0.611743,0.318726,0.862732,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


German Credit | max(Mathew Correlation - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Mathew Correlation,Mathew Correlation,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Adversarial Debiasing,0.115876,0.403735,15,0.310899,0.135257,0.195024,0.281659
Prejudice Remover,-0.072759,0.061678,15,0.0,0.0,0.072759,0.061678
Standard MLP (baseline),-0.092163,0.059553,15,0.0,0.0,0.092163,0.059553
Fair Transition Loss,-0.11338,0.263931,15,0.0,0.0,0.11338,0.263931


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.560728,1.0,1.0,0.0
Fair Transition Loss,1.0,1.0,1.0,1.0,0.0
Adversarial Debiasing,0.678192,0.423453,1.0,0.696819,0.000533
Prejudice Remover,0.540284,0.563664,1.0,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


German Credit | max(Accuracy - Statistical Parity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Statistical Parity,Statistical Parity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.577267,0.264434,15,0.673667,0.055789,0.0964,0.217653
Standard MLP (baseline),0.511642,0.064916,15,0.621,0.035567,0.109358,0.049178
Prejudice Remover,0.491066,0.059709,15,0.587,0.031041,0.095934,0.049375
Adversarial Debiasing,0.430467,0.331712,15,0.713333,0.090291,0.282867,0.255316


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.949985,0.396354,0.531614,0.0
Fair Transition Loss,1.0,1.0,0.469207,1.0,0.0
Adversarial Debiasing,1.0,1.0,1.0,1.0,0.0
Prejudice Remover,1.0,1.0,0.510048,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


German Credit | max(Accuracy - Equalized Odds)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equalized Odds,Equalized Odds
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.572396,0.313009,15,0.669333,0.091376,0.096937,0.229662
Standard MLP (baseline),0.525039,0.06341,15,0.627,0.032558,0.101961,0.052645
Prejudice Remover,0.483722,0.052698,15,0.591,0.037042,0.107278,0.059138
Adversarial Debiasing,0.368302,0.383015,15,0.685333,0.098152,0.317031,0.304063


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.739006,0.185844,0.265305,0.0
Fair Transition Loss,1.0,1.0,0.429356,0.9695,0.0
Adversarial Debiasing,1.0,1.0,1.0,1.0,0.0
Prejudice Remover,1.0,0.855692,0.28685,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


German Credit | max(Accuracy - Equal Opportunity)


Unnamed: 0_level_0,fitness,fitness,fitness,Accuracy,Accuracy,Equal Opportunity,Equal Opportunity
Unnamed: 0_level_1,mean,std,count,mean,std,mean,std
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Fair Transition Loss,0.679803,0.048559,15,0.692333,0.032616,0.012531,0.024006
Standard MLP (baseline),0.54981,0.059112,15,0.614333,0.033481,0.064523,0.045405
Adversarial Debiasing,0.530106,0.328938,15,0.712667,0.099837,0.18256,0.235414
Prejudice Remover,0.50464,0.082283,15,0.59,0.025284,0.08536,0.074077


Significance Testing


Unnamed: 0_level_0,Standard MLP (baseline),Fair Transition Loss,Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Standard MLP (baseline),1.0,0.998447,0.687304,0.298358,0.0
Fair Transition Loss,0.014195,1.0,0.082456,0.0,0.0
Adversarial Debiasing,1.0,1.0,1.0,1.0,0.0
Prejudice Remover,1.0,1.0,0.828166,1.0,0.0
Gerry Fair Classifier,1.0,1.0,1.0,1.0,1.0


In [28]:
# Gather, for every ASO result (ordered by dataset name), the single row of its
# min-eps table that belongs to 'Fair Transition Loss', so that all comparisons
# of that method against the others end up in one summary frame.
aso_df_resume = []
for aso_entry in sorted(multi_aso_data_list, key=lambda entry: entry['dataset']):
    dataset = aso_entry['dataset']
    fitness_rule = aso_entry['fitness_rule']

    # One row and one column per entry of `methods`; the extra columns tag each
    # row with the method it describes and with where the result came from.
    eps_table = pd.DataFrame(aso_entry['min_eps'], columns=methods).assign(
        method=methods,
        dataset=dataset,
        fitness_rule=fitness_rule,
    )

    # Keep only the 'Fair Transition Loss' row, then discard its comparison
    # against itself and the now-redundant 'method' label.
    is_target_method = eps_table['method'] == 'Fair Transition Loss'
    target_row = eps_table[is_target_method].drop(
        ['Fair Transition Loss', 'method'], axis=1
    )
    aso_df_resume.append(target_row)


print('Significance Testing')
significance = pd.concat(aso_df_resume)
display(significance)

Significance Testing


Unnamed: 0,Standard MLP (baseline),Adversarial Debiasing,Prejudice Remover,Gerry Fair Classifier,dataset,fitness_rule
1,0.00462,0.152866,1.0,0.0,Adult Income,mcc_parity
1,0.027618,0.254582,0.421574,0.0,Adult Income,mcc_odds
1,0.008614,0.003219,0.058228,0.0,Adult Income,mcc_opportunity
1,0.006226,0.25293,0.305378,0.0,Adult Income,acc_parity
1,0.009395,0.225552,0.170713,0.0,Adult Income,acc_odds
1,0.898703,0.959536,1.0,0.204228,Adult Income,acc_opportunity
1,0.008002,0.0,0.0,0.0,Bank Market,mcc_parity
1,0.471688,0.189642,0.108138,0.02729,Bank Market,mcc_odds
1,0.820683,0.180293,0.23648,0.152576,Bank Market,mcc_opportunity
1,0.235047,1.0,1.0,0.7633,Bank Market,acc_parity


In [0]:
# Draw one heatmap per fitness rule: rows are datasets, columns are the methods
# that 'Fair Transition Loss' was compared against, and each cell is the
# corresponding value taken from `significance`.
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 16))
fig.subplots_adjust(hspace=0.7)


for fitness_rule, ax in zip(sorted(fitness_rules), axes.flatten()):
    # sns.heatmap renders a rectangular numeric table and has no x=/y=
    # parameters, so the slice is reshaped first: the constant 'fitness_rule'
    # column is dropped and 'dataset' becomes the row index, leaving only the
    # per-method score columns.
    by_fitness_rule = (
        significance[significance.fitness_rule == fitness_rule]
        .drop(columns=['fitness_rule'])
        .set_index('dataset')
        .astype(float)
    )

    if by_fitness_rule.empty:
        # Nothing to plot for this rule; hide the unused panel instead of failing.
        ax.set_axis_off()
        continue

    # NOTE(review): the scores shown in the table above all lie in [0, 1], so
    # the colour scale is pinned to that range to keep panels comparable --
    # confirm this holds for every result.
    sns.heatmap(
        data=by_fitness_rule,
        vmin=0,
        vmax=1,
        annot=True,
        fmt='.3f',
        ax=ax,
    )

    # Assumes fitness_rules_target_metrics[fitness_rule] is a pair of metric
    # names (it is %-formatted into two placeholders) -- TODO confirm.
    ax.set_title('Max(%s - %s)' % fitness_rules_target_metrics[fitness_rule])

plt.tight_layout()
plt.show()