In [2]:
import sys
from pathlib import Path

In [1]:
import ethicml
from ethicml.algorithms.inprocess import GPyT, GPyTDemPar, GPyTEqOdds, LR, SVM, Agarwal, Kamiran, Kamishima
from ethicml.evaluators import evaluate_models, CrossValidator, run_metrics
from ethicml.data import Compas, Adult, load_data
from ethicml.metrics import Accuracy, ProbPos, TPR, TNR, AbsCV
from ethicml.preprocessing import train_test_split

In [18]:
# Relative path to the "run.py" script that is handed to the model wrappers
# (resolved against the notebook's working directory, one level up).
file_name = Path("..", "run.py")
def gp(**kwargs):
    """Call ``GPyT`` with the notebook-wide script path and interpreter.

    ``file_name`` (module-level) and ``sys.executable`` are always supplied;
    every keyword argument given here is forwarded unchanged.

    Returns:
        Whatever ``GPyT(...)`` returns.
    """
    return GPyT(
        file_name=file_name,
        executable=sys.executable,
        **kwargs
    )
def gp_dp(**kwargs):
    """Call ``GPyTDemPar`` with the notebook-wide script path and interpreter.

    ``file_name`` (module-level) and ``sys.executable`` are always supplied;
    every keyword argument given here is forwarded unchanged.

    Returns:
        Whatever ``GPyTDemPar(...)`` returns.
    """
    return GPyTDemPar(
        file_name=file_name,
        executable=sys.executable,
        **kwargs
    )
def gp_eo(**kwargs):
    """Call ``GPyTEqOdds`` with the notebook-wide script path and interpreter.

    ``file_name`` (module-level) and ``sys.executable`` are always supplied;
    every keyword argument given here (e.g. ``epochs``, ``s_as_input``,
    ``tnr0``/``tnr1``, ``tpr0``/``tpr1``) is forwarded unchanged.

    Returns:
        Whatever ``GPyTEqOdds(...)`` returns.
    """
    return GPyTEqOdds(
        file_name=file_name,
        executable=sys.executable,
        **kwargs
    )

In [20]:
# TNR values used as the `tnr0` / `tnr1` targets of the equalized-odds models.
# "true" / "false" in the names pair with the `s_as_input=True` / `False` runs.
#
# Previously noted values, kept for reference:
#   tnr_race_False = 0.724    tnr_race_True = 0.702
#   tnr_sex_True   = 0.724    tnr_sex_False = 0.744
#
# NOTE(review): the saved output of the evaluation cell shows model names with
# tnr 0.702 / 0.724 rather than 0.71 / 0.74 -- it looks like that output
# predates the values below; re-run to confirm.
tnr_in_true_race, tnr_in_false_race = 0.71, 0.74
tnr_in_true_sex, tnr_in_false_sex = 0.72, 0.77

In [29]:
# Models handed to `evaluate_models` as `inprocess_models`.
algos = []

# Currently disabled variants:
# algos += [gp(epochs=2, s_as_input=True)]
# algos += [gp_dp(epochs=70, s_as_input=True)]

# One pair of equalized-odds models per target TPR. Within each model both
# groups get the same targets (tpr0 == tpr1 and tnr0 == tnr1); the pair differs
# in `s_as_input` and in which TNR constant is used.
for tpr in [0.7]:
    algos.extend([
        gp_eo(epochs=10, s_as_input=True, tnr1=tnr_in_true_race, tnr0=tnr_in_true_race, tpr0=tpr, tpr1=tpr),
        gp_eo(epochs=10, s_as_input=False, tnr1=tnr_in_false_race, tnr0=tnr_in_false_race, tpr0=tpr, tpr1=tpr),
    ])

# Baseline models; built here but only evaluated if the last line is enabled.
# (Kamishima() is left out.)
baselines = [LR(), SVM(), Agarwal(), Kamiran()]
# algos += baselines

In [30]:
# Datasets passed to `evaluate_models`. Only Compas("Race") is enabled;
# disabled alternatives: Compas("Sex"), Adult("Race"), Adult("Sex").
data = [Compas("Race")]

In [31]:
# Metrics reported for the whole test set ...
overall_metrics = [Accuracy(), ProbPos(), TPR(), TNR()]
# ... and metrics passed as `per_sens_metrics` (presumably evaluated per
# sensitive group, cf. the `*_race_0` / `*_race_1` result columns).
group_metrics = [ProbPos(), TPR(), TNR()]

# Evaluate every model in `algos` on every dataset in `data`, one repeat each.
results = evaluate_models(
    datasets=data,
    inprocess_models=algos,
    metrics=overall_metrics,
    per_sens_metrics=group_metrics,
    repeats=1,
    delete_prev=True,  # delete previous results
)
results

 50%|█████     | 1/2 [02:00<02:00, 120.10s/it, model=GPyT_eq_odds_in_False_0tnr_0.724_1tnr_0.724_0tpr_0.7_1tpr_0.7, dataset=Compas Race, transform=no_transform, repeat=0]

Success: ['../run.py', '--inf', 'Variational', '--data', 'sensitive_from_numpy', '--dataset_path', '/var/folders/3_/lt1vgb7n2m99r2wtx8pgxmnm000mry/T/tmpnado623y/data.npz', '--cov', 'RBFKernel', '--mean', 'ZeroMean', '--optimizer', 'Adam', '--lr', '0.05', '--model_name', 'local', '--batch_size', '4933', '--epochs', '10', '--eval_epochs', '100000', '--summary_steps', '100000', '--chkpt_epochs', '100000', '--save_dir', '/var/folders/3_/lt1vgb7n2m99r2wtx8pgxmnm000mry/T/tmpnado623y', '--plot', '', '--logging_steps', '1', '--gpus', '0', '--preds_path', 'predictions.npz', '--num_samples', '20', '--optimize_inducing', 'True', '--length_scale', '1.2', '--sf', '1.0', '--iso', 'False', '--num_samples_pred', '2000', '--s_as_input', 'True', '--num_inducing', '1245', '--manual_seed', '888', '--lik', 'TuneTprLikelihood', '--p_ybary0_s0', '0.702', '--p_ybary0_s1', '0.702', '--p_ybary1_s0', '0.7', '--p_ybary1_s1', '0.7', '--biased_acceptance1', '0.49050245098039214', '--biased_acceptance2', '0.39604553

100%|██████████| 2/2 [04:05<00:00, 121.80s/it, model=GPyT_eq_odds_in_False_0tnr_0.724_1tnr_0.724_0tpr_0.7_1tpr_0.7, dataset=Compas Race, transform=no_transform, repeat=0]

Success: ['../run.py', '--inf', 'Variational', '--data', 'sensitive_from_numpy', '--dataset_path', '/var/folders/3_/lt1vgb7n2m99r2wtx8pgxmnm000mry/T/tmpad2ra3p9/data.npz', '--cov', 'RBFKernel', '--mean', 'ZeroMean', '--optimizer', 'Adam', '--lr', '0.05', '--model_name', 'local', '--batch_size', '4933', '--epochs', '10', '--eval_epochs', '100000', '--summary_steps', '100000', '--chkpt_epochs', '100000', '--save_dir', '/var/folders/3_/lt1vgb7n2m99r2wtx8pgxmnm000mry/T/tmpad2ra3p9', '--plot', '', '--logging_steps', '1', '--gpus', '0', '--preds_path', 'predictions.npz', '--num_samples', '20', '--optimize_inducing', 'True', '--length_scale', '1.2', '--sf', '1.0', '--iso', 'False', '--num_samples_pred', '2000', '--s_as_input', 'False', '--num_inducing', '1245', '--manual_seed', '888', '--lik', 'TuneTprLikelihood', '--p_ybary0_s0', '0.724', '--p_ybary0_s1', '0.724', '--p_ybary1_s0', '0.7', '--p_ybary1_s1', '0.7', '--biased_acceptance1', '0.49050245098039214', '--biased_acceptance2', '0.3960455




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Accuracy,TNR,TNR_race_0,TNR_race_0-race_1,TNR_race_0/race_1,TNR_race_1,TPR,TPR_race_0,TPR_race_0-race_1,TPR_race_0/race_1,TPR_race_1,prob_pos,prob_pos_race_0,prob_pos_race_0-race_1,prob_pos_race_0/race_1,prob_pos_race_1
dataset,transform,model,repeat,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Compas Race,no_transform,GPyT_eq_odds_in_True_0tnr_0.702_1tnr_0.702_0tpr_0.7_1tpr_0.7,0-2410,0.592382,0.554585,0.532374,0.056515,0.904031,0.588889,0.639854,0.645078,0.017749,0.972486,0.627329,0.531605,0.552927,0.061047,0.889593,0.491879
Compas Race,no_transform,GPyT_eq_odds_in_False_0tnr_0.724_1tnr_0.724_0tpr_0.7_1tpr_0.7,0-2410,0.576985,0.516739,0.470024,0.118865,0.798154,0.588889,0.652651,0.663212,0.035883,0.945895,0.627329,0.558347,0.594022,0.102143,0.828048,0.491879


In [4]:
# Load the Adult dataset, then split it into train and test parts using the
# default arguments of `train_test_split`.
adult_dataset = load_data(Adult())
train, test = train_test_split(adult_dataset)

In [9]:
# Fair grid search: 5-fold cross-validation of LR over a grid of `C` values,
# recording an accuracy measure and a fairness measure for each setting.
primary = Accuracy()
fair_measure = AbsCV()
hyperparams = {"C": [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7]}

lr_cv = CrossValidator(LR, hyperparams, folds=5)
cv_measures = [primary, fair_measure]
lr_cv.run(train, measures=cv_measures)

# Displayed as the cell output: best hyperparameters by the primary measure.
lr_cv.best_hyper_params(primary)

fold_0_model_Logistic Regression_completed
fold_0_model_Logistic Regression_completed
fold_0_model_Logistic Regression_completed
fold_0_model_Logistic Regression_completed
fold_0_model_Logistic Regression_completed
fold_0_model_Logistic Regression_completed
fold_0_model_Logistic Regression_completed
fold_0_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_1_model_Logistic Regression_completed
fold_2_model_Logistic Regression_completed
fold_2_model_Logistic Regression_completed
fold_2_model_Logistic Regression_completed
fold_2_model_Logistic Regression_completed
fold_2_model_Logistic Regression_completed
fold_2_model_Logistic Regression_completed
fold_2_model_Logistic Regression_completed
fold_2_mode

{'C': 0.1}

In [12]:
# Best cross-validation result according to the primary (accuracy) measure.
lr_best_acc = lr_cv.results.get_best_result(primary)
print("best accuracy:", lr_best_acc)

# Result picked via `get_best_in_top_k` with top_k=3: presumably the best
# `fair_measure` score among the top 3 by `primary` -- confirm in the API.
lr_best_fair = lr_cv.results.get_best_in_top_k(primary, fair_measure, top_k=3)
print("best fair(+accuracy):", lr_best_fair)

best accuracy: ResultTuple(params={'C': 0.1}, fold_id=-1, scores={'Accuracy': 0.8460901894581458, 'CV absolute': 0.82497297375662})
best fair(+accuracy): ResultTuple(params={'C': 1e-06}, fold_id=-1, scores={'Accuracy': 0.7909169578659723, 'CV absolute': 0.9530144052067022})
