# Model comparison

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklego.metrics import equal_opportunity_score
from sklego.metrics import p_percent_score
from sklearn.metrics import log_loss
from sklearn.utils.extmath import squared_norm
from moopt.scalarization_interface import scalar_interface, single_interface, w_interface
from moopt import monise
import numpy as np
import optuna, sklearn, sklearn.datasets
from fair_models import coefficient_of_variation, MOOLogisticRegression, FindCLogisticRegression, FindCCLogisticRegression

Using license file /opt/gurobi/gurobi.lic
Academic license - for non-commercial use only
No parameters matching '_test' found




In [2]:
# autoreload mode 2: re-import all modules before each cell executes, so edits to
# local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the line_profiler IPython extension.
%load_ext line_profiler

## Parte 1: Data treatment

In [3]:
# Load the raw German credit data from the CSV file (path is relative to the notebook).
mydata = pd.read_csv(filepath_or_buffer="Datasets/german_credit_data.csv")

Dados de pedidos de crédito. É um dos datasets mais utilizados em tutoriais de Fairness, como os da biblioteca [`aif360`](https://github.com/IBM/AIF360/blob/master/examples/README.md). O dataset original está disponível [aqui](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)), mas utilizei [esta versão](https://www.kaggle.com/kabure/german-credit-data-with-risk), por estar em formato CSV com cabeçalhos, embora omita algumas informações do dataset original.

O dataset possui originalmente 1000 registros, mas vários deles contêm valores NaN; após a remoção dos registros com informações faltantes, restam 522.

In [4]:
mydata.head()

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,0,67,male,2,own,,little,1169,6,radio/TV,good
1,1,22,female,2,own,little,moderate,5951,48,radio/TV,bad
2,2,49,male,1,own,little,,2096,12,education,good
3,3,45,male,2,free,little,little,7882,42,furniture/equipment,good
4,4,53,male,2,free,little,little,4870,24,car,bad


In [5]:
# Discard the 'Unnamed: 0' and 'Purpose' columns; they are not used as features.
mydata = mydata.drop(columns=['Unnamed: 0', 'Purpose'])

In [6]:
# Keep only the rows that have no missing value in any column.
mydata = mydata.dropna(how='any')

In [7]:
# Integer codes for each categorical column. The label is encoded as -1 (bad) / +1 (good).
mapping_Sex = dict(male=0, female=1)
mapping_Housing = dict(free=1, rent=2, own=3)
mapping_Savings = {'little': 1, 'moderate': 2, 'quite rich': 3, 'rich': 4}
mapping_Checking = dict(little=1, moderate=2, rich=3)
mapping_Risk = dict(bad=-1, good=1)

# Column name -> {category: code}; DataFrame.replace applies each mapping to its own column.
column_encodings = {
    'Sex': mapping_Sex,
    'Housing': mapping_Housing,
    'Saving accounts': mapping_Savings,
    'Checking account': mapping_Checking,
    'Risk': mapping_Risk,
}
numerical_data = mydata.replace(to_replace=column_encodings)

In [8]:
# Feature matrix: every column except the 'Risk' label.
X = numerical_data.drop(columns=['Risk'])

In [9]:
# Target vector: the encoded 'Risk' label (-1 = bad, 1 = good).
y = numerical_data.loc[:, 'Risk']

In [53]:
# Seed of the train+validation / test split.
# Pinned to 395 (the value recorded for the saved run) instead of being drawn at
# random, so that "Restart & Run All" reproduces the same split and scores.
random_seed = 395
random_seed

395

In [71]:
# Seed of the train / validation split.
# Pinned to 808 (the value recorded for the saved run) instead of being drawn at
# random, so that "Restart & Run All" reproduces the same split and scores.
random_seed2 = 808
random_seed2

808

In [72]:
# Two-stage hold-out split: first set aside 100 rows for testing, then take
# another 100 rows out of the remainder for validation; what is left is the training set.
X_tv, X_test, y_tv, y_test = train_test_split(
    X, y,
    test_size=100,
    random_state=random_seed,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_tv, y_tv,
    test_size=100,
    random_state=random_seed2,
)

In [73]:
def calc_reweight(X, y):
    """Compute one sample weight per row from its (Sex, label) combination.

    For a row with sex ``s`` and label ``l`` the weight is

        (n_s * n_l) / (D * n_sl)

    where ``D`` is the number of rows, ``n_s`` the number of rows with that sex,
    ``n_l`` the number of rows with that label and ``n_sl`` the number of rows
    with both. This is the ratio between the count expected if sex and label were
    independent and the count actually observed.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain a ``'Sex'`` column coded 0 (male) / 1 (female). No other
        column is required.
    y : pandas.Series or array-like
        Labels coded -1 / 1, in the same row order as ``X``.

    Returns
    -------
    list of float
        Weights in the row order of ``X``.

    Raises
    ------
    KeyError
        If some row has a sex outside {0, 1} or a label outside {-1, 1}.
    """
    sex = X['Sex'].to_numpy()
    labels = np.asarray(y)
    n_rows = len(sex)

    weights = np.zeros(n_rows, dtype=float)
    assigned = np.zeros(n_rows, dtype=bool)

    for group in (0, 1):
        in_group = sex == group
        for label in (-1, 1):
            has_label = labels == label
            cell = in_group & has_label
            n_cell = cell.sum()
            if n_cell == 0:
                # No row falls in this (sex, label) cell, so no weight is needed;
                # skipping it avoids a division by zero.
                continue
            weights[cell] = (in_group.sum() * has_label.sum()) / (n_rows * n_cell)
            assigned |= cell

    if not assigned.all():
        raise KeyError("calc_reweight expects Sex in {0, 1} and labels in {-1, 1}")

    return weights.tolist()

In [74]:
# Per-row weights for the training split, used later by the reweighted model.
sample_weight = calc_reweight(X=X_train, y=y_train)

## Parte 3: Comparando os modelos

In [75]:
# Results table: optimize_metrics[tuning_metric][model] will hold that model's scores,
# ordered as listed under the 'metrics' key. Every slot starts as its own empty list.
optimize_metrics = {
    tuned_metric: {
        'metrics': ['accuracy', 'equal_opportunity', 'p_percent', 'c_variation'],
        **{model_name: [] for model_name in ('LogReg', 'RegEqual', 'RegDemo', 'RegRewe', 'RegMoo')},
    }
    for tuned_metric in ('accuracy', 'equal_opportunity', 'p_percent', 'c_variation')
}

In [76]:
# Metrics each model is tuned for, one tuning run per entry.
metrics = [
    'accuracy',
    'equal_opportunity',
    'p_percent',
    'c_variation',
]

In [83]:
# RegLog: logistic regression tuned with FindCLogisticRegression, once per metric.
# NOTE: the scores printed under the 'Train' label are computed on the validation split.

equal_opp_scorer = equal_opportunity_score(sensitive_column="Sex")
p_percent_scorer = p_percent_score(sensitive_column="Sex")

for metric in metrics:
    reg_log = FindCLogisticRegression(X_train, y_train, X_val, y_val, metric=metric).tune()

    print('Metric: ', metric)
    for split_header, X_split, y_split in (('  Train: ', X_val, y_val),
                                           ('  Test: ', X_test, y_test)):
        print(split_header)
        print('    Acc: ', reg_log.score(X_split, y_split))
        print('    Equal: ', equal_opp_scorer(reg_log, X_split, y_split))
        print('    Parid: ', p_percent_scorer(reg_log, X_split))
        print('    CVar: ', coefficient_of_variation(reg_log, X_split, y_split))
    print('------------------------------------------------------')

    # Test-set scores, ordered as in optimize_metrics[metric]['metrics'].
    optimize_metrics[metric]['LogReg'] = [
        reg_log.score(X_test, y_test),
        equal_opp_scorer(reg_log, X_test, y_test),
        p_percent_scorer(reg_log, X_test),
        coefficient_of_variation(reg_log, X_test, y_test),
    ]

  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)


Metric:  accuracy
  Train: 
    Acc:  0.72
    Equal:  0.9512195121951219
    Parid:  0.9793103448275864
    CVar:  1.0807608787955771
  Test: 
    Acc:  0.59
    Equal:  0.9853372434017594
    Parid:  0.9850869925434964
    CVar:  1.8478123990881972
------------------------------------------------------


  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)


Metric:  equal_opportunity
  Train: 
    Acc:  0.69
    Equal:  0.9800443458980044
    Parid:  0.8324137931034483
    CVar:  1.215296809050856
  Test: 
    Acc:  0.56
    Equal:  0.9494047619047619
    Parid:  0.9610604805302404
    CVar:  1.9534513616218416
------------------------------------------------------


  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)


Metric:  p_percent
  Train: 
    Acc:  0.73
    Equal:  0.9512195121951219
    Parid:  0.9992962702322309
    CVar:  1.082403636882329
  Test: 
    Acc:  0.59
    Equal:  0.9853372434017594
    Parid:  0.9850869925434964
    CVar:  1.8478123990881972
------------------------------------------------------


  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z

  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)


Metric:  c_variation
  Train: 
    Acc:  0.58
    Equal:  1.0
    Parid:  0.9436619718309859
    CVar:  0.7586925376789191
  Test: 
    Acc:  0.61
    Equal:  1.0
    Parid:  0.9310344827586207
    CVar:  0.7750366004935066
------------------------------------------------------


In [84]:
# RegEqual: model tuned with FindCCLogisticRegression(base_model='equal'), once per metric.
# NOTE: the scores printed under the 'Train' label are computed on the validation split.

for metric in metrics:
    reg_equal = FindCCLogisticRegression(X_train, y_train, X_val, y_val, metric=metric, base_model='equal').tune()
    print('Metric: ', metric)
    print('  Train: ')
    print('    Acc: ', reg_equal.score(X_val, y_val))
    print('    Equal: ', equal_opportunity_score(sensitive_column="Sex")(reg_equal, X_val, y_val))
    print('    Parid: ', p_percent_score(sensitive_column="Sex")(reg_equal, X_val))
    print('    CVar: ', coefficient_of_variation(reg_equal, X_val, y_val))
    print('  Test: ')
    print('    Acc: ', reg_equal.score(X_test, y_test))
    print('    Equal: ', equal_opportunity_score(sensitive_column="Sex")(reg_equal, X_test, y_test))
    print('    Parid: ', p_percent_score(sensitive_column="Sex")(reg_equal, X_test))
    print('    CVar: ', coefficient_of_variation(reg_equal, X_test, y_test))
    print('------------------------------------------------------')
    # Test-set scores, ordered as in optimize_metrics[metric]['metrics'].
    # Every entry must be computed with reg_equal: scoring p_percent with reg_log
    # (the plain logistic regression left over from another cell) would record
    # the wrong model's value in the results table.
    optimize_metrics[metric]['RegEqual'] = [reg_equal.score(X_test, y_test),
                            equal_opportunity_score(sensitive_column="Sex")(reg_equal, X_test, y_test),
                            p_percent_score(sensitive_column="Sex")(reg_equal, X_test),
                            coefficient_of_variation(reg_equal, X_test, y_test)]

  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  accuracy
  Train: 
    Acc:  0.71
    Equal:  0.9247967479674796
    Parid:  0.9992962702322309
    CVar:  1.1252876029482906
  Test: 
    Acc:  0.57
    Equal:  0.8880208333333334
    Parid:  0.8577464788732394
    CVar:  1.7479072570593919
------------------------------------------------------


  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  equal_opportunity
  Train: 
    Acc:  0.66
    Equal:  0.9855769230769231
    Parid:  0.8855465884079237
    CVar:  1.3540064007726595
  Test: 
    Acc:  0.55
    Equal:  0.8445747800586509
    Parid:  0.9679230152365678
    CVar:  1.8599566332126545
------------------------------------------------------


  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  p_percent
  Train: 
    Acc:  0.69
    Equal:  0.8983739837398373
    Parid:  0.9992962702322309
    CVar:  1.1665962221617756
  Test: 
    Acc:  0.57
    Equal:  0.8880208333333334
    Parid:  0.8577464788732394
    CVar:  1.7479072570593919
------------------------------------------------------


  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another sol

  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another sol

  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  c_variation
  Train: 
    Acc:  0.54
    Equal:  1.0
    Parid:  1.0
    CVar:  0.7342087577794204
  Test: 
    Acc:  0.59
    Equal:  1.0
    Parid:  1.0
    CVar:  0.764348714825459
------------------------------------------------------


In [85]:
# RegDemo: model tuned with FindCCLogisticRegression(base_model='demographic'), once per metric.
# NOTE: the scores printed under the 'Train' label are computed on the validation split.

equal_opp_scorer = equal_opportunity_score(sensitive_column="Sex")
p_percent_scorer = p_percent_score(sensitive_column="Sex")

for metric in metrics:
    reg_demo = FindCCLogisticRegression(
        X_train, y_train, X_val, y_val, metric=metric, base_model='demographic'
    ).tune()

    print('Metric: ', metric)
    for split_header, X_split, y_split in (('  Train: ', X_val, y_val),
                                           ('  Test: ', X_test, y_test)):
        print(split_header)
        print('    Acc: ', reg_demo.score(X_split, y_split))
        print('    Equal: ', equal_opp_scorer(reg_demo, X_split, y_split))
        print('    Parid: ', p_percent_scorer(reg_demo, X_split))
        print('    CVar: ', coefficient_of_variation(reg_demo, X_split, y_split))
    print('------------------------------------------------------')

    # Test-set scores, ordered as in optimize_metrics[metric]['metrics'].
    optimize_metrics[metric]['RegDemo'] = [
        reg_demo.score(X_test, y_test),
        equal_opp_scorer(reg_demo, X_test, y_test),
        p_percent_scorer(reg_demo, X_test),
        coefficient_of_variation(reg_demo, X_test, y_test),
    ]

  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  accuracy
  Train: 
    Acc:  0.7
    Equal:  0.7884615384615384
    Parid:  0.6591511936339522
    CVar:  1.2189963630768497
  Test: 
    Acc:  0.61
    Equal:  0.9494047619047619
    Parid:  0.8160919540229884
    CVar:  2.040954399806036
------------------------------------------------------


  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another sol

  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  equal_opportunity
  Train: 
    Acc:  0.55
    Equal:  1.0
    Parid:  0.9859154929577465
    CVar:  0.7405919620773838
  Test: 
    Acc:  0.59
    Equal:  1.0
    Parid:  1.0
    CVar:  0.764348714825459
------------------------------------------------------


  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another sol

  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  p_percent
  Train: 
    Acc:  0.54
    Equal:  1.0
    Parid:  1.0
    CVar:  0.7342087577794204
  Test: 
    Acc:  0.59
    Equal:  1.0
    Parid:  1.0
    CVar:  0.764348714825459
------------------------------------------------------


  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another sol

  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "
  "Solution may be inaccurate. Try another solver, "


Metric:  c_variation
  Train: 
    Acc:  0.54
    Equal:  1.0
    Parid:  1.0
    CVar:  0.7342087577794204
  Test: 
    Acc:  0.59
    Equal:  1.0
    Parid:  1.0
    CVar:  0.764348714825459
------------------------------------------------------


In [86]:
# RegRewe: FindCLogisticRegression tuned with the precomputed sample_weight, once per metric.
# NOTE: the scores printed under the 'Train' label are computed on the validation split.

equal_opp_scorer = equal_opportunity_score(sensitive_column="Sex")
p_percent_scorer = p_percent_score(sensitive_column="Sex")

for metric in metrics:
    reg_rewe = FindCLogisticRegression(
        X_train, y_train, X_val, y_val, metric=metric, sample_weight=sample_weight
    ).tune()

    print('Metric: ', metric)
    for split_header, X_split, y_split in (('  Train: ', X_val, y_val),
                                           ('  Test: ', X_test, y_test)):
        print(split_header)
        print('    Acc: ', reg_rewe.score(X_split, y_split))
        print('    Equal: ', equal_opp_scorer(reg_rewe, X_split, y_split))
        print('    Parid: ', p_percent_scorer(reg_rewe, X_split))
        print('    CVar: ', coefficient_of_variation(reg_rewe, X_split, y_split))
    print('------------------------------------------------------')

    # Test-set scores, ordered as in optimize_metrics[metric]['metrics'].
    optimize_metrics[metric]['RegRewe'] = [
        reg_rewe.score(X_test, y_test),
        equal_opp_scorer(reg_rewe, X_test, y_test),
        p_percent_scorer(reg_rewe, X_test),
        coefficient_of_variation(reg_rewe, X_test, y_test),
    ]

Metric:  accuracy
  Train: 
    Acc:  0.72
    Equal:  0.9247967479674796
    Parid:  0.980281690140845
    CVar:  1.1276011595417947
  Test: 
    Acc:  0.57
    Equal:  0.9853372434017594
    Parid:  0.9530516431924883
    CVar:  1.8181451186397843
------------------------------------------------------


  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)


Metric:  equal_opportunity
  Train: 
    Acc:  0.68
    Equal:  0.9959514170040485
    Parid:  0.9618226600985221
    CVar:  0.9789605554743158
  Test: 
    Acc:  0.56
    Equal:  0.8402777777777777
    Parid:  0.8725992317541614
    CVar:  1.5456030825826168
------------------------------------------------------


  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)


Metric:  p_percent
  Train: 
    Acc:  0.71
    Equal:  0.9247967479674796
    Parid:  0.9992962702322309
    CVar:  1.1252876029482906
  Test: 
    Acc:  0.59
    Equal:  0.8880208333333334
    Parid:  0.8813936249073389
    CVar:  1.7753491677514033
------------------------------------------------------


  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)
  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z

  score = np.minimum(p_y1_z1 / p_y1_z0, p_y1_z0 / p_y1_z1)


Metric:  c_variation
  Train: 
    Acc:  0.58
    Equal:  1.0
    Parid:  0.9436619718309859
    CVar:  0.7586925376789191
  Test: 
    Acc:  0.61
    Equal:  1.0
    Parid:  0.9310344827586207
    CVar:  0.7750366004935066
------------------------------------------------------


In [81]:
# RegMoo: model tuned with MOOLogisticRegression, once per metric.
# NOTE(review): the saved run of this cell ended with
# "TypeError: '>=' not supported between instances of 'NoneType' and 'int'".
# The cause is not visible in this notebook and still needs to be investigated.

for metric in metrics:
    reg_moo = MOOLogisticRegression(X_train, y_train, X_val, y_val, metric=metric).tune()
    print('------------------------------------------------------')
    print('Metric: ', metric)

    # Test-set scores, ordered as in optimize_metrics[metric]['metrics'].
    moo_test_scores = [reg_moo.score(X_test, y_test),
                       equal_opportunity_score(sensitive_column="Sex")(reg_moo, X_test, y_test),
                       p_percent_score(sensitive_column="Sex")(reg_moo, X_test),
                       coefficient_of_variation(reg_moo, X_test, y_test)]

    print('   Acc: ', moo_test_scores[0])
    print('   Equal: ', moo_test_scores[1])
    print('   Parid: ', moo_test_scores[2])
    print('   CVar: ', moo_test_scores[3])
    print('------------------------------------------------------')

    # Record the scores like the other model loops do; without this the
    # 'RegMoo' slot of the results table stays empty.
    optimize_metrics[metric]['RegMoo'] = moo_test_scores







TypeError: '>=' not supported between instances of 'NoneType' and 'int'

In [82]:
# Display the collected test-set scores: one entry per tuning metric, each holding one
# list per model, ordered as in that entry's 'metrics' key.
# NOTE(review): this cell's execution count (82) is lower than those of the model loops
# (83-86), so the saved output may predate their latest run; re-run after them.
optimize_metrics

{'accuracy': {'metrics': ['accuracy',
   'equal_opportunity',
   'p_percent',
   'c_variation'],
  'LogReg': [0.59, 0.9853372434017594, 0.9850869925434964, 1.8478123990881972],
  'RegEqual': [0.57,
   0.8880208333333334,
   0.9310344827586207,
   1.7479072570593919],
  'RegDemo': [0.57, 0.859375, 0.8813936249073389, 1.8181451186397843],
  'RegRewe': [0.57,
   0.9494047619047619,
   0.9370339685169844,
   1.9710443537052722],
  'RegMoo': []},
 'equal_opportunity': {'metrics': ['accuracy',
   'equal_opportunity',
   'p_percent',
   'c_variation'],
  'LogReg': [0.62, 1.0, 0.9443349753694581, 0.7800455591695685],
  'RegEqual': [0.59, 1.0, 0.9310344827586207, 0.764348714825459],
  'RegDemo': [0.6, 0.8307291666666666, 0.9130074565037283, 1.9405142264714244],
  'RegRewe': [0.59, 0.89375, 0.9879007864488808, 1.204863515940801],
  'RegMoo': []},
 'p_percent': {'metrics': ['accuracy',
   'equal_opportunity',
   'p_percent',
   'c_variation'],
  'LogReg': [0.55, 0.9853372434017594, 0.998435054773