In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklego.metrics import equal_opportunity_score
from sklego.metrics import p_percent_score
from sklearn.metrics import log_loss, accuracy_score
from sklearn.utils.extmath import squared_norm
from moopt.scalarization_interface import scalar_interface, single_interface, w_interface
from moopt import monise
import numpy as np
import optuna, sklearn, sklearn.datasets
from fair_models import coefficient_of_variation, MOOLogisticRegression, FindCLogisticRegression, FindCCLogisticRegression
from fair_models import calc_reweight
from fair_models import FairScalarization, EqualScalarization
from fair_models import SimpleVoting

import plotly.graph_objects as go

In [4]:
from sklego.linear_model import DemographicParityClassifier
from sklego.linear_model import EqualOpportunityClassifier
from sklearn.linear_model import LogisticRegression

In [5]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler

In [6]:
def dominate(a, b):
    sense = np.array([1, 1, 1, 1])
    if all((sense*a)>=(sense*b)) and any((sense*a)>(sense*b)):
        return 1
    else:
        return 0

In [7]:
def all_metrics_parallel_coord(dataset, metric = 'accuracy',
                               acc_min = None, acc_max = None,
                               eo_min = None, eo_max = None,
                               pp_min=None, pp_max = None,
                               cv_min = None, cv_max = None):
    
    dominate_metr = np.array([any([dominate(other, row)
                                   for other in dataset.values])
                              for row in dataset.values])
    
    metrics = ['accuracy', 'equal_opportunity', 'p_percent', 'c_variation']
    
    met_aux = {"Modelo": [],
              "Acurácia": [],
              "Igualdade de Oportunidade": [],
              "P porcento": [],
              "Coeficiente de variação": [],
              'model_id': [],
              "metric_id": [],
              "metric": []}

    for m in range(len(metrics)):
        metric = metrics[m]
        
        met_aux["Modelo"].extend(['RegLogistica', 'EqualOp', 'ParDemo', 'Reweight', 'MinMax', 'ErroMO', 'ProbMO'])
        met_aux["Acurácia"].extend([dataset.loc[('LogReg',metric),metrics[0]],
                          dataset.loc[('RegEqual',metric),metrics[0]],
                          dataset.loc[('RegDemo',metric),metrics[0]],
                          dataset.loc[('RegRewe',metric),metrics[0]],
                          dataset.loc[('Minimax',metric),metrics[0]],
                          dataset.loc[('RegMoo',metric),metrics[0]],
                          dataset.loc[('RegEqMoo',metric),metrics[0]]])
        met_aux["Igualdade de Oportunidade"].extend([dataset.loc[('LogReg',metric),metrics[1]],
                          dataset.loc[('RegEqual',metric),metrics[1]],
                          dataset.loc[('RegDemo',metric),metrics[1]],
                          dataset.loc[('RegRewe',metric),metrics[1]],
                          dataset.loc[('Minimax',metric),metrics[1]],
                          dataset.loc[('RegMoo',metric),metrics[1]],
                          dataset.loc[('RegEqMoo',metric),metrics[1]]])
        met_aux["P porcento"].extend([dataset.loc[('LogReg',metric),metrics[2]],
                          dataset.loc[('RegEqual',metric),metrics[2]],
                          dataset.loc[('RegDemo',metric),metrics[2]],
                          dataset.loc[('RegRewe',metric),metrics[2]],
                          dataset.loc[('Minimax',metric),metrics[2]],
                          dataset.loc[('RegMoo',metric),metrics[2]],
                          dataset.loc[('RegEqMoo',metric),metrics[2]]])
        met_aux["Coeficiente de variação"].extend([dataset.loc[('LogReg',metric),metrics[3]],
                          dataset.loc[('RegEqual',metric),metrics[3]],
                          dataset.loc[('RegDemo',metric),metrics[3]],
                          dataset.loc[('RegRewe',metric),metrics[3]],
                          dataset.loc[('Minimax',metric),metrics[3]],
                          dataset.loc[('RegMoo',metric),metrics[3]],
                          dataset.loc[('RegEqMoo',metric),metrics[3]]])
        met_aux['model_id'].extend([0,1,2,3,4,5,6])
        met_aux['metric_id'].extend([m,m,m,m,m,m,m])
        met_aux['metric'].extend([metric,metric,metric,metric,metric,metric,metric])

    m_metrics = pd.DataFrame(data=met_aux, 
                                columns=['Modelo','Acurácia','Igualdade de Oportunidade','P porcento',
                                         'Coeficiente de variação','model_id', 'metric_id', 'metric'])
    
    acc_min = acc_min if acc_min is not None else m_metrics['Acurácia'].min()
    acc_max = acc_max if acc_max is not None else m_metrics['Acurácia'].max()
    eo_min = eo_min if eo_min is not None else m_metrics['Igualdade de Oportunidade'].min()
    eo_max = eo_max if eo_max is not None else m_metrics['Igualdade de Oportunidade'].max()
    pp_min = pp_min if pp_min is not None else m_metrics['P porcento'].min()
    pp_max = pp_max if pp_max is not None else m_metrics['P porcento'].max()
    cv_min = cv_min if cv_min is not None else m_metrics['Coeficiente de variação'].min()
    cv_max = cv_max if cv_max is not None else m_metrics['Coeficiente de variação'].max()

    fig = go.FigureWidget(data=
        go.Parcoords(
            line = dict(color = m_metrics['model_id'], 
                        colorscale = [(0.0, colors[0]),(0.14, colors[0]),
                            (0.14, colors[1]), (0.28, colors[1]),
                            (0.28, colors[2]),(0.42, colors[2]),
                            (0.42, colors[3]),(0.56, colors[3]),
                            (0.56, colors[4]),(0.7, colors[4]),
                            (0.7, colors[5]),(0.84, colors[5]),
                            (0.84, colors[6]),(1.0, colors[6])], 
                        showscale = True, cmin = 0, cmax = 6,
                       colorbar = {'tickvals': m_metrics['model_id'].tolist(),
                              'ticktext': m_metrics['Modelo'].tolist(),
                                  'title': 'Modelo'}),
            dimensions = list([
                dict(tickvals = [0,1,2,3],
                     ticktext = ['Acurácia','Iguald Oport','P porcento',
                                         'Coef de var'],
                     label = 'Métrica otimizada', values = m_metrics['metric_id']),
                #dict(tickvals = [0,1,2,3,4,5,6],
                #     ticktext = ['LogReg', 'RegEqual', 'RegDemo', 'RegRewe', 'Minimax','RegMoo', 'RegEqMoo'],
                #     label = 'Modelo', values = m_metrics['model_id']),
                dict(range = [acc_min, acc_max],
                    label = 'Acurácia', values = m_metrics['Acurácia']),
                dict(range = [eo_min, eo_max],
                    label = 'Igualdade de Oportunidade', values = m_metrics['Igualdade de Oportunidade']),
                dict(range = [pp_min, pp_max],
                    label = 'P porcento', values = m_metrics['P porcento']),
                dict(range = [cv_min, cv_max],
                    label = 'Coeficiente de variação', values = m_metrics['Coeficiente de variação']),
            ])
        ), layout = dict(height=400)#title = "Resultado para cada modelo - German")
    )
    
    def selection_handler(trace, points, selector):
        print(points, selection)

    fig.data[0].on_selection(selection_handler)
    
    
    fig.update_layout(
        plot_bgcolor = 'white',
        paper_bgcolor = 'white'
    )

    return fig

## Compass

In [8]:
data = pd.read_csv("Datasets - Limpos/compas_onerace.csv")
fair_feature = "not_white"
pred_feature = "Two_yr_Recidivism"

In [10]:
X = data.drop([pred_feature], axis=1)
y = data[pred_feature]


X_tv, X_test, y_tv, y_test = train_test_split(X, y, test_size=1000)
X_train, X_val, y_train, y_val = train_test_split(X_tv, y_tv, test_size=1000)

### Fair Moo

In [11]:
moo_err = monise(weightedScalar=FairScalarization(X_train, y_train, fair_feature),
                 singleScalar=FairScalarization(X_train, y_train, fair_feature),
                  nodeTimeLimit=2, targetSize=150,
                  targetGap=0, nodeGap=0.05, norm=False)

moo_err.optimize()

2020-11-23 09:09:37,060 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-23 09:09:37,116 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-23 09:09:37,254 - moopt.monise - DEBUG: Finding 3th individual minima


Using license file /home/marcos/gurobi.lic
Academic license - for non-commercial use only


2020-11-23 09:09:37,415 - moopt.monise - DEBUG: 4th solution - importance: 1.0
2020-11-23 09:09:37,536 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-23 09:09:37,658 - moopt.monise - DEBUG: 6th solution - importance: 1.0
2020-11-23 09:09:37,786 - moopt.monise - DEBUG: 7th solution - importance: 1.0
2020-11-23 09:09:37,925 - moopt.monise - DEBUG: 8th solution - importance: 1.0
2020-11-23 09:09:38,069 - moopt.monise - DEBUG: 9th solution - importance: 0.4314190635413902
2020-11-23 09:09:38,227 - moopt.monise - DEBUG: 10th solution - importance: 0.2954880160002294
2020-11-23 09:09:38,392 - moopt.monise - DEBUG: 11th solution - importance: 0.2721785156659205
2020-11-23 09:09:38,545 - moopt.monise - DEBUG: 12th solution - importance: 0.15406252874224402
2020-11-23 09:09:38,699 - moopt.monise - DEBUG: 13th solution - importance: 0.12669484422771388
2020-11-23 09:09:38,861 - moopt.monise - DEBUG: 14th solution - importance: 0.09318268420586523
2020-11-23 09:09:39,012 - moopt.m

2020-11-23 09:10:10,365 - moopt.monise - DEBUG: 90th solution - importance: 0.002586055782855667
2020-11-23 09:10:11,129 - moopt.monise - DEBUG: 91th solution - importance: 0.0025594781568631605
2020-11-23 09:10:11,878 - moopt.monise - DEBUG: 92th solution - importance: 0.0024265630784651433
2020-11-23 09:10:13,240 - moopt.monise - DEBUG: 93th solution - importance: 0.002367827525445714
2020-11-23 09:10:14,562 - moopt.monise - DEBUG: 94th solution - importance: 0.002331881212938146
2020-11-23 09:10:15,505 - moopt.monise - DEBUG: 95th solution - importance: 0.0023202934342326544
2020-11-23 09:10:16,944 - moopt.monise - DEBUG: 96th solution - importance: 0.002213957537011325
2020-11-23 09:10:17,762 - moopt.monise - DEBUG: 97th solution - importance: 0.0022133856160526516
2020-11-23 09:10:19,420 - moopt.monise - DEBUG: 98th solution - importance: 0.002206581724120598
2020-11-23 09:10:20,259 - moopt.monise - DEBUG: 99th solution - importance: 0.0021805333451184786
2020-11-23 09:10:22,038 -

In [30]:
m_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}
sols = []

for solution in moo_err.solutionsList:
    sols.append(solution.x)
    m_aux["Acurácia"].append(solution.x.score(X_val, y_val))
    m_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(solution.x,
                                                                                                            X_val, y_val))
    m_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(solution.x,X_val))
    m_aux["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_val, y_val))


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0



In [31]:
m_metrics = pd.DataFrame(data=m_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

In [32]:
m_metrics['cv_aux'] = 1/m_metrics['Coeficiente de variação']

In [33]:
new_results = m_metrics.copy()//0.01/100
new_results = new_results.drop('Coeficiente de variação', axis=1)
dominate_metr = [1*any([dominate(other, row) for other in new_results.values]) for row in new_results.values]
new_results['dominated'] = dominate_metr
new_results

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,0.65,0.53,0.52,1.05,1
1,0.66,0.86,0.84,1.16,0
2,0.54,0.99,0.00,0.76,0
3,0.65,0.58,0.55,1.05,1
4,0.63,0.40,0.40,0.95,1
...,...,...,...,...,...
145,0.60,0.19,0.22,0.89,1
146,0.59,0.11,0.17,0.85,1
147,0.56,0.20,0.12,0.80,1
148,0.63,0.50,0.50,0.97,1


In [34]:
#new_results[new_results.dominated] = 1
#new_results[new_results.dominated == False] = 0
#new_results = new_results[new_results.dominated == False]

In [45]:
fig = go.FigureWidget(data=
    go.Parcoords(
        #line = dict(color = new_results['dominated'], 
        #                colorscale = ['rgba(11,66,255)', 'rgba(11,0,255)']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = new_results['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = new_results['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = new_results['P porcento']),
            dict(range = [0.5,2.5],
                label = 'Coeficiente de variação', values = new_results['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig

FigureWidget({
    'data': [{'dimensions': [{'label': 'Acurácia',
                              'range': [0, 1…

In [48]:
def calc_res(model__):
    return [accuracy_score(y_test, model__.predict(X_test)),
            equal_opportunity_score(sensitive_column=fair_feature)(model__, X_test, y_test),
            p_percent_score(sensitive_column=fair_feature)(model__, X_test),
            coefficient_of_variation(model__, X_test, y_test)]

In [49]:
metrics = ['Acurácia', 'Igualdade de Oportunidade', 'P porcento', 'Coeficiente de variação']
ens_resuls = pd.DataFrame(index=metrics+['Ensemble'], columns=metrics)
for metr in metrics:
    if metr=='Coeficiente de variação':
        min_idx = np.argmax(np.array(new_results['cv_aux']))
        print(np.array(new_results['cv_aux'])[min_idx])
    else:
        min_idx = np.argmax(np.array(new_results[metr]))
        print(np.array(new_results[metr])[min_idx])
        
    min_model = moo_err.solutionsList[min_idx].x
    ens_resuls.loc[metr,:] = calc_res(min_model)
    
selected_idx = new_results.index
for dimension in fig.data[0]['dimensions']:
    if dimension['label']=='Coeficiente de variação':
        label = 'cv_aux'
    else:
        label = dimension['label']
        
    constr = dimension['constraintrange']
    curr_pd = new_results.loc[selected_idx,:]
    if constr is not None:
        selected_idx = curr_pd[(curr_pd[label]>=constr[0])&(curr_pd[label]<=constr[1])].index
        
models_t = [("Model "+str(idx), moo_model.x) 
            for idx, moo_model in enumerate(moo_err.solutionsList)
            if idx in selected_idx]
ensemble_model = SimpleVoting(estimators=models_t, voting='soft')

ens_resuls.loc['Ensemble',:] = calc_res(ensemble_model)
ens_resuls

0.66
0.99



invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0



0.99
1.17


Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,Coeficiente de variação
Acurácia,0.656,0.914705,0.889232,0.817773
Igualdade de Oportunidade,0.539,1.0,0.0,1.30789
P porcento,0.544,0.650391,0.45028,1.29479
Coeficiente de variação,0.67,0.898722,0.86015,0.812404
Ensemble,0.68,0.763643,0.672157,0.862255


In [21]:
moo_prob = monise(weightedScalar=EqualScalarization(X_train, y_train, fair_feature),
             singleScalar=EqualScalarization(X_train, y_train, fair_feature),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo_prob.optimize()

2020-11-23 09:16:38,860 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-23 09:16:39,010 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-23 09:16:39,128 - moopt.monise - DEBUG: Finding 3th individual minima
2020-11-23 09:16:39,246 - moopt.monise - DEBUG: Finding 4th individual minima
2020-11-23 09:16:39,485 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-23 09:16:39,733 - moopt.monise - DEBUG: 6th solution - importance: 1.0
2020-11-23 09:16:39,967 - moopt.monise - DEBUG: 7th solution - importance: 1.0
2020-11-23 09:16:40,171 - moopt.monise - DEBUG: 8th solution - importance: 1.0
2020-11-23 09:16:40,420 - moopt.monise - DEBUG: 9th solution - importance: 1.0
2020-11-23 09:16:40,595 - moopt.monise - DEBUG: 10th solution - importance: 0.7027370756555614
2020-11-23 09:16:40,799 - moopt.monise - DEBUG: 11th solution - importance: 0.5565446142807015
2020-11-23 09:16:40,982 - moopt.monise - DEBUG: 12th solution - importance: 0.4663002111827504
2020-11-

2020-11-23 09:17:24,507 - moopt.monise - DEBUG: 88th solution - importance: 0.010865937823143516
2020-11-23 09:17:25,816 - moopt.monise - DEBUG: 89th solution - importance: 0.010865461866206138
2020-11-23 09:17:27,274 - moopt.monise - DEBUG: 90th solution - importance: 0.010775064212730764
2020-11-23 09:17:28,825 - moopt.monise - DEBUG: 91th solution - importance: 0.01061739227772757
2020-11-23 09:17:30,292 - moopt.monise - DEBUG: 92th solution - importance: 0.010509203975143312
2020-11-23 09:17:31,825 - moopt.monise - DEBUG: 93th solution - importance: 0.010485820687563324
2020-11-23 09:17:33,253 - moopt.monise - DEBUG: 94th solution - importance: 0.01048225800305198
2020-11-23 09:17:34,770 - moopt.monise - DEBUG: 95th solution - importance: 0.010279747605829134
2020-11-23 09:17:36,430 - moopt.monise - DEBUG: 96th solution - importance: 0.010141867838568314
2020-11-23 09:17:38,087 - moopt.monise - DEBUG: 97th solution - importance: 0.010030766341194236
2020-11-23 09:17:39,636 - moopt.

In [50]:
sols = []

m_aux2 = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}
for solution in moo_prob.solutionsList:
    sols.append(solution.x)
    m_aux2["Acurácia"].append(solution.x.score(X_test, y_test))
    m_aux2["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    m_aux2["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(solution.x,X_test))
    m_aux2["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0



In [51]:
m_metrics2 = pd.DataFrame(data=m_aux2, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

In [71]:
m_metrics2['cv_aux'] = 1/m_metrics2['Coeficiente de variação']

new_results2 = m_metrics2.copy()
new_results2 = new_results2.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in new_results2.values]) for row in new_results2.values]
new_results2['dominated'] = dominate_metr

#new_results2[new_results2.dominated] = 1
#new_results2[new_results2.dominated == False] = 0

In [82]:
fig = go.FigureWidget(data=
    go.Parcoords(
        #line = dict(color = new_results2['dominated'], 
        #                colorscale = ['#0043E0', '#7F5EEF']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = m_metrics2['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = m_metrics2['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = m_metrics2['P porcento']),
            dict(range = [0.5,2.5],
                label = 'Coeficiente de variação', values = m_metrics2['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig

FigureWidget({
    'data': [{'dimensions': [{'label': 'Acurácia',
                              'range': [0, 1…

In [83]:
metrics = ['Acurácia', 'Igualdade de Oportunidade', 'P porcento', 'Coeficiente de variação']
ens_resuls = pd.DataFrame(index=metrics+['Ensemble'], columns=metrics)
for metr in metrics:
    if metr=='Coeficiente de variação':
        min_idx = np.argmax(np.array(new_results2['cv_aux']))
    else:
        min_idx = np.argmax(np.array(new_results2[metr]))
        
    min_model = moo_prob.solutionsList[min_idx].x
    ens_resuls.loc[metr,:] = calc_res(min_model)
    
selected_idx = new_results2.index
for dimension in fig.data[0]['dimensions']:
    if dimension['label']=='Coeficiente de variação':
        label = 'cv_aux'
    else:
        label = dimension['label']
        
    constr = dimension['constraintrange']
    curr_pd = new_results2.loc[selected_idx,:]
    if constr is not None:
        selected_idx = curr_pd[(curr_pd[label]>=constr[0])&(curr_pd[label]<=constr[1])].index
        
print(selected_idx)
        
models_t = [("Model "+str(idx), moo_model.x) 
            for idx, moo_model in enumerate(moo_prob.solutionsList)
            if idx in selected_idx]
ensemble_model = SimpleVoting(estimators=models_t, voting='soft')

ens_resuls.loc['Ensemble',:] = calc_res(ensemble_model)
ens_resuls

Acurácia
0.688 34
Igualdade de Oportunidade
1.0 1
P porcento
1.0 1
Coeficiente de variação
2.183125873309389 1
Int64Index([0, 11, 20, 30, 34, 54, 62, 86, 102, 104, 110, 125, 132, 138], dtype='int64')


Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,Coeficiente de variação
Acurácia,0.688,0.588093,0.460645,0.765822
Igualdade de Oportunidade,0.461,1.0,1.0,0.458059
P porcento,0.461,1.0,1.0,0.458059
Coeficiente de variação,0.461,1.0,1.0,0.458059
Ensemble,0.667,0.905344,0.84246,0.743951


## Logistic regression

In [26]:
import numpy as np

C_values = np.logspace(-10, 10, 50)

metrics_adult_log_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    model = LogisticRegression(C=C, max_iter=10**3, tol=10**-6)
    model.fit(X_train, y_train)
    metrics_adult_log_aux["Acurácia"].append(model.score(X_test, y_test))
    metrics_adult_log_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(model,
                                                                                                            X_test, y_test))
    metrics_adult_log_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(model,X_test))
    metrics_adult_log_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0



In [27]:
metrics_adult_log = pd.DataFrame(data=metrics_adult_log_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_log['cv_aux'] = 1/metrics_adult_log['Coeficiente de variação']

metrics_adult_log = metrics_adult_log.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_log.values]) for row in metrics_adult_log.values]
metrics_adult_log['dominated'] = dominate_metr

In [28]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_log['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_log['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_log['P porcento']),
            dict(range = [0.5,2.5],
                label = 'cv_aux', values = metrics_adult_log['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

# Parid dem

In [29]:
import numpy as np

C_values = np.logspace(-5, 5, 20)
c_values = np.logspace(-5, 5, 20)

metrics_adult_log_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    for c in c_values:
        model = DemographicParityClassifier(sensitive_cols=fair_feature,
                                            covariance_threshold=c, C=C, max_iter=10**3)
        model.fit(X_train, y_train)
        metrics_adult_log_aux["Acurácia"].append(model.score(X_test, y_test))
        metrics_adult_log_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(model,
                                                                                                                X_test, y_test))
        metrics_adult_log_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(model,X_test))
        metrics_adult_log_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for not_white == 1, returning 0


divide by zero encountered in double_scalars


No samples with y_hat == 1 for not_white == 0, returning 0


divide by zero encountered in 

SolverError: Solver 'ECOS' failed. Try another solver, or solve with verbose=True for more information.

In [None]:
metrics_adult_log = pd.DataFrame(data=metrics_adult_log_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_log['cv_aux'] = 1/metrics_adult_log['Coeficiente de variação']

metrics_adult_log = metrics_adult_log.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_log.values]) for row in metrics_adult_log.values]
metrics_adult_log['dominated'] = dominate_metr

In [None]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_log['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_log['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_log['P porcento']),
            dict(range = [0.5,2.5],
                label = 'cv_aux', values = metrics_adult_log['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

# Eq op

In [None]:
import numpy as np

C_values = np.logspace(-5, 5, 20)
c_values = np.logspace(-5, 5, 20)

metrics_adult_log_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    for c in c_values:
        model = EqualOpportunityClassifier(sensitive_cols=fair_feature, positive_target=True,
                                           covariance_threshold=c, C=C, max_iter=10**3)
        model.fit(X_train, y_train)
        metrics_adult_log_aux["Acurácia"].append(model.score(X_test, y_test))
        metrics_adult_log_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(model,
                                                                                                                X_test, y_test))
        metrics_adult_log_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(model,X_test))
        metrics_adult_log_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))

In [None]:
metrics_adult_log = pd.DataFrame(data=metrics_adult_log_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_log['cv_aux'] = 1/metrics_adult_log['Coeficiente de variação']

metrics_adult_log = metrics_adult_log.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_log.values]) for row in metrics_adult_log.values]
metrics_adult_log['dominated'] = dominate_metr

In [None]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_log['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_log['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_log['P porcento']),
            dict(range = [0.5,2.5],
                label = 'cv_aux', values = metrics_adult_log['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()