In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklego.metrics import equal_opportunity_score
from sklego.metrics import p_percent_score
from sklearn.metrics import log_loss, accuracy_score
from sklearn.utils.extmath import squared_norm
from moopt.scalarization_interface import scalar_interface, single_interface, w_interface
from moopt import monise
import numpy as np
import optuna, sklearn, sklearn.datasets
from fair_models import coefficient_of_variation, MOOLogisticRegression, FindCLogisticRegression, FindCCLogisticRegression
from fair_models import calc_reweight
from fair_models import FairScalarization, EqualScalarization
from fair_models import SimpleVoting

import plotly.graph_objects as go

Using Python-MIP package version 1.7.2




In [2]:
from sklego.linear_model import DemographicParityClassifier
from sklego.linear_model import EqualOpportunityClassifier
from sklearn.linear_model import LogisticRegression

In [3]:
def dominate(a, b):
    sense = np.array([1, 1, 1, 1])
    if all((sense*a)>=(sense*b)) and any((sense*a)>(sense*b)):
        return 1
    else:
        return 0

In [4]:
def all_metrics_parallel_coord(dataset, metric = 'accuracy',
                               acc_min = None, acc_max = None,
                               eo_min = None, eo_max = None,
                               pp_min=None, pp_max = None,
                               cv_min = None, cv_max = None):
    
    dominate_metr = np.array([any([dominate(other, row)
                                   for other in dataset.values])
                              for row in dataset.values])
    
    metrics = ['accuracy', 'equal_opportunity', 'p_percent', 'c_variation']
    
    met_aux = {"Modelo": [],
              "Acurácia": [],
              "Igualdade de Oportunidade": [],
              "P porcento": [],
              "Coeficiente de variação": [],
              'model_id': [],
              "metric_id": [],
              "metric": []}

    for m in range(len(metrics)):
        metric = metrics[m]
        
        met_aux["Modelo"].extend(['RegLogistica', 'EqualOp', 'ParDemo', 'Reweight', 'MinMax', 'ErroMO', 'ProbMO'])
        met_aux["Acurácia"].extend([dataset.loc[('LogReg',metric),metrics[0]],
                          dataset.loc[('RegEqual',metric),metrics[0]],
                          dataset.loc[('RegDemo',metric),metrics[0]],
                          dataset.loc[('RegRewe',metric),metrics[0]],
                          dataset.loc[('Minimax',metric),metrics[0]],
                          dataset.loc[('RegMoo',metric),metrics[0]],
                          dataset.loc[('RegEqMoo',metric),metrics[0]]])
        met_aux["Igualdade de Oportunidade"].extend([dataset.loc[('LogReg',metric),metrics[1]],
                          dataset.loc[('RegEqual',metric),metrics[1]],
                          dataset.loc[('RegDemo',metric),metrics[1]],
                          dataset.loc[('RegRewe',metric),metrics[1]],
                          dataset.loc[('Minimax',metric),metrics[1]],
                          dataset.loc[('RegMoo',metric),metrics[1]],
                          dataset.loc[('RegEqMoo',metric),metrics[1]]])
        met_aux["P porcento"].extend([dataset.loc[('LogReg',metric),metrics[2]],
                          dataset.loc[('RegEqual',metric),metrics[2]],
                          dataset.loc[('RegDemo',metric),metrics[2]],
                          dataset.loc[('RegRewe',metric),metrics[2]],
                          dataset.loc[('Minimax',metric),metrics[2]],
                          dataset.loc[('RegMoo',metric),metrics[2]],
                          dataset.loc[('RegEqMoo',metric),metrics[2]]])
        met_aux["Coeficiente de variação"].extend([dataset.loc[('LogReg',metric),metrics[3]],
                          dataset.loc[('RegEqual',metric),metrics[3]],
                          dataset.loc[('RegDemo',metric),metrics[3]],
                          dataset.loc[('RegRewe',metric),metrics[3]],
                          dataset.loc[('Minimax',metric),metrics[3]],
                          dataset.loc[('RegMoo',metric),metrics[3]],
                          dataset.loc[('RegEqMoo',metric),metrics[3]]])
        met_aux['model_id'].extend([0,1,2,3,4,5,6])
        met_aux['metric_id'].extend([m,m,m,m,m,m,m])
        met_aux['metric'].extend([metric,metric,metric,metric,metric,metric,metric])

    m_metrics = pd.DataFrame(data=met_aux, 
                                columns=['Modelo','Acurácia','Igualdade de Oportunidade','P porcento',
                                         'Coeficiente de variação','model_id', 'metric_id', 'metric'])
    
    acc_min = acc_min if acc_min is not None else m_metrics['Acurácia'].min()
    acc_max = acc_max if acc_max is not None else m_metrics['Acurácia'].max()
    eo_min = eo_min if eo_min is not None else m_metrics['Igualdade de Oportunidade'].min()
    eo_max = eo_max if eo_max is not None else m_metrics['Igualdade de Oportunidade'].max()
    pp_min = pp_min if pp_min is not None else m_metrics['P porcento'].min()
    pp_max = pp_max if pp_max is not None else m_metrics['P porcento'].max()
    cv_min = cv_min if cv_min is not None else m_metrics['Coeficiente de variação'].min()
    cv_max = cv_max if cv_max is not None else m_metrics['Coeficiente de variação'].max()

    fig = go.FigureWidget(data=
        go.Parcoords(
            line = dict(color = m_metrics['model_id'], 
                        colorscale = [(0.0, colors[0]),(0.14, colors[0]),
                            (0.14, colors[1]), (0.28, colors[1]),
                            (0.28, colors[2]),(0.42, colors[2]),
                            (0.42, colors[3]),(0.56, colors[3]),
                            (0.56, colors[4]),(0.7, colors[4]),
                            (0.7, colors[5]),(0.84, colors[5]),
                            (0.84, colors[6]),(1.0, colors[6])], 
                        showscale = True, cmin = 0, cmax = 6,
                       colorbar = {'tickvals': m_metrics['model_id'].tolist(),
                              'ticktext': m_metrics['Modelo'].tolist(),
                                  'title': 'Modelo'}),
            dimensions = list([
                dict(tickvals = [0,1,2,3],
                     ticktext = ['Acurácia','Iguald Oport','P porcento',
                                         'Coef de var'],
                     label = 'Métrica otimizada', values = m_metrics['metric_id']),
                #dict(tickvals = [0,1,2,3,4,5,6],
                #     ticktext = ['LogReg', 'RegEqual', 'RegDemo', 'RegRewe', 'Minimax','RegMoo', 'RegEqMoo'],
                #     label = 'Modelo', values = m_metrics['model_id']),
                dict(range = [acc_min, acc_max],
                    label = 'Acurácia', values = m_metrics['Acurácia']),
                dict(range = [eo_min, eo_max],
                    label = 'Igualdade de Oportunidade', values = m_metrics['Igualdade de Oportunidade']),
                dict(range = [pp_min, pp_max],
                    label = 'P porcento', values = m_metrics['P porcento']),
                dict(range = [cv_min, cv_max],
                    label = 'Coeficiente de variação', values = m_metrics['Coeficiente de variação']),
            ])
        ), layout = dict(height=400)#title = "Resultado para cada modelo - German")
    )
    
    def selection_handler(trace, points, selector):
        print(points, selection)

    fig.data[0].on_selection(selection_handler)
    
    
    fig.update_layout(
        plot_bgcolor = 'white',
        paper_bgcolor = 'white'
    )

    return fig

## German

In [5]:
german_result_metrics = pd.read_csv('Results/german_result_metrics.csv')
german_result_metrics = german_result_metrics.set_index(['Unnamed: 0', 'Unnamed: 1'])

In [6]:
german_data = pd.read_csv("Datasets - Limpos/german.csv")
german_fair_feature = "Sex"
german_pred_feature = "Risk"

In [7]:
german_data = german_data.drop("Unnamed: 0", axis=1)

### Fair Moo

In [8]:
X = german_data.drop([german_pred_feature], axis=1)
y = german_data[german_pred_feature]


X_tv, X_test, y_tv, y_test = train_test_split(X, y, test_size=200)
X_train, X_val, y_train, y_val = train_test_split(X_tv, y_tv, test_size=100)

In [9]:
m_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [10]:
moo_err = monise(weightedScalar=FairScalarization(X_train, y_train, 'Sex'),
             singleScalar=FairScalarization(X_train, y_train, 'Sex'),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo_err.optimize()

sols = []

for solution in moo_err.solutionsList:
    sols.append(solution.x)
    m_aux["Acurácia"].append(solution.x.score(X_test, y_test))
    m_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=german_fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    m_aux["P porcento"].append(p_percent_score(sensitive_column=german_fair_feature)(solution.x,X_test))
    m_aux["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-18 09:49:26,354 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-18 09:49:26,388 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-18 09:49:26,420 - moopt.monise - DEBUG: Finding 3th individual minima


Using license file /home/marcos/gurobi.lic
Academic license - for non-commercial use only


2020-11-18 09:49:26,506 - moopt.monise - DEBUG: 4th solution - importance: 1.0
2020-11-18 09:49:26,543 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-18 09:49:26,564 - moopt.monise - DEBUG: 6th solution - importance: 1.0
2020-11-18 09:49:26,619 - moopt.monise - DEBUG: 7th solution - importance: 1.0
2020-11-18 09:49:26,657 - moopt.monise - DEBUG: 8th solution - importance: 1.0
2020-11-18 09:49:26,690 - moopt.monise - DEBUG: 9th solution - importance: 0.5339384566285893
2020-11-18 09:49:26,749 - moopt.monise - DEBUG: 10th solution - importance: 0.49587277117558626
2020-11-18 09:49:26,782 - moopt.monise - DEBUG: 11th solution - importance: 0.20448755961946657
2020-11-18 09:49:26,817 - moopt.monise - DEBUG: 12th solution - importance: 0.20142087616789603
2020-11-18 09:49:26,854 - moopt.monise - DEBUG: 13th solution - importance: 0.15097986835517202
2020-11-18 09:49:26,895 - moopt.monise - DEBUG: 14th solution - importance: 0.1492935916337049
2020-11-18 09:49:26,944 - moopt.

2020-11-18 09:49:54,435 - moopt.monise - DEBUG: 90th solution - importance: 0.0017383360654968849
2020-11-18 09:49:55,820 - moopt.monise - DEBUG: 91th solution - importance: 0.0016947682286262283
2020-11-18 09:49:57,189 - moopt.monise - DEBUG: 92th solution - importance: 0.0016834691171875497
2020-11-18 09:49:58,633 - moopt.monise - DEBUG: 93th solution - importance: 0.0016162799990929791
2020-11-18 09:50:00,130 - moopt.monise - DEBUG: 94th solution - importance: 0.0015105385633296608
2020-11-18 09:50:01,624 - moopt.monise - DEBUG: 95th solution - importance: 0.0015103112656135456
2020-11-18 09:50:03,012 - moopt.monise - DEBUG: 96th solution - importance: 0.001500946365577341
2020-11-18 09:50:04,601 - moopt.monise - DEBUG: 97th solution - importance: 0.0014351578491263273
2020-11-18 09:50:06,057 - moopt.monise - DEBUG: 98th solution - importance: 0.0014215295102785795
2020-11-18 09:50:07,543 - moopt.monise - DEBUG: 99th solution - importance: 0.0014145061107510085
2020-11-18 09:50:09,3

In [11]:
m_metrics = pd.DataFrame(data=m_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

In [12]:
m_metrics['cv_aux'] = 1/m_metrics['Coeficiente de variação']

In [13]:
new_results = m_metrics.copy()//0.01/100
new_results = new_results.drop('Coeficiente de variação', axis=1)
dominate_metr = [1*any([dominate(other, row) for other in new_results.values]) for row in new_results.values]
new_results['dominated'] = dominate_metr
new_results

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,0.57,0.91,0.95,1.27,1
1,0.60,0.81,0.70,1.74,1
2,0.42,0.99,0.00,0.61,1
3,0.60,0.92,0.98,1.41,1
4,0.57,0.97,0.93,1.27,1
...,...,...,...,...,...
145,0.61,0.96,0.98,1.59,0
146,0.63,0.98,0.93,1.49,1
147,0.62,0.92,0.97,1.47,1
148,0.62,0.89,0.99,1.43,1


In [14]:
#new_results[new_results.dominated] = 1
#new_results[new_results.dominated == False] = 0
#new_results = new_results[new_results.dominated == False]

In [30]:
fig = go.FigureWidget(data=
    go.Parcoords(
        #line = dict(color = new_results['dominated'], 
        #                colorscale = ['rgba(11,66,255)', 'rgba(11,0,255)']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = new_results['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = new_results['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = new_results['P porcento']),
            dict(range = [0.5,2.5],
                label = 'Coeficiente de variação', values = new_results['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig

FigureWidget({
    'data': [{'dimensions': [{'label': 'Acurácia',
                              'range': [0, 1…

In [33]:
def calc_res(model__):
    return [accuracy_score(y_test, model__.predict(X_test)),
            equal_opportunity_score(sensitive_column=german_fair_feature)(model__, X_test, y_test),
            p_percent_score(sensitive_column=german_fair_feature)(model__, X_test),
            coefficient_of_variation(model__, X_test, y_test)]

In [34]:
metrics = ['Acurácia', 'Igualdade de Oportunidade', 'P porcento', 'Coeficiente de variação']
ens_resuls = pd.DataFrame(index=metrics+['Ensemble'], columns=metrics)
for metr in metrics:
    if metr=='Coeficiente de variação':
        min_idx = np.argmin(np.array(new_results['cv_aux']))
        print(np.array(new_results['cv_aux'])[min_idx])
    else:
        min_idx = np.argmax(np.array(new_results[metr]))
        print(np.array(new_results[metr])[min_idx])
        
    #min_model = moo_err.solutionsList[min_idx].x
    ens_resuls.loc[metr,:] = new_results.iloc[min_idx,:].values[:-1]#calc_res(min_model)
    
selected_idx = new_results.index
for dimension in fig.data[0]['dimensions']:
    if dimension['label']=='Coeficiente de variação':
        label = 'cv_aux'
    else:
        label = dimension['label']
        
    constr = dimension['constraintrange']
    curr_pd = new_results.loc[selected_idx,:]
    if constr is not None:
        selected_idx = curr_pd[(curr_pd[label]>=constr[0])&(curr_pd[label]<=constr[1])].index
        
models_t = [("Model "+str(idx), moo_model.x) 
            for idx, moo_model in enumerate(moo_err.solutionsList)
            if idx in selected_idx]
ensemble_model = SimpleVoting(estimators=models_t, voting='soft')

ens_resuls.loc['Ensemble',:] = calc_res(ensemble_model)
ens_resuls

0.65
0.99
0.99
0.61


Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,Coeficiente de variação
Acurácia,0.65,0.97,0.91,1.57
Igualdade de Oportunidade,0.42,0.99,0.0,0.61
P porcento,0.62,0.88,0.99,1.41
Coeficiente de variação,0.42,0.99,0.0,0.61
Ensemble,0.625,0.968318,0.968638,0.677226


In [18]:
m_aux2 = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [19]:
moo = monise(weightedScalar=EqualScalarization(X_train, y_train, 'Sex'),
             singleScalar=EqualScalarization(X_train, y_train, 'Sex'),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo.optimize()

sols = []

for solution in moo.solutionsList:
    sols.append(solution.x)
    m_aux2["Acurácia"].append(solution.x.score(X_test, y_test))
    m_aux2["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=german_fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    m_aux2["P porcento"].append(p_percent_score(sensitive_column=german_fair_feature)(solution.x,X_test))
    m_aux2["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-17 22:16:28,703 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-17 22:16:28,731 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-17 22:16:28,740 - moopt.monise - DEBUG: Finding 3th individual minima
2020-11-17 22:16:28,749 - moopt.monise - DEBUG: Finding 4th individual minima
2020-11-17 22:16:28,772 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-17 22:16:28,798 - moopt.monise - DEBUG: 6th solution - importance: 1.000058871949015
2020-11-17 22:16:28,812 - moopt.monise - DEBUG: 7th solution - importance: 1.000058871949015
2020-11-17 22:16:28,826 - moopt.monise - DEBUG: 8th solution - importance: 1.000058871949015
2020-11-17 22:16:28,840 - moopt.monise - DEBUG: 9th solution - importance: 1.000058871949015
2020-11-17 22:16:28,854 - moopt.monise - DEBUG: 10th solution - importance: 1.000058871949015

invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0



In [17]:
m_metrics2 = pd.DataFrame(data=m_aux2, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

In [18]:
m_metrics2

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,Coeficiente de variação
0,0.645,0.813492,0.666095,0.68549
1,0.55,1.0,1.0,0.485215
2,0.55,1.0,1.0,0.485215
3,0.45,1.0,0.0,1.563472
4,0.59,0.995935,0.95996,0.666933
5,0.55,1.0,1.0,0.485215
6,0.55,1.0,1.0,0.485215
7,0.55,1.0,1.0,0.485215
8,0.55,1.0,1.0,0.485215
9,0.55,1.0,1.0,0.485215


In [19]:
m_metrics2['cv_aux'] = 1/m_metrics2['Coeficiente de variação']

new_results2 = m_metrics2.copy()
new_results2 = new_results2.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in new_results2.values]) for row in new_results2.values]
new_results2['dominated'] = dominate_metr

new_results2[new_results2.dominated] = 1
new_results2[new_results2.dominated == False] = 0

In [20]:
new_results2

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,0.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0
3,1.0,1.0,1.0,1.0,1
4,0.0,0.0,0.0,0.0,0
5,0.0,0.0,0.0,0.0,0
6,0.0,0.0,0.0,0.0,0
7,0.0,0.0,0.0,0.0,0
8,0.0,0.0,0.0,0.0,0
9,0.0,0.0,0.0,0.0,0


In [21]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(color = new_results2['dominated'], 
                        colorscale = ['#0043E0', '#7F5EEF']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = m_metrics2['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = m_metrics2['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = m_metrics2['P porcento']),
            dict(range = [0.5,2],
                label = 'Coeficiente de variação', values = m_metrics2['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

## Logistic regression

In [40]:
import numpy as np

C_values = np.logspace(-10, 10, 150)

metrics_adult_log_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    model = LogisticRegression(C=C, max_iter=10**3, tol=10**-6)
    model.fit(X_train, y_train)
    metrics_adult_log_aux["Acurácia"].append(model.score(X_test, y_test))
    metrics_adult_log_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=german_fair_feature)(model,
                                                                                                            X_test, y_test))
    metrics_adult_log_aux["P porcento"].append(p_percent_score(sensitive_column=german_fair_feature)(model,X_test))
    metrics_adult_log_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))


invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for Sex == 1, returning 0



In [41]:
metrics_adult_log = pd.DataFrame(data=metrics_adult_log_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_log['cv_aux'] = 1/metrics_adult_log['Coeficiente de variação']

metrics_adult_log = metrics_adult_log.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_log.values]) for row in metrics_adult_log.values]
metrics_adult_log['dominated'] = dominate_metr

In [42]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_log['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_log['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_log['P porcento']),
            dict(range = [1.2,3],
                label = 'cv_aux', values = metrics_adult_log['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

## Adult

In [43]:
data = pd.read_csv("Datasets - Limpos/adult.csv")
fair_feature = "race"
pred_feature = "income"

In [44]:
X = data.drop([pred_feature], axis=1)
y = data[pred_feature]


X_tv, X_test, y_tv, y_test = train_test_split(X, y, test_size=5000)
X_train, X_val, y_train, y_val = train_test_split(X_tv, y_tv, test_size=5000)

### Fair Moo

In [45]:
metrics_adult_fair_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [46]:
moo = monise(weightedScalar=FairScalarization(X_train, y_train, fair_feature),
             singleScalar=FairScalarization(X_train, y_train, fair_feature),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo.optimize()

sols = []

for solution in moo.solutionsList:
    sols.append(solution.x)
    metrics_adult_fair_aux["Acurácia"].append(solution.x.score(X_test, y_test))
    metrics_adult_fair_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    metrics_adult_fair_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(solution.x,X_test))
    metrics_adult_fair_aux["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-12 17:27:54,376 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-12 17:27:54,767 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-12 17:27:55,415 - moopt.monise - DEBUG: Finding 3th individual minima
2020-11-12 17:27:55,849 - moopt.monise - DEBUG: 4th solution - importance: 1.0
2020-11-12 17:27:56,222 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-12 17:27:56,598 - moopt.monise - DEBUG: 6th solution - importance: 1.0
2020-11-12 17:27:56,999 - moopt.monise - DEBUG: 7th solution - importance: 1.0
2020-11-12 17:27:57,439 - moopt.monise - DEBUG: 8th solution - importance: 1.0
2020-11-12 17:27:57,780 - moopt.monise - DEBUG: 9th solution - importance: 0.2518029585418239
2020-11-12 17:27:58,210 - moopt.monise - DEBUG: 10th solution - importance: 0.24617348594768693
2020-11-12 17:27:58,659 - moopt.monise - DEBUG: 11th solution - importance: 0.06248614541647035
2020-11-12 17:27:59,078 - moopt.monise - DEBUG: 12th solution - importance: 0.0606192

2020-11-12 17:29:03,275 - moopt.monise - DEBUG: 87th solution - importance: 8.448684363626858e-05
2020-11-12 17:29:05,004 - moopt.monise - DEBUG: 88th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:07,462 - moopt.monise - DEBUG: 89th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:09,865 - moopt.monise - DEBUG: 90th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:12,065 - moopt.monise - DEBUG: 91th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:14,513 - moopt.monise - DEBUG: 92th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:16,619 - moopt.monise - DEBUG: 93th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:19,069 - moopt.monise - DEBUG: 94th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:21,409 - moopt.monise - DEBUG: 95th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:23,874 - moopt.monise - DEBUG: 96th solution - importance: 6.135704002134124e-05
2020-11-12 17:29:26,

In [47]:
metrics_adult_fair = pd.DataFrame(data=metrics_adult_fair_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_fair['cv_aux'] = 1/metrics_adult_fair['Coeficiente de variação']

metrics_adult_fair = metrics_adult_fair.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_fair.values]) for row in metrics_adult_fair.values]
metrics_adult_fair['dominated'] = dominate_metr

In [48]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(color = metrics_adult_fair['dominated'], 
                        colorscale = ['rgba(0.0,0.0,255,0.0)', 'rgba(0.0,0.0,255,0.5)']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_fair['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_fair['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_fair['P porcento']),
            dict(range = [1.2,1.5],
                label = 'cv_aux', values = metrics_adult_fair['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

### Equal Moo

In [49]:
metrics_adult_equal_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [50]:
moo = monise(weightedScalar=EqualScalarization(X_train, y_train, fair_feature),
             singleScalar=EqualScalarization(X_train, y_train, fair_feature),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo.optimize()

sols = []

for solution in moo.solutionsList:
    sols.append(solution.x)
    metrics_adult_equal_aux["Acurácia"].append(solution.x.score(X_test, y_test))
    metrics_adult_equal_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    metrics_adult_equal_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(solution.x,X_test))
    metrics_adult_equal_aux["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-12 17:31:41,879 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-12 17:31:42,634 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-12 17:31:42,815 - moopt.monise - DEBUG: Finding 3th individual minima
2020-11-12 17:31:42,996 - moopt.monise - DEBUG: Finding 4th individual minima
2020-11-12 17:31:43,864 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-12 17:31:43,989 - moopt.monise - DEBUG: 6th solution - importance: 1.0000009437597428
2020-11-12 17:31:44,115 - moopt.monise - DEBUG: 7th solution - importance: 1.0000009437597428
2020-11-12 17:31:44,242 - moopt.monise - DEBUG: 8th solution - importance: 1.0000009437597428
2020-11-12 17:31:44,369 - moopt.monise - DEBUG: 9th solution - importance: 1.0000009437597428
2020-11-12 17:31:44,497 - moopt.monise - DEBUG: 10th solution - importance: 1.0000009437597428
2020-11-12 17:31:44,625 - moopt.monise - DEBUG: 11th solution - importance: 1.0000009437597428
2020-11-12 17:31:44,753 - moopt.monise - DE

2020-11-12 17:31:55,660 - moopt.monise - DEBUG: 89th solution - importance: 1.0000009437597428
2020-11-12 17:31:55,814 - moopt.monise - DEBUG: 90th solution - importance: 1.0000009437597428
2020-11-12 17:31:55,971 - moopt.monise - DEBUG: 91th solution - importance: 1.0000009437597428
2020-11-12 17:31:56,127 - moopt.monise - DEBUG: 92th solution - importance: 1.0000009437597428
2020-11-12 17:31:56,284 - moopt.monise - DEBUG: 93th solution - importance: 1.0000009437597428
2020-11-12 17:31:56,441 - moopt.monise - DEBUG: 94th solution - importance: 1.0000009437597428
2020-11-12 17:31:56,598 - moopt.monise - DEBUG: 95th solution - importance: 1.0000009437597428
2020-11-12 17:31:56,756 - moopt.monise - DEBUG: 96th solution - importance: 1.0000009437597428
2020-11-12 17:31:56,914 - moopt.monise - DEBUG: 97th solution - importance: 1.0000009437597428
2020-11-12 17:31:57,072 - moopt.monise - DEBUG: 98th solution - importance: 1.0000009437597428
2020-11-12 17:31:57,231 - moopt.monise - DEBUG: 99


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0


invalid value encountered

In [51]:
metrics_adult_equal = pd.DataFrame(data=metrics_adult_equal_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_equal['cv_aux'] = 1/metrics_adult_equal['Coeficiente de variação']

metrics_adult_equal = metrics_adult_equal.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_equal.values]) for row in metrics_adult_equal.values]
metrics_adult_equal['dominated'] = dominate_metr

In [52]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_equal['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_equal['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_equal['P porcento']),
            dict(range = [1.2,3],
                label = 'cv_aux', values = metrics_adult_equal['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

In [53]:
import numpy as np

C_values = np.logspace(-10, 10, 150)

metrics_adult_log_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    model = LogisticRegression(C=C, max_iter=10**3, tol=10**-6)
    model.fit(X_train, y_train)
    metrics_adult_log_aux["Acurácia"].append(model.score(X_test, y_test))
    metrics_adult_log_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(model,
                                                                                                            X_test, y_test))
    metrics_adult_log_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(model,X_test))
    metrics_adult_log_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))

In [54]:
metrics_adult_log = pd.DataFrame(data=metrics_adult_log_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_log['cv_aux'] = 1/metrics_adult_log['Coeficiente de variação']

metrics_adult_log = metrics_adult_log.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_log.values]) for row in metrics_adult_log.values]
metrics_adult_log['dominated'] = dominate_metr

In [55]:
metrics_adult_log

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,0.7906,0.857863,0.371588,1.375592,False
1,0.7900,0.910765,0.414919,1.373797,True
2,0.7894,0.957835,0.422259,1.372221,True
3,0.7890,0.939486,0.464014,1.371286,True
4,0.7886,0.925305,0.450326,1.370304,True
...,...,...,...,...,...
145,0.7884,0.932736,0.520582,1.382372,True
146,0.7884,0.932736,0.520582,1.382372,True
147,0.7884,0.932736,0.520582,1.382372,True
148,0.7884,0.932736,0.520582,1.382372,True


In [56]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_log['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_log['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_log['P porcento']),
            dict(range = [1.2,3],
                label = 'cv_aux', values = metrics_adult_log['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

In [None]:
C_values = np.logspace(-10, 10, 150)
c_values = np.linspace(1e-5, 1, 10)

metrics_adult_eqo_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    for c in c_values:
        try:
            model = EqualOpportunityClassifier(sensitive_cols=fair_feature, positive_target=True, covariance_threshold=c, 
                                               C=C, max_iter=10**3)
            model.fit(X_train, y_train)
            metrics_adult_eqo_aux["Acurácia"].append(model.score(X_test, y_test))
            metrics_adult_eqo_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(model,
                                                                                                                    X_test, y_test))
            metrics_adult_eqo_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(model,X_test))
            metrics_adult_eqo_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))
        except:
            print("C: ",C," - c: ",c)

C:  1e-10  - c:  1e-05
C:  1e-10  - c:  0.11112
C:  1e-10  - c:  0.22223
C:  1e-10  - c:  0.33334
C:  1e-10  - c:  0.44445
C:  1e-10  - c:  0.5555599999999999
C:  1e-10  - c:  0.66667
C:  1e-10  - c:  0.7777799999999999
C:  1e-10  - c:  0.88889
C:  1e-10  - c:  1.0
C:  1.3621602035512733e-10  - c:  1e-05
C:  1.3621602035512733e-10  - c:  0.11112
C:  1.3621602035512733e-10  - c:  0.22223
C:  1.3621602035512733e-10  - c:  0.33334
C:  1.3621602035512733e-10  - c:  0.44445
C:  1.3621602035512733e-10  - c:  0.5555599999999999
C:  1.3621602035512733e-10  - c:  0.66667
C:  1.3621602035512733e-10  - c:  0.7777799999999999
C:  1.3621602035512733e-10  - c:  0.88889
C:  1.3621602035512733e-10  - c:  1.0
C:  1.855480420138846e-10  - c:  1e-05
C:  1.855480420138846e-10  - c:  0.11112
C:  1.855480420138846e-10  - c:  0.22223
C:  1.855480420138846e-10  - c:  0.33334
C:  1.855480420138846e-10  - c:  0.44445
C:  1.855480420138846e-10  - c:  0.5555599999999999
C:  1.855480420138846e-10  - c:  0.66667
C:

In [None]:
metrics_adult_eqo = pd.DataFrame(data=metrics_adult_eqo_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_eqo['cv_aux'] = 1/metrics_adult_eqo['Coeficiente de variação']

metrics_adult_eqo = metrics_adult_eqo.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_eqo.values]) for row in metrics_adult_eqo.values]
metrics_adult_eqo['dominated'] = dominate_metr

In [None]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_eqo['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_eqo['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_eqo['P porcento']),
            dict(range = [1.2,3],
                label = 'cv_aux', values = metrics_adult_eqo['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

## COMPAS

In [15]:
data = pd.read_csv("Datasets - Limpos/compas_onerace.csv")
fair_feature = "not_white"
pred_feature = "Two_yr_Recidivism"

In [16]:
X = data.drop([pred_feature], axis=1)
y = data[pred_feature]


X_tv, X_test, y_tv, y_test = train_test_split(X, y, test_size=1000)
X_train, X_val, y_train, y_val = train_test_split(X_tv, y_tv, test_size=1000)