In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklego.metrics import equal_opportunity_score
from sklego.metrics import p_percent_score
from sklearn.metrics import log_loss
from sklearn.utils.extmath import squared_norm
from moopt.scalarization_interface import scalar_interface, single_interface, w_interface
from moopt import monise
import numpy as np
import optuna, sklearn, sklearn.datasets
from fair_models import coefficient_of_variation, MOOLogisticRegression, FindCLogisticRegression, FindCCLogisticRegression
from fair_models import calc_reweight
from fair_models import FairScalarization, EqualScalarization

import plotly.graph_objects as go


The sklearn.linear_model.base module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.linear_model. Anything that cannot be imported from sklearn.linear_model is now part of the private API.



In [2]:
from sklego.linear_model import DemographicParityClassifier
from sklego.linear_model import EqualOpportunityClassifier
from sklearn.linear_model import LogisticRegression

In [10]:
def dominate(a, b):
    sense = np.array([1, 1, 1, 1])
    if all((sense*a)>=(sense*b)) and any((sense*a)>(sense*b)):
        return 1
    else:
        return 0

## German

In [3]:
german_data = pd.read_csv("Datasets - Limpos/german.csv")
german_fair_feature = "Sex"
german_pred_feature = "Risk"

In [4]:
german_data = german_data.drop("Unnamed: 0", axis=1)

### Fair Moo

In [5]:
X = german_data.drop([german_pred_feature], axis=1)
y = german_data[german_pred_feature]


X_tv, X_test, y_tv, y_test = train_test_split(X, y, test_size=200)
X_train, X_val, y_train, y_val = train_test_split(X_tv, y_tv, test_size=100)

In [6]:
m_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [7]:
moo = monise(weightedScalar=FairScalarization(X_train, y_train, 'Sex'),
             singleScalar=FairScalarization(X_train, y_train, 'Sex'),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo.optimize()

sols = []

for solution in moo.solutionsList:
    sols.append(solution.x)
    m_aux["Acurácia"].append(solution.x.score(X_test, y_test))
    m_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=german_fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    m_aux["P porcento"].append(p_percent_score(sensitive_column=german_fair_feature)(solution.x,X_test))
    m_aux["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-11 09:31:27,405 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-11 09:31:27,616 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-11 09:31:27,664 - moopt.monise - DEBUG: Finding 3th individual minima


Using license file /opt/gurobi/gurobi.lic
Academic license - for non-commercial use only


2020-11-11 09:31:28,280 - moopt.monise - DEBUG: 4th solution - importance: 1.0
2020-11-11 09:31:28,454 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-11 09:31:28,505 - moopt.monise - DEBUG: 6th solution - importance: 1.0
2020-11-11 09:31:28,565 - moopt.monise - DEBUG: 7th solution - importance: 1.0
2020-11-11 09:31:28,636 - moopt.monise - DEBUG: 8th solution - importance: 1.0
2020-11-11 09:31:28,727 - moopt.monise - DEBUG: 9th solution - importance: 0.7598821211729929
2020-11-11 09:31:28,786 - moopt.monise - DEBUG: 10th solution - importance: 0.5444746328894028
2020-11-11 09:31:28,893 - moopt.monise - DEBUG: 11th solution - importance: 0.37083838521143847
2020-11-11 09:31:28,997 - moopt.monise - DEBUG: 12th solution - importance: 0.2602301128416963
2020-11-11 09:31:29,078 - moopt.monise - DEBUG: 13th solution - importance: 0.24619378608437026
2020-11-11 09:31:29,186 - moopt.monise - DEBUG: 14th solution - importance: 0.22147699162409631
2020-11-11 09:31:29,294 - moopt.m

2020-11-11 09:32:01,899 - moopt.monise - DEBUG: 89th solution - importance: 0.009523565923060398
2020-11-11 09:32:02,788 - moopt.monise - DEBUG: 90th solution - importance: 0.009347909113144513
2020-11-11 09:32:04,393 - moopt.monise - DEBUG: 91th solution - importance: 0.008933760828695913
2020-11-11 09:32:05,467 - moopt.monise - DEBUG: 92th solution - importance: 0.008928439628250977
2020-11-11 09:32:07,065 - moopt.monise - DEBUG: 93th solution - importance: 0.008927757588990051
2020-11-11 09:32:08,781 - moopt.monise - DEBUG: 94th solution - importance: 0.008792391389795614
2020-11-11 09:32:10,646 - moopt.monise - DEBUG: 95th solution - importance: 0.008520894169593026
2020-11-11 09:32:12,428 - moopt.monise - DEBUG: 96th solution - importance: 0.008509711308737861
2020-11-11 09:32:14,361 - moopt.monise - DEBUG: 97th solution - importance: 0.008435193375701663
2020-11-11 09:32:16,324 - moopt.monise - DEBUG: 98th solution - importance: 0.008255676634573115
2020-11-11 09:32:18,184 - moop

In [8]:
m_metrics = pd.DataFrame(data=m_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

In [9]:
m_metrics['cv_aux'] = 1/m_metrics['Coeficiente de variação']

In [11]:
new_results = m_metrics.copy()//0.01/100
new_results = new_results.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in new_results.values]) for row in new_results.values]
new_results['dominated'] = dominate_metr
new_results

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,0.60,0.80,0.83,1.35,True
1,0.57,0.44,0.48,1.21,True
2,0.41,0.99,0.00,0.60,True
3,0.63,0.72,0.71,1.34,True
4,0.57,0.59,0.70,1.01,True
...,...,...,...,...,...
145,0.65,0.83,0.78,1.36,True
146,0.57,0.64,0.73,1.05,True
147,0.60,0.70,0.73,1.19,True
148,0.62,0.68,0.64,1.34,True


In [12]:
new_results[new_results.dominated] = 1
new_results[new_results.dominated == False] = 0
new_results

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,1.0,1.0,1.0,1.0,1
1,1.0,1.0,1.0,1.0,1
2,1.0,1.0,1.0,1.0,1
3,1.0,1.0,1.0,1.0,1
4,1.0,1.0,1.0,1.0,1
...,...,...,...,...,...
145,1.0,1.0,1.0,1.0,1
146,1.0,1.0,1.0,1.0,1
147,1.0,1.0,1.0,1.0,1
148,1.0,1.0,1.0,1.0,1


In [34]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(color = new_results['dominated'], 
                        colorscale = ['rgba(11,66,255, 1)', 'rgba(11,0,255, 0.5)']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = m_metrics['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = m_metrics['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = m_metrics['P porcento']),
            dict(range = [0.5,2],
                label = 'Coeficiente de variação', values = m_metrics['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

In [46]:
m_aux2 = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [47]:
moo = monise(weightedScalar=EqualScalarization(X_train, y_train, 'Sex'),
             singleScalar=EqualScalarization(X_train, y_train, 'Sex'),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo.optimize()

sols = []

for solution in moo.solutionsList:
    sols.append(solution.x)
    m_aux2["Acurácia"].append(solution.x.score(X_test, y_test))
    m_aux2["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=german_fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    m_aux2["P porcento"].append(p_percent_score(sensitive_column=german_fair_feature)(solution.x,X_test))
    m_aux2["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-11 00:32:08,521 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-11 00:32:08,555 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-11 00:32:08,580 - moopt.monise - DEBUG: Finding 3th individual minima
2020-11-11 00:32:08,597 - moopt.monise - DEBUG: Finding 4th individual minima
2020-11-11 00:32:08,651 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-11 00:32:08,694 - moopt.monise - DEBUG: 6th solution - importance: 1.0000600756132034
2020-11-11 00:32:08,730 - moopt.monise - DEBUG: 7th solution - importance: 1.0000600756132034
2020-11-11 00:32:08,762 - moopt.monise - DEBUG: 8th solution - importance: 1.0000600756132034
2020-11-11 00:32:08,819 - moopt.monise - DEBUG: 9th solution - importance: 1.0000600756132034
2020-11-11 00:32:08,843 - moopt.monise - DEBUG: 10th solution - importance: 1.0000600756132034
2020-11-11 00:32:08,870 - moopt.monise - DEBUG: 11th solution - importance: 0.2233404188338421
2020-11-11 00:32:08,900 - moopt.monise - DE

In [39]:
m_metrics2 = pd.DataFrame(data=m_aux2, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

In [40]:
m_metrics2

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,Coeficiente de variação
0,0.635,0.970626,0.941176,0.797597
1,0.535,1.0,1.0,0.481483
2,0.535,1.0,1.0,0.481483
3,0.465,1.0,0.0,1.51693
4,0.615,0.95625,0.896781,0.657593
5,0.535,1.0,1.0,0.481483
6,0.535,1.0,1.0,0.481483
7,0.535,1.0,1.0,0.481483
8,0.535,1.0,1.0,0.481483
9,0.535,1.0,1.0,0.481483


In [43]:
m_metrics2['cv_aux'] = 1/m_metrics2['Coeficiente de variação']

new_results2 = m_metrics2.copy()
new_results2 = new_results2.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in new_results2.values]) for row in new_results2.values]
new_results2['dominated'] = dominate_metr

new_results2[new_results2.dominated] = 1
new_results2[new_results2.dominated == False] = 0

In [44]:
new_results2

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,0.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0
3,1.0,1.0,1.0,1.0,1
4,0.0,0.0,0.0,0.0,0
5,0.0,0.0,0.0,0.0,0
6,0.0,0.0,0.0,0.0,0
7,0.0,0.0,0.0,0.0,0
8,0.0,0.0,0.0,0.0,0
9,0.0,0.0,0.0,0.0,0


In [22]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(color = new_results2['dominated'], 
                        colorscale = ['#0043E0', '#7F5EEF']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = m_metrics2['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = m_metrics2['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = m_metrics2['P porcento']),
            dict(range = [0.5,2],
                label = 'Coeficiente de variação', values = m_metrics2['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

## Adult

In [14]:
data = pd.read_csv("Datasets - Limpos/adult.csv")
fair_feature = "race"
pred_feature = "income"

In [15]:
X = data.drop([pred_feature], axis=1)
y = data[pred_feature]


X_tv, X_test, y_tv, y_test = train_test_split(X, y, test_size=5000)
X_train, X_val, y_train, y_val = train_test_split(X_tv, y_tv, test_size=5000)

### Fair Moo

In [16]:
metrics_adult_fair_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [17]:
moo = monise(weightedScalar=FairScalarization(X_train, y_train, fair_feature),
             singleScalar=FairScalarization(X_train, y_train, fair_feature),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo.optimize()

sols = []

for solution in moo.solutionsList:
    sols.append(solution.x)
    metrics_adult_fair_aux["Acurácia"].append(solution.x.score(X_test, y_test))
    metrics_adult_fair_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    metrics_adult_fair_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(solution.x,X_test))
    metrics_adult_fair_aux["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-11 09:37:08,829 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-11 09:37:09,166 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-11 09:37:09,465 - moopt.monise - DEBUG: Finding 3th individual minima
2020-11-11 09:37:09,675 - moopt.monise - DEBUG: 4th solution - importance: 1.0
2020-11-11 09:37:09,968 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-11 09:37:10,215 - moopt.monise - DEBUG: 6th solution - importance: 1.0
2020-11-11 09:37:10,514 - moopt.monise - DEBUG: 7th solution - importance: 1.0
2020-11-11 09:37:10,757 - moopt.monise - DEBUG: 8th solution - importance: 1.0
2020-11-11 09:37:11,063 - moopt.monise - DEBUG: 9th solution - importance: 0.25297094120846686
2020-11-11 09:37:11,305 - moopt.monise - DEBUG: 10th solution - importance: 0.24390574461880976
2020-11-11 09:37:11,534 - moopt.monise - DEBUG: 11th solution - importance: 0.07533778892009706
2020-11-11 09:37:11,760 - moopt.monise - DEBUG: 12th solution - importance: 0.061846

2020-11-11 09:38:05,356 - moopt.monise - DEBUG: 86th solution - importance: 7.855975174890816e-05
2020-11-11 09:38:07,457 - moopt.monise - DEBUG: 87th solution - importance: 7.855975174890816e-05
2020-11-11 09:38:09,711 - moopt.monise - DEBUG: 88th solution - importance: 7.855975174890816e-05
2020-11-11 09:38:12,034 - moopt.monise - DEBUG: 89th solution - importance: 7.855975174890816e-05
2020-11-11 09:38:14,139 - moopt.monise - DEBUG: 90th solution - importance: 7.855975174890816e-05
2020-11-11 09:38:16,554 - moopt.monise - DEBUG: 91th solution - importance: 7.85597517488918e-05
2020-11-11 09:38:18,503 - moopt.monise - DEBUG: 92th solution - importance: 7.8559751748892e-05
2020-11-11 09:38:20,784 - moopt.monise - DEBUG: 93th solution - importance: 0.00011987265471663516
2020-11-11 09:38:23,067 - moopt.monise - DEBUG: 94th solution - importance: 0.00011987265471663516
2020-11-11 09:38:25,252 - moopt.monise - DEBUG: 95th solution - importance: 0.00011987265471663516
2020-11-11 09:38:27,

In [18]:
metrics_adult_fair = pd.DataFrame(data=metrics_adult_fair_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_fair['cv_aux'] = 1/metrics_adult_fair['Coeficiente de variação']

metrics_adult_fair = metrics_adult_fair.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_fair.values]) for row in metrics_adult_fair.values]
metrics_adult_fair['dominated'] = dominate_metr

In [22]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(color = metrics_adult_fair['dominated'], 
                        colorscale = ['rgba(0.0,0.0,255,0.0)', 'rgba(0.0,0.0,255,0.5)']),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_fair['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_fair['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_fair['P porcento']),
            dict(range = [1.2,1.5],
                label = 'cv_aux', values = metrics_adult_fair['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

### Equal Moo

In [23]:
metrics_adult_equal_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

In [24]:
moo = monise(weightedScalar=EqualScalarization(X_train, y_train, fair_feature),
             singleScalar=EqualScalarization(X_train, y_train, fair_feature),
              nodeTimeLimit=2, targetSize=150,
              targetGap=0, nodeGap=0.01, norm=False)

moo.optimize()

sols = []

for solution in moo.solutionsList:
    sols.append(solution.x)
    metrics_adult_equal_aux["Acurácia"].append(solution.x.score(X_test, y_test))
    metrics_adult_equal_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(solution.x,
                                                                                                            X_test, y_test))
    metrics_adult_equal_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(solution.x,X_test))
    metrics_adult_equal_aux["Coeficiente de variação"].append(coefficient_of_variation(solution.x, X_test, y_test))

2020-11-11 09:42:39,994 - moopt.monise - DEBUG: Finding 1th individual minima
2020-11-11 09:42:40,388 - moopt.monise - DEBUG: Finding 2th individual minima
2020-11-11 09:42:40,505 - moopt.monise - DEBUG: Finding 3th individual minima
2020-11-11 09:42:40,625 - moopt.monise - DEBUG: Finding 4th individual minima
2020-11-11 09:42:41,061 - moopt.monise - DEBUG: 5th solution - importance: 1.0
2020-11-11 09:42:41,379 - moopt.monise - DEBUG: 6th solution - importance: 1.0000006622388495
2020-11-11 09:42:41,706 - moopt.monise - DEBUG: 7th solution - importance: 1.0000006622388495
2020-11-11 09:42:41,802 - moopt.monise - DEBUG: 8th solution - importance: 1.0000006622388495
2020-11-11 09:42:41,931 - moopt.monise - DEBUG: 9th solution - importance: 1.0000006622388495
2020-11-11 09:42:42,067 - moopt.monise - DEBUG: 10th solution - importance: 1.0000006622388495
2020-11-11 09:42:42,193 - moopt.monise - DEBUG: 11th solution - importance: 0.9962161423794477
2020-11-11 09:42:42,323 - moopt.monise - DE

In [25]:
metrics_adult_equal = pd.DataFrame(data=metrics_adult_equal_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_equal['cv_aux'] = 1/metrics_adult_equal['Coeficiente de variação']

metrics_adult_equal = metrics_adult_equal.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_equal.values]) for row in metrics_adult_equal.values]
metrics_adult_equal['dominated'] = dominate_metr

In [26]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_equal['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_equal['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_equal['P porcento']),
            dict(range = [1.2,3],
                label = 'cv_aux', values = metrics_adult_equal['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

In [27]:
import numpy as np

C_values = np.linspace(1e-10, 10, 150)**2

metrics_adult_log_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    model = LogisticRegression(C=C, max_iter=10**3, tol=10**-6)
    model.fit(X_train, y_train)
    metrics_adult_log_aux["Acurácia"].append(model.score(X_test, y_test))
    metrics_adult_log_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(model,
                                                                                                            X_test, y_test))
    metrics_adult_log_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(model,X_test))
    metrics_adult_log_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))


invalid value encountered in double_scalars


No samples with y_hat == 1 for race == 1, returning 0



In [28]:
metrics_adult_log = pd.DataFrame(data=metrics_adult_log_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_log['cv_aux'] = 1/metrics_adult_log['Coeficiente de variação']

metrics_adult_log = metrics_adult_log.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_log.values]) for row in metrics_adult_log.values]
metrics_adult_log['dominated'] = dominate_metr

In [29]:
metrics_adult_log

Unnamed: 0,Acurácia,Igualdade de Oportunidade,P porcento,cv_aux,dominated
0,0.7638,1.000000,0.000000,1.271554,False
1,0.8006,0.779869,0.532755,1.435362,False
2,0.8006,0.779869,0.532755,1.435362,False
3,0.8006,0.779869,0.532755,1.435362,False
4,0.8006,0.779869,0.532755,1.435362,False
...,...,...,...,...,...
145,0.8006,0.779869,0.532755,1.435362,False
146,0.8006,0.779869,0.532755,1.435362,False
147,0.8006,0.779869,0.532755,1.435362,False
148,0.8006,0.779869,0.532755,1.435362,False


In [30]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_log['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_log['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_log['P porcento']),
            dict(range = [1.2,3],
                label = 'cv_aux', values = metrics_adult_log['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

In [31]:
C_values = np.linspace(1e-10, 10, 15)**2
c_values = np.linspace(1e-5, 1, 10)

metrics_adult_eqo_aux = {"Acurácia": [], "Igualdade de Oportunidade": [], "P porcento": [], "Coeficiente de variação": []}

for C in C_values:
    for c in c_values:
        try:
            model = EqualOpportunityClassifier(sensitive_cols=fair_feature, positive_target=True, covariance_threshold=c, 
                                               C=C, max_iter=10**3)
            model.fit(X_train, y_train)
            metrics_adult_eqo_aux["Acurácia"].append(model.score(X_test, y_test))
            metrics_adult_eqo_aux["Igualdade de Oportunidade"].append(equal_opportunity_score(sensitive_column=fair_feature)(model,
                                                                                                                    X_test, y_test))
            metrics_adult_eqo_aux["P porcento"].append(p_percent_score(sensitive_column=fair_feature)(model,X_test))
            metrics_adult_eqo_aux["Coeficiente de variação"].append(coefficient_of_variation(model, X_test, y_test))
        except:
            print("C: ",C," - c: ",c)

C:  1.0000000000000001e-20  - c:  1e-05
C:  1.0000000000000001e-20  - c:  0.11112
C:  1.0000000000000001e-20  - c:  0.22223
C:  1.0000000000000001e-20  - c:  0.33334
C:  1.0000000000000001e-20  - c:  0.44445
C:  1.0000000000000001e-20  - c:  0.5555599999999999
C:  1.0000000000000001e-20  - c:  0.66667
C:  1.0000000000000001e-20  - c:  0.7777799999999999
C:  1.0000000000000001e-20  - c:  0.88889
C:  1.0000000000000001e-20  - c:  1.0


In [32]:
metrics_adult_eqo = pd.DataFrame(data=metrics_adult_eqo_aux, columns=['Acurácia','Igualdade de Oportunidade','P porcento',
                                     'Coeficiente de variação'])

metrics_adult_eqo['cv_aux'] = 1/metrics_adult_eqo['Coeficiente de variação']

metrics_adult_eqo = metrics_adult_eqo.drop('Coeficiente de variação', axis=1)
dominate_metr = [any([dominate(other, row) for other in metrics_adult_eqo.values]) for row in metrics_adult_eqo.values]
metrics_adult_eqo['dominated'] = dominate_metr

In [33]:
fig = go.Figure(data=
    go.Parcoords(
        line = dict(),
        dimensions = list([
            dict(range = [0,1],
                label = 'Acurácia', values = metrics_adult_eqo['Acurácia']),
            dict(range = [0,1],
                label = 'Igualdade de Oportunidade', values = metrics_adult_eqo['Igualdade de Oportunidade']),
            dict(range = [0,1],
                label = 'P porcento', values = metrics_adult_eqo['P porcento']),
            dict(range = [1.2,3],
                label = 'cv_aux', values = metrics_adult_eqo['cv_aux']),
        ])
    )
)

fig.update_layout(
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
)

fig.show()

## COMPAS

In [15]:
data = pd.read_csv("Datasets - Limpos/compas_onerace.csv")
fair_feature = "not_white"
pred_feature = "Two_yr_Recidivism"

In [16]:
X = data.drop([pred_feature], axis=1)
y = data[pred_feature]


X_tv, X_test, y_tv, y_test = train_test_split(X, y, test_size=1000)
X_train, X_val, y_train, y_val = train_test_split(X_tv, y_tv, test_size=1000)