In [35]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_validate
from sklearn import metrics
from collections import OrderedDict
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, balanced_accuracy_score, f1_score, precision_score, recall_score

In [36]:
import seaborn as sns
from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.fairness import Fairness
from aequitas.plotting import Plot

## 1. Replicate the ORES model, with a test set

In [3]:
# read in data
df = pd.read_csv('data/enwiki.labeled_revisions.20k_2015.csv')
df = df.dropna()
df.head()

Unnamed: 0,rev_id,auto_labeled,damaging,goodfaith,feature.english.badwords.revision.diff.match_delta_decrease,feature.english.badwords.revision.diff.match_delta_increase,feature.english.badwords.revision.diff.match_delta_sum,feature.english.badwords.revision.diff.match_prop_delta_decrease,feature.english.badwords.revision.diff.match_prop_delta_increase,feature.english.badwords.revision.diff.match_prop_delta_sum,...,feature.wikitext.revision.parent.external_links,feature.wikitext.revision.parent.headings,feature.wikitext.revision.parent.ref_tags,feature.wikitext.revision.parent.tags,feature.wikitext.revision.parent.templates,feature.wikitext.revision.parent.wikilinks,feature.wikitext.revision.ref_tags,feature.wikitext.revision.tags,feature.wikitext.revision.templates,feature.wikitext.revision.wikilinks
0,644933637.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,574.0,121.0,0.0,1097.0,1.0,373.0,0.0,1119.0,1.0,381.0
1,629393521.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,13.0,0.0,142.0,33.0,44.0,0.0,142.0,33.0,44.0
2,655365754.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,322.0,1.0,73.0,0.0,355.0,1.0,73.0
3,616502017.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,12.0,6.0,5.0,26.0,12.0,59.0,5.0,26.0,12.0,59.0
4,651762922.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,17.0,6.0,16.0,0.0,26.0,7.0,20.0


In [4]:
#defining newcomer as: < 3.637819e+06
df['feature.temporal.revision.user.seconds_since_registration'].describe()

count    1.931900e+04
mean     1.307313e+08
std      1.138684e+08
min      0.000000e+00
25%      3.637819e+06
50%      1.207869e+08
75%      2.407948e+08
max      4.278274e+08
Name: feature.temporal.revision.user.seconds_since_registration, dtype: float64

In [5]:
# Combine anon and new to a 3-category new feature, anonymous, newcomers, experienced
newcomer_seconds = 3.637819e+06

conditions = [
    (df['feature.revision.user.is_anon'] == True),
    (df['feature.revision.user.is_anon'] == False) & (df['feature.temporal.revision.user.seconds_since_registration'] < newcomer_seconds),
    (df['feature.revision.user.is_anon'] == False) & (df['feature.temporal.revision.user.seconds_since_registration'] >= newcomer_seconds)]
choices = [0,1,2]
df['user.type'] = np.select(conditions, choices)
df.head()

Unnamed: 0,rev_id,auto_labeled,damaging,goodfaith,feature.english.badwords.revision.diff.match_delta_decrease,feature.english.badwords.revision.diff.match_delta_increase,feature.english.badwords.revision.diff.match_delta_sum,feature.english.badwords.revision.diff.match_prop_delta_decrease,feature.english.badwords.revision.diff.match_prop_delta_increase,feature.english.badwords.revision.diff.match_prop_delta_sum,...,feature.wikitext.revision.parent.headings,feature.wikitext.revision.parent.ref_tags,feature.wikitext.revision.parent.tags,feature.wikitext.revision.parent.templates,feature.wikitext.revision.parent.wikilinks,feature.wikitext.revision.ref_tags,feature.wikitext.revision.tags,feature.wikitext.revision.templates,feature.wikitext.revision.wikilinks,user.type
0,644933637.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,121.0,0.0,1097.0,1.0,373.0,0.0,1119.0,1.0,381.0,2
1,629393521.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,13.0,0.0,142.0,33.0,44.0,0.0,142.0,33.0,44.0,2
2,655365754.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,322.0,1.0,73.0,0.0,355.0,1.0,73.0,2
3,616502017.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,6.0,5.0,26.0,12.0,59.0,5.0,26.0,12.0,59.0,0
4,651762922.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,17.0,6.0,16.0,0.0,26.0,7.0,20.0,2


In [6]:
# check how damaging/goodfaith distributed, also by the sensitive feature
# 96% of data is not damaging
print(df['damaging'].describe())
print(18572/19319)

count     19319
unique        2
top       False
freq      18572
Name: damaging, dtype: object
0.9613334023500181


In [7]:
# 3474 many anonymous users
# 13.7% of anonymous users' edits are damaging
print(df[df['user.type'] == 0]['damaging'].describe())
print(1-2995/3474)

count      3474
unique        2
top       False
freq       2995
Name: damaging, dtype: object
0.13788140472078292


In [8]:
# 1356 many newcoming users
# 14.5% of newcoming users' edits are damaging
print(df[df['user.type'] == 1]['damaging'].describe())
print(1-1159/1356)

count      1356
unique        2
top       False
freq       1159
Name: damaging, dtype: object
0.1452802359882006


In [9]:
# 14489 many experienced users
# 0.049% of experienced users' edits are damaging
print(df[df['user.type'] == 2]['damaging'].describe())
print(1-14418/14489)

count     14489
unique        2
top       False
freq      14418
Name: damaging, dtype: object
0.004900269169714977


In [10]:
# add in sample weights
df['sample_weight'] = np.where(df['damaging']==True, 10, 1)

In [11]:
# delete the two sensitive features
df = df.drop(['feature.revision.user.is_anon', 'feature.temporal.revision.user.seconds_since_registration'], axis=1)

In [12]:
# convert user.type to categorical
df['user.type'] = pd.Categorical(df['user.type'])

In [19]:
# divide into X, X_weights and y
y = df["damaging"]
X_with_weights = df.iloc[:,4:].copy()
X_with_weights.head()

Unnamed: 0,feature.english.badwords.revision.diff.match_delta_decrease,feature.english.badwords.revision.diff.match_delta_increase,feature.english.badwords.revision.diff.match_delta_sum,feature.english.badwords.revision.diff.match_prop_delta_decrease,feature.english.badwords.revision.diff.match_prop_delta_increase,feature.english.badwords.revision.diff.match_prop_delta_sum,feature.english.dictionary.revision.diff.dict_word_delta_decrease,feature.english.dictionary.revision.diff.dict_word_delta_increase,feature.english.dictionary.revision.diff.dict_word_delta_sum,feature.english.dictionary.revision.diff.dict_word_prop_delta_decrease,...,feature.wikitext.revision.parent.ref_tags,feature.wikitext.revision.parent.tags,feature.wikitext.revision.parent.templates,feature.wikitext.revision.parent.wikilinks,feature.wikitext.revision.ref_tags,feature.wikitext.revision.tags,feature.wikitext.revision.templates,feature.wikitext.revision.wikilinks,user.type,sample_weight
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,66.0,66.0,0.0,...,0.0,1097.0,1.0,373.0,0.0,1119.0,1.0,381.0,2,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,142.0,33.0,44.0,0.0,142.0,33.0,44.0,2,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,322.0,1.0,73.0,0.0,355.0,1.0,73.0,2,1
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,5.0,26.0,12.0,59.0,5.0,26.0,12.0,59.0,0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,44.0,0.0,...,0.0,17.0,6.0,16.0,0.0,26.0,7.0,20.0,2,1


In [20]:
# split into train and test set
X_with_weights_train, X_with_weights_test, y_train, y_test = train_test_split(X_with_weights, y, test_size=0.3, random_state=42)

In [28]:
# split train with weight to train and weight
X_train = X_with_weights_train.iloc[:,:-1].copy()
X_train_weights = X_with_weights_train.iloc[:,-1].copy()
X_test = X_with_weights_test.iloc[:,:-1].copy()

In [29]:
# parameters from 
#https://github.com/wikimedia/editquality/blob/master/model_info/enwiki.damaging.md
params= {'min_impurity_decrease': 0.0, 
         'loss': 'deviance', 
         'n_estimators': 700, 
         'min_impurity_split': None, 
         'verbose': 0, 
         'criterion': 'friedman_mse', 
         'subsample': 1.0, 
         #'center': True, 
         #'scale': True, 
         'presort': 'auto', 
         'init': None, 
         #'multilabel': False, 
         'max_depth': 7, 
         'random_state': None, 
         'learning_rate': 0.01, 
         'validation_fraction': 0.1, 
         'warm_start': False, 
         'min_samples_split': 2, 
         'min_samples_leaf': 1, 
         'min_weight_fraction_leaf': 0.0, 
         'n_iter_no_change': None, 
         'max_leaf_nodes': None, 
         'tol': 0.0001, 
         'max_features': 'log2'}
         #'labels': [True, False], 
         #'label_weights': OrderedDict([(True, 10)])

In [30]:
# Training
gb_clf_replicate = GradientBoostingClassifier(**params)
gb_clf_replicate.fit(X_train, y_train, sample_weight=X_train_weights)



GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.01, loss='deviance', max_depth=7,
                           max_features='log2', max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=700,
                           n_iter_no_change=None, presort='auto',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [44]:
# test on training set
y_pred_train = gb_clf_replicate.predict(X_train)

print("accuracy: ", accuracy_score(y_train, y_pred_train).round(4))
print("balanced accuracy: ", balanced_accuracy_score(y_train, y_pred_train).round(4))
print("f1: ", f1_score(y_train, y_pred_train).round(4))
print("recall: ", recall_score(y_train, y_pred_train).round(4))
print("precision: ", precision_score(y_train, y_pred_train).round(4))
print("auc: ", roc_auc_score(y_train, y_pred_train).round(4))

accuracy:  0.9748
balanced accuracy:  0.9782
f1:  0.761
recall:  0.9819
precision:  0.6213
auc:  0.9782


In [45]:
# test on test set
y_pred_test = gb_clf_replicate.predict(X_test)

print("accuracy: ", accuracy_score(y_test, y_pred_test).round(4))
print("balanced accuracy: ", balanced_accuracy_score(y_test, y_pred_test).round(4))
print("f1: ", f1_score(y_test, y_pred_test).round(4))
print("recall: ", recall_score(y_test, y_pred_test).round(4))
print("precision: ", precision_score(y_test, y_pred_test).round(4))
print("auc: ", roc_auc_score(y_test, y_pred_test).round(4))

accuracy:  0.9524
balanced accuracy:  0.749
f1:  0.4274
recall:  0.5309
precision:  0.3576
auc:  0.749


### 1.1 Bias analysis for replicate of ORES

In [37]:
# Replicate model
df_bias_rep = pd.DataFrame(columns = ['score', 'label_value', 'user.type'])
df_bias_rep['label_value'] = y_test
df_bias_rep['user.type'] = X_test.iloc[:,-1].copy().astype(str)
df_bias_rep['score'] = gb_clf_replicate.predict(X_test)

g = Group()
xtab, _ = g.get_crosstabs(df_bias_rep)
absolute_metrics = g.list_absolute_metrics(xtab)
xtab[[col for col in xtab.columns if col not in absolute_metrics]]

model_id, score_thresholds 0 {'rank_abs': [288]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


Unnamed: 0,model_id,score_threshold,k,attribute_name,attribute_value,pp,pn,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities
0,0,binary 0/1,288,user.type,0,220,774,142,53,721,78,131,863,994,5796
1,0,binary 0/1,288,user.type,1,66,341,42,21,320,24,45,362,407,5796
2,0,binary 0/1,288,user.type,2,2,4393,1,17,4376,1,18,4377,4395,5796


In [38]:
xtab[['attribute_name', 'attribute_value'] + absolute_metrics].round(4)

Unnamed: 0,attribute_name,attribute_value,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
0,user.type,0,0.5954,0.8355,0.0685,0.6455,0.1645,0.4046,0.9315,0.3545,0.7639,0.2213,0.1318
1,user.type,1,0.5333,0.884,0.0616,0.6364,0.116,0.4667,0.9384,0.3636,0.2292,0.1622,0.1106
2,user.type,2,0.0556,0.9998,0.0039,0.5,0.0002,0.9444,0.9961,0.5,0.0069,0.0005,0.0041


## 2. FairLearn on replicate the ORES model

In [39]:
from fairlearn.reductions import GridSearch, ExponentiatedGradient
from fairlearn.reductions import DemographicParity, EqualizedOdds

In [42]:
# extract sensitive feature
A_train = X_train[['user.type']]
A_train.head()

Unnamed: 0,user.type
6724,2
19070,1
5203,0
8133,1
15788,2


### 2.1 Default eps value

In [43]:
# default eps
gb_clf_fair=ExponentiatedGradient(GradientBoostingClassifier(**params),
                           constraints=EqualizedOdds())
gb_clf_fair.fit(X_train, y_train, sensitive_features=A_train, sample_weight=X_train_weights)



In [47]:
y_pred_test = gb_clf_fair.predict(X_test)

print("accuracy: ", accuracy_score(y_test, y_pred_test).round(4))
print("balanced accuracy: ", balanced_accuracy_score(y_test, y_pred_test).round(4))
print("f1: ", f1_score(y_test, y_pred_test).round(4))
print("recall: ", recall_score(y_test, y_pred_test).round(4))
print("precision: ", precision_score(y_test, y_pred_test).round(4))
print("auc: ", roc_auc_score(y_test, y_pred_test).round(4))

accuracy:  0.9705
balanced accuracy:  0.5916
f1:  0.2963
recall:  0.1856
precision:  0.7347
auc:  0.5916


In [48]:
# Bias 
df_bias_fair = pd.DataFrame(columns = ['score', 'label_value', 'user.type'])
df_bias_fair['label_value'] = y_test
df_bias_fair['user.type'] = X_test.iloc[:,-1].copy().astype(str)
df_bias_fair['score'] = gb_clf_fair.predict(X_test)

g = Group()
xtab, _ = g.get_crosstabs(df_bias_fair)
absolute_metrics = g.list_absolute_metrics(xtab)
xtab[[col for col in xtab.columns if col not in absolute_metrics]]

model_id, score_thresholds 0 {'rank_abs': [18]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


Unnamed: 0,model_id,score_threshold,k,attribute_name,attribute_value,pp,pn,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities
0,0,binary 0/1,18,user.type,0,3,991,2,130,861,1,131,863,994,5796
1,0,binary 0/1,18,user.type,1,1,406,1,45,361,0,45,362,407,5796
2,0,binary 0/1,18,user.type,2,14,4381,14,18,4363,0,18,4377,4395,5796


In [49]:
xtab[['attribute_name', 'attribute_value'] + absolute_metrics].round(4)

Unnamed: 0,attribute_name,attribute_value,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
0,user.type,0,0.0076,0.9977,0.1312,0.6667,0.0023,0.9924,0.8688,0.3333,0.1667,0.003,0.1318
1,user.type,1,0.0,0.9972,0.1108,1.0,0.0028,1.0,0.8892,0.0,0.0556,0.0025,0.1106
2,user.type,2,0.0,0.9968,0.0041,1.0,0.0032,1.0,0.9959,0.0,0.7778,0.0032,0.0041


### Testing with different eps

In [79]:
def train_fairlearn(eps):
    print("For eps = ", eps)
    gb_clf_fair=ExponentiatedGradient(GradientBoostingClassifier(**params),
                           constraints=EqualizedOdds(), eps=eps)
    gb_clf_fair.fit(X_train, y_train, sensitive_features=A_train, sample_weight=X_train_weights)
    
    y_pred_test = gb_clf_fair.predict(X_test)
    
    print("accuracy: ", accuracy_score(y_test, y_pred_test))
    print("balanced accuracy: ", balanced_accuracy_score(y_test, y_pred_test))
    print("f1: ", f1_score(y_test, y_pred_test))
    print("recall: ", recall_score(y_test, y_pred_test))
    print("precision: ", precision_score(y_test, y_pred_test))
    print("auc: ", roc_auc_score(y_test, y_pred_test))
    
    df_bias_fair = pd.DataFrame(columns = ['score', 'label_value', 'user.type'])
    df_bias_fair['label_value'] = y_test
    df_bias_fair['user.type'] = X_test.iloc[:,-1].copy().astype(str)
    df_bias_fair['score'] = gb_clf_fair.predict(X_test)
    g = Group()
    xtab, _ = g.get_crosstabs(df_bias_fair)
    absolute_metrics = g.list_absolute_metrics(xtab)
    
    print("fpr of anon: ", xtab[absolute_metrics]["fpr"][0])
    print("fpr of new: ", xtab[absolute_metrics]["fpr"][1])
    print("fpr of exp: ", xtab[absolute_metrics]["fpr"][2])
    
    print("fnr of anon: ", xtab[absolute_metrics]["fnr"][0])
    print("fnr of new: ", xtab[absolute_metrics]["fnr"][1])
    print("fnr of exp: ", xtab[absolute_metrics]["fnr"][2])
    print("\n")
    
    
    d = {"eps": eps,
         "accuracy": accuracy_score(y_test, y_pred_test).round(4),
         "balanced_accuracy": balanced_accuracy_score(y_test, y_pred_test).round(4),
         "f1": f1_score(y_test, y_pred_test).round(4),
         "recall": recall_score(y_test, y_pred_test).round(4),
         "precision": precision_score(y_test, y_pred_test).round(4),
         "auc": roc_auc_score(y_test, y_pred_test).round(4),
         "fpr_anon": xtab[absolute_metrics]["fpr"][0].round(4),
         "fpr_new": xtab[absolute_metrics]["fpr"][1].round(4),
         "fpr_exp": xtab[absolute_metrics]["fpr"][2].round(4),
         "fnr_anon": xtab[absolute_metrics]["fnr"][0].round(4),
         "fnr_new": xtab[absolute_metrics]["fnr"][1].round(4),
         "fnr_exp": xtab[absolute_metrics]["fnr"][2].round(4)
        }
    
    return d

In [84]:
# compute performance and biases for the fairlearn model with different eps scores
def create_fairlearn_eps_data(interval):
    columns = ['eps',
               'accuracy', 
               'balanced_accuracy', 
               'f1', 
               'recall', 
               'precision', 
               'auc', 
               'fpr_anon', 
               'fpr_new', 
               'fpr_exp', 
               'fnr_anon',
               'fnr_new',
               'fnr_exp']
    df_new = pd.DataFrame(columns=columns)
    
    for i in np.arange(0.01, 1.01, interval):
        d = train_fairlearn(i)
        df_new = df_new.append(d, ignore_index=True)
        
        df_new.to_csv("fairlearn"+str(interval)+".csv", index=False)

In [85]:
create_fairlearn_eps_data(0.1)

For eps =  0.01




accuracy:  0.9708419599723948
balanced accuracy:  0.604242041686143
f1:  0.32669322709163345
recall:  0.211340206185567
precision:  0.7192982456140351
auc:  0.604242041686143
model_id, score_thresholds 0 {'rank_abs': [17]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


fpr of anon:  0.0034762456546929316
fpr of new:  0.0027624309392265192
fpr of exp:  0.0027416038382453737
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  0.51






accuracy:  0.9696342305037957
balanced accuracy:  0.5911769360721686
f1:  0.2903225806451613
recall:  0.18556701030927836
precision:  0.6666666666666666
auc:  0.5911769360721687
model_id, score_thresholds 0 {'rank_abs': [17]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


fpr of anon:  0.002317497103128621
fpr of new:  0.0027624309392265192
fpr of exp:  0.002970070824765821
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  1.01




accuracy:  0.970151828847481
balanced accuracy:  0.6063730920841968
f1:  0.32684824902723736
recall:  0.21649484536082475
precision:  0.6666666666666666
auc:  0.6063730920841968
model_id, score_thresholds 0 {'rank_abs': [20]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


fpr of anon:  0.0034762456546929316
fpr of new:  0.0055248618784530384
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  1.51




accuracy:  0.9699792960662525
balanced accuracy:  0.5888673779982848
f1:  0.28688524590163933
recall:  0.18041237113402062
precision:  0.7
auc:  0.5888673779982849
model_id, score_thresholds 0 {'rank_abs': [15]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


fpr of anon:  0.0011587485515643105
fpr of new:  0.0027624309392265192
fpr of exp:  0.0027416038382453737
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  2.01






accuracy:  0.9704968944099379
balanced accuracy:  0.5965993367611715
f1:  0.3076923076923077
recall:  0.1958762886597938
precision:  0.7169811320754716
auc:  0.5965993367611714
model_id, score_thresholds 0 {'rank_abs': [17]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


fpr of anon:  0.0011587485515643105
fpr of new:  0.0055248618784530384
fpr of exp:  0.002970070824765821
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  2.51






accuracy:  0.970151828847481
balanced accuracy:  0.5914446975859138
f1:  0.29387755102040813
recall:  0.18556701030927836
precision:  0.7058823529411765
auc:  0.5914446975859137
model_id, score_thresholds 0 {'rank_abs': [18]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


fpr of anon:  0.0011587485515643105
fpr of new:  0.0055248618784530384
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  3.01






accuracy:  0.970151828847481
balanced accuracy:  0.5914446975859138
f1:  0.29387755102040813
recall:  0.18556701030927836
precision:  0.7058823529411765
auc:  0.5914446975859137
model_id, score_thresholds 0 {'rank_abs': [16]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


fpr of anon:  0.0011587485515643105
fpr of new:  0.0027624309392265192
fpr of exp:  0.002970070824765821
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  3.51






accuracy:  0.9699792960662525
balanced accuracy:  0.5888673779982848
f1:  0.28688524590163933
recall:  0.18041237113402062
precision:  0.7
auc:  0.5888673779982849
model_id, score_thresholds 0 {'rank_abs': [17]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


fpr of anon:  0.002317497103128621
fpr of new:  0.0
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0


For eps =  4.01






accuracy:  0.9703243616287095
balanced accuracy:  0.5915339514238287
f1:  0.29508196721311475
recall:  0.18556701030927836
precision:  0.72
auc:  0.5915339514238287
model_id, score_thresholds 0 {'rank_abs': [16]}
fpr of anon:  0.0011587485515643105
fpr of new:  0.0
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


For eps =  4.51






accuracy:  0.9706694271911663
balanced accuracy:  0.5991766563488004
f1:  0.31451612903225806
recall:  0.20103092783505155
precision:  0.7222222222222222
auc:  0.5991766563488004
model_id, score_thresholds 0 {'rank_abs': [17]}
fpr of anon:  0.0011587485515643105
fpr of new:  0.0027624309392265192
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)


For eps =  5.01






accuracy:  0.9706694271911663
balanced accuracy:  0.5917124590996587
f1:  0.2975206611570248
recall:  0.18556701030927836
precision:  0.75
auc:  0.5917124590996589
model_id, score_thresholds 0 {'rank_abs': [15]}
fpr of anon:  0.0011587485515643105
fpr of new:  0.0
fpr of exp:  0.002970070824765821
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


For eps =  5.51






accuracy:  0.9699792960662525
balanced accuracy:  0.5913554437479986
f1:  0.29268292682926833
recall:  0.18556701030927836
precision:  0.6923076923076923
auc:  0.5913554437479986
model_id, score_thresholds 0 {'rank_abs': [17]}
fpr of anon:  0.0011587485515643105
fpr of new:  0.0027624309392265192
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)


For eps =  6.01






accuracy:  0.970151828847481
balanced accuracy:  0.5889566318361998
f1:  0.2880658436213992
recall:  0.18041237113402062
precision:  0.7142857142857143
auc:  0.5889566318362
model_id, score_thresholds 0 {'rank_abs': [17]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


For eps =  6.51






accuracy:  0.9703243616287095
balanced accuracy:  0.5915339514238287
f1:  0.29508196721311475
recall:  0.18556701030927836
precision:  0.72
auc:  0.5915339514238287
model_id, score_thresholds 0 {'rank_abs': [17]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


For eps =  7.01






accuracy:  0.9704968944099379
balanced accuracy:  0.5941112710114576
f1:  0.30204081632653057
recall:  0.19072164948453607
precision:  0.7254901960784313
auc:  0.5941112710114577
model_id, score_thresholds 0 {'rank_abs': [17]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


For eps =  7.51






accuracy:  0.9698067632850241
balanced accuracy:  0.5912661899100837
f1:  0.291497975708502
recall:  0.18556701030927836
precision:  0.6792452830188679
auc:  0.5912661899100837
model_id, score_thresholds 0 {'rank_abs': [16]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0
fpr of exp:  0.002970070824765821
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


For eps =  8.01






accuracy:  0.9710144927536232
balanced accuracy:  0.6018432297743442
f1:  0.3225806451612903
recall:  0.20618556701030927
precision:  0.7407407407407407
auc:  0.6018432297743442
model_id, score_thresholds 0 {'rank_abs': [16]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0
fpr of exp:  0.002970070824765821
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan


For eps =  8.51






accuracy:  0.9708419599723948
balanced accuracy:  0.5992659101867154
f1:  0.3157894736842105
recall:  0.20103092783505155
precision:  0.7358490566037735
auc:  0.5992659101867153
model_id, score_thresholds 0 {'rank_abs': [18]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0027624309392265192
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)


For eps =  9.01






accuracy:  0.9699792960662525
balanced accuracy:  0.5913554437479986
f1:  0.29268292682926833
recall:  0.18556701030927836
precision:  0.6923076923076923
auc:  0.5913554437479986
model_id, score_thresholds 0 {'rank_abs': [19]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0055248618784530384
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)


For eps =  9.51






accuracy:  0.9706694271911663
balanced accuracy:  0.5966885905990865
f1:  0.3089430894308943
recall:  0.1958762886597938
precision:  0.7307692307692307
auc:  0.5966885905990865
model_id, score_thresholds 0 {'rank_abs': [17]}
fpr of anon:  0.002317497103128621
fpr of new:  0.0
fpr of exp:  0.003198537811286269
fnr of anon:  0.9923664122137404
fnr of new:  1.0
fnr of exp:  1.0




  col_group = df.fillna({col: pd.np.nan}).groupby(col)
  divide = lambda x, y: x / y if y != 0 else pd.np.nan
