In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_validate
from sklearn import metrics
from collections import OrderedDict
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, balanced_accuracy_score, f1_score, precision_score, recall_score

In [2]:
import seaborn as sns
from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.fairness import Fairness
from aequitas.plotting import Plot

In [3]:
# read in data
df = pd.read_csv('data/enwiki.labeled_revisions.20k_2015.csv')
df = df.dropna()
df.head()

Unnamed: 0,rev_id,auto_labeled,damaging,goodfaith,feature.english.badwords.revision.diff.match_delta_decrease,feature.english.badwords.revision.diff.match_delta_increase,feature.english.badwords.revision.diff.match_delta_sum,feature.english.badwords.revision.diff.match_prop_delta_decrease,feature.english.badwords.revision.diff.match_prop_delta_increase,feature.english.badwords.revision.diff.match_prop_delta_sum,...,feature.wikitext.revision.parent.external_links,feature.wikitext.revision.parent.headings,feature.wikitext.revision.parent.ref_tags,feature.wikitext.revision.parent.tags,feature.wikitext.revision.parent.templates,feature.wikitext.revision.parent.wikilinks,feature.wikitext.revision.ref_tags,feature.wikitext.revision.tags,feature.wikitext.revision.templates,feature.wikitext.revision.wikilinks
0,644933637.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,574.0,121.0,0.0,1097.0,1.0,373.0,0.0,1119.0,1.0,381.0
1,629393521.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,13.0,0.0,142.0,33.0,44.0,0.0,142.0,33.0,44.0
2,655365754.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,322.0,1.0,73.0,0.0,355.0,1.0,73.0
3,616502017.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,12.0,6.0,5.0,26.0,12.0,59.0,5.0,26.0,12.0,59.0
4,651762922.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,17.0,6.0,16.0,0.0,26.0,7.0,20.0


In [4]:
# Combine anon and new to a 3-category new feature, anonymous, newcomers, experienced
newcomer_seconds = 3.637819e+06

conditions = [
    (df['feature.revision.user.is_anon'] == True),
    (df['feature.revision.user.is_anon'] == False) & (df['feature.temporal.revision.user.seconds_since_registration'] < newcomer_seconds),
    (df['feature.revision.user.is_anon'] == False) & (df['feature.temporal.revision.user.seconds_since_registration'] >= newcomer_seconds)]
choices = [0,1,2]
df['user.type'] = np.select(conditions, choices)
df.head()

Unnamed: 0,rev_id,auto_labeled,damaging,goodfaith,feature.english.badwords.revision.diff.match_delta_decrease,feature.english.badwords.revision.diff.match_delta_increase,feature.english.badwords.revision.diff.match_delta_sum,feature.english.badwords.revision.diff.match_prop_delta_decrease,feature.english.badwords.revision.diff.match_prop_delta_increase,feature.english.badwords.revision.diff.match_prop_delta_sum,...,feature.wikitext.revision.parent.headings,feature.wikitext.revision.parent.ref_tags,feature.wikitext.revision.parent.tags,feature.wikitext.revision.parent.templates,feature.wikitext.revision.parent.wikilinks,feature.wikitext.revision.ref_tags,feature.wikitext.revision.tags,feature.wikitext.revision.templates,feature.wikitext.revision.wikilinks,user.type
0,644933637.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,121.0,0.0,1097.0,1.0,373.0,0.0,1119.0,1.0,381.0,2
1,629393521.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,13.0,0.0,142.0,33.0,44.0,0.0,142.0,33.0,44.0,2
2,655365754.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,322.0,1.0,73.0,0.0,355.0,1.0,73.0,2
3,616502017.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,6.0,5.0,26.0,12.0,59.0,5.0,26.0,12.0,59.0,0
4,651762922.0,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,17.0,6.0,16.0,0.0,26.0,7.0,20.0,2


In [5]:
# add in sample weights
df['sample_weight'] = np.where(df['damaging']==True, 10, 1)

# delete the two sensitive features
df = df.drop(['feature.revision.user.is_anon', 'feature.temporal.revision.user.seconds_since_registration'], axis=1)

# convert user.type to categorical
df['user.type'] = pd.Categorical(df['user.type'])

# divide into X, X_weights and y
y = df["damaging"]
X_with_weights = df.iloc[:,4:].copy()
X_with_weights.head()

Unnamed: 0,feature.english.badwords.revision.diff.match_delta_decrease,feature.english.badwords.revision.diff.match_delta_increase,feature.english.badwords.revision.diff.match_delta_sum,feature.english.badwords.revision.diff.match_prop_delta_decrease,feature.english.badwords.revision.diff.match_prop_delta_increase,feature.english.badwords.revision.diff.match_prop_delta_sum,feature.english.dictionary.revision.diff.dict_word_delta_decrease,feature.english.dictionary.revision.diff.dict_word_delta_increase,feature.english.dictionary.revision.diff.dict_word_delta_sum,feature.english.dictionary.revision.diff.dict_word_prop_delta_decrease,...,feature.wikitext.revision.parent.ref_tags,feature.wikitext.revision.parent.tags,feature.wikitext.revision.parent.templates,feature.wikitext.revision.parent.wikilinks,feature.wikitext.revision.ref_tags,feature.wikitext.revision.tags,feature.wikitext.revision.templates,feature.wikitext.revision.wikilinks,user.type,sample_weight
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,66.0,66.0,0.0,...,0.0,1097.0,1.0,373.0,0.0,1119.0,1.0,381.0,2,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,142.0,33.0,44.0,0.0,142.0,33.0,44.0,2,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,322.0,1.0,73.0,0.0,355.0,1.0,73.0,2,1
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,5.0,26.0,12.0,59.0,5.0,26.0,12.0,59.0,0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,44.0,0.0,...,0.0,17.0,6.0,16.0,0.0,26.0,7.0,20.0,2,1


In [6]:
# split into train and test set
X_with_weights_train, X_with_weights_test, y_train, y_test = train_test_split(X_with_weights, y, test_size=0.3, random_state=42)

# split train with weight to train and weight
X_train = X_with_weights_train.iloc[:,:-1].copy()
X_train_weights = X_with_weights_train.iloc[:,-1].copy()
X_test = X_with_weights_test.iloc[:,:-1].copy()

In [7]:
# parameters from 
#https://github.com/wikimedia/editquality/blob/master/model_info/enwiki.damaging.md
params= {'min_impurity_decrease': 0.0, 
         'loss': 'deviance', 
         'n_estimators': 700, 
         'min_impurity_split': None, 
         'verbose': 0, 
         'criterion': 'friedman_mse', 
         'subsample': 1.0, 
         #'center': True, 
         #'scale': True, 
         'presort': 'auto', 
         'init': None, 
         #'multilabel': False, 
         'max_depth': 7, 
         'random_state': None, 
         'learning_rate': 0.01, 
         'validation_fraction': 0.1, 
         'warm_start': False, 
         'min_samples_split': 2, 
         'min_samples_leaf': 1, 
         'min_weight_fraction_leaf': 0.0, 
         'n_iter_no_change': None, 
         'max_leaf_nodes': None, 
         'tol': 0.0001, 
         'max_features': 'log2'}
         #'labels': [True, False], 
         #'label_weights': OrderedDict([(True, 10)])

## 3. Dropping the sensitive features

In [17]:
# drop the sensitive features
X_train_nosen = X_train.iloc[:,:-1].copy()
X_test_nosen = X_test.iloc[:,:-1].copy()

In [18]:
gb_clf_nosen = GradientBoostingClassifier(**params)
gb_clf_nosen.fit(X_train_nosen, y_train, sample_weight=X_train_weights)



GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.01, loss='deviance', max_depth=7,
                           max_features='log2', max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=700,
                           n_iter_no_change=None, presort='auto',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [19]:
# test on test set
y_pred_test = gb_clf_nosen.predict(X_test_nosen)

print("accuracy: ", accuracy_score(y_test, y_pred_test).round(4))
print("balanced accuracy: ", balanced_accuracy_score(y_test, y_pred_test).round(4))
print("f1: ", f1_score(y_test, y_pred_test).round(4))
print("recall: ", recall_score(y_test, y_pred_test).round(4))
print("precision: ", precision_score(y_test, y_pred_test).round(4))
print("auc: ", roc_auc_score(y_test, y_pred_test).round(4))

accuracy:  0.946
balanced accuracy:  0.7158
f1:  0.3677
recall:  0.4691
precision:  0.3023
auc:  0.7158


### 3.1 bias for no sensitive feature model

In [21]:
# No sensitive feature model
df_bias_nosen = pd.DataFrame(columns = ['score', 'label_value', 'user.type'])
df_bias_nosen['label_value'] = y_test
df_bias_nosen['user.type'] = X_test.iloc[:,-1].copy().astype(str)
df_bias_nosen['score'] = gb_clf_nosen.predict(X_test_nosen)

g = Group()
xtab, _ = g.get_crosstabs(df_bias_nosen)
absolute_metrics = g.list_absolute_metrics(xtab)
xtab[[col for col in xtab.columns if col not in absolute_metrics]]

model_id, score_thresholds 0 {'rank_abs': [301]}


  col_group = df.fillna({col: pd.np.nan}).groupby(col)


Unnamed: 0,model_id,score_threshold,k,attribute_name,attribute_value,pp,pn,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities
0,0,binary 0/1,301,user.type,0,167,827,102,66,761,65,131,863,994,5796
1,0,binary 0/1,301,user.type,1,45,362,26,26,336,19,45,362,407,5796
2,0,binary 0/1,301,user.type,2,89,4306,82,11,4295,7,18,4377,4395,5796


In [22]:
xtab[['attribute_name', 'attribute_value'] + absolute_metrics].round(4)

Unnamed: 0,attribute_name,attribute_value,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
0,user.type,0,0.4962,0.8818,0.0798,0.6108,0.1182,0.5038,0.9202,0.3892,0.5548,0.168,0.1318
1,user.type,1,0.4222,0.9282,0.0718,0.5778,0.0718,0.5778,0.9282,0.4222,0.1495,0.1106,0.1106
2,user.type,2,0.3889,0.9813,0.0026,0.9213,0.0187,0.6111,0.9974,0.0787,0.2957,0.0203,0.0041


## 4. Balance # of damaging edits within each group

In [None]:
# balancing