In [1]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings (e.g. chained-assignment
# warnings) and scikit-learn deprecation notices — consider narrowing it.
warnings.filterwarnings('ignore')

In [2]:
import re
import gc
import os
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, classification_report, confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

from lime import lime_text
from lime.lime_text import LimeTextExplainer
from sklearn.pipeline import make_pipeline

from keras.preprocessing import text

Using TensorFlow backend.


In [None]:
# Identity-term indicator columns for which per-term false positives / false negatives
# are measured and compared against the overall counts.
# NOTE(review): the synthetic test set also exposes a 'Hindu' column that is not listed
# here — confirm the omission is intended.
wordslists = [
    'Jew', 'Muslim', 'Christian',
    'Asian', 'Black', 'White',
    'Female', 'Male',
    'Heterosexual', 'Homosexual_gay_or_lesbian',
]

# Per-term score containers: one for the raw predictions, one for the ROC-adjusted ones.
scoresBeforeROC = dict()
scoresAfterROC = dict()

In [4]:
def ML_Model():
    """Return a fresh, unfitted LogisticRegression with default hyper-parameters."""
    return LogisticRegression()

def ROC(df, theta):
    """Relabel low-confidence predictions according to the row's 'Sum' group.

    Rows whose 'Maximum' is below ``theta`` are relabelled:
    'Sum' == 0 ("deprived") becomes 1, 'Sum' >= 1 ("favored") becomes 0.
    All other rows keep their current 'PredictedLabel'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'PredictedLabel', 'Maximum' and 'Sum'.
    theta : float
        Threshold on 'Maximum' below which a prediction is relabelled.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; it is modified in place.
    """
    low_confidence = df['Maximum'] < theta
    # Use a single .loc assignment instead of chained indexing (df[col][mask] = v):
    # the chained form writes to a temporary and is not guaranteed to update df.
    df.loc[low_confidence & (df['Sum'] == 0), 'PredictedLabel'] = 1  # deprived
    df.loc[low_confidence & (df['Sum'] >= 1), 'PredictedLabel'] = 0  # favored
    return df

def ROC_BAT(df, theta, bat):
    """Apply the ROC relabelling followed by the BAT threshold on 'Difference'.

    Step 1 (ROC): rows with 'Maximum' < ``theta`` are relabelled; 'Sum' == 0
    ("deprived") becomes 1 and 'Sum' >= 1 ("favored") becomes 0.
    Step 2 (BAT): for rows with 'Sum' == 0 or 'Sum' >= 1, the label becomes 1
    when 'Difference' > ``bat`` and 0 when 'Difference' < ``bat``. Rows with
    'Difference' exactly equal to ``bat`` keep the label from step 1.

    NOTE(review): step 2 uses the same rule for both 'Sum' groups, so it
    overwrites the step-1 label for every such row whose 'Difference' differs
    from ``bat`` — confirm this is the intended interaction.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'PredictedLabel', 'Maximum', 'Sum' and 'Difference'.
    theta : float
        ROC threshold on 'Maximum'.
    bat : float
        BAT threshold on 'Difference'.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; it is modified in place.
    """
    deprived = df['Sum'] == 0
    favored = df['Sum'] >= 1

    # ROC — .loc is used throughout because chained indexing (df[col][mask] = v)
    # is not guaranteed to write back into df.
    low_confidence = df['Maximum'] < theta
    df.loc[low_confidence & deprived, 'PredictedLabel'] = 1  # deprived
    df.loc[low_confidence & favored, 'PredictedLabel'] = 0   # favored

    # BAT
    above = df['Difference'] > bat
    below = df['Difference'] < bat
    df.loc[deprived & above, 'PredictedLabel'] = 1
    df.loc[deprived & below, 'PredictedLabel'] = 0
    df.loc[favored & above, 'PredictedLabel'] = 1
    df.loc[favored & below, 'PredictedLabel'] = 0
    return df

def evaluations(df):
    """Compute binary classification metrics from 'TrueLabel' and 'PredictedLabel'.

    Both columns are cast to int in place on ``df`` before scoring. Labels are
    assumed to be 0 (negative) and 1 (positive).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'TrueLabel' and 'PredictedLabel'.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, cr, cm, _tn, _fp, _fn, _tp)`` where
        ``cr`` is the classification report text, ``cm`` the 2x2 confusion
        matrix and the last four entries are its cells in row-major order.
    """
    df['TrueLabel'] = df['TrueLabel'].astype('int')
    df['PredictedLabel'] = df['PredictedLabel'].astype('int')
    y_true, y_pred = df['TrueLabel'], df['PredictedLabel']

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    cr = classification_report(y_true, y_pred)
    # Pin the label set so the matrix is always 2x2; without it a frame holding a
    # single class yields a 1x1 matrix and the four-way unpacking below fails.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    _tn, _fp, _fn, _tp = cm.ravel()

    return accuracy, precision, recall, f1, cr, cm, _tn, _fp, _fn, _tp

# Per-term false-positive / false-negative counts and their gaps to the overall counts
def EERs(df, wordslists, scores, tnoverall, fpoverall, fnoverall, tpoverall):
    """Collect per-term FP/FN counts and sum their differences to the overall counts.

    For every column name in ``wordslists`` the rows with that column equal to 1
    are selected. Their false positives (true 0, predicted 1) and false negatives
    (true 1, predicted 0) are counted and stored in ``scores[term]``. The gaps
    ``fpoverall - fp`` and ``fnoverall - fn`` are accumulated over all terms
    that have at least one row. Terms without rows are skipped entirely.

    NOTE(review): despite the "rate" naming (fnrd / fprd), the accumulated
    values are differences of raw counts, not of rates — confirm this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'TrueLabel', 'PredictedLabel' (0/1 labels) and one
        indicator column per entry of ``wordslists``.
    wordslists : iterable of str
        Indicator column names to evaluate.
    scores : dict
        Updated in place with ``{term: {'fp': ..., 'fn': ...}}`` and returned.
    tnoverall, fpoverall, fnoverall, tpoverall : int
        Confusion-matrix cells of the whole data set.

    Returns
    -------
    tuple
        ``(scores, fnrd, fprd, pbr)`` where ``pbr`` is
        ``(fpoverall - fnoverall) / (tnoverall + fpoverall + fnoverall + tpoverall)``.
    """
    fnrd, fprd = 0, 0
    for term in wordslists:
        subgroup = df.loc[df[term] == 1, ['TrueLabel', 'PredictedLabel']]
        if len(subgroup) == 0:
            # No rows mention this term: nothing to count and nothing to accumulate.
            # (Previously the fp/fn of the preceding term were reused here, or the
            # names were undefined when the first term was empty.)
            continue
        # Count the two error cells directly so a subgroup containing a single
        # class cannot produce an undersized confusion matrix.
        fp = ((subgroup['TrueLabel'] == 0) & (subgroup['PredictedLabel'] == 1)).sum()
        fn = ((subgroup['TrueLabel'] == 1) & (subgroup['PredictedLabel'] == 0)).sum()
        scores[term] = {'fp': fp, 'fn': fn}
        fprd = fprd + (fpoverall - fp)
        fnrd = fnrd + (fnoverall - fn)
    pbr = (fpoverall - fnoverall) / (tnoverall + fpoverall + fnoverall + tpoverall)
    return scores, fnrd, fprd, pbr

In [1]:
#training set
# Alternative (non-debiased) training file:
# df = pd.read_csv(r'training_data/wiki_train.csv')
df = (
    pd.read_csv(r'training_data/wiki_debias_train.csv')[['comment', 'is_toxic']]
    .rename(columns={'comment': 'Comment', 'is_toxic': 'Toxic'})
    # Normalise the label to integer 0/1 in one vectorised cast. The previous
    # chained assignment (df['Toxic'][mask] = v) is not guaranteed to write back
    # into df and could leave the column with a mixed dtype.
    .assign(Toxic=lambda frame: frame['Toxic'].astype(bool).astype(int))
)
df.head()

NameError: name 'pd' is not defined

In [6]:
# Hold out 10% of the comments for evaluation; the seed is fixed for reproducibility.
X_train, X_test, ytrain, ytest = train_test_split(
    df.Comment,
    df.Toxic,
    test_size=0.10,
    random_state=1990,
)

# Learn the TF-IDF vocabulary from the training comments only,
# then turn both splits into TF-IDF feature matrices.
vectorizer = TfidfVectorizer().fit(X_train)
xtrain = vectorizer.transform(X_train)
xtest = vectorizer.transform(X_test)

# Build the classifier and fit it on the training features.
model = ML_Model()
model.fit(xtrain, ytrain)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [7]:
def lime_pipeline(vectorizer, model, X_test, ytest):
    """Score comments with ``vectorizer`` + ``model`` and tabulate the predictions.

    Parameters
    ----------
    vectorizer : object
        Fitted transformer exposing ``transform``.
    model : object
        Fitted classifier exposing ``predict_proba``; column 0 is read as the
        non-toxic probability and column 1 as the toxic probability.
    X_test : sequence of str
        Comments to score. If it is a pandas Series its index is used to align
        ``ytest``.
    ytest : sequence
        True labels for ``X_test``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Comment', 'ProbabilityNontoxic', 'ProbabilityToxic',
        'TrueLabel', 'PredictedLabel' (1 when 'ProbabilityToxic' >= 0.5, else 0),
        'Maximum' (larger of the two probabilities) and 'Difference' (absolute
        gap between them), on a fresh 0..n-1 index.
    """
    # Vectorise then classify — the same computation a vectorizer->model pipeline
    # performs for predict_proba, without building an intermediate object.
    probabilities = model.predict_proba(vectorizer.transform(list(X_test)))

    predictions_df = pd.DataFrame({'Comment': X_test})
    predictions_df['ProbabilityNontoxic'] = probabilities[:, 0]
    predictions_df['ProbabilityToxic'] = probabilities[:, 1]
    predictions_df['TrueLabel'] = ytest

    # Threshold in one vectorised step; the former chained assignment
    # (df[col][mask] = 1) is not guaranteed to write back into the frame.
    predictions_df['PredictedLabel'] = (predictions_df['ProbabilityToxic'] >= 0.5).astype(int)

    predictions_df['Maximum'] = predictions_df[['ProbabilityToxic', 'ProbabilityNontoxic']].max(axis=1)
    predictions_df['Difference'] = (
        predictions_df['ProbabilityToxic'] - predictions_df['ProbabilityNontoxic']
    ).abs()

    return predictions_df.reset_index(drop=True)

In [8]:
# Score the classifier on the held-out split and report the headline metrics.
predictions_df = lime_pipeline(vectorizer, model, X_test, ytest)
(
    accuracy, precision, recall, f1,
    cr, cm,
    _tn, _fp, _fn, _tp,
) = evaluations(predictions_df)
print(accuracy, precision, recall, f1)
print(_tn, _fp, _fn, _tp)

0.9534086325131101 0.8934707903780069 0.5652173913043478 0.6924101198402131
8934 62 400 520


In [9]:
# Synthetic test set: short comments plus one indicator column per identity term.
testset = pd.read_csv(r'testing_data/Synthetic_debias_test.csv')
# Optional filter, currently disabled: keep only rows whose 'Sum' is positive.
# testset = testset[testset['Sum'] > 0].reset_index(drop=True)
print(testset.shape)
testset.head()

(76564, 15)


Unnamed: 0,Comment,Toxic,Jew,Muslim,Christian,Hindu,Asian,Black,White,Female,Male,Heterosexual,Homosexual_gay_or_lesbian,PredictedLabel,Sum
0,murder american,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0
1,respect trans,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0
2,respect homosexual,0,0,0,0,0,0,0,0,0,0,0,1,1.0,1
3,hug mexican,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0
4,kill sikh,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0


In [10]:
#Before ROC
# Score the synthetic comments, then attach the predictions to the test-set columns.
# Columns that exist on both sides get the 'test' suffix on the test-set copy, and
# the two duplicated ones are dropped so the model's versions remain.
predictions_df = lime_pipeline(vectorizer, model, testset.Comment, testset.Toxic)
output_df = (
    testset
    .join(predictions_df, lsuffix='test')
    .drop(columns=['Commenttest', 'PredictedLabeltest'])
)
(
    accuracy, precision, recall, f1,
    cr, cm,
    _tn, _fp, _fn, _tp,
) = evaluations(output_df)
print(accuracy, precision, recall, f1)
print(_tn, _fp, _fn, _tp)

0.6038347003813803 0.9963786213786214 0.2084269369416436 0.3447396845971052
38253 29 30303 7979


In [11]:
# Per-term error summary for the unadjusted predictions; the overall confusion-matrix
# cells come from the preceding evaluations() call.
scoresb4, fnrdb4, fprdb4, pbrb4 = EERs(
    output_df,
    wordslists,
    scoresBeforeROC,
    tnoverall=_tn,
    fpoverall=_fp,
    fnoverall=_fn,
    tpoverall=_tp,
)
print(fnrdb4, fprdb4, pbrb4)

322935 294 -0.3954077634397367


In [12]:
#After ROC
# ROC() relabels the frame it receives in place and returns that same object, so it
# is given a copy: output_df keeps the unadjusted predictions and the "before" cells
# stay reproducible when re-run after this one.
roc_output_df = ROC(output_df.copy(), 0.6)
accuracy, precision, recall, f1, cr, cm, _tn, _fp, _fn, _tp = evaluations(roc_output_df)
print(accuracy, precision, recall, f1)
print(_tn, _fp, _fn, _tp)

0.6149887675670027 0.9959441189725101 0.23091792487330862 0.37490987743330934
38246 36 29442 8840


In [13]:
# Per-term error summary for the ROC-adjusted predictions; the overall confusion-matrix
# cells come from the preceding evaluations() call.
scoresAfter, fnrdAfter, fprdAfter, pbrAfter = EERs(
    roc_output_df,
    wordslists,
    scoresAfterROC,
    tnoverall=_tn,
    fpoverall=_fp,
    fnoverall=_fn,
    tpoverall=_tp,
)
print(fnrdAfter, fprdAfter, pbrAfter)

312547 386 -0.3840708426936942
