In [1]:
import warnings
warnings.filterwarnings( 'ignore' )
import gc
import os
import time
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold, train_test_split

import tensorflow as tf
import keras.backend as K
from keras.models import load_model
from keras.preprocessing import text, sequence

from tqdm import tqdm

# Restrict CUDA-based libraries in this process to the GPU with index 1.
# NOTE(review): the index is machine-specific — confirm it matches the host this notebook runs on.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

Using TensorFlow backend.


In [2]:
import All_RUT_Models
import RUT_Utils

In [3]:
# Padding length passed to `pad_sequences` and vocabulary cap passed to the
# Keras `Tokenizer` further down in this notebook.
max_len = 150
max_features = 153958

# Name of the trained model; it doubles as the model sub-directory and as the
# stem of the saved `.h5` file.
modelname = 'CNN_George_w2v_sg_biased'

modelpath = './Models/' + modelname + '/'

# `exist_ok=True` makes directory creation idempotent and removes the
# check-then-create race of `os.path.exists()` followed by `os.makedirs()`.
os.makedirs( modelpath, exist_ok=True )
os.makedirs( './Results/', exist_ok=True )

In [4]:
def remove_sc(x):
    """Strip special characters from a string.

    Keeps only the characters of ``x`` for which ``str.isalnum()`` is true,
    plus the plain space character; everything else is dropped.

    Parameters
    ----------
    x : str
        Text to clean.

    Returns
    -------
    str
        ``x`` with all non-alphanumeric, non-space characters removed.
    """
    return "".join(ch for ch in x if ch == ' ' or ch.isalnum())

# Load the training comments and keep only the text and the toxicity label,
# renamed to the column names used in the rest of the notebook.
df = pd.read_csv('wiki_train.csv')
# df = pd.read_csv(r'training_data/wiki_train.csv')
df = df[['comment', 'is_toxic']].rename(
    columns={'comment': 'Comment', 'is_toxic': 'Toxic'}
)

# Normalise the text: force str, lower-case, then strip special characters.
df['Comment'] = df['Comment'].astype( 'str' ).str.lower().apply( remove_sc )

# Collapse the label to 0/1 with a single direct column assignment.
# The chained form `df['Toxic'][mask] = value` writes through an intermediate
# object, raises SettingWithCopyWarning, and has no effect on `df` when pandas
# Copy-on-Write is active.
df['Toxic'] = df['Toxic'].astype( bool ).astype( 'int64' )
df.shape

(95692, 2)

In [5]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.Comment,
    df.Toxic,
    test_size=0.20,
    random_state=0,
)

# Discard any Keras / TensorFlow graph state left over from earlier runs.
K.clear_session()
tf.reset_default_graph()

# Fit the Keras tokenizer on the training comments only, with the vocabulary
# capped via num_words=max_features.
# NOTE(review): presumably the split and tokenizer must match the ones used
# when the saved model was trained so that word indices agree — confirm.
tokenizer = text.Tokenizer( num_words=max_features )
tokenizer.fit_on_texts( X_train.values )

In [6]:
# Restore the trained Keras model stored as <modelpath><modelname>.h5
model_file = modelpath + modelname + '.h5'
loaded_model = load_model( model_file )

In [7]:
# Read the stored predictions. Note that this rebinds `df`, which until here
# held the training comments.
df = pd.read_csv('SR_Results/CNN_George_w2v_sg_biased_synthetic_predictions.csv')

# Complement of the predicted probability, and the row-wise larger of the two
# values.
toxic_prob = df['Predicted_Probability']
df['Predicted_Probability_Nontoxic'] = 1 - toxic_prob
both_probs = df[['Predicted_Probability_Nontoxic', 'Predicted_Probability']]
df['Maximum'] = both_probs.max(axis=1)
df.head()

Unnamed: 0,Comment,Toxic,Jew,Muslim,Christian,Hindu,Asian,Black,White,Female,...,@0.6,@0.65,@0.7,@0.75,@0.8,@0.85,@0.9,@0.95,Predicted_Probability_Nontoxic,Maximum
0,murder american,1,0,0,0,0,0,0,0,0,...,0.022528,0.022528,0.022528,0.022528,0.022528,0.022528,0.022528,0.022528,0.977472,0.977472
1,respect trans,0,0,0,0,0,0,0,0,0,...,0.008496,0.008496,0.008496,0.008496,0.008496,0.008496,0.008496,0.008496,0.991504,0.991504
2,respect homosexual,0,0,0,0,0,0,0,0,0,...,0.008496,0.008496,0.008496,0.008496,0.008496,0.008496,0.008496,0.008496,0.991504,0.991504
3,hug mexican,0,0,0,0,0,0,0,0,0,...,0.282306,0.282306,0.282306,0.282306,0.282306,0.282306,0.282306,0.282306,0.717694,0.717694
4,kill sikh,1,0,0,0,0,0,0,0,0,...,0.79772,0.79772,0.79772,0.79772,0.79772,0.79772,0.79772,0.79772,0.20228,0.79772


In [8]:
# Thresholds for which a "@<theta>" column exists in `df`.
theta = [0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

# Turn every "@<theta>" column into a 0/1 label by cutting it at 0.5.
for threshold in theta:
    column = '@{}'.format(threshold)
    df[column] = ( df[column] >= 0.5 ).astype( 'int32' )

df.head()

Unnamed: 0,Comment,Toxic,Jew,Muslim,Christian,Hindu,Asian,Black,White,Female,...,@0.6,@0.65,@0.7,@0.75,@0.8,@0.85,@0.9,@0.95,Predicted_Probability_Nontoxic,Maximum
0,murder american,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.977472,0.977472
1,respect trans,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.991504,0.991504
2,respect homosexual,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.991504,0.991504
3,hug mexican,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.717694,0.717694
4,kill sikh,1,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,0.20228,0.79772


In [None]:
# Identity-term lexicons, one list per group. Terms are matched as plain
# substrings of the (lower-cased) comment by `situational_testing`.

# --- religion ---------------------------------------------------------------
jew = ['jew', 'jewish', 'jewboy', 'zionis', 'torah']
muslim = [
    'muslim', 'moslem', 'islam', 'mosque', 'hadith', 'mevlana', 'quran',
    'koran', 'deobandi', 'shia', 'sunni', 'mohammed', 'mohemmed', 'mohammad',
    'allah', 'jihadi', 'islamis',
]
christian = [
    'christian', 'catholic', 'jesus', 'christ', 'cathedral', 'church',
    'bible', 'priest', 'orthodox', 'baptis', 'pope',
]

# --- race -------------------------------------------------------------------
asian = ['asian', 'asian people', 'asians']
black = [
    'black', 'blacks', 'black people', 'negro', 'nigga', 'nigger', 'nig',
    'african', 'coon',
]
# NOTE(review): 'supermacist' looks like a misspelling of 'supremacist' and
# would therefore never match the correctly spelled word — confirm before
# changing, since it alters which comments are treated as group mentions.
white = [
    'white', 'whites', 'white people', 'caucasian', 'caucasoid',
    'supermacist',
]

# --- gender -----------------------------------------------------------------
# NOTE(review): 'her' appears twice, so it contributes two identical
# neighbours wherever this list is expanded — confirm whether intended.
female = [
    'aunt', 'her', 'mother', 'sister', 'sis', 'her', 'she', 'female',
    'women', 'woman', 'girl', 'lady',
]
# Several terms carry leading/trailing spaces; presumably so that they do not
# match inside words such as 'she', 'female' or 'women' — confirm.
male = [
    'uncle', ' his', 'father', 'bro', 'brother', 'him', ' he', ' male ',
    ' men ', ' man ', 'boy', 'guy', 'dude',
]

# --- sexuality --------------------------------------------------------------
homosexual = ['gay', 'lesbian', 'homosexual', 'homos']
heterosexual = ['heterosexual', 'husband', 'wife']

# Word lists grouped by bias type. The inner keys identify a word list and are
# what the neighbour-construction helpers receive as `wordslist_id`.
religious = {
    'jew': jew,
    'muslim': muslim,
    'christian': christian,
}
race = {
    'asian': asian,
    'black': black,
    'white': white,
}
# NOTE(review): the male list is registered under the key 'men'.
gender = {
    'female': female,
    'men': male,
}
sexuality = {
    'homosexual': homosexual,
    'heterosexual': heterosexual,
}

# Top-level lookup: bias type -> {word-list key -> list of terms}.
bias = {
    'religious': religious,
    'race': race,
    'gender': gender,
    'sexuality': sexuality,
}


def construct_favored_nn(comment, word, wordslist_id, bias_groups=None):
    """Build counterfactual variants of a comment that mentions a group term.

    For every word list except the excluded one(s), each of its terms is
    substituted for all occurrences of ``word`` in ``comment``. A final
    variant with ``word`` removed altogether is appended last.

    Parameters
    ----------
    comment : str
        The original comment text.
    word : str
        The term found in ``comment`` that is to be swapped out.
    wordslist_id : str or iterable of str
        Key (or keys) of the word list(s) that must not be used as
        substitutes, typically the list ``word`` itself came from.
    bias_groups : dict, optional
        Mapping ``{bias type: {word-list key: [terms]}}``. Defaults to the
        module-level ``bias`` dictionary.

    Returns
    -------
    list of str
        One variant per substitute term, in dictionary order, followed by the
        comment with ``word`` deleted.
    """
    if bias_groups is None:
        bias_groups = bias

    # Exclude word lists by key equality. Testing `key not in wordslist_id`
    # against a string id is a substring check, which would wrongly drop a
    # list whose key happens to be contained in the id (e.g. 'men' in 'women').
    if isinstance(wordslist_id, str):
        excluded = {wordslist_id}
    else:
        excluded = set(wordslist_id)

    neighbors = [
        comment.replace(word, substitute)
        for groups in bias_groups.values()
        for key, terms in groups.items()
        if key not in excluded
        for substitute in terms
    ]
    neighbors.append(comment.replace(word, ''))
    return neighbors

def construct_deprived_nn(comment, wordslist_id, bias_groups=None):
    """Build variants of a comment by appending group terms to it.

    For every word list except the excluded one(s), each of its terms is
    appended to ``comment`` after a single space. The unmodified comment is
    appended as the last variant.

    Parameters
    ----------
    comment : str
        The original comment text.
    wordslist_id : str or iterable of str
        Key (or keys) of the word list(s) to skip. An empty string excludes
        nothing.
    bias_groups : dict, optional
        Mapping ``{bias type: {word-list key: [terms]}}``. Defaults to the
        module-level ``bias`` dictionary.

    Returns
    -------
    list of str
        One variant per appended term, in dictionary order, followed by the
        original comment.
    """
    if bias_groups is None:
        bias_groups = bias

    # Exclude word lists by key equality. Testing `key not in wordslist_id`
    # against a string id is a substring check, which would wrongly drop a
    # list whose key happens to be contained in the id (e.g. 'men' in 'women').
    if isinstance(wordslist_id, str):
        excluded = {wordslist_id}
    else:
        excluded = set(wordslist_id)

    neighbors = [
        comment + ' ' + term
        for groups in bias_groups.values()
        for key, terms in groups.items()
        if key not in excluded
        for term in terms
    ]
    neighbors.append(comment)
    return neighbors

def situational_testing(comment):
    """Generate the neighbour comments used to re-test a prediction.

    The ``bias`` dictionary is scanned in its own order (bias type, then word
    list, then term) for the first term that occurs as a substring of
    ``comment``.

    * If a term is found, the neighbours come from ``construct_favored_nn``,
      called with that term and the key of the word list it belongs to.
    * If no term is found, the neighbours come from ``construct_deprived_nn``
      with an empty word-list key.

    Parameters
    ----------
    comment : str
        Comment text to build neighbours for.

    Returns
    -------
    list of str
        The neighbour comments produced by the selected helper.
    """
    # First (term, word-list key) pair whose term appears in the comment.
    first_hit = next(
        (
            (term, list_key)
            for word_lists in bias.values()
            for list_key, terms in word_lists.items()
            for term in terms
            if term in comment
        ),
        None,
    )

    if first_hit is None:
        # No group term is mentioned in the comment.
        return construct_deprived_nn(comment, '')

    matched_term, matched_list = first_hit
    return construct_favored_nn(comment, matched_term, matched_list)

In [10]:
def situational_roc(df, theta_val):
    """Re-label low-confidence rows by majority vote over neighbour comments.

    For every row whose ``Maximum`` value is below ``theta_val``, neighbour
    comments are generated with ``situational_testing``, scored with
    ``loaded_model``, and the row's ``'@<theta_val>'`` entry is overwritten
    with 1 if at least half of the neighbours score >= 0.5, otherwise 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Comment``, ``Maximum`` and
        ``'@' + str(theta_val)``. It is modified in place.
    theta_val : float
        Confidence threshold; also selects the column to update.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after modification.

    Raises
    ------
    KeyError
        If the ``'@' + str(theta_val)`` column is missing.
    """
    column = '@' + str(theta_val)
    if column not in df.columns:
        raise KeyError(column)

    # Only the row labels are needed, so take them straight from the filtered
    # frame instead of first selecting columns (which made the function fail
    # whenever an unrelated 'Sum' column was absent).
    uncertain_rows = df.loc[df['Maximum'] < theta_val].index

    for row in uncertain_rows:
        sentences = situational_testing(df.loc[row, 'Comment'])
        comments = tokenizer.texts_to_sequences( sentences )
        comments = sequence.pad_sequences( comments, maxlen=max_len )
        # Column 1 of the model output is used as the score.
        # NOTE(review): presumably the toxic-class probability — confirm.
        predictions = loaded_model.predict( comments, verbose=0, batch_size=2048 )[ :, 1 ]
        # Majority vote in integer arithmetic: at least half of the
        # neighbours must score >= 0.5 (ties count as 1).
        toxic_votes = int( (predictions >= 0.5).sum() )
        df.loc[row, column] = int( 2 * toxic_votes >= len(predictions) )
    return df

In [11]:
%%time

# Run the situational re-labelling once per threshold; each pass updates the
# matching "@<theta>" column of `df`.
for theta_val in tqdm(theta):
    df = situational_roc(df, theta_val)

100%|██████████| 9/9 [22:38<00:00, 150.91s/it]

CPU times: user 23min 34s, sys: 1min 39s, total: 25min 14s
Wall time: 22min 38s





In [12]:
# Persist the updated labels.
# NOTE(review): this is the same path the predictions were read from earlier
# in this notebook, so the input file is overwritten — confirm that is intended.
predictions_csv = 'SR_Results/CNN_George_w2v_sg_biased_synthetic_predictions.csv'
df.to_csv(predictions_csv, index=False)