# Import Dataset

In [1]:
import os
import sys
import gensim
import pandas as pd
from gensim.models.doc2vec import LabeledSentence

data_physics = pd.read_csv('ad_physics.csv',index_col=0)
data_race = pd.read_csv('ad_race.csv',index_col=0)
data_religion = pd.read_csv('ad_religion.csv',index_col=0)
data_physics.head()



Unnamed: 0,sentence,physics,race,religion
0,I don't get why negroes always traveling to wh...,0,1,0
1,lmao how funny that true does know where the c...,0,0,0
2,@art_is_forever when did she publicly thank him?,0,0,0
3,Post a picture of Khloe already!!!!! Come on!!!!,0,0,0
4,@progreenlc no we don't. When did he become th...,0,0,0


# Preprocess Data

In [2]:
CONTRACTION_MAP = {"ain't": "is not", "aren't": "are not","can't": "cannot", 
                   "can't've": "cannot have", "'cause": "because", "could've": "could have", 
                   "couldn't": "could not", "couldn't've": "could not have","didn't": "did not", 
                   "doesn't": "does not", "don't": "do not", "hadn't": "had not", 
                   "hadn't've": "had not have", "hasn't": "has not", "haven't": "have not", 
                   "he'd": "he would", "he'd've": "he would have", "he'll": "he will", 
                   "he'll've": "he he will have", "he's": "he is", "how'd": "how did", 
                   "how'd'y": "how do you", "how'll": "how will", "how's": "how is", 
                   "I'd": "I would", "I'd've": "I would have", "I'll": "I will", 
                   "I'll've": "I will have","I'm": "I am", "I've": "I have", 
                   "i'd": "i would", "i'd've": "i would have", "i'll": "i will", 
                   "i'll've": "i will have","i'm": "i am", "i've": "i have", 
                   "isn't": "is not", "it'd": "it would", "it'd've": "it would have", 
                   "it'll": "it will", "it'll've": "it will have","it's": "it is", 
                   "let's": "let us", "ma'am": "madam", "mayn't": "may not", 
                   "might've": "might have","mightn't": "might not","mightn't've": "might not have", 
                   "must've": "must have", "mustn't": "must not", "mustn't've": "must not have", 
                   "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock", 
                   "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not",
                   "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would", 
                   "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", 
                   "she's": "she is", "should've": "should have", "shouldn't": "should not", 
                   "shouldn't've": "should not have", "so've": "so have","so's": "so as", 
                   "this's": "this is",
                   "that'd": "that would", "that'd've": "that would have","that's": "that is", 
                   "there'd": "there would", "there'd've": "there would have","there's": "there is", 
                   "they'd": "they would", "they'd've": "they would have", "they'll": "they will", 
                   "they'll've": "they will have", "they're": "they are", "they've": "they have", 
                   "to've": "to have", "wasn't": "was not", "we'd": "we would", 
                   "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", 
                   "we're": "we are", "we've": "we have", "weren't": "were not", 
                   "what'll": "what will", "what'll've": "what will have", "what're": "what are", 
                   "what's": "what is", "what've": "what have", "when's": "when is", 
                   "when've": "when have", "where'd": "where did", "where's": "where is", 
                   "where've": "where have", "who'll": "who will", "who'll've": "who will have", 
                   "who's": "who is", "who've": "who have", "why's": "why is", 
                   "why've": "why have", "will've": "will have", "won't": "will not", 
                   "won't've": "will not have", "would've": "would have", "wouldn't": "would not", 
                   "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would",
                   "y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",
                   "you'd": "you would", "you'd've": "you would have", "you'll": "you will", 
                   "you'll've": "you will have", "you're": "you are", "you've": "you have" } 

In [3]:
import re, nltk, string
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from bs4 import BeautifulSoup
from nltk.corpus import wordnet

stop_words = set(stopwords.words('english'))
wordnet_lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

def expand_contractions(text) :
    pattern = re.compile("({})".format("|".join(CONTRACTION_MAP.keys())),flags = re.DOTALL| re.IGNORECASE)
    
    def replace_text(t):
        txt = t.group(0)
        if txt.lower() in CONTRACTION_MAP.keys():
            return CONTRACTION_MAP[txt.lower()]
        
    expand_text = pattern.sub(replace_text,text)
    return expand_text

def remove_repeated_characters(word):
    pattern = re.compile(r"(\w*)(\w)\2(\w*)")
    substitution_pattern = r"\1\2\3"
    while True:
        if wordnet.synsets(word):
            return word
        new_word = pattern.sub(substitution_pattern,word)
        if new_word != word:
            word = new_word
            continue
        else:
            return new_word

def spelling_checker(word):
    checker = suggest(word)
    return checker[0][0]

def cleanhtml(text):
    cleanr = re.compile('&#[0-9]+;')
    cleantext = re.sub(cleanr, '', text)
    return cleantext

def clean_emoji(text):
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)
    
def normalizer(text):
    text = re.sub(r"http\S+", "", text.lower(), flags=re.MULTILINE) #remove url
    text = re.sub('@[^\s]+','',text) #remove username
    text = clean_emoji(text)
    text = cleanhtml(text)
    expand = expand_contractions(text)
    pattern = re.compile("[{}]".format(re.escape(string.punctuation)))
    filter_char =  filter(None,[pattern.sub('' ,expand)])
    text_filter_char =  " ".join(filter_char)
    tokens = nltk.WhitespaceTokenizer().tokenize(text_filter_char)
    lemmas = [wordnet_lemmatizer.lemmatize(t) for t in tokens]
    stems = [stemmer.stem(t) for t in lemmas]
    filtered_result = list(filter(lambda l: l not in stop_words, stems))
    concate = ' '.join(filtered_result)
    return concate

In [4]:
normalizer('people')

'peopl'

In [5]:
data_physics.sentence = data_physics.sentence.apply(normalizer)
data_race.sentence = data_race.sentence.apply(normalizer)
data_religion.sentence = data_religion.sentence.apply(normalizer)
data_physics.sentence.head()

0    get whi negro alway travel white countri take ...
1                  lmao funni true doe know camara lol
2                                       publicli thank
3                       post pictur khloe alreadi come
4    becom poster child faith partner pleas got caught
Name: sentence, dtype: object

# Split Data Train and Test

In [6]:
from sklearn.cross_validation import train_test_split
SEED = 2000

x_train_physics, x_test_physics, y_train_physics, y_test_physics = train_test_split(data_physics.sentence, data_physics.physics, test_size=.2, random_state=SEED)
x_train_race, x_test_race, y_train_race, y_test_race = train_test_split(data_race.sentence, data_race.race, test_size=.2, random_state=SEED)
x_train_religion, x_test_religion, y_train_religion, y_test_religion = train_test_split(data_religion.sentence, data_religion.religion, test_size=.2, random_state=SEED)



In [7]:
def labelize_text(text,label):
    result = []
    prefix = label
    for i, t in zip(text.index, text):
        result.append(LabeledSentence(t.split(), [prefix + '_%s' % i]))
    return result

all_x = pd.concat([x_train_physics, x_test_physics])
all_x_w2v = labelize_text(all_x, 'ALL')

# physics
x_train_physics = labelize_text(x_train_physics, 'TRAIN')
x_test_physics = labelize_text(x_test_physics, 'TEST')

# race
x_train_race = labelize_text(x_train_race, 'TRAIN')
x_test_race = labelize_text(x_test_race, 'TEST')

# religion
x_train_religion = labelize_text(x_train_religion, 'TRAIN')
x_test_religion = labelize_text(x_test_religion, 'TEST')

  """


# Train Word2Vec

In [8]:
from gensim.models.word2vec import Word2Vec
from tqdm import tqdm
from sklearn import utils
import numpy as np

model_w2v = Word2Vec(size=200, min_count=20)
model_w2v.build_vocab([x.words for x in tqdm(all_x_w2v)])
model_w2v.train([x.words for x in tqdm(all_x_w2v)], total_examples=len(all_x_w2v), epochs=1)

100%|█████████████████████████████████████████████████████████████████████████| 4174/4174 [00:00<00:00, 1238733.81it/s]
100%|██████████████████████████████████████████████████████████████████████████| 4174/4174 [00:00<00:00, 475514.71it/s]


(8059, 26188)

In [9]:
model_w2v.most_similar('cute')

  """Entry point for launching an IPython kernel.
  if np.issubdtype(vec.dtype, np.int):


[('anoth', 0.25935670733451843),
 ('realli', 0.2524394392967224),
 ('would', 0.24629220366477966),
 ('hate', 0.244271919131279),
 ('girl', 0.23824137449264526),
 ('child', 0.22931347787380219),
 ('nice', 0.2237091064453125),
 ('dumb', 0.22180451452732086),
 ('littl', 0.22179366648197174),
 ('pretti', 0.21975255012512207)]

# Build Document Vector using Average Word Vector With TF-IDF

In [10]:
from sklearn.preprocessing import scale
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from time import time
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(analyzer=lambda x: x)
matrix = vectorizer.fit_transform([x.words for x in all_x_w2v])
tfidf = dict(zip(vectorizer.get_feature_names(), vectorizer.idf_))

def build_Word_Vector(tokens, size):
    vec = np.zeros(size).reshape((1, size))
    count = 0.
    for word in tokens:
        try:
            vec += model_w2v[word].reshape((1, size)) * tfidf[word]
            count += 1.
        except KeyError: 
            
            continue
    if count != 0:
        vec /= count
    return vec

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [11]:
# physics
train_vecs_physics = np.concatenate([build_Word_Vector(z, 200) for z in tqdm(map(lambda x: x.words, x_train_physics))])
test_vecs_physics = np.concatenate([build_Word_Vector(z, 200) for z in tqdm(map(lambda x: x.words, x_test_physics))])

# race
train_vecs_race = np.concatenate([build_Word_Vector(z, 200) for z in tqdm(map(lambda x: x.words, x_train_race))])
test_vecs_race = np.concatenate([build_Word_Vector(z, 200) for z in tqdm(map(lambda x: x.words, x_test_race))])

# religion
train_vecs_religion = np.concatenate([build_Word_Vector(z, 200) for z in tqdm(map(lambda x: x.words, x_train_religion))])
test_vecs_religion = np.concatenate([build_Word_Vector(z, 200) for z in tqdm(map(lambda x: x.words, x_test_religion))])

3339it [00:00, 7624.17it/s]
835it [00:00, 8386.48it/s]
1040it [00:00, 9270.56it/s]
260it [00:00, 6896.82it/s]
688it [00:00, 5736.10it/s]
173it [00:00, 6658.91it/s]


# Bi-LSTM Aspect Detection Model for Physics

In [12]:
batch_size = 1
num_epochs = 100
hidden_size = 10
timesteps = 1
num_class = 1
data_dim = len(train_vecs_physics[0])
num_data = len(train_vecs_physics)
num_data_test = len(test_vecs_physics)

train_vecs_physics = train_vecs_physics.reshape((num_data, timesteps, data_dim))
y_train_physics = y_train_physics.reshape((num_data, num_class))
test_vecs_physics = test_vecs_physics.reshape((num_data_test, timesteps, data_dim))
y_test_physics = y_test_physics.reshape((num_data_test, num_class))

model_ad_physics = Sequential()
model_ad_physics.add(Bidirectional(LSTM(hidden_size, input_shape=(timesteps, data_dim)), merge_mode='concat'))
model_ad_physics.add(Dropout(0.5))
model_ad_physics.add(Dense(1, activation='sigmoid'))
model_ad_physics.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_ad_physics.fit(train_vecs_physics, y_train_physics, epochs=num_epochs, validation_data=[test_vecs_physics, y_test_physics])

  # This is added back by InteractiveShellApp.init_path()
  del sys.path[0]


Train on 3339 samples, validate on 835 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100


Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100


Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100


Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1de74ec19e8>

In [13]:
from sklearn.metrics import accuracy_score, classification_report

prediction = {}
prediction['ad_physics'] = model_ad_physics.predict(test_vecs_physics)

for i in range(len(prediction['ad_physics'])):
    prediction['ad_physics'][i][0] = round(prediction['ad_physics'][i][0])

accuracy = {}
accuracy['ad_physics'] = accuracy_score(y_test_physics, prediction['ad_physics'])
print("Accuracy: ", accuracy['ad_physics'], "\n")
print(classification_report(y_test_physics, prediction['ad_physics'], labels = [0, 1]))

Accuracy:  0.8562874251497006 

             precision    recall  f1-score   support

          0       0.84      0.89      0.87       431
          1       0.88      0.82      0.85       404

avg / total       0.86      0.86      0.86       835



# Bi-LSTM Aspect Detection Model for Race

In [14]:
data_dim = len(train_vecs_race[0])
num_data = len(train_vecs_race)
num_data_test = len(test_vecs_race)

train_vecs_race = train_vecs_race.reshape((num_data, timesteps, data_dim))
y_train_race = y_train_race.reshape((num_data, num_class))
test_vecs_race = test_vecs_race.reshape((num_data_test, timesteps, data_dim))
y_test_race = y_test_race.reshape((num_data_test, num_class))

model_ad_race = Sequential()
model_ad_race.add(Bidirectional(LSTM(hidden_size, input_shape=(timesteps, data_dim)), merge_mode='concat'))
model_ad_race.add(Dropout(0.5))
model_ad_race.add(Dense(1, activation='sigmoid'))
model_ad_race.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_ad_race.fit(train_vecs_race, y_train_race, epochs=num_epochs, validation_data=[test_vecs_race, y_test_race])

  
  


Train on 1040 samples, validate on 260 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100


Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100


Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1de794f62e8>

In [15]:
prediction['ad_race'] = model_ad_race.predict(test_vecs_race)

for i in range(len(prediction['ad_race'])):
    prediction['ad_race'][i][0] = round(prediction['ad_race'][i][0])

accuracy['ad_race'] = accuracy_score(y_test_race, prediction['ad_race'])
print("Accuracy: ", accuracy['ad_race'], "\n")
print(classification_report(y_test_race, prediction['ad_race'], labels = [0, 1]))

Accuracy:  0.8269230769230769 

             precision    recall  f1-score   support

          0       0.82      0.93      0.87       165
          1       0.84      0.65      0.73        95

avg / total       0.83      0.83      0.82       260



# Bi-LSTM Aspect Detection Model for Religion

In [16]:
data_dim = len(train_vecs_religion[0])
num_data = len(train_vecs_religion)
num_data_test = len(test_vecs_religion)

train_vecs_religion = train_vecs_religion.reshape((num_data, timesteps, data_dim))
y_train_religion = y_train_religion.reshape((num_data, num_class))
test_vecs_religion = test_vecs_religion.reshape((num_data_test, timesteps, data_dim))
y_test_religion = y_test_religion.reshape((num_data_test, num_class))

model_ad_religion = Sequential()
model_ad_religion.add(Bidirectional(LSTM(hidden_size, input_shape=(timesteps, data_dim)), merge_mode='concat'))
model_ad_religion.add(Dropout(0.5))
model_ad_religion.add(Dense(1, activation='sigmoid'))
model_ad_religion.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_ad_religion.fit(train_vecs_religion, y_train_religion, epochs=num_epochs, validation_data=[test_vecs_religion, y_test_religion])

  
  


Train on 688 samples, validate on 173 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100


Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100


Epoch 100/100


<keras.callbacks.History at 0x1de7d9810f0>

In [17]:
prediction['ad_religion'] = model_ad_religion.predict(test_vecs_religion)

for i in range(len(prediction['ad_religion'])):
    prediction['ad_religion'][i][0] = round(prediction['ad_religion'][i][0])

accuracy['ad_religion'] = accuracy_score(y_test_religion, prediction['ad_religion'])
print("Accuracy: ", accuracy['ad_religion'], "\n")
print(classification_report(y_test_religion, prediction['ad_religion'], labels = [0, 1]))

Accuracy:  0.861271676300578 

             precision    recall  f1-score   support

          0       0.86      0.91      0.89       103
          1       0.86      0.79      0.82        70

avg / total       0.86      0.86      0.86       173



# Save Model

In [18]:
import pickle

pickle.dump(model_ad_physics, open('model_ad_physics.sav', 'wb'))
pickle.dump(model_ad_race, open('model_ad_race.sav', 'wb'))
pickle.dump(model_ad_religion, open('model_ad_religion.sav', 'wb'))

# Load Model

In [19]:
model_ad_physics = pickle.load(open('model_ad_physics.sav', 'rb'))
model_ad_race = pickle.load(open('model_ad_race.sav', 'rb'))
model_ad_religion = pickle.load(open('model_ad_religion.sav', 'rb'))

# Aspect Detection Model for Predict New Data 

In [30]:
# 1:exist, 0:not given

def predict_aspect(text):
    text = normalizer(text)
    tokens = nltk.WhitespaceTokenizer().tokenize(text)
    vecs = build_Word_Vector(tokens, 200)
    vecs = vecs.reshape((1, 1, 200))
    aspect = {}
    aspect['physics'] = int(round(model_ad_physics.predict(vecs)[0][0]))
    aspect['race'] = int(round(model_ad_race.predict(vecs)[0][0]))
    aspect['religion'] = int(round(model_ad_religion.predict(vecs)[0][0]))
    return aspect

In [33]:
predict_aspect("nigga bitch")



{'physics': 1, 'race': 1, 'religion': 0}