In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report

# Helpers

In [18]:
def count_words(text):
    counter = 0
    for i in text:
        counter += len(i.split())
    return counter

def count_token(text):
    s = set()
    for i in text:
        tokenize = i.split()
        for j in tokenize:
            s.add(j)    
    return len(s)

def load_dataset(ds):
    if ds == 1:
        dataset_name = "GabHateCorpus"
    elif ds == 2:
        dataset_name = "Implicit_hate_corpus"
    elif ds == 3:
        dataset_name = "SE2019"
    else:
        dataset_name = "Balanced"

    filepath = "Dataset/"+dataset_name
    df = pd.read_csv(filepath+"/data_final.csv")
    
    print(df['class'].value_counts(normalize=True))
    return df, dataset_name

def split_data(df):
    test_size = 0.20
    x = np.array(df["text"])
    y = np.array(df["class"])

    x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = test_size, random_state=42) #random state ensure same sample
    print("Train Set :", x_train.shape, y_train.shape) 
    print("Test Set  :", x_test.shape, y_test.shape) 
    print("Total ", len(df))
    # y in digit form
    y_train_binary = np.array(list(map(lambda x:1 if x=="Hate" else 0, y_train)))
    y_test_binary = np.array(list(map(lambda x:1 if x=="Hate" else 0, y_test)))
    return x_train, y_train, y_train_binary, x_test, y_test, y_test_binary
    

# Deep Learning

In [19]:
from keras.models import Sequential
from keras.utils.data_utils import pad_sequences
from keras.layers.core import Activation, Dropout, Dense
from keras.layers import Flatten, GlobalMaxPooling1D, Embedding
from keras.layers import Conv1D, LSTM, SpatialDropout1D, Bidirectional, GRU, SimpleRNN, TextVectorization

from keras.metrics import BinaryAccuracy,Precision,Recall
import keras
from keras.models import load_model
import tensorflow as tf

from gensim.models import FastText, Word2Vec, KeyedVectors

## Helpers

In [20]:
def get_classification_report(i, cr):
    return [i, cr['accuracy'], cr['macro avg']['precision'], 
            cr['macro avg']['recall'], cr['macro avg']['f1-score'],
            cr['Hate']['f1-score'],cr['Non-Hate']['f1-score'], 
            cr['Hate']['support'],cr['Non-Hate']['support']]

def get_result_table():
    c = ['Model', 'Accuracy', 'precision', 'recall', 'f1-score', 'hate f1', "non-hate f1", 'hate support', 'non-hate support']
    result_table = pd.DataFrame(columns=c)
    return result_table

def get_result_single(y_test, y_test_pred, model_name, result_table):
    cr = classification_report(y_test, y_test_pred, labels=["Hate","Non-Hate"], output_dict=True)
    result_table.loc[len(result_table)] = get_classification_report(model_name, cr)

def nn_predict(model,x_test, y_test_binary):
    score = model.evaluate(x_test, y_test_binary, verbose=0)
    print("Score: ", score[0])
    print("Accuracy: ", score[1])

    y_test_pred_percent = model.predict(x_test, verbose=0)
    y_test_pred = np.where(y_test_pred_percent > 0.5, "Hate", "Non-Hate") 
    y_test_pred = y_test_pred.flatten()

    return y_test_pred

def save_model_nn(model, model_name, embedding_name, dataset_name):
    filename = f"models/{dataset_name}_{embedding_name}_{model_name}"
    model.save(filename)
    return filename

def load_model_nn(model_name):
    filename = f"models/{model_name}"
    print(filename)
    return load_model(filename) 

METRICS = [
    BinaryAccuracy(name="accuracy"),
    Precision(name="precision"),
    Recall(name="recall")
]

def compile_fit_save(x_train, y_train_binary, x_test,y_test_binary, model, model_name, embedding_name, dataset_name, save, epoch=5, batch_size=32, lr=0.01):    
    opt = keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=opt,
                loss='binary_crossentropy',
                metrics=METRICS)
    
    history = model.fit(x_train, y_train_binary, epochs=epoch,
                        validation_data=(x_test,y_test_binary),
                        batch_size = batch_size,
                        )

    if save: 
        save_model_nn(model, model_name, embedding_name, dataset_name)        
    print(f"acc {history.history['val_accuracy'][0]}")
    return model, history

## Embedding

### Glove

In [13]:
def glove_em(x_train):
    embedding_name = "glove"
    text_length = 50 

    custom_encoder = TextVectorization(
        standardize = None,
        output_sequence_length=text_length, 
    )
    custom_encoder.adapt(x_train)
    vocab = custom_encoder.get_vocabulary()
    print(f"total vocab {len(vocab)}")
    vocab_dict = dict(zip(vocab, range(len(vocab))))

    embeddings_dic = dict()
    glove_file = open("Dataset/trained/glove.42B.300d.txt", encoding="utf8")

    for line in glove_file:
        records = line.split()
        word = records[0]
        vector_dimensions = np.asarray(records[1:], dtype='float32')
        embeddings_dic[word] = vector_dimensions
    glove_file.close()
    print("Total words ", len(embeddings_dic))

    vocab_length = len(vocab) + 1
    embedding_dim = 300 

    hits = 0
    miss = 0
    missWord = []

    embedding_matrix = np.zeros((vocab_length, embedding_dim))
    for word, index in vocab_dict.items():
        embedding_vector = embeddings_dic.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector
            hits += 1
        else:
            miss +=1
            missWord.append(word)
    print("Converted %d words (%d misses)" % (hits, miss))

    custom_embedding = Embedding(vocab_length, embedding_dim, 
                embeddings_initializer=keras.initializers.Constant(embedding_matrix),
                trainable = False,
                input_length=text_length,
                mask_zero=True)
    
    return custom_encoder, custom_embedding, embedding_name, missWord

### FastText, Word2Vec

In [14]:
def get_fasttext_model():
    model_name="fasttext_trained"
    return KeyedVectors.load_word2vec_format("./Dataset/trained/wiki-news-300d-1M-subword.vec", binary=False), model_name

def get_word2vec_model():
    model_name = "word2vec_trained"
    return KeyedVectors.load_word2vec_format("./Dataset/trained/GoogleNews-vectors-negative300.bin", binary=True), model_name

def pre_trained_em(x_train, model_em, embedding_name):
    text_length = 50 

    custom_encoder = TextVectorization(
        standardize = None,
        output_sequence_length=text_length, 
    )
    custom_encoder.adapt(x_train)
    vocab = custom_encoder.get_vocabulary()
    print(f"total vocab {len(vocab)}")
    vocab_dict = dict(zip(vocab, range(len(vocab))))

    vocab_length = len(vocab) + 1
    embedding_dim = 300 

    hits = 0
    miss = 0
    missWord = []

    embedding_matrix = np.zeros((vocab_length, embedding_dim))
    keyVector_key = model_em.index_to_key
    print(f"total vector {len(keyVector_key)}")
    for word, index in vocab_dict.items():
        if word in keyVector_key:
            embedding_vector = np.array(model_em[word])
            if embedding_vector is not None:
                embedding_matrix[index] = embedding_vector
                hits += 1
        else:
            miss +=1
            missWord.append(word)
            
    print("Converted %d words (%d misses)" % (hits, miss))

    custom_embedding = Embedding(vocab_length, embedding_dim, 
                embeddings_initializer=keras.initializers.Constant(embedding_matrix),
                trainable = False,
                input_length=text_length,
                mask_zero=True)
    
    return custom_encoder, custom_embedding, embedding_name, missWord

### No pre-trained embedding

In [15]:
def noTrained_em(x_train):
    embedding_name = "no_train"
    text_length = 50 
    vector_size= 300

    custom_encoder = TextVectorization(
        standardize = None,
        output_sequence_length=text_length, 
    )
    custom_encoder.adapt(x_train)
    vocab = custom_encoder.get_vocabulary()
    print(f"total vocab {len(vocab)}")
    vocab_dict = dict(zip(vocab, range(len(vocab))))

    vocab_length = len(vocab) + 1
    embedding_dim = vector_size

    custom_embedding = Embedding(vocab_length, embedding_dim,
                input_length=text_length,
                mask_zero=True)
    return custom_encoder, custom_embedding, embedding_name

## Models

In [16]:
from keras.layers import BatchNormalization
def add_connected_layer(model):
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))

In [17]:
def cnn(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name, save = True, epoch = 10, batch_size=32, lr=0.01):
    model_name = "cnn"
    print(model_name)
    model = Sequential()
    model.add(custom_encoder)
    model.add(custom_embedding)
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(GlobalMaxPooling1D())
    add_connected_layer(model)
    return compile_fit_save(x_train, y_train_binary, x_test,y_test_binary,model, model_name, embedding_name, dataset_name, save, epoch, batch_size, lr)

def rnn(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name, save = True, epoch = 10, batch_size=32, lr=0.01):
    model_name = "rnn"
    print(model_name)
    model = Sequential()
    model.add(custom_encoder)
    model.add(custom_embedding)
    model.add(SimpleRNN(128))
    add_connected_layer(model)
    return compile_fit_save(x_train, y_train_binary, x_test,y_test_binary,model, model_name, embedding_name, dataset_name, save, epoch, batch_size, lr)

def lstm(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name, save = True, epoch = 10, batch_size=32, lr=0.01):
    model_name = "lstm"
    print(model_name)
    model = Sequential()
    model.add(custom_encoder)
    model.add(custom_embedding)
    model.add(LSTM(128))
    add_connected_layer(model)
    return compile_fit_save(x_train, y_train_binary, x_test,y_test_binary,model, model_name, embedding_name, dataset_name, save, epoch, batch_size, lr)

def gru(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name, save = True, epoch = 10, batch_size=32, lr=0.01):
    model_name = "gru"
    print(model_name)
    model = Sequential()
    model.add(custom_encoder)
    model.add(custom_embedding)
    model.add(GRU(128))
    add_connected_layer(model)
    return compile_fit_save(x_train, y_train_binary, x_test,y_test_binary,model, model_name, embedding_name, dataset_name, save, epoch, batch_size, lr)

# Test

In [22]:
def model_start_train(x_train, y_train_binary, x_test, y_test_binary, y_test, custom_encoder, custom_embedding, embedding_name, dataset_name, df_result):
    model, h = cnn(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name,save=False, epoch=8, batch_size=256, lr=0.001)
    y_test_pred = nn_predict(model, x_test, y_test_binary)
    get_result_single(y_test, y_test_pred, dataset_name+"_"+embedding_name+"_cnn", df_result)

    model, h = rnn(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name,save=False, epoch=8, batch_size=256, lr=0.001)
    y_test_pred = nn_predict(model, x_test, y_test_binary)
    get_result_single(y_test, y_test_pred, dataset_name+"_"+embedding_name+"_rnn", df_result)

    model, h = lstm(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name,save=False, epoch=8, batch_size=256, lr=0.001)
    y_test_pred = nn_predict(model, x_test, y_test_binary)
    get_result_single(y_test, y_test_pred, dataset_name+"_"+embedding_name+"_lstm", df_result)

    model, h = gru(x_train, y_train_binary, x_test,y_test_binary,custom_encoder, custom_embedding, embedding_name, dataset_name,save=False, epoch=8, batch_size=256, lr=0.001)
    y_test_pred = nn_predict(model, x_test, y_test_binary)
    get_result_single(y_test, y_test_pred, dataset_name+"_"+embedding_name+"_gru", df_result)

In [30]:
def train_dataset_all_model(df, dataset_name, df_result):
    x_train, y_train, y_train_binary, x_test, y_test, y_test_binary = split_data(df)
    print(dataset_name)

    # word2vec word embedding 
    pre_trained_model, model_name = get_word2vec_model()
    custom_encoder, custom_embedding, embedding_name, missWord = pre_trained_em(x_train, pre_trained_model, model_name)
    print(embedding_name)
    print(dataset_name)
    model_start_train(x_train, y_train_binary, x_test,y_test_binary,y_test, custom_encoder, custom_embedding, embedding_name, dataset_name, df_result)

    # fasttext word embedding 
    pre_trained_model, model_name = get_fasttext_model()
    custom_encoder, custom_embedding, embedding_name, missWord = pre_trained_em(x_train, pre_trained_model, model_name)
    print(embedding_name)
    print(dataset_name)
    model_start_train(x_train, y_train_binary, x_test,y_test_binary,y_test, custom_encoder, custom_embedding, embedding_name, dataset_name, df_result)

    # glove word embedding
    custom_encoder, custom_embedding, embedding_name, missWord = glove_em(x_train)
    print(embedding_name)
    print(dataset_name)
    model_start_train(x_train, y_train_binary, x_test,y_test_binary,y_test, custom_encoder, custom_embedding, embedding_name, dataset_name, df_result)

    # learned word embedding
    custom_encoder, custom_embedding, embedding_name = noTrained_em(x_train)
    print(embedding_name)
    print(dataset_name)
    model_start_train(x_train, y_train_binary, x_test,y_test_binary,y_test, custom_encoder, custom_embedding, embedding_name, dataset_name, df_result)

In [33]:
df_result = get_result_table()
# train gab dataset
df, dataset_name = load_dataset(1) 
train_dataset_all_model(df, dataset_name, df_result)

class
Non-Hate    0.876805
Hate        0.123195
Name: proportion, dtype: float64
Train Set : (21715,) (21715,)
Test Set  : (5429,) (5429,)
Total  27144
GabHateCorpus
total vocab 33351
total vector 3000000
Converted 22819 words (10532 misses)
word2vec_trained
GabHateCorpus
cnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.8933505415916443
Score:  0.25177374482154846
Accuracy:  0.9036654829978943
rnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.8861668705940247
Score:  0.3013761639595032
Accuracy:  0.8933505415916443
lstm
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.8907718062400818
Score:  0.2573467791080475
Accuracy:  0.9027445316314697
gru
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.8940873146057129
Score:  0.2581101357936859
Accuracy:  0.903849720954895
total vocab 33351
total vector 999994
Converted 24169 words (9182 miss

In [35]:
df_result

Unnamed: 0,Model,Accuracy,precision,recall,f1-score,hate f1,non-hate f1,hate support,non-hate support
0,GabHateCorpus_word2vec_trained_cnn,0.903666,0.77709,0.669429,0.705335,0.46359,0.947081,617.0,4812.0
1,GabHateCorpus_word2vec_trained_rnn,0.893351,0.733768,0.669968,0.694509,0.448046,0.940973,617.0,4812.0
2,GabHateCorpus_word2vec_trained_lstm,0.902745,0.772704,0.668909,0.703886,0.461224,0.946548,617.0,4812.0
3,GabHateCorpus_word2vec_trained_gru,0.90385,0.773624,0.681542,0.714574,0.482143,0.947005,617.0,4812.0
4,GabHateCorpus_fasttext_trained_cnn,0.899613,0.769999,0.634645,0.671516,0.39779,0.945243,617.0,4812.0
5,GabHateCorpus_fasttext_trained_rnn,0.894824,0.74597,0.623466,0.656535,0.370452,0.942619,617.0,4812.0
6,GabHateCorpus_fasttext_trained_lstm,0.902376,0.803162,0.618542,0.658071,0.369048,0.947095,617.0,4812.0
7,GabHateCorpus_fasttext_trained_gru,0.903481,0.795327,0.636121,0.676685,0.405896,0.947474,617.0,4812.0
8,GabHateCorpus_glove_cnn,0.904218,0.79368,0.645014,0.685637,0.423503,0.94777,617.0,4812.0
9,GabHateCorpus_glove_rnn,0.892798,0.735641,0.620204,0.651663,0.361842,0.941484,617.0,4812.0


In [37]:
df_result = get_result_table()
# train implicit dataset
df, dataset_name = load_dataset(2) 
train_dataset_all_model(df, dataset_name, df_result)

class
Non-Hate    0.618726
Hate        0.381274
Name: proportion, dtype: float64
Train Set : (17182,) (17182,)
Test Set  : (4296,) (4296,)
Total  21478
Implicit_hate_corpus
total vocab 20616
total vector 3000000
Converted 15542 words (5074 misses)
word2vec_trained
Implicit_hate_corpus
cnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.7160149216651917
Score:  0.5221508145332336
Accuracy:  0.7474395036697388
rnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6808659434318542
Score:  0.6316041350364685
Accuracy:  0.6727188229560852
lstm
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6952979564666748
Score:  0.5356711745262146
Accuracy:  0.7288175225257874
gru
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6545623540878296
Score:  0.5429683923721313
Accuracy:  0.7267225384712219
total vocab 20616
total vector 999994
Converted 16317 wor

In [38]:
df_result

Unnamed: 0,Model,Accuracy,precision,recall,f1-score,hate f1,non-hate f1,hate support,non-hate support
0,Implicit_hate_corpus_word2vec_trained_cnn,0.747439,0.73234,0.724198,0.727466,0.653687,0.801246,1632.0,2664.0
1,Implicit_hate_corpus_word2vec_trained_rnn,0.672719,0.649471,0.641875,0.644315,0.543803,0.744828,1632.0,2664.0
2,Implicit_hate_corpus_word2vec_trained_lstm,0.728818,0.712166,0.711913,0.712039,0.642528,0.781549,1632.0,2664.0
3,Implicit_hate_corpus_word2vec_trained_gru,0.726723,0.722606,0.676518,0.682966,0.565185,0.800747,1632.0,2664.0
4,Implicit_hate_corpus_fasttext_trained_cnn,0.726723,0.724219,0.675093,0.68142,0.561286,0.801555,1632.0,2664.0
5,Implicit_hate_corpus_fasttext_trained_rnn,0.6946,0.674907,0.653465,0.657729,0.545392,0.770067,1632.0,2664.0
6,Implicit_hate_corpus_fasttext_trained_lstm,0.70973,0.691233,0.688096,0.689483,0.610191,0.768774,1632.0,2664.0
7,Implicit_hate_corpus_fasttext_trained_gru,0.70973,0.70197,0.65617,0.660564,0.531379,0.789749,1632.0,2664.0
8,Implicit_hate_corpus_glove_cnn,0.738594,0.729065,0.698789,0.705867,0.607754,0.80398,1632.0,2664.0
9,Implicit_hate_corpus_glove_rnn,0.694832,0.695267,0.627898,0.625817,0.465116,0.786517,1632.0,2664.0


In [39]:
df_result = get_result_table()
# train se2019 dataset
df, dataset_name = load_dataset(3) 
train_dataset_all_model(df, dataset_name, df_result)

class
Non-Hate    0.578737
Hate        0.421263
Name: proportion, dtype: float64
Train Set : (10384,) (10384,)
Test Set  : (2596,) (2596,)
Total  12980
SE2019
total vocab 19393
total vector 3000000
Converted 13078 words (6315 misses)
word2vec_trained
SE2019
cnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6760400533676147
Score:  0.5274875164031982
Accuracy:  0.7446070909500122
rnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6714175939559937
Score:  0.7357242107391357
Accuracy:  0.652927577495575
lstm
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6633281707763672
Score:  0.5491708517074585
Accuracy:  0.7099383473396301
gru
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6598613262176514
Score:  0.5536291003227234
Accuracy:  0.7114791870117188
total vocab 19393
total vector 999994
Converted 13608 words (5785 misses)
fasttext_tra

In [40]:
df_result

Unnamed: 0,Model,Accuracy,precision,recall,f1-score,hate f1,non-hate f1,hate support,non-hate support
0,SE2019_word2vec_trained_cnn,0.744607,0.741205,0.744746,0.741999,0.71606,0.767938,1121.0,1475.0
1,SE2019_word2vec_trained_rnn,0.652928,0.645165,0.636664,0.637533,0.562834,0.712233,1121.0,1475.0
2,SE2019_word2vec_trained_lstm,0.709938,0.705085,0.706744,0.705715,0.67046,0.74097,1121.0,1475.0
3,SE2019_word2vec_trained_gru,0.711479,0.709244,0.712917,0.709295,0.6841,0.734491,1121.0,1475.0
4,SE2019_fasttext_trained_cnn,0.6953,0.696357,0.673845,0.67512,0.594151,0.75609,1121.0,1475.0
5,SE2019_fasttext_trained_rnn,0.677966,0.675475,0.678501,0.675418,0.646661,0.704176,1121.0,1475.0
6,SE2019_fasttext_trained_lstm,0.699153,0.695131,0.683872,0.685912,0.621425,0.750399,1121.0,1475.0
7,SE2019_fasttext_trained_gru,0.704931,0.699298,0.699126,0.69921,0.65773,0.740691,1121.0,1475.0
8,SE2019_glove_cnn,0.743837,0.740199,0.733684,0.735792,0.689687,0.781896,1121.0,1475.0
9,SE2019_glove_rnn,0.662558,0.655073,0.650919,0.652008,0.591418,0.712598,1121.0,1475.0


In [41]:
df_result = get_result_table()
# train balance dataset
df, dataset_name = load_dataset(4) 
train_dataset_all_model(df, dataset_name, df_result)

class
Hate        0.500427
Non-Hate    0.499573
Name: proportion, dtype: float64
Train Set : (27178,) (27178,)
Test Set  : (6795,) (6795,)
Total  33973
Balanced
total vocab 33332
total vector 3000000
Converted 21403 words (11929 misses)
word2vec_trained
Balanced
cnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.7027226090431213
Score:  0.5521321296691895
Accuracy:  0.7292126417160034
rnn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6768211722373962
Score:  0.620477557182312
Accuracy:  0.6769683361053467
lstm
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6866813898086548
Score:  0.5634526014328003
Accuracy:  0.7130242586135864
gru
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
acc 0.6878587007522583
Score:  0.5630669593811035
Accuracy:  0.7200883030891418
total vocab 33332
total vector 999994
Converted 22650 words (10682 misses)
fastte

In [42]:
df_result

Unnamed: 0,Model,Accuracy,precision,recall,f1-score,hate f1,non-hate f1,hate support,non-hate support
0,Balanced_word2vec_trained_cnn,0.729213,0.729339,0.729255,0.729196,0.731308,0.727084,3384.0,3411.0
1,Balanced_word2vec_trained_rnn,0.676968,0.677053,0.677005,0.676955,0.679047,0.674863,3384.0,3411.0
2,Balanced_word2vec_trained_lstm,0.713024,0.713111,0.71306,0.713014,0.714745,0.711282,3384.0,3411.0
3,Balanced_word2vec_trained_gru,0.720088,0.720205,0.720129,0.720073,0.722174,0.717972,3384.0,3411.0
4,Balanced_fasttext_trained_cnn,0.713466,0.714334,0.713333,0.71309,0.702703,0.723477,3384.0,3411.0
5,Balanced_fasttext_trained_rnn,0.67844,0.678439,0.678441,0.678438,0.677681,0.679195,3384.0,3411.0
6,Balanced_fasttext_trained_lstm,0.694923,0.696126,0.695072,0.694554,0.705163,0.683946,3384.0,3411.0
7,Balanced_fasttext_trained_gru,0.693598,0.701012,0.69321,0.690471,0.659359,0.721583,3384.0,3411.0
8,Balanced_glove_cnn,0.714054,0.716576,0.713834,0.71308,0.696359,0.729801,3384.0,3411.0
9,Balanced_glove_rnn,0.68933,0.693955,0.68963,0.68768,0.710386,0.664974,3384.0,3411.0
