In [1]:
import torch
import json
import numpy as np
import pandas as pd
from torch import nn

from utils import get_loaders
from classifier_utils import HateSpeechClassifier, forward_back_prop, train_classifier

# Pre-processed dataset saved as NumPy arrays: the tweet features and their labels.
tweets = np.load("data/tweets.npy")
labels = np.load("data/hate_original.npy")

# Vocabulary lookup tables saved as JSON.
# NOTE: JSON object keys are always strings, so `int_to_vocab` comes back keyed
# by str (e.g. "12"), not int -- convert the keys if integer lookups are needed.
with open('vocab_to_int.json', 'r') as fp:
    vocab_to_int = json.loads(fp.read())

with open('int_to_vocab.json', 'r') as fp:
    int_to_vocab = json.loads(fp.read())

# Detect CUDA once; later cells branch on this flag.
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print("Using GPU.")
else:
    print("Using CPU.")

Using GPU.


### Creating the Training, Validation, and Test Sets

In [2]:
# Build the train / validation / test loaders from the loaded arrays.
# get_loaders comes from the project's utils module; it prints the feature shape
# of each split (see the output below).
# NOTE(review): no batch size is passed here, so the loaders presumably use a
# default inside get_loaders -- confirm it agrees with `batch_size` set in the
# hyperparameter cell.
train_loader, valid_loader, test_loader = get_loaders(tweets, labels)

			Feature Shapes:
Train set: 		(19826, 91) 
Validation set: 	(2478, 91) 
Test set: 		(2479, 91)


In [3]:
# Pull a single batch from the training loader to sanity-check shapes and contents.
# The built-in next() is used instead of the Python-2-style `.next()` method:
# DataLoader iterators implement `__next__`, and `.next()` is not available in
# current PyTorch releases.
dataiter = iter(train_loader)
sample_x, sample_y = next(dataiter)

print('Sample input size: ', sample_x.size()) # batch_size, seq_length
print('Sample input: \n', sample_x)
print()
print('Sample label size: ', sample_y.size()) # batch_size
print('Sample label: \n', sample_y)

Sample input size:  torch.Size([64, 91])
Sample input: 
 tensor([[    0,     0,     0,  ...,     0, 12719, 18692],
        [    0,     0,     0,  ...,  1128,  2724,  8082],
        [    0,     0,     0,  ...,  5715,  5805,   657],
        ...,
        [    0,     0,     0,  ...,    53, 10432, 18484],
        [    0,     0,     0,  ...,  3546, 20202,  1454],
        [    0,     0,     0,  ...,  4997, 17609, 19327]], dtype=torch.int32)

Sample label size:  torch.Size([64])
Sample label: 
 tensor([2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1,
        1, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1,
        1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1])


### Hyperparameters

In [4]:
# --- Values derived from the loaded data ---------------------------------
sequence_length = tweets.shape[1]            # length of the second axis: words per sequence
vocab_size = len(vocab_to_int)               # number of entries in the vocabulary mapping
output_size = pd.Series(labels).nunique()    # number of distinct label values

# --- Network architecture ------------------------------------------------
embedding_dim = 25
hidden_dim = 256
n_layers = 2
# Positional tuples handed to HateSpeechClassifier unchanged.
# NOTE(review): the meaning of each field is defined in classifier_utils -- confirm there.
cnn_params = (32, 25, 1, 4)
pool_params = (4, 4, 0)

# --- Optimisation and logging --------------------------------------------
num_epochs = 5
learning_rate = 5e-4
batch_size = 64                              # NOTE(review): should agree with the loaders' batch size
show_every_n_batches = 50                    # passed to train_classifier as its reporting interval

### Model Instantiation

In [5]:
# Build the classifier from the hyperparameters defined in the previous cell.
# `embedding_path` names a GloVe Twitter vector file ("25d" in the file name,
# in line with embedding_dim = 25); the constructor reports how many vocabulary
# words have no pre-trained embedding (see the output below).
# NOTE(review): the positional argument order is dictated by the
# HateSpeechClassifier signature in classifier_utils -- confirm before reordering.
model = HateSpeechClassifier(vocab_size, output_size, embedding_dim, 
                             cnn_params, pool_params, hidden_dim, n_layers, 
                             dropout=0.5, embedding_path="glove/glove.twitter.27B.25d.txt", 
                             vocab_to_int=vocab_to_int)

# Move the model onto the GPU when CUDA was detected in the first cell.
if train_on_gpu:
    model.cuda()

4186 words in the vocabulary have no pre-trained embedding.


### Train the Network

In [6]:
# Loss: cross-entropy over the label classes.
# NOTE(review): nn.CrossEntropyLoss expects unnormalised scores (logits) -- confirm
# the model's forward pass does not already apply a softmax.
criterion = nn.CrossEntropyLoss()

# Optimiser: Adam over every model parameter, using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [9]:
# Run the training loop from classifier_utils for `num_epochs` epochs. The log below
# shows a running average training loss every `show_every_n_batches` batches and the
# training / validation loss at the end of each epoch.
# NOTE: the return value is bound back to `model`, so re-running this cell passes the
# previously returned model in again rather than a fresh one -- re-run the model
# instantiation cell first to train from scratch.
model = train_classifier(model, batch_size, optimizer, criterion, num_epochs, train_loader, valid_loader,
                         show_every_n_batches=show_every_n_batches)

Training for 5 epoch(s)...
Epoch: 1/5. 	Batch: 50/310.	 Avg. Training Loss: 0.7162245172262192
Epoch: 1/5. 	Batch: 100/310.	 Avg. Training Loss: 0.64970160394907
Epoch: 1/5. 	Batch: 150/310.	 Avg. Training Loss: 0.5914298526446025
Epoch: 1/5. 	Batch: 200/310.	 Avg. Training Loss: 0.5466499134898186
Epoch: 1/5. 	Batch: 250/310.	 Avg. Training Loss: 0.5110641062259674
Epoch: 1/5. 	Batch: 300/310.	 Avg. Training Loss: 0.48945421174168585
Epoch: 1 	Training Loss: 0.485784 	Validation Loss: 0.326976 	 Accuracy: 89.843750

Epoch: 2/5. 	Batch: 50/310.	 Avg. Training Loss: 0.3435819044709206
Epoch: 2/5. 	Batch: 100/310.	 Avg. Training Loss: 0.34118292570114134
Epoch: 2/5. 	Batch: 150/310.	 Avg. Training Loss: 0.33503892133633295
Epoch: 2/5. 	Batch: 200/310.	 Avg. Training Loss: 0.330935439877212
Epoch: 2/5. 	Batch: 250/310.	 Avg. Training Loss: 0.333333226531744
Epoch: 2/5. 	Batch: 300/310.	 Avg. Training Loss: 0.33263800295690693
Epoch: 2 	Training Loss: 0.331690 	Validation Loss: 0.298196 	 