In [22]:
from torchtext.data import Field, TabularDataset, Iterator, BucketIterator
import torch
import torchtext.datasets
import numpy as np
import spacy
import json
import os
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [2]:
%matplotlib inline
plt.style.use('ggplot')

In [3]:
dataset_path = "/data/rali7/Tmp/solimanz/LBJ/dataset/"

In [4]:
nlp = spacy.load('en',disable=['parser', 'tagger', 'ner'])

In [5]:
def tokenizer(s): 
    return [w.text.lower() for w in nlp(s)]

In [None]:
def load_dataset(tokenizer):
    
    TEXT = data.Field(sequential=True, tokenize=tokenize, lower=True, include_lengths=True, batch_first=True, fix_length=200)
    LABEL = data.LabelField(tensor_type=torch.FloatTensor)
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    TEXT.build_vocab(train_data, vectors=GloVe(name='6B', dim=300))
    LABEL.build_vocab(train_data)

    word_embeddings = TEXT.vocab.vectors
    print ("Length of Text Vocabulary: " + str(len(TEXT.vocab)))
    print ("Vector size of Text Vocabulary: ", TEXT.vocab.vectors.size())
    print ("Label Length: " + str(len(LABEL.vocab)))

    train_data, valid_data = train_data.split() # Further splitting of training_data to create new training_data & validation_data
    train_iter, valid_iter, test_iter = data.BucketIterator.splits((train_data, valid_data, test_data), batch_size=32, sort_key=lambda x: len(x.text), repeat=False, shuffle=True)

    '''Alternatively we can also use the default configurations'''
    # train_iter, test_iter = datasets.IMDB.iters(batch_size=32)

    vocab_size = len(TEXT.vocab)

return TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter

In [6]:
txt_field = Field(sequential=True, 
                       tokenize=tokenizer, 
                       include_lengths=True, 
                       use_vocab=True)
label_field = Field(sequential=False, 
                         use_vocab=True, 
                         pad_token=None, 
                         unk_token=None)

In [5]:
TEXT = Field(sequential=True, include_lengths=True, unk_token=None, pad_token=None)
LABELS = Field(sequential=False, include_lengths=False, unk_token=None, is_target=True, pad_token=None)

In [8]:
def _sequence_mask(sequence_length, max_len=None):
    if max_len is None:
        max_len = sequence_length.data.max()

    batch_size = sequence_length.size(0)
    seq_range = torch.range(0, max_len - 1).long()
    seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
    seq_range_expand = Variable(seq_range_expand)
    if sequence_length.is_cuda:
        seq_range_expand = seq_range_expand.cuda()
    seq_length_expand = (sequence_length.unsqueeze(1).expand_as(seq_range_expand))
    return seq_range_expand < seq_length_expand

In [6]:
tv_datafields = {'sequence': ('sequence', TEXT), 'labels': ('labels', LABELS)}
trn, vld = TabularDataset.splits(
               path=dataset_path,
               train='train/train.json', validation="valid/valid.json",
               format='json',
               fields=tv_datafields)

In [7]:
TEXT.build_vocab(trn)

In [8]:
LABELS.vocab = TEXT.vocab

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
device

device(type='cuda', index=0)

In [14]:
trn, vld = torchtext.datasets.TREC.splits(text_field=txt_field, label_field=label_field)

In [15]:
txt_field.build_vocab(trn)
label_field.build_vocab(trn)

In [136]:
class LSTMBaseline(nn.Module):
    def __init__(self, hidden_dim, emb_dim=300):
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(len(txt_field.vocab), emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, num_layers=1, batch_first=True)      
        layers = []
        layers.append(nn.Linear(hidden_dim, len(label_field.vocab)))
        #layers.append(nn.Softmax(1))
        self.fully_connected = nn.Sequential(*layers)
 
    def forward(self, seq, lengths):
        
        #print(f"Input shape: {seq.shape}")
        embs = self.embedding(seq)
        #print(f"Embedding shape: {embs.shape}")
        packed = nn.utils.rnn.pack_padded_sequence(embs, lengths, batch_first=True)
        hdn, _ = self.lstm(packed)
        hdn, ls = nn.utils.rnn.pad_packed_sequence(hdn, batch_first=True)
        #print(f"LSTM out shape: {hdn.shape}")
        
        # Extract the outputs for the last timestep of each example
        idx = (torch.cuda.LongTensor(lengths) - 1).view(-1, 1).expand(len(lengths), hdn.size(2))
        time_dimension = 1 #if batch_first else 0
        idx = idx.unsqueeze(time_dimension)
        if hdn.is_cuda:
            idx = idx.cuda(hdn.data.get_device())
        # Shape: (batch_size, rnn_hidden_dim)
        last_output = hdn.gather(time_dimension, Variable(idx)).squeeze(time_dimension)
        #reshaped = torch.reshape(hdn, (-1, self.hidden_dim))
        #print(f"Last Output shape: {last_output.shape}")
        preds = self.fully_connected(last_output)
        preds = F.log_softmax(preds, dim=1)
        # lengths[0] is max sequence length for the batch
        #print(f"Preds shape: {preds.shape}")
        #preds = torch.reshape(preds, (-1, len(TEXT.vocab), lengths[0]))
        return preds
    
em_sz = 50
nh = 128
model = LSTMBaseline(nh, emb_dim=em_sz).to(device)

In [15]:
train_iter, val_iter = BucketIterator.splits(
    (trn, vld),
    batch_sizes=(100, 100),
    device=device,
    sort_key=lambda x: len(x.sequence),
    sort_within_batch=True,
    repeat=False
)
#test_iter = Iterator(tst, batch_size=64, device=-1, sort=False, sort_within_batch=False, repeat=False)


In [138]:
import tqdm
opt = optim.Adam(model.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()
 
epochs = 100
e = []
val_l = []
train_l = []

for epoch in range(1, epochs + 1):
    running_loss = 0.0
    running_corrects = 0
    model.train() # turn on training mode
    for b in train_iter:
        opt.zero_grad()
        shape = b.text[0].shape
        #print(shape)
        x = torch.reshape(b.text[0], (shape[1], shape[0]))
        y = b.label
        #y = torch.reshape(b.labels[0], (shape[1], shape[0]))
        x_lengths = b.text[1]
        
        #mask = _sequence_mask(x_lengths)
        
        preds = model(x, x_lengths)
        loss = loss_func(preds, y)
       
        loss.backward()
        opt.step()
        running_loss += (loss.data * x.size(0)).float()
        running_corrects += torch.sum(torch.max(preds, 1)[1] == y).float()
        #print(f"{torch.sum(torch.max(preds, 1)[1] == y)/shape[1]:.4f}")
    epoch_loss = running_loss.data / len(trn)
    epoch_acc = running_corrects.data / len(trn)
 
    # calculate the validation loss for this epoch
#     val_loss = 0.0
#     val_acc = 0
#     model.eval() # turn on evaluation mode
#     for dat in val_iter:        
#         shape = dat.text[0].shape        
#         x = torch.reshape(dat.text[0], (shape[1], shape[0]))
#         y = dat.label
#         x_lengths = dat.text[1]  
#         preds = model(x, x_lengths)
#         loss = loss_func(preds, y)
                
#         val_loss += loss.data * x.size(0)
#         val_acc += torch.sum(torch.max(preds, 1)[1] == y)
 
#     val_loss /= len(vld)
#     val_acc /= len(vld)
    e.append(epoch)
    #val_l.append(val_loss)
    train_l.append(epoch_loss)
    print(f'Epoch: {epoch}, Training Loss: {epoch_loss:.4f}')#, Validation Loss: {val_loss:.4f}')
    print(f'Epoch: {epoch}, Training Accuracy: {epoch_acc*100:.4f}')#, Validation Accuracy: {val_acc*100:.4f}')

RuntimeError: CuDNN error: CUDNN_STATUS_EXECUTION_FAILED

In [120]:
loss

tensor(1.8717, device='cuda:0', grad_fn=<NllLossBackward>)

In [47]:
preds.shape

torch.Size([64, 6])

In [51]:
torch.sum(torch.max(preds, 1)[1] == y)

tensor(28, device='cuda:0')

In [63]:
running_loss / len(trn)

tensor(1.6548, device='cuda:0')

In [61]:
loss.data * x.size(0)

tensor(102.7893, device='cuda:0')

In [55]:
len(trn)

5452

In [56]:
running_corrects

tensor(1047, device='cuda:0')

In [58]:
1047/5452

0.19203961848862802

In [73]:
print(f'{loss.data:.4}')

1.715


In [78]:
shape[1]

64

In [105]:
34/345

0.09855072463768116

In [116]:
torch.sum(torch.max(preds, 1)[1] == y).float()/64

tensor(0.3594, device='cuda:0')

In [112]:
?torch.div

[0;31mDocstring:[0m
.. function:: div(input, value, out=None) -> Tensor

Divides each element of the input :attr:`input` with the scalar :attr:`value`
and returns a new resulting tensor.

.. math::
    out_i = \frac{input_i}{value}

If :attr:`input` is of type `FloatTensor` or `DoubleTensor`, :attr:`value`
should be a real number, otherwise it should be an integer

Args:
    input (Tensor): the input tensor
    value (Number): the number to be divided to each element of :attr:`input`
    out (Tensor, optional): the output tensor

Example::

    >>> a = torch.randn(5)
    >>> a
    tensor([ 0.3810,  1.2774, -0.2972, -0.3719,  0.4637])
    >>> torch.div(a, 0.5)
    tensor([ 0.7620,  2.5548, -0.5944, -0.7439,  0.9275])

.. function:: div(input, other, out=None) -> Tensor

Each element of the tensor :attr:`input` is divided by each element
of the tensor :attr:`other`. The resulting tensor is returned. The shapes of
:attr:`input` and :attr:`other` must be
:ref:`broadcastable <broadcasting