# Training a Bidirectional LSTM for Sentiment Analysis on IMDB Dataset

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import utils
import time
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Load the IMDB dataset

In [6]:
# Read the pre-processed IMDB reviews and their labels from local .pt files.
# NOTE(review): torch.load can unpickle arbitrary objects (depending on the
# PyTorch version / weights_only setting) -- only load files you trust.
train_data, train_label = torch.load('imdb/train_data.pt'), torch.load('imdb/train_label.pt')
test_data, test_label = torch.load('imdb/test_data.pt'), torch.load('imdb/test_label.pt')

# Report how many reviews each split contains.
num_train_reviews = len(train_data)
num_test_reviews = len(test_data)
print('num review in train = ', num_train_reviews)
print('num review in test = ', num_test_reviews)

num review in train =  25000
num review in test =  25000


### Create the network, using a bidirectional LSTM

In [7]:
class rec_neural_net(nn.Module):
    """Bidirectional LSTM classifier: embedding -> stacked bi-LSTM -> linear layer.

    Args:
        vocab_size: number of entries in the embedding table.
        hidden_size: size of each embedding vector and of the LSTM hidden
            state (per direction).
        output_size: number of scores produced for each sequence.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, hidden_size, output_size, num_layers):
        super().__init__()

        self.emb_layer = nn.Embedding(vocab_size, hidden_size)
        self.rec_layer = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers, bidirectional=True)
        # The final forward and backward hidden states are concatenated,
        # hence the 2 * hidden_size input features.
        self.lin_layer = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input_seq):
        """Score a batch of token-index sequences.

        Args:
            input_seq: integer tensor of token indices. The LSTM is built
                without ``batch_first``, so it is consumed as
                (seq_len, batch_size).

        Returns:
            Tensor of shape (batch_size, output_size) with one score vector
            per sequence.
        """
        input_seq_emb = self.emb_layer(input_seq)

        _, (h_last, _) = self.rec_layer(input_seq_emb)

        # h_last has shape (num_layers * 2, batch_size, hidden_size), ordered
        # layer by layer with the forward state before the backward state.
        # Negative indices always pick the top layer, whatever num_layers is
        # (the previous hard-coded 2 and 3 were only right for two layers).
        h_direc_1 = h_last[-2, :, :]
        h_direc_2 = h_last[-1, :, :]
        h_direc_12 = torch.cat((h_direc_1, h_direc_2), dim=1)

        scores = self.lin_layer(h_direc_12)

        return scores

### Instantiate the neural net

In [9]:
# Model hyper-parameters.
vocab_size = 25002
num_layers = 2
hid_size = 50
out_size = 2

# Mini-batch size and loss function used by the training and evaluation cells.
bs = 64
criterion = nn.CrossEntropyLoss()

# Build the network and move its parameters to the selected device.
net = rec_neural_net(vocab_size, hid_size, out_size, num_layers).to(device)

# Show the architecture and its parameter count.
print(net)
utils.display_num_param(net)

# Plain SGD over every parameter of the network.
optimizer = optim.SGD(net.parameters(), lr=1)

rec_neural_net(
  (emb_layer): Embedding(25002, 50)
  (rec_layer): LSTM(50, 50, num_layers=2, bidirectional=True)
  (lin_layer): Linear(in_features=100, out_features=2, bias=True)
)
There are 1351902 (1.35 million) parameters in this neural network


### Function to evaluate the network on the test set

In [10]:
def eval_on_test_set():
    """Print the error rate of ``net`` on the test set.

    Goes through ``test_data`` / ``test_label`` in consecutive mini-batches of
    ``bs`` reviews (a trailing partial batch is skipped), averages the
    per-batch error reported by ``utils.get_error`` and prints it as a
    percentage. Relies on the notebook globals ``net``, ``bs``, ``device``,
    ``test_data`` and ``test_label``. Returns nothing.
    """
    # Same length cap as the training loop (this used to keep only 499 steps).
    max_review_len = 500
    num_reviews = len(test_data)

    running_error = 0
    num_batches = 0

    # Evaluate in eval mode, then restore whatever mode the network was in so
    # that training can continue afterwards.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():

            # "+ 1" keeps the last full batch when num_reviews is a multiple of bs
            for i in range(0, num_reviews - bs + 1, bs):

                # extract the minibatch
                indices = torch.arange(i, i + bs)
                minibatch_data, minibatch_label = utils.make_minibatch(indices, test_data, test_label)

                # truncate if review longer than 500:
                if minibatch_data.size(0) > max_review_len:
                    minibatch_data = minibatch_data[0:max_review_len]

                # send to the compute device
                minibatch_data = minibatch_data.to(device)
                minibatch_label = minibatch_label.to(device)

                # feed it to the network
                scores = net(minibatch_data)

                # compute the error made on this batch
                error = utils.get_error(scores, minibatch_label)

                # add it to the running error
                running_error += error.item()

                num_batches += 1
    finally:
        net.train(was_training)

    # compute error rate on the full test set
    total_error = running_error / num_batches

    print( 'error rate on test set =', total_error*100 ,'percent')

### Do 16 passes through the training set.

In [11]:
# Train the network with mini-batch SGD, reporting the training loss/error and
# the test error after every epoch.
start = time.time()

num_epochs = 16
max_review_len = 500                # reviews longer than this are truncated
num_train_reviews = len(train_data)

for epoch in range(num_epochs):

    running_loss = 0
    running_error = 0
    num_batches = 0

    # visit the training reviews in a fresh random order each epoch
    shuffled_indices = torch.randperm(num_train_reviews)

    # only full mini-batches are used; "+ 1" keeps the last full batch when
    # the number of reviews is a multiple of bs
    for count in range(0, num_train_reviews - bs + 1, bs):

        # Set the gradients to zeros
        optimizer.zero_grad()

        # get the minibatch
        indices = shuffled_indices[count:count + bs]
        minibatch_data, minibatch_label = utils.make_minibatch(indices, train_data, train_label)

        # truncate if review longer than 500:
        if minibatch_data.size(0) > max_review_len:
            minibatch_data = minibatch_data[0:max_review_len]

        # send to the compute device
        minibatch_data = minibatch_data.to(device)
        minibatch_label = minibatch_label.to(device)

        # forward the minibatch through the net
        scores = net(minibatch_data)

        # Compute the average of the losses of the data points in the minibatch
        loss = criterion(scores, minibatch_label)

        # backward pass
        loss.backward()

        # clip the gradient norm to limit the size of a single update
        nn.utils.clip_grad_norm_(net.parameters(), 5)

        # do one step of stochastic gradient descent
        optimizer.step()

        # computing stats (no graph needed for bookkeeping)
        num_batches += 1
        running_loss += loss.item()
        with torch.no_grad():
            error = utils.get_error(scores, minibatch_label)
            running_error += error.item()

    # epoch finished:  compute and display stats for the full training set
    total_loss = running_loss / num_batches
    total_error = running_error / num_batches
    elapsed = time.time() - start
    print('epoch=',epoch, '\t time=', elapsed/60, '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')

    # compute error on the test set:
    eval_on_test_set()
    print(" ")

epoch= 0 	 time= 1.0688046177228292 	 loss= 0.6922096460293501 	 error= 47.628205128205124 percent
error rate on test set = 45.03605769230769 percent
 
epoch= 1 	 time= 2.4563684264818826 	 loss= 0.6783428499331841 	 error= 42.70432692307693 percent
error rate on test set = 40.22035256410256 percent
 
epoch= 2 	 time= 3.8757243355115256 	 loss= 0.662305282935118 	 error= 39.142628205128204 percent
error rate on test set = 36.97115384615385 percent
 
epoch= 3 	 time= 5.304314347108205 	 loss= 0.6393880152549499 	 error= 36.08173076923077 percent
error rate on test set = 31.58253205128205 percent
 
epoch= 4 	 time= 6.69748762845993 	 loss= 0.630812076269052 	 error= 35.797275641025635 percent
error rate on test set = 34.83173076923077 percent
 
epoch= 5 	 time= 8.083333082993825 	 loss= 0.6077810445657144 	 error= 32.96073717948718 percent
error rate on test set = 44.607371794871796 percent
 
epoch= 6 	 time= 9.527599668502807 	 loss= 0.5901870279740065 	 error= 31.254006410256412 percen

### Save the trained parameters

In [12]:
# Write the network's state_dict (its parameters, not the whole module object)
# to a file in the current working directory.
torch.save( net.state_dict() , 'trained_parameters_LSTM.pt'  )