<a href="https://colab.research.google.com/github/venkatanadikatla/pytorch/blob/main/RNN_Building_Training_and_Evaluation_Functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [82]:
!pip uninstall -y torchtext
!pip install torchtext==0.6.0

Found existing installation: torchtext 0.6.0
Uninstalling torchtext-0.6.0:
  Successfully uninstalled torchtext-0.6.0
Collecting torchtext==0.6.0
  Using cached torchtext-0.6.0-py3-none-any.whl (64 kB)
Installing collected packages: torchtext
Successfully installed torchtext-0.6.0


In [1]:
import copy # this module provides functions to duplicate objects. It seems to be imported but not yet used in the code
import torch # The MAIN PyTorch package
from torch import nn # contains the essential modules for building NN in pytorch.
from torch import optim # Provides optimization algorithms, such as SGD, Adam, etc
import torchtext # A library for text processing that works well with pytorch (Currently a version 0.6.0 is being used in this code)
from torchtext import data # A module in torchtext used for data handling
from torchtext import datasets # provides datasets, including various NLP datasets.

# --- Fields ------------------------------------------------------------------
# TEXT handles the sentences: sequential data, lower-cased, and batched with
# the batch dimension first.
TEXT = data.Field(
    sequential=True,
    batch_first=True,
    lower=True,
)
# LABEL is the Field subclass meant for classification targets.
LABEL = data.LabelField()

# --- Dataset -----------------------------------------------------------------
# Stanford Sentiment Treebank (SST), loaded as train / validation / test splits.
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

# --- Vocabularies ------------------------------------------------------------
# Token -> index and label -> index mappings, both built from the training
# split only, so text and labels can be fed to the network as integers.
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

# --- Hyperparameters ---------------------------------------------------------
embedding_dim = 128                     # size of each word embedding vector
hidden_dim = 128                        # size of the recurrent hidden state
vocab_size = len(TEXT.vocab)            # number of entries in the text vocabulary
label_size = len(LABEL.vocab)           # number of distinct labels (classes)
padding_idx = TEXT.vocab.stoi['<pad>']  # index of the token used to pad sequences

# --- Iterators ---------------------------------------------------------------
# One batch iterator per split, each yielding batches of 32 examples.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=32,
)


In [3]:
# Show the first training example as a dict of its fields.
print(vars(train_data.examples[0]))

{'text': ['the', 'rock', 'is', 'destined', 'to', 'be', 'the', '21st', 'century', "'s", 'new', '``', 'conan', "''", 'and', 'that', 'he', "'s", 'going', 'to', 'make', 'a', 'splash', 'even', 'greater', 'than', 'arnold', 'schwarzenegger', ',', 'jean-claud', 'van', 'damme', 'or', 'steven', 'segal', '.'], 'label': 'positive'}


In [4]:
# Show the first validation example as a dict of its fields.
print(vars(val_data.examples[0]))

{'text': ['it', "'s", 'a', 'lovely', 'film', 'with', 'lovely', 'performances', 'by', 'buy', 'and', 'accorsi', '.'], 'label': 'positive'}


In [5]:
# Show the first test example as a dict of its fields.
print(vars(test_data.examples[0]))

{'text': ['effective', 'but', 'too-tepid', 'biopic'], 'label': 'neutral'}


In [6]:
# Number of examples in each split, one per line.
print(
    f'Num Train: {len(train_data)}',
    f'Num Val: {len(val_data)}',
    f'Num Test: {len(test_data)}',
    sep='\n',
)

Num Train: 8544
Num Val: 1101
Num Test: 2210


In [7]:
# Size of the token vocabulary that was built from the training split.
print(f'Vocabulary size: {len(TEXT.vocab)}')

Vocabulary size: 16581


In [8]:
# Number of distinct labels. The message names the label vocabulary explicitly
# so it is not confused with the token vocabulary size printed in the cell above.
print(f'Label vocabulary size: {len(LABEL.vocab)}')

Vocabulary size: 3


In [9]:
# Per-label counts recorded while building the label vocabulary from train_data.
print(LABEL.vocab.freqs)


Counter({'positive': 3610, 'negative': 3310, 'neutral': 1624})


In [10]:
# The 20 most frequent tokens in the text vocabulary, paired with their counts.
print(TEXT.vocab.freqs.most_common(20))

[('.', 8024), ('the', 7303), (',', 7131), ('a', 5281), ('and', 4473), ('of', 4396), ('to', 3021), ('is', 2561), ("'s", 2544), ('it', 2422), ('that', 1954), ('in', 1888), ('as', 1296), ('but', 1172), ('film', 1162), ('with', 1139), ('for', 1023), ('this', 998), ('movie', 976), ('an', 972)]


In [11]:
# Tokens stored at the first 10 vocabulary indices (0, 1, ..., 9).
print(TEXT.vocab.itos[:10])

['<unk>', '<pad>', '.', 'the', ',', 'a', 'and', 'of', 'to', 'is']


In [2]:
class RNN(torch.nn.Module):
  """Sentence classifier: embedding -> single-layer LSTM -> linear layer.

  Args:
    vocab_size: number of rows in the embedding table.
    embedding_dim: size of each token embedding.
    hidden_dim: size of the LSTM hidden state.
    output_dim: number of scores (classes) produced per sentence.
    padding_idx: index of the padding token, forwarded to ``torch.nn.Embedding``.
  """

  def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, padding_idx):
    super().__init__()
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    # Taken from the constructor argument: the class must not depend on a
    # notebook-level `label_size` variable happening to exist.
    self.label_size = output_dim
    self.num_layers = 1
    self.embedding = torch.nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
    # self.rnn = torch.nn.RNN(embedding_dim,hidden_dim, nonlinearity='relu',batch_first=True)
    self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim, num_layers=self.num_layers, batch_first=True)
    self.fc = torch.nn.Linear(hidden_dim, output_dim)

  def zero_state(self, batch_size):
    """Return an all-zero state tensor of shape (num_layers, batch_size, hidden_dim)."""
    return torch.zeros(self.num_layers, batch_size, self.hidden_dim)

  def forward(self, text):
    """Compute class scores for a batch of token-index sequences.

    Args:
      text: integer tensor of shape (batch_size, sentence_length). The batch
        dimension comes first because the LSTM is built with batch_first=True.

    Returns:
      Tensor of shape (batch_size, output_dim) with unnormalised class scores.
    """
    # embedded: (batch_size, sentence_length, embedding_dim)
    embedded = self.embedding(text)
    batch_size = text.size(0)

    # Explicit zero initial hidden and cell states, on the same device as the input.
    h_0 = self.zero_state(batch_size).to(text.device)
    c_0 = torch.zeros_like(h_0)

    # hidden: (num_layers, batch_size, hidden_dim)
    # NOTE: padded time steps are fed through the LSTM too, so `hidden` is the
    # state after the last time step of the padded batch.
    _, (hidden, _) = self.lstm(embedded, (h_0, c_0))

    # Last layer's final hidden state: (batch_size, hidden_dim). Indexing avoids
    # mutating the LSTM's returned state in place.
    return self.fc(hidden[-1])





In [3]:
def train_model(model, train_iter, optimizer, criterion, num_epochs=10):
  """Train `model` for `num_epochs` passes over `train_iter`, printing per-epoch metrics.

  Args:
    model: module called as ``model(batch.text)``.
    train_iter: iterable of batches exposing ``.text`` and ``.label``; it is
      iterated once per epoch.
    optimizer: optimizer used to zero gradients and step after each batch.
    criterion: loss callable invoked as ``criterion(output, labels)``.
    num_epochs: number of passes over ``train_iter``.

  Returns:
    None. For every epoch one line is printed with the mean of the per-batch
    losses and the accuracy in percent. Both are reported as ``nan`` when the
    iterator yields no batches.
  """
  model.train()
  for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0
    for batch in train_iter:
      text, labels = batch.text, batch.label

      optimizer.zero_grad()
      output = model(text)
      loss = criterion(output, labels)
      loss.backward()
      optimizer.step()

      epoch_loss += loss.item()
      num_batches += 1
      # Predicted class = index of the largest score along dimension 1.
      correct += (output.argmax(1) == labels).sum().item()
      total += labels.size(0)

    # Guard the divisions so an empty iterator reports nan instead of raising.
    avg_loss = epoch_loss / num_batches if num_batches else float('nan')
    epoch_accuracy = 100 * correct / total if total else float('nan')

    print(f' Epoch {epoch+1}, Train Loss: {avg_loss}, Train Accuracy: {epoch_accuracy}%')



In [17]:
def eval_model(model, val_iter, criterion):
  """Evaluate `model` on `val_iter` and print validation loss and accuracy.

  Args:
    model: module called as ``model(batch.text)``; it is switched to eval mode.
    val_iter: iterable of batches exposing ``.text`` and ``.label``.
    criterion: loss callable invoked as ``criterion(output, labels)``.

  Returns:
    None. One line is printed with the mean of the per-batch losses and the
    accuracy in percent. Both are reported as ``nan`` when the iterator
    yields no batches.
  """
  model.eval()
  epoch_loss = 0.0
  num_batches = 0
  correct = 0
  total = 0
  # No parameters are updated here, so gradient tracking is switched off.
  with torch.no_grad():
    for batch in val_iter:
      text, labels = batch.text, batch.label
      output = model(text)
      loss = criterion(output, labels)

      epoch_loss += loss.item()
      num_batches += 1
      # Predicted class = index of the largest score along dimension 1.
      correct += (output.argmax(1) == labels).sum().item()
      total += labels.size(0)

  # Guard the divisions so an empty iterator reports nan instead of raising.
  avg_loss = epoch_loss / num_batches if num_batches else float('nan')
  epoch_accuracy = 100 * correct / total if total else float('nan')

  print(f'Validation Loss: {avg_loss},  Validation Accuracy: {epoch_accuracy}%')




In [18]:
def test_model(model, test_iter, criterion):
  model.eval()
  epoch_loss = 0
  epoch_acc = 0
  correct = 0
  total = 0
  for batch in test_iter:
    text, labels = batch.text, batch.label
    output = model(text)
    loss = criterion(output, labels)
    epoch_loss +=loss.item()
    epoch_acc += (output.argmax(1)==labels).sum().item()
    _, predicted = torch.max(output.data,1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  avg_loss = epoch_loss/len(test_iter)
  avg_acc = epoch_acc/len(test_iter.dataset)

  epoch_accuracy = 100*correct/total

  print(f'Test Loss: {avg_loss},  Test Accuracy: {epoch_accuracy}%')




In [13]:
# Seed PyTorch's RNG before the model is constructed.
torch.manual_seed(42)

# Model, loss and optimiser used by the training and evaluation cells below.
model = RNN(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    output_dim=label_size,
    padding_idx=padding_idx,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [16]:
# Train for 10 epochs; per-epoch loss and accuracy are printed by train_model.
train_model(
    model=model,
    train_iter=train_iter,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=10,
)

 Epoch 1, Train Loss: 1.0498392095280051, Train Accuracy: 41.99438202247191
 Epoch 2, Train Loss: 1.0465504748097967, Train Accuracy: 41.760299625468164
 Epoch 3, Train Loss: 1.0401260879602325, Train Accuracy: 43.07116104868914
 Epoch 4, Train Loss: 1.024933625249827, Train Accuracy: 43.64466292134831
 Epoch 5, Train Loss: 0.9535704116696275, Train Accuracy: 53.581460674157306
 Epoch 6, Train Loss: 0.7931898524252217, Train Accuracy: 65.03979400749064
 Epoch 7, Train Loss: 0.6362470295768552, Train Accuracy: 73.43164794007491
 Epoch 8, Train Loss: 0.506110270762265, Train Accuracy: 78.9442883895131
 Epoch 9, Train Loss: 0.4063299367490333, Train Accuracy: 84.07069288389513
 Epoch 10, Train Loss: 0.3223361052042536, Train Accuracy: 88.21395131086142


In [19]:
# Evaluate the trained model on the validation iterator; eval_model prints the
# loss and accuracy rather than returning them.
eval_model(model, val_iter, criterion)



Validation Loss: 1.3423104882240295,  Validation Accuracy: 54.31425976385105%


In [20]:
# Evaluate the trained model on the test iterator; test_model prints the loss
# and accuracy rather than returning them.
test_model(model, test_iter, criterion)

Test Loss: 1.2701764217444829,  Test Accuracy: 58.23529411764706%
