<a href="https://colab.research.google.com/github/venkatanadikatla/pytorch/blob/main/RNN_with_SST_Better_Accuracy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Replace the preinstalled torchtext (0.18.0 in the recorded output below) with 0.6.0,
# the version the later cells are written against (they use `data.Field`,
# `data.LabelField` and `data.BucketIterator`).
!pip uninstall -y torchtext
!pip install torchtext==0.6.0

Found existing installation: torchtext 0.18.0
Uninstalling torchtext-0.18.0:
  Successfully uninstalled torchtext-0.18.0
Collecting torchtext==0.6.0
  Downloading torchtext-0.6.0-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->torchtext==0.6.0)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->torchtext==0.6.0)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->torchtext==0.6.0)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->torchtext==0.6.0)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12=

In [14]:
import copy # this module provides functions to duplicate objects. It seems to be imported but not yet used in the code
import torch # The MAIN PyTorch package
from torch import nn # contains the essential modules for building NN in pytorch.
from torch import optim # Provides optimization algorithms, such as SGD, Adam, etc
import torchtext # A library for text processing that works well with pytorch (Currently a version 0.6.0 is being used in this code)
from torchtext import data # A module in torchtext used for data handling
from torchtext import datasets # provides datasets, including various NLP datasets.

# Text field: sequential=True marks the data as token sequences, batch_first=True puts
# the batch dimension first in the produced tensors, lower=True lowercases all text.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)

# Label field: a Field subclass intended for classification labels.
LABEL = data.LabelField()

# Load the Stanford Sentiment Treebank (SST) and split it into train / validation / test.
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

# Build the dictionaries.
# build_vocab creates the token -> index mapping needed to turn text into tensors.
# Both vocabularies are built from the training split only.
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

# Hyperparameters (module-level names; the model-construction cell reads them).
vocab_size = len(TEXT.vocab)  # number of entries in the text vocabulary
label_size = len(LABEL.vocab)  # number of distinct labels (classes) in the training data
padding_idx = TEXT.vocab.stoi['<pad>']  # index of the '<pad>' token used to pad sequences
embedding_dim = 256  # size of each word embedding vector
hidden_dim = 256  # size of the recurrent hidden state

# Build one iterator per split, each yielding batches of 64 examples.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=64)

# data.BucketIterator.splits creates the iterators for the training, validation and test sets.


In [15]:
class RNN(torch.nn.Module):
  """Sentence classifier: embedding -> single-layer LSTM -> linear layer.

  Despite the class name, the recurrent layer is an ``nn.LSTM``.

  Args:
    vocab_size: number of rows in the embedding table.
    embedding_dim: size of each token embedding.
    hidden_dim: size of the LSTM hidden state.
    output_dim: number of logits produced per example (one per class).
    padding_idx: token index forwarded to ``nn.Embedding`` as ``padding_idx``.
  """

  def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, padding_idx):
    super().__init__()
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    # Take the class count from the constructor argument. Reading a notebook-level
    # `label_size` global here would make the class fail (NameError) anywhere that
    # global is not defined.
    self.label_size = output_dim
    self.num_layers = 1
    self.embedding = torch.nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
    self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim,
                              num_layers=self.num_layers, batch_first=True)
    self.fc = torch.nn.Linear(hidden_dim, output_dim)

  def zero_state(self, batch_size, device=None):
    """Return an all-zero state tensor of shape [num_layers, batch_size, hidden_dim].

    Args:
      batch_size: number of sequences in the batch.
      device: optional device for the tensor; defaults to PyTorch's default device.
    """
    return torch.zeros(self.num_layers, batch_size, self.hidden_dim, device=device)

  def forward(self, text):
    """Map a batch of token indices to class logits.

    Args:
      text: integer tensor of token indices, [batch_size, sentence_length]
        (the LSTM is configured with batch_first=True).

    Returns:
      Tensor of shape [batch_size, output_dim] with unnormalized class scores.
    """
    # embedded: [batch_size, sentence_length, embedding_dim]
    embedded = self.embedding(text)
    # No explicit initial state is passed: nn.LSTM then starts from zeros.
    # hidden: [num_layers, batch_size, hidden_dim]
    _, (hidden, _) = self.lstm(embedded)
    # NOTE(review): padded positions are fed through the LSTM as well, so for padded
    # batches the final state is taken after the pad tokens — consider packing the
    # sequences if that turns out to hurt accuracy.
    # Use the last layer's final hidden state: [batch_size, hidden_dim]
    return self.fc(hidden[-1])





In [16]:
def train_model(model, train_iter, optimizer, criterion, num_epochs=10):
  """Train `model` for `num_epochs` passes over `train_iter`, printing per-epoch stats.

  Args:
    model: module called as `model(batch.text)` to produce class logits.
    train_iter: re-iterable collection of batches exposing `.text` and `.label`.
    optimizer: optimizer over the model's parameters.
    criterion: loss called as `criterion(output, labels)`.
    num_epochs: number of passes over `train_iter`.

  Prints one line per epoch with the mean batch loss and the accuracy in percent.
  Returns None.
  """
  model.train()
  for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0
    for batch in train_iter:
      text, labels = batch.text, batch.label
      optimizer.zero_grad()
      output = model(text)
      loss = criterion(output, labels)
      loss.backward()
      optimizer.step()

      epoch_loss += loss.item()
      num_batches += 1
      # Count correct predictions once, and count examples from the labels themselves,
      # so the iterator does not need to expose `.dataset`.
      correct += (output.argmax(dim=1) == labels).sum().item()
      total += labels.size(0)

    # Report NaN instead of raising ZeroDivisionError when the iterator is empty.
    avg_loss = epoch_loss / num_batches if num_batches else float('nan')
    epoch_accuracy = 100 * correct / total if total else float('nan')

    print(f' Epoch {epoch+1}, Train Loss: {avg_loss}, Train Accuracy: {epoch_accuracy}')



In [17]:
def eval_model(model, val_iter, criterion):
  """Evaluate `model` on `val_iter` and print the mean batch loss and accuracy (%).

  Args:
    model: module called as `model(batch.text)` to produce class logits.
    val_iter: iterable of batches exposing `.text` and `.label`.
    criterion: loss called as `criterion(output, labels)`.

  The model is switched to eval mode and the pass runs under `torch.no_grad()`,
  so no autograd graph is built. Returns None.
  """
  model.eval()
  epoch_loss = 0.0
  num_batches = 0
  correct = 0
  total = 0
  with torch.no_grad():  # evaluation never calls backward(); skip gradient tracking
    for batch in val_iter:
      text, labels = batch.text, batch.label
      output = model(text)
      epoch_loss += criterion(output, labels).item()
      num_batches += 1
      correct += (output.argmax(dim=1) == labels).sum().item()
      total += labels.size(0)

  # Report NaN instead of raising ZeroDivisionError when the iterator is empty.
  avg_loss = epoch_loss / num_batches if num_batches else float('nan')
  epoch_accuracy = 100 * correct / total if total else float('nan')

  print(f'Validation Loss: {avg_loss},  Validation Accuracy: {epoch_accuracy}')




In [18]:
def test_model(model, test_iter, criterion):
  model.eval()
  epoch_loss = 0
  epoch_acc = 0
  correct = 0
  total = 0
  for batch in test_iter:
    text, labels = batch.text, batch.label
    output = model(text)
    loss = criterion(output, labels)
    epoch_loss +=loss.item()
    epoch_acc += (output.argmax(1)==labels).sum().item()
    _, predicted = torch.max(output.data,1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  avg_loss = epoch_loss/len(test_iter)
  avg_acc = epoch_acc/len(test_iter.dataset)

  epoch_accuracy = 100*correct/total

  print(f'Test Loss: {avg_loss},  Test Accuracy: {epoch_accuracy}')




In [19]:
# Seed PyTorch's RNG right before constructing the model so that the parameter
# initialization is repeatable.
# NOTE(review): the iterators were built in an earlier cell, before this seed is set —
# confirm whether batch shuffling is covered by it.
torch.manual_seed(42)
# label_size is passed as output_dim: one logit per label in LABEL's vocabulary.
model = RNN(vocab_size,embedding_dim,hidden_dim,label_size, padding_idx)
# Adam over all model parameters with learning rate 1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Cross-entropy loss over the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

In [20]:
# Train for 10 epochs; train_model prints the loss and accuracy after each epoch.
train_model(model,train_iter, optimizer, criterion, num_epochs =10)

 Epoch 1, Train Loss: 1.052848070859909, Train Accuracy: 41.80711610486891
 Epoch 2, Train Loss: 1.046015306640027, Train Accuracy: 42.017790262172284
 Epoch 3, Train Loss: 1.03978153618414, Train Accuracy: 42.86048689138577
 Epoch 4, Train Loss: 1.0250565173910624, Train Accuracy: 45.89185393258427
 Epoch 5, Train Loss: 0.9306610022018205, Train Accuracy: 58.075842696629216
 Epoch 6, Train Loss: 0.7499198464315329, Train Accuracy: 68.98408239700375
 Epoch 7, Train Loss: 0.5758530855623644, Train Accuracy: 75.71395131086142
 Epoch 8, Train Loss: 0.4486773389933714, Train Accuracy: 81.2382958801498
 Epoch 9, Train Loss: 0.3531589556985827, Train Accuracy: 86.3998127340824
 Epoch 10, Train Loss: 0.2569176818230259, Train Accuracy: 90.69522471910112


In [21]:
# Evaluate the trained model on the validation split (prints loss and accuracy).
# In the recorded run, training accuracy reached ~90.7% while validation accuracy
# is ~53.6% — a gap that large suggests the model is overfitting the training data.
eval_model(model, val_iter, criterion)



Validation Loss: 1.4657740261819627,  Validation Accuracy: 53.58764759309719


In [22]:
# Evaluate the trained model on the held-out test split (prints loss and accuracy).
test_model (model, test_iter, criterion)

Test Loss: 1.2865886858531408,  Test Accuracy: 58.46153846153846
