# Part 2

## 1. Import Embeddings and Dataset

In [1]:
# Import Embedding Matrix and Embedding Matrix's Train dataset vocab to index dictionary
from utils.file import load_from_local_file

embedding_matrix = load_from_local_file("models/embedding_matrix.pckl")
embedding_matrix_train_dataset_vocab_to_index: dict = load_from_local_file("models/embedding_matrix_train_dataset_vocab_to_index.pckl")

Loading object from local...
Object loaded from local!
Loading object from local...
Object loaded from local!


In [2]:
# Import Dataset
import pandas as pd

train_df = pd.read_csv("datasets/train.csv")
val_df = pd.read_csv("datasets/val.csv")
test_df = pd.read_csv("datasets/test.csv")
train_df.head()

Unnamed: 0,text,label
0,the rock is destined to be the 21st century's ...,1
1,"the gorgeously elaborate continuation of "" the...",1
2,effective but too-tepid biopic,1
3,if you sometimes like to go to the movies to h...,1
4,"emerges as something rare , an issue movie tha...",1


## 2. Train RNN Model

In [3]:
SEARCH_SPACE = {
  "batch_size": [32, 64, 128, 256],
  "learning_rate": [0.001, 0.01, 0.05, 0.1],
  "optimizer_name": ["SGD", "Adagrad", "Adam", "RMSprop"],

  # RNN Model Parameters
  "hidden_dim": [16],
  "num_layers": [2]
}

In [4]:
from models.RNN import RNN
import torch.nn as nn
import torch
from torch.utils.data import DataLoader
from solver import train, plot_loss_acc_graph
from utils.custom_dataset import TextDataset

def train_rnn_model_with_parameters(
    batch_size: int,
    learning_rate: float,
    optimizer_name: str,
    hidden_dim: int,
    num_layers: int,
):
  # Model
  model_rnn = RNN(
    embedding_matrix=embedding_matrix,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=2,
    sentence_representation_type="last"
  )

  ########################
  ###### Parameters ######
  ########################
  batch_size = batch_size
  min_epoch = 20
  max_epochs = 10_000

  # SGD Optimizer
  learning_rate = learning_rate
  match optimizer_name:
    case "SGD":
      optimizer = torch.optim.SGD(model_rnn.parameters(), lr=learning_rate)
    case "Adagrad":
      optimizer = torch.optim.Adagrad(model_rnn.parameters(), lr=learning_rate)
    case "Adam":
      optimizer = torch.optim.Adam(model_rnn.parameters(), lr=learning_rate)
    case "RMSprop":
      optimizer = torch.optim.RMSprop(model_rnn.parameters(), lr=learning_rate)
    case _:
      raise Exception("Invalid optimizer name!")

  # Cross Entropy Loss 
  criterion = nn.CrossEntropyLoss()

  ########################
  ######## Dataset #######
  ########################
  train_dataset = TextDataset(
    dataframe=train_df,
    max_len=train_df["text"].str.split().apply(len).max(),
    embedding_matrix_vocab_to_index=embedding_matrix_train_dataset_vocab_to_index
  )
  train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

  val_dataset = TextDataset(
    dataframe=val_df,
    max_len=train_df["text"].str.split().apply(len).max(),
    embedding_matrix_vocab_to_index=embedding_matrix_train_dataset_vocab_to_index
  )
  val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

  ########################
  ######### Train ########
  ########################
  model, avg_train_loss, avg_train_acc, avg_val_loss, avg_val_acc, num_of_epochs = train(
    model=model_rnn,
    criterion=criterion,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    min_epoch=min_epoch,
    max_epoch=max_epochs,
  )

  ########################
  ######### Plot #########
  ########################
  subtitle = f"batch_size_{batch_size}; lr_{learning_rate}; optimizer_{optimizer_name}; hidden_dim_{hidden_dim}; num_layers_{num_layers}"
  save_filename_prefix = f"rnn/last/batch_size_{batch_size}-lr_{learning_rate}-optimizer_{optimizer_name}-hidden_dim_{hidden_dim}-num_layers_{num_layers}"
  
  # Plot Train Loss and Accuracy Graph
  plot_loss_acc_graph(
    loss_list=avg_train_loss, 
    acc_list=avg_train_acc, 
    dataset_type="train",
    subtitle=subtitle,
    save_filename_prefix=save_filename_prefix,
    display=False
  )

  # Plot Validation Loss and Accuracy Graph
  plot_loss_acc_graph(
    loss_list=avg_val_loss, 
    acc_list=avg_val_acc, 
    dataset_type="val",
    subtitle=subtitle,
    save_filename_prefix=save_filename_prefix,
    display=False
  )

  ########################
  ##### Return Value #####
  ########################
  configuration_results = {
    "model_id": None, # To keep track of trained model object

    "batch_size": batch_size,
    "learning_rate": learning_rate,
    "optimizer_name": optimizer_name,

    # RNN Model Parameters
    "hidden_dim": hidden_dim,
    "num_layers": num_layers,

    # Model performance
    "train_loss": avg_train_loss[-1],
    "train_accuracy": avg_train_acc[-1],
    "val_loss": avg_val_loss[-1],
    "val_accuracy": avg_val_acc[-1],

    # Epoch Number
    "num_of_epochs": num_of_epochs
  }
  return model, configuration_results

[nltk_data] Downloading package punkt to C:\Users\Toh Jing
[nltk_data]     Qiang\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package treebank to C:\Users\Toh Jing
[nltk_data]     Qiang\AppData\Roaming\nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package punkt_tab to C:\Users\Toh Jing
[nltk_data]     Qiang\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [5]:
all_configurations_results = []
rnn_models = {}

current_model_id = 0

for hidden_dim in SEARCH_SPACE["hidden_dim"]:
  for num_layers in SEARCH_SPACE["num_layers"]:
    for optimizer_name in SEARCH_SPACE["optimizer_name"]:
      for batch_size in SEARCH_SPACE["batch_size"]:
        for learning_rate in SEARCH_SPACE["learning_rate"]:
          current_model_id += 1

          print(f"---------- batch_size_{batch_size}; lr_{learning_rate}; optimizer_{optimizer_name}; hidden_dim_{hidden_dim}; num_layers_{num_layers} ----------")
          model, configuration_results = train_rnn_model_with_parameters(
            batch_size=batch_size,
            learning_rate=learning_rate,
            optimizer_name=optimizer_name,
            hidden_dim=hidden_dim,
            num_layers=num_layers
          )
          configuration_results["model_id"] = current_model_id

          all_configurations_results.append(configuration_results)
          rnn_models[current_model_id] = model


---------- batch_size_32; lr_0.001; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 267/267 [00:09<00:00, 28.49it/s, acc=0.5, loss=0.693]  
Epoch 1 (Val): 100%|██████████| 34/34 [00:00<00:00, 60.30it/s, acc=0.496, loss=0.693]
Epoch 2 (Train): 100%|██████████| 267/267 [00:08<00:00, 30.44it/s, acc=0.5, loss=0.693]  
Epoch 2 (Val): 100%|██████████| 34/34 [00:00<00:00, 58.84it/s, acc=0.496, loss=0.693]
Epoch 3 (Train): 100%|██████████| 267/267 [00:08<00:00, 30.14it/s, acc=0.5, loss=0.693]  
Epoch 3 (Val): 100%|██████████| 34/34 [00:00<00:00, 55.21it/s, acc=0.498, loss=0.693]
Epoch 4 (Train): 100%|██████████| 267/267 [00:09<00:00, 27.60it/s, acc=0.5, loss=0.693]  
Epoch 4 (Val): 100%|██████████| 34/34 [00:00<00:00, 55.19it/s, acc=0.506, loss=0.693]
Epoch 5 (Train): 100%|██████████| 267/267 [00:10<00:00, 26.18it/s, acc=0.5, loss=0.693]  
Epoch 5 (Val): 100%|██████████| 34/34 [00:00<00:00, 49.08it/s, acc=0.498, loss=0.693]
Epoch 6 (Train): 100%|██████████| 267/267 [00:10<00:00, 24.53it/s, acc=0.491, loss=0.693]
Epoch 6 (Val): 100%|██████████

---------- batch_size_32; lr_0.01; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 267/267 [00:09<00:00, 28.22it/s, acc=0.494, loss=0.693]
Epoch 1 (Val): 100%|██████████| 34/34 [00:00<00:00, 54.57it/s, acc=0.502, loss=0.693]
Epoch 2 (Train): 100%|██████████| 267/267 [00:08<00:00, 30.09it/s, acc=0.498, loss=0.693]
Epoch 2 (Val): 100%|██████████| 34/34 [00:00<00:00, 69.97it/s, acc=0.496, loss=0.693]
Epoch 3 (Train): 100%|██████████| 267/267 [00:07<00:00, 33.54it/s, acc=0.49, loss=0.693] 
Epoch 3 (Val): 100%|██████████| 34/34 [00:00<00:00, 55.56it/s, acc=0.504, loss=0.693]
Epoch 4 (Train): 100%|██████████| 267/267 [00:08<00:00, 30.32it/s, acc=0.502, loss=0.693]
Epoch 4 (Val): 100%|██████████| 34/34 [00:00<00:00, 64.78it/s, acc=0.496, loss=0.693]
Epoch 5 (Train): 100%|██████████| 267/267 [00:08<00:00, 32.85it/s, acc=0.493, loss=0.693]
Epoch 5 (Val): 100%|██████████| 34/34 [00:00<00:00, 37.59it/s, acc=0.496, loss=0.693]
Epoch 6 (Train): 100%|██████████| 267/267 [00:09<00:00, 28.64it/s, acc=0.499, loss=0.693]
Epoch 6 (Val): 100%|██████████

---------- batch_size_32; lr_0.05; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 267/267 [00:06<00:00, 40.88it/s, acc=0.502, loss=0.694]
Epoch 1 (Val): 100%|██████████| 34/34 [00:00<00:00, 80.23it/s, acc=0.502, loss=0.694]
Epoch 2 (Train): 100%|██████████| 267/267 [00:06<00:00, 40.53it/s, acc=0.497, loss=0.694]
Epoch 2 (Val): 100%|██████████| 34/34 [00:00<00:00, 65.44it/s, acc=0.502, loss=0.693]
Epoch 3 (Train): 100%|██████████| 267/267 [00:09<00:00, 29.06it/s, acc=0.49, loss=0.694] 
Epoch 3 (Val): 100%|██████████| 34/34 [00:00<00:00, 73.82it/s, acc=0.498, loss=0.693]
Epoch 4 (Train): 100%|██████████| 267/267 [00:08<00:00, 31.26it/s, acc=0.503, loss=0.693]
Epoch 4 (Val): 100%|██████████| 34/34 [00:00<00:00, 78.83it/s, acc=0.502, loss=0.694]
Epoch 5 (Train): 100%|██████████| 267/267 [00:06<00:00, 40.43it/s, acc=0.5, loss=0.694]  
Epoch 5 (Val): 100%|██████████| 34/34 [00:00<00:00, 79.70it/s, acc=0.496, loss=0.693]
Epoch 6 (Train): 100%|██████████| 267/267 [00:06<00:00, 40.31it/s, acc=0.501, loss=0.694]
Epoch 6 (Val): 100%|██████████

KeyboardInterrupt: 

## 3. Model Configurations Comparison

In [7]:
model_configurations_results_df = pd.DataFrame.from_dict(all_configurations_results)
model_configurations_results_df.sort_values(by=["val_accuracy"], ascending=False).reset_index(drop=True)

Unnamed: 0,model_id,batch_size,learning_rate,optimizer_name,hidden_dim,num_layers,train_loss,train_accuracy,val_loss,val_accuracy,num_of_epochs
0,2,32,0.01,SGD,16,2,0.693214,0.500286,0.693145,0.506066,30
1,1,32,0.001,SGD,16,2,0.693179,0.495552,0.693145,0.502022,29


# a. Final Configuration of best model

In [8]:
best_rnn_model_configuration = model_configurations_results_df.head(1)
best_rnn_model_configuration

Unnamed: 0,model_id,batch_size,learning_rate,optimizer_name,hidden_dim,num_layers,train_loss,train_accuracy,val_loss,val_accuracy,num_of_epochs
0,1,32,0.001,SGD,16,2,0.693179,0.495552,0.693145,0.502022,29


In [9]:
best_rnn_model_id = best_rnn_model_configuration["model_id"][0]
best_rnn_model = rnn_models[best_rnn_model_id]
best_rnn_model

RNN(
  (embedding): Embedding(16332, 300)
  (rnn): RNN(300, 16, num_layers=2, batch_first=True)
  (relu): ReLU()
  (fc): Linear(in_features=16, out_features=2, bias=True)
)

**Export Best Model**

In [10]:
best_rnn_model_configuration

Unnamed: 0,model_id,batch_size,learning_rate,optimizer_name,hidden_dim,num_layers,train_loss,train_accuracy,val_loss,val_accuracy,num_of_epochs
0,1,32,0.001,SGD,16,2,0.693179,0.495552,0.693145,0.502022,29


In [11]:
from utils.file import save_to_local_file

model_name = f"batch_size_{batch_size}-lr_{learning_rate}-optimizer_{optimizer_name}-hidden_dim_{hidden_dim}-num_layers_{num_layers}"
save_to_local_file(f"models/rnn/{model_name}.pckl", best_rnn_model)

Saving object to local...
Object saved to local!


# b. Accuracy on Testset

In [14]:
from solver import test

########################
######## Dataset #######
########################
test_dataset = TextDataset(
  dataframe=test_df,
  max_len=test_df["text"].str.split().apply(len).max(),
  embedding_matrix_vocab_to_index=embedding_matrix_train_dataset_vocab_to_index
)
test_dataloader = DataLoader(test_dataset, shuffle=False)

########################
######### Train ########
########################
test_loss, test_accuracy = test(
  model=best_rnn_model,
  criterion=nn.CrossEntropyLoss(),
  test_dataloader=test_dataloader,
)

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 0.6931437
Test Accuracy: 0.50093806


# c. Strategies to derive final sentence representation

1. Last State: Use last hidden state as sentence representation
2. Max Pooling
3. Average Pooling