# Part 2

## 1. Import Embeddings and Dataset

In [1]:
# Import Embedding Matrix and Embedding Matrix's Train dataset vocab to index dictionary

from utils.file import load_from_local_file

embedding_matrix = load_from_local_file("models/embedding_matrix.pckl")
embedding_matrix_train_dataset_vocab_to_index: dict = load_from_local_file("models/embedding_matrix_train_dataset_vocab_to_index.pckl")

Loading object from local...
Object loaded from local!
Loading object from local...
Object loaded from local!


In [2]:
# Import Dataset
import pandas as pd

train_df = pd.read_csv("datasets/train.csv")
val_df = pd.read_csv("datasets/val.csv")
train_df.head()

Unnamed: 0,text,label
0,the rock is destined to be the 21st century's ...,1
1,"the gorgeously elaborate continuation of "" the...",1
2,effective but too-tepid biopic,1
3,if you sometimes like to go to the movies to h...,1
4,"emerges as something rare , an issue movie tha...",1


## 2. Train RNN Model

In [3]:
SEARCH_SPACE = {
  "batch_size": [50, 100, 200],
  "learning_rate": [0.005, 0.01, 0.025, 0.05],
  "optimizer_name": ["SGD", "Adagrad", "Adam", "RMSprop"],

  # RNN Model Parameters
  "hidden_dim": [16, 32, 64],
  "num_layers": [2, 4, 8, 16]
}

In [4]:
from models.RNN import RNN
import torch.nn as nn
import torch
from torch.utils.data import DataLoader
from solver import train, plot_loss_acc_graph
from utils.custom_dataset import TextDataset

def train_rnn_model_with_parameters(
    batch_size: int,
    learning_rate: float,
    optimizer_name: str,
    hidden_dim: int,
    num_layers: int,
):
  # Model
  model_rnn = RNN(
    embedding_matrix=embedding_matrix,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=2
  )

  ########################
  ###### Parameters ######
  ########################
  batch_size = batch_size
  max_epochs = 10_000

  # SGD Optimizer
  learning_rate = learning_rate
  match optimizer_name:
    case "SGD":
      optimizer = torch.optim.SGD(model_rnn.parameters(), lr=learning_rate)
    case "Adagrad":
      optimizer = torch.optim.Adagrad(model_rnn.parameters(), lr=learning_rate)
    case "Adam":
      optimizer = torch.optim.Adam(model_rnn.parameters(), lr=learning_rate)
    case "RMSprop":
      optimizer = torch.optim.RMSprop(model_rnn.parameters(), lr=learning_rate)
    case _:
      raise Exception("Invalid optimizer name!")

  # Cross Entropy Loss 
  criterion = nn.CrossEntropyLoss()

  ########################
  ######## Dataset #######
  ########################
  train_dataset = TextDataset(
    dataframe=train_df,
    max_len=train_df["text"].str.split().apply(len).max(),
    embedding_matrix_vocab_to_index=embedding_matrix_train_dataset_vocab_to_index
  )
  train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

  val_dataset = TextDataset(
    dataframe=val_df,
    max_len=val_df["text"].str.split().apply(len).max(),
    embedding_matrix_vocab_to_index=embedding_matrix_train_dataset_vocab_to_index
  )
  val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

  ########################
  ######### Train ########
  ########################
  model, avg_train_loss, avg_train_acc, avg_val_loss, avg_val_acc, num_of_epochs = train(
    model=model_rnn,
    criterion=criterion,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    max_epoch=max_epochs
  )

  ########################
  ######### Plot #########
  ########################
  subtitle = f"batch_size_{batch_size}; lr_{learning_rate}; optimizer_{optimizer_name}; hidden_dim_{hidden_dim}; num_layers_{num_layers}"
  save_filename_prefix = f"rnn/batch_size_{batch_size}-lr_{learning_rate}-optimizer_{optimizer_name}-hidden_dim_{hidden_dim}-num_layers_{num_layers}"
  
  # Plot Train Loss and Accuracy Graph
  plot_loss_acc_graph(
    loss_list=avg_train_loss, 
    acc_list=avg_train_acc, 
    dataset_type="train",
    subtitle=subtitle,
    save_filename_prefix=save_filename_prefix,
    display=False
  )

  # Plot Validation Loss and Accuracy Graph
  plot_loss_acc_graph(
    loss_list=avg_val_loss, 
    acc_list=avg_val_acc, 
    dataset_type="val",
    subtitle=subtitle,
    save_filename_prefix=save_filename_prefix,
    display=False
  )

  ########################
  ##### Return Value #####
  ########################
  configuration_results = {
    "batch_size": batch_size,
    "learning_rate": learning_rate,
    "optimizer_name": optimizer_name,

    # RNN Model Parameters
    "hidden_dim": hidden_dim,
    "num_layers": num_layers,

    # Model performance
    "train_loss": avg_train_loss[-1],
    "train_accuracy": avg_train_acc[-1],
    "val_loss": avg_val_loss[-1],
    "val_accuracy": avg_val_acc[-1],

    # Epoch Number
    "num_of_epochs": num_of_epochs
  }
  return configuration_results

[nltk_data] Downloading package punkt to C:\Users\Toh Jing
[nltk_data]     Qiang\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package treebank to C:\Users\Toh Jing
[nltk_data]     Qiang\AppData\Roaming\nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package punkt_tab to C:\Users\Toh Jing
[nltk_data]     Qiang\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [5]:
all_configurations_results = []

for hidden_dim in SEARCH_SPACE["hidden_dim"]:
  for num_layers in SEARCH_SPACE["num_layers"]:
    for optimizer_name in SEARCH_SPACE["optimizer_name"]:
      for batch_size in SEARCH_SPACE["batch_size"]:
        for learning_rate in SEARCH_SPACE["learning_rate"]:
          print(f"---------- batch_size_{batch_size}; lr_{learning_rate}; optimizer_{optimizer_name}; hidden_dim_{hidden_dim}; num_layers_{num_layers} ----------")
          configuration_results = train_rnn_model_with_parameters(
            batch_size=batch_size,
            learning_rate=learning_rate,
            optimizer_name=optimizer_name,
            hidden_dim=hidden_dim,
            num_layers=num_layers
          )
          all_configurations_results.append(configuration_results)

---------- batch_size_50; lr_0.005; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:07<00:00, 23.60it/s, acc=0.686, loss=0.616]
Epoch 1 (Val): 100%|██████████| 22/22 [00:00<00:00, 44.91it/s, acc=0.515, loss=0.767]
Epoch 2 (Train): 100%|██████████| 171/171 [00:13<00:00, 12.91it/s, acc=0.616, loss=0.656]
Epoch 2 (Val): 100%|██████████| 22/22 [00:01<00:00, 17.34it/s, acc=0.515, loss=0.781]
Epoch 3 (Train): 100%|██████████| 171/171 [00:13<00:00, 13.09it/s, acc=0.61, loss=0.664] 
Epoch 3 (Val): 100%|██████████| 22/22 [00:00<00:00, 42.98it/s, acc=0.515, loss=0.781]
Epoch 4 (Train): 100%|██████████| 171/171 [00:06<00:00, 26.26it/s, acc=0.604, loss=0.666]
Epoch 4 (Val): 100%|██████████| 22/22 [00:00<00:00, 37.23it/s, acc=0.515, loss=0.779]
Epoch 5 (Train): 100%|██████████| 171/171 [00:06<00:00, 27.84it/s, acc=0.604, loss=0.667]
Epoch 5 (Val): 100%|██████████| 22/22 [00:00<00:00, 43.41it/s, acc=0.515, loss=0.777]
Epoch 6 (Train): 100%|██████████| 171/171 [00:06<00:00, 27.23it/s, acc=0.604, loss=0.668]
Epoch 6 (Val): 100%|██████████

---------- batch_size_50; lr_0.01; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:06<00:00, 26.19it/s, acc=0.785, loss=0.513]
Epoch 1 (Val): 100%|██████████| 22/22 [00:00<00:00, 45.55it/s, acc=0.515, loss=0.997]
Epoch 2 (Train): 100%|██████████| 171/171 [00:08<00:00, 20.77it/s, acc=0.674, loss=0.612]
Epoch 2 (Val): 100%|██████████| 22/22 [00:00<00:00, 44.26it/s, acc=0.515, loss=0.997]
Epoch 3 (Train): 100%|██████████| 171/171 [00:06<00:00, 26.20it/s, acc=0.663, loss=0.625]
Epoch 3 (Val): 100%|██████████| 22/22 [00:00<00:00, 49.07it/s, acc=0.515, loss=0.975]
Epoch 4 (Train): 100%|██████████| 171/171 [00:06<00:00, 25.84it/s, acc=0.657, loss=0.631]
Epoch 4 (Val): 100%|██████████| 22/22 [00:00<00:00, 35.21it/s, acc=0.515, loss=0.956]
Epoch 5 (Train): 100%|██████████| 171/171 [00:06<00:00, 25.43it/s, acc=0.651, loss=0.635]
Epoch 5 (Val): 100%|██████████| 22/22 [00:00<00:00, 43.72it/s, acc=0.515, loss=0.94] 
Epoch 6 (Train): 100%|██████████| 171/171 [00:06<00:00, 25.78it/s, acc=0.645, loss=0.639]
Epoch 6 (Val): 100%|██████████

---------- batch_size_50; lr_0.025; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:06<00:00, 26.39it/s, acc=0.914, loss=0.279]
Epoch 1 (Val): 100%|██████████| 22/22 [00:00<00:00, 45.02it/s, acc=0.515, loss=1.59] 
Epoch 2 (Train): 100%|██████████| 171/171 [00:07<00:00, 24.33it/s, acc=0.838, loss=0.403]
Epoch 2 (Val): 100%|██████████| 22/22 [00:00<00:00, 47.71it/s, acc=0.515, loss=1.55] 
Epoch 3 (Train): 100%|██████████| 171/171 [00:06<00:00, 25.46it/s, acc=0.832, loss=0.409]
Epoch 3 (Val): 100%|██████████| 22/22 [00:00<00:00, 45.42it/s, acc=0.515, loss=1.54] 
Epoch 4 (Train): 100%|██████████| 171/171 [00:09<00:00, 17.24it/s, acc=0.826, loss=0.416]
Epoch 4 (Val): 100%|██████████| 22/22 [00:00<00:00, 46.44it/s, acc=0.515, loss=1.52] 
Epoch 5 (Train): 100%|██████████| 171/171 [00:07<00:00, 21.70it/s, acc=0.826, loss=0.423]
Epoch 5 (Val): 100%|██████████| 22/22 [00:00<00:00, 46.03it/s, acc=0.515, loss=1.5]  
Epoch 6 (Train): 100%|██████████| 171/171 [00:07<00:00, 24.04it/s, acc=0.815, loss=0.43] 
Epoch 6 (Val): 100%|██████████

---------- batch_size_50; lr_0.05; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:08<00:00, 20.31it/s, acc=0.961, loss=0.148]
Epoch 1 (Val): 100%|██████████| 22/22 [00:00<00:00, 36.48it/s, acc=0.515, loss=2.17] 
Epoch 2 (Train): 100%|██████████| 171/171 [00:06<00:00, 26.22it/s, acc=0.891, loss=0.295]
Epoch 2 (Val): 100%|██████████| 22/22 [00:01<00:00, 20.78it/s, acc=0.515, loss=2.03] 
Epoch 3 (Train): 100%|██████████| 171/171 [00:07<00:00, 22.94it/s, acc=0.891, loss=0.291]
Epoch 3 (Val): 100%|██████████| 22/22 [00:00<00:00, 35.22it/s, acc=0.515, loss=2]    
Epoch 4 (Train): 100%|██████████| 171/171 [00:06<00:00, 25.23it/s, acc=0.896, loss=0.285]
Epoch 4 (Val): 100%|██████████| 22/22 [00:00<00:00, 50.21it/s, acc=0.515, loss=2.04] 
Epoch 5 (Train): 100%|██████████| 171/171 [00:06<00:00, 28.17it/s, acc=0.896, loss=0.28] 
Epoch 5 (Val): 100%|██████████| 22/22 [00:00<00:00, 40.44it/s, acc=0.515, loss=2.04] 
Epoch 6 (Train): 100%|██████████| 171/171 [00:07<00:00, 22.76it/s, acc=0.896, loss=0.282]
Epoch 6 (Val): 100%|██████████

---------- batch_size_100; lr_0.005; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.32it/s, acc=0.566, loss=0.689] 
Epoch 1 (Val): 100%|██████████| 11/11 [00:00<00:00, 28.92it/s, acc=0.515, loss=0.726]
Epoch 2 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.67it/s, acc=0.554, loss=0.688] 
Epoch 2 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.71it/s, acc=0.515, loss=0.724]
Epoch 3 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.56it/s, acc=0.554, loss=0.688] 
Epoch 3 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.92it/s, acc=0.515, loss=0.722]
Epoch 4 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.62it/s, acc=0.554, loss=0.688] 
Epoch 4 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.78it/s, acc=0.515, loss=0.721]
Epoch 5 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.73it/s, acc=0.554, loss=0.688] 
Epoch 5 (Val): 100%|██████████| 11/11 [00:00<00:00, 30.80it/s, acc=0.515, loss=0.719]
Epoch 6 (Train): 100%|██████████| 86/86 [00:04<00:00, 20.50it/s, acc=0.554, loss=0.689] 
Epoch 6 (Val): 100%|██████████| 11/1

---------- batch_size_100; lr_0.01; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:04<00:00, 20.19it/s, acc=0.659, loss=0.655]
Epoch 1 (Val): 100%|██████████| 11/11 [00:00<00:00, 33.03it/s, acc=0.515, loss=0.706]
Epoch 2 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.39it/s, acc=0.589, loss=0.672] 
Epoch 2 (Val): 100%|██████████| 11/11 [00:00<00:00, 27.18it/s, acc=0.515, loss=0.721]
Epoch 3 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.93it/s, acc=0.566, loss=0.682] 
Epoch 3 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.28it/s, acc=0.515, loss=0.726]
Epoch 4 (Train): 100%|██████████| 86/86 [00:12<00:00,  7.05it/s, acc=0.566, loss=0.686] 
Epoch 4 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.71it/s, acc=0.515, loss=0.727]
Epoch 5 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.42it/s, acc=0.554, loss=0.687] 
Epoch 5 (Val): 100%|██████████| 11/11 [00:00<00:00, 12.27it/s, acc=0.515, loss=0.726]
Epoch 6 (Train): 100%|██████████| 86/86 [00:05<00:00, 15.11it/s, acc=0.554, loss=0.688] 
Epoch 6 (Val): 100%|██████████| 11/11

---------- batch_size_100; lr_0.025; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.47it/s, acc=0.787, loss=0.502]
Epoch 1 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.96it/s, acc=0.515, loss=1.02]
Epoch 2 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.04it/s, acc=0.67, loss=0.621] 
Epoch 2 (Val): 100%|██████████| 11/11 [00:00<00:00, 23.54it/s, acc=0.515, loss=0.998]
Epoch 3 (Train): 100%|██████████| 86/86 [00:09<00:00,  8.62it/s, acc=0.659, loss=0.636]
Epoch 3 (Val): 100%|██████████| 11/11 [00:00<00:00, 13.80it/s, acc=0.515, loss=0.958]
Epoch 4 (Train): 100%|██████████| 86/86 [00:09<00:00,  8.68it/s, acc=0.647, loss=0.645]
Epoch 4 (Val): 100%|██████████| 11/11 [00:00<00:00, 12.67it/s, acc=0.515, loss=0.925]
Epoch 5 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.40it/s, acc=0.635, loss=0.652]
Epoch 5 (Val): 100%|██████████| 11/11 [00:00<00:00, 11.48it/s, acc=0.515, loss=0.896]
Epoch 6 (Train): 100%|██████████| 86/86 [00:12<00:00,  7.03it/s, acc=0.624, loss=0.657]
Epoch 6 (Val): 100%|██████████| 11/11 [00:0

---------- batch_size_100; lr_0.05; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.21it/s, acc=0.926, loss=0.256]
Epoch 1 (Val): 100%|██████████| 11/11 [00:00<00:00, 27.73it/s, acc=0.515, loss=1.71]
Epoch 2 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.87it/s, acc=0.856, loss=0.376]
Epoch 2 (Val): 100%|██████████| 11/11 [00:00<00:00, 30.19it/s, acc=0.515, loss=1.67]
Epoch 3 (Train): 100%|██████████| 86/86 [00:04<00:00, 20.61it/s, acc=0.845, loss=0.387]
Epoch 3 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.88it/s, acc=0.515, loss=1.65]
Epoch 4 (Train): 100%|██████████| 86/86 [00:04<00:00, 20.08it/s, acc=0.845, loss=0.396]
Epoch 4 (Val): 100%|██████████| 11/11 [00:00<00:00, 26.17it/s, acc=0.515, loss=1.63]
Epoch 5 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.08it/s, acc=0.833, loss=0.404]
Epoch 5 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.90it/s, acc=0.515, loss=1.61]
Epoch 6 (Train): 100%|██████████| 86/86 [00:12<00:00,  7.07it/s, acc=0.833, loss=0.413]
Epoch 6 (Val): 100%|██████████| 11/11 [00:01<00

---------- batch_size_200; lr_0.005; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:04<00:00, 10.20it/s, acc=0.566, loss=0.688]
Epoch 1 (Val): 100%|██████████| 6/6 [00:00<00:00, 15.26it/s, acc=0.556, loss=0.687]
Epoch 2 (Train): 100%|██████████| 43/43 [00:03<00:00, 11.24it/s, acc=0.542, loss=0.691]
Epoch 2 (Val): 100%|██████████| 6/6 [00:00<00:00, 16.69it/s, acc=0.556, loss=0.687]
Epoch 3 (Train): 100%|██████████| 43/43 [00:03<00:00, 10.92it/s, acc=0.519, loss=0.693]
Epoch 3 (Val): 100%|██████████| 6/6 [00:00<00:00, 15.36it/s, acc=0.556, loss=0.687]
Epoch 4 (Train): 100%|██████████| 43/43 [00:03<00:00, 11.45it/s, acc=0.519, loss=0.694] 
Epoch 4 (Val): 100%|██████████| 6/6 [00:00<00:00, 17.99it/s, acc=0.556, loss=0.687]
Epoch 5 (Train): 100%|██████████| 43/43 [00:03<00:00, 12.46it/s, acc=0.519, loss=0.695] 
Epoch 5 (Val): 100%|██████████| 6/6 [00:00<00:00, 15.44it/s, acc=0.556, loss=0.687]
Epoch 6 (Train): 100%|██████████| 43/43 [00:03<00:00, 12.32it/s, acc=0.496, loss=0.696]
Epoch 6 (Val): 100%|██████████| 6/6 [00:00<00:00, 

---------- batch_size_200; lr_0.01; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:08<00:00,  5.23it/s, acc=0.519, loss=0.695] 
Epoch 1 (Val): 100%|██████████| 6/6 [00:00<00:00,  6.11it/s, acc=0.556, loss=0.694]
Epoch 2 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.38it/s, acc=0.519, loss=0.696] 
Epoch 2 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.77it/s, acc=0.556, loss=0.693]
Epoch 3 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.47it/s, acc=0.519, loss=0.696] 
Epoch 3 (Val): 100%|██████████| 6/6 [00:00<00:00,  6.17it/s, acc=0.556, loss=0.693]
Epoch 4 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.45it/s, acc=0.519, loss=0.696] 
Epoch 4 (Val): 100%|██████████| 6/6 [00:00<00:00,  6.11it/s, acc=0.556, loss=0.692]
Epoch 5 (Train): 100%|██████████| 43/43 [00:05<00:00,  7.88it/s, acc=0.519, loss=0.696] 
Epoch 5 (Val): 100%|██████████| 6/6 [00:00<00:00, 15.66it/s, acc=0.556, loss=0.692]
Epoch 6 (Train): 100%|██████████| 43/43 [00:03<00:00, 11.18it/s, acc=0.519, loss=0.696] 
Epoch 6 (Val): 100%|██████████| 6/6 [00:00<00:

---------- batch_size_200; lr_0.025; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:03<00:00, 10.81it/s, acc=0.728, loss=0.58] 
Epoch 1 (Val): 100%|██████████| 6/6 [00:00<00:00, 17.04it/s, acc=0.556, loss=0.793]
Epoch 2 (Train): 100%|██████████| 43/43 [00:03<00:00, 11.77it/s, acc=0.612, loss=0.68] 
Epoch 2 (Val): 100%|██████████| 6/6 [00:00<00:00, 18.16it/s, acc=0.556, loss=0.76] 
Epoch 3 (Train): 100%|██████████| 43/43 [00:03<00:00, 11.09it/s, acc=0.589, loss=0.691]
Epoch 3 (Val): 100%|██████████| 6/6 [00:00<00:00, 17.07it/s, acc=0.556, loss=0.735]
Epoch 4 (Train): 100%|██████████| 43/43 [00:03<00:00, 12.27it/s, acc=0.566, loss=0.693] 
Epoch 4 (Val): 100%|██████████| 6/6 [00:00<00:00, 16.09it/s, acc=0.556, loss=0.721]
Epoch 5 (Train): 100%|██████████| 43/43 [00:03<00:00, 11.61it/s, acc=0.566, loss=0.694]
Epoch 5 (Val): 100%|██████████| 6/6 [00:00<00:00, 16.00it/s, acc=0.556, loss=0.712]
Epoch 6 (Train): 100%|██████████| 43/43 [00:03<00:00, 10.80it/s, acc=0.542, loss=0.694] 
Epoch 6 (Val): 100%|██████████| 6/6 [00:00<00:00, 

---------- batch_size_200; lr_0.05; optimizer_SGD; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:10<00:00,  4.20it/s, acc=0.775, loss=0.485]
Epoch 1 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.67it/s, acc=0.556, loss=1.08]
Epoch 2 (Train): 100%|██████████| 43/43 [00:10<00:00,  4.25it/s, acc=0.705, loss=0.599]
Epoch 2 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.30it/s, acc=0.556, loss=1.06]
Epoch 3 (Train): 100%|██████████| 43/43 [00:08<00:00,  4.78it/s, acc=0.705, loss=0.61] 
Epoch 3 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.71it/s, acc=0.556, loss=1.01]
Epoch 4 (Train): 100%|██████████| 43/43 [00:10<00:00,  4.26it/s, acc=0.682, loss=0.625]
Epoch 4 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.30it/s, acc=0.556, loss=0.963]
Epoch 5 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.60it/s, acc=0.682, loss=0.64] 
Epoch 5 (Val): 100%|██████████| 6/6 [00:00<00:00,  7.00it/s, acc=0.556, loss=0.914]
Epoch 6 (Train): 100%|██████████| 43/43 [00:08<00:00,  4.91it/s, acc=0.635, loss=0.653]
Epoch 6 (Val): 100%|██████████| 6/6 [00:00<00:00,  6.59

---------- batch_size_50; lr_0.005; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:16<00:00, 10.10it/s, acc=0.908, loss=0.327]
Epoch 1 (Val): 100%|██████████| 22/22 [00:01<00:00, 18.08it/s, acc=0.515, loss=1.18] 
Epoch 2 (Train): 100%|██████████| 171/171 [00:16<00:00, 10.14it/s, acc=0.75, loss=0.564] 
Epoch 2 (Val): 100%|██████████| 22/22 [00:00<00:00, 30.74it/s, acc=0.515, loss=0.987]
Epoch 3 (Train): 100%|██████████| 171/171 [00:15<00:00, 10.84it/s, acc=0.686, loss=0.621]
Epoch 3 (Val): 100%|██████████| 22/22 [00:01<00:00, 18.58it/s, acc=0.515, loss=0.887]
Epoch 4 (Train): 100%|██████████| 171/171 [00:16<00:00, 10.13it/s, acc=0.645, loss=0.65] 
Epoch 4 (Val): 100%|██████████| 22/22 [00:01<00:00, 17.75it/s, acc=0.515, loss=0.829]
Epoch 5 (Train): 100%|██████████| 171/171 [00:14<00:00, 11.94it/s, acc=0.622, loss=0.664]
Epoch 5 (Val): 100%|██████████| 22/22 [00:00<00:00, 41.64it/s, acc=0.515, loss=0.793]
Epoch 6 (Train): 100%|██████████| 171/171 [00:12<00:00, 13.67it/s, acc=0.604, loss=0.672]
Epoch 6 (Val): 100%|██████████

---------- batch_size_50; lr_0.01; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.56it/s, acc=0.967, loss=0.145]
Epoch 1 (Val): 100%|██████████| 22/22 [00:01<00:00, 17.46it/s, acc=0.515, loss=1.97] 
Epoch 2 (Train): 100%|██████████| 171/171 [00:17<00:00, 10.01it/s, acc=0.908, loss=0.289]
Epoch 2 (Val): 100%|██████████| 22/22 [00:00<00:00, 63.58it/s, acc=0.515, loss=1.64] 
Epoch 3 (Train): 100%|██████████| 171/171 [00:16<00:00, 10.44it/s, acc=0.861, loss=0.377]
Epoch 3 (Val): 100%|██████████| 22/22 [00:01<00:00, 18.40it/s, acc=0.515, loss=1.47] 
Epoch 4 (Train): 100%|██████████| 171/171 [00:16<00:00, 10.32it/s, acc=0.82, loss=0.443] 
Epoch 4 (Val): 100%|██████████| 22/22 [00:01<00:00, 17.21it/s, acc=0.515, loss=1.34] 
Epoch 5 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.93it/s, acc=0.785, loss=0.495]
Epoch 5 (Val): 100%|██████████| 22/22 [00:01<00:00, 19.90it/s, acc=0.515, loss=1.23] 
Epoch 6 (Train): 100%|██████████| 171/171 [00:15<00:00, 11.11it/s, acc=0.756, loss=0.536]
Epoch 6 (Val): 100%|██████████

---------- batch_size_50; lr_0.025; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.93it/s, acc=0.973, loss=0.106]
Epoch 1 (Val): 100%|██████████| 22/22 [00:01<00:00, 14.92it/s, acc=0.515, loss=2.21] 
Epoch 2 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.97it/s, acc=0.92, loss=0.266] 
Epoch 2 (Val): 100%|██████████| 22/22 [00:01<00:00, 16.59it/s, acc=0.515, loss=1.8]  
Epoch 3 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.89it/s, acc=0.873, loss=0.361]
Epoch 3 (Val): 100%|██████████| 22/22 [00:01<00:00, 18.22it/s, acc=0.515, loss=1.57] 
Epoch 4 (Train): 100%|██████████| 171/171 [00:16<00:00, 10.46it/s, acc=0.826, loss=0.438]
Epoch 4 (Val): 100%|██████████| 22/22 [00:01<00:00, 18.21it/s, acc=0.515, loss=1.37] 
Epoch 5 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.92it/s, acc=0.774, loss=0.51] 
Epoch 5 (Val): 100%|██████████| 22/22 [00:01<00:00, 19.23it/s, acc=0.515, loss=1.24] 
Epoch 6 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.98it/s, acc=0.733, loss=0.557]
Epoch 6 (Val): 100%|██████████

---------- batch_size_50; lr_0.05; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 171/171 [00:19<00:00,  8.77it/s, acc=0.978, loss=0.0854]
Epoch 1 (Val): 100%|██████████| 22/22 [00:01<00:00, 16.97it/s, acc=0.515, loss=2.52] 
Epoch 2 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.71it/s, acc=0.943, loss=0.206]
Epoch 2 (Val): 100%|██████████| 22/22 [00:01<00:00, 17.88it/s, acc=0.515, loss=2.07] 
Epoch 3 (Train): 100%|██████████| 171/171 [00:18<00:00,  9.44it/s, acc=0.908, loss=0.28] 
Epoch 3 (Val): 100%|██████████| 22/22 [00:01<00:00, 16.33it/s, acc=0.515, loss=1.83] 
Epoch 4 (Train): 100%|██████████| 171/171 [00:18<00:00,  9.31it/s, acc=0.873, loss=0.343]
Epoch 4 (Val): 100%|██████████| 22/22 [00:01<00:00, 17.47it/s, acc=0.515, loss=1.66] 
Epoch 5 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.73it/s, acc=0.838, loss=0.406]
Epoch 5 (Val): 100%|██████████| 22/22 [00:01<00:00, 17.11it/s, acc=0.515, loss=1.55] 
Epoch 6 (Train): 100%|██████████| 171/171 [00:17<00:00,  9.65it/s, acc=0.82, loss=0.434] 
Epoch 6 (Val): 100%|█████████

---------- batch_size_100; lr_0.005; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.30it/s, acc=0.787, loss=0.513]
Epoch 1 (Val): 100%|██████████| 11/11 [00:00<00:00, 21.90it/s, acc=0.515, loss=0.936]
Epoch 2 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.71it/s, acc=0.612, loss=0.691]
Epoch 2 (Val): 100%|██████████| 11/11 [00:00<00:00, 31.49it/s, acc=0.515, loss=0.798]
Epoch 3 (Train): 100%|██████████| 86/86 [00:04<00:00, 20.39it/s, acc=0.577, loss=0.695] 
Epoch 3 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.73it/s, acc=0.515, loss=0.751]
Epoch 4 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.01it/s, acc=0.554, loss=0.695] 
Epoch 4 (Val): 100%|██████████| 11/11 [00:00<00:00, 30.44it/s, acc=0.515, loss=0.73]
Epoch 5 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.78it/s, acc=0.554, loss=0.695] 
Epoch 5 (Val): 100%|██████████| 11/11 [00:00<00:00, 30.23it/s, acc=0.515, loss=0.719]
Epoch 6 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.82it/s, acc=0.531, loss=0.695] 
Epoch 6 (Val): 100%|██████████| 11/11 [

---------- batch_size_100; lr_0.01; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.18it/s, acc=0.915, loss=0.294] 
Epoch 1 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.35it/s, acc=0.515, loss=1.32]
Epoch 2 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.51it/s, acc=0.763, loss=0.555]
Epoch 2 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.94it/s, acc=0.515, loss=1.08]
Epoch 3 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.55it/s, acc=0.694, loss=0.621]
Epoch 3 (Val): 100%|██████████| 11/11 [00:00<00:00, 11.14it/s, acc=0.515, loss=0.957]
Epoch 4 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.21it/s, acc=0.647, loss=0.654]
Epoch 4 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.35it/s, acc=0.515, loss=0.87]
Epoch 5 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.34it/s, acc=0.612, loss=0.673]
Epoch 5 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.58it/s, acc=0.515, loss=0.817]
Epoch 6 (Train): 100%|██████████| 86/86 [00:10<00:00,  8.32it/s, acc=0.601, loss=0.681] 
Epoch 6 (Val): 100%|██████████| 11/11 [00:0

---------- batch_size_100; lr_0.025; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.26it/s, acc=0.949, loss=0.183] 
Epoch 1 (Val): 100%|██████████| 11/11 [00:00<00:00, 13.29it/s, acc=0.515, loss=1.81]
Epoch 2 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.33it/s, acc=0.856, loss=0.408]
Epoch 2 (Val): 100%|██████████| 11/11 [00:00<00:00, 11.01it/s, acc=0.515, loss=1.44]
Epoch 3 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.49it/s, acc=0.787, loss=0.519]
Epoch 3 (Val): 100%|██████████| 11/11 [00:00<00:00, 11.05it/s, acc=0.515, loss=1.17]
Epoch 4 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.33it/s, acc=0.728, loss=0.583]
Epoch 4 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.79it/s, acc=0.515, loss=1.03]
Epoch 5 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.40it/s, acc=0.682, loss=0.624]
Epoch 5 (Val): 100%|██████████| 11/11 [00:01<00:00, 10.98it/s, acc=0.515, loss=0.927]
Epoch 6 (Train): 100%|██████████| 86/86 [00:11<00:00,  7.38it/s, acc=0.647, loss=0.65] 
Epoch 6 (Val): 100%|██████████| 11/11 [00:01<

---------- batch_size_100; lr_0.05; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 86/86 [00:05<00:00, 16.19it/s, acc=0.973, loss=0.127] 
Epoch 1 (Val): 100%|██████████| 11/11 [00:00<00:00, 24.86it/s, acc=0.515, loss=2.27]
Epoch 2 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.97it/s, acc=0.891, loss=0.318]
Epoch 2 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.43it/s, acc=0.515, loss=1.73]
Epoch 3 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.50it/s, acc=0.856, loss=0.389]
Epoch 3 (Val): 100%|██████████| 11/11 [00:00<00:00, 30.16it/s, acc=0.515, loss=1.54]
Epoch 4 (Train): 100%|██████████| 86/86 [00:04<00:00, 17.94it/s, acc=0.81, loss=0.46]  
Epoch 4 (Val): 100%|██████████| 11/11 [00:00<00:00, 27.55it/s, acc=0.515, loss=1.37]
Epoch 5 (Train): 100%|██████████| 86/86 [00:04<00:00, 18.81it/s, acc=0.775, loss=0.503]
Epoch 5 (Val): 100%|██████████| 11/11 [00:00<00:00, 29.77it/s, acc=0.515, loss=1.32]
Epoch 6 (Train): 100%|██████████| 86/86 [00:04<00:00, 19.79it/s, acc=0.752, loss=0.532]
Epoch 6 (Val): 100%|██████████| 11/11 [00:00<0

---------- batch_size_200; lr_0.005; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:03<00:00, 12.73it/s, acc=0.682, loss=0.647]
Epoch 1 (Val): 100%|██████████| 6/6 [00:00<00:00, 17.75it/s, acc=0.556, loss=0.733]
Epoch 2 (Train): 100%|██████████| 43/43 [00:03<00:00, 12.02it/s, acc=0.566, loss=0.707]
Epoch 2 (Val): 100%|██████████| 6/6 [00:00<00:00, 18.38it/s, acc=0.556, loss=0.702]
Epoch 3 (Train): 100%|██████████| 43/43 [00:03<00:00, 12.46it/s, acc=0.519, loss=0.703] 
Epoch 3 (Val): 100%|██████████| 6/6 [00:00<00:00, 19.42it/s, acc=0.556, loss=0.694]
Epoch 4 (Train): 100%|██████████| 43/43 [00:03<00:00, 12.95it/s, acc=0.519, loss=0.701]
Epoch 4 (Val): 100%|██████████| 6/6 [00:00<00:00, 18.07it/s, acc=0.556, loss=0.69] 
Epoch 5 (Train): 100%|██████████| 43/43 [00:03<00:00, 11.52it/s, acc=0.519, loss=0.7]   
Epoch 5 (Val): 100%|██████████| 6/6 [00:00<00:00, 15.82it/s, acc=0.556, loss=0.689]
Epoch 6 (Train): 100%|██████████| 43/43 [00:04<00:00,  9.84it/s, acc=0.519, loss=0.699] 
Epoch 6 (Val): 100%|██████████| 6/6 [00:00<00:00,

---------- batch_size_200; lr_0.01; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.64it/s, acc=0.868, loss=0.413]
Epoch 1 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.76it/s, acc=0.556, loss=1.1] 
Epoch 2 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.41it/s, acc=0.659, loss=0.691]
Epoch 2 (Val): 100%|██████████| 6/6 [00:00<00:00,  6.36it/s, acc=0.556, loss=0.835]
Epoch 3 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.36it/s, acc=0.589, loss=0.701]
Epoch 3 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.98it/s, acc=0.556, loss=0.756]
Epoch 4 (Train): 100%|██████████| 43/43 [00:08<00:00,  4.93it/s, acc=0.566, loss=0.701]
Epoch 4 (Val): 100%|██████████| 6/6 [00:00<00:00,  6.07it/s, acc=0.556, loss=0.724]
Epoch 5 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.45it/s, acc=0.542, loss=0.7]  
Epoch 5 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.95it/s, acc=0.556, loss=0.709]
Epoch 6 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.33it/s, acc=0.542, loss=0.7]   
Epoch 6 (Val): 100%|██████████| 6/6 [00:00<00:00,  6

---------- batch_size_200; lr_0.025; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:06<00:00,  6.98it/s, acc=0.891, loss=0.312] 
Epoch 1 (Val): 100%|██████████| 6/6 [00:00<00:00, 14.63it/s, acc=0.556, loss=1.48]
Epoch 2 (Train): 100%|██████████| 43/43 [00:07<00:00,  6.09it/s, acc=0.728, loss=0.637]
Epoch 2 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.95it/s, acc=0.556, loss=1.03]
Epoch 3 (Train): 100%|██████████| 43/43 [00:10<00:00,  4.29it/s, acc=0.635, loss=0.697]
Epoch 3 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.90it/s, acc=0.556, loss=0.847]
Epoch 4 (Train): 100%|██████████| 43/43 [00:09<00:00,  4.34it/s, acc=0.589, loss=0.702]
Epoch 4 (Val): 100%|██████████| 6/6 [00:01<00:00,  5.88it/s, acc=0.556, loss=0.768]
Epoch 5 (Train): 100%|██████████| 43/43 [00:05<00:00,  7.37it/s, acc=0.542, loss=0.703]
Epoch 5 (Val): 100%|██████████| 6/6 [00:00<00:00, 15.34it/s, acc=0.556, loss=0.724]
Epoch 6 (Train): 100%|██████████| 43/43 [00:04<00:00, 10.58it/s, acc=0.542, loss=0.702]
Epoch 6 (Val): 100%|██████████| 6/6 [00:00<00:00, 14.

---------- batch_size_200; lr_0.05; optimizer_Adagrad; hidden_dim_16; num_layers_2 ----------


Epoch 1 (Train): 100%|██████████| 43/43 [00:04<00:00,  9.37it/s, acc=0.915, loss=0.239] 
Epoch 1 (Val): 100%|██████████| 6/6 [00:00<00:00, 13.36it/s, acc=0.556, loss=1.81]
Epoch 2 (Train):  19%|█▊        | 8/43 [00:00<00:03, 10.38it/s, acc=0.625, loss=1.17]


KeyboardInterrupt: 

## 3. Model Configurations Comparison

In [9]:
model_configurations_results_df = pd.DataFrame.from_dict(all_configurations_results)
model_configurations_results_df.sort_values(by=["val_accuracy"], ascending=False).reset_index(drop=True)

Unnamed: 0,batch_size,learning_rate,optimizer_name,hidden_dim,num_layers,train_loss,train_accuracy,val_loss,val_accuracy,num_of_epochs
0,200,0.05,SGD,16,2,0.692284,0.565698,0.71611,0.555833,11
1,200,0.005,SGD,16,2,0.696331,0.519186,0.687343,0.555833,11
2,200,0.01,Adagrad,16,2,0.698037,0.519186,0.688852,0.555833,11
3,200,0.005,Adagrad,16,2,0.697021,0.49593,0.686907,0.555833,11
4,200,0.025,SGD,16,2,0.695644,0.542442,0.696105,0.555833,11
5,200,0.01,SGD,16,2,0.696223,0.519186,0.690097,0.555833,11
6,200,0.025,Adagrad,16,2,0.698202,0.49593,0.687463,0.555833,11
7,100,0.05,SGD,16,2,0.455782,0.798256,1.509111,0.515455,11
8,100,0.025,SGD,16,2,0.672918,0.588953,0.794848,0.515455,11
9,100,0.01,SGD,16,2,0.689455,0.55407,0.719162,0.515455,11


# a. Final Configuration of best model

# b. Accuracy on Testset

# c. Strategies to derive final sentence representation