# Update Embedding Matrix

This notebook will highlight the process, result and insights obtained from allowing updates to the embedding matrix during training.

## Import Embedding Matrix

In [1]:
import json
from pathlib import Path

import numpy as np

embedding_path = Path("models/w2v_matrix.npy")
index_from_word_path = Path("models/w2v_index.json")

embedding_matrix = np.load(embedding_path)
with index_from_word_path.open() as f:
    index_from_word = json.load(f)

## Prepare Dataaset

(a) Import datasets

In [2]:
from utils.text import tokenize
from datasets import load_dataset

dataset = load_dataset("rotten_tomatoes")
train_dataset = tokenize(dataset["train"])
val_dataset = tokenize(dataset["validation"])
test_dataset = tokenize(dataset["test"])

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to /Users/juinlee/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package treebank to
[nltk_data]     /Users/juinlee/nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/juinlee/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
from utils.text import token_to_index

train_dataset = token_to_index(dataset=train_dataset, index_from_word=index_from_word)
val_dataset = token_to_index(dataset=val_dataset, index_from_word=index_from_word)
test_dataset = token_to_index(dataset=test_dataset, index_from_word=index_from_word)

train_dataset

Dataset({
    features: ['text', 'label', 'tokens', 'original_len', 'indexes'],
    num_rows: 8530
})

In [4]:
train_dataset = train_dataset.select_columns(["label", "original_len", "indexes"])
val_dataset = val_dataset.select_columns(["label", "original_len", "indexes"])
test_dataset = test_dataset.select_columns(["label", "original_len", "indexes"])

In [5]:
train_dataset.set_format(type="torch")
val_dataset.set_format(type="torch")
test_dataset.set_format(type="torch")

## Train RNN with Update to Embeddings

We use Optuna to perform heuristic search on optimal configuration when embeddings are updatabled during training.

In [6]:
import optuna
from utils.train import train_rnn_model_with_parameters

SEARCH_SPACE = {
    "batch_size": [32, 64, 128, 256, 512, 1024, 2048],
    "learning_rate": [1e-1, 1e-2, 1e-3, 1e-4],
    "optimizer_name": ["Adam"],
    # RNN Model Parameters
    "hidden_dim": [256, 128, 64, 32],
    "num_layers": [1, 2, 4],
    "sentence_representation_type": ["last", "average", "max"],
}

# Define the objective function for Optuna
def objective(trial):
    # Sample hyperparameters from search space
    hidden_dim = trial.suggest_categorical("hidden_dim", SEARCH_SPACE["hidden_dim"])
    num_layers = trial.suggest_int("num_layers", min(SEARCH_SPACE["num_layers"]), max(SEARCH_SPACE["num_layers"]))
    optimizer_name = trial.suggest_categorical("optimizer_name", SEARCH_SPACE["optimizer_name"])
    batch_size = trial.suggest_categorical("batch_size", SEARCH_SPACE["batch_size"])
    learning_rate = trial.suggest_categorical("learning_rate", SEARCH_SPACE["learning_rate"])
    sentence_representation_type = trial.suggest_categorical("sentence_representation_type", SEARCH_SPACE["sentence_representation_type"])
    
    log_message = f"---------- batch_size_{batch_size}; lr_{learning_rate}; optimizer_{optimizer_name}; hidden_dim_{hidden_dim}; num_layers_{num_layers}; sentence_representation_{sentence_representation_type} ----------"
    print(log_message)

    # Train the model and get the validation loss or accuracy
    val_acc = train_rnn_model_with_parameters(
        embedding_matrix=embedding_matrix,
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        batch_size=batch_size,
        learning_rate=learning_rate,
        optimizer_name=optimizer_name,
        hidden_dim=hidden_dim,
        num_layers=num_layers,
        sentence_representation_type=sentence_representation_type,
        show_progress=True,
        freeze_embedding=False,
        log_dir="rnn/test/w2v"
    )
    
    # Return the metric to minimize (for example, validation loss)
    return val_acc

# Set up the Optuna study
study = optuna.create_study(direction="maximize")  # Change to "maximize" if you're maximizing an objective
study.optimize(objective, n_trials=50)  # Specify the number of trials you want

# Get the best hyperparameters
best_params = study.best_params
print("Best hyperparameters:", best_params)


[I 2024-11-03 02:18:21,788] A new study created in memory with name: no-name-30756290-9226-488e-959b-e1cd0665e67f
Seed set to 42
/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'rnn_model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['rnn_model'])`.


---------- batch_size_512; lr_0.0001; optimizer_Adam; hidden_dim_32; num_layers_4; sentence_representation_average ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.671    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:419: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                           

/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:419: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (17) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 15: 100%|██████████| 17/17 [00:11<00:00,  1.51it/s, v_num=0, train_loss=0.505, train_acc=0.822, val_loss=0.596, val_acc=0.501]


[I 2024-11-03 02:21:54,512] Trial 0 finished with value: 0.7204502820968628 and parameters: {'hidden_dim': 32, 'num_layers': 4, 'optimizer_name': 'Adam', 'batch_size': 512, 'learning_rate': 0.0001, 'sentence_representation_type': 'average'}. Best is trial 0 with value: 0.7204502820968628.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.0 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
20.119    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


---------- batch_size_2048; lr_0.01; optimizer_Adam; hidden_dim_128; num_layers_3; sentence_representation_last ----------
                                                                           

/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (5) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 6: 100%|██████████| 5/5 [00:12<00:00,  0.41it/s, v_num=0, train_loss=0.388, train_acc=0.836, val_loss=0.810, val_acc=0.574]


[I 2024-11-03 02:23:52,028] Trial 1 finished with value: 0.5741088390350342 and parameters: {'hidden_dim': 128, 'num_layers': 3, 'optimizer_name': 'Adam', 'batch_size': 2048, 'learning_rate': 0.01, 'sentence_representation_type': 'last'}. Best is trial 0 with value: 0.7204502820968628.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.2 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.2 M     Trainable params
0         Non-trainable params
5.2 M     Total params
20.831    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


---------- batch_size_1024; lr_0.0001; optimizer_Adam; hidden_dim_256; num_layers_2; sentence_representation_last ----------
                                                                           

/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (9) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 12: 100%|██████████| 9/9 [00:12<00:00,  0.71it/s, v_num=0, train_loss=0.327, train_acc=0.883, val_loss=0.513, val_acc=0.753]


[I 2024-11-03 02:27:09,279] Trial 2 finished with value: 0.7598043084144592 and parameters: {'hidden_dim': 256, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.0001, 'sentence_representation_type': 'last'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.703    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


---------- batch_size_2048; lr_0.1; optimizer_Adam; hidden_dim_64; num_layers_1; sentence_representation_last ----------
Epoch 3: 100%|██████████| 5/5 [00:10<00:00,  0.46it/s, v_num=0, train_loss=0.716, train_acc=0.597, val_loss=0.903, val_acc=0.508]


[I 2024-11-03 02:28:24,679] Trial 3 finished with value: 0.508442759513855 and parameters: {'hidden_dim': 64, 'num_layers': 1, 'optimizer_name': 'Adam', 'batch_size': 2048, 'learning_rate': 0.1, 'sentence_representation_type': 'last'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_4; sentence_representation_max ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.0 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.803    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 6: 100%|██████████| 9/9 [00:11<00:00,  0.77it/s, v_num=0, train_loss=0.190, train_acc=0.928, val_loss=0.651, val_acc=0.726]


[I 2024-11-03 02:30:18,587] Trial 4 finished with value: 0.7429304718971252 and parameters: {'hidden_dim': 64, 'num_layers': 4, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_512; lr_0.1; optimizer_Adam; hidden_dim_64; num_layers_2; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.736    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 7: 100%|██████████| 17/17 [00:11<00:00,  1.52it/s, v_num=0, train_loss=0.652, train_acc=0.611, val_loss=0.727, val_acc=0.360]


[I 2024-11-03 02:32:20,262] Trial 5 finished with value: 0.37757259607315063 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 512, 'learning_rate': 0.1, 'sentence_representation_type': 'last'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


---------- batch_size_1024; lr_0.0001; optimizer_Adam; hidden_dim_32; num_layers_3; sentence_representation_max ----------



  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.663    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 29: 100%|██████████| 9/9 [00:11<00:00,  0.81it/s, v_num=0, train_loss=0.349, train_acc=0.907, val_loss=0.559, val_acc=0.726]


[I 2024-11-03 02:38:26,251] Trial 6 finished with value: 0.7276918292045593 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.0001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_128; lr_0.0001; optimizer_Adam; hidden_dim_32; num_layers_1; sentence_representation_max ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.646    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 14: 100%|██████████| 67/67 [00:11<00:00,  5.73it/s, v_num=0, train_loss=0.210, train_acc=0.954, val_loss=0.521, val_acc=0.431]


[I 2024-11-03 02:41:53,121] Trial 7 finished with value: 0.4313875436782837 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'optimizer_name': 'Adam', 'batch_size': 128, 'learning_rate': 0.0001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_128; lr_0.0001; optimizer_Adam; hidden_dim_256; num_layers_1; sentence_representation_max ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.1 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.305    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 7: 100%|██████████| 67/67 [00:13<00:00,  4.95it/s, v_num=0, train_loss=0.275, train_acc=0.924, val_loss=0.545, val_acc=0.423]


[I 2024-11-03 02:44:14,284] Trial 8 finished with value: 0.42862069606781006 and parameters: {'hidden_dim': 256, 'num_layers': 1, 'optimizer_name': 'Adam', 'batch_size': 128, 'learning_rate': 0.0001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_512; lr_0.01; optimizer_Adam; hidden_dim_256; num_layers_4; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.5 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.5 M     Trainable params
0         Non-trainable params
5.5 M     Total params
21.884    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 6: 100%|██████████| 17/17 [00:13<00:00,  1.22it/s, v_num=0, train_loss=0.694, train_acc=0.499, val_loss=0.693, val_acc=0.402]


[I 2024-11-03 02:46:27,708] Trial 9 finished with value: 0.4822967052459717 and parameters: {'hidden_dim': 256, 'num_layers': 4, 'optimizer_name': 'Adam', 'batch_size': 512, 'learning_rate': 0.01, 'sentence_representation_type': 'last'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_32; lr_0.001; optimizer_Adam; hidden_dim_256; num_layers_2; sentence_representation_average ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.2 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.2 M     Trainable params
0         Non-trainable params
5.2 M     Total params
20.831    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 3: 100%|██████████| 267/267 [00:17<00:00, 15.08it/s, v_num=0, train_loss=0.0528, train_acc=1.000, val_loss=0.995, val_acc=0.382]


[I 2024-11-03 02:48:11,124] Trial 10 finished with value: 0.4034603536128998 and parameters: {'hidden_dim': 256, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 32, 'learning_rate': 0.001, 'sentence_representation_type': 'average'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_3; sentence_representation_max ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.770    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 7: 100%|██████████| 9/9 [00:11<00:00,  0.79it/s, v_num=0, train_loss=0.163, train_acc=0.941, val_loss=0.719, val_acc=0.729]


[I 2024-11-03 02:50:14,330] Trial 11 finished with value: 0.7494197487831116 and parameters: {'hidden_dim': 64, 'num_layers': 3, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_64; lr_0.001; optimizer_Adam; hidden_dim_128; num_layers_2; sentence_representation_max ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.0 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.987    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 3: 100%|██████████| 134/134 [00:13<00:00,  9.63it/s, v_num=0, train_loss=0.0112, train_acc=1.000, val_loss=0.912, val_acc=0.398]


[I 2024-11-03 02:51:41,966] Trial 12 finished with value: 0.4100709557533264 and parameters: {'hidden_dim': 128, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 64, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_256; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_3; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.770    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


                                                                           

/Users/juinlee/Documents/GitHub/sc4002-nlp-sentiment-classification/venv/lib/python3.9/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (34) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 4: 100%|██████████| 34/34 [00:11<00:00,  2.91it/s, v_num=0, train_loss=0.0759, train_acc=0.975, val_loss=0.926, val_acc=0.445]


[I 2024-11-03 02:53:11,806] Trial 13 finished with value: 0.48146820068359375 and parameters: {'hidden_dim': 64, 'num_layers': 3, 'optimizer_name': 'Adam', 'batch_size': 256, 'learning_rate': 0.001, 'sentence_representation_type': 'last'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_1024; lr_0.0001; optimizer_Adam; hidden_dim_256; num_layers_3; sentence_representation_average ----------


[I 2024-11-03 02:53:12,141] Trial 14 finished with value: 0.7310747504234314 and parameters: {'hidden_dim': 256, 'num_layers': 3, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.0001, 'sentence_representation_type': 'average'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.736    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


[Skipping] rnn/test/w2v/batch_size_1024-lr_0.0001-optimizer_Adam-hidden_dim_256-num_layers_3-sr_type_average-freeze_False
---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_2; sentence_representation_max ----------
Epoch 6: 100%|██████████| 9/9 [00:11<00:00,  0.79it/s, v_num=0, train_loss=0.175, train_acc=0.933, val_loss=0.636, val_acc=0.748]


[I 2024-11-03 02:55:03,498] Trial 15 finished with value: 0.755849301815033 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_1024; lr_0.1; optimizer_Adam; hidden_dim_256; num_layers_2; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.2 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.2 M     Trainable params
0         Non-trainable params
5.2 M     Total params
20.831    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 16: 100%|██████████| 9/9 [00:12<00:00,  0.71it/s, v_num=0, train_loss=0.694, train_acc=0.540, val_loss=0.731, val_acc=0.520]


[I 2024-11-03 02:59:11,603] Trial 16 finished with value: 0.5201090574264526 and parameters: {'hidden_dim': 256, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.1, 'sentence_representation_type': 'last'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_256; lr_0.01; optimizer_Adam; hidden_dim_128; num_layers_2; sentence_representation_max ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.0 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.987    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 34/34 [00:12<00:00,  2.68it/s, v_num=0, train_loss=0.295, train_acc=0.883, val_loss=0.707, val_acc=0.387]


[I 2024-11-03 03:00:47,643] Trial 17 finished with value: 0.4178423583507538 and parameters: {'hidden_dim': 128, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 256, 'learning_rate': 0.01, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_64; lr_0.001; optimizer_Adam; hidden_dim_256; num_layers_1; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.1 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.305    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 3: 100%|██████████| 134/134 [00:13<00:00,  9.75it/s, v_num=0, train_loss=0.0627, train_acc=1.000, val_loss=0.864, val_acc=0.399]


[I 2024-11-03 03:02:15,356] Trial 18 finished with value: 0.40644070506095886 and parameters: {'hidden_dim': 256, 'num_layers': 1, 'optimizer_name': 'Adam', 'batch_size': 64, 'learning_rate': 0.001, 'sentence_representation_type': 'last'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_32; lr_0.0001; optimizer_Adam; hidden_dim_64; num_layers_2; sentence_representation_average ----------


[I 2024-11-03 03:02:15,975] Trial 19 finished with value: 0.3878194987773895 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 32, 'learning_rate': 0.0001, 'sentence_representation_type': 'average'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.703    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


[Skipping] rnn/test/w2v/batch_size_32-lr_0.0001-optimizer_Adam-hidden_dim_64-num_layers_2-sr_type_average-freeze_False
---------- batch_size_1024; lr_0.0001; optimizer_Adam; hidden_dim_64; num_layers_1; sentence_representation_max ----------
Epoch 28: 100%|██████████| 9/9 [00:11<00:00,  0.80it/s, v_num=0, train_loss=0.332, train_acc=0.873, val_loss=0.521, val_acc=0.736]


[I 2024-11-03 03:08:11,939] Trial 20 finished with value: 0.7392423748970032 and parameters: {'hidden_dim': 64, 'num_layers': 1, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.0001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_3; sentence_representation_max ----------


[I 2024-11-03 03:08:12,546] Trial 21 finished with value: 0.7494197487831116 and parameters: {'hidden_dim': 64, 'num_layers': 3, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42
[I 2024-11-03 03:08:12,662] Trial 22 finished with value: 0.755849301815033 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


[Skipping] rnn/test/w2v/batch_size_1024-lr_0.001-optimizer_Adam-hidden_dim_64-num_layers_3-sr_type_max-freeze_False
---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_2; sentence_representation_max ----------
[Skipping] rnn/test/w2v/batch_size_1024-lr_0.001-optimizer_Adam-hidden_dim_64-num_layers_2-sr_type_max-freeze_False
---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_2; sentence_representation_max ----------


[I 2024-11-03 03:08:12,779] Trial 23 finished with value: 0.755849301815033 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42
[I 2024-11-03 03:08:12,897] Trial 24 finished with value: 0.755849301815033 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 2 with value: 0.7598043084144592.
Seed set to 42


[Skipping] rnn/test/w2v/batch_size_1024-lr_0.001-optimizer_Adam-hidden_dim_64-num_layers_2-sr_type_max-freeze_False
---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_64; num_layers_2; sentence_representation_max ----------
[Skipping] rnn/test/w2v/batch_size_1024-lr_0.001-optimizer_Adam-hidden_dim_64-num_layers_2-sr_type_max-freeze_False
---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_256; num_layers_2; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.2 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.2 M     Trainable params
0         Non-trainable params
5.2 M     Total params
20.831    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 9/9 [00:12<00:00,  0.69it/s, v_num=0, train_loss=0.265, train_acc=0.914, val_loss=0.601, val_acc=0.765]


[I 2024-11-03 03:09:52,914] Trial 25 finished with value: 0.7648007273674011 and parameters: {'hidden_dim': 256, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'last'}. Best is trial 25 with value: 0.7648007273674011.
Seed set to 42


---------- batch_size_1024; lr_0.01; optimizer_Adam; hidden_dim_256; num_layers_2; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.2 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.2 M     Trainable params
0         Non-trainable params
5.2 M     Total params
20.831    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 9/9 [00:12<00:00,  0.71it/s, v_num=0, train_loss=0.482, train_acc=0.740, val_loss=0.823, val_acc=0.520]


[I 2024-11-03 03:11:32,168] Trial 26 finished with value: 0.5332277417182922 and parameters: {'hidden_dim': 256, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.01, 'sentence_representation_type': 'last'}. Best is trial 25 with value: 0.7648007273674011.
Seed set to 42


---------- batch_size_128; lr_0.1; optimizer_Adam; hidden_dim_256; num_layers_1; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.1 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.305    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 7: 100%|██████████| 67/67 [00:12<00:00,  5.16it/s, v_num=0, train_loss=1.260, train_acc=0.380, val_loss=1.240, val_acc=0.305]


[I 2024-11-03 03:13:47,587] Trial 27 finished with value: 0.3051903247833252 and parameters: {'hidden_dim': 256, 'num_layers': 1, 'optimizer_name': 'Adam', 'batch_size': 128, 'learning_rate': 0.1, 'sentence_representation_type': 'last'}. Best is trial 25 with value: 0.7648007273674011.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.2 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.2 M     Trainable params
0         Non-trainable params
5.2 M     Total params
20.831    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


---------- batch_size_32; lr_0.001; optimizer_Adam; hidden_dim_256; num_layers_2; sentence_representation_last ----------
Epoch 3: 100%|██████████| 267/267 [00:18<00:00, 14.81it/s, v_num=0, train_loss=0.214, train_acc=0.938, val_loss=0.634, val_acc=0.381] 


[I 2024-11-03 03:15:33,675] Trial 28 finished with value: 0.39418506622314453 and parameters: {'hidden_dim': 256, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 32, 'learning_rate': 0.001, 'sentence_representation_type': 'last'}. Best is trial 25 with value: 0.7648007273674011.
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.663    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


---------- batch_size_256; lr_0.0001; optimizer_Adam; hidden_dim_32; num_layers_3; sentence_representation_average ----------
Epoch 12: 100%|██████████| 34/34 [00:11<00:00,  2.96it/s, v_num=0, train_loss=0.393, train_acc=0.892, val_loss=0.568, val_acc=0.442]


[I 2024-11-03 03:18:35,138] Trial 29 finished with value: 0.600375235080719 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'optimizer_name': 'Adam', 'batch_size': 256, 'learning_rate': 0.0001, 'sentence_representation_type': 'average'}. Best is trial 25 with value: 0.7648007273674011.
Seed set to 42


---------- batch_size_64; lr_0.0001; optimizer_Adam; hidden_dim_256; num_layers_1; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.1 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.305    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 6: 100%|██████████| 134/134 [00:14<00:00,  9.48it/s, v_num=0, train_loss=0.101, train_acc=0.917, val_loss=0.579, val_acc=0.413] 


[I 2024-11-03 03:20:45,904] Trial 30 finished with value: 0.41384565830230713 and parameters: {'hidden_dim': 256, 'num_layers': 1, 'optimizer_name': 'Adam', 'batch_size': 64, 'learning_rate': 0.0001, 'sentence_representation_type': 'last'}. Best is trial 25 with value: 0.7648007273674011.
Seed set to 42


---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_128; num_layers_2; sentence_representation_max ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 5.0 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.987    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 6: 100%|██████████| 9/9 [00:12<00:00,  0.73it/s, v_num=0, train_loss=0.104, train_acc=0.964, val_loss=0.754, val_acc=0.743]


[I 2024-11-03 03:22:44,955] Trial 31 finished with value: 0.7481059432029724 and parameters: {'hidden_dim': 128, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'max'}. Best is trial 25 with value: 0.7648007273674011.
Seed set to 42


---------- batch_size_1024; lr_0.001; optimizer_Adam; hidden_dim_32; num_layers_2; sentence_representation_last ----------


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type               | Params | Mode 
------------------------------------------------------
0 | model  | RNN                | 4.9 M  | train
1 | metric | MulticlassAccuracy | 0      | train
------------------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.654    Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode

Detected KeyboardInterrupt, attempting graceful shutdown ...
[W 2024-11-03 03:23:02,843] Trial 32 failed with parameters: {'hidden_dim': 32, 'num_layers': 2, 'optimizer_name': 'Adam', 'batch_size': 1024, 'learning_rate': 0.001, 'sentence_representation_type': 'last'} because of the following error: NameError("name 'exit' is not defined").
Traceback (most recent call last):
  File "/Users/juinlee/Documents/GitHub/sc4002-nlp-senti

NameError: name 'exit' is not defined