In [6]:
import torch
from tokenizer import TokenizerWrapper, download_and_merge_text_files, train_tokenizer,download_file_from_url
from dataset_loader import TextDataset, collate_fn
from gru_model import GRULanguageModel
from train_utils import train_model, evaluate_model

In [7]:
# main.py

import torch
from tokenizer import TokenizerWrapper, download_and_merge_text_files, train_tokenizer
from dataset_loader import TextDataset, collate_fn
from train_utils import train_model, evaluate_model

# Import all models
from gru_model import GRULanguageModel
from lstm_model import LSTMLanguageModel
from rnn_model import RNNLanguageModel
from transformer_model import TransformerLanguageModel

# --- Config ---
# === Select Model Here ===
# Model family used by this notebook run.
# Options: 'gru', 'lstm', 'rnn', 'transformer'
MODEL_TYPE = "transformer"
# Checkpoint file name derived from the selected model type.
MODEL_SAVE_PATH = f"best_{MODEL_TYPE}_model.pt"

# Data locations: remote listing of the raw text files plus local file names.
DATA_URL = "https://api.github.com/repos/jghawaly/CSC7809_FoundationModels/contents/Project2/data/raw"
CORPUS_FILE = "corpus.txt"
TRAIN_FILE = "train.jsonl"
TEST_FILE = "test.jsonl"

# Tokenizer settings.
TOKENIZER_PREFIX = "bpe_tokenizer"
VOCAB_SIZE = 10_000

# Batching / training settings.
MAX_SEQ_LEN = 128
BATCH_SIZE = 256
EPOCHS = 50

# Prefer CUDA when torch reports it as available, otherwise stay on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- Step 1: Download data & train tokenizer ---
download_and_merge_text_files(DATA_URL, CORPUS_FILE)
train_tokenizer(CORPUS_FILE, TOKENIZER_PREFIX, vocab_size=VOCAB_SIZE)
tokenizer = TokenizerWrapper(f"{TOKENIZER_PREFIX}.model")


# --- Step 2: Dataset ---
def _pad_collate(batch):
    """Collate one batch via collate_fn, asking the tokenizer for the pad id on each call."""
    return collate_fn(batch, tokenizer.get_pad_id())


train_dataset = TextDataset(TRAIN_FILE, tokenizer, MAX_SEQ_LEN)
test_dataset = TextDataset(TEST_FILE, tokenizer, MAX_SEQ_LEN)

# Only the training loader shuffles; both loaders share the same collate function.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=_pad_collate,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=_pad_collate,
)


sentencepiece_trainer.cc(78) LOG(INFO) Starts training with : 
trainer_spec {
  input: corpus.txt
  input_format: 
  model_prefix: bpe_tokenizer
  model_type: BPE
  vocab_size: 10000
  self_test_sample_size: 0
  character_coverage: 0.9995
  input_sentence_size: 0
  shuffle_input_sentence: 1
  seed_sentencepiece_size: 1000000
  shrinking_factor: 0.75
  max_sentence_length: 4192
  num_threads: 16
  num_sub_iterations: 2
  max_sentencepiece_length: 16
  split_by_unicode_script: 1
  split_by_number: 1
  split_by_whitespace: 1
  split_digits: 0
  pretokenization_delimiter: 
  treat_whitespace_as_suffix: 0
  allow_whitespace_only_pieces: 0
  user_defined_symbols: <bos>
  user_defined_symbols: <eos>
  user_defined_symbols: <pad>
  required_chars: 
  byte_fallback: 0
  vocabulary_output_piece_score: 1
  train_extremely_large_corpus: 0
  seed_sentencepieces_file: 
  hard_vocab_limit: 1
  use_all_vocab: 0
  unk_id: 0
  bos_id: 1
  eos_id: 2
  pad_id: 3
  unk_piece: <unk>
  bos_piece: <s>
  eos_p

In [1]:

# --- Step 3: Model Factory ---
def build_model(model_type):
    """Instantiate the language model selected by ``model_type`` and move it to DEVICE.

    Args:
        model_type: One of "gru", "lstm", "rnn" or "transformer".

    Returns:
        The result of calling ``.to(DEVICE)`` on the freshly constructed model.

    Raises:
        ValueError: If ``model_type`` is not one of the four supported names.
    """
    # Guard clause: reject unknown names before touching any other global.
    if model_type not in ("gru", "lstm", "rnn", "transformer"):
        raise ValueError(f"Unsupported model type: {model_type}")

    if model_type == "transformer":
        transformer = TransformerLanguageModel(
            vocab_size=VOCAB_SIZE,
            embed_dim=256,
            num_heads=4,
            num_layers=4,
            pad_token_id=tokenizer.get_pad_id(),
        )
        return transformer.to(DEVICE)

    # The three recurrent variants are built with the same positional arguments.
    if model_type == "gru":
        recurrent_cls = GRULanguageModel
    elif model_type == "lstm":
        recurrent_cls = LSTMLanguageModel
    else:
        recurrent_cls = RNNLanguageModel
    # NOTE(review): 256 / 512 / 2 are presumably embed_dim / hidden_dim /
    # num_layers -- confirm against the model constructors.
    return recurrent_cls(VOCAB_SIZE, 256, 512, 2, tokenizer.get_pad_id()).to(DEVICE)

model = build_model(MODEL_TYPE)

# --- Step 4: Train & Evaluate ---
# Training writes its checkpoint to MODEL_SAVE_PATH; evaluation is given the same path.
train_model(
    model,
    train_loader,
    test_loader,
    tokenizer,
    DEVICE,
    MODEL_SAVE_PATH,
    lr=1e-3,
    epochs=EPOCHS,
)
evaluate_model(model, MODEL_SAVE_PATH, test_loader, tokenizer, DEVICE)

# --- Step 5: Sample Generation ---
custom_prompts = [
    "The spaceship landed on the surface of Mars and",
    "He walked into the room, completely unaware that",
    "Long ago in a forgotten village, a child was born who",
    "Artificial intelligence will change the world when",
    "What do you prefer — cat or dog?",
]

print(f"\n--- Generations using {MODEL_TYPE.upper()} ---")
for prompt in custom_prompts:
    # Ask for the continuation only, so the prompt is not echoed in the output.
    output = model.generate(
        tokenizer,
        prompt,
        device=DEVICE,
        return_continuation_only=True,
    )
    # One print call, two lines: prompt first, then the generated text.
    print(f" Prompt    : {prompt}", f" Generated : {output}", sep="\n")


sentencepiece_trainer.cc(78) LOG(INFO) Starts training with : 
trainer_spec {
  input: corpus.txt
  input_format: 
  model_prefix: bpe_tokenizer
  model_type: BPE
  vocab_size: 10000
  self_test_sample_size: 0
  character_coverage: 0.9995
  input_sentence_size: 0
  shuffle_input_sentence: 1
  seed_sentencepiece_size: 1000000
  shrinking_factor: 0.75
  max_sentence_length: 4192
  num_threads: 16
  num_sub_iterations: 2
  max_sentencepiece_length: 16
  split_by_unicode_script: 1
  split_by_number: 1
  split_by_whitespace: 1
  split_digits: 0
  pretokenization_delimiter: 
  treat_whitespace_as_suffix: 0
  allow_whitespace_only_pieces: 0
  user_defined_symbols: <bos>
  user_defined_symbols: <eos>
  user_defined_symbols: <pad>
  required_chars: 
  byte_fallback: 0
  vocabulary_output_piece_score: 1
  train_extremely_large_corpus: 0
  seed_sentencepieces_file: 
  hard_vocab_limit: 1
  use_all_vocab: 0
  unk_id: 0
  bos_id: 1
  eos_id: 2
  pad_id: 3
  unk_piece: <unk>
  bos_piece: <s>
  eos_p

Epoch 1 | Train Loss: 6.5641 | Val Loss: 5.9267
✅ Model saved to best_transformer_model.pt
📈 Saved loss curve as: TransformerLanguageModel_loss_curve_20250416_025711.png

🧪 Test Perplexity: 374.9277
🧪 Average BLEU Score: 0.0073

--- Generations using TRANSFORMER ---

📝 Prompt    : The spaceship landed on the surface of Mars and
🧠 Generated : began with a path took the tomb and the terms off. I exampleously is servants end, and they were continually and astonished who made a broken God that saw the sailorsvents of their friends of the future in the herald through anyone. A Py

📝 Prompt    : He walked into the room, completely unaware that
🧠 Generated : she expected. Oh, anxious the But when better not suppose a hundred used go with a buck breadon, and all him that the fisher is due from the Sp falseance of events and related, took him to Mrs", a time, now

📝 Prompt    : Long ago in a forgotten village, a child was born who
🧠 Generated : ushes a tri

📝 Prompt    : Artificial intelligence

In [8]:
# Model-specific hyperparameter grids: each model name maps to a list of
# configs, and every config carries its own learning rate under "lr".
def _recurrent_config(embed_dim, hidden_dim, num_layers, lr):
    """Build one grid entry with the keys used by the gru / lstm / rnn grids."""
    return {
        "embed_dim": embed_dim,
        "hidden_dim": hidden_dim,
        "num_layers": num_layers,
        "lr": lr,
    }


def _transformer_config(embed_dim, num_heads, num_layers, lr):
    """Build one grid entry with the keys used by the transformer grid."""
    return {
        "embed_dim": embed_dim,
        "num_heads": num_heads,
        "num_layers": num_layers,
        "lr": lr,
    }


hyperparams_grid = {
    "gru": [
        _recurrent_config(embed_dim=128, hidden_dim=256, num_layers=2, lr=1e-3),
        _recurrent_config(embed_dim=256, hidden_dim=512, num_layers=3, lr=5e-4),
    ],
    "lstm": [
        _recurrent_config(embed_dim=128, hidden_dim=256, num_layers=2, lr=1e-3),
        _recurrent_config(embed_dim=256, hidden_dim=512, num_layers=3, lr=1e-4),
    ],
    "rnn": [
        _recurrent_config(embed_dim=128, hidden_dim=128, num_layers=2, lr=1e-3),
    ],
    "transformer": [
        _transformer_config(embed_dim=256, num_heads=4, num_layers=2, lr=1e-3),
        _transformer_config(embed_dim=512, num_heads=8, num_layers=4, lr=5e-4),
    ],
}


In [9]:
from train_utils import train_model, evaluate_model
import pandas as pd
import time

# Module-level accumulator: every finished experiment appends one record here.
results = []

def run_experiments(model_type, ModelClass, grid, tokenizer, train_loader, test_loader, device, epochs=50):
    """Train and evaluate one model per config in ``grid``.

    For each config a model is built, trained with ``train_model``, evaluated
    with ``evaluate_model``, and a result record is appended to the
    module-level ``results`` list.

    Args:
        model_type: Label used in checkpoint names and records. The value
            "transformer" selects the ``num_heads`` constructor argument; any
            other value selects ``hidden_dim``.
        ModelClass: Callable invoked with keyword arguments ``vocab_size``,
            ``embed_dim``, ``num_layers``, ``pad_token_id`` and either
            ``num_heads`` or ``hidden_dim``; its result must support
            ``.to(device)``.
        grid: Iterable of config dicts. Each needs "embed_dim", "num_layers",
            "lr" and "num_heads" (transformer) or "hidden_dim" (otherwise).
        tokenizer: Object providing ``get_pad_id()``; also forwarded to
            ``train_model`` and ``evaluate_model``.
        train_loader: Forwarded to ``train_model``.
        test_loader: Forwarded to ``train_model`` and ``evaluate_model``.
        device: Passed to ``.to()`` and forwarded to the train/eval helpers.
        epochs: Epoch count handed to ``train_model``. Defaults to 50, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        The list of records produced by this call, in grid order. The same
        record objects are also appended to the module-level ``results``.

    Raises:
        KeyError: If a config lacks one of the keys listed above.
    """
    run_records = []
    for idx, config in enumerate(grid):
        # The timestamp suffix makes the id (and checkpoint name) differ between runs.
        model_id = f"{model_type}_{idx}_{int(time.time())}"
        print(f"\n Training {model_id} with config: {config}")

        # Arguments shared by every model family ...
        model_kwargs = {
            "vocab_size": VOCAB_SIZE,
            "embed_dim": config["embed_dim"],
            "num_layers": config["num_layers"],
            "pad_token_id": tokenizer.get_pad_id(),
        }
        # ... plus the single family-specific one.
        if model_type == "transformer":
            model_kwargs["num_heads"] = config["num_heads"]
        else:
            model_kwargs["hidden_dim"] = config["hidden_dim"]
        model = ModelClass(**model_kwargs).to(device)

        save_path = f"{model_id}.pt"
        train_model(model, train_loader, test_loader, tokenizer, device, save_path, lr=config["lr"], epochs=epochs)

        ppl, bleu = evaluate_model(model, save_path, test_loader, tokenizer, device)
        record = {
            "model_type": model_type,
            "config": config,
            "perplexity": ppl,
            "bleu_score": bleu,
            "model_path": save_path
        }
        # Keep the global accumulator in sync for cells that read `results`.
        results.append(record)
        run_records.append(record)
    return run_records


In [None]:
from gru_model import GRULanguageModel
from lstm_model import LSTMLanguageModel
from rnn_model import RNNLanguageModel
from transformer_model import TransformerLanguageModel

# Model families to sweep, in execution order; the keys match hyperparams_grid.
experiment_models = {
    "gru": GRULanguageModel,
    "lstm": LSTMLanguageModel,
    "rnn": RNNLanguageModel,
    "transformer": TransformerLanguageModel,
}

for experiment_name, experiment_cls in experiment_models.items():
    run_experiments(
        experiment_name,
        experiment_cls,
        hyperparams_grid[experiment_name],
        tokenizer,
        train_loader,
        test_loader,
        DEVICE,
    )



 Training gru_0_1744794807 with config: {'embed_dim': 128, 'hidden_dim': 256, 'num_layers': 2, 'lr': 0.001}


Epoch 1/50: 100%|██████████| 155/155 [00:08<00:00, 18.03it/s]


Epoch 1 | Train Loss: 6.9873 | Val Loss: 6.6835
 Model saved to gru_0_1744794807.pt


Epoch 2/50: 100%|██████████| 155/155 [00:08<00:00, 19.14it/s]


Epoch 2 | Train Loss: 6.6077 | Val Loss: 6.4148
 Model saved to gru_0_1744794807.pt


Epoch 3/50: 100%|██████████| 155/155 [00:08<00:00, 18.71it/s]


Epoch 3 | Train Loss: 6.3652 | Val Loss: 6.1637
 Model saved to gru_0_1744794807.pt


Epoch 4/50: 100%|██████████| 155/155 [00:08<00:00, 19.09it/s]


Epoch 4 | Train Loss: 6.1687 | Val Loss: 5.9979
 Model saved to gru_0_1744794807.pt


Epoch 5/50: 100%|██████████| 155/155 [00:08<00:00, 19.06it/s]


Epoch 5 | Train Loss: 6.0386 | Val Loss: 5.8699
 Model saved to gru_0_1744794807.pt


Epoch 6/50: 100%|██████████| 155/155 [00:08<00:00, 19.07it/s]


Epoch 6 | Train Loss: 5.9322 | Val Loss: 5.7647
 Model saved to gru_0_1744794807.pt


Epoch 7/50: 100%|██████████| 155/155 [00:08<00:00, 19.07it/s]


Epoch 7 | Train Loss: 5.8389 | Val Loss: 5.6659
 Model saved to gru_0_1744794807.pt


Epoch 8/50: 100%|██████████| 155/155 [00:08<00:00, 19.07it/s]


Epoch 8 | Train Loss: 5.7439 | Val Loss: 5.5671
 Model saved to gru_0_1744794807.pt


Epoch 9/50: 100%|██████████| 155/155 [00:08<00:00, 19.04it/s]


Epoch 9 | Train Loss: 5.6481 | Val Loss: 5.4747
 Model saved to gru_0_1744794807.pt


Epoch 10/50: 100%|██████████| 155/155 [00:08<00:00, 19.08it/s]


Epoch 10 | Train Loss: 5.5560 | Val Loss: 5.3857
 Model saved to gru_0_1744794807.pt


Epoch 11/50: 100%|██████████| 155/155 [00:08<00:00, 18.58it/s]


Epoch 11 | Train Loss: 5.4661 | Val Loss: 5.3048
 Model saved to gru_0_1744794807.pt


Epoch 12/50: 100%|██████████| 155/155 [00:08<00:00, 19.02it/s]


Epoch 12 | Train Loss: 5.3814 | Val Loss: 5.2258
 Model saved to gru_0_1744794807.pt


Epoch 13/50: 100%|██████████| 155/155 [00:08<00:00, 19.00it/s]


Epoch 13 | Train Loss: 5.3021 | Val Loss: 5.1534
 Model saved to gru_0_1744794807.pt


Epoch 14/50: 100%|██████████| 155/155 [00:08<00:00, 19.02it/s]


Epoch 14 | Train Loss: 5.2307 | Val Loss: 5.0944
 Model saved to gru_0_1744794807.pt


Epoch 15/50: 100%|██████████| 155/155 [00:08<00:00, 18.97it/s]


Epoch 15 | Train Loss: 5.1683 | Val Loss: 5.0356
 Model saved to gru_0_1744794807.pt


Epoch 16/50: 100%|██████████| 155/155 [00:08<00:00, 19.01it/s]


Epoch 16 | Train Loss: 5.1116 | Val Loss: 4.9863
 Model saved to gru_0_1744794807.pt


Epoch 17/50: 100%|██████████| 155/155 [00:08<00:00, 18.98it/s]


Epoch 17 | Train Loss: 5.0624 | Val Loss: 4.9454
 Model saved to gru_0_1744794807.pt


Epoch 18/50: 100%|██████████| 155/155 [00:08<00:00, 18.99it/s]


Epoch 18 | Train Loss: 5.0178 | Val Loss: 4.9054
 Model saved to gru_0_1744794807.pt


Epoch 19/50: 100%|██████████| 155/155 [00:08<00:00, 18.55it/s]


Epoch 19 | Train Loss: 4.9778 | Val Loss: 4.8706
 Model saved to gru_0_1744794807.pt


Epoch 20/50: 100%|██████████| 155/155 [00:08<00:00, 18.96it/s]


Epoch 20 | Train Loss: 4.9413 | Val Loss: 4.8403
 Model saved to gru_0_1744794807.pt


Epoch 21/50: 100%|██████████| 155/155 [00:08<00:00, 18.97it/s]


Epoch 21 | Train Loss: 4.9101 | Val Loss: 4.8137
 Model saved to gru_0_1744794807.pt


Epoch 22/50: 100%|██████████| 155/155 [00:08<00:00, 18.99it/s]


Epoch 22 | Train Loss: 4.8830 | Val Loss: 4.7951
 Model saved to gru_0_1744794807.pt


Epoch 23/50: 100%|██████████| 155/155 [00:08<00:00, 18.96it/s]


Epoch 23 | Train Loss: 4.8589 | Val Loss: 4.7722
 Model saved to gru_0_1744794807.pt


Epoch 24/50: 100%|██████████| 155/155 [00:08<00:00, 18.98it/s]


Epoch 24 | Train Loss: 4.8329 | Val Loss: 4.7520
 Model saved to gru_0_1744794807.pt


Epoch 25/50: 100%|██████████| 155/155 [00:08<00:00, 19.03it/s]


Epoch 25 | Train Loss: 4.8106 | Val Loss: 4.7370
 Model saved to gru_0_1744794807.pt


Epoch 26/50: 100%|██████████| 155/155 [00:08<00:00, 18.97it/s]


Epoch 26 | Train Loss: 4.7909 | Val Loss: 4.7178
 Model saved to gru_0_1744794807.pt


Epoch 27/50: 100%|██████████| 155/155 [00:08<00:00, 18.53it/s]


Epoch 27 | Train Loss: 4.7724 | Val Loss: 4.7076
 Model saved to gru_0_1744794807.pt


Epoch 28/50: 100%|██████████| 155/155 [00:08<00:00, 19.02it/s]


Epoch 28 | Train Loss: 4.7562 | Val Loss: 4.6942
 Model saved to gru_0_1744794807.pt


Epoch 29/50: 100%|██████████| 155/155 [00:08<00:00, 19.00it/s]


Epoch 29 | Train Loss: 4.7398 | Val Loss: 4.6834
 Model saved to gru_0_1744794807.pt


Epoch 30/50: 100%|██████████| 155/155 [00:08<00:00, 18.99it/s]


Epoch 30 | Train Loss: 4.7252 | Val Loss: 4.6728
 Model saved to gru_0_1744794807.pt


Epoch 31/50: 100%|██████████| 155/155 [00:08<00:00, 19.03it/s]


Epoch 31 | Train Loss: 4.7103 | Val Loss: 4.6619
 Model saved to gru_0_1744794807.pt


Epoch 32/50: 100%|██████████| 155/155 [00:08<00:00, 19.01it/s]


Epoch 32 | Train Loss: 4.6978 | Val Loss: 4.6536
 Model saved to gru_0_1744794807.pt


Epoch 33/50: 100%|██████████| 155/155 [00:08<00:00, 18.99it/s]


Epoch 33 | Train Loss: 4.6861 | Val Loss: 4.6444
 Model saved to gru_0_1744794807.pt


Epoch 34/50: 100%|██████████| 155/155 [00:08<00:00, 19.00it/s]


Epoch 34 | Train Loss: 4.6733 | Val Loss: 4.6359
 Model saved to gru_0_1744794807.pt


Epoch 35/50: 100%|██████████| 155/155 [00:08<00:00, 19.01it/s]


Epoch 35 | Train Loss: 4.6628 | Val Loss: 4.6320
 Model saved to gru_0_1744794807.pt


Epoch 36/50: 100%|██████████| 155/155 [00:08<00:00, 18.98it/s]


Epoch 36 | Train Loss: 4.6575 | Val Loss: 4.6263
 Model saved to gru_0_1744794807.pt


Epoch 37/50: 100%|██████████| 155/155 [00:08<00:00, 19.02it/s]


Epoch 37 | Train Loss: 4.6447 | Val Loss: 4.6188
 Model saved to gru_0_1744794807.pt


Epoch 38/50: 100%|██████████| 155/155 [00:08<00:00, 18.98it/s]


Epoch 38 | Train Loss: 4.6342 | Val Loss: 4.6136
 Model saved to gru_0_1744794807.pt


Epoch 39/50: 100%|██████████| 155/155 [00:08<00:00, 18.99it/s]


Epoch 39 | Train Loss: 4.6234 | Val Loss: 4.6078
 Model saved to gru_0_1744794807.pt


Epoch 40/50: 100%|██████████| 155/155 [00:08<00:00, 19.06it/s]


Epoch 40 | Train Loss: 4.6160 | Val Loss: 4.6009
 Model saved to gru_0_1744794807.pt


Epoch 41/50: 100%|██████████| 155/155 [00:08<00:00, 19.01it/s]


Epoch 41 | Train Loss: 4.6073 | Val Loss: 4.5974
 Model saved to gru_0_1744794807.pt


Epoch 42/50: 100%|██████████| 155/155 [00:08<00:00, 19.02it/s]


Epoch 42 | Train Loss: 4.5994 | Val Loss: 4.5914
 Model saved to gru_0_1744794807.pt


Epoch 43/50: 100%|██████████| 155/155 [00:08<00:00, 19.00it/s]


Epoch 43 | Train Loss: 4.5910 | Val Loss: 4.5884
 Model saved to gru_0_1744794807.pt


Epoch 44/50: 100%|██████████| 155/155 [00:08<00:00, 18.56it/s]


Epoch 44 | Train Loss: 4.5837 | Val Loss: 4.5845
 Model saved to gru_0_1744794807.pt


Epoch 45/50: 100%|██████████| 155/155 [00:08<00:00, 18.99it/s]


Epoch 45 | Train Loss: 4.5759 | Val Loss: 4.5800
 Model saved to gru_0_1744794807.pt


Epoch 46/50: 100%|██████████| 155/155 [00:08<00:00, 19.01it/s]


Epoch 46 | Train Loss: 4.5689 | Val Loss: 4.5765
 Model saved to gru_0_1744794807.pt


Epoch 47/50: 100%|██████████| 155/155 [00:08<00:00, 19.01it/s]


Epoch 47 | Train Loss: 4.5631 | Val Loss: 4.5758
 Model saved to gru_0_1744794807.pt


Epoch 48/50: 100%|██████████| 155/155 [00:08<00:00, 19.03it/s]


Epoch 48 | Train Loss: 4.5560 | Val Loss: 4.5714
 Model saved to gru_0_1744794807.pt


Epoch 49/50: 100%|██████████| 155/155 [00:08<00:00, 19.02it/s]


Epoch 49 | Train Loss: 4.5502 | Val Loss: 4.5700
 Model saved to gru_0_1744794807.pt


Epoch 50/50: 100%|██████████| 155/155 [00:08<00:00, 18.97it/s]


Epoch 50 | Train Loss: 4.5432 | Val Loss: 4.5679
 Model saved to gru_0_1744794807.pt
📈 Saved loss curve as: GRULanguageModel_loss_curve_20250416_042105.png

 Test Perplexity: 96.3410
 Average BLEU Score: 0.0139

 Training gru_1_1744795270 with config: {'embed_dim': 256, 'hidden_dim': 512, 'num_layers': 3, 'lr': 0.0005}


Epoch 1/50: 100%|██████████| 155/155 [00:14<00:00, 10.73it/s]


Epoch 1 | Train Loss: 6.9900 | Val Loss: 6.8218
 Model saved to gru_1_1744795270.pt


Epoch 2/50: 100%|██████████| 155/155 [00:14<00:00, 10.90it/s]


Epoch 2 | Train Loss: 6.8360 | Val Loss: 6.7175
 Model saved to gru_1_1744795270.pt


Epoch 3/50: 100%|██████████| 155/155 [00:14<00:00, 10.91it/s]


Epoch 3 | Train Loss: 6.7068 | Val Loss: 6.6217
 Model saved to gru_1_1744795270.pt


Epoch 4/50: 100%|██████████| 155/155 [00:14<00:00, 10.91it/s]


Epoch 4 | Train Loss: 6.5499 | Val Loss: 6.3708
 Model saved to gru_1_1744795270.pt


Epoch 5/50: 100%|██████████| 155/155 [00:14<00:00, 10.88it/s]


Epoch 5 | Train Loss: 6.2867 | Val Loss: 6.0828
 Model saved to gru_1_1744795270.pt


Epoch 6/50: 100%|██████████| 155/155 [00:14<00:00, 10.91it/s]


Epoch 6 | Train Loss: 6.0525 | Val Loss: 5.8910
 Model saved to gru_1_1744795270.pt


Epoch 7/50: 100%|██████████| 155/155 [00:14<00:00, 10.84it/s]


Epoch 7 | Train Loss: 5.9016 | Val Loss: 5.7512
 Model saved to gru_1_1744795270.pt


Epoch 8/50: 100%|██████████| 155/155 [00:14<00:00, 10.90it/s]


Epoch 8 | Train Loss: 5.7817 | Val Loss: 5.6371
 Model saved to gru_1_1744795270.pt


Epoch 9/50: 100%|██████████| 155/155 [00:14<00:00, 10.91it/s]


Epoch 9 | Train Loss: 5.6787 | Val Loss: 5.5368
 Model saved to gru_1_1744795270.pt


Epoch 10/50: 100%|██████████| 155/155 [00:14<00:00, 10.75it/s]


Epoch 10 | Train Loss: 5.5805 | Val Loss: 5.4370
 Model saved to gru_1_1744795270.pt


Epoch 11/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 11 | Train Loss: 5.4736 | Val Loss: 5.3270
 Model saved to gru_1_1744795270.pt


Epoch 12/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 12 | Train Loss: 5.3615 | Val Loss: 5.2214
 Model saved to gru_1_1744795270.pt


Epoch 13/50: 100%|██████████| 155/155 [00:14<00:00, 10.91it/s]


Epoch 13 | Train Loss: 5.2514 | Val Loss: 5.1249
 Model saved to gru_1_1744795270.pt


Epoch 14/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 14 | Train Loss: 5.1497 | Val Loss: 5.0326
 Model saved to gru_1_1744795270.pt


Epoch 15/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 15 | Train Loss: 5.0607 | Val Loss: 4.9577
 Model saved to gru_1_1744795270.pt


Epoch 16/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 16 | Train Loss: 4.9810 | Val Loss: 4.8955
 Model saved to gru_1_1744795270.pt


Epoch 17/50: 100%|██████████| 155/155 [00:14<00:00, 10.90it/s]


Epoch 17 | Train Loss: 4.9113 | Val Loss: 4.8377
 Model saved to gru_1_1744795270.pt


Epoch 18/50: 100%|██████████| 155/155 [00:14<00:00, 10.74it/s]


Epoch 18 | Train Loss: 4.8521 | Val Loss: 4.7943
 Model saved to gru_1_1744795270.pt


Epoch 19/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 19 | Train Loss: 4.7973 | Val Loss: 4.7525
 Model saved to gru_1_1744795270.pt


Epoch 20/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 20 | Train Loss: 4.7492 | Val Loss: 4.7188
 Model saved to gru_1_1744795270.pt


Epoch 21/50: 100%|██████████| 155/155 [00:14<00:00, 10.83it/s]


Epoch 21 | Train Loss: 4.7066 | Val Loss: 4.6909
 Model saved to gru_1_1744795270.pt


Epoch 22/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 22 | Train Loss: 4.6684 | Val Loss: 4.6661
 Model saved to gru_1_1744795270.pt


Epoch 23/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 23 | Train Loss: 4.6341 | Val Loss: 4.6424
 Model saved to gru_1_1744795270.pt


Epoch 24/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 24 | Train Loss: 4.6017 | Val Loss: 4.6229
 Model saved to gru_1_1744795270.pt


Epoch 25/50: 100%|██████████| 155/155 [00:14<00:00, 10.84it/s]


Epoch 25 | Train Loss: 4.5720 | Val Loss: 4.6061
 Model saved to gru_1_1744795270.pt


Epoch 26/50: 100%|██████████| 155/155 [00:14<00:00, 10.76it/s]


Epoch 26 | Train Loss: 4.5453 | Val Loss: 4.5909
 Model saved to gru_1_1744795270.pt


Epoch 27/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 27 | Train Loss: 4.5198 | Val Loss: 4.5752
 Model saved to gru_1_1744795270.pt


Epoch 28/50: 100%|██████████| 155/155 [00:14<00:00, 10.90it/s]


Epoch 28 | Train Loss: 4.4997 | Val Loss: 4.5627
 Model saved to gru_1_1744795270.pt


Epoch 29/50: 100%|██████████| 155/155 [00:14<00:00, 10.90it/s]


Epoch 29 | Train Loss: 4.4757 | Val Loss: 4.5514
 Model saved to gru_1_1744795270.pt


Epoch 30/50: 100%|██████████| 155/155 [00:14<00:00, 10.91it/s]


Epoch 30 | Train Loss: 4.4551 | Val Loss: 4.5434
 Model saved to gru_1_1744795270.pt


Epoch 31/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 31 | Train Loss: 4.4352 | Val Loss: 4.5339
 Model saved to gru_1_1744795270.pt


Epoch 32/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 32 | Train Loss: 4.4191 | Val Loss: 4.5286
 Model saved to gru_1_1744795270.pt


Epoch 33/50: 100%|██████████| 155/155 [00:14<00:00, 10.90it/s]


Epoch 33 | Train Loss: 4.4014 | Val Loss: 4.5195
 Model saved to gru_1_1744795270.pt


Epoch 34/50: 100%|██████████| 155/155 [00:14<00:00, 10.75it/s]


Epoch 34 | Train Loss: 4.3838 | Val Loss: 4.5135
 Model saved to gru_1_1744795270.pt


Epoch 35/50: 100%|██████████| 155/155 [00:14<00:00, 10.90it/s]


Epoch 35 | Train Loss: 4.3671 | Val Loss: 4.5101
 Model saved to gru_1_1744795270.pt


Epoch 36/50:  79%|███████▊  | 122/155 [00:11<00:03, 10.88it/s]

In [None]:
# Keys of the records that run_experiments appends to `results`.
RESULT_COLUMNS = ["model_type", "config", "perplexity", "bleu_score", "model_path"]

# If no experiment finished (e.g. the sweep was interrupted), pd.DataFrame([])
# has no columns and sorting by "perplexity" would raise KeyError. Fall back to
# an empty frame that still has the expected columns.
if results:
    results_df = pd.DataFrame(results)
else:
    results_df = pd.DataFrame(columns=RESULT_COLUMNS)

results_df.to_csv("experiment_results.csv", index=False)

# Lowest perplexity first; the bare expression stays last so the notebook displays it.
results_df.sort_values(by="perplexity").head()
