In [1]:
import torch
from tokenizer import TokenizerWrapper, download_and_merge_text_files, train_tokenizer,download_file_from_url
from dataset_loader import TextDataset, collate_fn
from gru_model import GRULanguageModel
from train_utils import train_model, evaluate_model

In [2]:
# Define model-specific hyperparameter grids
hyperparams_grid = {
    "gru": [
        {"embed_dim": 128, "hidden_dim": 256, "num_layers": 2, "lr": 1e-3},
        {"embed_dim": 256, "hidden_dim": 512, "num_layers": 3, "lr": 5e-4}
    ],
    "lstm": [
        {"embed_dim": 128, "hidden_dim": 256, "num_layers": 2, "lr": 1e-3},
        {"embed_dim": 256, "hidden_dim": 512, "num_layers": 3, "lr": 1e-4}
    ],
    "rnn": [
        {"embed_dim": 128, "hidden_dim": 128, "num_layers": 2, "lr": 1e-3}
    ],
    "transformer": [
        {"embed_dim": 256, "num_heads": 4, "num_layers": 2, "lr": 1e-3},
        {"embed_dim": 512, "num_heads": 8, "num_layers": 4, "lr": 5e-4}
    ]
}


In [3]:
from train_utils import train_model, evaluate_model
import pandas as pd
import time

results = []

def run_experiments(model_type, ModelClass, grid, tokenizer, train_loader, test_loader, device):
    for idx, config in enumerate(grid):
        model_id = f"{model_type}_{idx}_{int(time.time())}"
        print(f"\n Training {model_id} with config: {config}")
        
        if model_type == "transformer":
            model = ModelClass(
                vocab_size=VOCAB_SIZE,
                embed_dim=config["embed_dim"],
                num_heads=config["num_heads"],
                num_layers=config["num_layers"],
                pad_token_id=tokenizer.get_pad_id()
            ).to(device)
        else:
            model = ModelClass(
                vocab_size=VOCAB_SIZE,
                embed_dim=config["embed_dim"],
                hidden_dim=config["hidden_dim"],
                num_layers=config["num_layers"],
                pad_token_id=tokenizer.get_pad_id()
            ).to(device)
        
        save_path = f"{model_id}.pt"
        train_model(model, train_loader, test_loader, tokenizer, device, save_path, lr=config["lr"], epochs=50)

        ppl, bleu = evaluate_model(model, save_path, test_loader, tokenizer, device)
        results.append({
            "model_type": model_type,
            "config": config,
            "perplexity": ppl,
            "bleu_score": bleu,
            "model_path": save_path
        })


In [4]:

CORPUS_FILE = "corpus.txt"
TRAIN_FILE = "train.jsonl"
TEST_FILE = "test.jsonl"
TOKENIZER_PREFIX = "bpe_tokenizer"
VOCAB_SIZE = 10000
MAX_SEQ_LEN = 128
BATCH_SIZE = 256
EPOCHS = 50
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Select Model Here ===
#MODEL_TYPE = "transformer"  # Options: 'gru', 'lstm', 'rnn', 'transformer'
#MODEL_SAVE_PATH = f"best_{MODEL_TYPE}_model.pt"

# --- Step 1: Download data & train tokenizer ---
#download_file_from_url(TRAIN_URL, TRAIN_FILE)
#download_file_from_url(TEST_URL, TEST_FILE)
#download_and_merge_text_files(DATA_URL, CORPUS_FILE)
train_tokenizer(CORPUS_FILE, TOKENIZER_PREFIX, vocab_size=VOCAB_SIZE)
tokenizer = TokenizerWrapper(f"{TOKENIZER_PREFIX}.model")
     

sentencepiece_trainer.cc(78) LOG(INFO) Starts training with : 
trainer_spec {
  input: corpus.txt
  input_format: 
  model_prefix: bpe_tokenizer
  model_type: BPE
  vocab_size: 10000
  self_test_sample_size: 0
  character_coverage: 0.9995
  input_sentence_size: 0
  shuffle_input_sentence: 1
  seed_sentencepiece_size: 1000000
  shrinking_factor: 0.75
  max_sentence_length: 4192
  num_threads: 16
  num_sub_iterations: 2
  max_sentencepiece_length: 16
  split_by_unicode_script: 1
  split_by_number: 1
  split_by_whitespace: 1
  split_digits: 0
  pretokenization_delimiter: 
  treat_whitespace_as_suffix: 0
  allow_whitespace_only_pieces: 0
  user_defined_symbols: <bos>
  user_defined_symbols: <eos>
  user_defined_symbols: <pad>
  required_chars: 
  byte_fallback: 0
  vocabulary_output_piece_score: 1
  train_extremely_large_corpus: 0
  seed_sentencepieces_file: 
  hard_vocab_limit: 1
  use_all_vocab: 0
  unk_id: 0
  bos_id: 1
  eos_id: 2
  pad_id: 3
  unk_piece: <unk>
  bos_piece: <s>
  eos_p

In [5]:
train_dataset = TextDataset(TRAIN_FILE, tokenizer, MAX_SEQ_LEN)
test_dataset = TextDataset(TEST_FILE, tokenizer, MAX_SEQ_LEN)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=lambda b: collate_fn(b, tokenizer.get_pad_id()))
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=lambda b: collate_fn(b, tokenizer.get_pad_id()))
     

In [6]:

from gru_model import GRULanguageModel
from lstm_model import LSTMLanguageModel
from rnn_model import RNNLanguageModel
from transformer_model import TransformerLanguageModel

EPOCHS = 30

run_experiments("gru", GRULanguageModel, hyperparams_grid["gru"], tokenizer, train_loader, test_loader, DEVICE)
run_experiments("lstm", LSTMLanguageModel, hyperparams_grid["lstm"], tokenizer, train_loader, test_loader, DEVICE)
run_experiments("rnn", RNNLanguageModel, hyperparams_grid["rnn"], tokenizer, train_loader, test_loader, DEVICE)
run_experiments("transformer", TransformerLanguageModel, hyperparams_grid["transformer"], tokenizer, train_loader, test_loader, DEVICE)

  


 Training gru_0_1744850565 with config: {'embed_dim': 128, 'hidden_dim': 256, 'num_layers': 2, 'lr': 0.001}


Epoch 1/50: 100%|██████████| 155/155 [00:08<00:00, 18.06it/s]


Epoch 1 | Train Loss: 6.9670 | Val Loss: 6.6449
 Model saved to gru_0_1744850565.pt


Epoch 2/50: 100%|██████████| 155/155 [00:08<00:00, 19.11it/s]


Epoch 2 | Train Loss: 6.5663 | Val Loss: 6.3638
 Model saved to gru_0_1744850565.pt


Epoch 3/50: 100%|██████████| 155/155 [00:08<00:00, 19.05it/s]


Epoch 3 | Train Loss: 6.3275 | Val Loss: 6.1364
 Model saved to gru_0_1744850565.pt


Epoch 4/50: 100%|██████████| 155/155 [00:08<00:00, 19.05it/s]


Epoch 4 | Train Loss: 6.1418 | Val Loss: 5.9534
 Model saved to gru_0_1744850565.pt


Epoch 5/50: 100%|██████████| 155/155 [00:08<00:00, 18.57it/s]


Epoch 5 | Train Loss: 5.9856 | Val Loss: 5.8078
 Model saved to gru_0_1744850565.pt


Epoch 6/50: 100%|██████████| 155/155 [00:08<00:00, 18.94it/s]


Epoch 6 | Train Loss: 5.8744 | Val Loss: 5.7101
 Model saved to gru_0_1744850565.pt


Epoch 7/50: 100%|██████████| 155/155 [00:08<00:00, 18.92it/s]


Epoch 7 | Train Loss: 5.7854 | Val Loss: 5.6151
 Model saved to gru_0_1744850565.pt


Epoch 8/50: 100%|██████████| 155/155 [00:08<00:00, 18.93it/s]


Epoch 8 | Train Loss: 5.6914 | Val Loss: 5.5177
 Model saved to gru_0_1744850565.pt


Epoch 9/50: 100%|██████████| 155/155 [00:08<00:00, 18.88it/s]


Epoch 9 | Train Loss: 5.5903 | Val Loss: 5.4156
 Model saved to gru_0_1744850565.pt


Epoch 10/50: 100%|██████████| 155/155 [00:08<00:00, 18.87it/s]


Epoch 10 | Train Loss: 5.4871 | Val Loss: 5.3166
 Model saved to gru_0_1744850565.pt


Epoch 11/50: 100%|██████████| 155/155 [00:08<00:00, 18.87it/s]


Epoch 11 | Train Loss: 5.3881 | Val Loss: 5.2292
 Model saved to gru_0_1744850565.pt


Epoch 12/50: 100%|██████████| 155/155 [00:08<00:00, 18.84it/s]


Epoch 12 | Train Loss: 5.3007 | Val Loss: 5.1490
 Model saved to gru_0_1744850565.pt


Epoch 13/50: 100%|██████████| 155/155 [00:08<00:00, 18.45it/s]


Epoch 13 | Train Loss: 5.2228 | Val Loss: 5.0848
 Model saved to gru_0_1744850565.pt


Epoch 14/50: 100%|██████████| 155/155 [00:08<00:00, 18.82it/s]


Epoch 14 | Train Loss: 5.1559 | Val Loss: 5.0209
 Model saved to gru_0_1744850565.pt


Epoch 15/50: 100%|██████████| 155/155 [00:08<00:00, 18.82it/s]


Epoch 15 | Train Loss: 5.0964 | Val Loss: 4.9737
 Model saved to gru_0_1744850565.pt


Epoch 16/50: 100%|██████████| 155/155 [00:08<00:00, 18.87it/s]


Epoch 16 | Train Loss: 5.0436 | Val Loss: 4.9261
 Model saved to gru_0_1744850565.pt


Epoch 17/50: 100%|██████████| 155/155 [00:08<00:00, 18.82it/s]


Epoch 17 | Train Loss: 4.9983 | Val Loss: 4.8842
 Model saved to gru_0_1744850565.pt


Epoch 18/50: 100%|██████████| 155/155 [00:08<00:00, 18.86it/s]


Epoch 18 | Train Loss: 4.9570 | Val Loss: 4.8535
 Model saved to gru_0_1744850565.pt


Epoch 19/50: 100%|██████████| 155/155 [00:08<00:00, 18.82it/s]


Epoch 19 | Train Loss: 4.9213 | Val Loss: 4.8218
 Model saved to gru_0_1744850565.pt


Epoch 20/50: 100%|██████████| 155/155 [00:08<00:00, 18.49it/s]


Epoch 20 | Train Loss: 4.8901 | Val Loss: 4.7999
 Model saved to gru_0_1744850565.pt


Epoch 21/50: 100%|██████████| 155/155 [00:08<00:00, 18.83it/s]


Epoch 21 | Train Loss: 4.8623 | Val Loss: 4.7737
 Model saved to gru_0_1744850565.pt


Epoch 22/50: 100%|██████████| 155/155 [00:08<00:00, 18.88it/s]


Epoch 22 | Train Loss: 4.8364 | Val Loss: 4.7521
 Model saved to gru_0_1744850565.pt


Epoch 23/50: 100%|██████████| 155/155 [00:08<00:00, 18.86it/s]


Epoch 23 | Train Loss: 4.8132 | Val Loss: 4.7336
 Model saved to gru_0_1744850565.pt


Epoch 24/50: 100%|██████████| 155/155 [00:08<00:00, 18.84it/s]


Epoch 24 | Train Loss: 4.7926 | Val Loss: 4.7177
 Model saved to gru_0_1744850565.pt


Epoch 25/50: 100%|██████████| 155/155 [00:08<00:00, 18.89it/s]


Epoch 25 | Train Loss: 4.7726 | Val Loss: 4.7023
 Model saved to gru_0_1744850565.pt


Epoch 26/50: 100%|██████████| 155/155 [00:08<00:00, 18.85it/s]


Epoch 26 | Train Loss: 4.7539 | Val Loss: 4.6909
 Model saved to gru_0_1744850565.pt


Epoch 27/50: 100%|██████████| 155/155 [00:08<00:00, 18.89it/s]


Epoch 27 | Train Loss: 4.7381 | Val Loss: 4.6814
 Model saved to gru_0_1744850565.pt


Epoch 28/50: 100%|██████████| 155/155 [00:08<00:00, 18.50it/s]


Epoch 28 | Train Loss: 4.7215 | Val Loss: 4.6682
 Model saved to gru_0_1744850565.pt


Epoch 29/50: 100%|██████████| 155/155 [00:08<00:00, 18.87it/s]


Epoch 29 | Train Loss: 4.7143 | Val Loss: 4.6589
 Model saved to gru_0_1744850565.pt


Epoch 30/50: 100%|██████████| 155/155 [00:08<00:00, 18.87it/s]


Epoch 30 | Train Loss: 4.6966 | Val Loss: 4.6498
 Model saved to gru_0_1744850565.pt


Epoch 31/50: 100%|██████████| 155/155 [00:08<00:00, 18.89it/s]


Epoch 31 | Train Loss: 4.6823 | Val Loss: 4.6399
 Model saved to gru_0_1744850565.pt


Epoch 32/50: 100%|██████████| 155/155 [00:08<00:00, 18.84it/s]


Epoch 32 | Train Loss: 4.6710 | Val Loss: 4.6297
 Model saved to gru_0_1744850565.pt


Epoch 33/50: 100%|██████████| 155/155 [00:08<00:00, 18.83it/s]


Epoch 33 | Train Loss: 4.6599 | Val Loss: 4.6240
 Model saved to gru_0_1744850565.pt


Epoch 34/50: 100%|██████████| 155/155 [00:08<00:00, 18.89it/s]


Epoch 34 | Train Loss: 4.6491 | Val Loss: 4.6167
 Model saved to gru_0_1744850565.pt


Epoch 35/50: 100%|██████████| 155/155 [00:08<00:00, 18.52it/s]


Epoch 35 | Train Loss: 4.6389 | Val Loss: 4.6100
 Model saved to gru_0_1744850565.pt


Epoch 36/50: 100%|██████████| 155/155 [00:08<00:00, 18.89it/s]


Epoch 36 | Train Loss: 4.6296 | Val Loss: 4.6051
 Model saved to gru_0_1744850565.pt


Epoch 37/50: 100%|██████████| 155/155 [00:08<00:00, 18.88it/s]


Epoch 37 | Train Loss: 4.6197 | Val Loss: 4.5999
 Model saved to gru_0_1744850565.pt


Epoch 38/50: 100%|██████████| 155/155 [00:08<00:00, 18.86it/s]


Epoch 38 | Train Loss: 4.6106 | Val Loss: 4.5935
 Model saved to gru_0_1744850565.pt


Epoch 39/50: 100%|██████████| 155/155 [00:08<00:00, 18.88it/s]


Epoch 39 | Train Loss: 4.6023 | Val Loss: 4.5890
 Model saved to gru_0_1744850565.pt


Epoch 40/50: 100%|██████████| 155/155 [00:08<00:00, 18.89it/s]


Epoch 40 | Train Loss: 4.5954 | Val Loss: 4.5872
 Model saved to gru_0_1744850565.pt


Epoch 41/50: 100%|██████████| 155/155 [00:08<00:00, 18.88it/s]


Epoch 41 | Train Loss: 4.5872 | Val Loss: 4.5823
 Model saved to gru_0_1744850565.pt


Epoch 42/50: 100%|██████████| 155/155 [00:08<00:00, 18.85it/s]


Epoch 42 | Train Loss: 4.5791 | Val Loss: 4.5778
 Model saved to gru_0_1744850565.pt


Epoch 43/50: 100%|██████████| 155/155 [00:08<00:00, 18.53it/s]


Epoch 43 | Train Loss: 4.5713 | Val Loss: 4.5732
 Model saved to gru_0_1744850565.pt


Epoch 44/50: 100%|██████████| 155/155 [00:08<00:00, 18.90it/s]


Epoch 44 | Train Loss: 4.5653 | Val Loss: 4.5700
 Model saved to gru_0_1744850565.pt


Epoch 45/50: 100%|██████████| 155/155 [00:08<00:00, 18.86it/s]


Epoch 45 | Train Loss: 4.5583 | Val Loss: 4.5672
 Model saved to gru_0_1744850565.pt


Epoch 46/50: 100%|██████████| 155/155 [00:08<00:00, 18.91it/s]


Epoch 46 | Train Loss: 4.5519 | Val Loss: 4.5641
 Model saved to gru_0_1744850565.pt


Epoch 47/50: 100%|██████████| 155/155 [00:08<00:00, 18.91it/s]


Epoch 47 | Train Loss: 4.5460 | Val Loss: 4.5596
 Model saved to gru_0_1744850565.pt


Epoch 48/50: 100%|██████████| 155/155 [00:08<00:00, 18.89it/s]


Epoch 48 | Train Loss: 4.5392 | Val Loss: 4.5578
 Model saved to gru_0_1744850565.pt


Epoch 49/50: 100%|██████████| 155/155 [00:08<00:00, 18.90it/s]


Epoch 49 | Train Loss: 4.5332 | Val Loss: 4.5564
 Model saved to gru_0_1744850565.pt


Epoch 50/50: 100%|██████████| 155/155 [00:08<00:00, 18.47it/s]


Epoch 50 | Train Loss: 4.5269 | Val Loss: 4.5528
 Model saved to gru_0_1744850565.pt
📈 Saved loss curve as: GRULanguageModel_loss_curve_20250416_195026.png

 Test Perplexity: 94.8999
 Average BLEU Score: 0.0139

 Training gru_1_1744851030 with config: {'embed_dim': 256, 'hidden_dim': 512, 'num_layers': 3, 'lr': 0.0005}


Epoch 1/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 1 | Train Loss: 6.9973 | Val Loss: 6.8219
 Model saved to gru_1_1744851030.pt


Epoch 2/50: 100%|██████████| 155/155 [00:14<00:00, 10.82it/s]


Epoch 2 | Train Loss: 6.8622 | Val Loss: 6.7981
 Model saved to gru_1_1744851030.pt


Epoch 3/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 3 | Train Loss: 6.7166 | Val Loss: 6.5617
 Model saved to gru_1_1744851030.pt


Epoch 4/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 4 | Train Loss: 6.5082 | Val Loss: 6.3596
 Model saved to gru_1_1744851030.pt


Epoch 5/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 5 | Train Loss: 6.3141 | Val Loss: 6.1263
 Model saved to gru_1_1744851030.pt


Epoch 6/50: 100%|██████████| 155/155 [00:14<00:00, 10.78it/s]


Epoch 6 | Train Loss: 6.0708 | Val Loss: 5.8941
 Model saved to gru_1_1744851030.pt


Epoch 7/50: 100%|██████████| 155/155 [00:14<00:00, 10.67it/s]


Epoch 7 | Train Loss: 5.8960 | Val Loss: 5.7433
 Model saved to gru_1_1744851030.pt


Epoch 8/50: 100%|██████████| 155/155 [00:14<00:00, 10.78it/s]


Epoch 8 | Train Loss: 5.7655 | Val Loss: 5.6150
 Model saved to gru_1_1744851030.pt


Epoch 9/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 9 | Train Loss: 5.6406 | Val Loss: 5.4890
 Model saved to gru_1_1744851030.pt


Epoch 10/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 10 | Train Loss: 5.5263 | Val Loss: 5.3775
 Model saved to gru_1_1744851030.pt


Epoch 11/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 11 | Train Loss: 5.4116 | Val Loss: 5.2685
 Model saved to gru_1_1744851030.pt


Epoch 12/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 12 | Train Loss: 5.2936 | Val Loss: 5.1581
 Model saved to gru_1_1744851030.pt


Epoch 13/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 13 | Train Loss: 5.1859 | Val Loss: 5.0643
 Model saved to gru_1_1744851030.pt


Epoch 14/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 14 | Train Loss: 5.0901 | Val Loss: 4.9807
 Model saved to gru_1_1744851030.pt


Epoch 15/50: 100%|██████████| 155/155 [00:14<00:00, 10.69it/s]


Epoch 15 | Train Loss: 5.0052 | Val Loss: 4.9160
 Model saved to gru_1_1744851030.pt


Epoch 16/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 16 | Train Loss: 4.9313 | Val Loss: 4.8543
 Model saved to gru_1_1744851030.pt


Epoch 17/50: 100%|██████████| 155/155 [00:14<00:00, 10.76it/s]


Epoch 17 | Train Loss: 4.8676 | Val Loss: 4.8076
 Model saved to gru_1_1744851030.pt


Epoch 18/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 18 | Train Loss: 4.8135 | Val Loss: 4.7660
 Model saved to gru_1_1744851030.pt


Epoch 19/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 19 | Train Loss: 4.7625 | Val Loss: 4.7285
 Model saved to gru_1_1744851030.pt


Epoch 20/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 20 | Train Loss: 4.7239 | Val Loss: 4.7089
 Model saved to gru_1_1744851030.pt


Epoch 21/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 21 | Train Loss: 4.6847 | Val Loss: 4.6720
 Model saved to gru_1_1744851030.pt


Epoch 22/50: 100%|██████████| 155/155 [00:14<00:00, 10.65it/s]


Epoch 22 | Train Loss: 4.6446 | Val Loss: 4.6473
 Model saved to gru_1_1744851030.pt


Epoch 23/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 23 | Train Loss: 4.6119 | Val Loss: 4.6274
 Model saved to gru_1_1744851030.pt


Epoch 24/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 24 | Train Loss: 4.5811 | Val Loss: 4.6096
 Model saved to gru_1_1744851030.pt


Epoch 25/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 25 | Train Loss: 4.5525 | Val Loss: 4.5941
 Model saved to gru_1_1744851030.pt


Epoch 26/50: 100%|██████████| 155/155 [00:14<00:00, 10.76it/s]


Epoch 26 | Train Loss: 4.5278 | Val Loss: 4.5792
 Model saved to gru_1_1744851030.pt


Epoch 27/50: 100%|██████████| 155/155 [00:14<00:00, 10.76it/s]


Epoch 27 | Train Loss: 4.5042 | Val Loss: 4.5665
 Model saved to gru_1_1744851030.pt


Epoch 28/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 28 | Train Loss: 4.6759 | Val Loss: 4.6662


Epoch 29/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 29 | Train Loss: 4.5947 | Val Loss: 4.6373


Epoch 30/50: 100%|██████████| 155/155 [00:14<00:00, 10.66it/s]


Epoch 30 | Train Loss: 4.5422 | Val Loss: 4.6149


Epoch 31/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 31 | Train Loss: 4.5220 | Val Loss: 4.6057


Epoch 32/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 32 | Train Loss: 4.4986 | Val Loss: 4.5975


Epoch 33/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 33 | Train Loss: 4.4896 | Val Loss: 4.5923


Epoch 34/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 34 | Train Loss: 4.4777 | Val Loss: 4.5894


Epoch 35/50: 100%|██████████| 155/155 [00:14<00:00, 10.79it/s]


Epoch 35 | Train Loss: 4.4741 | Val Loss: 4.5856


Epoch 36/50: 100%|██████████| 155/155 [00:14<00:00, 10.76it/s]


Epoch 36 | Train Loss: 4.4684 | Val Loss: 4.5846


Epoch 37/50: 100%|██████████| 155/155 [00:14<00:00, 10.66it/s]


Epoch 37 | Train Loss: 4.4656 | Val Loss: 4.5834


Epoch 38/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 38 | Train Loss: 4.4627 | Val Loss: 4.5825


Epoch 39/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 39 | Train Loss: 4.4617 | Val Loss: 4.5819


Epoch 40/50: 100%|██████████| 155/155 [00:14<00:00, 10.80it/s]


Epoch 40 | Train Loss: 4.4595 | Val Loss: 4.5815


Epoch 41/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 41 | Train Loss: 4.4595 | Val Loss: 4.5813


Epoch 42/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 42 | Train Loss: 4.4585 | Val Loss: 4.5811


Epoch 43/50: 100%|██████████| 155/155 [00:14<00:00, 10.81it/s]


Epoch 43 | Train Loss: 4.4589 | Val Loss: 4.5809


Epoch 44/50: 100%|██████████| 155/155 [00:14<00:00, 10.72it/s]


Epoch 44 | Train Loss: 4.4582 | Val Loss: 4.5808


Epoch 45/50: 100%|██████████| 155/155 [00:14<00:00, 10.68it/s]


Epoch 45 | Train Loss: 4.4573 | Val Loss: 4.5807


Epoch 46/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 46 | Train Loss: 4.4580 | Val Loss: 4.5807


Epoch 47/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 47 | Train Loss: 4.4574 | Val Loss: 4.5806


Epoch 48/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 48 | Train Loss: 4.4573 | Val Loss: 4.5806


Epoch 49/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 49 | Train Loss: 4.4576 | Val Loss: 4.5806


Epoch 50/50: 100%|██████████| 155/155 [00:14<00:00, 10.77it/s]


Epoch 50 | Train Loss: 4.4565 | Val Loss: 4.5806
📈 Saved loss curve as: GRULanguageModel_loss_curve_20250416_200343.png

 Test Perplexity: 96.2091
 Average BLEU Score: 0.0143

 Training lstm_0_1744851828 with config: {'embed_dim': 128, 'hidden_dim': 256, 'num_layers': 2, 'lr': 0.001}


Epoch 1/50: 100%|██████████| 155/155 [00:08<00:00, 18.33it/s]


Epoch 1 | Train Loss: 7.0331 | Val Loss: 6.7877
 Model saved to lstm_0_1744851828.pt


Epoch 2/50: 100%|██████████| 155/155 [00:08<00:00, 18.01it/s]


Epoch 2 | Train Loss: 6.5859 | Val Loss: 6.2670
 Model saved to lstm_0_1744851828.pt


Epoch 3/50: 100%|██████████| 155/155 [00:08<00:00, 18.32it/s]


Epoch 3 | Train Loss: 6.1455 | Val Loss: 5.9351
 Model saved to lstm_0_1744851828.pt


Epoch 4/50: 100%|██████████| 155/155 [00:08<00:00, 18.31it/s]


Epoch 4 | Train Loss: 5.8885 | Val Loss: 5.7384
 Model saved to lstm_0_1744851828.pt


Epoch 5/50: 100%|██████████| 155/155 [00:08<00:00, 18.30it/s]


Epoch 5 | Train Loss: 5.7306 | Val Loss: 5.6082
 Model saved to lstm_0_1744851828.pt


Epoch 6/50: 100%|██████████| 155/155 [00:08<00:00, 18.30it/s]


Epoch 6 | Train Loss: 5.6160 | Val Loss: 5.5041
 Model saved to lstm_0_1744851828.pt


Epoch 7/50: 100%|██████████| 155/155 [00:08<00:00, 18.29it/s]


Epoch 7 | Train Loss: 5.5268 | Val Loss: 5.4253
 Model saved to lstm_0_1744851828.pt


Epoch 8/50: 100%|██████████| 155/155 [00:08<00:00, 18.29it/s]


Epoch 8 | Train Loss: 5.4554 | Val Loss: 5.3617
 Model saved to lstm_0_1744851828.pt


Epoch 9/50: 100%|██████████| 155/155 [00:08<00:00, 18.30it/s]


Epoch 9 | Train Loss: 5.3944 | Val Loss: 5.3012
 Model saved to lstm_0_1744851828.pt


Epoch 10/50: 100%|██████████| 155/155 [00:08<00:00, 17.96it/s]


Epoch 10 | Train Loss: 5.3387 | Val Loss: 5.2472
 Model saved to lstm_0_1744851828.pt


Epoch 11/50: 100%|██████████| 155/155 [00:08<00:00, 18.29it/s]


Epoch 11 | Train Loss: 5.2875 | Val Loss: 5.1994
 Model saved to lstm_0_1744851828.pt


Epoch 12/50: 100%|██████████| 155/155 [00:08<00:00, 18.29it/s]


Epoch 12 | Train Loss: 5.2422 | Val Loss: 5.1574
 Model saved to lstm_0_1744851828.pt


Epoch 13/50: 100%|██████████| 155/155 [00:08<00:00, 18.28it/s]


Epoch 13 | Train Loss: 5.2013 | Val Loss: 5.1203
 Model saved to lstm_0_1744851828.pt


Epoch 14/50: 100%|██████████| 155/155 [00:08<00:00, 18.29it/s]


Epoch 14 | Train Loss: 5.1642 | Val Loss: 5.0873
 Model saved to lstm_0_1744851828.pt


Epoch 15/50: 100%|██████████| 155/155 [00:08<00:00, 18.29it/s]


Epoch 15 | Train Loss: 5.1306 | Val Loss: 5.0551
 Model saved to lstm_0_1744851828.pt


Epoch 16/50: 100%|██████████| 155/155 [00:08<00:00, 18.28it/s]


Epoch 16 | Train Loss: 5.0992 | Val Loss: 5.0266
 Model saved to lstm_0_1744851828.pt


Epoch 17/50: 100%|██████████| 155/155 [00:08<00:00, 17.97it/s]


Epoch 17 | Train Loss: 5.0700 | Val Loss: 4.9984
 Model saved to lstm_0_1744851828.pt


Epoch 18/50: 100%|██████████| 155/155 [00:08<00:00, 18.30it/s]


Epoch 18 | Train Loss: 5.0440 | Val Loss: 4.9739
 Model saved to lstm_0_1744851828.pt


Epoch 19/50: 100%|██████████| 155/155 [00:08<00:00, 18.28it/s]


Epoch 19 | Train Loss: 5.0176 | Val Loss: 4.9498
 Model saved to lstm_0_1744851828.pt


Epoch 20/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 20 | Train Loss: 4.9940 | Val Loss: 4.9273
 Model saved to lstm_0_1744851828.pt


Epoch 21/50: 100%|██████████| 155/155 [00:08<00:00, 18.28it/s]


Epoch 21 | Train Loss: 4.9706 | Val Loss: 4.9089
 Model saved to lstm_0_1744851828.pt


Epoch 22/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 22 | Train Loss: 4.9489 | Val Loss: 4.8879
 Model saved to lstm_0_1744851828.pt


Epoch 23/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 23 | Train Loss: 4.9287 | Val Loss: 4.8691
 Model saved to lstm_0_1744851828.pt


Epoch 24/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 24 | Train Loss: 4.9098 | Val Loss: 4.8513
 Model saved to lstm_0_1744851828.pt


Epoch 25/50: 100%|██████████| 155/155 [00:08<00:00, 17.95it/s]


Epoch 25 | Train Loss: 4.8912 | Val Loss: 4.8353
 Model saved to lstm_0_1744851828.pt


Epoch 26/50: 100%|██████████| 155/155 [00:08<00:00, 18.28it/s]


Epoch 26 | Train Loss: 4.8739 | Val Loss: 4.8209
 Model saved to lstm_0_1744851828.pt


Epoch 27/50: 100%|██████████| 155/155 [00:08<00:00, 18.28it/s]


Epoch 27 | Train Loss: 4.8567 | Val Loss: 4.8053
 Model saved to lstm_0_1744851828.pt


Epoch 28/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 28 | Train Loss: 4.8415 | Val Loss: 4.7927
 Model saved to lstm_0_1744851828.pt


Epoch 29/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 29 | Train Loss: 4.8276 | Val Loss: 4.7795
 Model saved to lstm_0_1744851828.pt


Epoch 30/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 30 | Train Loss: 4.8115 | Val Loss: 4.7659
 Model saved to lstm_0_1744851828.pt


Epoch 31/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 31 | Train Loss: 4.7985 | Val Loss: 4.7560
 Model saved to lstm_0_1744851828.pt


Epoch 32/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 32 | Train Loss: 4.7849 | Val Loss: 4.7454
 Model saved to lstm_0_1744851828.pt


Epoch 33/50: 100%|██████████| 155/155 [00:08<00:00, 18.28it/s]


Epoch 33 | Train Loss: 4.7716 | Val Loss: 4.7343
 Model saved to lstm_0_1744851828.pt


Epoch 34/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 34 | Train Loss: 4.7602 | Val Loss: 4.7243
 Model saved to lstm_0_1744851828.pt


Epoch 35/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 35 | Train Loss: 4.7484 | Val Loss: 4.7148
 Model saved to lstm_0_1744851828.pt


Epoch 36/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 36 | Train Loss: 4.7362 | Val Loss: 4.7055
 Model saved to lstm_0_1744851828.pt


Epoch 37/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 37 | Train Loss: 4.7261 | Val Loss: 4.6975
 Model saved to lstm_0_1744851828.pt


Epoch 38/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 38 | Train Loss: 4.7151 | Val Loss: 4.6880
 Model saved to lstm_0_1744851828.pt


Epoch 39/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 39 | Train Loss: 4.7048 | Val Loss: 4.6805
 Model saved to lstm_0_1744851828.pt


Epoch 40/50: 100%|██████████| 155/155 [00:08<00:00, 17.95it/s]


Epoch 40 | Train Loss: 4.6952 | Val Loss: 4.6724
 Model saved to lstm_0_1744851828.pt


Epoch 41/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 41 | Train Loss: 4.6851 | Val Loss: 4.6655
 Model saved to lstm_0_1744851828.pt


Epoch 42/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 42 | Train Loss: 4.6759 | Val Loss: 4.6610
 Model saved to lstm_0_1744851828.pt


Epoch 43/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 43 | Train Loss: 4.6662 | Val Loss: 4.6515
 Model saved to lstm_0_1744851828.pt


Epoch 44/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 44 | Train Loss: 4.6581 | Val Loss: 4.6465
 Model saved to lstm_0_1744851828.pt


Epoch 45/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 45 | Train Loss: 4.6489 | Val Loss: 4.6401
 Model saved to lstm_0_1744851828.pt


Epoch 46/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 46 | Train Loss: 4.6413 | Val Loss: 4.6342
 Model saved to lstm_0_1744851828.pt


Epoch 47/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 47 | Train Loss: 4.6327 | Val Loss: 4.6273
 Model saved to lstm_0_1744851828.pt


Epoch 48/50: 100%|██████████| 155/155 [00:08<00:00, 17.95it/s]


Epoch 48 | Train Loss: 4.6248 | Val Loss: 4.6231
 Model saved to lstm_0_1744851828.pt


Epoch 49/50: 100%|██████████| 155/155 [00:08<00:00, 18.26it/s]


Epoch 49 | Train Loss: 4.6178 | Val Loss: 4.6168
 Model saved to lstm_0_1744851828.pt


Epoch 50/50: 100%|██████████| 155/155 [00:08<00:00, 18.27it/s]


Epoch 50 | Train Loss: 4.6097 | Val Loss: 4.6122
 Model saved to lstm_0_1744851828.pt
📈 Saved loss curve as: LSTMLanguageModel_loss_curve_20250416_201142.png

 Test Perplexity: 100.7093
 Average BLEU Score: 0.0128

 Training lstm_1_1744852306 with config: {'embed_dim': 256, 'hidden_dim': 512, 'num_layers': 3, 'lr': 0.0001}


Epoch 1/50: 100%|██████████| 155/155 [00:14<00:00, 10.45it/s]


Epoch 1 | Train Loss: 7.3688 | Val Loss: 6.8272
 Model saved to lstm_1_1744852306.pt


Epoch 2/50: 100%|██████████| 155/155 [00:14<00:00, 10.46it/s]


Epoch 2 | Train Loss: 6.8766 | Val Loss: 6.8185
 Model saved to lstm_1_1744852306.pt


Epoch 3/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 3 | Train Loss: 6.8600 | Val Loss: 6.8112
 Model saved to lstm_1_1744852306.pt


Epoch 4/50: 100%|██████████| 155/155 [00:14<00:00, 10.45it/s]


Epoch 4 | Train Loss: 6.8455 | Val Loss: 6.7987
 Model saved to lstm_1_1744852306.pt


Epoch 5/50: 100%|██████████| 155/155 [00:15<00:00, 10.32it/s]


Epoch 5 | Train Loss: 6.8020 | Val Loss: 6.7159
 Model saved to lstm_1_1744852306.pt


Epoch 6/50: 100%|██████████| 155/155 [00:14<00:00, 10.44it/s]


Epoch 6 | Train Loss: 6.6898 | Val Loss: 6.6016
 Model saved to lstm_1_1744852306.pt


Epoch 7/50: 100%|██████████| 155/155 [00:14<00:00, 10.41it/s]


Epoch 7 | Train Loss: 6.5899 | Val Loss: 6.5088
 Model saved to lstm_1_1744852306.pt


Epoch 8/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 8 | Train Loss: 6.4906 | Val Loss: 6.3938
 Model saved to lstm_1_1744852306.pt


Epoch 9/50: 100%|██████████| 155/155 [00:14<00:00, 10.43it/s]


Epoch 9 | Train Loss: 6.3626 | Val Loss: 6.2579
 Model saved to lstm_1_1744852306.pt


Epoch 10/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 10 | Train Loss: 6.2390 | Val Loss: 6.1495
 Model saved to lstm_1_1744852306.pt


Epoch 11/50: 100%|██████████| 155/155 [00:14<00:00, 10.41it/s]


Epoch 11 | Train Loss: 6.1396 | Val Loss: 6.0549
 Model saved to lstm_1_1744852306.pt


Epoch 12/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 12 | Train Loss: 6.0512 | Val Loss: 5.9713
 Model saved to lstm_1_1744852306.pt


Epoch 13/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 13 | Train Loss: 5.9761 | Val Loss: 5.9035
 Model saved to lstm_1_1744852306.pt


Epoch 14/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 14 | Train Loss: 5.9138 | Val Loss: 5.8451
 Model saved to lstm_1_1744852306.pt


Epoch 15/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 15 | Train Loss: 5.8598 | Val Loss: 5.7945
 Model saved to lstm_1_1744852306.pt


Epoch 16/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 16 | Train Loss: 5.8123 | Val Loss: 5.7505
 Model saved to lstm_1_1744852306.pt


Epoch 17/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 17 | Train Loss: 5.7693 | Val Loss: 5.7096
 Model saved to lstm_1_1744852306.pt


Epoch 18/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 18 | Train Loss: 5.7307 | Val Loss: 5.6726
 Model saved to lstm_1_1744852306.pt


Epoch 19/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 19 | Train Loss: 5.6955 | Val Loss: 5.6398
 Model saved to lstm_1_1744852306.pt


Epoch 20/50: 100%|██████████| 155/155 [00:15<00:00, 10.31it/s]


Epoch 20 | Train Loss: 5.6626 | Val Loss: 5.6099
 Model saved to lstm_1_1744852306.pt


Epoch 21/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 21 | Train Loss: 5.6327 | Val Loss: 5.5799
 Model saved to lstm_1_1744852306.pt


Epoch 22/50: 100%|██████████| 155/155 [00:14<00:00, 10.34it/s]


Epoch 22 | Train Loss: 5.6036 | Val Loss: 5.5522
 Model saved to lstm_1_1744852306.pt


Epoch 23/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 23 | Train Loss: 5.5766 | Val Loss: 5.5265
 Model saved to lstm_1_1744852306.pt


Epoch 24/50: 100%|██████████| 155/155 [00:14<00:00, 10.40it/s]


Epoch 24 | Train Loss: 5.5509 | Val Loss: 5.5019
 Model saved to lstm_1_1744852306.pt


Epoch 25/50: 100%|██████████| 155/155 [00:14<00:00, 10.40it/s]


Epoch 25 | Train Loss: 5.5255 | Val Loss: 5.4777
 Model saved to lstm_1_1744852306.pt


Epoch 26/50: 100%|██████████| 155/155 [00:14<00:00, 10.40it/s]


Epoch 26 | Train Loss: 5.5011 | Val Loss: 5.4542
 Model saved to lstm_1_1744852306.pt


Epoch 27/50: 100%|██████████| 155/155 [00:14<00:00, 10.40it/s]


Epoch 27 | Train Loss: 5.4773 | Val Loss: 5.4313
 Model saved to lstm_1_1744852306.pt


Epoch 28/50: 100%|██████████| 155/155 [00:15<00:00, 10.31it/s]


Epoch 28 | Train Loss: 5.4545 | Val Loss: 5.4123
 Model saved to lstm_1_1744852306.pt


Epoch 29/50: 100%|██████████| 155/155 [00:14<00:00, 10.40it/s]


Epoch 29 | Train Loss: 5.4331 | Val Loss: 5.3903
 Model saved to lstm_1_1744852306.pt


Epoch 30/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 30 | Train Loss: 5.4119 | Val Loss: 5.3693
 Model saved to lstm_1_1744852306.pt


Epoch 31/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 31 | Train Loss: 5.3912 | Val Loss: 5.3510
 Model saved to lstm_1_1744852306.pt


Epoch 32/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 32 | Train Loss: 5.3720 | Val Loss: 5.3334
 Model saved to lstm_1_1744852306.pt


Epoch 33/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 33 | Train Loss: 5.3535 | Val Loss: 5.3171
 Model saved to lstm_1_1744852306.pt


Epoch 34/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 34 | Train Loss: 5.3355 | Val Loss: 5.3002
 Model saved to lstm_1_1744852306.pt


Epoch 35/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 35 | Train Loss: 5.3182 | Val Loss: 5.2843
 Model saved to lstm_1_1744852306.pt


Epoch 36/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 36 | Train Loss: 5.3012 | Val Loss: 5.2680
 Model saved to lstm_1_1744852306.pt


Epoch 37/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 37 | Train Loss: 5.2853 | Val Loss: 5.2535
 Model saved to lstm_1_1744852306.pt


Epoch 38/50: 100%|██████████| 155/155 [00:14<00:00, 10.41it/s]


Epoch 38 | Train Loss: 5.2699 | Val Loss: 5.2400
 Model saved to lstm_1_1744852306.pt


Epoch 39/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 39 | Train Loss: 5.2545 | Val Loss: 5.2275
 Model saved to lstm_1_1744852306.pt


Epoch 40/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 40 | Train Loss: 5.2402 | Val Loss: 5.2132
 Model saved to lstm_1_1744852306.pt


Epoch 41/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 41 | Train Loss: 5.2258 | Val Loss: 5.2012
 Model saved to lstm_1_1744852306.pt


Epoch 42/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 42 | Train Loss: 5.2122 | Val Loss: 5.1888
 Model saved to lstm_1_1744852306.pt


Epoch 43/50: 100%|██████████| 155/155 [00:15<00:00, 10.28it/s]


Epoch 43 | Train Loss: 5.1993 | Val Loss: 5.1780
 Model saved to lstm_1_1744852306.pt


Epoch 44/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 44 | Train Loss: 5.1863 | Val Loss: 5.1648
 Model saved to lstm_1_1744852306.pt


Epoch 45/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 45 | Train Loss: 5.1735 | Val Loss: 5.1555
 Model saved to lstm_1_1744852306.pt


Epoch 46/50: 100%|██████████| 155/155 [00:14<00:00, 10.38it/s]


Epoch 46 | Train Loss: 5.1609 | Val Loss: 5.1435
 Model saved to lstm_1_1744852306.pt


Epoch 47/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 47 | Train Loss: 5.1492 | Val Loss: 5.1331
 Model saved to lstm_1_1744852306.pt


Epoch 48/50: 100%|██████████| 155/155 [00:14<00:00, 10.42it/s]


Epoch 48 | Train Loss: 5.1369 | Val Loss: 5.1234
 Model saved to lstm_1_1744852306.pt


Epoch 49/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 49 | Train Loss: 5.1258 | Val Loss: 5.1135
 Model saved to lstm_1_1744852306.pt


Epoch 50/50: 100%|██████████| 155/155 [00:14<00:00, 10.39it/s]


Epoch 50 | Train Loss: 5.1144 | Val Loss: 5.1030
 Model saved to lstm_1_1744852306.pt
📈 Saved loss curve as: LSTMLanguageModel_loss_curve_20250416_202530.png

 Test Perplexity: 164.5215
 Average BLEU Score: 0.0099

 Training rnn_0_1744853135 with config: {'embed_dim': 128, 'hidden_dim': 128, 'num_layers': 2, 'lr': 0.001}


Epoch 1/50: 100%|██████████| 155/155 [00:05<00:00, 28.90it/s]


Epoch 1 | Train Loss: 7.0825 | Val Loss: 6.6578
 Model saved to rnn_0_1744853135.pt


Epoch 2/50: 100%|██████████| 155/155 [00:05<00:00, 29.75it/s]


Epoch 2 | Train Loss: 6.4342 | Val Loss: 6.0911
 Model saved to rnn_0_1744853135.pt


Epoch 3/50: 100%|██████████| 155/155 [00:05<00:00, 29.79it/s]


Epoch 3 | Train Loss: 5.9940 | Val Loss: 5.7600
 Model saved to rnn_0_1744853135.pt


Epoch 4/50: 100%|██████████| 155/155 [00:05<00:00, 29.78it/s]


Epoch 4 | Train Loss: 5.7612 | Val Loss: 5.5973
 Model saved to rnn_0_1744853135.pt


Epoch 5/50: 100%|██████████| 155/155 [00:05<00:00, 29.78it/s]


Epoch 5 | Train Loss: 5.6389 | Val Loss: 5.4972
 Model saved to rnn_0_1744853135.pt


Epoch 6/50: 100%|██████████| 155/155 [00:05<00:00, 29.76it/s]


Epoch 6 | Train Loss: 5.5586 | Val Loss: 5.4250
 Model saved to rnn_0_1744853135.pt


Epoch 7/50: 100%|██████████| 155/155 [00:05<00:00, 29.78it/s]


Epoch 7 | Train Loss: 5.4958 | Val Loss: 5.3662
 Model saved to rnn_0_1744853135.pt


Epoch 8/50: 100%|██████████| 155/155 [00:05<00:00, 28.95it/s]


Epoch 8 | Train Loss: 5.4484 | Val Loss: 5.3210
 Model saved to rnn_0_1744853135.pt


Epoch 9/50: 100%|██████████| 155/155 [00:05<00:00, 29.77it/s]


Epoch 9 | Train Loss: 5.4080 | Val Loss: 5.2814
 Model saved to rnn_0_1744853135.pt


Epoch 10/50: 100%|██████████| 155/155 [00:05<00:00, 29.77it/s]


Epoch 10 | Train Loss: 5.3741 | Val Loss: 5.2468
 Model saved to rnn_0_1744853135.pt


Epoch 11/50: 100%|██████████| 155/155 [00:05<00:00, 29.79it/s]


Epoch 11 | Train Loss: 5.3451 | Val Loss: 5.2163
 Model saved to rnn_0_1744853135.pt


Epoch 12/50: 100%|██████████| 155/155 [00:05<00:00, 29.77it/s]


Epoch 12 | Train Loss: 5.3192 | Val Loss: 5.1886
 Model saved to rnn_0_1744853135.pt


Epoch 13/50: 100%|██████████| 155/155 [00:05<00:00, 29.78it/s]


Epoch 13 | Train Loss: 5.2958 | Val Loss: 5.1652
 Model saved to rnn_0_1744853135.pt


Epoch 14/50: 100%|██████████| 155/155 [00:05<00:00, 29.79it/s]


Epoch 14 | Train Loss: 5.2751 | Val Loss: 5.1464
 Model saved to rnn_0_1744853135.pt


Epoch 15/50: 100%|██████████| 155/155 [00:05<00:00, 29.77it/s]


Epoch 15 | Train Loss: 5.2562 | Val Loss: 5.1263
 Model saved to rnn_0_1744853135.pt


Epoch 16/50: 100%|██████████| 155/155 [00:05<00:00, 28.92it/s]


Epoch 16 | Train Loss: 5.2386 | Val Loss: 5.1081
 Model saved to rnn_0_1744853135.pt


Epoch 17/50: 100%|██████████| 155/155 [00:05<00:00, 29.72it/s]


Epoch 17 | Train Loss: 5.2232 | Val Loss: 5.0922
 Model saved to rnn_0_1744853135.pt


Epoch 18/50: 100%|██████████| 155/155 [00:05<00:00, 29.74it/s]


Epoch 18 | Train Loss: 5.2087 | Val Loss: 5.0769
 Model saved to rnn_0_1744853135.pt


Epoch 19/50: 100%|██████████| 155/155 [00:05<00:00, 29.79it/s]


Epoch 19 | Train Loss: 5.1960 | Val Loss: 5.0646
 Model saved to rnn_0_1744853135.pt


Epoch 20/50: 100%|██████████| 155/155 [00:05<00:00, 29.77it/s]


Epoch 20 | Train Loss: 5.1848 | Val Loss: 5.0518
 Model saved to rnn_0_1744853135.pt


Epoch 21/50: 100%|██████████| 155/155 [00:05<00:00, 29.79it/s]


Epoch 21 | Train Loss: 5.1731 | Val Loss: 5.0406
 Model saved to rnn_0_1744853135.pt


Epoch 22/50: 100%|██████████| 155/155 [00:05<00:00, 29.79it/s]


Epoch 22 | Train Loss: 5.1635 | Val Loss: 5.0307
 Model saved to rnn_0_1744853135.pt


Epoch 23/50: 100%|██████████| 155/155 [00:05<00:00, 29.76it/s]


Epoch 23 | Train Loss: 5.1539 | Val Loss: 5.0232
 Model saved to rnn_0_1744853135.pt


Epoch 24/50: 100%|██████████| 155/155 [00:05<00:00, 28.92it/s]


Epoch 24 | Train Loss: 5.1450 | Val Loss: 5.0137
 Model saved to rnn_0_1744853135.pt


Epoch 25/50: 100%|██████████| 155/155 [00:05<00:00, 29.76it/s]


Epoch 25 | Train Loss: 5.1360 | Val Loss: 5.0042
 Model saved to rnn_0_1744853135.pt


Epoch 26/50: 100%|██████████| 155/155 [00:05<00:00, 29.73it/s]


Epoch 26 | Train Loss: 5.1282 | Val Loss: 4.9980
 Model saved to rnn_0_1744853135.pt


Epoch 27/50: 100%|██████████| 155/155 [00:05<00:00, 29.70it/s]


Epoch 27 | Train Loss: 5.1210 | Val Loss: 4.9889
 Model saved to rnn_0_1744853135.pt


Epoch 28/50: 100%|██████████| 155/155 [00:05<00:00, 29.70it/s]


Epoch 28 | Train Loss: 5.1136 | Val Loss: 4.9817
 Model saved to rnn_0_1744853135.pt


Epoch 29/50: 100%|██████████| 155/155 [00:05<00:00, 29.70it/s]


Epoch 29 | Train Loss: 5.1072 | Val Loss: 4.9764
 Model saved to rnn_0_1744853135.pt


Epoch 30/50: 100%|██████████| 155/155 [00:05<00:00, 29.71it/s]


Epoch 30 | Train Loss: 5.1000 | Val Loss: 4.9704
 Model saved to rnn_0_1744853135.pt


Epoch 31/50: 100%|██████████| 155/155 [00:05<00:00, 28.85it/s]


Epoch 31 | Train Loss: 5.0938 | Val Loss: 4.9642
 Model saved to rnn_0_1744853135.pt


Epoch 32/50: 100%|██████████| 155/155 [00:05<00:00, 29.72it/s]


Epoch 32 | Train Loss: 5.0885 | Val Loss: 4.9577
 Model saved to rnn_0_1744853135.pt


Epoch 33/50: 100%|██████████| 155/155 [00:05<00:00, 29.70it/s]


Epoch 33 | Train Loss: 5.0825 | Val Loss: 4.9514
 Model saved to rnn_0_1744853135.pt


Epoch 34/50: 100%|██████████| 155/155 [00:05<00:00, 29.69it/s]


Epoch 34 | Train Loss: 5.0768 | Val Loss: 4.9494
 Model saved to rnn_0_1744853135.pt


Epoch 35/50: 100%|██████████| 155/155 [00:05<00:00, 29.70it/s]


Epoch 35 | Train Loss: 5.0717 | Val Loss: 4.9436
 Model saved to rnn_0_1744853135.pt


Epoch 36/50: 100%|██████████| 155/155 [00:05<00:00, 29.70it/s]


Epoch 36 | Train Loss: 5.0662 | Val Loss: 4.9386
 Model saved to rnn_0_1744853135.pt


Epoch 37/50: 100%|██████████| 155/155 [00:05<00:00, 29.68it/s]


Epoch 37 | Train Loss: 5.0620 | Val Loss: 4.9352
 Model saved to rnn_0_1744853135.pt


Epoch 38/50: 100%|██████████| 155/155 [00:05<00:00, 29.72it/s]


Epoch 38 | Train Loss: 5.0564 | Val Loss: 4.9275
 Model saved to rnn_0_1744853135.pt


Epoch 39/50: 100%|██████████| 155/155 [00:05<00:00, 28.86it/s]


Epoch 39 | Train Loss: 5.0521 | Val Loss: 4.9236
 Model saved to rnn_0_1744853135.pt


Epoch 40/50: 100%|██████████| 155/155 [00:05<00:00, 29.71it/s]


Epoch 40 | Train Loss: 5.0469 | Val Loss: 4.9209
 Model saved to rnn_0_1744853135.pt


Epoch 41/50: 100%|██████████| 155/155 [00:05<00:00, 29.72it/s]


Epoch 41 | Train Loss: 5.0437 | Val Loss: 4.9148
 Model saved to rnn_0_1744853135.pt


Epoch 42/50: 100%|██████████| 155/155 [00:05<00:00, 29.71it/s]


Epoch 42 | Train Loss: 5.0394 | Val Loss: 4.9115
 Model saved to rnn_0_1744853135.pt


Epoch 43/50: 100%|██████████| 155/155 [00:05<00:00, 29.73it/s]


Epoch 43 | Train Loss: 5.0349 | Val Loss: 4.9078
 Model saved to rnn_0_1744853135.pt


Epoch 44/50: 100%|██████████| 155/155 [00:05<00:00, 29.73it/s]


Epoch 44 | Train Loss: 5.0306 | Val Loss: 4.9038
 Model saved to rnn_0_1744853135.pt


Epoch 45/50: 100%|██████████| 155/155 [00:05<00:00, 29.74it/s]


Epoch 45 | Train Loss: 5.0265 | Val Loss: 4.9029
 Model saved to rnn_0_1744853135.pt


Epoch 46/50: 100%|██████████| 155/155 [00:05<00:00, 29.73it/s]


Epoch 46 | Train Loss: 5.0224 | Val Loss: 4.8974
 Model saved to rnn_0_1744853135.pt


Epoch 47/50: 100%|██████████| 155/155 [00:05<00:00, 28.86it/s]


Epoch 47 | Train Loss: 5.0188 | Val Loss: 4.8933
 Model saved to rnn_0_1744853135.pt


Epoch 48/50: 100%|██████████| 155/155 [00:05<00:00, 29.73it/s]


Epoch 48 | Train Loss: 5.0158 | Val Loss: 4.8918
 Model saved to rnn_0_1744853135.pt


Epoch 49/50: 100%|██████████| 155/155 [00:05<00:00, 29.71it/s]


Epoch 49 | Train Loss: 5.0115 | Val Loss: 4.8885
 Model saved to rnn_0_1744853135.pt


Epoch 50/50: 100%|██████████| 155/155 [00:05<00:00, 29.72it/s]


Epoch 50 | Train Loss: 5.0082 | Val Loss: 4.8850
 Model saved to rnn_0_1744853135.pt
📈 Saved loss curve as: RNNLanguageModel_loss_curve_20250416_203031.png

 Test Perplexity: 132.2909
 Average BLEU Score: 0.0101

 Training transformer_0_1744853436 with config: {'embed_dim': 256, 'num_heads': 4, 'num_layers': 2, 'lr': 0.001}


Epoch 1/50: 100%|██████████| 155/155 [00:14<00:00, 11.04it/s]


Epoch 1 | Train Loss: 6.4797 | Val Loss: 5.8619
 Model saved to transformer_0_1744853436.pt


Epoch 2/50: 100%|██████████| 155/155 [00:13<00:00, 11.08it/s]


Epoch 2 | Train Loss: 5.7753 | Val Loss: 5.5066
 Model saved to transformer_0_1744853436.pt


Epoch 3/50: 100%|██████████| 155/155 [00:14<00:00, 10.98it/s]


Epoch 3 | Train Loss: 5.5050 | Val Loss: 5.3041
 Model saved to transformer_0_1744853436.pt


Epoch 4/50: 100%|██████████| 155/155 [00:14<00:00, 10.88it/s]


Epoch 4 | Train Loss: 5.3290 | Val Loss: 5.1813
 Model saved to transformer_0_1744853436.pt


Epoch 5/50: 100%|██████████| 155/155 [00:14<00:00, 10.92it/s]


Epoch 5 | Train Loss: 5.2068 | Val Loss: 5.0987
 Model saved to transformer_0_1744853436.pt


Epoch 6/50: 100%|██████████| 155/155 [00:13<00:00, 11.11it/s]


Epoch 6 | Train Loss: 5.1132 | Val Loss: 5.0182
 Model saved to transformer_0_1744853436.pt


Epoch 7/50: 100%|██████████| 155/155 [00:14<00:00, 10.81it/s]


Epoch 7 | Train Loss: 5.0380 | Val Loss: 4.9587
 Model saved to transformer_0_1744853436.pt


Epoch 8/50: 100%|██████████| 155/155 [00:14<00:00, 10.98it/s]


Epoch 8 | Train Loss: 4.9765 | Val Loss: 4.9094
 Model saved to transformer_0_1744853436.pt


Epoch 9/50: 100%|██████████| 155/155 [00:13<00:00, 11.14it/s]


Epoch 9 | Train Loss: 4.9246 | Val Loss: 4.8690
 Model saved to transformer_0_1744853436.pt


Epoch 10/50: 100%|██████████| 155/155 [00:14<00:00, 10.98it/s]


Epoch 10 | Train Loss: 4.8781 | Val Loss: 4.8319
 Model saved to transformer_0_1744853436.pt


Epoch 11/50: 100%|██████████| 155/155 [00:14<00:00, 11.03it/s]


Epoch 11 | Train Loss: 4.8400 | Val Loss: 4.8076
 Model saved to transformer_0_1744853436.pt


Epoch 12/50: 100%|██████████| 155/155 [00:14<00:00, 10.95it/s]


Epoch 12 | Train Loss: 4.8052 | Val Loss: 4.7795
 Model saved to transformer_0_1744853436.pt


Epoch 13/50: 100%|██████████| 155/155 [00:14<00:00, 10.94it/s]


Epoch 13 | Train Loss: 4.7734 | Val Loss: 4.7646
 Model saved to transformer_0_1744853436.pt


Epoch 14/50: 100%|██████████| 155/155 [00:14<00:00, 11.00it/s]


Epoch 14 | Train Loss: 4.7463 | Val Loss: 4.7376
 Model saved to transformer_0_1744853436.pt


Epoch 15/50: 100%|██████████| 155/155 [00:14<00:00, 11.05it/s]


Epoch 15 | Train Loss: 4.7210 | Val Loss: 4.7213
 Model saved to transformer_0_1744853436.pt


Epoch 16/50: 100%|██████████| 155/155 [00:14<00:00, 11.06it/s]


Epoch 16 | Train Loss: 4.6978 | Val Loss: 4.7028
 Model saved to transformer_0_1744853436.pt


Epoch 17/50: 100%|██████████| 155/155 [00:14<00:00, 11.07it/s]


Epoch 17 | Train Loss: 4.6771 | Val Loss: 4.6869
 Model saved to transformer_0_1744853436.pt


Epoch 18/50: 100%|██████████| 155/155 [00:13<00:00, 11.07it/s]


Epoch 18 | Train Loss: 4.6564 | Val Loss: 4.6734
 Model saved to transformer_0_1744853436.pt


Epoch 19/50: 100%|██████████| 155/155 [00:14<00:00, 11.05it/s]


Epoch 19 | Train Loss: 4.6385 | Val Loss: 4.6635
 Model saved to transformer_0_1744853436.pt


Epoch 20/50: 100%|██████████| 155/155 [00:13<00:00, 11.15it/s]


Epoch 20 | Train Loss: 4.6225 | Val Loss: 4.6451
 Model saved to transformer_0_1744853436.pt


Epoch 21/50: 100%|██████████| 155/155 [00:14<00:00, 10.78it/s]


Epoch 21 | Train Loss: 4.6069 | Val Loss: 4.6394
 Model saved to transformer_0_1744853436.pt


Epoch 22/50: 100%|██████████| 155/155 [00:14<00:00, 11.04it/s]


Epoch 22 | Train Loss: 4.5908 | Val Loss: 4.6286
 Model saved to transformer_0_1744853436.pt


Epoch 23/50: 100%|██████████| 155/155 [00:13<00:00, 11.12it/s]


Epoch 23 | Train Loss: 4.5778 | Val Loss: 4.6173
 Model saved to transformer_0_1744853436.pt


Epoch 24/50: 100%|██████████| 155/155 [00:13<00:00, 11.17it/s]


Epoch 24 | Train Loss: 4.5648 | Val Loss: 4.6108
 Model saved to transformer_0_1744853436.pt


Epoch 25/50: 100%|██████████| 155/155 [00:13<00:00, 11.14it/s]


Epoch 25 | Train Loss: 4.5526 | Val Loss: 4.6017
 Model saved to transformer_0_1744853436.pt


Epoch 26/50: 100%|██████████| 155/155 [00:13<00:00, 11.16it/s]


Epoch 26 | Train Loss: 4.5409 | Val Loss: 4.5923
 Model saved to transformer_0_1744853436.pt


Epoch 27/50: 100%|██████████| 155/155 [00:14<00:00, 10.88it/s]


Epoch 27 | Train Loss: 4.5292 | Val Loss: 4.5855
 Model saved to transformer_0_1744853436.pt


Epoch 28/50: 100%|██████████| 155/155 [00:14<00:00, 11.06it/s]


Epoch 28 | Train Loss: 4.5184 | Val Loss: 4.5815
 Model saved to transformer_0_1744853436.pt


Epoch 29/50: 100%|██████████| 155/155 [00:14<00:00, 10.73it/s]


Epoch 29 | Train Loss: 4.5093 | Val Loss: 4.5725
 Model saved to transformer_0_1744853436.pt


Epoch 30/50: 100%|██████████| 155/155 [00:14<00:00, 11.05it/s]


Epoch 30 | Train Loss: 4.4995 | Val Loss: 4.5671
 Model saved to transformer_0_1744853436.pt


Epoch 31/50: 100%|██████████| 155/155 [00:14<00:00, 11.05it/s]


Epoch 31 | Train Loss: 4.4887 | Val Loss: 4.5586
 Model saved to transformer_0_1744853436.pt


Epoch 32/50: 100%|██████████| 155/155 [00:14<00:00, 11.04it/s]


Epoch 32 | Train Loss: 4.4798 | Val Loss: 4.5530
 Model saved to transformer_0_1744853436.pt


Epoch 33/50: 100%|██████████| 155/155 [00:13<00:00, 11.08it/s]


Epoch 33 | Train Loss: 4.4715 | Val Loss: 4.5496
 Model saved to transformer_0_1744853436.pt


Epoch 34/50: 100%|██████████| 155/155 [00:14<00:00, 10.95it/s]


Epoch 34 | Train Loss: 4.4639 | Val Loss: 4.5423
 Model saved to transformer_0_1744853436.pt


Epoch 35/50: 100%|██████████| 155/155 [00:14<00:00, 10.85it/s]


Epoch 35 | Train Loss: 4.4556 | Val Loss: 4.5376
 Model saved to transformer_0_1744853436.pt


Epoch 36/50: 100%|██████████| 155/155 [00:14<00:00, 10.89it/s]


Epoch 36 | Train Loss: 4.4473 | Val Loss: 4.5327
 Model saved to transformer_0_1744853436.pt


Epoch 37/50: 100%|██████████| 155/155 [00:13<00:00, 11.13it/s]


Epoch 37 | Train Loss: 4.4404 | Val Loss: 4.5269
 Model saved to transformer_0_1744853436.pt


Epoch 38/50: 100%|██████████| 155/155 [00:14<00:00, 10.83it/s]


Epoch 38 | Train Loss: 4.4331 | Val Loss: 4.5244
 Model saved to transformer_0_1744853436.pt


Epoch 39/50: 100%|██████████| 155/155 [00:13<00:00, 11.13it/s]


Epoch 39 | Train Loss: 4.4265 | Val Loss: 4.5186
 Model saved to transformer_0_1744853436.pt


Epoch 40/50: 100%|██████████| 155/155 [00:13<00:00, 11.09it/s]


Epoch 40 | Train Loss: 4.4193 | Val Loss: 4.5164
 Model saved to transformer_0_1744853436.pt


Epoch 41/50: 100%|██████████| 155/155 [00:14<00:00, 11.02it/s]


Epoch 41 | Train Loss: 4.4127 | Val Loss: 4.5130
 Model saved to transformer_0_1744853436.pt


Epoch 42/50: 100%|██████████| 155/155 [00:14<00:00, 10.75it/s]


Epoch 42 | Train Loss: 4.4061 | Val Loss: 4.5064
 Model saved to transformer_0_1744853436.pt


Epoch 43/50: 100%|██████████| 155/155 [00:14<00:00, 11.02it/s]


Epoch 43 | Train Loss: 4.3997 | Val Loss: 4.5070


Epoch 44/50: 100%|██████████| 155/155 [00:13<00:00, 11.09it/s]


Epoch 44 | Train Loss: 4.3944 | Val Loss: 4.4985
 Model saved to transformer_0_1744853436.pt


Epoch 45/50: 100%|██████████| 155/155 [00:13<00:00, 11.12it/s]


Epoch 45 | Train Loss: 4.3881 | Val Loss: 4.4925
 Model saved to transformer_0_1744853436.pt


Epoch 46/50: 100%|██████████| 155/155 [00:14<00:00, 11.02it/s]


Epoch 46 | Train Loss: 4.3812 | Val Loss: 4.4938


Epoch 47/50: 100%|██████████| 155/155 [00:13<00:00, 11.12it/s]


Epoch 47 | Train Loss: 4.3760 | Val Loss: 4.4913
 Model saved to transformer_0_1744853436.pt


Epoch 48/50: 100%|██████████| 155/155 [00:14<00:00, 10.86it/s]


Epoch 48 | Train Loss: 4.3703 | Val Loss: 4.4867
 Model saved to transformer_0_1744853436.pt


Epoch 49/50: 100%|██████████| 155/155 [00:13<00:00, 11.14it/s]


Epoch 49 | Train Loss: 4.3654 | Val Loss: 4.4811
 Model saved to transformer_0_1744853436.pt


Epoch 50/50: 100%|██████████| 155/155 [00:14<00:00, 10.75it/s]


Epoch 50 | Train Loss: 4.3604 | Val Loss: 4.4759
 Model saved to transformer_0_1744853436.pt
📈 Saved loss curve as: TransformerLanguageModel_loss_curve_20250416_204333.png

 Test Perplexity: 87.8767
 Average BLEU Score: 0.0127

 Training transformer_1_1744854218 with config: {'embed_dim': 512, 'num_heads': 8, 'num_layers': 4, 'lr': 0.0005}


Epoch 1/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 1 | Train Loss: 6.5504 | Val Loss: 5.8935
 Model saved to transformer_1_1744854218.pt


Epoch 2/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 2 | Train Loss: 5.7448 | Val Loss: 5.4637
 Model saved to transformer_1_1744854218.pt


Epoch 3/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 3 | Train Loss: 5.4304 | Val Loss: 5.2149
 Model saved to transformer_1_1744854218.pt


Epoch 4/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 4 | Train Loss: 5.2146 | Val Loss: 5.0489
 Model saved to transformer_1_1744854218.pt


Epoch 5/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 5 | Train Loss: 5.0553 | Val Loss: 4.9348
 Model saved to transformer_1_1744854218.pt


Epoch 6/50: 100%|██████████| 155/155 [00:39<00:00,  3.91it/s]


Epoch 6 | Train Loss: 4.9319 | Val Loss: 4.8462
 Model saved to transformer_1_1744854218.pt


Epoch 7/50: 100%|██████████| 155/155 [00:40<00:00,  3.87it/s]


Epoch 7 | Train Loss: 4.8341 | Val Loss: 4.7679
 Model saved to transformer_1_1744854218.pt


Epoch 8/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 8 | Train Loss: 4.7531 | Val Loss: 4.7164
 Model saved to transformer_1_1744854218.pt


Epoch 9/50: 100%|██████████| 155/155 [00:39<00:00,  3.92it/s]


Epoch 9 | Train Loss: 4.6817 | Val Loss: 4.6692
 Model saved to transformer_1_1744854218.pt


Epoch 10/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 10 | Train Loss: 4.6215 | Val Loss: 4.6426
 Model saved to transformer_1_1744854218.pt


Epoch 11/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 11 | Train Loss: 4.5680 | Val Loss: 4.6057
 Model saved to transformer_1_1744854218.pt


Epoch 12/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 12 | Train Loss: 4.5192 | Val Loss: 4.5821
 Model saved to transformer_1_1744854218.pt


Epoch 13/50: 100%|██████████| 155/155 [00:40<00:00,  3.87it/s]


Epoch 13 | Train Loss: 4.4758 | Val Loss: 4.5584
 Model saved to transformer_1_1744854218.pt


Epoch 14/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 14 | Train Loss: 4.4366 | Val Loss: 4.5401
 Model saved to transformer_1_1744854218.pt


Epoch 15/50: 100%|██████████| 155/155 [00:39<00:00,  3.91it/s]


Epoch 15 | Train Loss: 4.4005 | Val Loss: 4.5252
 Model saved to transformer_1_1744854218.pt


Epoch 16/50: 100%|██████████| 155/155 [00:40<00:00,  3.86it/s]


Epoch 16 | Train Loss: 4.3658 | Val Loss: 4.5115
 Model saved to transformer_1_1744854218.pt


Epoch 17/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 17 | Train Loss: 4.3364 | Val Loss: 4.4984
 Model saved to transformer_1_1744854218.pt


Epoch 18/50: 100%|██████████| 155/155 [00:39<00:00,  3.91it/s]


Epoch 18 | Train Loss: 4.3076 | Val Loss: 4.4887
 Model saved to transformer_1_1744854218.pt


Epoch 19/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 19 | Train Loss: 4.2800 | Val Loss: 4.4810
 Model saved to transformer_1_1744854218.pt


Epoch 20/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 20 | Train Loss: 4.2556 | Val Loss: 4.4690
 Model saved to transformer_1_1744854218.pt


Epoch 21/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 21 | Train Loss: 4.2318 | Val Loss: 4.4604
 Model saved to transformer_1_1744854218.pt


Epoch 22/50: 100%|██████████| 155/155 [00:39<00:00,  3.91it/s]


Epoch 22 | Train Loss: 4.2098 | Val Loss: 4.4582
 Model saved to transformer_1_1744854218.pt


Epoch 23/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 23 | Train Loss: 4.1886 | Val Loss: 4.4496
 Model saved to transformer_1_1744854218.pt


Epoch 24/50: 100%|██████████| 155/155 [00:40<00:00,  3.87it/s]


Epoch 24 | Train Loss: 4.1690 | Val Loss: 4.4430
 Model saved to transformer_1_1744854218.pt


Epoch 25/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 25 | Train Loss: 4.1510 | Val Loss: 4.4378
 Model saved to transformer_1_1744854218.pt


Epoch 26/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 26 | Train Loss: 4.1321 | Val Loss: 4.4388


Epoch 27/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 27 | Train Loss: 4.1160 | Val Loss: 4.4309
 Model saved to transformer_1_1744854218.pt


Epoch 28/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 28 | Train Loss: 4.0995 | Val Loss: 4.4289
 Model saved to transformer_1_1744854218.pt


Epoch 29/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 29 | Train Loss: 4.0841 | Val Loss: 4.4222
 Model saved to transformer_1_1744854218.pt


Epoch 30/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 30 | Train Loss: 4.0684 | Val Loss: 4.4225


Epoch 31/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 31 | Train Loss: 4.0546 | Val Loss: 4.4200
 Model saved to transformer_1_1744854218.pt


Epoch 32/50: 100%|██████████| 155/155 [00:40<00:00,  3.86it/s]


Epoch 32 | Train Loss: 4.0420 | Val Loss: 4.4165
 Model saved to transformer_1_1744854218.pt


Epoch 33/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 33 | Train Loss: 4.0281 | Val Loss: 4.4143
 Model saved to transformer_1_1744854218.pt


Epoch 34/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 34 | Train Loss: 4.0161 | Val Loss: 4.4132
 Model saved to transformer_1_1744854218.pt


Epoch 35/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 35 | Train Loss: 4.0036 | Val Loss: 4.4130
 Model saved to transformer_1_1744854218.pt


Epoch 36/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 36 | Train Loss: 3.9919 | Val Loss: 4.4053
 Model saved to transformer_1_1744854218.pt


Epoch 37/50: 100%|██████████| 155/155 [00:40<00:00,  3.87it/s]


Epoch 37 | Train Loss: 3.9802 | Val Loss: 4.4072


Epoch 38/50: 100%|██████████| 155/155 [00:39<00:00,  3.91it/s]


Epoch 38 | Train Loss: 3.9703 | Val Loss: 4.4099


Epoch 39/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 39 | Train Loss: 3.9158 | Val Loss: 4.4004
 Model saved to transformer_1_1744854218.pt


Epoch 40/50: 100%|██████████| 155/155 [00:39<00:00,  3.92it/s]


Epoch 40 | Train Loss: 3.8978 | Val Loss: 4.4002
 Model saved to transformer_1_1744854218.pt


Epoch 41/50: 100%|██████████| 155/155 [00:39<00:00,  3.91it/s]


Epoch 41 | Train Loss: 3.8868 | Val Loss: 4.3977
 Model saved to transformer_1_1744854218.pt


Epoch 42/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 42 | Train Loss: 3.8775 | Val Loss: 4.4019


Epoch 43/50: 100%|██████████| 155/155 [00:39<00:00,  3.91it/s]


Epoch 43 | Train Loss: 3.8684 | Val Loss: 4.4009


Epoch 44/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 44 | Train Loss: 3.8390 | Val Loss: 4.3988


Epoch 45/50: 100%|██████████| 155/155 [00:39<00:00,  3.90it/s]


Epoch 45 | Train Loss: 3.8300 | Val Loss: 4.3962
 Model saved to transformer_1_1744854218.pt


Epoch 46/50: 100%|██████████| 155/155 [00:40<00:00,  3.87it/s]


Epoch 46 | Train Loss: 3.8245 | Val Loss: 4.3985


Epoch 47/50: 100%|██████████| 155/155 [00:39<00:00,  3.88it/s]


Epoch 47 | Train Loss: 3.8195 | Val Loss: 4.3972


Epoch 48/50: 100%|██████████| 155/155 [00:40<00:00,  3.87it/s]


Epoch 48 | Train Loss: 3.8033 | Val Loss: 4.3971


Epoch 49/50: 100%|██████████| 155/155 [00:39<00:00,  3.92it/s]


Epoch 49 | Train Loss: 3.7997 | Val Loss: 4.3974


Epoch 50/50: 100%|██████████| 155/155 [00:39<00:00,  3.89it/s]


Epoch 50 | Train Loss: 3.7911 | Val Loss: 4.3968
📈 Saved loss curve as: TransformerLanguageModel_loss_curve_20250416_211953.png

 Test Perplexity: 81.1430
 Average BLEU Score: 0.0152


In [8]:
results_df = pd.DataFrame(results)
results_df.to_csv("experiment_results.csv", index=False)
results_df.sort_values(by="perplexity").head(10)

Unnamed: 0,model_type,config,perplexity,bleu_score,model_path
6,transformer,"{'embed_dim': 512, 'num_heads': 8, 'num_layers...",81.142958,0.015212,transformer_1_1744854218.pt
5,transformer,"{'embed_dim': 256, 'num_heads': 4, 'num_layers...",87.876736,0.012722,transformer_0_1744853436.pt
0,gru,"{'embed_dim': 128, 'hidden_dim': 256, 'num_lay...",94.899851,0.01392,gru_0_1744850565.pt
1,gru,"{'embed_dim': 256, 'hidden_dim': 512, 'num_lay...",96.209134,0.014286,gru_1_1744851030.pt
2,lstm,"{'embed_dim': 128, 'hidden_dim': 256, 'num_lay...",100.709278,0.012821,lstm_0_1744851828.pt
4,rnn,"{'embed_dim': 128, 'hidden_dim': 128, 'num_lay...",132.29091,0.010129,rnn_0_1744853135.pt
3,lstm,"{'embed_dim': 256, 'hidden_dim': 512, 'num_lay...",164.521526,0.009868,lstm_1_1744852306.pt


In [15]:
results_df.sort_values(by="model_type").head(11)

Unnamed: 0,model_type,config,perplexity,bleu_score,model_path
0,gru,"{'embed_dim': 128, 'hidden_dim': 256, 'num_lay...",94.899851,0.01392,gru_0_1744850565.pt
1,gru,"{'embed_dim': 256, 'hidden_dim': 512, 'num_lay...",96.209134,0.014286,gru_1_1744851030.pt
2,lstm,"{'embed_dim': 128, 'hidden_dim': 256, 'num_lay...",100.709278,0.012821,lstm_0_1744851828.pt
3,lstm,"{'embed_dim': 256, 'hidden_dim': 512, 'num_lay...",164.521526,0.009868,lstm_1_1744852306.pt
4,rnn,"{'embed_dim': 128, 'hidden_dim': 128, 'num_lay...",132.29091,0.010129,rnn_0_1744853135.pt
5,transformer,"{'embed_dim': 256, 'num_heads': 4, 'num_layers...",87.876736,0.012722,transformer_0_1744853436.pt
6,transformer,"{'embed_dim': 512, 'num_heads': 8, 'num_layers...",81.142958,0.015212,transformer_1_1744854218.pt


In [28]:

from transformer_model import TransformerLanguageModel

# Example config — must match the config used during training
config = {
    "embed_dim": 512,
    "num_heads": 8,
    "num_layers": 4
}
MODEL_TYPE='Transformer'
# Rebuild the model
model = TransformerLanguageModel(
    vocab_size=VOCAB_SIZE,
    embed_dim=config["embed_dim"],
    num_heads=config["num_heads"],
    num_layers=config["num_layers"],
    pad_token_id=tokenizer.get_pad_id()
).to(DEVICE)

evaluate_model(model, 'transformer_1_1744854218.pt', test_loader, tokenizer, DEVICE)

# --- Step 5: Sample Generation ---
custom_prompts = [
 
    "What do you prefer — cat or dog?",
    "old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the"
]

print(f"\n--- Generations using {MODEL_TYPE.upper()} ---")
for prompt in custom_prompts:
    output = model.generate(tokenizer, prompt, device=DEVICE, return_continuation_only=True)
    print(f" Prompt    : {prompt}")
    print(f" Generated : {output}")



 Test Perplexity: 81.1430
 Average BLEU Score: 0.0152

--- Generations using TRANSFORMER ---
 Prompt    : What do you prefer — cat or dog?
 Generated : I come here in the wood. You may sleep in that coach, and you will come down, father. He took Cosette gently in his hands, and kissed her, propped himself down, kissed her hand. Cosette kept saying to the child.
 Prompt    : old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the
 Generated : door; he could not open it, and hurried off by the door asking whether, if anything could be; but the who was looking up, I would find it a young fox stretched out in the direction, and should


In [29]:
from gru_model import GRULanguageModel

config = {
    "embed_dim": 128,     # Must match training
    "hidden_dim": 256,    # Must match training
    "num_layers": 2       # Must match training
}

model = GRULanguageModel(
    vocab_size=VOCAB_SIZE,
    embed_dim=config["embed_dim"],
    hidden_dim=config["hidden_dim"],
    num_layers=config["num_layers"],
    pad_token_id=tokenizer.get_pad_id()
).to(DEVICE)

MODEL_TYPE='gru'

evaluate_model(model, 'gru_0_1744850565.pt', test_loader, tokenizer, DEVICE)

# --- Step 5: Sample Generation ---
custom_prompts = [
 
    "What do you prefer — cat or dog?",
    "old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the"
]

print(f"\n--- Generations using {MODEL_TYPE.upper()} ---")
for prompt in custom_prompts:
    output = model.generate(tokenizer, prompt, device=DEVICE, return_continuation_only=True)
    print(f" Prompt    : {prompt}")
    print(f" Generated : {output}")




 Test Perplexity: 94.8999
 Average BLEU Score: 0.0139

--- Generations using GRU ---
 Prompt    : What do you prefer — cat or dog?
 Generated : exclaimed Covent. It is with a dreamet drawn me when he spoke. The doctor shook his head. Where is Madeleine? said Fantine. The light is almost, a car. Marius was now lifted and cared, so Rapurzel
 Prompt    : old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the
 Generated : window; he sang, resolutely. The tree got. Her bows hid so long as we could just lean upon a run. around the air was way juffily, and made nothing of it, a fearful


In [30]:
from lstm_model import LSTMLanguageModel

config = {
    "embed_dim": 128,     # Must match training
    "hidden_dim": 256,    # Must match training
    "num_layers": 2       # Must match training
}

model = LSTMLanguageModel(
    vocab_size=VOCAB_SIZE,
    embed_dim=config["embed_dim"],
    hidden_dim=config["hidden_dim"],
    num_layers=config["num_layers"],
    pad_token_id=tokenizer.get_pad_id()
).to(DEVICE)

MODEL_TYPE='lstm'

evaluate_model(model, 'lstm_0_1744851828.pt', test_loader, tokenizer, DEVICE)

# --- Step 5: Sample Generation ---
custom_prompts = [
 
    "What do you prefer — cat or dog?",
    "old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the"
]

print(f"\n--- Generations using {MODEL_TYPE.upper()} ---")
for prompt in custom_prompts:
    output = model.generate(tokenizer, prompt, device=DEVICE, return_continuation_only=True)
    print(f" Prompt    : {prompt}")
    print(f" Generated : {output}")




 Test Perplexity: 100.7093
 Average BLEU Score: 0.0128

--- Generations using LSTM ---
 Prompt    : What do you prefer — cat or dog?
 Generated : asked Il, said she, why will not, how good? Why if I have, as he was old? Char10. Then ran up to the bandit, but Cleft, M. de And what ⁇ , such to be borne in passage
 Prompt    : old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the
 Generated : throne of king and lamented an account of that must best scorn when her mind would have been himself long even lived so


In [31]:
from rnn_model import RNNLanguageModel

config = {
    "embed_dim": 128,     # Must match training
    "hidden_dim": 128,    # Must match training
    "num_layers": 2       # Must match training
}

model = RNNLanguageModel(
    vocab_size=VOCAB_SIZE,
    embed_dim=config["embed_dim"],
    hidden_dim=config["hidden_dim"],
    num_layers=config["num_layers"],
    pad_token_id=tokenizer.get_pad_id()
).to(DEVICE)

MODEL_TYPE='rnn'

evaluate_model(model, 'rnn_0_1744853135.pt', test_loader, tokenizer, DEVICE)

# --- Step 5: Sample Generation ---
custom_prompts = [
 
    "What do you prefer — cat or dog?",
    "old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the"
]

print(f"\n--- Generations using {MODEL_TYPE.upper()} ---")
for prompt in custom_prompts:
    output = model.generate(tokenizer, prompt, device=DEVICE, return_continuation_only=True)
    print(f" Prompt    : {prompt}")
    print(f" Generated : {output}")


 Test Perplexity: 132.2909
 Average BLEU Score: 0.0101

--- Generations using RNN ---
 Prompt    : What do you prefer — cat or dog?
 Generated : What has since you be rained atlook to these fortune, I would not face what to give me to death. Thritted the second of them. He is. This is me that such a couple of people began faming bare up but their
 Prompt    : old Mr Fox stirred under the bench, and cudgelled all the rabble, and drove them and Mrs Fox out of the house. SECOND STORY When old Mr Fox was dead, the wolf came as a suitor, and knocked at the
 Generated : stones down cried and over the seat violently, and again near him. I will make this assistance, and said the King wouldn ⁇ , this Alice was quite a dadz, taking a red merry to the drive Dorothy handed himself upon him. The
