In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
import utils
import pickle
# Make sure the NLTK "punkt" data package is available locally.
nltk.download("punkt")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# --- Reproducibility --------------------------------------------------------
# Seed PyTorch's random number generators before any stochastic step (weight
# initialisation, dropout) so that a fresh "Restart & Run All" can reproduce
# the same training run.
# NOTE(review): if get_dataloaders shuffles with a non-torch RNG, seed that
# library as well - confirm against dataloader.py.
SEED = 42
torch.manual_seed(SEED)

# --- Data -------------------------------------------------------------------
BATCH_SIZE = 8  # kept small because more parameter updates are needed
DATA_PATH = "dataset/"
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH,
    source_lang="min",
    target_lang="eng",
    batch_size=BATCH_SIZE,
    device=device,
)

# --- Model hyperparameters --------------------------------------------------
# NOTE(review): the vocabulary sizes are hard-coded; confirm they cover every
# token id the dataloaders can emit.
SRC_VOCAB_SIZE = 5000
TGT_VOCAB_SIZE = 5000
N_LAYERS = 3
N_HEADS = 2
D_MODEL = 32
FFN_HIDDEN = 64
DROPOUT = 0.1

# --- Training settings ------------------------------------------------------
EPOCHS = 100
SAVE_DIR = "saved"  # directory that receives model checkpoints

# --- Model, optimizer and loss ----------------------------------------------
encoder = TransformerEncoder(
    SRC_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device
)
decoder = TransformerDecoder(
    TGT_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device
)
model = Transformer(encoder, decoder, device, utils.PAD_TOKEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Targets equal to the padding id are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN)
print("Model initialized on:", device)


TrainData - Max 'min' sentence length: 76
TrainData - Max 'eng' sentence length: 107
TestData - Max 'min' sentence length: 61
TestData - Max 'eng' sentence length: 75
ValidData - Max 'min' sentence length: 71
ValidData - Max 'eng' sentence length: 80
Number of examples in train_dataset,train origin,train_raw: 800 800 800
Number of examples in valid_dataset: 100
Number of examples in test_dataset: 100
Model initialized on: cuda


In [3]:
# Special-token ids exposed by `utils`, keyed by a human-readable label.
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}

# Inspect the first training batch only: print the shapes of its source and
# target tensors, then list the special-token ids.
# The previous version reached the same result by accident: an `else: break`
# attached to the inner `for` (a for/else) ended the outer loop after one pass.
for batch in train_loader:
    src = batch["src"]
    tgt = batch["tgt"]
    print(src.shape, tgt.shape)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    break  # one batch is enough for this check

torch.Size([8, 109]) torch.Size([8, 109])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


Testing the input: is it correct?

In [4]:
# Pull seven consecutive batches from the training loader by hand and show the
# shape of each batch's source tensor.
data_iter = iter(train_loader)
remaining = 7
while remaining > 0:
    batch = next(data_iter)
    src_shape = batch["src"].shape
    print(src_shape)
    remaining -= 1


torch.Size([8, 109])
torch.Size([8, 109])
torch.Size([8, 109])
torch.Size([8, 109])
torch.Size([8, 109])
torch.Size([8, 109])
torch.Size([8, 109])


In [5]:
# Sanity check on a single training batch: run one forward pass with the same
# input/label shift that the training loop uses and print the flattened labels.
# No gradients are needed here, so the pass runs under torch.no_grad(), and the
# loop stops after the first batch instead of walking the whole loader.
with torch.no_grad():
    for batch in train_loader:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        # The decoder receives the target without its last position ...
        output, _ = model(src_batch, tgt_batch[:, :-1])
        output_dim = output.shape[-1]
        # ... and the outputs are flattened to one row per target position.
        output = output.reshape(-1, output_dim)
        # The labels are the target without its first position, flattened.
        tgt_y = tgt_batch[:, 1:].contiguous().view(-1)
        print(tgt_y)
        break

tensor([ 528,  119, 2099,   85,  430,   58, 2696, 1033,  636,  186,   54,  861,
          58,  565,    6,  634,   84,   96, 1402,  372,  663,  213, 2697,   85,
        1450,  301, 1083,   90, 1624,    6,  255,    2,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         293,   85, 3320, 3321,  120, 2321,  796,  189,    8,  615,  215,  724,
        1636, 2987,    2,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,   

In [6]:
import os
import torch
from tqdm import tqdm
def _batch_loss(batch):
    """Compute the loss of ``model`` on one batch.

    The decoder is fed the target sequence without its last position, and the
    flattened outputs are compared with the target sequence without its first
    position.

    Args:
        batch: Mapping with ``'src'`` and ``'tgt'`` tensors, as yielded by the
            data loaders.

    Returns:
        The loss tensor produced by ``criterion``.
    """
    src_batch = batch['src'].to(device)
    tgt_batch = batch['tgt'].to(device)

    output, _ = model(src_batch, tgt_batch[:, :-1])
    output = output.reshape(-1, output.shape[-1])
    tgt_y = tgt_batch[:, 1:].contiguous().view(-1)
    return criterion(output, tgt_y)


os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf")
best_model_path = None  # stays None until the first checkpoint is written
checkpoint_path = os.path.join(SAVE_DIR, "best.pt")

for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")

    # ---- Training pass ----
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training",
                     leave=True, total=len(train_loader))
    for batch in train_bar:
        optimizer.zero_grad()
        loss = _batch_loss(batch)
        loss.backward()
        optimizer.step()
        loss_value = loss.item()
        total_train_loss += loss_value
        train_bar.set_postfix(loss=f"{loss_value:.4f}")
    # Mean of the per-batch losses (not weighted by batch size).
    avg_train_loss = total_train_loss / len(train_loader)

    # ---- Validation pass (no gradient tracking) ----
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
                   leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            loss = _batch_loss(batch)
            loss_value = loss.item()
            total_val_loss += loss_value
            val_bar.set_postfix(loss=f"{loss_value:.4f}")
    avg_val_loss = total_val_loss / len(val_loader)

    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    # ---- Keep only the checkpoint with the lowest validation loss ----
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        # Write to a temporary file first and then swap it into place, so a
        # failed or interrupted save cannot destroy the previous best
        # checkpoint (the old code deleted it before saving).
        tmp_path = checkpoint_path + ".tmp"
        torch.save(model.state_dict(), tmp_path)
        os.replace(tmp_path, checkpoint_path)
        best_model_path = checkpoint_path
        print(f"  -> New best model saved at {best_model_path}")

Epoch 1/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 154.72it/s, loss=8.6594]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 405.35it/s, loss=8.5790]


[Epoch 1] Train Loss: 8.6538 | Val Loss: 8.6134
  -> New best model saved at saved/best.pt
Epoch 2/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 168.85it/s, loss=8.3608]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 282.00it/s, loss=8.4113]


[Epoch 2] Train Loss: 8.5041 | Val Loss: 8.4339
  -> New best model saved at saved/best.pt
Epoch 3/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 144.03it/s, loss=7.8512]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 306.69it/s, loss=7.9035]


[Epoch 3] Train Loss: 8.1242 | Val Loss: 7.8973
  -> New best model saved at saved/best.pt
Epoch 4/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 140.86it/s, loss=7.2239]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 332.87it/s, loss=7.4539]


[Epoch 4] Train Loss: 7.5289 | Val Loss: 7.4335
  -> New best model saved at saved/best.pt
Epoch 5/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 149.88it/s, loss=7.0069]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 326.08it/s, loss=7.1714]


[Epoch 5] Train Loss: 7.1001 | Val Loss: 7.1519
  -> New best model saved at saved/best.pt
Epoch 6/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 152.99it/s, loss=6.6290]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 200.41it/s, loss=7.0073]


[Epoch 6] Train Loss: 6.8234 | Val Loss: 6.9921
  -> New best model saved at saved/best.pt
Epoch 7/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 154.68it/s, loss=6.5029]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 354.80it/s, loss=6.9149]


[Epoch 7] Train Loss: 6.6439 | Val Loss: 6.9069
  -> New best model saved at saved/best.pt
Epoch 8/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 141.20it/s, loss=6.6397]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 361.73it/s, loss=6.8778]


[Epoch 8] Train Loss: 6.5379 | Val Loss: 6.8771
  -> New best model saved at saved/best.pt
Epoch 9/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 119.56it/s, loss=6.4763]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 164.70it/s, loss=6.8657]


[Epoch 9] Train Loss: 6.4877 | Val Loss: 6.8737
  -> New best model saved at saved/best.pt
Epoch 10/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 144.07it/s, loss=6.5873]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 372.45it/s, loss=6.8663]


[Epoch 10] Train Loss: 6.4523 | Val Loss: 6.8810
Epoch 11/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 138.60it/s, loss=6.4839]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 378.11it/s, loss=6.8713]


[Epoch 11] Train Loss: 6.4266 | Val Loss: 6.8901
Epoch 12/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 141.65it/s, loss=6.2410]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 282.56it/s, loss=6.8710]


[Epoch 12] Train Loss: 6.4157 | Val Loss: 6.8956
Epoch 13/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 134.11it/s, loss=6.6173]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 297.97it/s, loss=6.8761]


[Epoch 13] Train Loss: 6.3989 | Val Loss: 6.8987
Epoch 14/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 139.03it/s, loss=6.4746]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 265.35it/s, loss=6.8844]


[Epoch 14] Train Loss: 6.3945 | Val Loss: 6.9003
Epoch 15/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 134.79it/s, loss=6.2547]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 312.72it/s, loss=6.8891]


[Epoch 15] Train Loss: 6.3782 | Val Loss: 6.9028
Epoch 16/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 124.52it/s, loss=6.3622]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 282.51it/s, loss=6.8910]


[Epoch 16] Train Loss: 6.3557 | Val Loss: 6.9022
Epoch 17/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 120.44it/s, loss=6.3740]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 343.24it/s, loss=6.8840]


[Epoch 17] Train Loss: 6.3393 | Val Loss: 6.9049
Epoch 18/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 132.67it/s, loss=5.9435]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 325.51it/s, loss=6.8956]


[Epoch 18] Train Loss: 6.3280 | Val Loss: 6.9009
Epoch 19/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 136.63it/s, loss=6.1698]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 348.64it/s, loss=6.8831]


[Epoch 19] Train Loss: 6.3129 | Val Loss: 6.8984
Epoch 20/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 147.41it/s, loss=6.3735]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 342.06it/s, loss=6.8895]


[Epoch 20] Train Loss: 6.2933 | Val Loss: 6.9012
Epoch 21/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 133.08it/s, loss=6.3458]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 338.34it/s, loss=6.8845]


[Epoch 21] Train Loss: 6.2744 | Val Loss: 6.9022
Epoch 22/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.79it/s, loss=6.2444]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 347.57it/s, loss=6.8776]


[Epoch 22] Train Loss: 6.2553 | Val Loss: 6.8988
Epoch 23/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 132.46it/s, loss=6.1185]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 332.18it/s, loss=6.8758]


[Epoch 23] Train Loss: 6.2475 | Val Loss: 6.8917
Epoch 24/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 150.22it/s, loss=5.8864]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 243.94it/s, loss=6.8690]


[Epoch 24] Train Loss: 6.2210 | Val Loss: 6.8875
Epoch 25/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.29it/s, loss=6.0708]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 252.10it/s, loss=6.8646]


[Epoch 25] Train Loss: 6.1968 | Val Loss: 6.8866
Epoch 26/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 135.59it/s, loss=6.2359]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 252.58it/s, loss=6.8679]


[Epoch 26] Train Loss: 6.1842 | Val Loss: 6.8803
Epoch 27/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 129.62it/s, loss=6.0296]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 270.29it/s, loss=6.8558]


[Epoch 27] Train Loss: 6.1704 | Val Loss: 6.8722
  -> New best model saved at saved/best.pt
Epoch 28/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.03it/s, loss=5.7732]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 274.72it/s, loss=6.8478]


[Epoch 28] Train Loss: 6.1496 | Val Loss: 6.8631
  -> New best model saved at saved/best.pt
Epoch 29/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 132.95it/s, loss=5.8251]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 350.60it/s, loss=6.8550]


[Epoch 29] Train Loss: 6.1334 | Val Loss: 6.8687
Epoch 30/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 146.29it/s, loss=5.9229]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 296.17it/s, loss=6.8515]


[Epoch 30] Train Loss: 6.1087 | Val Loss: 6.8622
  -> New best model saved at saved/best.pt
Epoch 31/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 131.50it/s, loss=5.9113]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 306.77it/s, loss=6.8385]


[Epoch 31] Train Loss: 6.1027 | Val Loss: 6.8547
  -> New best model saved at saved/best.pt
Epoch 32/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.60it/s, loss=6.3791]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 327.24it/s, loss=6.8348]


[Epoch 32] Train Loss: 6.0744 | Val Loss: 6.8443
  -> New best model saved at saved/best.pt
Epoch 33/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 132.47it/s, loss=5.9031]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 316.00it/s, loss=6.8263]


[Epoch 33] Train Loss: 6.0642 | Val Loss: 6.8401
  -> New best model saved at saved/best.pt
Epoch 34/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 119.14it/s, loss=6.2915]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 267.02it/s, loss=6.8121]


[Epoch 34] Train Loss: 6.0386 | Val Loss: 6.8274
  -> New best model saved at saved/best.pt
Epoch 35/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 118.46it/s, loss=6.1309]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 303.96it/s, loss=6.8037]


[Epoch 35] Train Loss: 6.0138 | Val Loss: 6.8181
  -> New best model saved at saved/best.pt
Epoch 36/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 131.13it/s, loss=6.2426]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 271.65it/s, loss=6.8079]


[Epoch 36] Train Loss: 5.9947 | Val Loss: 6.8241
Epoch 37/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 137.15it/s, loss=5.7221]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 329.56it/s, loss=6.7975]


[Epoch 37] Train Loss: 5.9694 | Val Loss: 6.8200
Epoch 38/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 137.32it/s, loss=5.9073]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 305.92it/s, loss=6.7843]


[Epoch 38] Train Loss: 5.9569 | Val Loss: 6.8082
  -> New best model saved at saved/best.pt
Epoch 39/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 136.45it/s, loss=5.7736]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 275.43it/s, loss=6.7929]


[Epoch 39] Train Loss: 5.9321 | Val Loss: 6.8125
Epoch 40/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 107.15it/s, loss=5.6994]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 237.82it/s, loss=6.7888]


[Epoch 40] Train Loss: 5.8990 | Val Loss: 6.8125
Epoch 41/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 131.55it/s, loss=6.0040]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 324.59it/s, loss=6.7830]


[Epoch 41] Train Loss: 5.8886 | Val Loss: 6.8109
Epoch 42/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 144.43it/s, loss=5.9622]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 344.53it/s, loss=6.7927]


[Epoch 42] Train Loss: 5.8569 | Val Loss: 6.8156
Epoch 43/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 138.18it/s, loss=5.8667]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 252.62it/s, loss=6.7787]


[Epoch 43] Train Loss: 5.8453 | Val Loss: 6.8019
  -> New best model saved at saved/best.pt
Epoch 44/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 141.25it/s, loss=5.9082]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 339.68it/s, loss=6.7810]


[Epoch 44] Train Loss: 5.8139 | Val Loss: 6.8050
Epoch 45/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 135.75it/s, loss=5.9558]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 248.37it/s, loss=6.7749]


[Epoch 45] Train Loss: 5.7976 | Val Loss: 6.7935
  -> New best model saved at saved/best.pt
Epoch 46/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 136.90it/s, loss=5.7028]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 263.39it/s, loss=6.7718]


[Epoch 46] Train Loss: 5.7896 | Val Loss: 6.7902
  -> New best model saved at saved/best.pt
Epoch 47/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 120.62it/s, loss=5.6649]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 264.26it/s, loss=6.7767]


[Epoch 47] Train Loss: 5.7689 | Val Loss: 6.8039
Epoch 48/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 116.13it/s, loss=5.4619]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 308.82it/s, loss=6.7638]


[Epoch 48] Train Loss: 5.7341 | Val Loss: 6.7970
Epoch 49/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.70it/s, loss=5.4601]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 292.40it/s, loss=6.7741]


[Epoch 49] Train Loss: 5.7127 | Val Loss: 6.8019
Epoch 50/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 137.33it/s, loss=5.9212]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 286.93it/s, loss=6.7670]


[Epoch 50] Train Loss: 5.7026 | Val Loss: 6.8004
Epoch 51/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 135.78it/s, loss=5.8787]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 328.16it/s, loss=6.7682]


[Epoch 51] Train Loss: 5.6763 | Val Loss: 6.7979
Epoch 52/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 136.95it/s, loss=5.9159]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 309.19it/s, loss=6.7610]


[Epoch 52] Train Loss: 5.6529 | Val Loss: 6.7939
Epoch 53/100


🚀 Training: 100%|██████████| 100/100 [00:01<00:00, 91.38it/s, loss=5.6367]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 253.20it/s, loss=6.7668]


[Epoch 53] Train Loss: 5.6385 | Val Loss: 6.8001
Epoch 54/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 123.21it/s, loss=5.5635]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 239.28it/s, loss=6.7778]


[Epoch 54] Train Loss: 5.6182 | Val Loss: 6.8054
Epoch 55/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 118.36it/s, loss=5.6434]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 260.47it/s, loss=6.7725]


[Epoch 55] Train Loss: 5.6042 | Val Loss: 6.8004
Epoch 56/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 119.85it/s, loss=5.8591]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 272.68it/s, loss=6.7683]


[Epoch 56] Train Loss: 5.5894 | Val Loss: 6.7919
Epoch 57/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 119.89it/s, loss=5.5095]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 309.90it/s, loss=6.7802]


[Epoch 57] Train Loss: 5.5582 | Val Loss: 6.8018
Epoch 58/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 123.93it/s, loss=5.7741]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 301.63it/s, loss=6.7887]


[Epoch 58] Train Loss: 5.5353 | Val Loss: 6.8102
Epoch 59/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 126.26it/s, loss=5.6128]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 195.56it/s, loss=6.7702]


[Epoch 59] Train Loss: 5.5179 | Val Loss: 6.7967
Epoch 60/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 125.01it/s, loss=5.5241]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 293.21it/s, loss=6.7874]


[Epoch 60] Train Loss: 5.5028 | Val Loss: 6.8109
Epoch 61/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 121.46it/s, loss=5.6039]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 296.22it/s, loss=6.7895]


[Epoch 61] Train Loss: 5.4847 | Val Loss: 6.8119
Epoch 62/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 122.18it/s, loss=5.3897]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 278.48it/s, loss=6.7859]


[Epoch 62] Train Loss: 5.4650 | Val Loss: 6.8112
Epoch 63/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 112.65it/s, loss=5.4172]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 233.86it/s, loss=6.7916]


[Epoch 63] Train Loss: 5.4407 | Val Loss: 6.8157
Epoch 64/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 122.96it/s, loss=5.4400]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 295.22it/s, loss=6.8085]


[Epoch 64] Train Loss: 5.4346 | Val Loss: 6.8255
Epoch 65/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 125.34it/s, loss=5.2215]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 315.76it/s, loss=6.7969]


[Epoch 65] Train Loss: 5.4105 | Val Loss: 6.8244
Epoch 66/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 124.19it/s, loss=5.5402]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 304.69it/s, loss=6.8044]


[Epoch 66] Train Loss: 5.3929 | Val Loss: 6.8292
Epoch 67/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 127.07it/s, loss=5.3900]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 306.31it/s, loss=6.8102]


[Epoch 67] Train Loss: 5.3701 | Val Loss: 6.8330
Epoch 68/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 123.69it/s, loss=5.1622]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 236.06it/s, loss=6.8013]


[Epoch 68] Train Loss: 5.3646 | Val Loss: 6.8302
Epoch 69/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 117.18it/s, loss=5.6004]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 287.57it/s, loss=6.8144]


[Epoch 69] Train Loss: 5.3432 | Val Loss: 6.8420
Epoch 70/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 133.05it/s, loss=5.4240]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 330.97it/s, loss=6.8347]


[Epoch 70] Train Loss: 5.3208 | Val Loss: 6.8601
Epoch 71/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.49it/s, loss=5.1654]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 233.05it/s, loss=6.8404]


[Epoch 71] Train Loss: 5.3022 | Val Loss: 6.8596
Epoch 72/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.99it/s, loss=5.2338]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 304.98it/s, loss=6.8348]


[Epoch 72] Train Loss: 5.2792 | Val Loss: 6.8569
Epoch 73/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 130.70it/s, loss=5.2812]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 348.89it/s, loss=6.8257]


[Epoch 73] Train Loss: 5.2630 | Val Loss: 6.8514
Epoch 74/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 134.09it/s, loss=5.2878]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 295.87it/s, loss=6.8306]


[Epoch 74] Train Loss: 5.2506 | Val Loss: 6.8581
Epoch 75/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 127.65it/s, loss=5.3932]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 310.31it/s, loss=6.8480]


[Epoch 75] Train Loss: 5.2314 | Val Loss: 6.8736
Epoch 76/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 144.18it/s, loss=5.0310]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 298.90it/s, loss=6.8448]


[Epoch 76] Train Loss: 5.2153 | Val Loss: 6.8735
Epoch 77/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 129.61it/s, loss=5.0823]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 283.53it/s, loss=6.8711]


[Epoch 77] Train Loss: 5.1922 | Val Loss: 6.8966
Epoch 78/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 135.25it/s, loss=5.2444]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 299.13it/s, loss=6.8587]


[Epoch 78] Train Loss: 5.1813 | Val Loss: 6.8941
Epoch 79/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 119.32it/s, loss=5.3783]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 264.61it/s, loss=6.8653]


[Epoch 79] Train Loss: 5.1687 | Val Loss: 6.8977
Epoch 80/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 138.08it/s, loss=5.2349]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 263.25it/s, loss=6.8732]


[Epoch 80] Train Loss: 5.1526 | Val Loss: 6.9053
Epoch 81/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 129.86it/s, loss=5.0988]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 320.97it/s, loss=6.8791]


[Epoch 81] Train Loss: 5.1391 | Val Loss: 6.9077
Epoch 82/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 121.98it/s, loss=5.1207]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 256.64it/s, loss=6.8863]


[Epoch 82] Train Loss: 5.1255 | Val Loss: 6.9149
Epoch 83/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 144.31it/s, loss=5.2530]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 290.18it/s, loss=6.8938]


[Epoch 83] Train Loss: 5.1040 | Val Loss: 6.9181
Epoch 84/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 148.27it/s, loss=5.2577]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 226.25it/s, loss=6.9024]


[Epoch 84] Train Loss: 5.0920 | Val Loss: 6.9319
Epoch 85/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 124.39it/s, loss=4.9524]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 268.80it/s, loss=6.9059]


[Epoch 85] Train Loss: 5.0661 | Val Loss: 6.9386
Epoch 86/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 131.94it/s, loss=4.8033]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 262.28it/s, loss=6.9214]


[Epoch 86] Train Loss: 5.0605 | Val Loss: 6.9497
Epoch 87/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 126.15it/s, loss=4.8546]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 348.76it/s, loss=6.9246]


[Epoch 87] Train Loss: 5.0447 | Val Loss: 6.9542
Epoch 88/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 129.10it/s, loss=5.0415]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 326.64it/s, loss=6.9318]


[Epoch 88] Train Loss: 5.0276 | Val Loss: 6.9628
Epoch 89/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 140.66it/s, loss=5.2008]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 361.25it/s, loss=6.9314]


[Epoch 89] Train Loss: 5.0151 | Val Loss: 6.9639
Epoch 90/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 149.14it/s, loss=4.9895]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 367.93it/s, loss=6.9367]


[Epoch 90] Train Loss: 5.0049 | Val Loss: 6.9707
Epoch 91/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 140.65it/s, loss=5.2195]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 334.47it/s, loss=6.9403]


[Epoch 91] Train Loss: 4.9826 | Val Loss: 6.9774
Epoch 92/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 137.13it/s, loss=5.0992]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 332.97it/s, loss=6.9580]


[Epoch 92] Train Loss: 4.9692 | Val Loss: 6.9974
Epoch 93/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 141.71it/s, loss=5.1090]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 360.06it/s, loss=6.9556]


[Epoch 93] Train Loss: 4.9542 | Val Loss: 6.9944
Epoch 94/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 135.51it/s, loss=5.0132]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 328.76it/s, loss=6.9524]


[Epoch 94] Train Loss: 4.9450 | Val Loss: 6.9952
Epoch 95/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 149.67it/s, loss=4.8255]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 319.66it/s, loss=6.9760]


[Epoch 95] Train Loss: 4.9303 | Val Loss: 7.0210
Epoch 96/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 138.01it/s, loss=4.8069]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 246.96it/s, loss=6.9675]


[Epoch 96] Train Loss: 4.9097 | Val Loss: 7.0145
Epoch 97/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 146.03it/s, loss=4.8626]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 304.49it/s, loss=6.9927]


[Epoch 97] Train Loss: 4.8946 | Val Loss: 7.0343
Epoch 98/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 139.84it/s, loss=5.1271]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 329.16it/s, loss=6.9959]


[Epoch 98] Train Loss: 4.8854 | Val Loss: 7.0378
Epoch 99/100


🚀 Training: 100%|██████████| 100/100 [00:00<00:00, 146.74it/s, loss=5.0969]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 223.31it/s, loss=6.9933]


[Epoch 99] Train Loss: 4.8760 | Val Loss: 7.0422
Epoch 100/100


🚀 Training: 100%|██████████| 100/100 [00:01<00:00, 92.13it/s, loss=4.6541]
🚀 Validation: 100%|██████████| 13/13 [00:00<00:00, 216.26it/s, loss=7.0050]

[Epoch 100] Train Loss: 4.8584 | Val Loss: 7.0522



