In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
import utils
import pickle
nltk.download('punkt')  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
BATCH_SIZE = 64 # butuh lebih banyak update 
DATA_PATH = "dataset/"  
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH, 
    source_lang="min", 
    target_lang="eng", 
    batch_size=BATCH_SIZE, 
    device=device
)
SRC_VOCAB_SIZE = 5000
TGT_VOCAB_SIZE = 5000
N_LAYERS = 4
N_HEADS = 4
D_MODEL = 64
FFN_HIDDEN = 64
DROPOUT = 0.1
EPOCHS = 100
SAVE_DIR = "saved"
encoder = TransformerEncoder(SRC_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
model = Transformer(encoder,decoder,device,utils.PAD_TOKEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN) 
print("Model initialized on:", device)


Number of examples in train_dataset,train origin,train_raw: 799 799 799
Number of examples in valid_dataset: 100
Number of examples in test_dataset: 100
Model initialized on: cuda


In [3]:
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}
for i, batch in enumerate(train_loader):
    if i < 8:
        src = batch["src"]
        tgt = batch["tgt"]
        ss,fss=src[0,:],src.shape
        st,fst=tgt[0,:],tgt.shape
        print(fss,fst)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    else:
        break


torch.Size([64, 102]) torch.Size([64, 102])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


testing input , is it correct

In [4]:
data_iter = iter(train_loader)
for i in range(7):
    batch = next(data_iter)
    print(batch["src"].shape)


torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])


In [5]:
for i,batch in enumerate(train_loader):
    if i <1:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)
        print(tgt_y)

tensor([315,  28, 174,  ...,   0,   0,   0], device='cuda:0')


In [None]:
import os
import torch
from tqdm import tqdm
os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf") 
best_model_path = None
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training", 
                leave=True, total=len(train_loader))
    for batch in train_bar:
        optimizer.zero_grad()
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)

        loss = criterion(output, tgt_y)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
              leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            src_batch = batch['src'].to(device)
            tgt_batch = batch['tgt'].to(device)
            
            output, _ = model(src_batch, tgt_batch[:, :-1]) 
            output_dim = output.shape[-1]
            output = output.reshape(-1, output_dim)
            tgt_y = tgt_batch[:,1:].contiguous().view(-1)

            loss = criterion(output, tgt_y)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")
    
    avg_val_loss = total_val_loss / len(val_loader)
    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        if best_model_path and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_val_loss = avg_val_loss
        best_model_path = os.path.join(SAVE_DIR, "best.pt")
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> New best model saved at {best_model_path}")


Epoch 1/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 35.01it/s, loss=8.5886]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 112.06it/s, loss=8.5642]


[Epoch 1] Train Loss: 8.6214 | Val Loss: 8.5780
  -> New best model saved at saved/best.pt
Epoch 2/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.44it/s, loss=8.3608]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 81.77it/s, loss=8.3769]


[Epoch 2] Train Loss: 8.4420 | Val Loss: 8.3999
  -> New best model saved at saved/best.pt
Epoch 3/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 35.80it/s, loss=7.9965]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 101.43it/s, loss=8.0231]


[Epoch 3] Train Loss: 8.1553 | Val Loss: 8.0608
  -> New best model saved at saved/best.pt
Epoch 4/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 37.87it/s, loss=7.3983]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 97.96it/s, loss=7.5052]


[Epoch 4] Train Loss: 7.6737 | Val Loss: 7.5540
  -> New best model saved at saved/best.pt
Epoch 5/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 36.53it/s, loss=6.9234]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 93.36it/s, loss=7.0984]


[Epoch 5] Train Loss: 7.1478 | Val Loss: 7.1547
  -> New best model saved at saved/best.pt
Epoch 6/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 34.46it/s, loss=6.5575]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 76.65it/s, loss=6.8625]


[Epoch 6] Train Loss: 6.7832 | Val Loss: 6.9266
  -> New best model saved at saved/best.pt
Epoch 7/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 29.46it/s, loss=6.5129]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 82.97it/s, loss=6.7410]


[Epoch 7] Train Loss: 6.5758 | Val Loss: 6.8123
  -> New best model saved at saved/best.pt
Epoch 8/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.04it/s, loss=6.4282]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 89.67it/s, loss=6.6889]


[Epoch 8] Train Loss: 6.4578 | Val Loss: 6.7652
  -> New best model saved at saved/best.pt
Epoch 9/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.00it/s, loss=6.2929]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 95.80it/s, loss=6.6627]


[Epoch 9] Train Loss: 6.3934 | Val Loss: 6.7447
  -> New best model saved at saved/best.pt
Epoch 10/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 35.37it/s, loss=6.3308]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 91.92it/s, loss=6.6563]


[Epoch 10] Train Loss: 6.3554 | Val Loss: 6.7390
  -> New best model saved at saved/best.pt
Epoch 11/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 34.10it/s, loss=6.2549]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 92.17it/s, loss=6.6552]


[Epoch 11] Train Loss: 6.3227 | Val Loss: 6.7377
  -> New best model saved at saved/best.pt
Epoch 12/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 28.79it/s, loss=6.3050]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 62.48it/s, loss=6.6467]


[Epoch 12] Train Loss: 6.2918 | Val Loss: 6.7295
  -> New best model saved at saved/best.pt
Epoch 13/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 29.69it/s, loss=6.3327]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 82.11it/s, loss=6.6324]


[Epoch 13] Train Loss: 6.2659 | Val Loss: 6.7179
  -> New best model saved at saved/best.pt
Epoch 14/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 30.99it/s, loss=6.1624]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 84.01it/s, loss=6.6131]


[Epoch 14] Train Loss: 6.2310 | Val Loss: 6.7032
  -> New best model saved at saved/best.pt
Epoch 15/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.23it/s, loss=6.2820]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 80.99it/s, loss=6.6012]


[Epoch 15] Train Loss: 6.2032 | Val Loss: 6.6918
  -> New best model saved at saved/best.pt
Epoch 16/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 31.65it/s, loss=6.1728]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 88.34it/s, loss=6.5895]


[Epoch 16] Train Loss: 6.1647 | Val Loss: 6.6835
  -> New best model saved at saved/best.pt
Epoch 17/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.58it/s, loss=6.1380]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 82.79it/s, loss=6.5758]


[Epoch 17] Train Loss: 6.1374 | Val Loss: 6.6722
  -> New best model saved at saved/best.pt
Epoch 18/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.63it/s, loss=6.0503]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 86.34it/s, loss=6.5665]


[Epoch 18] Train Loss: 6.0982 | Val Loss: 6.6665
  -> New best model saved at saved/best.pt
Epoch 19/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.36it/s, loss=6.0224]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 84.00it/s, loss=6.5498]


[Epoch 19] Train Loss: 6.0676 | Val Loss: 6.6501
  -> New best model saved at saved/best.pt
Epoch 20/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.12it/s, loss=6.0062]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 96.17it/s, loss=6.5377]


[Epoch 20] Train Loss: 6.0287 | Val Loss: 6.6397
  -> New best model saved at saved/best.pt
Epoch 21/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.02it/s, loss=6.0189]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 93.22it/s, loss=6.5254]


[Epoch 21] Train Loss: 6.0011 | Val Loss: 6.6310
  -> New best model saved at saved/best.pt
Epoch 22/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 37.10it/s, loss=5.9641]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 103.59it/s, loss=6.5101]


[Epoch 22] Train Loss: 5.9572 | Val Loss: 6.6163
  -> New best model saved at saved/best.pt
Epoch 23/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 36.07it/s, loss=5.9625]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 86.76it/s, loss=6.5010]


[Epoch 23] Train Loss: 5.9240 | Val Loss: 6.6097
  -> New best model saved at saved/best.pt
Epoch 24/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 34.22it/s, loss=5.9805]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 88.07it/s, loss=6.4842]


[Epoch 24] Train Loss: 5.8895 | Val Loss: 6.5939
  -> New best model saved at saved/best.pt
Epoch 25/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.62it/s, loss=5.7267]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 77.88it/s, loss=6.4767]


[Epoch 25] Train Loss: 5.8423 | Val Loss: 6.5849
  -> New best model saved at saved/best.pt
Epoch 26/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 31.47it/s, loss=5.9002]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 88.84it/s, loss=6.4702]


[Epoch 26] Train Loss: 5.8120 | Val Loss: 6.5764
  -> New best model saved at saved/best.pt
Epoch 27/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.00it/s, loss=5.8561]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 86.12it/s, loss=6.4598]


[Epoch 27] Train Loss: 5.7745 | Val Loss: 6.5681
  -> New best model saved at saved/best.pt
Epoch 28/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.27it/s, loss=5.6537]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 88.70it/s, loss=6.4507]


[Epoch 28] Train Loss: 5.7281 | Val Loss: 6.5599
  -> New best model saved at saved/best.pt
Epoch 29/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.44it/s, loss=5.6202]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 79.20it/s, loss=6.4438]


[Epoch 29] Train Loss: 5.6912 | Val Loss: 6.5525
  -> New best model saved at saved/best.pt
Epoch 30/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 31.35it/s, loss=5.5915]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 84.09it/s, loss=6.4387]


[Epoch 30] Train Loss: 5.6512 | Val Loss: 6.5492
  -> New best model saved at saved/best.pt
Epoch 31/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.85it/s, loss=5.4416]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 81.65it/s, loss=6.4285]


[Epoch 31] Train Loss: 5.6110 | Val Loss: 6.5368
  -> New best model saved at saved/best.pt
Epoch 32/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.55it/s, loss=5.5942]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 77.89it/s, loss=6.4237]


[Epoch 32] Train Loss: 5.5804 | Val Loss: 6.5350
  -> New best model saved at saved/best.pt
Epoch 33/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 31.30it/s, loss=5.5829]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 85.89it/s, loss=6.4233]


[Epoch 33] Train Loss: 5.5425 | Val Loss: 6.5332
  -> New best model saved at saved/best.pt
Epoch 34/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.24it/s, loss=5.4657]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 82.78it/s, loss=6.4120]


[Epoch 34] Train Loss: 5.5042 | Val Loss: 6.5238
  -> New best model saved at saved/best.pt
Epoch 35/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.86it/s, loss=5.3469]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 84.84it/s, loss=6.4068]


[Epoch 35] Train Loss: 5.4631 | Val Loss: 6.5193
  -> New best model saved at saved/best.pt
Epoch 36/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 31.95it/s, loss=5.4116]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 81.82it/s, loss=6.3972]


[Epoch 36] Train Loss: 5.4357 | Val Loss: 6.5096
  -> New best model saved at saved/best.pt
Epoch 37/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.16it/s, loss=5.3115]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 81.12it/s, loss=6.3952]


[Epoch 37] Train Loss: 5.3981 | Val Loss: 6.5085
  -> New best model saved at saved/best.pt
Epoch 38/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 32.38it/s, loss=5.3821]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 70.43it/s, loss=6.3924]


[Epoch 38] Train Loss: 5.3629 | Val Loss: 6.5072
  -> New best model saved at saved/best.pt
Epoch 39/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 34.81it/s, loss=5.3405]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 89.28it/s, loss=6.3965]


[Epoch 39] Train Loss: 5.3297 | Val Loss: 6.5121
Epoch 40/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 35.28it/s, loss=5.3335]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 99.62it/s, loss=6.3943]


[Epoch 40] Train Loss: 5.3003 | Val Loss: 6.5065
  -> New best model saved at saved/best.pt
Epoch 41/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.19it/s, loss=5.1292]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 110.19it/s, loss=6.3960]


[Epoch 41] Train Loss: 5.2581 | Val Loss: 6.5061
  -> New best model saved at saved/best.pt
Epoch 42/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 37.47it/s, loss=5.1810]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 88.59it/s, loss=6.3903]


[Epoch 42] Train Loss: 5.2279 | Val Loss: 6.5001
  -> New best model saved at saved/best.pt
Epoch 43/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 31.33it/s, loss=5.0915]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 80.63it/s, loss=6.4024]


[Epoch 43] Train Loss: 5.1920 | Val Loss: 6.5113
Epoch 44/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.77it/s, loss=5.2330]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 94.64it/s, loss=6.3896]


[Epoch 44] Train Loss: 5.1699 | Val Loss: 6.4970
  -> New best model saved at saved/best.pt
Epoch 45/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 34.20it/s, loss=5.1314]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 95.98it/s, loss=6.3903]


[Epoch 45] Train Loss: 5.1331 | Val Loss: 6.4982
Epoch 46/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.52it/s, loss=5.2121]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 70.28it/s, loss=6.3928]


[Epoch 46] Train Loss: 5.1060 | Val Loss: 6.4994
Epoch 47/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 30.48it/s, loss=4.9886]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 75.98it/s, loss=6.3975]


[Epoch 47] Train Loss: 5.0711 | Val Loss: 6.5068
Epoch 48/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.75it/s, loss=4.9526]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 86.68it/s, loss=6.3928]


[Epoch 48] Train Loss: 5.0398 | Val Loss: 6.5022
Epoch 49/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 37.83it/s, loss=4.9472]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 108.80it/s, loss=6.3950]


[Epoch 49] Train Loss: 5.0084 | Val Loss: 6.5045
Epoch 50/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.91it/s, loss=4.9598]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 100.27it/s, loss=6.4014]


[Epoch 50] Train Loss: 4.9796 | Val Loss: 6.5102
Epoch 51/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.43it/s, loss=4.9236]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 102.09it/s, loss=6.4120]


[Epoch 51] Train Loss: 4.9531 | Val Loss: 6.5207
Epoch 52/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 34.04it/s, loss=4.8599]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 72.95it/s, loss=6.4121]


[Epoch 52] Train Loss: 4.9214 | Val Loss: 6.5205
Epoch 53/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 33.58it/s, loss=4.9810]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 92.92it/s, loss=6.4106]


[Epoch 53] Train Loss: 4.9028 | Val Loss: 6.5208
Epoch 54/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 36.31it/s, loss=4.9189]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 108.01it/s, loss=6.4208]


[Epoch 54] Train Loss: 4.8725 | Val Loss: 6.5293
Epoch 55/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.38it/s, loss=4.9261]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 117.74it/s, loss=6.4245]


[Epoch 55] Train Loss: 4.8485 | Val Loss: 6.5335
Epoch 56/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 39.70it/s, loss=4.8031]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 109.43it/s, loss=6.4282]


[Epoch 56] Train Loss: 4.8135 | Val Loss: 6.5362
Epoch 57/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 39.89it/s, loss=4.9381]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 116.60it/s, loss=6.4259]


[Epoch 57] Train Loss: 4.7989 | Val Loss: 6.5354
Epoch 58/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 39.93it/s, loss=4.7681]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 111.99it/s, loss=6.4331]


[Epoch 58] Train Loss: 4.7601 | Val Loss: 6.5433
Epoch 59/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.02it/s, loss=4.7029]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 101.60it/s, loss=6.4322]


[Epoch 59] Train Loss: 4.7371 | Val Loss: 6.5408
Epoch 60/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.73it/s, loss=4.6805]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 110.02it/s, loss=6.4330]


[Epoch 60] Train Loss: 4.7096 | Val Loss: 6.5423
Epoch 61/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.87it/s, loss=4.5851]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 94.74it/s, loss=6.4398]


[Epoch 61] Train Loss: 4.6839 | Val Loss: 6.5483
Epoch 62/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.31it/s, loss=4.6182]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 86.78it/s, loss=6.4462]


[Epoch 62] Train Loss: 4.6612 | Val Loss: 6.5571
Epoch 63/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.42it/s, loss=4.6696]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 106.53it/s, loss=6.4555]


[Epoch 63] Train Loss: 4.6357 | Val Loss: 6.5651
Epoch 64/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.83it/s, loss=4.5865]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 115.07it/s, loss=6.4515]


[Epoch 64] Train Loss: 4.6178 | Val Loss: 6.5613
Epoch 65/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.78it/s, loss=4.6490]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 110.97it/s, loss=6.4702]


[Epoch 65] Train Loss: 4.5948 | Val Loss: 6.5806
Epoch 66/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.43it/s, loss=4.5738]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 112.10it/s, loss=6.4718]


[Epoch 66] Train Loss: 4.5721 | Val Loss: 6.5850
Epoch 67/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 39.05it/s, loss=4.5358]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 101.46it/s, loss=6.4843]


[Epoch 67] Train Loss: 4.5459 | Val Loss: 6.5981
Epoch 68/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.65it/s, loss=4.5318]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 117.90it/s, loss=6.4853]


[Epoch 68] Train Loss: 4.5249 | Val Loss: 6.6024
Epoch 69/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.33it/s, loss=4.5814]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 117.01it/s, loss=6.4943]


[Epoch 69] Train Loss: 4.5091 | Val Loss: 6.6114
Epoch 70/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 40.95it/s, loss=4.5103]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 108.34it/s, loss=6.5090]


[Epoch 70] Train Loss: 4.4839 | Val Loss: 6.6243
Epoch 71/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.75it/s, loss=4.5298]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 106.45it/s, loss=6.5252]


[Epoch 71] Train Loss: 4.4661 | Val Loss: 6.6402
Epoch 72/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 41.22it/s, loss=4.3757]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 118.25it/s, loss=6.5269]


[Epoch 72] Train Loss: 4.4354 | Val Loss: 6.6414
Epoch 73/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 41.11it/s, loss=4.3822]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 106.67it/s, loss=6.5421]


[Epoch 73] Train Loss: 4.4195 | Val Loss: 6.6561
Epoch 74/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 38.70it/s, loss=4.3970]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 87.62it/s, loss=6.5441]


[Epoch 74] Train Loss: 4.3978 | Val Loss: 6.6563
Epoch 75/100


🚀 Training:  31%|███       | 4/13 [00:00<00:00, 38.68it/s, loss=4.3877]