In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
import utils
import pickle
nltk.download('punkt')  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
BATCH_SIZE = 64 # butuh lebih banyak update 
DATA_PATH = "dataset/"  
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH, 
    source_lang="min", 
    target_lang="eng", 
    batch_size=BATCH_SIZE, 
    device=device
)
SRC_VOCAB_SIZE = 5000
TGT_VOCAB_SIZE = 5000
N_LAYERS = 4
N_HEADS = 2
D_MODEL = 64
FFN_HIDDEN = 32
DROPOUT = 0.1
EPOCHS = 100

SAVE_DIR = "saved"
encoder = TransformerEncoder(SRC_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
model = Transformer(encoder,decoder,device,utils.PAD_TOKEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN) 
print("Model initialized on:", device)


TrainData - Max 'min' sentence length: 76
TrainData - Max 'eng' sentence length: 107
TestData - Max 'min' sentence length: 61
TestData - Max 'eng' sentence length: 75
ValidData - Max 'min' sentence length: 71
ValidData - Max 'eng' sentence length: 80
Number of examples in train_dataset,train origin,train_raw: 800 800 800
Number of examples in valid_dataset: 100
Number of examples in test_dataset: 100
Model initialized on: cuda


In [3]:
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}
for i, batch in enumerate(train_loader):
    if i < 8:
        src = batch["src"]
        tgt = batch["tgt"]
        ss,fss=src[0,:],src.shape
        st,fst=tgt[0,:],tgt.shape
        print(fss,fst)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    else:
        break

torch.Size([64, 109]) torch.Size([64, 109])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


testing input , is it correct

In [4]:
data_iter = iter(train_loader)
for i in range(7):
    batch = next(data_iter)
    print(batch["src"].shape)


torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])


In [5]:
for i,batch in enumerate(train_loader):
    if i <1:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)
        print(tgt_y)

tensor([633, 599,  43,  ...,   0,   0,   0], device='cuda:0')


In [6]:
import os
import torch
from tqdm import tqdm
os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf") 
best_model_path = None
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training", 
                leave=True, total=len(train_loader))
    for batch in train_bar:
        optimizer.zero_grad()
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)

        loss = criterion(output, tgt_y)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
              leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            src_batch = batch['src'].to(device)
            tgt_batch = batch['tgt'].to(device)
            
            output, _ = model(src_batch, tgt_batch[:, :-1]) 
            output_dim = output.shape[-1]
            output = output.reshape(-1, output_dim)
            tgt_y = tgt_batch[:,1:].contiguous().view(-1)

            loss = criterion(output, tgt_y)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")
    
    avg_val_loss = total_val_loss / len(val_loader)
    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        if best_model_path and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_val_loss = avg_val_loss
        best_model_path = os.path.join(SAVE_DIR, "best.pt")
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> New best model saved at {best_model_path}")

Epoch 1/100


🚀 Training: 100%|██████████| 13/13 [00:10<00:00,  1.27it/s, loss=8.1624]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.87it/s, loss=8.1321]


[Epoch 1] Train Loss: 8.4415 | Val Loss: 8.1621
  -> New best model saved at saved/best.pt
Epoch 2/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=7.7090]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.87it/s, loss=7.6861]


[Epoch 2] Train Loss: 7.8893 | Val Loss: 7.7245
  -> New best model saved at saved/best.pt
Epoch 3/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=7.3720]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.86it/s, loss=7.3409]


[Epoch 3] Train Loss: 7.4790 | Val Loss: 7.3877
  -> New best model saved at saved/best.pt
Epoch 4/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=7.0728]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.88it/s, loss=7.0584]


[Epoch 4] Train Loss: 7.1302 | Val Loss: 7.1127
  -> New best model saved at saved/best.pt
Epoch 5/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=6.8162]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.86it/s, loss=6.8574]


[Epoch 5] Train Loss: 6.8526 | Val Loss: 6.9195
  -> New best model saved at saved/best.pt
Epoch 6/100


🚀 Training: 100%|██████████| 13/13 [00:10<00:00,  1.30it/s, loss=6.5265]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.87it/s, loss=6.7315]


[Epoch 6] Train Loss: 6.6489 | Val Loss: 6.8009
  -> New best model saved at saved/best.pt
Epoch 7/100


🚀 Training: 100%|██████████| 13/13 [00:10<00:00,  1.30it/s, loss=6.4486]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.87it/s, loss=6.6657]


[Epoch 7] Train Loss: 6.5253 | Val Loss: 6.7400
  -> New best model saved at saved/best.pt
Epoch 8/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=6.4830]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.89it/s, loss=6.6397]


[Epoch 8] Train Loss: 6.4546 | Val Loss: 6.7175
  -> New best model saved at saved/best.pt
Epoch 9/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=6.3848]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.89it/s, loss=6.6313]


[Epoch 9] Train Loss: 6.4137 | Val Loss: 6.7104
  -> New best model saved at saved/best.pt
Epoch 10/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=6.3178]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.89it/s, loss=6.6247]


[Epoch 10] Train Loss: 6.3857 | Val Loss: 6.7061
  -> New best model saved at saved/best.pt
Epoch 11/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=6.4317]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.88it/s, loss=6.6100]


[Epoch 11] Train Loss: 6.3707 | Val Loss: 6.6934
  -> New best model saved at saved/best.pt
Epoch 12/100


🚀 Training: 100%|██████████| 13/13 [00:10<00:00,  1.30it/s, loss=6.4775]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.88it/s, loss=6.6026]


[Epoch 12] Train Loss: 6.3521 | Val Loss: 6.6859
  -> New best model saved at saved/best.pt
Epoch 13/100


🚀 Training: 100%|██████████| 13/13 [00:09<00:00,  1.30it/s, loss=6.2861]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00,  4.85it/s, loss=6.5919]


[Epoch 13] Train Loss: 6.3229 | Val Loss: 6.6775
  -> New best model saved at saved/best.pt
Epoch 14/100


🚀 Training: 100%|██████████| 13/13 [00:03<00:00,  4.33it/s, loss=6.3659]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.33it/s, loss=6.5693]


[Epoch 14] Train Loss: 6.3011 | Val Loss: 6.6575
  -> New best model saved at saved/best.pt
Epoch 15/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.98it/s, loss=6.3890]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.24it/s, loss=6.5548]


[Epoch 15] Train Loss: 6.2699 | Val Loss: 6.6440
  -> New best model saved at saved/best.pt
Epoch 16/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.70it/s, loss=6.2006]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.29it/s, loss=6.5331]


[Epoch 16] Train Loss: 6.2291 | Val Loss: 6.6245
  -> New best model saved at saved/best.pt
Epoch 17/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.79it/s, loss=6.2695]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.45it/s, loss=6.5080]


[Epoch 17] Train Loss: 6.1966 | Val Loss: 6.5991
  -> New best model saved at saved/best.pt
Epoch 18/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 19.16it/s, loss=6.1544]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.37it/s, loss=6.4933]


[Epoch 18] Train Loss: 6.1534 | Val Loss: 6.5842
  -> New best model saved at saved/best.pt
Epoch 19/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.98it/s, loss=6.1443]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.19it/s, loss=6.4728]


[Epoch 19] Train Loss: 6.1127 | Val Loss: 6.5633
  -> New best model saved at saved/best.pt
Epoch 20/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 19.11it/s, loss=5.9480]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.09it/s, loss=6.4439]


[Epoch 20] Train Loss: 6.0597 | Val Loss: 6.5369
  -> New best model saved at saved/best.pt
Epoch 21/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.63it/s, loss=6.0023]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.52it/s, loss=6.4267]


[Epoch 21] Train Loss: 6.0174 | Val Loss: 6.5192
  -> New best model saved at saved/best.pt
Epoch 22/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.99it/s, loss=6.0238]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.98it/s, loss=6.4029]


[Epoch 22] Train Loss: 5.9714 | Val Loss: 6.4973
  -> New best model saved at saved/best.pt
Epoch 23/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 19.06it/s, loss=6.0387]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.81it/s, loss=6.3865]


[Epoch 23] Train Loss: 5.9283 | Val Loss: 6.4806
  -> New best model saved at saved/best.pt
Epoch 24/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 19.10it/s, loss=5.7042]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.52it/s, loss=6.3659]


[Epoch 24] Train Loss: 5.8696 | Val Loss: 6.4613
  -> New best model saved at saved/best.pt
Epoch 25/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 19.00it/s, loss=5.7599]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.87it/s, loss=6.3496]


[Epoch 25] Train Loss: 5.8240 | Val Loss: 6.4474
  -> New best model saved at saved/best.pt
Epoch 26/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.91it/s, loss=5.8823]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.50it/s, loss=6.3305]


[Epoch 26] Train Loss: 5.7849 | Val Loss: 6.4285
  -> New best model saved at saved/best.pt
Epoch 27/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.75it/s, loss=5.7663]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.20it/s, loss=6.3256]


[Epoch 27] Train Loss: 5.7325 | Val Loss: 6.4309
Epoch 28/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.65it/s, loss=5.5877]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.55it/s, loss=6.3002]


[Epoch 28] Train Loss: 5.6798 | Val Loss: 6.4040
  -> New best model saved at saved/best.pt
Epoch 29/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.98it/s, loss=5.6455]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 62.70it/s, loss=6.2905]


[Epoch 29] Train Loss: 5.6390 | Val Loss: 6.3963
  -> New best model saved at saved/best.pt
Epoch 30/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.91it/s, loss=5.6692]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.29it/s, loss=6.2734]


[Epoch 30] Train Loss: 5.5926 | Val Loss: 6.3760
  -> New best model saved at saved/best.pt
Epoch 31/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.65it/s, loss=5.5308]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 54.76it/s, loss=6.2751]


[Epoch 31] Train Loss: 5.5458 | Val Loss: 6.3850
Epoch 32/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.67it/s, loss=5.6693]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.21it/s, loss=6.2542]


[Epoch 32] Train Loss: 5.5025 | Val Loss: 6.3635
  -> New best model saved at saved/best.pt
Epoch 33/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.71it/s, loss=5.3325]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.46it/s, loss=6.2502]


[Epoch 33] Train Loss: 5.4435 | Val Loss: 6.3599
  -> New best model saved at saved/best.pt
Epoch 34/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.64it/s, loss=5.3349]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 55.03it/s, loss=6.2359]


[Epoch 34] Train Loss: 5.3972 | Val Loss: 6.3499
  -> New best model saved at saved/best.pt
Epoch 35/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.30it/s, loss=5.4217]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.75it/s, loss=6.2266]


[Epoch 35] Train Loss: 5.3608 | Val Loss: 6.3451
  -> New best model saved at saved/best.pt
Epoch 36/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.58it/s, loss=5.4973]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.64it/s, loss=6.2138]


[Epoch 36] Train Loss: 5.3250 | Val Loss: 6.3352
  -> New best model saved at saved/best.pt
Epoch 37/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.17it/s, loss=5.2347]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 54.43it/s, loss=6.2145]


[Epoch 37] Train Loss: 5.2695 | Val Loss: 6.3353
Epoch 38/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.18it/s, loss=5.3576]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.77it/s, loss=6.2003]


[Epoch 38] Train Loss: 5.2364 | Val Loss: 6.3257
  -> New best model saved at saved/best.pt
Epoch 39/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.32it/s, loss=5.1372]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 52.93it/s, loss=6.1936]


[Epoch 39] Train Loss: 5.1823 | Val Loss: 6.3208
  -> New best model saved at saved/best.pt
Epoch 40/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.48it/s, loss=4.9614]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.00it/s, loss=6.1884]


[Epoch 40] Train Loss: 5.1390 | Val Loss: 6.3181
  -> New best model saved at saved/best.pt
Epoch 41/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.29it/s, loss=5.0512]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.98it/s, loss=6.1854]


[Epoch 41] Train Loss: 5.1000 | Val Loss: 6.3123
  -> New best model saved at saved/best.pt
Epoch 42/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.46it/s, loss=5.0856]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.63it/s, loss=6.1911]


[Epoch 42] Train Loss: 5.0573 | Val Loss: 6.3211
Epoch 43/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.52it/s, loss=4.9489]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.83it/s, loss=6.1813]


[Epoch 43] Train Loss: 5.0196 | Val Loss: 6.3124
Epoch 44/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.40it/s, loss=5.0904]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.18it/s, loss=6.1779]


[Epoch 44] Train Loss: 4.9783 | Val Loss: 6.3122
  -> New best model saved at saved/best.pt
Epoch 45/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.50it/s, loss=5.0136]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.64it/s, loss=6.1785]


[Epoch 45] Train Loss: 4.9388 | Val Loss: 6.3171
Epoch 46/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.38it/s, loss=4.9136]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.20it/s, loss=6.1836]


[Epoch 46] Train Loss: 4.9026 | Val Loss: 6.3225
Epoch 47/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.47it/s, loss=5.0358]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.15it/s, loss=6.1770]


[Epoch 47] Train Loss: 4.8614 | Val Loss: 6.3206
Epoch 48/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.45it/s, loss=4.8543]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.01it/s, loss=6.1725]


[Epoch 48] Train Loss: 4.8212 | Val Loss: 6.3123
Epoch 49/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.44it/s, loss=4.6692]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.89it/s, loss=6.1796]


[Epoch 49] Train Loss: 4.7792 | Val Loss: 6.3227
Epoch 50/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.53it/s, loss=4.7655]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 53.32it/s, loss=6.1656]


[Epoch 50] Train Loss: 4.7454 | Val Loss: 6.3094
  -> New best model saved at saved/best.pt
Epoch 51/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.47it/s, loss=4.6277]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.79it/s, loss=6.1736]


[Epoch 51] Train Loss: 4.6964 | Val Loss: 6.3138
Epoch 52/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.33it/s, loss=4.5281]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.57it/s, loss=6.1820]


[Epoch 52] Train Loss: 4.6563 | Val Loss: 6.3292
Epoch 53/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.34it/s, loss=4.5945]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.40it/s, loss=6.1863]


[Epoch 53] Train Loss: 4.6290 | Val Loss: 6.3309
Epoch 54/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.36it/s, loss=4.7518]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 54.42it/s, loss=6.1993]


[Epoch 54] Train Loss: 4.6022 | Val Loss: 6.3491
Epoch 55/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 17.56it/s, loss=4.5322]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 54.72it/s, loss=6.1961]


[Epoch 55] Train Loss: 4.5575 | Val Loss: 6.3450
Epoch 56/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.30it/s, loss=4.5475]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 53.60it/s, loss=6.2086]


[Epoch 56] Train Loss: 4.5231 | Val Loss: 6.3563
Epoch 57/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.20it/s, loss=4.5717]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.07it/s, loss=6.2024]


[Epoch 57] Train Loss: 4.4904 | Val Loss: 6.3492
Epoch 58/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.06it/s, loss=4.4429]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 54.56it/s, loss=6.2023]


[Epoch 58] Train Loss: 4.4513 | Val Loss: 6.3592
Epoch 59/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.19it/s, loss=4.3532]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 54.08it/s, loss=6.2155]


[Epoch 59] Train Loss: 4.4148 | Val Loss: 6.3723
Epoch 60/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.13it/s, loss=4.2975]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.09it/s, loss=6.2171]


[Epoch 60] Train Loss: 4.3797 | Val Loss: 6.3679
Epoch 61/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.27it/s, loss=4.2746]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.69it/s, loss=6.2307]


[Epoch 61] Train Loss: 4.3475 | Val Loss: 6.3910
Epoch 62/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.28it/s, loss=4.3519]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.66it/s, loss=6.2123]


[Epoch 62] Train Loss: 4.3158 | Val Loss: 6.3742
Epoch 63/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.47it/s, loss=4.3607]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.49it/s, loss=6.2363]


[Epoch 63] Train Loss: 4.2793 | Val Loss: 6.3928
Epoch 64/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.17it/s, loss=4.2248]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.54it/s, loss=6.2278]


[Epoch 64] Train Loss: 4.2451 | Val Loss: 6.3944
Epoch 65/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.37it/s, loss=4.2235]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.43it/s, loss=6.2436]


[Epoch 65] Train Loss: 4.2092 | Val Loss: 6.4008
Epoch 66/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.32it/s, loss=4.1249]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.02it/s, loss=6.2544]


[Epoch 66] Train Loss: 4.1753 | Val Loss: 6.4189
Epoch 67/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.33it/s, loss=4.1286]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 54.19it/s, loss=6.2642]


[Epoch 67] Train Loss: 4.1411 | Val Loss: 6.4334
Epoch 68/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.08it/s, loss=4.1621]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.50it/s, loss=6.2795]


[Epoch 68] Train Loss: 4.1104 | Val Loss: 6.4518
Epoch 69/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 17.90it/s, loss=4.0527]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 53.31it/s, loss=6.2730]


[Epoch 69] Train Loss: 4.0790 | Val Loss: 6.4378
Epoch 70/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 17.83it/s, loss=3.9950]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.76it/s, loss=6.2622]


[Epoch 70] Train Loss: 4.0397 | Val Loss: 6.4308
Epoch 71/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.37it/s, loss=3.9011]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.94it/s, loss=6.2941]


[Epoch 71] Train Loss: 4.0047 | Val Loss: 6.4638
Epoch 72/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.20it/s, loss=4.0286]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.52it/s, loss=6.3053]


[Epoch 72] Train Loss: 3.9834 | Val Loss: 6.4727
Epoch 73/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.41it/s, loss=3.9825]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.96it/s, loss=6.3029]


[Epoch 73] Train Loss: 3.9483 | Val Loss: 6.4874
Epoch 74/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.54it/s, loss=3.8225]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.31it/s, loss=6.3125]


[Epoch 74] Train Loss: 3.9165 | Val Loss: 6.4896
Epoch 75/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.64it/s, loss=3.9867]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.11it/s, loss=6.3275]


[Epoch 75] Train Loss: 3.8886 | Val Loss: 6.5002
Epoch 76/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.67it/s, loss=3.8728]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 62.08it/s, loss=6.3350]


[Epoch 76] Train Loss: 3.8552 | Val Loss: 6.5113
Epoch 77/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.48it/s, loss=3.8917]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.44it/s, loss=6.3502]


[Epoch 77] Train Loss: 3.8236 | Val Loss: 6.5220
Epoch 78/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.31it/s, loss=3.7842]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 58.57it/s, loss=6.3473]


[Epoch 78] Train Loss: 3.7949 | Val Loss: 6.5307
Epoch 79/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.54it/s, loss=3.6774]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.07it/s, loss=6.3546]


[Epoch 79] Train Loss: 3.7583 | Val Loss: 6.5477
Epoch 80/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.56it/s, loss=3.6533]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.70it/s, loss=6.3663]


[Epoch 80] Train Loss: 3.7317 | Val Loss: 6.5478
Epoch 81/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.52it/s, loss=3.7016]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.67it/s, loss=6.3783]


[Epoch 81] Train Loss: 3.7053 | Val Loss: 6.5659
Epoch 82/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.51it/s, loss=3.6906]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.57it/s, loss=6.4135]


[Epoch 82] Train Loss: 3.6815 | Val Loss: 6.6059
Epoch 83/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.27it/s, loss=3.7540]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 53.54it/s, loss=6.3939]


[Epoch 83] Train Loss: 3.6550 | Val Loss: 6.5777
Epoch 84/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 17.72it/s, loss=3.7151]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.05it/s, loss=6.4280]


[Epoch 84] Train Loss: 3.6222 | Val Loss: 6.6122
Epoch 85/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.80it/s, loss=3.5950]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.50it/s, loss=6.4247]


[Epoch 85] Train Loss: 3.5875 | Val Loss: 6.6051
Epoch 86/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.62it/s, loss=3.5490]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.14it/s, loss=6.4374]


[Epoch 86] Train Loss: 3.5613 | Val Loss: 6.6237
Epoch 87/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.84it/s, loss=3.6130]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 62.41it/s, loss=6.4699]


[Epoch 87] Train Loss: 3.5402 | Val Loss: 6.6593
Epoch 88/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.87it/s, loss=3.4388]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.75it/s, loss=6.4778]


[Epoch 88] Train Loss: 3.5028 | Val Loss: 6.6665
Epoch 89/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.56it/s, loss=3.5024]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 62.44it/s, loss=6.4928]


[Epoch 89] Train Loss: 3.4826 | Val Loss: 6.6732
Epoch 90/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.98it/s, loss=3.5138]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.52it/s, loss=6.5043]


[Epoch 90] Train Loss: 3.4512 | Val Loss: 6.6845
Epoch 91/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.63it/s, loss=3.4128]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.84it/s, loss=6.5238]


[Epoch 91] Train Loss: 3.4209 | Val Loss: 6.7043
Epoch 92/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.84it/s, loss=3.2772]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 61.75it/s, loss=6.5330]


[Epoch 92] Train Loss: 3.3914 | Val Loss: 6.7149
Epoch 93/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.35it/s, loss=3.5732]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.13it/s, loss=6.5388]


[Epoch 93] Train Loss: 3.3811 | Val Loss: 6.7240
Epoch 94/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.43it/s, loss=3.3736]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.59it/s, loss=6.5468]


[Epoch 94] Train Loss: 3.3484 | Val Loss: 6.7312
Epoch 95/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.68it/s, loss=3.3357]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.85it/s, loss=6.5644]


[Epoch 95] Train Loss: 3.3272 | Val Loss: 6.7515
Epoch 96/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.56it/s, loss=3.1874]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.24it/s, loss=6.5877]


[Epoch 96] Train Loss: 3.2925 | Val Loss: 6.7912
Epoch 97/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.52it/s, loss=3.3564]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 56.99it/s, loss=6.5859]


[Epoch 97] Train Loss: 3.2719 | Val Loss: 6.7761
Epoch 98/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.47it/s, loss=3.2735]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 60.44it/s, loss=6.6100]


[Epoch 98] Train Loss: 3.2504 | Val Loss: 6.7995
Epoch 99/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.68it/s, loss=3.1008]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 59.94it/s, loss=6.6296]


[Epoch 99] Train Loss: 3.2127 | Val Loss: 6.8297
Epoch 100/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 18.19it/s, loss=3.2076]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 57.23it/s, loss=6.6417]

[Epoch 100] Train Loss: 3.2024 | Val Loss: 6.8415



