In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
import utils
import pickle
nltk.download('punkt')  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
BATCH_SIZE = 64 # butuh lebih banyak update 
DATA_PATH = "dataset/"  
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH, 
    source_lang="min", 
    target_lang="eng", 
    batch_size=BATCH_SIZE, 
    device=device
)
SRC_VOCAB_SIZE = 5000
TGT_VOCAB_SIZE = 5000
N_LAYERS = 4
N_HEADS = 2
D_MODEL = 64
FFN_HIDDEN = 32
DROPOUT = 0.1
EPOCHS = 100

SAVE_DIR = "saved"
encoder = TransformerEncoder(SRC_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
model = Transformer(encoder,decoder,device,utils.PAD_TOKEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN) 
print("Model initialized on:", device)


Number of examples in train_dataset,train origin,train_raw: 799 799 799
Number of examples in valid_dataset: 100
Number of examples in test_dataset: 100
Model initialized on: cuda


In [3]:
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}
for i, batch in enumerate(train_loader):
    if i < 8:
        src = batch["src"]
        tgt = batch["tgt"]
        ss,fss=src[0,:],src.shape
        st,fst=tgt[0,:],tgt.shape
        print(fss,fst)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    else:
        break


torch.Size([64, 102]) torch.Size([64, 102])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


testing input , is it correct

In [4]:
data_iter = iter(train_loader)
for i in range(7):
    batch = next(data_iter)
    print(batch["src"].shape)


torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])
torch.Size([64, 102])


In [None]:
for i,batch in enumerate(train_loader):
    if i <1:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)
        print(tgt_y)

tensor([2097,  185, 1828,  ...,    0,    0,    0], device='cuda:0')


In [None]:
import os
import torch
from tqdm import tqdm
os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf") 
best_model_path = None
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training", 
                leave=True, total=len(train_loader))
    for batch in train_bar:
        optimizer.zero_grad()
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)

        loss = criterion(output, tgt_y)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
              leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            src_batch = batch['src'].to(device)
            tgt_batch = batch['tgt'].to(device)
            
            output, _ = model(src_batch, tgt_batch[:, :-1]) 
            output_dim = output.shape[-1]
            output = output.reshape(-1, output_dim)
            tgt_y = tgt_batch[:,1:].contiguous().view(-1)

            loss = criterion(output, tgt_y)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")
    
    avg_val_loss = total_val_loss / len(val_loader)
    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        if best_model_path and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_val_loss = avg_val_loss
        best_model_path = os.path.join(SAVE_DIR, "best.pt")
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> New best model saved at {best_model_path}")


Epoch 1/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.16it/s, loss=7.8487]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.31it/s, loss=7.6792]


[Epoch 1] Train Loss: 8.1965 | Val Loss: 7.7357
  -> New best model saved at saved/best.pt
Epoch 2/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.05it/s, loss=7.1096]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.08it/s, loss=7.1270]


[Epoch 2] Train Loss: 7.3742 | Val Loss: 7.1893
  -> New best model saved at saved/best.pt
Epoch 3/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.11it/s, loss=6.7184]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.49it/s, loss=6.7624]


[Epoch 3] Train Loss: 6.8396 | Val Loss: 6.8293
  -> New best model saved at saved/best.pt
Epoch 4/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.06it/s, loss=6.3670]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.02it/s, loss=6.6424]


[Epoch 4] Train Loss: 6.5259 | Val Loss: 6.7171
  -> New best model saved at saved/best.pt
Epoch 5/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.05it/s, loss=6.3570]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 50.12it/s, loss=6.6259]


[Epoch 5] Train Loss: 6.4158 | Val Loss: 6.7083
  -> New best model saved at saved/best.pt
Epoch 6/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.13it/s, loss=6.2457]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.82it/s, loss=6.5963]


[Epoch 6] Train Loss: 6.3646 | Val Loss: 6.6893
  -> New best model saved at saved/best.pt
Epoch 7/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.10it/s, loss=6.2639]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.99it/s, loss=6.5598]


[Epoch 7] Train Loss: 6.3196 | Val Loss: 6.6582
  -> New best model saved at saved/best.pt
Epoch 8/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.04it/s, loss=6.2793]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.42it/s, loss=6.5231]


[Epoch 8] Train Loss: 6.2599 | Val Loss: 6.6250
  -> New best model saved at saved/best.pt
Epoch 9/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.02it/s, loss=6.0988]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.83it/s, loss=6.4965]


[Epoch 9] Train Loss: 6.1857 | Val Loss: 6.6032
  -> New best model saved at saved/best.pt
Epoch 10/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.97it/s, loss=6.0542]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.60it/s, loss=6.4415]


[Epoch 10] Train Loss: 6.1093 | Val Loss: 6.5485
  -> New best model saved at saved/best.pt
Epoch 11/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.00it/s, loss=5.8926]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.39it/s, loss=6.4133]


[Epoch 11] Train Loss: 6.0242 | Val Loss: 6.5203
  -> New best model saved at saved/best.pt
Epoch 12/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.99it/s, loss=5.8752]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.86it/s, loss=6.3915]


[Epoch 12] Train Loss: 5.9409 | Val Loss: 6.5013
  -> New best model saved at saved/best.pt
Epoch 13/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.98it/s, loss=5.6551]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.32it/s, loss=6.3476]


[Epoch 13] Train Loss: 5.8504 | Val Loss: 6.4565
  -> New best model saved at saved/best.pt
Epoch 14/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.04it/s, loss=5.7657]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.88it/s, loss=6.3254]


[Epoch 14] Train Loss: 5.7700 | Val Loss: 6.4278
  -> New best model saved at saved/best.pt
Epoch 15/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 15.00it/s, loss=5.7450]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.26it/s, loss=6.2855]


[Epoch 15] Train Loss: 5.6814 | Val Loss: 6.3871
  -> New best model saved at saved/best.pt
Epoch 16/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.95it/s, loss=5.5820]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.05it/s, loss=6.2469]


[Epoch 16] Train Loss: 5.5844 | Val Loss: 6.3530
  -> New best model saved at saved/best.pt
Epoch 17/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.94it/s, loss=5.3094]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.05it/s, loss=6.2362]


[Epoch 17] Train Loss: 5.4823 | Val Loss: 6.3382
  -> New best model saved at saved/best.pt
Epoch 18/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.93it/s, loss=5.4613]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.50it/s, loss=6.2035]


[Epoch 18] Train Loss: 5.3958 | Val Loss: 6.3046
  -> New best model saved at saved/best.pt
Epoch 19/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.93it/s, loss=5.3760]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.79it/s, loss=6.1743]


[Epoch 19] Train Loss: 5.3083 | Val Loss: 6.2785
  -> New best model saved at saved/best.pt
Epoch 20/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.99it/s, loss=5.0683]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 44.31it/s, loss=6.1570]


[Epoch 20] Train Loss: 5.2102 | Val Loss: 6.2541
  -> New best model saved at saved/best.pt
Epoch 21/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.96it/s, loss=5.0073]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.72it/s, loss=6.1447]


[Epoch 21] Train Loss: 5.1272 | Val Loss: 6.2425
  -> New best model saved at saved/best.pt
Epoch 22/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.85it/s, loss=5.0957]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.69it/s, loss=6.1317]


[Epoch 22] Train Loss: 5.0448 | Val Loss: 6.2329
  -> New best model saved at saved/best.pt
Epoch 23/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.88it/s, loss=5.0359]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.49it/s, loss=6.1111]


[Epoch 23] Train Loss: 4.9679 | Val Loss: 6.2131
  -> New best model saved at saved/best.pt
Epoch 24/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.80it/s, loss=4.9169]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.26it/s, loss=6.1129]


[Epoch 24] Train Loss: 4.8810 | Val Loss: 6.2117
  -> New best model saved at saved/best.pt
Epoch 25/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.90it/s, loss=4.6883]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.35it/s, loss=6.1108]


[Epoch 25] Train Loss: 4.7942 | Val Loss: 6.2124
Epoch 26/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.74it/s, loss=4.7553]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.13it/s, loss=6.1086]


[Epoch 26] Train Loss: 4.7260 | Val Loss: 6.2162
Epoch 27/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.83it/s, loss=4.6756]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.88it/s, loss=6.1031]


[Epoch 27] Train Loss: 4.6564 | Val Loss: 6.2052
  -> New best model saved at saved/best.pt
Epoch 28/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.79it/s, loss=4.6141]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.06it/s, loss=6.1202]


[Epoch 28] Train Loss: 4.5746 | Val Loss: 6.2260
Epoch 29/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.84it/s, loss=4.4143]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.43it/s, loss=6.0967]


[Epoch 29] Train Loss: 4.4912 | Val Loss: 6.2004
  -> New best model saved at saved/best.pt
Epoch 30/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.80it/s, loss=4.3874]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.31it/s, loss=6.1066]


[Epoch 30] Train Loss: 4.4238 | Val Loss: 6.2109
Epoch 31/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.80it/s, loss=4.3423]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.43it/s, loss=6.0947]


[Epoch 31] Train Loss: 4.3508 | Val Loss: 6.1984
  -> New best model saved at saved/best.pt
Epoch 32/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.86it/s, loss=4.3285]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.33it/s, loss=6.1061]


[Epoch 32] Train Loss: 4.2770 | Val Loss: 6.2140
Epoch 33/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.81it/s, loss=4.2682]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.81it/s, loss=6.1256]


[Epoch 33] Train Loss: 4.2047 | Val Loss: 6.2325
Epoch 34/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.72it/s, loss=4.0358]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.59it/s, loss=6.1169]


[Epoch 34] Train Loss: 4.1282 | Val Loss: 6.2348
Epoch 35/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.85it/s, loss=4.0407]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.94it/s, loss=6.1312]


[Epoch 35] Train Loss: 4.0689 | Val Loss: 6.2377
Epoch 36/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.85it/s, loss=3.8959]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.01it/s, loss=6.1576]


[Epoch 36] Train Loss: 3.9939 | Val Loss: 6.2624
Epoch 37/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.82it/s, loss=3.9019]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.86it/s, loss=6.1737]


[Epoch 37] Train Loss: 3.9253 | Val Loss: 6.2788
Epoch 38/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.71it/s, loss=3.8803]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.92it/s, loss=6.1647]


[Epoch 38] Train Loss: 3.8536 | Val Loss: 6.2787
Epoch 39/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.79it/s, loss=3.6648]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.52it/s, loss=6.1953]


[Epoch 39] Train Loss: 3.7738 | Val Loss: 6.3009
Epoch 40/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.74it/s, loss=3.6326]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.64it/s, loss=6.1931]


[Epoch 40] Train Loss: 3.7122 | Val Loss: 6.3093
Epoch 41/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.73it/s, loss=3.6808]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.19it/s, loss=6.2090]


[Epoch 41] Train Loss: 3.6647 | Val Loss: 6.3208
Epoch 42/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.75it/s, loss=3.6973]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.30it/s, loss=6.2413]


[Epoch 42] Train Loss: 3.5956 | Val Loss: 6.3506
Epoch 43/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.77it/s, loss=3.5394]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.72it/s, loss=6.2462]


[Epoch 43] Train Loss: 3.5230 | Val Loss: 6.3638
Epoch 44/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.73it/s, loss=3.4339]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.25it/s, loss=6.2659]


[Epoch 44] Train Loss: 3.4607 | Val Loss: 6.3689
Epoch 45/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.72it/s, loss=3.4016]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.35it/s, loss=6.2832]


[Epoch 45] Train Loss: 3.4057 | Val Loss: 6.3934
Epoch 46/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.67it/s, loss=3.3257]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 49.35it/s, loss=6.3135]


[Epoch 46] Train Loss: 3.3335 | Val Loss: 6.4235
Epoch 47/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.67it/s, loss=3.2706]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.21it/s, loss=6.3363]


[Epoch 47] Train Loss: 3.2695 | Val Loss: 6.4574
Epoch 48/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.60it/s, loss=3.2517]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.78it/s, loss=6.3621]


[Epoch 48] Train Loss: 3.2142 | Val Loss: 6.4712
Epoch 49/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.66it/s, loss=3.2417]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.84it/s, loss=6.3907]


[Epoch 49] Train Loss: 3.1544 | Val Loss: 6.5006
Epoch 50/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.64it/s, loss=3.0801]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.89it/s, loss=6.4089]


[Epoch 50] Train Loss: 3.0905 | Val Loss: 6.5256
Epoch 51/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.56it/s, loss=3.0322]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.54it/s, loss=6.4021]


[Epoch 51] Train Loss: 3.0278 | Val Loss: 6.5174
Epoch 52/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.59it/s, loss=2.9479]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.45it/s, loss=6.4397]


[Epoch 52] Train Loss: 2.9702 | Val Loss: 6.5572
Epoch 53/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.52it/s, loss=2.9098]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 45.85it/s, loss=6.4611]


[Epoch 53] Train Loss: 2.9141 | Val Loss: 6.5762
Epoch 54/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.52it/s, loss=2.7736]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.36it/s, loss=6.5217]


[Epoch 54] Train Loss: 2.8496 | Val Loss: 6.6285
Epoch 55/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.65it/s, loss=2.7656]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.56it/s, loss=6.5460]


[Epoch 55] Train Loss: 2.7948 | Val Loss: 6.6641
Epoch 56/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.68it/s, loss=2.7458]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.64it/s, loss=6.5377]


[Epoch 56] Train Loss: 2.7406 | Val Loss: 6.6710
Epoch 57/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.60it/s, loss=2.5498]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.12it/s, loss=6.5491]


[Epoch 57] Train Loss: 2.6804 | Val Loss: 6.6654
Epoch 58/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.65it/s, loss=2.6229]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.59it/s, loss=6.6025]


[Epoch 58] Train Loss: 2.6411 | Val Loss: 6.7085
Epoch 59/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.58it/s, loss=2.5125]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.20it/s, loss=6.6390]


[Epoch 59] Train Loss: 2.5830 | Val Loss: 6.7607
Epoch 60/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.56it/s, loss=2.5438]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.98it/s, loss=6.6590]


[Epoch 60] Train Loss: 2.5349 | Val Loss: 6.7718
Epoch 61/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.50it/s, loss=2.4338]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.40it/s, loss=6.6880]


[Epoch 61] Train Loss: 2.4870 | Val Loss: 6.8024
Epoch 62/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.37it/s, loss=2.3519]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.20it/s, loss=6.7278]


[Epoch 62] Train Loss: 2.4351 | Val Loss: 6.8319
Epoch 63/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.58it/s, loss=2.4363]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.47it/s, loss=6.7403]


[Epoch 63] Train Loss: 2.3914 | Val Loss: 6.8501
Epoch 64/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.57it/s, loss=2.2410]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.55it/s, loss=6.7743]


[Epoch 64] Train Loss: 2.3423 | Val Loss: 6.8829
Epoch 65/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.62it/s, loss=2.3232]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.81it/s, loss=6.7978]


[Epoch 65] Train Loss: 2.2944 | Val Loss: 6.8926
Epoch 66/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.60it/s, loss=2.2153]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.31it/s, loss=6.8145]


[Epoch 66] Train Loss: 2.2378 | Val Loss: 6.9200
Epoch 67/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.57it/s, loss=2.2717]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.31it/s, loss=6.8310]


[Epoch 67] Train Loss: 2.2035 | Val Loss: 6.9347
Epoch 68/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.52it/s, loss=2.0575]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 43.00it/s, loss=6.8700]


[Epoch 68] Train Loss: 2.1566 | Val Loss: 6.9771
Epoch 69/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.56it/s, loss=2.1375]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.18it/s, loss=6.8884]


[Epoch 69] Train Loss: 2.1161 | Val Loss: 6.9907
Epoch 70/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.55it/s, loss=2.1625]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.18it/s, loss=6.9079]


[Epoch 70] Train Loss: 2.0673 | Val Loss: 7.0291
Epoch 71/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.41it/s, loss=2.1189]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.51it/s, loss=6.9564]


[Epoch 71] Train Loss: 2.0387 | Val Loss: 7.0768
Epoch 72/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.34it/s, loss=1.9689]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.11it/s, loss=6.9822]


[Epoch 72] Train Loss: 2.0060 | Val Loss: 7.1045
Epoch 73/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.53it/s, loss=1.8451]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.41it/s, loss=7.0271]


[Epoch 73] Train Loss: 1.9472 | Val Loss: 7.1404
Epoch 74/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.42it/s, loss=1.8856]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 45.43it/s, loss=7.0484]


[Epoch 74] Train Loss: 1.9101 | Val Loss: 7.1427
Epoch 75/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.40it/s, loss=2.0527]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 44.89it/s, loss=7.0817]


[Epoch 75] Train Loss: 1.8848 | Val Loss: 7.1768
Epoch 76/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.32it/s, loss=2.0093]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.70it/s, loss=7.1118]


[Epoch 76] Train Loss: 1.8441 | Val Loss: 7.2231
Epoch 77/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.40it/s, loss=1.8845]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.47it/s, loss=7.1313]


[Epoch 77] Train Loss: 1.8081 | Val Loss: 7.2558
Epoch 78/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.38it/s, loss=1.8386]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.75it/s, loss=7.1717]


[Epoch 78] Train Loss: 1.7629 | Val Loss: 7.2819
Epoch 79/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.35it/s, loss=1.7526]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.63it/s, loss=7.1976]


[Epoch 79] Train Loss: 1.7334 | Val Loss: 7.3079
Epoch 80/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.43it/s, loss=1.6953]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.27it/s, loss=7.2247]


[Epoch 80] Train Loss: 1.7025 | Val Loss: 7.3482
Epoch 81/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.46it/s, loss=1.8332]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 45.76it/s, loss=7.2622]


[Epoch 81] Train Loss: 1.6845 | Val Loss: 7.3970
Epoch 82/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.29it/s, loss=1.5738]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 43.64it/s, loss=7.2528]


[Epoch 82] Train Loss: 1.6364 | Val Loss: 7.3801
Epoch 83/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.42it/s, loss=1.5365]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 48.51it/s, loss=7.3224]


[Epoch 83] Train Loss: 1.6086 | Val Loss: 7.4463
Epoch 84/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.40it/s, loss=1.7300]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.66it/s, loss=7.3358]


[Epoch 84] Train Loss: 1.5823 | Val Loss: 7.4516
Epoch 85/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.34it/s, loss=1.5177]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.57it/s, loss=7.3685]


[Epoch 85] Train Loss: 1.5547 | Val Loss: 7.4809
Epoch 86/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.42it/s, loss=1.5248]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.47it/s, loss=7.4171]


[Epoch 86] Train Loss: 1.5271 | Val Loss: 7.5355
Epoch 87/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.33it/s, loss=1.4823]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.56it/s, loss=7.4364]


[Epoch 87] Train Loss: 1.4869 | Val Loss: 7.5471
Epoch 88/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.41it/s, loss=1.5078]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 47.59it/s, loss=7.4564]


[Epoch 88] Train Loss: 1.4620 | Val Loss: 7.5879
Epoch 89/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.37it/s, loss=1.5540]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 45.88it/s, loss=7.4737]


[Epoch 89] Train Loss: 1.4516 | Val Loss: 7.6091
Epoch 90/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.37it/s, loss=1.5261]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.57it/s, loss=7.4760]


[Epoch 90] Train Loss: 1.4201 | Val Loss: 7.6086
Epoch 91/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.35it/s, loss=1.4028]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 44.51it/s, loss=7.5044]


[Epoch 91] Train Loss: 1.3889 | Val Loss: 7.6483
Epoch 92/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.03it/s, loss=1.3948]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.25it/s, loss=7.5168]


[Epoch 92] Train Loss: 1.3637 | Val Loss: 7.6594
Epoch 93/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.30it/s, loss=1.2488]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.95it/s, loss=7.5905]


[Epoch 93] Train Loss: 1.3347 | Val Loss: 7.7113
Epoch 94/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.29it/s, loss=1.3157]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.66it/s, loss=7.6291]


[Epoch 94] Train Loss: 1.3165 | Val Loss: 7.7437
Epoch 95/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.24it/s, loss=1.3387]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.33it/s, loss=7.6328]


[Epoch 95] Train Loss: 1.2949 | Val Loss: 7.7627
Epoch 96/100


🚀 Training: 100%|██████████| 13/13 [00:00<00:00, 14.22it/s, loss=1.3237]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 46.41it/s, loss=7.6379]


[Epoch 96] Train Loss: 1.2679 | Val Loss: 7.7801
Epoch 97/100


🚀 Training:   0%|          | 0/13 [00:00<?, ?it/s]