In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
from bigru import Encoder,Decoder,Seq2Seq
import utils
import pickle
nltk.download('punkt')  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
BATCH_SIZE = 64 # butuh lebih banyak update 
EPOCHS=100
DATA_PATH = "dataset/"  
SAVE_DIR = "saved"
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH, 
    source_lang="min", 
    target_lang="eng", 
    batch_size=BATCH_SIZE, 
    device=device
)
SRC_VOCAB_SIZE = 5000    # Example size
TGT_VOCAB_SIZE = 5000   # Example size
EMBED_SIZE = 256
ENC_HIDDEN = 128      # For the encoder (before doubling in BiGRU)
DEC_HIDDEN = 256      # For the decoder
N_LAYERS = 2
encoder = Encoder(SRC_VOCAB_SIZE, EMBED_SIZE, ENC_HIDDEN, num_layers=N_LAYERS, dropout=0.1, pad_idx=utils.PAD_TOKEN)
decoder = Decoder(TGT_VOCAB_SIZE, EMBED_SIZE, DEC_HIDDEN, num_layers=N_LAYERS, dropout=0.1, pad_idx=utils.PAD_TOKEN)
model = Seq2Seq(encoder, decoder, device, ENC_HIDDEN, DEC_HIDDEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN) 
print("Model initialized on:", device)


TrainData - Max 'min' sentence length: 76
TrainData - Max 'eng' sentence length: 107
TestData - Max 'min' sentence length: 61
TestData - Max 'eng' sentence length: 75
ValidData - Max 'min' sentence length: 71
ValidData - Max 'eng' sentence length: 80
Number of examples in train_dataset,train origin,train_raw: 800 800 800
Number of examples in valid_dataset: 100
Number of examples in test_dataset: 100
Model initialized on: cuda


In [3]:
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}
for i, batch in enumerate(train_loader):
    if i < 8:
        src = batch["src"]
        tgt = batch["tgt"]
        ss,fss=src[0,:],src.shape
        st,fst=tgt[0,:],tgt.shape
        print(fss,fst)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    else:
        break

torch.Size([64, 109]) torch.Size([64, 109])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


testing input , is it correct

In [4]:
data_iter = iter(train_loader)
for i in range(7):
    batch = next(data_iter)
    print(batch["src"].shape)


torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])
torch.Size([64, 109])


In [5]:
for i,batch in enumerate(train_loader):
    if i <1:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        output,_= model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)
        print(tgt_y)

tensor([187, 153,  21,  ...,   0,   0,   0], device='cuda:0')


In [6]:
import os
import torch
from tqdm import tqdm
os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf") 
best_model_path = None
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training", 
                leave=True, total=len(train_loader))
    for batch in train_bar:
        optimizer.zero_grad()
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)

        loss = criterion(output, tgt_y)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
              leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            src_batch = batch['src'].to(device)
            tgt_batch = batch['tgt'].to(device)
            
            output, _ = model(src_batch, tgt_batch[:, :-1]) 
            output_dim = output.shape[-1]
            output = output.reshape(-1, output_dim)
            tgt_y = tgt_batch[:,1:].contiguous().view(-1)

            loss = criterion(output, tgt_y)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")
    
    avg_val_loss = total_val_loss / len(val_loader)
    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        if best_model_path and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_val_loss = avg_val_loss
        best_model_path = os.path.join(SAVE_DIR, "best_gru.pt")
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> New best model saved at {best_model_path}")


Epoch 1/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.04it/s, loss=7.7257]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.12it/s, loss=7.4307]


[Epoch 1] Train Loss: 8.2379 | Val Loss: 7.4876
  -> New best model saved at saved/best_gru.pt
Epoch 2/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.11it/s, loss=6.7286]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.40it/s, loss=6.7000]


[Epoch 2] Train Loss: 6.9073 | Val Loss: 6.7637
  -> New best model saved at saved/best_gru.pt
Epoch 3/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.10it/s, loss=6.5487]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.67it/s, loss=6.7392]


[Epoch 3] Train Loss: 6.5275 | Val Loss: 6.8166
Epoch 4/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.10it/s, loss=6.4926]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.97it/s, loss=6.7624]


[Epoch 4] Train Loss: 6.4944 | Val Loss: 6.8439
Epoch 5/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.10it/s, loss=6.7432]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.92it/s, loss=6.7918]


[Epoch 5] Train Loss: 6.4860 | Val Loss: 6.8657
Epoch 6/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.11it/s, loss=6.4647]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.67it/s, loss=6.7999]


[Epoch 6] Train Loss: 6.4637 | Val Loss: 6.8782
Epoch 7/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=6.5098]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.07it/s, loss=6.8188]


[Epoch 7] Train Loss: 6.4576 | Val Loss: 6.8967
Epoch 8/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.07it/s, loss=6.3975]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.10it/s, loss=6.8250]


[Epoch 8] Train Loss: 6.4462 | Val Loss: 6.8980
Epoch 9/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.07it/s, loss=6.4053]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.79it/s, loss=6.8241]


[Epoch 9] Train Loss: 6.4327 | Val Loss: 6.8973
Epoch 10/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=6.6051]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.25it/s, loss=6.8297]


[Epoch 10] Train Loss: 6.4290 | Val Loss: 6.9006
Epoch 11/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=6.4484]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.47it/s, loss=6.8324]


[Epoch 11] Train Loss: 6.4096 | Val Loss: 6.9025
Epoch 12/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.09it/s, loss=6.2252]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.12it/s, loss=6.8345]


[Epoch 12] Train Loss: 6.3755 | Val Loss: 6.9036
Epoch 13/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.11it/s, loss=6.4260]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.85it/s, loss=6.8229]


[Epoch 13] Train Loss: 6.3654 | Val Loss: 6.8988
Epoch 14/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.13it/s, loss=6.3886]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 20.17it/s, loss=6.8239]


[Epoch 14] Train Loss: 6.3394 | Val Loss: 6.8932
Epoch 15/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.08it/s, loss=6.1544]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.09it/s, loss=6.8183]


[Epoch 15] Train Loss: 6.3098 | Val Loss: 6.8892
Epoch 16/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.95it/s, loss=6.2968]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.86it/s, loss=6.8010]


[Epoch 16] Train Loss: 6.2900 | Val Loss: 6.8761
Epoch 17/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.98it/s, loss=6.4358]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.51it/s, loss=6.8064]


[Epoch 17] Train Loss: 6.2719 | Val Loss: 6.8831
Epoch 18/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=6.2084]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.14it/s, loss=6.8057]


[Epoch 18] Train Loss: 6.2344 | Val Loss: 6.8783
Epoch 19/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=6.1967]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.85it/s, loss=6.8021]


[Epoch 19] Train Loss: 6.2103 | Val Loss: 6.8765
Epoch 20/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=6.2708]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.83it/s, loss=6.8067]


[Epoch 20] Train Loss: 6.1917 | Val Loss: 6.8844
Epoch 21/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.04it/s, loss=6.1002]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.31it/s, loss=6.8006]


[Epoch 21] Train Loss: 6.1619 | Val Loss: 6.8772
Epoch 22/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=6.2973]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.86it/s, loss=6.8026]


[Epoch 22] Train Loss: 6.1417 | Val Loss: 6.8824
Epoch 23/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.95it/s, loss=6.1627]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 12.73it/s, loss=6.8108]


[Epoch 23] Train Loss: 6.1149 | Val Loss: 6.8916
Epoch 24/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=6.1138]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 14.03it/s, loss=6.8052]


[Epoch 24] Train Loss: 6.0905 | Val Loss: 6.8864
Epoch 25/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.88it/s, loss=6.1243]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 13.22it/s, loss=6.8112]


[Epoch 25] Train Loss: 6.0724 | Val Loss: 6.8922
Epoch 26/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.93it/s, loss=5.9589]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.55it/s, loss=6.8155]


[Epoch 26] Train Loss: 6.0407 | Val Loss: 6.9014
Epoch 27/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.94it/s, loss=6.0688]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 13.87it/s, loss=6.8115]


[Epoch 27] Train Loss: 6.0211 | Val Loss: 6.8979
Epoch 28/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.98it/s, loss=5.9035]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.02it/s, loss=6.8154]


[Epoch 28] Train Loss: 5.9943 | Val Loss: 6.9060
Epoch 29/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=6.0029]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.69it/s, loss=6.8168]


[Epoch 29] Train Loss: 5.9755 | Val Loss: 6.9138
Epoch 30/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.04it/s, loss=5.9533]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.44it/s, loss=6.8229]


[Epoch 30] Train Loss: 5.9552 | Val Loss: 6.9164
Epoch 31/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=5.8445]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.91it/s, loss=6.8314]


[Epoch 31] Train Loss: 5.9297 | Val Loss: 6.9212
Epoch 32/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.83it/s, loss=6.0191]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.18it/s, loss=6.8310]


[Epoch 32] Train Loss: 5.9124 | Val Loss: 6.9273
Epoch 33/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=5.8390]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.15it/s, loss=6.8257]


[Epoch 33] Train Loss: 5.8847 | Val Loss: 6.9266
Epoch 34/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.83it/s, loss=5.7507]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 11.53it/s, loss=6.8413]


[Epoch 34] Train Loss: 5.8600 | Val Loss: 6.9380
Epoch 35/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.00it/s, loss=5.8484]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.62it/s, loss=6.8375]


[Epoch 35] Train Loss: 5.8425 | Val Loss: 6.9351
Epoch 36/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.94it/s, loss=5.8426]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.62it/s, loss=6.8599]


[Epoch 36] Train Loss: 5.8201 | Val Loss: 6.9514
Epoch 37/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.98it/s, loss=5.8084]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.04it/s, loss=6.8608]


[Epoch 37] Train Loss: 5.8018 | Val Loss: 6.9585
Epoch 38/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.98it/s, loss=5.8744]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.96it/s, loss=6.8663]


[Epoch 38] Train Loss: 5.7785 | Val Loss: 6.9565
Epoch 39/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.04it/s, loss=5.7602]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.73it/s, loss=6.8680]


[Epoch 39] Train Loss: 5.7561 | Val Loss: 6.9677
Epoch 40/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.05it/s, loss=5.7789]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.28it/s, loss=6.8675]


[Epoch 40] Train Loss: 5.7369 | Val Loss: 6.9779
Epoch 41/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=5.6925]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.20it/s, loss=6.8766]


[Epoch 41] Train Loss: 5.7136 | Val Loss: 6.9741
Epoch 42/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.05it/s, loss=5.8158]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 20.07it/s, loss=6.8875]


[Epoch 42] Train Loss: 5.6924 | Val Loss: 6.9820
Epoch 43/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=5.6219]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.94it/s, loss=6.8908]


[Epoch 43] Train Loss: 5.6585 | Val Loss: 6.9878
Epoch 44/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.06it/s, loss=5.6810]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.94it/s, loss=6.8825]


[Epoch 44] Train Loss: 5.6393 | Val Loss: 6.9861
Epoch 45/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.05it/s, loss=5.5607]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.37it/s, loss=6.8925]


[Epoch 45] Train Loss: 5.6122 | Val Loss: 6.9915
Epoch 46/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.99it/s, loss=5.5529]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.33it/s, loss=6.9004]


[Epoch 46] Train Loss: 5.5903 | Val Loss: 6.9990
Epoch 47/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=5.6081]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.05it/s, loss=6.8891]


[Epoch 47] Train Loss: 5.5705 | Val Loss: 6.9978
Epoch 48/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=5.5496]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.48it/s, loss=6.9212]


[Epoch 48] Train Loss: 5.5390 | Val Loss: 7.0116
Epoch 49/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=5.5065]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.97it/s, loss=6.9222]


[Epoch 49] Train Loss: 5.5158 | Val Loss: 7.0269
Epoch 50/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.06it/s, loss=5.5075]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.99it/s, loss=6.9199]


[Epoch 50] Train Loss: 5.4933 | Val Loss: 7.0153
Epoch 51/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.05it/s, loss=5.4921]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.10it/s, loss=6.9282]


[Epoch 51] Train Loss: 5.4589 | Val Loss: 7.0237
Epoch 52/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.05it/s, loss=5.4881]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.31it/s, loss=6.9262]


[Epoch 52] Train Loss: 5.4431 | Val Loss: 7.0291
Epoch 53/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.05it/s, loss=5.5435]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.57it/s, loss=6.9522]


[Epoch 53] Train Loss: 5.4215 | Val Loss: 7.0545
Epoch 54/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=5.4195]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.50it/s, loss=6.9421]


[Epoch 54] Train Loss: 5.3925 | Val Loss: 7.0433
Epoch 55/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.07it/s, loss=5.3066]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.73it/s, loss=6.9457]


[Epoch 55] Train Loss: 5.3606 | Val Loss: 7.0508
Epoch 56/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.12it/s, loss=5.3516]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.02it/s, loss=6.9544]


[Epoch 56] Train Loss: 5.3285 | Val Loss: 7.0577
Epoch 57/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.04it/s, loss=5.2750]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.66it/s, loss=6.9589]


[Epoch 57] Train Loss: 5.3079 | Val Loss: 7.0657
Epoch 58/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.05it/s, loss=5.2326]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 18.16it/s, loss=6.9661]


[Epoch 58] Train Loss: 5.2811 | Val Loss: 7.0718
Epoch 59/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.99it/s, loss=5.1273]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.81it/s, loss=6.9545]


[Epoch 59] Train Loss: 5.2507 | Val Loss: 7.0729
Epoch 60/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.04it/s, loss=5.3303]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.81it/s, loss=6.9704]


[Epoch 60] Train Loss: 5.2348 | Val Loss: 7.0777
Epoch 61/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=5.1179]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 13.29it/s, loss=6.9564]


[Epoch 61] Train Loss: 5.1882 | Val Loss: 7.0721
Epoch 62/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=5.1297]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 12.91it/s, loss=6.9878]


[Epoch 62] Train Loss: 5.1634 | Val Loss: 7.0904
Epoch 63/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.98it/s, loss=5.1848]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.43it/s, loss=6.9974]


[Epoch 63] Train Loss: 5.1384 | Val Loss: 7.1011
Epoch 64/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.85it/s, loss=5.1329]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 12.83it/s, loss=6.9929]


[Epoch 64] Train Loss: 5.1138 | Val Loss: 7.1046
Epoch 65/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.88it/s, loss=5.0760]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.43it/s, loss=7.0167]


[Epoch 65] Train Loss: 5.0768 | Val Loss: 7.1147
Epoch 66/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.79it/s, loss=5.1034]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.22it/s, loss=7.0324]


[Epoch 66] Train Loss: 5.0579 | Val Loss: 7.1255
Epoch 67/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=5.1267]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.24it/s, loss=7.0197]


[Epoch 67] Train Loss: 5.0318 | Val Loss: 7.1437
Epoch 68/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.84it/s, loss=5.0121]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.58it/s, loss=7.0552]


[Epoch 68] Train Loss: 4.9996 | Val Loss: 7.1461
Epoch 69/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=4.9695]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.45it/s, loss=7.0598]


[Epoch 69] Train Loss: 4.9739 | Val Loss: 7.1459
Epoch 70/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=4.9389]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.56it/s, loss=7.0343]


[Epoch 70] Train Loss: 4.9380 | Val Loss: 7.1457
Epoch 71/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.99it/s, loss=4.9730]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.47it/s, loss=7.0741]


[Epoch 71] Train Loss: 4.9253 | Val Loss: 7.1722
Epoch 72/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.99it/s, loss=4.7869]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.65it/s, loss=7.0878]


[Epoch 72] Train Loss: 4.8723 | Val Loss: 7.1766
Epoch 73/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.82it/s, loss=4.7636]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.20it/s, loss=7.0945]


[Epoch 73] Train Loss: 4.8471 | Val Loss: 7.1879
Epoch 74/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=4.8778]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.61it/s, loss=7.0930]


[Epoch 74] Train Loss: 4.8215 | Val Loss: 7.1900
Epoch 75/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.00it/s, loss=4.8389]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.63it/s, loss=7.0819]


[Epoch 75] Train Loss: 4.7933 | Val Loss: 7.1923
Epoch 76/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=4.8355]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.49it/s, loss=7.1098]


[Epoch 76] Train Loss: 4.7901 | Val Loss: 7.2334
Epoch 77/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=4.8206]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.05it/s, loss=7.1391]


[Epoch 77] Train Loss: 4.7444 | Val Loss: 7.2372
Epoch 78/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=4.8204]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.10it/s, loss=7.1212]


[Epoch 78] Train Loss: 4.7246 | Val Loss: 7.2287
Epoch 79/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.01it/s, loss=4.6530]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 13.87it/s, loss=7.1549]


[Epoch 79] Train Loss: 4.6778 | Val Loss: 7.2675
Epoch 80/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.89it/s, loss=4.6453]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.21it/s, loss=7.1412]


[Epoch 80] Train Loss: 4.6488 | Val Loss: 7.2578
Epoch 81/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.89it/s, loss=4.5735]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 14.67it/s, loss=7.1889]


[Epoch 81] Train Loss: 4.6350 | Val Loss: 7.2847
Epoch 82/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.94it/s, loss=4.6127]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.33it/s, loss=7.1674]


[Epoch 82] Train Loss: 4.5956 | Val Loss: 7.2893
Epoch 83/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.00it/s, loss=4.6513]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.18it/s, loss=7.1946]


[Epoch 83] Train Loss: 4.5703 | Val Loss: 7.3109
Epoch 84/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=4.4581]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.13it/s, loss=7.1931]


[Epoch 84] Train Loss: 4.5528 | Val Loss: 7.2944
Epoch 85/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=4.6083]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.90it/s, loss=7.2153]


[Epoch 85] Train Loss: 4.5121 | Val Loss: 7.3111
Epoch 86/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.03it/s, loss=4.6176]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 19.28it/s, loss=7.1863]


[Epoch 86] Train Loss: 4.4744 | Val Loss: 7.3194
Epoch 87/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.94it/s, loss=4.4081]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.77it/s, loss=7.2123]


[Epoch 87] Train Loss: 4.4565 | Val Loss: 7.3251
Epoch 88/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.98it/s, loss=4.4828]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.26it/s, loss=7.2339]


[Epoch 88] Train Loss: 4.4114 | Val Loss: 7.3587
Epoch 89/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=4.4880]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.27it/s, loss=7.2323]


[Epoch 89] Train Loss: 4.3851 | Val Loss: 7.3606
Epoch 90/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=4.4012]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.22it/s, loss=7.2376]


[Epoch 90] Train Loss: 4.3669 | Val Loss: 7.3741
Epoch 91/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=4.2167]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 10.08it/s, loss=7.2870]


[Epoch 91] Train Loss: 4.3644 | Val Loss: 7.4062
Epoch 92/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.93it/s, loss=4.1647]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.46it/s, loss=7.2791]


[Epoch 92] Train Loss: 4.2855 | Val Loss: 7.3952
Epoch 93/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.99it/s, loss=4.3591]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 13.37it/s, loss=7.2898]


[Epoch 93] Train Loss: 4.2799 | Val Loss: 7.4074
Epoch 94/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.84it/s, loss=4.3255]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.00it/s, loss=7.3553]


[Epoch 94] Train Loss: 4.2356 | Val Loss: 7.4531
Epoch 95/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.02it/s, loss=4.2060]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 14.60it/s, loss=7.3572]


[Epoch 95] Train Loss: 4.2066 | Val Loss: 7.4787
Epoch 96/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.99it/s, loss=4.4657]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 12.84it/s, loss=7.3343]


[Epoch 96] Train Loss: 4.2093 | Val Loss: 7.4637
Epoch 97/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.90it/s, loss=4.1615]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.60it/s, loss=7.3471]


[Epoch 97] Train Loss: 4.1536 | Val Loss: 7.4536
Epoch 98/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=3.9881]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 16.83it/s, loss=7.3359]


[Epoch 98] Train Loss: 4.1380 | Val Loss: 7.4705
Epoch 99/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  3.00it/s, loss=4.1368]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 17.27it/s, loss=7.3619]


[Epoch 99] Train Loss: 4.0771 | Val Loss: 7.4813
Epoch 100/100


🚀 Training: 100%|██████████| 13/13 [00:04<00:00,  2.83it/s, loss=4.1414]
🚀 Validation: 100%|██████████| 2/2 [00:00<00:00, 15.92it/s, loss=7.3868]

[Epoch 100] Train Loss: 4.0899 | Val Loss: 7.5002



