In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
import utils
import pickle
nltk.download('punkt')  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
BATCH_SIZE = 8 # butuh lebih banyak update 
DATA_PATH = "dataset/"  
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH, 
    source_lang="min", 
    target_lang="eng", 
    batch_size=BATCH_SIZE, 
    device=device
)


In [3]:
SRC_VOCAB_SIZE = 5000  
TGT_VOCAB_SIZE = 5000  
DROPOUT = 0.15      
N_LAYERS = 2         
N_HEADS = 2        
FFN_HIDDEN = 256      
D_MODEL = 128        

encoder = TransformerEncoder(SRC_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
model = Transformer(encoder,decoder,device,utils.PAD_TOKEN).to(device)


In [4]:
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN) 
print("Model initialized on:", device)

Model initialized on: cuda


In [5]:

for i, batch in enumerate(train_loader):
    if i < 1:
        print("src_batch type:", type(batch['src']))  
        print("tgt_batch type:", type(batch['tgt']))  # Should be <class 'torch.Tensor'>
        print("src_batch shape:", batch['src'].shape)  
        print("tgt_batch shape:", batch['tgt'].shape)  
    else:
        break


src_batch type: <class 'torch.Tensor'>
tgt_batch type: <class 'torch.Tensor'>
src_batch shape: torch.Size([8, 52])
tgt_batch shape: torch.Size([8, 52])


  src_batch = [torch.tensor(item['src'], dtype=torch.long) for item in batch]
  tgt_batch = [torch.tensor(item['tgt'], dtype=torch.long) for item in batch]


Checky checky .

In [8]:
pth = "dataset"
src = "min"
tgt = "eng"
tp  = os.path.join(pth,f"{src}_{tgt}")
input_dic_path = os.path.join(tp, "input_dic.pkl")
output_dic_path = os.path.join(tp, "output_dic.pkl")
with open(input_dic_path, "rb") as f:
    inp = pickle.load(f)
with open(output_dic_path, "rb") as f:
    output_dictionary = pickle.load(f)

for i,batch in enumerate(train_loader):
    if i <1:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        output, _ = model(src_batch, tgt_batch[:, :-1]) 

        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        token_indices = torch.argmax(output, dim=1).cpu().tolist()  # Convert here
        tt = utils.detokenize(token_indices, output_dictionary)
        print(tt)
        print("Max index in output_dictionary:", max(output_dictionary.index2word.keys()))
        print("n_count:", output_dictionary.n_count)

  src_batch = [torch.tensor(item['src'], dtype=torch.long) for item in batch]
  tgt_batch = [torch.tensor(item['tgt'], dtype=torch.long) for item in batch]


KeyError: 3317


        output,_= model(src_batch, tgt_batch)
        output_dim = output.shape[-1]
        output = output[:, :-1, :].reshape(-1, output_dim)
        tgt_y  = tgt_batch[:, 1:].reshape(-1)

TypeError: detokenize() missing 1 required positional argument: 'vocab'

In [None]:
import os
import torch
from tqdm import tqdm

EPOCHS = 250
SAVE_DIR = "saved"
os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf")
best_model_path = None
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training", 
                leave=False, total=len(train_loader))
    for batch in train_loader:
        optimizer.zero_grad()
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:, :-1].reshape(-1)
        
        loss = criterion(output, tgt_y)
        loss.backward()
        optimizer.step()
        
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
              leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            src_batch = batch['src'].to(device)
            tgt_batch = batch['tgt'].to(device)
            
            output, _ = model(src_batch, tgt_batch[:, :-1]) 
            output_dim = output.shape[-1]
            output = output.reshape(-1, output_dim)
            tgt_y = tgt_batch[:, :-1].reshape(-1)
            # tgt_y = tgt_batch[:, 1:].reshape(-1)

            
            loss = criterion(output, tgt_y)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")
    
    avg_val_loss = total_val_loss / len(val_loader)
    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        if best_model_path and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_val_loss = avg_val_loss
        best_model_path = os.path.join(SAVE_DIR, "best.pt")
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> New best model saved at {best_model_path}")


Epoch 1/250


  src_batch = [torch.tensor(item['src'], dtype=torch.long) for item in batch]
  tgt_batch = [torch.tensor(item['tgt'], dtype=torch.long) for item in batch]
🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 223.45it/s, loss=8.4977]


[Epoch 1] Train Loss: 8.5404 | Val Loss: 8.4775
  -> New best model saved at saved/best.pt
Epoch 2/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 192.19it/s, loss=8.3446]


[Epoch 2] Train Loss: 8.4069 | Val Loss: 8.3251
  -> New best model saved at saved/best.pt
Epoch 3/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 195.29it/s, loss=8.1851]


[Epoch 3] Train Loss: 8.2624 | Val Loss: 8.1664
  -> New best model saved at saved/best.pt
Epoch 4/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 234.57it/s, loss=8.0126]


[Epoch 4] Train Loss: 8.1128 | Val Loss: 7.9952
  -> New best model saved at saved/best.pt
Epoch 5/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 175.19it/s, loss=7.8293]


[Epoch 5] Train Loss: 7.9522 | Val Loss: 7.8127
  -> New best model saved at saved/best.pt
Epoch 6/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 253.70it/s, loss=7.6374]


[Epoch 6] Train Loss: 7.7811 | Val Loss: 7.6222
  -> New best model saved at saved/best.pt
Epoch 7/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 186.68it/s, loss=7.4410]


[Epoch 7] Train Loss: 7.6057 | Val Loss: 7.4270
  -> New best model saved at saved/best.pt
Epoch 8/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 221.13it/s, loss=7.2448]


[Epoch 8] Train Loss: 7.4229 | Val Loss: 7.2311
  -> New best model saved at saved/best.pt
Epoch 9/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 142.82it/s, loss=7.0521]


[Epoch 9] Train Loss: 7.2495 | Val Loss: 7.0389
  -> New best model saved at saved/best.pt
Epoch 10/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 206.82it/s, loss=6.8673]


[Epoch 10] Train Loss: 7.0696 | Val Loss: 6.8542
  -> New best model saved at saved/best.pt
Epoch 11/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 228.90it/s, loss=6.6900]


[Epoch 11] Train Loss: 6.8965 | Val Loss: 6.6773
  -> New best model saved at saved/best.pt
Epoch 12/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 239.24it/s, loss=6.5236]


[Epoch 12] Train Loss: 6.7420 | Val Loss: 6.5115
  -> New best model saved at saved/best.pt
Epoch 13/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 213.92it/s, loss=6.3668]


[Epoch 13] Train Loss: 6.5846 | Val Loss: 6.3550
  -> New best model saved at saved/best.pt
Epoch 14/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 243.63it/s, loss=6.2182]


[Epoch 14] Train Loss: 6.4483 | Val Loss: 6.2069
  -> New best model saved at saved/best.pt
Epoch 15/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 190.02it/s, loss=6.0787]


[Epoch 15] Train Loss: 6.3123 | Val Loss: 6.0677
  -> New best model saved at saved/best.pt
Epoch 16/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 233.23it/s, loss=5.9451]


[Epoch 16] Train Loss: 6.1831 | Val Loss: 5.9345
  -> New best model saved at saved/best.pt
Epoch 17/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 205.89it/s, loss=5.8186]


[Epoch 17] Train Loss: 6.0563 | Val Loss: 5.8079
  -> New best model saved at saved/best.pt
Epoch 18/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 222.55it/s, loss=5.6974]


[Epoch 18] Train Loss: 5.9487 | Val Loss: 5.6871
  -> New best model saved at saved/best.pt
Epoch 19/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 224.80it/s, loss=5.5831]


[Epoch 19] Train Loss: 5.8406 | Val Loss: 5.5735
  -> New best model saved at saved/best.pt
Epoch 20/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 231.59it/s, loss=5.4732]


[Epoch 20] Train Loss: 5.7345 | Val Loss: 5.4633
  -> New best model saved at saved/best.pt
Epoch 21/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 175.17it/s, loss=5.3675]


[Epoch 21] Train Loss: 5.6295 | Val Loss: 5.3582
  -> New best model saved at saved/best.pt
Epoch 22/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 211.13it/s, loss=5.2678]


[Epoch 22] Train Loss: 5.5347 | Val Loss: 5.2584
  -> New best model saved at saved/best.pt
Epoch 23/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 149.69it/s, loss=5.1721]


[Epoch 23] Train Loss: 5.4375 | Val Loss: 5.1626
  -> New best model saved at saved/best.pt
Epoch 24/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 217.40it/s, loss=5.0797]


[Epoch 24] Train Loss: 5.3542 | Val Loss: 5.0705
  -> New best model saved at saved/best.pt
Epoch 25/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 190.71it/s, loss=4.9917]


[Epoch 25] Train Loss: 5.2620 | Val Loss: 4.9831
  -> New best model saved at saved/best.pt
Epoch 26/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 184.35it/s, loss=4.9059]


[Epoch 26] Train Loss: 5.1812 | Val Loss: 4.8969
  -> New best model saved at saved/best.pt
Epoch 27/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 171.88it/s, loss=4.8246]


[Epoch 27] Train Loss: 5.0951 | Val Loss: 4.8156
  -> New best model saved at saved/best.pt
Epoch 28/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 213.19it/s, loss=4.7458]


[Epoch 28] Train Loss: 5.0229 | Val Loss: 4.7369
  -> New best model saved at saved/best.pt
Epoch 29/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 182.00it/s, loss=4.6683]


[Epoch 29] Train Loss: 4.9456 | Val Loss: 4.6602
  -> New best model saved at saved/best.pt
Epoch 30/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 243.07it/s, loss=4.5947]


[Epoch 30] Train Loss: 4.8767 | Val Loss: 4.5871
  -> New best model saved at saved/best.pt
Epoch 31/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 172.49it/s, loss=4.5227]


[Epoch 31] Train Loss: 4.8085 | Val Loss: 4.5156
  -> New best model saved at saved/best.pt
Epoch 32/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 221.49it/s, loss=4.4536]


[Epoch 32] Train Loss: 4.7362 | Val Loss: 4.4462
  -> New best model saved at saved/best.pt
Epoch 33/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 221.31it/s, loss=4.3848]


[Epoch 33] Train Loss: 4.6684 | Val Loss: 4.3787
  -> New best model saved at saved/best.pt
Epoch 34/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 206.30it/s, loss=4.3197]


[Epoch 34] Train Loss: 4.6086 | Val Loss: 4.3140
  -> New best model saved at saved/best.pt
Epoch 35/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 116.74it/s, loss=4.2563]


[Epoch 35] Train Loss: 4.5442 | Val Loss: 4.2510
  -> New best model saved at saved/best.pt
Epoch 36/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 217.88it/s, loss=4.1947]


[Epoch 36] Train Loss: 4.4853 | Val Loss: 4.1904
  -> New best model saved at saved/best.pt
Epoch 37/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 206.23it/s, loss=4.1346]


[Epoch 37] Train Loss: 4.4243 | Val Loss: 4.1312
  -> New best model saved at saved/best.pt
Epoch 38/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 227.50it/s, loss=4.0769]


[Epoch 38] Train Loss: 4.3696 | Val Loss: 4.0740
  -> New best model saved at saved/best.pt
Epoch 39/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 308.67it/s, loss=4.0210]


[Epoch 39] Train Loss: 4.3111 | Val Loss: 4.0192
  -> New best model saved at saved/best.pt
Epoch 40/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 315.66it/s, loss=3.9644]


[Epoch 40] Train Loss: 4.2578 | Val Loss: 3.9640
  -> New best model saved at saved/best.pt
Epoch 41/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 221.34it/s, loss=3.9114]


[Epoch 41] Train Loss: 4.2093 | Val Loss: 3.9113
  -> New best model saved at saved/best.pt
Epoch 42/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 340.40it/s, loss=3.8589]


[Epoch 42] Train Loss: 4.1581 | Val Loss: 3.8598
  -> New best model saved at saved/best.pt
Epoch 43/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 299.63it/s, loss=3.8090]


[Epoch 43] Train Loss: 4.1046 | Val Loss: 3.8103
  -> New best model saved at saved/best.pt
Epoch 44/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 332.68it/s, loss=3.7594]


[Epoch 44] Train Loss: 4.0628 | Val Loss: 3.7618
  -> New best model saved at saved/best.pt
Epoch 45/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 217.40it/s, loss=3.7112]


[Epoch 45] Train Loss: 4.0174 | Val Loss: 3.7143
  -> New best model saved at saved/best.pt
Epoch 46/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 214.31it/s, loss=3.6637]


[Epoch 46] Train Loss: 3.9677 | Val Loss: 3.6676
  -> New best model saved at saved/best.pt
Epoch 47/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 164.56it/s, loss=3.6168]


[Epoch 47] Train Loss: 3.9214 | Val Loss: 3.6211
  -> New best model saved at saved/best.pt
Epoch 48/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 241.29it/s, loss=3.5719]


[Epoch 48] Train Loss: 3.8819 | Val Loss: 3.5767
  -> New best model saved at saved/best.pt
Epoch 49/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 174.18it/s, loss=3.5282]


[Epoch 49] Train Loss: 3.8344 | Val Loss: 3.5339
  -> New best model saved at saved/best.pt
Epoch 50/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 254.93it/s, loss=3.4834]


[Epoch 50] Train Loss: 3.7919 | Val Loss: 3.4900
  -> New best model saved at saved/best.pt
Epoch 51/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 182.34it/s, loss=3.4406]


[Epoch 51] Train Loss: 3.7505 | Val Loss: 3.4482
  -> New best model saved at saved/best.pt
Epoch 52/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 235.91it/s, loss=3.3988]


[Epoch 52] Train Loss: 3.7151 | Val Loss: 3.4070
  -> New best model saved at saved/best.pt
Epoch 53/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 179.19it/s, loss=3.3573]


[Epoch 53] Train Loss: 3.6754 | Val Loss: 3.3666
  -> New best model saved at saved/best.pt
Epoch 54/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 243.55it/s, loss=3.3174]


[Epoch 54] Train Loss: 3.6340 | Val Loss: 3.3272
  -> New best model saved at saved/best.pt
Epoch 55/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 173.58it/s, loss=3.2767]


[Epoch 55] Train Loss: 3.6002 | Val Loss: 3.2873
  -> New best model saved at saved/best.pt
Epoch 56/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 255.24it/s, loss=3.2381]


[Epoch 56] Train Loss: 3.5650 | Val Loss: 3.2494
  -> New best model saved at saved/best.pt
Epoch 57/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 271.46it/s, loss=3.2000]


[Epoch 57] Train Loss: 3.5240 | Val Loss: 3.2122
  -> New best model saved at saved/best.pt
Epoch 58/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 340.27it/s, loss=3.1615]


[Epoch 58] Train Loss: 3.4940 | Val Loss: 3.1746
  -> New best model saved at saved/best.pt
Epoch 59/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 162.81it/s, loss=3.1241]


[Epoch 59] Train Loss: 3.4658 | Val Loss: 3.1380
  -> New best model saved at saved/best.pt
Epoch 60/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 222.09it/s, loss=3.0880]


[Epoch 60] Train Loss: 3.4275 | Val Loss: 3.1028
  -> New best model saved at saved/best.pt
Epoch 61/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 221.45it/s, loss=3.0517]


[Epoch 61] Train Loss: 3.3880 | Val Loss: 3.0672
  -> New best model saved at saved/best.pt
Epoch 62/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 211.84it/s, loss=3.0162]


[Epoch 62] Train Loss: 3.3550 | Val Loss: 3.0324
  -> New best model saved at saved/best.pt
Epoch 63/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 145.68it/s, loss=2.9806]


[Epoch 63] Train Loss: 3.3217 | Val Loss: 2.9976
  -> New best model saved at saved/best.pt
Epoch 64/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 163.55it/s, loss=2.9459]


[Epoch 64] Train Loss: 3.2953 | Val Loss: 2.9639
  -> New best model saved at saved/best.pt
Epoch 65/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 193.13it/s, loss=2.9122]


[Epoch 65] Train Loss: 3.2623 | Val Loss: 2.9308
  -> New best model saved at saved/best.pt
Epoch 66/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 217.88it/s, loss=2.8789]


[Epoch 66] Train Loss: 3.2372 | Val Loss: 2.8979
  -> New best model saved at saved/best.pt
Epoch 67/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 148.03it/s, loss=2.8453]


[Epoch 67] Train Loss: 3.1999 | Val Loss: 2.8651
  -> New best model saved at saved/best.pt
Epoch 68/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 259.99it/s, loss=2.8111]


[Epoch 68] Train Loss: 3.1702 | Val Loss: 2.8316
  -> New best model saved at saved/best.pt
Epoch 69/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 178.21it/s, loss=2.7791]


[Epoch 69] Train Loss: 3.1423 | Val Loss: 2.8001
  -> New best model saved at saved/best.pt
Epoch 70/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 334.00it/s, loss=2.7482]


[Epoch 70] Train Loss: 3.1193 | Val Loss: 2.7698
  -> New best model saved at saved/best.pt
Epoch 71/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 298.11it/s, loss=2.7170]


[Epoch 71] Train Loss: 3.0843 | Val Loss: 2.7390
  -> New best model saved at saved/best.pt
Epoch 72/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 291.88it/s, loss=2.6847]


[Epoch 72] Train Loss: 3.0548 | Val Loss: 2.7079
  -> New best model saved at saved/best.pt
Epoch 73/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 250.48it/s, loss=2.6541]


[Epoch 73] Train Loss: 3.0286 | Val Loss: 2.6779
  -> New best model saved at saved/best.pt
Epoch 74/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 269.55it/s, loss=2.6228]


[Epoch 74] Train Loss: 3.0013 | Val Loss: 2.6470
  -> New best model saved at saved/best.pt
Epoch 75/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 228.51it/s, loss=2.5930]


[Epoch 75] Train Loss: 2.9735 | Val Loss: 2.6182
  -> New best model saved at saved/best.pt
Epoch 76/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 298.11it/s, loss=2.5628]


[Epoch 76] Train Loss: 2.9469 | Val Loss: 2.5881
  -> New best model saved at saved/best.pt
Epoch 77/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 198.90it/s, loss=2.5328]


[Epoch 77] Train Loss: 2.9231 | Val Loss: 2.5589
  -> New best model saved at saved/best.pt
Epoch 78/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 195.82it/s, loss=2.5043]


[Epoch 78] Train Loss: 2.8958 | Val Loss: 2.5309
  -> New best model saved at saved/best.pt
Epoch 79/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 231.10it/s, loss=2.4765]


[Epoch 79] Train Loss: 2.8737 | Val Loss: 2.5037
  -> New best model saved at saved/best.pt
Epoch 80/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 220.53it/s, loss=2.4471]


[Epoch 80] Train Loss: 2.8504 | Val Loss: 2.4749
  -> New best model saved at saved/best.pt
Epoch 81/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 261.76it/s, loss=2.4204]


[Epoch 81] Train Loss: 2.8221 | Val Loss: 2.4485
  -> New best model saved at saved/best.pt
Epoch 82/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 303.40it/s, loss=2.3923]


[Epoch 82] Train Loss: 2.8004 | Val Loss: 2.4208
  -> New best model saved at saved/best.pt
Epoch 83/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 191.97it/s, loss=2.3649]


[Epoch 83] Train Loss: 2.7765 | Val Loss: 2.3941
  -> New best model saved at saved/best.pt
Epoch 84/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 243.42it/s, loss=2.3374]


[Epoch 84] Train Loss: 2.7493 | Val Loss: 2.3669
  -> New best model saved at saved/best.pt
Epoch 85/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 182.71it/s, loss=2.3106]


[Epoch 85] Train Loss: 2.7280 | Val Loss: 2.3404
  -> New best model saved at saved/best.pt
Epoch 86/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 332.44it/s, loss=2.2823]


[Epoch 86] Train Loss: 2.7070 | Val Loss: 2.3123
  -> New best model saved at saved/best.pt
Epoch 87/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 271.84it/s, loss=2.2570]


[Epoch 87] Train Loss: 2.6817 | Val Loss: 2.2872
  -> New best model saved at saved/best.pt
Epoch 88/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 275.34it/s, loss=2.2309]


[Epoch 88] Train Loss: 2.6576 | Val Loss: 2.2614
  -> New best model saved at saved/best.pt
Epoch 89/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 185.18it/s, loss=2.2078]


[Epoch 89] Train Loss: 2.6380 | Val Loss: 2.2388
  -> New best model saved at saved/best.pt
Epoch 90/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 323.00it/s, loss=2.1818]


[Epoch 90] Train Loss: 2.6219 | Val Loss: 2.2128
  -> New best model saved at saved/best.pt
Epoch 91/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 258.26it/s, loss=2.1566]


[Epoch 91] Train Loss: 2.5967 | Val Loss: 2.1880
  -> New best model saved at saved/best.pt
Epoch 92/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 240.40it/s, loss=2.1315]


[Epoch 92] Train Loss: 2.5723 | Val Loss: 2.1630
  -> New best model saved at saved/best.pt
Epoch 93/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 236.01it/s, loss=2.1080]


[Epoch 93] Train Loss: 2.5504 | Val Loss: 2.1397
  -> New best model saved at saved/best.pt
Epoch 94/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 204.61it/s, loss=2.0835]


[Epoch 94] Train Loss: 2.5408 | Val Loss: 2.1152
  -> New best model saved at saved/best.pt
Epoch 95/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 266.62it/s, loss=2.0606]


[Epoch 95] Train Loss: 2.5083 | Val Loss: 2.0921
  -> New best model saved at saved/best.pt
Epoch 96/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 302.22it/s, loss=2.0369]


[Epoch 96] Train Loss: 2.4975 | Val Loss: 2.0691
  -> New best model saved at saved/best.pt
Epoch 97/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 290.09it/s, loss=2.0122]


[Epoch 97] Train Loss: 2.4721 | Val Loss: 2.0443
  -> New best model saved at saved/best.pt
Epoch 98/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 249.44it/s, loss=1.9893]


[Epoch 98] Train Loss: 2.4547 | Val Loss: 2.0212
  -> New best model saved at saved/best.pt
Epoch 99/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 173.18it/s, loss=1.9668]


[Epoch 99] Train Loss: 2.4331 | Val Loss: 1.9989
  -> New best model saved at saved/best.pt
Epoch 100/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 315.18it/s, loss=1.9451]


[Epoch 100] Train Loss: 2.4129 | Val Loss: 1.9771
  -> New best model saved at saved/best.pt
Epoch 101/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 284.90it/s, loss=1.9237]


[Epoch 101] Train Loss: 2.3961 | Val Loss: 1.9556
  -> New best model saved at saved/best.pt
Epoch 102/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 292.52it/s, loss=1.9021]


[Epoch 102] Train Loss: 2.3810 | Val Loss: 1.9342
  -> New best model saved at saved/best.pt
Epoch 103/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 226.00it/s, loss=1.8784]


[Epoch 103] Train Loss: 2.3507 | Val Loss: 1.9106
  -> New best model saved at saved/best.pt
Epoch 104/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 210.58it/s, loss=1.8548]


[Epoch 104] Train Loss: 2.3437 | Val Loss: 1.8871
  -> New best model saved at saved/best.pt
Epoch 105/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 279.18it/s, loss=1.8337]


[Epoch 105] Train Loss: 2.3186 | Val Loss: 1.8659
  -> New best model saved at saved/best.pt
Epoch 106/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 324.87it/s, loss=1.8119]


[Epoch 106] Train Loss: 2.3061 | Val Loss: 1.8438
  -> New best model saved at saved/best.pt
Epoch 107/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 257.48it/s, loss=1.7912]


[Epoch 107] Train Loss: 2.2834 | Val Loss: 1.8228
  -> New best model saved at saved/best.pt
Epoch 108/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 255.16it/s, loss=1.7681]


[Epoch 108] Train Loss: 2.2567 | Val Loss: 1.7997
  -> New best model saved at saved/best.pt
Epoch 109/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 184.21it/s, loss=1.7462]


[Epoch 109] Train Loss: 2.2434 | Val Loss: 1.7774
  -> New best model saved at saved/best.pt
Epoch 110/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 302.76it/s, loss=1.7240]


[Epoch 110] Train Loss: 2.2313 | Val Loss: 1.7545
  -> New best model saved at saved/best.pt
Epoch 111/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 279.61it/s, loss=1.7017]


[Epoch 111] Train Loss: 2.2160 | Val Loss: 1.7319
  -> New best model saved at saved/best.pt
Epoch 112/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 303.54it/s, loss=1.6809]


[Epoch 112] Train Loss: 2.1947 | Val Loss: 1.7112
  -> New best model saved at saved/best.pt
Epoch 113/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 258.49it/s, loss=1.6597]


[Epoch 113] Train Loss: 2.1673 | Val Loss: 1.6897
  -> New best model saved at saved/best.pt
Epoch 114/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 233.67it/s, loss=1.6395]


[Epoch 114] Train Loss: 2.1655 | Val Loss: 1.6694
  -> New best model saved at saved/best.pt
Epoch 115/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 242.51it/s, loss=1.6185]


[Epoch 115] Train Loss: 2.1439 | Val Loss: 1.6483
  -> New best model saved at saved/best.pt
Epoch 116/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 325.41it/s, loss=1.5991]


[Epoch 116] Train Loss: 2.1323 | Val Loss: 1.6285
  -> New best model saved at saved/best.pt
Epoch 117/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 268.92it/s, loss=1.5797]


[Epoch 117] Train Loss: 2.1086 | Val Loss: 1.6084
  -> New best model saved at saved/best.pt
Epoch 118/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 287.19it/s, loss=1.5598]


[Epoch 118] Train Loss: 2.0873 | Val Loss: 1.5883
  -> New best model saved at saved/best.pt
Epoch 119/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 173.00it/s, loss=1.5412]


[Epoch 119] Train Loss: 2.0799 | Val Loss: 1.5695
  -> New best model saved at saved/best.pt
Epoch 120/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 290.51it/s, loss=1.5213]


[Epoch 120] Train Loss: 2.0620 | Val Loss: 1.5496
  -> New best model saved at saved/best.pt
Epoch 121/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 287.72it/s, loss=1.5034]


[Epoch 121] Train Loss: 2.0493 | Val Loss: 1.5313
  -> New best model saved at saved/best.pt
Epoch 122/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 306.22it/s, loss=1.4852]


[Epoch 122] Train Loss: 2.0297 | Val Loss: 1.5129
  -> New best model saved at saved/best.pt
Epoch 123/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 276.92it/s, loss=1.4681]


[Epoch 123] Train Loss: 2.0182 | Val Loss: 1.4955
  -> New best model saved at saved/best.pt
Epoch 124/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 200.48it/s, loss=1.4486]


[Epoch 124] Train Loss: 2.0009 | Val Loss: 1.4757
  -> New best model saved at saved/best.pt
Epoch 125/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 260.86it/s, loss=1.4313]


[Epoch 125] Train Loss: 1.9833 | Val Loss: 1.4583
  -> New best model saved at saved/best.pt
Epoch 126/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 317.21it/s, loss=1.4141]


[Epoch 126] Train Loss: 1.9755 | Val Loss: 1.4411
  -> New best model saved at saved/best.pt
Epoch 127/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 265.87it/s, loss=1.3956]


[Epoch 127] Train Loss: 1.9637 | Val Loss: 1.4222
  -> New best model saved at saved/best.pt
Epoch 128/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 287.02it/s, loss=1.3797]


[Epoch 128] Train Loss: 1.9432 | Val Loss: 1.4057
  -> New best model saved at saved/best.pt
Epoch 129/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 230.29it/s, loss=1.3626]


[Epoch 129] Train Loss: 1.9274 | Val Loss: 1.3884
  -> New best model saved at saved/best.pt
Epoch 130/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 319.39it/s, loss=1.3449]


[Epoch 130] Train Loss: 1.9176 | Val Loss: 1.3705
  -> New best model saved at saved/best.pt
Epoch 131/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 281.76it/s, loss=1.3289]


[Epoch 131] Train Loss: 1.8976 | Val Loss: 1.3543
  -> New best model saved at saved/best.pt
Epoch 132/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 313.05it/s, loss=1.3133]


[Epoch 132] Train Loss: 1.8894 | Val Loss: 1.3383
  -> New best model saved at saved/best.pt
Epoch 133/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 196.03it/s, loss=1.2966]


[Epoch 133] Train Loss: 1.8764 | Val Loss: 1.3216
  -> New best model saved at saved/best.pt
Epoch 134/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 235.50it/s, loss=1.2809]


[Epoch 134] Train Loss: 1.8629 | Val Loss: 1.3056
  -> New best model saved at saved/best.pt
Epoch 135/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 193.63it/s, loss=1.2653]


[Epoch 135] Train Loss: 1.8493 | Val Loss: 1.2899
  -> New best model saved at saved/best.pt
Epoch 136/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 323.01it/s, loss=1.2494]


[Epoch 136] Train Loss: 1.8380 | Val Loss: 1.2739
  -> New best model saved at saved/best.pt
Epoch 137/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 272.31it/s, loss=1.2344]


[Epoch 137] Train Loss: 1.8207 | Val Loss: 1.2585
  -> New best model saved at saved/best.pt
Epoch 138/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 294.47it/s, loss=1.2208]


[Epoch 138] Train Loss: 1.8064 | Val Loss: 1.2446
  -> New best model saved at saved/best.pt
Epoch 139/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 252.35it/s, loss=1.2044]


[Epoch 139] Train Loss: 1.7964 | Val Loss: 1.2280
  -> New best model saved at saved/best.pt
Epoch 140/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 204.37it/s, loss=1.1903]


[Epoch 140] Train Loss: 1.7829 | Val Loss: 1.2137
  -> New best model saved at saved/best.pt
Epoch 141/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 258.45it/s, loss=1.1762]


[Epoch 141] Train Loss: 1.7707 | Val Loss: 1.1994
  -> New best model saved at saved/best.pt
Epoch 142/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 326.38it/s, loss=1.1614]


[Epoch 142] Train Loss: 1.7537 | Val Loss: 1.1844
  -> New best model saved at saved/best.pt
Epoch 143/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 279.65it/s, loss=1.1483]


[Epoch 143] Train Loss: 1.7443 | Val Loss: 1.1711
  -> New best model saved at saved/best.pt
Epoch 144/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 269.93it/s, loss=1.1341]


[Epoch 144] Train Loss: 1.7340 | Val Loss: 1.1565
  -> New best model saved at saved/best.pt
Epoch 145/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 214.14it/s, loss=1.1205]


[Epoch 145] Train Loss: 1.7165 | Val Loss: 1.1429
  -> New best model saved at saved/best.pt
Epoch 146/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 314.01it/s, loss=1.1069]


[Epoch 146] Train Loss: 1.7036 | Val Loss: 1.1291
  -> New best model saved at saved/best.pt
Epoch 147/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 280.43it/s, loss=1.0945]


[Epoch 147] Train Loss: 1.7002 | Val Loss: 1.1162
  -> New best model saved at saved/best.pt
Epoch 148/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 277.35it/s, loss=1.0819]


[Epoch 148] Train Loss: 1.6844 | Val Loss: 1.1034
  -> New best model saved at saved/best.pt
Epoch 149/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 258.33it/s, loss=1.0694]


[Epoch 149] Train Loss: 1.6715 | Val Loss: 1.0909
  -> New best model saved at saved/best.pt
Epoch 150/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 205.76it/s, loss=1.0566]


[Epoch 150] Train Loss: 1.6622 | Val Loss: 1.0779
  -> New best model saved at saved/best.pt
Epoch 151/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 218.93it/s, loss=1.0452]


[Epoch 151] Train Loss: 1.6483 | Val Loss: 1.0664
  -> New best model saved at saved/best.pt
Epoch 152/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 304.37it/s, loss=1.0334]


[Epoch 152] Train Loss: 1.6339 | Val Loss: 1.0544
  -> New best model saved at saved/best.pt
Epoch 153/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 265.80it/s, loss=1.0210]


[Epoch 153] Train Loss: 1.6273 | Val Loss: 1.0419
  -> New best model saved at saved/best.pt
Epoch 154/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 273.79it/s, loss=1.0101]


[Epoch 154] Train Loss: 1.6082 | Val Loss: 1.0308
  -> New best model saved at saved/best.pt
Epoch 155/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 256.02it/s, loss=0.9985]


[Epoch 155] Train Loss: 1.6028 | Val Loss: 1.0191
  -> New best model saved at saved/best.pt
Epoch 156/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 201.21it/s, loss=0.9877]


[Epoch 156] Train Loss: 1.5917 | Val Loss: 1.0079
  -> New best model saved at saved/best.pt
Epoch 157/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 246.11it/s, loss=0.9766]


[Epoch 157] Train Loss: 1.5773 | Val Loss: 0.9968
  -> New best model saved at saved/best.pt
Epoch 158/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 309.77it/s, loss=0.9655]


[Epoch 158] Train Loss: 1.5653 | Val Loss: 0.9856
  -> New best model saved at saved/best.pt
Epoch 159/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 197.78it/s, loss=0.9547]


[Epoch 159] Train Loss: 1.5584 | Val Loss: 0.9743
  -> New best model saved at saved/best.pt
Epoch 160/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 306.51it/s, loss=0.9441]


[Epoch 160] Train Loss: 1.5486 | Val Loss: 0.9637
  -> New best model saved at saved/best.pt
Epoch 161/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 187.31it/s, loss=0.9340]


[Epoch 161] Train Loss: 1.5367 | Val Loss: 0.9536
  -> New best model saved at saved/best.pt
Epoch 162/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 351.48it/s, loss=0.9236]


[Epoch 162] Train Loss: 1.5287 | Val Loss: 0.9430
  -> New best model saved at saved/best.pt
Epoch 163/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 306.54it/s, loss=0.9131]


[Epoch 163] Train Loss: 1.5135 | Val Loss: 0.9326
  -> New best model saved at saved/best.pt
Epoch 164/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 320.34it/s, loss=0.9036]


[Epoch 164] Train Loss: 1.5053 | Val Loss: 0.9229
  -> New best model saved at saved/best.pt
Epoch 165/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 285.12it/s, loss=0.8937]


[Epoch 165] Train Loss: 1.4991 | Val Loss: 0.9130
  -> New best model saved at saved/best.pt
Epoch 166/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 227.57it/s, loss=0.8838]


[Epoch 166] Train Loss: 1.4842 | Val Loss: 0.9032
  -> New best model saved at saved/best.pt
Epoch 167/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 247.63it/s, loss=0.8747]


[Epoch 167] Train Loss: 1.4740 | Val Loss: 0.8940
  -> New best model saved at saved/best.pt
Epoch 168/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 341.92it/s, loss=0.8653]


[Epoch 168] Train Loss: 1.4588 | Val Loss: 0.8846
  -> New best model saved at saved/best.pt
Epoch 169/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 266.95it/s, loss=0.8560]


[Epoch 169] Train Loss: 1.4564 | Val Loss: 0.8754
  -> New best model saved at saved/best.pt
Epoch 170/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 276.69it/s, loss=0.8469]


[Epoch 170] Train Loss: 1.4448 | Val Loss: 0.8663
  -> New best model saved at saved/best.pt
Epoch 171/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 204.35it/s, loss=0.8382]


[Epoch 171] Train Loss: 1.4361 | Val Loss: 0.8577
  -> New best model saved at saved/best.pt
Epoch 172/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 344.18it/s, loss=0.8294]


[Epoch 172] Train Loss: 1.4262 | Val Loss: 0.8488
  -> New best model saved at saved/best.pt
Epoch 173/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 271.55it/s, loss=0.8210]


[Epoch 173] Train Loss: 1.4049 | Val Loss: 0.8400
  -> New best model saved at saved/best.pt
Epoch 174/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 294.35it/s, loss=0.8122]


[Epoch 174] Train Loss: 1.4105 | Val Loss: 0.8314
  -> New best model saved at saved/best.pt
Epoch 175/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 184.32it/s, loss=0.8041]


[Epoch 175] Train Loss: 1.3909 | Val Loss: 0.8232
  -> New best model saved at saved/best.pt
Epoch 176/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 302.25it/s, loss=0.7958]


[Epoch 176] Train Loss: 1.3859 | Val Loss: 0.8149
  -> New best model saved at saved/best.pt
Epoch 177/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 308.34it/s, loss=0.7877]


[Epoch 177] Train Loss: 1.3765 | Val Loss: 0.8068
  -> New best model saved at saved/best.pt
Epoch 178/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 308.64it/s, loss=0.7796]


[Epoch 178] Train Loss: 1.3654 | Val Loss: 0.7989
  -> New best model saved at saved/best.pt
Epoch 179/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 251.29it/s, loss=0.7720]


[Epoch 179] Train Loss: 1.3619 | Val Loss: 0.7911
  -> New best model saved at saved/best.pt
Epoch 180/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 222.56it/s, loss=0.7642]


[Epoch 180] Train Loss: 1.3456 | Val Loss: 0.7832
  -> New best model saved at saved/best.pt
Epoch 181/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 263.90it/s, loss=0.7563]


[Epoch 181] Train Loss: 1.3384 | Val Loss: 0.7757
  -> New best model saved at saved/best.pt
Epoch 182/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 329.64it/s, loss=0.7489]


[Epoch 182] Train Loss: 1.3289 | Val Loss: 0.7682
  -> New best model saved at saved/best.pt
Epoch 183/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 301.24it/s, loss=0.7415]


[Epoch 183] Train Loss: 1.3204 | Val Loss: 0.7607
  -> New best model saved at saved/best.pt
Epoch 184/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 269.14it/s, loss=0.7343]


[Epoch 184] Train Loss: 1.3050 | Val Loss: 0.7534
  -> New best model saved at saved/best.pt
Epoch 185/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 187.33it/s, loss=0.7272]


[Epoch 185] Train Loss: 1.3088 | Val Loss: 0.7464
  -> New best model saved at saved/best.pt
Epoch 186/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 309.63it/s, loss=0.7197]


[Epoch 186] Train Loss: 1.2939 | Val Loss: 0.7393
  -> New best model saved at saved/best.pt
Epoch 187/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 304.51it/s, loss=0.7132]


[Epoch 187] Train Loss: 1.2881 | Val Loss: 0.7324
  -> New best model saved at saved/best.pt
Epoch 188/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 285.25it/s, loss=0.7059]


[Epoch 188] Train Loss: 1.2736 | Val Loss: 0.7254
  -> New best model saved at saved/best.pt
Epoch 189/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 181.69it/s, loss=0.6993]


[Epoch 189] Train Loss: 1.2727 | Val Loss: 0.7186
  -> New best model saved at saved/best.pt
Epoch 190/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 325.18it/s, loss=0.6927]


[Epoch 190] Train Loss: 1.2605 | Val Loss: 0.7118
  -> New best model saved at saved/best.pt
Epoch 191/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 300.68it/s, loss=0.6859]


[Epoch 191] Train Loss: 1.2517 | Val Loss: 0.7052
  -> New best model saved at saved/best.pt
Epoch 192/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 316.61it/s, loss=0.6792]


[Epoch 192] Train Loss: 1.2388 | Val Loss: 0.6985
  -> New best model saved at saved/best.pt
Epoch 193/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 237.06it/s, loss=0.6728]


[Epoch 193] Train Loss: 1.2316 | Val Loss: 0.6921
  -> New best model saved at saved/best.pt
Epoch 194/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 254.10it/s, loss=0.6660]


[Epoch 194] Train Loss: 1.2256 | Val Loss: 0.6854
  -> New best model saved at saved/best.pt
Epoch 195/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 254.24it/s, loss=0.6602]


[Epoch 195] Train Loss: 1.2220 | Val Loss: 0.6793
  -> New best model saved at saved/best.pt
Epoch 196/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 321.91it/s, loss=0.6537]


[Epoch 196] Train Loss: 1.2068 | Val Loss: 0.6730
  -> New best model saved at saved/best.pt
Epoch 197/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 280.09it/s, loss=0.6480]


[Epoch 197] Train Loss: 1.1942 | Val Loss: 0.6670
  -> New best model saved at saved/best.pt
Epoch 198/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 251.48it/s, loss=0.6416]


[Epoch 198] Train Loss: 1.1898 | Val Loss: 0.6608
  -> New best model saved at saved/best.pt
Epoch 199/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 240.08it/s, loss=0.6358]


[Epoch 199] Train Loss: 1.1816 | Val Loss: 0.6549
  -> New best model saved at saved/best.pt
Epoch 200/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 327.92it/s, loss=0.6297]


[Epoch 200] Train Loss: 1.1768 | Val Loss: 0.6491
  -> New best model saved at saved/best.pt
Epoch 201/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 268.63it/s, loss=0.6239]


[Epoch 201] Train Loss: 1.1700 | Val Loss: 0.6431
  -> New best model saved at saved/best.pt
Epoch 202/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 289.22it/s, loss=0.6180]


[Epoch 202] Train Loss: 1.1618 | Val Loss: 0.6373
  -> New best model saved at saved/best.pt
Epoch 203/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 193.95it/s, loss=0.6117]


[Epoch 203] Train Loss: 1.1471 | Val Loss: 0.6312
  -> New best model saved at saved/best.pt
Epoch 204/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 336.79it/s, loss=0.6062]


[Epoch 204] Train Loss: 1.1423 | Val Loss: 0.6255
  -> New best model saved at saved/best.pt
Epoch 205/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 292.90it/s, loss=0.6004]


[Epoch 205] Train Loss: 1.1349 | Val Loss: 0.6200
  -> New best model saved at saved/best.pt
Epoch 206/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 317.74it/s, loss=0.5946]


[Epoch 206] Train Loss: 1.1295 | Val Loss: 0.6144
  -> New best model saved at saved/best.pt
Epoch 207/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 269.61it/s, loss=0.5895]


[Epoch 207] Train Loss: 1.1220 | Val Loss: 0.6090
  -> New best model saved at saved/best.pt
Epoch 208/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 258.75it/s, loss=0.5839]


[Epoch 208] Train Loss: 1.1093 | Val Loss: 0.6035
  -> New best model saved at saved/best.pt
Epoch 209/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 307.25it/s, loss=0.5785]


[Epoch 209] Train Loss: 1.1072 | Val Loss: 0.5980
  -> New best model saved at saved/best.pt
Epoch 210/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 325.90it/s, loss=0.5733]


[Epoch 210] Train Loss: 1.0950 | Val Loss: 0.5926
  -> New best model saved at saved/best.pt
Epoch 211/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 270.47it/s, loss=0.5681]


[Epoch 211] Train Loss: 1.0898 | Val Loss: 0.5873
  -> New best model saved at saved/best.pt
Epoch 212/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 219.52it/s, loss=0.5625]


[Epoch 212] Train Loss: 1.0827 | Val Loss: 0.5819
  -> New best model saved at saved/best.pt
Epoch 213/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 258.73it/s, loss=0.5572]


[Epoch 213] Train Loss: 1.0735 | Val Loss: 0.5767
  -> New best model saved at saved/best.pt
Epoch 214/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 294.43it/s, loss=0.5519]


[Epoch 214] Train Loss: 1.0713 | Val Loss: 0.5715
  -> New best model saved at saved/best.pt
Epoch 215/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 286.68it/s, loss=0.5466]


[Epoch 215] Train Loss: 1.0660 | Val Loss: 0.5665
  -> New best model saved at saved/best.pt
Epoch 216/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 296.52it/s, loss=0.5416]


[Epoch 216] Train Loss: 1.0524 | Val Loss: 0.5614
  -> New best model saved at saved/best.pt
Epoch 217/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 178.16it/s, loss=0.5366]


[Epoch 217] Train Loss: 1.0473 | Val Loss: 0.5564
  -> New best model saved at saved/best.pt
Epoch 218/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 333.71it/s, loss=0.5315]


[Epoch 218] Train Loss: 1.0358 | Val Loss: 0.5513
  -> New best model saved at saved/best.pt
Epoch 219/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 266.70it/s, loss=0.5265]


[Epoch 219] Train Loss: 1.0306 | Val Loss: 0.5464
  -> New best model saved at saved/best.pt
Epoch 220/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 248.93it/s, loss=0.5213]


[Epoch 220] Train Loss: 1.0248 | Val Loss: 0.5414
  -> New best model saved at saved/best.pt
Epoch 221/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 173.22it/s, loss=0.5168]


[Epoch 221] Train Loss: 1.0118 | Val Loss: 0.5366
  -> New best model saved at saved/best.pt
Epoch 222/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 330.57it/s, loss=0.5117]


[Epoch 222] Train Loss: 1.0113 | Val Loss: 0.5318
  -> New best model saved at saved/best.pt
Epoch 223/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 266.79it/s, loss=0.5069]


[Epoch 223] Train Loss: 1.0017 | Val Loss: 0.5271
  -> New best model saved at saved/best.pt
Epoch 224/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 321.34it/s, loss=0.5022]


[Epoch 224] Train Loss: 1.0016 | Val Loss: 0.5223
  -> New best model saved at saved/best.pt
Epoch 225/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 291.19it/s, loss=0.4974]


[Epoch 225] Train Loss: 0.9899 | Val Loss: 0.5177
  -> New best model saved at saved/best.pt
Epoch 226/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 236.66it/s, loss=0.4928]


[Epoch 226] Train Loss: 0.9798 | Val Loss: 0.5132
  -> New best model saved at saved/best.pt
Epoch 227/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 244.23it/s, loss=0.4885]


[Epoch 227] Train Loss: 0.9728 | Val Loss: 0.5086
  -> New best model saved at saved/best.pt
Epoch 228/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 298.85it/s, loss=0.4837]


[Epoch 228] Train Loss: 0.9722 | Val Loss: 0.5040
  -> New best model saved at saved/best.pt
Epoch 229/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 282.56it/s, loss=0.4791]


[Epoch 229] Train Loss: 0.9594 | Val Loss: 0.4995
  -> New best model saved at saved/best.pt
Epoch 230/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 293.56it/s, loss=0.4746]


[Epoch 230] Train Loss: 0.9559 | Val Loss: 0.4949
  -> New best model saved at saved/best.pt
Epoch 231/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 179.12it/s, loss=0.4699]


[Epoch 231] Train Loss: 0.9521 | Val Loss: 0.4904
  -> New best model saved at saved/best.pt
Epoch 232/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 309.86it/s, loss=0.4653]


[Epoch 232] Train Loss: 0.9383 | Val Loss: 0.4860
  -> New best model saved at saved/best.pt
Epoch 233/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 308.38it/s, loss=0.4610]


[Epoch 233] Train Loss: 0.9361 | Val Loss: 0.4816
  -> New best model saved at saved/best.pt
Epoch 234/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 321.90it/s, loss=0.4565]


[Epoch 234] Train Loss: 0.9305 | Val Loss: 0.4773
  -> New best model saved at saved/best.pt
Epoch 235/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 289.39it/s, loss=0.4521]


[Epoch 235] Train Loss: 0.9251 | Val Loss: 0.4729
  -> New best model saved at saved/best.pt
Epoch 236/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 218.44it/s, loss=0.4477]


[Epoch 236] Train Loss: 0.9149 | Val Loss: 0.4687
  -> New best model saved at saved/best.pt
Epoch 237/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 227.64it/s, loss=0.4436]


[Epoch 237] Train Loss: 0.9108 | Val Loss: 0.4645
  -> New best model saved at saved/best.pt
Epoch 238/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 317.77it/s, loss=0.4393]


[Epoch 238] Train Loss: 0.8989 | Val Loss: 0.4602
  -> New best model saved at saved/best.pt
Epoch 239/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 281.14it/s, loss=0.4349]


[Epoch 239] Train Loss: 0.8924 | Val Loss: 0.4560
  -> New best model saved at saved/best.pt
Epoch 240/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 275.25it/s, loss=0.4306]


[Epoch 240] Train Loss: 0.8890 | Val Loss: 0.4519
  -> New best model saved at saved/best.pt
Epoch 241/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 210.06it/s, loss=0.4266]


[Epoch 241] Train Loss: 0.8848 | Val Loss: 0.4478
  -> New best model saved at saved/best.pt
Epoch 242/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 339.15it/s, loss=0.4224]


[Epoch 242] Train Loss: 0.8786 | Val Loss: 0.4437
  -> New best model saved at saved/best.pt
Epoch 243/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 302.68it/s, loss=0.4185]


[Epoch 243] Train Loss: 0.8683 | Val Loss: 0.4396
  -> New best model saved at saved/best.pt
Epoch 244/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 294.18it/s, loss=0.4142]


[Epoch 244] Train Loss: 0.8704 | Val Loss: 0.4356
  -> New best model saved at saved/best.pt
Epoch 245/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 248.81it/s, loss=0.4103]


[Epoch 245] Train Loss: 0.8606 | Val Loss: 0.4316
  -> New best model saved at saved/best.pt
Epoch 246/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 236.11it/s, loss=0.4062]


[Epoch 246] Train Loss: 0.8566 | Val Loss: 0.4277
  -> New best model saved at saved/best.pt
Epoch 247/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 295.77it/s, loss=0.4023]


[Epoch 247] Train Loss: 0.8453 | Val Loss: 0.4238
  -> New best model saved at saved/best.pt
Epoch 248/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 327.43it/s, loss=0.3985]


[Epoch 248] Train Loss: 0.8370 | Val Loss: 0.4199
  -> New best model saved at saved/best.pt
Epoch 249/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 267.02it/s, loss=0.3945]


[Epoch 249] Train Loss: 0.8374 | Val Loss: 0.4160
  -> New best model saved at saved/best.pt
Epoch 250/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 283.50it/s, loss=0.3908]


[Epoch 250] Train Loss: 0.8273 | Val Loss: 0.4121
  -> New best model saved at saved/best.pt


## Bleu Score

In [None]:
import os
import torch
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

# Load the best model for testing.
encoder = TransformerEncoder(SRC_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
best_model = Transformer(encoder, decoder, device, utils.PAD_TOKEN).to(device)
best_model.load_state_dict(torch.load(os.path.join(SAVE_DIR, "best.pt")))
print("Loaded best model for testing!")

def greedy_decode(model, src, max_len, start_symbol=utils.SOS_TOKEN):
    model.eval()
    src_mask = model.make_input_mask(src)  # shape: [batch, 1, 1, src_len]
    memory = model.encoder(src, src_mask)
    batch_size = src.size(0)
    ys = torch.full((batch_size, max_len), utils.PAD_TOKEN, dtype=torch.long, device=src.device)
    ys[:, 0] = start_symbol
    current_length = 1
    for i in range(1, max_len):
        current_tgt = ys[:, :current_length]
        padded_tgt = torch.cat([
            current_tgt,
            torch.full((batch_size, max_len - current_length), utils.PAD_TOKEN, device=src.device)
        ], dim=1)
        out, _ = model(src, padded_tgt)
        predicted_logits = out[:, current_length - 1, :]  
        next_token = predicted_logits.argmax(dim=-1)
        ys[:, current_length] = next_token
        current_length += 1
        if next_token.item() == utils.EOS_TOKEN:
            break
    return ys[:, :current_length]

# Prepare BLEU evaluation.
smooth_fn = SmoothingFunction().method1
references = []
hypotheses = []
best_model.eval()

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        # Ensure batch dimensions are correct for src and tgt.
        if src_batch.dim() == 1:
            src_batch = src_batch.unsqueeze(0)
        if tgt_batch.dim() == 1:
            tgt_batch = tgt_batch.unsqueeze(0)
            
        preds = greedy_decode(best_model, src_batch, max_len=70)
        
        for i in range(src_batch.size(0)):
            gold = tgt_batch[i].tolist()
            # Remove SOS and PAD tokens from gold.
            gold_tokens = [str(t) for t in gold if t not in [utils.SOS_TOKEN, utils.PAD_TOKEN]]
            if utils.EOS_TOKEN in gold_tokens:
                gold_tokens = gold_tokens[:gold_tokens.index(utils.EOS_TOKEN)]
            pred = preds[i].tolist()
            # Remove SOS from predictions.
            pred_tokens = [str(t) for t in pred if t != utils.SOS_TOKEN]
            if utils.EOS_TOKEN in pred_tokens:
                pred_tokens = pred_tokens[:pred_tokens.index(utils.EOS_TOKEN)]
            references.append([gold_tokens])
            hypotheses.append(pred_tokens)
weights = (0.5, 0.5)
bleu_score = corpus_bleu(
    references,
    hypotheses,
    weights=weights,
    smoothing_function=smooth_fn
)
print(f"Test BLEU-2: {bleu_score:.4f}")


Loaded best model for testing!


Testing: 100%|██████████| 664/664 [01:16<00:00,  8.73it/s]

Test BLEU-2: 0.0000





## Translasi

In [None]:
import os
import pickle
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# -----------------------------------------------------------------------------
# 1. Load the best model for testing
# -----------------------------------------------------------------------------

# Create encoder, decoder, and the full Transformer model.
encoder = TransformerEncoder(SRC_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
best_model = Transformer(encoder, decoder, device, utils.PAD_TOKEN).to(device)

# Load the best model state
model_path = os.path.join(SAVE_DIR, "best.pt")
best_model.load_state_dict(torch.load(model_path, map_location=device))
print("Loaded best model for testing!")
# -----------------------------------------------------------------------------
# 2. Load the input and output dictionaries
# -----------------------------------------------------------------------------
pth = "dataset"
src = "min"
tgt = "eng"
tp  = os.path.join(pth,f"{src}_{tgt}")
input_dic_path = os.path.join(tp, "input_dic.pkl")
output_dic_path = os.path.join(tp, "output_dic.pkl")
with open(input_dic_path, "rb") as f:
    input_dictionary = pickle.load(f)
with open(output_dic_path, "rb") as f:
    output_dictionary = pickle.load(f)
# -----------------------------------------------------------------------------
# 4. Define a translation function that uses tokenization, model inference, and detokenization
# -----------------------------------------------------------------------------
def translate_sentence(sentence, model, input_dictionary, output_dictionary, max_length=50):
    tokenized = utils.tokenize(sentence, input_dictionary, MAX_LENGTH=max_length)
    src_tensor = torch.tensor(tokenized).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        output_tokens = greedy_decode(model, src_tensor, max_length)
    # Convert the output token indices back to words using detokenize
    translated_sentence = utils.detokenize(output_tokens[0].tolist(), output_dictionary)
    return translated_sentence

# -----------------------------------------------------------------------------
# 5. Process a list of sentences and output their translations
# -----------------------------------------------------------------------------
minang_sentences = [
    "Ambo mancari awaknyo besok",
    "Alun salama, apo kabar?",
    "Dunsanak ka rumah gadang",
    "Urang minang manarimo tradisi",
    "Apo ado di pasar?"
]

for sentence in minang_sentences:
    translation = translate_sentence(sentence, best_model, input_dictionary, output_dictionary)
    print(f"Original:   {sentence}")
    print(f"Translated: {translation}\n")


Loaded best model for testing!
Original:   Ambo mancari awaknyo besok
Translated: 

Original:   Alun salama, apo kabar?
Translated: 

Original:   Dunsanak ka rumah gadang
Translated: 

Original:   Urang minang manarimo tradisi
Translated: 

Original:   Apo ado di pasar?
Translated: 



# Translasi: 
Ambo mancari awaknyo besok

    Bahasa Indonesia: Saya akan mencarimu besok.
    English: I will look for you tomorrow.

Alun salama, apo kabar?

    Bahasa Indonesia: Halo, apa kabar?
    English: Hello, how are you?

Dunsanak ka rumah gadang

    Bahasa Indonesia: Saudara, mari ke rumah gadang.
    English: Relatives, let's go to the traditional house.

Urang minang manarimo tradisi

    Bahasa Indonesia: Orang Minang menerima tradisi.
    English: Minangkabau people embrace tradition.

Apo ado di pasar?

    Bahasa Indonesia: Apa ada di pasar?
    English: What's there in the market?