In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
import utils
import pickle
nltk.download('punkt')  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
BATCH_SIZE = 8 # butuh lebih banyak update 
DATA_PATH = "dataset/"  
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH, 
    source_lang="min", 
    target_lang="eng", 
    batch_size=BATCH_SIZE, 
    device=device
)
SRC_VOCAB_SIZE = 5000  
TGT_VOCAB_SIZE = 5000  
DROPOUT = 0.15      
N_HEADS = 2
N_LAYERS = 4       
FFN_HIDDEN = 256      
D_MODEL = 128        

EPOCHS = 250
SAVE_DIR = "saved"
encoder = TransformerEncoder(SRC_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE,D_MODEL,N_LAYERS,N_HEADS,FFN_HIDDEN,DROPOUT,device)
model = Transformer(encoder,decoder,device,utils.PAD_TOKEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN) 
print("Model initialized on:", device)


Model initialized on: cuda


In [3]:
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}
for i, batch in enumerate(train_loader):
    if i < 1:
        src = batch["src"]
        tgt = batch["tgt"]
        ss,fss=src[0,:],src.shape
        st,fst=tgt[0,:],tgt.shape
        print(fss,ss)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    else:
        break


torch.Size([8, 52]) tensor([  1,  55, 134, 987,  58,  47, 988, 989, 239, 126,  18,   7, 990, 991,
         23, 992,  40,  93, 993,   8, 521,  27, 355, 987, 239, 994,  27,   2,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


  src_batch = [torch.tensor(item['src'], dtype=torch.long) for item in batch]
  tgt_batch = [torch.tensor(item['tgt'], dtype=torch.long) for item in batch]


testing input , is it correct

In [4]:
for i,batch in enumerate(train_loader):
    if i <1:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)
        print(tgt_y)

tensor([ 431,    6,  456,   17,  542,   28, 1188,   22,  156, 1164,  174, 1189,
          39, 1190,  416,  115,  840,  742,   61, 1191,   30,  282,   22,  471,
          28,    2,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,  376,  322,  161,  107,  226, 1382,   28,   10,  135,
          70, 1383,   92, 1384,   39,   10,  135,   70, 1295,   36, 1385,    2,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,  241,  769,   31,   21,  623,   10,
          73,  388, 1990,  524,  549, 1255, 1991,   73, 1992, 1993,  163,   22,
         471,  132,   73,   94,  760,  760,  847,   28,   22, 1994, 1995,   39,
          22, 1296,    6,   22, 1996, 1269,  115,  283, 1997,   76,   87, 1998,
         858,  127,  132,   28,    2,   


        output,_= model(src_batch, tgt_batch)
        output_dim = output.shape[-1]
        output = output[:, :-1, :].reshape(-1, output_dim)
        tgt_y  = tgt_batch[:, 1:].reshape(-1)
        pth = "dataset"
src = "min"
tgt = "eng"
tp  = os.path.join(pth,f"{src}_{tgt}")
input_dic_path = os.path.join(tp, "input_dic.pkl")
output_dic_path = os.path.join(tp, "output_dic.pkl")
with open(input_dic_path, "rb") as f:
    inp = pickle.load(f)
with open(output_dic_path, "rb") as f:
    output_dictionary = pickle.load(f)

In [None]:
import os
import torch
from tqdm import tqdm

os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf")
best_model_path = None
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training", 
                leave=False, total=len(train_loader))
    for batch in train_loader:
        optimizer.zero_grad()
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)

        loss = criterion(output, tgt_y)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
              leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            src_batch = batch['src'].to(device)
            tgt_batch = batch['tgt'].to(device)
            
            output, _ = model(src_batch, tgt_batch[:, :-1]) 
            output_dim = output.shape[-1]
            output = output.reshape(-1, output_dim)
            tgt_y = tgt_batch[:,1:].contiguous().view(-1)

            loss = criterion(output, tgt_y)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")
    
    avg_val_loss = total_val_loss / len(val_loader)
    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        if best_model_path and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_val_loss = avg_val_loss
        best_model_path = os.path.join(SAVE_DIR, "best.pt")
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> New best model saved at {best_model_path}")


Epoch 1/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 142.08it/s, loss=8.4913]


[Epoch 1] Train Loss: 8.5771 | Val Loss: 8.5268
  -> New best model saved at saved/best.pt
Epoch 2/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 169.74it/s, loss=8.1761]


[Epoch 2] Train Loss: 8.2827 | Val Loss: 8.2291
  -> New best model saved at saved/best.pt
Epoch 3/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 146.11it/s, loss=7.9439]


[Epoch 3] Train Loss: 8.0092 | Val Loss: 8.0026
  -> New best model saved at saved/best.pt
Epoch 4/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 164.01it/s, loss=7.7867]


[Epoch 4] Train Loss: 7.8051 | Val Loss: 7.8457
  -> New best model saved at saved/best.pt
Epoch 5/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 112.15it/s, loss=7.6625]


[Epoch 5] Train Loss: 7.6531 | Val Loss: 7.7196
  -> New best model saved at saved/best.pt
Epoch 6/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 169.47it/s, loss=7.5562]


[Epoch 6] Train Loss: 7.5204 | Val Loss: 7.6108
  -> New best model saved at saved/best.pt
Epoch 7/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 152.93it/s, loss=7.4625]


[Epoch 7] Train Loss: 7.4008 | Val Loss: 7.5158
  -> New best model saved at saved/best.pt
Epoch 8/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 167.54it/s, loss=7.3768]


[Epoch 8] Train Loss: 7.2898 | Val Loss: 7.4281
  -> New best model saved at saved/best.pt
Epoch 9/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 145.07it/s, loss=7.2972]


[Epoch 9] Train Loss: 7.1924 | Val Loss: 7.3490
  -> New best model saved at saved/best.pt
Epoch 10/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 167.05it/s, loss=7.2244]


[Epoch 10] Train Loss: 7.0960 | Val Loss: 7.2755
  -> New best model saved at saved/best.pt
Epoch 11/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 137.71it/s, loss=7.1575]


[Epoch 11] Train Loss: 7.0068 | Val Loss: 7.2079
  -> New best model saved at saved/best.pt
Epoch 12/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 152.63it/s, loss=7.0965]


[Epoch 12] Train Loss: 6.9244 | Val Loss: 7.1467
  -> New best model saved at saved/best.pt
Epoch 13/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 114.14it/s, loss=7.0401]


[Epoch 13] Train Loss: 6.8500 | Val Loss: 7.0899
  -> New best model saved at saved/best.pt
Epoch 14/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 110.03it/s, loss=6.9882]


[Epoch 14] Train Loss: 6.7736 | Val Loss: 7.0369
  -> New best model saved at saved/best.pt
Epoch 15/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 146.05it/s, loss=6.9417]


[Epoch 15] Train Loss: 6.7080 | Val Loss: 6.9901
  -> New best model saved at saved/best.pt
Epoch 16/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 159.13it/s, loss=6.9007]


[Epoch 16] Train Loss: 6.6422 | Val Loss: 6.9484
  -> New best model saved at saved/best.pt
Epoch 17/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 153.76it/s, loss=6.8615]


[Epoch 17] Train Loss: 6.5878 | Val Loss: 6.9092
  -> New best model saved at saved/best.pt
Epoch 18/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 181.00it/s, loss=6.8286]


[Epoch 18] Train Loss: 6.5327 | Val Loss: 6.8754
  -> New best model saved at saved/best.pt
Epoch 19/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 129.18it/s, loss=6.7999]


[Epoch 19] Train Loss: 6.4854 | Val Loss: 6.8456
  -> New best model saved at saved/best.pt
Epoch 20/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 183.52it/s, loss=6.7744]


[Epoch 20] Train Loss: 6.4389 | Val Loss: 6.8201
  -> New best model saved at saved/best.pt
Epoch 21/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 142.91it/s, loss=6.7498]


[Epoch 21] Train Loss: 6.3954 | Val Loss: 6.7949
  -> New best model saved at saved/best.pt
Epoch 22/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 107.61it/s, loss=6.7299]


[Epoch 22] Train Loss: 6.3597 | Val Loss: 6.7745
  -> New best model saved at saved/best.pt
Epoch 23/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 100.19it/s, loss=6.7125]


[Epoch 23] Train Loss: 6.3269 | Val Loss: 6.7570
  -> New best model saved at saved/best.pt
Epoch 24/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 133.71it/s, loss=6.6950]


[Epoch 24] Train Loss: 6.2943 | Val Loss: 6.7395
  -> New best model saved at saved/best.pt
Epoch 25/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 116.30it/s, loss=6.6809]


[Epoch 25] Train Loss: 6.2627 | Val Loss: 6.7247
  -> New best model saved at saved/best.pt
Epoch 26/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 111.92it/s, loss=6.6674]


[Epoch 26] Train Loss: 6.2333 | Val Loss: 6.7117
  -> New best model saved at saved/best.pt
Epoch 27/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 130.27it/s, loss=6.6558]


[Epoch 27] Train Loss: 6.2139 | Val Loss: 6.6998
  -> New best model saved at saved/best.pt
Epoch 28/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 129.61it/s, loss=6.6474]


[Epoch 28] Train Loss: 6.1907 | Val Loss: 6.6903
  -> New best model saved at saved/best.pt
Epoch 29/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 102.59it/s, loss=6.6402]


[Epoch 29] Train Loss: 6.1656 | Val Loss: 6.6825
  -> New best model saved at saved/best.pt
Epoch 30/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 133.04it/s, loss=6.6314]


[Epoch 30] Train Loss: 6.1459 | Val Loss: 6.6736
  -> New best model saved at saved/best.pt
Epoch 31/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 133.48it/s, loss=6.6267]


[Epoch 31] Train Loss: 6.1329 | Val Loss: 6.6678
  -> New best model saved at saved/best.pt
Epoch 32/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 125.84it/s, loss=6.6234]


[Epoch 32] Train Loss: 6.1057 | Val Loss: 6.6637
  -> New best model saved at saved/best.pt
Epoch 33/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 134.53it/s, loss=6.6211]


[Epoch 33] Train Loss: 6.0898 | Val Loss: 6.6592
  -> New best model saved at saved/best.pt
Epoch 34/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 175.14it/s, loss=6.6140]


[Epoch 34] Train Loss: 6.0747 | Val Loss: 6.6506
  -> New best model saved at saved/best.pt
Epoch 35/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 131.46it/s, loss=6.6138]


[Epoch 35] Train Loss: 6.0566 | Val Loss: 6.6481
  -> New best model saved at saved/best.pt
Epoch 36/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 179.16it/s, loss=6.6132]


[Epoch 36] Train Loss: 6.0416 | Val Loss: 6.6454
  -> New best model saved at saved/best.pt
Epoch 37/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 153.31it/s, loss=6.6097]


[Epoch 37] Train Loss: 6.0269 | Val Loss: 6.6403
  -> New best model saved at saved/best.pt
Epoch 38/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 174.60it/s, loss=6.6083]


[Epoch 38] Train Loss: 6.0170 | Val Loss: 6.6368
  -> New best model saved at saved/best.pt
Epoch 39/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 149.98it/s, loss=6.6064]


[Epoch 39] Train Loss: 5.9943 | Val Loss: 6.6313
  -> New best model saved at saved/best.pt
Epoch 40/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 180.25it/s, loss=6.6043]


[Epoch 40] Train Loss: 5.9853 | Val Loss: 6.6282
  -> New best model saved at saved/best.pt
Epoch 41/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 105.42it/s, loss=6.6016]


[Epoch 41] Train Loss: 5.9698 | Val Loss: 6.6220
  -> New best model saved at saved/best.pt
Epoch 42/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 161.27it/s, loss=6.5992]


[Epoch 42] Train Loss: 5.9559 | Val Loss: 6.6191
  -> New best model saved at saved/best.pt
Epoch 43/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 105.85it/s, loss=6.5962]


[Epoch 43] Train Loss: 5.9410 | Val Loss: 6.6150
  -> New best model saved at saved/best.pt
Epoch 44/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 165.66it/s, loss=6.5942]


[Epoch 44] Train Loss: 5.9329 | Val Loss: 6.6124
  -> New best model saved at saved/best.pt
Epoch 45/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 108.44it/s, loss=6.5925]


[Epoch 45] Train Loss: 5.9190 | Val Loss: 6.6093
  -> New best model saved at saved/best.pt
Epoch 46/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 158.46it/s, loss=6.5852]


[Epoch 46] Train Loss: 5.9028 | Val Loss: 6.6025
  -> New best model saved at saved/best.pt
Epoch 47/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 111.38it/s, loss=6.5857]


[Epoch 47] Train Loss: 5.8928 | Val Loss: 6.6003
  -> New best model saved at saved/best.pt
Epoch 48/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 126.74it/s, loss=6.5851]


[Epoch 48] Train Loss: 5.8801 | Val Loss: 6.5995
  -> New best model saved at saved/best.pt
Epoch 49/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 104.93it/s, loss=6.5840]


[Epoch 49] Train Loss: 5.8685 | Val Loss: 6.5978
  -> New best model saved at saved/best.pt
Epoch 50/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 125.65it/s, loss=6.5808]


[Epoch 50] Train Loss: 5.8582 | Val Loss: 6.5938
  -> New best model saved at saved/best.pt
Epoch 51/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 123.67it/s, loss=6.5737]


[Epoch 51] Train Loss: 5.8461 | Val Loss: 6.5868
  -> New best model saved at saved/best.pt
Epoch 52/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 173.11it/s, loss=6.5761]


[Epoch 52] Train Loss: 5.8309 | Val Loss: 6.5903
Epoch 53/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 96.04it/s, loss=6.5769]


[Epoch 53] Train Loss: 5.8219 | Val Loss: 6.5898
Epoch 54/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 120.63it/s, loss=6.5712]


[Epoch 54] Train Loss: 5.8140 | Val Loss: 6.5831
  -> New best model saved at saved/best.pt
Epoch 55/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 117.67it/s, loss=6.5708]


[Epoch 55] Train Loss: 5.7958 | Val Loss: 6.5834
Epoch 56/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 145.59it/s, loss=6.5689]


[Epoch 56] Train Loss: 5.7916 | Val Loss: 6.5818
  -> New best model saved at saved/best.pt
Epoch 57/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 111.52it/s, loss=6.5679]


[Epoch 57] Train Loss: 5.7703 | Val Loss: 6.5801
  -> New best model saved at saved/best.pt
Epoch 58/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 121.50it/s, loss=6.5655]


[Epoch 58] Train Loss: 5.7617 | Val Loss: 6.5788
  -> New best model saved at saved/best.pt
Epoch 59/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 94.80it/s, loss=6.5614]


[Epoch 59] Train Loss: 5.7442 | Val Loss: 6.5743
  -> New best model saved at saved/best.pt
Epoch 60/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 116.93it/s, loss=6.5587]


[Epoch 60] Train Loss: 5.7359 | Val Loss: 6.5712
  -> New best model saved at saved/best.pt
Epoch 61/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 99.96it/s, loss=6.5562] 


[Epoch 61] Train Loss: 5.7269 | Val Loss: 6.5704
  -> New best model saved at saved/best.pt
Epoch 62/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 119.08it/s, loss=6.5595]


[Epoch 62] Train Loss: 5.7159 | Val Loss: 6.5742
Epoch 63/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 121.07it/s, loss=6.5548]


[Epoch 63] Train Loss: 5.7040 | Val Loss: 6.5694
  -> New best model saved at saved/best.pt
Epoch 64/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 134.42it/s, loss=6.5562]


[Epoch 64] Train Loss: 5.6920 | Val Loss: 6.5691
  -> New best model saved at saved/best.pt
Epoch 65/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 127.76it/s, loss=6.5500]


[Epoch 65] Train Loss: 5.6822 | Val Loss: 6.5652
  -> New best model saved at saved/best.pt
Epoch 66/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 148.67it/s, loss=6.5493]


[Epoch 66] Train Loss: 5.6736 | Val Loss: 6.5653
Epoch 67/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 147.51it/s, loss=6.5461]


[Epoch 67] Train Loss: 5.6534 | Val Loss: 6.5632
  -> New best model saved at saved/best.pt
Epoch 68/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 132.70it/s, loss=6.5463]


[Epoch 68] Train Loss: 5.6412 | Val Loss: 6.5636
Epoch 69/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 122.70it/s, loss=6.5422]


[Epoch 69] Train Loss: 5.6367 | Val Loss: 6.5610
  -> New best model saved at saved/best.pt
Epoch 70/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 138.64it/s, loss=6.5448]


[Epoch 70] Train Loss: 5.6255 | Val Loss: 6.5623
Epoch 71/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 103.28it/s, loss=6.5419]


[Epoch 71] Train Loss: 5.6090 | Val Loss: 6.5607
  -> New best model saved at saved/best.pt
Epoch 72/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 171.01it/s, loss=6.5510]


[Epoch 72] Train Loss: 5.6045 | Val Loss: 6.5672
Epoch 73/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 132.46it/s, loss=6.5459]


[Epoch 73] Train Loss: 5.5908 | Val Loss: 6.5636
Epoch 74/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 145.52it/s, loss=6.5404]


[Epoch 74] Train Loss: 5.5804 | Val Loss: 6.5588
  -> New best model saved at saved/best.pt
Epoch 75/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 82.82it/s, loss=6.5408]


[Epoch 75] Train Loss: 5.5652 | Val Loss: 6.5606
Epoch 76/250


🚀 Validation: 100%|██████████| 21/21 [00:00<00:00, 117.43it/s, loss=6.5351]


[Epoch 76] Train Loss: 5.5509 | Val Loss: 6.5558
  -> New best model saved at saved/best.pt
Epoch 77/250


🚀 Training:   0%|          | 0/102 [00:00<?, ?it/s, loss=5.2665]

## Bleu Score

loading the best model !


In [None]:
import os
import torch
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
SRC_VOCAB_SIZE = 5000  
TGT_VOCAB_SIZE = 5000  
DROPOUT = 0.15      
N_HEADS = 2
N_LAYERS = 4       
FFN_HIDDEN = 256      
D_MODEL = 128    
encoder = TransformerEncoder(SRC_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
decoder = TransformerDecoder(TGT_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
best_model = Transformer(encoder, decoder, device, utils.PAD_TOKEN).to(device)
best_model.load_state_dict(torch.load(os.path.join(SAVE_DIR, "best.pt")))
print("Loaded best model for testing!")
pth = "dataset"
src = "min"
tgt = "eng"
tp  = os.path.join(pth, f"{src}_{tgt}")
with open(os.path.join(tp, "input_dic.pkl"),  "rb") as f:
    input_lang_dic = pickle.load(f)
with open(os.path.join(tp, "output_dic.pkl"), "rb") as f:
    output_lang_dic = pickle.load(f)
# ---------------------------------------------------------------------
# 4) Evaluate function
# ---------------------------------------------------------------------
def evaluate(model, valid_dataloader):
    model.eval()
    
    total_loss = 0.0
    all_bleu   = []
    with torch.no_grad():
        for batch_num, batch in enumerate(valid_dataloader):
            src_batch = batch["src"].to(device)
            tgt_batch = batch["tgt"].to(device)
            output, _ = model(src_batch, tgt_batch[:, :-1])
            out_dim = output.size(-1)  
            output_2d = output.contiguous().view(-1, out_dim)
            tgt_2d = tgt_batch[:, 1:].contiguous().view(-1)
            
            loss = criterion(output_2d, tgt_2d)
            total_loss += loss.item()
            batch_size = src_batch.size(0)
            batch_bleu = []
            for i in range(batch_size):
                ref_tokens = utils.detokenize(tgt_batch[i].tolist(), output_lang_dic)
                pred_ids   = output[i].argmax(dim=1)  # shape [seq_len-1]
                hyp_tokens = utils.detokenize(pred_ids.tolist(), output_lang_dic)
                bleu_score = utils.get_bleu(hyp_tokens.split(), ref_tokens.split())
                batch_bleu.append(bleu_score)
            all_bleu.append(sum(batch_bleu) / len(batch_bleu))
    epoch_loss = total_loss / len(valid_dataloader)
    epoch_bleu = sum(all_bleu) / len(all_bleu)
    return epoch_loss, epoch_bleu
valid_loss, valid_bleu = evaluate(best_model, val_loader)
print(f"Validation Loss = {valid_loss:.4f} | BLEU = {valid_bleu:.2f}")

Loaded best model for testing!
Validation Loss = 8.7341 | BLEU = 89.00


## Translasi

In [None]:
import os
import pickle
import torch
from utils import tokenize,detokenize
import torch
import utils  # For PAD_TOKEN, SOS_TOKEN, EOS_TOKEN, UNK_TOKEN, etc.
def translate_sentence(token_ids,input_dic, output_dic, model, device, max_len=50):
    model.eval()
    
    input_tensor = torch.LongTensor(token_ids).unsqueeze(0).to(device)
    src_mask = model.make_input_mask(input_tensor)
    
    print("Source tokens:", token_ids)
    print("Decoded tokens:", [input_dic.index2word[x] for x in token_ids])
    with torch.no_grad():
        encoded_input = model.encoder(input_tensor, src_mask)
    predicted_tokens = [utils.SOS_TOKEN]
    for _ in range(max_len):
        tgt_tensor = torch.LongTensor(predicted_tokens).unsqueeze(0).to(device)
        tgt_mask = model.make_target_mask(tgt_tensor)
        with torch.no_grad():
            output, attention = model.decoder(tgt_tensor, encoded_input, tgt_mask, src_mask)
        next_token = output[0, -1].argmax(dim=-1).item()
        predicted_tokens.append(next_token)
        
        if next_token == utils.EOS_TOKEN:
            break
    translation = ' '.join(output_dic.index2word[idx] 
                           for idx in predicted_tokens[1:] 
                           if idx not in [utils.SOS_TOKEN, utils.EOS_TOKEN, utils.PAD_TOKEN])
    return translation
sentences = [
    "Ambo mancari awaknyo besok",
    "Alun salama, apo kabar?",
    "Dunsanak ka rumah gadang",
    "Urang minang manarimo tradisi",
    "Apo ado di pasar?"
]
tokenized_inputs = []
for s in sentences:
    tokens = utils.tokenize(s, input_lang_dic, utils.MAX_SENT_LEN)
    tokenized_inputs.append(tokens)
    
for idx, token_ids in enumerate(tokenized_inputs):
    translation = translate_sentence(
        token_ids,
        input_lang_dic,
        output_lang_dic,
        best_model,
        device,
        utils.MAX_SENT_LEN
    )
    print(f"Original: {sentences[idx]}")
    print(f"Translated: {translation}\n")

Source tokens: [1, 3, 573, 3, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Decoded tokens: ['SOS', 'UNK', 'mancari', 'UNK', 'UNK', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
Original: Ambo mancari awaknyo besok
Translated: 

Source tokens: [1, 3, 3, 425, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Decoded tokens: ['SOS', 'UNK', 'UNK', 'apo', 'UNK', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PA

In [None]:
print("Index to word mapping (first 10):")
for i in range(10):
    print(i, output_lang_dic.index2word[i])


Index to word mapping (first 10):
0 PAD
1 SOS
2 EOS
3 UNK
4 enjoy
5 instalment
6 for
7 up
8 to
9 months


In [None]:
print("Dictionary size:", len(input_lang_dic.word2index))
print("Special tokens in the dictionary:")
for idx in range(4):
    print(idx, input_lang_dic.index2word[idx])


Dictionary size: 2183
Special tokens in the dictionary:
0 PAD
1 SOS
2 EOS
3 nikmati


In [None]:
for name, param in best_model.named_parameters():
    if "word_embedding" in name:
        print(name, param.shape)

encoder.word_embedding.weight torch.Size([5000, 128])
decoder.word_embedding.weight torch.Size([5000, 128])


In [None]:
# import os
# import pickle
# import torch

# from utils import tokenize,detokenize
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # -----------------------------------------------------------------------------
# # 1. Load the best model for testing
# # -----------------------------------------------------------------------------

# # Create encoder, decoder, and the full Transformer model.
# encoder = TransformerEncoder(SRC_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
# decoder = TransformerDecoder(TGT_VOCAB_SIZE, D_MODEL, N_LAYERS, N_HEADS, FFN_HIDDEN, DROPOUT, device)
# best_model = Transformer(encoder, decoder, device, utils.PAD_TOKEN).to(device)

# # Load the best model state
# model_path = os.path.join(SAVE_DIR, "best.pt")
# best_model.load_state_dict(torch.load(model_path, map_location=device))
# print("Loaded best model for testing!")
# # -----------------------------------------------------------------------------
# # 2. Load the input and output dictionaries
# # -----------------------------------------------------------------------------
# pth = "dataset"
# src = "min"
# tgt = "eng"
# tp  = os.path.join(pth,f"{src}_{tgt}")
# input_dic_path = os.path.join(tp, "input_dic.pkl")
# output_dic_path = os.path.join(tp, "output_dic.pkl")
# with open(input_dic_path, "rb") as f:
#     input_dictionary = pickle.load(f)
# with open(output_dic_path, "rb") as f:
#     output_dictionary = pickle.load(f)

# minang_sentences = [
#     "Ambo mancari awaknyo besok",
#     "Alun salama, apo kabar?",
#     "Dunsanak ka rumah gadang",
#     "Urang minang manarimo tradisi",
#     "Apo ado di pasar?"
# ]

# for sentence in minang_sentences:
#     translation = translate_sentence(sentence, best_model, input_dictionary, output_dictionary)
#     print(f"Original:   {sentence}")
#     print(f"Translated: {translation}\n")


# Translasi: 
Ambo mancari awaknyo besok

    Bahasa Indonesia: Saya akan mencarimu besok.
    English: I will look for you tomorrow.

Alun salama, apo kabar?

    Bahasa Indonesia: Halo, apa kabar?
    English: Hello, how are you?

Dunsanak ka rumah gadang

    Bahasa Indonesia: Saudara, mari ke rumah gadang.
    English: Relatives, let's go to the traditional house.

Urang minang manarimo tradisi

    Bahasa Indonesia: Orang Minang menerima tradisi.
    English: Minangkabau people embrace tradition.

Apo ado di pasar?

    Bahasa Indonesia: Apa ada di pasar?
    English: What's there in the market?