In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
from bigru import Encoder,Decoder,Seq2Seq
import utils
import pickle
nltk.download('punkt')  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
BATCH_SIZE = 32 # butuh lebih banyak update 
EPOCHS=20
DATA_PATH = "dataset/"  
SAVE_DIR = "saved"
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH, 
    source_lang="min", 
    target_lang="eng", 
    batch_size=BATCH_SIZE, 
    device=device
)
SRC_VOCAB_SIZE = 4000  
TGT_VOCAB_SIZE = 4000  
EMBED_SIZE = 256
ENC_HIDDEN = 64    
DEC_HIDDEN = ENC_HIDDEN*2 #2 KALI KARENA DARI BIGRU EMBEDDINGNYA 2X   [x_1 -> x_2 ] cat [x_1 <- x_2 ]    
N_LAYERS = 2
DROP_OUT = 0.2
encoder = Encoder(SRC_VOCAB_SIZE, EMBED_SIZE, ENC_HIDDEN, num_layers=N_LAYERS, dropout=DROP_OUT, pad_idx=utils.PAD_TOKEN)
decoder = Decoder(TGT_VOCAB_SIZE, EMBED_SIZE, DEC_HIDDEN, num_layers=N_LAYERS, dropout=DROP_OUT, pad_idx=utils.PAD_TOKEN)
model = Seq2Seq(encoder, decoder, device, ENC_HIDDEN, DEC_HIDDEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN) 
print("Model initialized on:", device)

TrainData - Max 'min' sentence length: 76
TrainData - Max 'eng' sentence length: 107
TestData - Max 'min' sentence length: 61
TestData - Max 'eng' sentence length: 75
ValidData - Max 'min' sentence length: 62
ValidData - Max 'eng' sentence length: 81
Number of examples in train_dataset,train origin,train_raw: 799 799 799
Number of examples in valid_dataset: 100
Number of examples in test_dataset: 100
Model initialized on: cuda


In [3]:
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}
for i, batch in enumerate(train_loader):
    if i < 8:
        src = batch["src"]
        tgt = batch["tgt"]
        ss,fss=src[0,:],src.shape
        st,fst=tgt[0,:],tgt.shape
        print(fss,fst)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    else:
        break

torch.Size([32, 109]) torch.Size([32, 109])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


testing input , is it correct

In [4]:
data_iter = iter(train_loader)
for i in range(7):
    batch = next(data_iter)
    print(batch["src"].shape)


torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])


In [5]:
for i,batch in enumerate(train_loader):
    if i <1:
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        output,_= model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)
        print(tgt_y)

tensor([1038,  190,  851,  ...,    0,    0,    0], device='cuda:0')


In [6]:
import os
import torch
from tqdm import tqdm
os.makedirs(SAVE_DIR, exist_ok=True)
best_val_loss = float("inf") 
best_model_path = None
atl = {}
avl = {}
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training", 
                leave=True, total=len(train_loader))
    for batch in train_bar:
        optimizer.zero_grad()
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        
        output, _ = model(src_batch, tgt_batch[:, :-1]) 
        output_dim = output.shape[-1]
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:,1:].contiguous().view(-1)

        loss = criterion(output, tgt_y)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    atl[epoch]=avg_train_loss

    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
              leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            src_batch = batch['src'].to(device)
            tgt_batch = batch['tgt'].to(device)
            
            output, _ = model(src_batch, tgt_batch[:, :-1]) 
            output_dim = output.shape[-1]
            output = output.reshape(-1, output_dim)
            tgt_y = tgt_batch[:,1:].contiguous().view(-1)

            loss = criterion(output, tgt_y)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")
    
    avg_val_loss = total_val_loss / len(val_loader)
    avl[epoch]=avg_val_loss

    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        if best_model_path and os.path.exists(best_model_path):
            os.remove(best_model_path)
        best_val_loss = avg_val_loss
        best_model_path = os.path.join(SAVE_DIR, "best_gru.pt")
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> New best model saved at {best_model_path}")
utils.plot_loss(atl, avl, SAVE_DIR,"loss_bigru")

Epoch 1/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  6.02it/s, loss=7.2510]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 27.12it/s, loss=7.1819]


[Epoch 1] Train Loss: 7.9795 | Val Loss: 7.2310
  -> New best model saved at saved/best_gru.pt
Epoch 2/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.91it/s, loss=6.3914]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 27.27it/s, loss=6.5234]


[Epoch 2] Train Loss: 6.6391 | Val Loss: 6.5592
  -> New best model saved at saved/best_gru.pt
Epoch 3/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.87it/s, loss=6.2874]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 29.09it/s, loss=6.5405]


[Epoch 3] Train Loss: 6.3214 | Val Loss: 6.5672
Epoch 4/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.97it/s, loss=6.4059]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 26.04it/s, loss=6.5450]


[Epoch 4] Train Loss: 6.2947 | Val Loss: 6.5860
Epoch 5/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  6.11it/s, loss=6.3105]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 29.98it/s, loss=6.5837]


[Epoch 5] Train Loss: 6.2888 | Val Loss: 6.6201
Epoch 6/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  6.12it/s, loss=6.3062]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 24.45it/s, loss=6.5972]


[Epoch 6] Train Loss: 6.2799 | Val Loss: 6.6346
Epoch 7/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.99it/s, loss=6.1678]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 29.51it/s, loss=6.6078]


[Epoch 7] Train Loss: 6.2763 | Val Loss: 6.6476
Epoch 8/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.61it/s, loss=6.3248]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 29.60it/s, loss=6.6260]


[Epoch 8] Train Loss: 6.2627 | Val Loss: 6.6610
Epoch 9/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.58it/s, loss=6.2467]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 24.07it/s, loss=6.6355]


[Epoch 9] Train Loss: 6.2560 | Val Loss: 6.6769
Epoch 10/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.18it/s, loss=6.2725]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 19.18it/s, loss=6.6549]


[Epoch 10] Train Loss: 6.2498 | Val Loss: 6.6864
Epoch 11/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.20it/s, loss=6.2107]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 20.89it/s, loss=6.6677]


[Epoch 11] Train Loss: 6.2418 | Val Loss: 6.6974
Epoch 12/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.35it/s, loss=6.3209]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 20.15it/s, loss=6.6672]


[Epoch 12] Train Loss: 6.2263 | Val Loss: 6.6992
Epoch 13/20


🚀 Training: 100%|██████████| 25/25 [00:05<00:00,  4.87it/s, loss=6.3136]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 18.82it/s, loss=6.6878]


[Epoch 13] Train Loss: 6.2120 | Val Loss: 6.7098
Epoch 14/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.18it/s, loss=6.1218]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 25.38it/s, loss=6.6959]


[Epoch 14] Train Loss: 6.2006 | Val Loss: 6.7158
Epoch 15/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.90it/s, loss=6.1282]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 23.64it/s, loss=6.7036]


[Epoch 15] Train Loss: 6.1898 | Val Loss: 6.7168
Epoch 16/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.62it/s, loss=6.0973]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 22.68it/s, loss=6.7006]


[Epoch 16] Train Loss: 6.1732 | Val Loss: 6.7182
Epoch 17/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.83it/s, loss=6.2337]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 23.44it/s, loss=6.7027]


[Epoch 17] Train Loss: 6.1595 | Val Loss: 6.7232
Epoch 18/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.77it/s, loss=6.0963]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 25.49it/s, loss=6.7224]


[Epoch 18] Train Loss: 6.1383 | Val Loss: 6.7332
Epoch 19/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.71it/s, loss=6.1956]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 19.79it/s, loss=6.7300]


[Epoch 19] Train Loss: 6.1224 | Val Loss: 6.7321
Epoch 20/20


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.79it/s, loss=6.1344]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 26.98it/s, loss=6.7168]


[Epoch 20] Train Loss: 6.1076 | Val Loss: 6.7244
Loss plot saved to saved/loss_bigru.png
