In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from dataloader import get_dataloaders
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformer import Transformer,TransformerEncoder,TransformerDecoder
from bigru import Encoder,Decoder,Seq2Seq
import utils
import pickle
# Fetch the NLTK "punkt" tokenizer data; NLTK reports it as up-to-date when it
# is already present locally.
nltk.download('punkt')

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/wicaksonolxn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# --- Reproducibility ---------------------------------------------------------
# Seed PyTorch's random number generators before any stochastic step in this
# cell (parameter initialisation, dropout, and data shuffling if
# get_dataloaders relies on torch's global RNG), so that a fresh
# "Restart & Run All" reproduces the same run.
SEED = 42
torch.manual_seed(SEED)

# --- Training configuration --------------------------------------------------
BATCH_SIZE = 32  # kept small: the model needs more parameter updates
EPOCHS = 20
DATA_PATH = "dataset/"
SAVE_DIR = "saved"

# --- Data --------------------------------------------------------------------
# Loaders for the "min" -> "eng" language pair.
train_loader, val_loader, test_loader = get_dataloaders(
    data_path=DATA_PATH,
    source_lang="min",
    target_lang="eng",
    batch_size=BATCH_SIZE,
    device=device,
)

# --- Model hyperparameters ---------------------------------------------------
# NOTE(review): the vocabulary sizes are hard-coded; presumably they must match
# the vocabularies built inside get_dataloaders -- confirm.
SRC_VOCAB_SIZE = 4000
TGT_VOCAB_SIZE = 4000
EMBED_SIZE = 256
ENC_HIDDEN = 64
# Twice the encoder size because the bidirectional GRU yields a doubled
# representation: [x_1 -> x_2] cat [x_1 <- x_2].
DEC_HIDDEN = ENC_HIDDEN * 2
N_LAYERS = 2
DROP_OUT = 0.2

# --- Model, optimiser, loss --------------------------------------------------
encoder = Encoder(
    SRC_VOCAB_SIZE, EMBED_SIZE, ENC_HIDDEN,
    num_layers=N_LAYERS, dropout=DROP_OUT, pad_idx=utils.PAD_TOKEN,
)
decoder = Decoder(
    TGT_VOCAB_SIZE, EMBED_SIZE, DEC_HIDDEN,
    num_layers=N_LAYERS, dropout=DROP_OUT, pad_idx=utils.PAD_TOKEN,
)
model = Seq2Seq(encoder, decoder, device, ENC_HIDDEN, DEC_HIDDEN).to(device)
optimizer = optim.Adam(model.parameters(), lr=5e-4)
# Padding positions are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=utils.PAD_TOKEN)
print("Model initialized on:", device)

TrainData - Max 'min' sentence length: 76
TrainData - Max 'eng' sentence length: 107
TestData - Max 'min' sentence length: 61
TestData - Max 'eng' sentence length: 75
ValidData - Max 'min' sentence length: 62
ValidData - Max 'eng' sentence length: 81
Number of examples in train_dataset,train origin,train_raw: 799 799 799
Number of examples in valid_dataset: 100
Number of examples in test_dataset: 100
Model initialized on: cuda


In [3]:
# Special-token ids from utils, keyed by a human-readable label.
tokens = {
    "Padding": utils.PAD_TOKEN,
    "Start of Sequence": utils.SOS_TOKEN,
    "End of Sequence": utils.EOS_TOKEN,
    "Unknown": utils.UNK_TOKEN
}

# Inspect the first training batch only: print the shapes of its source and
# target tensors, then the special-token ids.
# The previous version attached an `else: break` to the inner token loop
# (for/else), which always fired after the first batch; the explicit `break`
# below states that behaviour directly and produces the same output.
for batch in train_loader:
    src = batch["src"]
    tgt = batch["tgt"]
    print(src.shape, tgt.shape)
    for name, token in tokens.items():
        print(f"{name}: {token}")
    break  # one batch is enough for this sanity check

torch.Size([32, 109]) torch.Size([32, 109])
Padding: 0
Start of Sequence: 1
End of Sequence: 2
Unknown: 3


Testing the input: check that the batches are shaped correctly.

In [4]:
# Print the source-tensor shape of (at most) the first 7 training batches.
data_iter = iter(train_loader)
# zip() stops as soon as either side is exhausted, so a loader with fewer than
# 7 batches ends the loop cleanly instead of raising StopIteration from next().
for i, batch in zip(range(7), data_iter):
    print(batch["src"].shape)


torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])
torch.Size([32, 109])


In [5]:
# Smoke test: run a single forward pass on the first training batch and print
# the flattened target labels.
# no_grad() avoids building an autograd graph that this check never uses.
with torch.no_grad():
    for i, batch in enumerate(train_loader):
        src_batch = batch['src'].to(device)
        tgt_batch = batch['tgt'].to(device)
        # The model receives the target without its last position; the labels
        # are the target without its first position (shifted by one).
        output, _ = model(src_batch, tgt_batch[:, :-1])
        output_dim = output.shape[-1]
        # Flatten to (N, output_dim) / (N,) -- the layout used with the loss.
        output = output.reshape(-1, output_dim)
        tgt_y = tgt_batch[:, 1:].contiguous().view(-1)
        print(tgt_y)
        # Only the first batch is needed; stop instead of walking the rest of
        # the loader for nothing.
        break

tensor([ 22, 365, 742,  ...,   0,   0,   0], device='cuda:0')


In [None]:
# Training / validation loop for the BiGRU seq2seq model.
# `os`, `torch` and `tqdm` are already imported in the notebook's first cell,
# so they are not re-imported here.


def _batch_loss(batch):
    """Run one forward pass on `batch` and return its loss tensor.

    Reads `batch['src']` and `batch['tgt']`, moves them to `device`, feeds the
    model the target without its last position, and scores the flattened
    output against the target without its first position using `criterion`.
    Uses the notebook-level `model`, `criterion` and `device`.
    """
    src_batch = batch['src'].to(device)
    tgt_batch = batch['tgt'].to(device)

    output, _ = model(src_batch, tgt_batch[:, :-1])
    output = output.reshape(-1, output.shape[-1])
    tgt_y = tgt_batch[:, 1:].contiguous().view(-1)
    return criterion(output, tgt_y)


os.makedirs(SAVE_DIR, exist_ok=True)
checkpoint_path = os.path.join(SAVE_DIR, "best_gru.pt")
best_val_loss = float("inf")
best_model_path = None  # stays None until a checkpoint has actually been written
atl = {}  # epoch -> average training loss
avl = {}  # epoch -> average validation loss

for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")

    # ---- training ----
    model.train()
    total_train_loss = 0.0
    train_bar = tqdm(train_loader, desc="🚀 Training",
                     leave=True, total=len(train_loader))
    for batch in train_bar:
        optimizer.zero_grad()
        loss = _batch_loss(batch)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)
    atl[epoch] = avg_train_loss

    # ---- validation ----
    model.eval()
    total_val_loss = 0.0
    val_bar = tqdm(val_loader, desc="🚀 Validation",
                   leave=True, total=len(val_loader))
    with torch.no_grad():
        for batch in val_bar:
            loss = _batch_loss(batch)
            total_val_loss += loss.item()
            val_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_val_loss = total_val_loss / len(val_loader)
    avl[epoch] = avg_val_loss

    print(f"[Epoch {epoch}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    # ---- checkpoint the best model so far ----
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        # Write to a temporary file and swap it into place. The previous
        # version deleted the old checkpoint before saving to the same path,
        # so a failed or interrupted save left no best model on disk.
        tmp_path = checkpoint_path + ".tmp"
        torch.save(model.state_dict(), tmp_path)
        os.replace(tmp_path, checkpoint_path)
        best_model_path = checkpoint_path
        print(f"  -> New best model saved at {best_model_path}")

utils.plot_loss(atl, avl, SAVE_DIR,"loss_bigru")

Epoch 1/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.19it/s, loss=6.6185]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 19.04it/s, loss=6.5693]


[Epoch 1] Train Loss: 7.4089 | Val Loss: 6.5943
  -> New best model saved at saved/best_gru.pt
Epoch 2/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.35it/s, loss=6.2908]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 24.59it/s, loss=6.5862]


[Epoch 2] Train Loss: 6.3633 | Val Loss: 6.6235
Epoch 3/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.13it/s, loss=6.3197]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 24.78it/s, loss=6.6237]


[Epoch 3] Train Loss: 6.3157 | Val Loss: 6.6608
Epoch 4/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.10it/s, loss=6.2323]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 24.69it/s, loss=6.6621]


[Epoch 4] Train Loss: 6.2931 | Val Loss: 6.6816
Epoch 5/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.26it/s, loss=6.2030]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 18.83it/s, loss=6.6824]


[Epoch 5] Train Loss: 6.2730 | Val Loss: 6.7036
Epoch 6/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.12it/s, loss=6.3699]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 25.77it/s, loss=6.6969]


[Epoch 6] Train Loss: 6.2504 | Val Loss: 6.7205
Epoch 7/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.28it/s, loss=6.2012]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 24.51it/s, loss=6.7203]


[Epoch 7] Train Loss: 6.2227 | Val Loss: 6.7433
Epoch 8/100


🚀 Training: 100%|██████████| 25/25 [00:05<00:00,  4.98it/s, loss=6.2807]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 23.62it/s, loss=6.7417]


[Epoch 8] Train Loss: 6.1978 | Val Loss: 6.7494
Epoch 9/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.28it/s, loss=6.1819]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 26.07it/s, loss=6.7544]


[Epoch 9] Train Loss: 6.1644 | Val Loss: 6.7632
Epoch 10/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.31it/s, loss=6.0575]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 24.32it/s, loss=6.7511]


[Epoch 10] Train Loss: 6.1270 | Val Loss: 6.7638
Epoch 11/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.16it/s, loss=6.1647]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 23.12it/s, loss=6.7605]


[Epoch 11] Train Loss: 6.0943 | Val Loss: 6.7566
Epoch 12/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.06it/s, loss=6.0835]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 20.73it/s, loss=6.7608]


[Epoch 12] Train Loss: 6.0548 | Val Loss: 6.7580
Epoch 13/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.09it/s, loss=6.0777]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 23.47it/s, loss=6.7639]


[Epoch 13] Train Loss: 6.0238 | Val Loss: 6.7699
Epoch 14/100


🚀 Training: 100%|██████████| 25/25 [00:05<00:00,  4.99it/s, loss=6.0098]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 18.19it/s, loss=6.7795]


[Epoch 14] Train Loss: 5.9851 | Val Loss: 6.7643
Epoch 15/100


🚀 Training: 100%|██████████| 25/25 [00:05<00:00,  4.89it/s, loss=5.9505]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 20.67it/s, loss=6.7435]


[Epoch 15] Train Loss: 5.9497 | Val Loss: 6.7589
Epoch 16/100


🚀 Training: 100%|██████████| 25/25 [00:05<00:00,  4.98it/s, loss=5.9995]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 19.13it/s, loss=6.7714]


[Epoch 16] Train Loss: 5.9188 | Val Loss: 6.7723
Epoch 17/100


🚀 Training: 100%|██████████| 25/25 [00:05<00:00,  4.98it/s, loss=6.0052]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 18.54it/s, loss=6.7655]


[Epoch 17] Train Loss: 5.8862 | Val Loss: 6.7667
Epoch 18/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.02it/s, loss=5.8574]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 19.22it/s, loss=6.7734]


[Epoch 18] Train Loss: 5.8490 | Val Loss: 6.7809
Epoch 19/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.12it/s, loss=5.8515]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 23.90it/s, loss=6.7906]


[Epoch 19] Train Loss: 5.8191 | Val Loss: 6.7777
Epoch 20/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.20it/s, loss=5.8538]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 23.51it/s, loss=6.7811]


[Epoch 20] Train Loss: 5.7859 | Val Loss: 6.7938
Epoch 21/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.13it/s, loss=5.7412]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 22.61it/s, loss=6.7849]


[Epoch 21] Train Loss: 5.7515 | Val Loss: 6.7811
Epoch 22/100


🚀 Training: 100%|██████████| 25/25 [00:04<00:00,  5.14it/s, loss=5.6839]
🚀 Validation: 100%|██████████| 4/4 [00:00<00:00, 22.77it/s, loss=6.7619]


[Epoch 22] Train Loss: 5.7119 | Val Loss: 6.7738
Epoch 23/100


🚀 Training:   0%|          | 0/25 [00:00<?, ?it/s]