# 1: Setup dan Pemuatan Data

Kode ini berisi semua impor pustaka, konfigurasi dasar, dan kode untuk memuat serta membagi data terjemahan ke dalam set pelatihan, validasi, dan pengujian.

In [1]:
!pip install sacrebleu
import os, pathlib, random, math, time, subprocess, sys, json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')
from tqdm.notebook import tqdm
import sentencepiece as spm
import sacrebleu

# Install sacrebleu jika belum terinstal
try:
    import sacrebleu
except ImportError:
    !pip install sacrebleu
    import sacrebleu

# --- Reproducibility helper ---
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Menggunakan device: {DEVICE}")

WORKDIR = pathlib.Path("/kaggle/working")
WORKDIR.mkdir(parents=True, exist_ok=True)

# ---------------------------------------------------------
# Data: ManyThings loader
# ---------------------------------------------------------
def load_translation_data():
    possible_paths = [
        "/kaggle/input/translate-in/ind.txt",
        str(WORKDIR/"ind.txt"),
        "/kaggle/input/translate/ind.txt",
        "/kaggle/input/ind.txt"
    ]
    for path in possible_paths:
        if pathlib.Path(path).exists():
            print(f"Menggunakan data dari: {path}")
            return pathlib.Path(path)
    print("Warning: Tidak ada file data yang ditemukan. Pastikan data ada di salah satu path berikut.")
    return None

DATA_PATH = load_translation_data()
if not DATA_PATH:
    raise FileNotFoundError("Dataset ind.txt tidak ditemukan di lokasi yang diharapkan.")

MAX_LEN = 50
VAL_RATIO, TEST_RATIO = 0.1, 0.1
pairs = []
with open(DATA_PATH, encoding="utf-8") as f:
    for line in f:
        parts = line.strip().split("\t")
        if len(parts) < 2: continue
        en, idn = parts[0].lower().strip(), parts[1].lower().strip()
        if len(en.split()) > MAX_LEN or len(idn.split()) > MAX_LEN: continue
        pairs.append((en, idn))

random.shuffle(pairs)
n_total = len(pairs)
n_val, n_test = int(n_total*VAL_RATIO), int(n_total*TEST_RATIO)
test_pairs = pairs[:n_test]
val_pairs = pairs[n_test:n_test+n_val]
train_pairs= pairs[n_test+n_val:]

print(f"[Data] Total={n_total} | Train={len(train_pairs)}, Valid={len(val_pairs)}, Test={len(test_pairs)}")

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-3.2.0 sacrebleu-2.5.1
Menggunakan device: cpu
Menggunakan data dari: /kaggle/input/ind.txt
[Data] Total=14881 | Train=11905, Valid=1488, Test=1488


Penjelasan:

kode ini adalah bagian setup. Fungsinya adalah:

- Mengimpor semua pustaka yang diperlukan.

- Menentukan device yang akan digunakan (GPU atau CPU).

- Membuat direktori kerja (/kaggle/working) untuk menyimpan hasil.

- Memuat dataset dan membaginya menjadi tiga set (pelatihan, validasi, dan pengujian).

# 2: Definisi Model dan Fungsi Bantu

Kode ini berisi semua definisi model (Seq2Seq, Transformer) dan fungsi-fungsi bantu yang akan dipanggil selama proses pelatihan dan evaluasi.

In [2]:
# ---------------------------------------------------------
# Helper Functions: SentencePiece + Fallback
# ---------------------------------------------------------
def train_sentencepiece_model(corpus_path, model_prefix, vocab_size):
    spm.SentencePieceTrainer.Train(
        f"--input={corpus_path} --model_prefix={model_prefix} "
        f"--vocab_size={vocab_size} --character_coverage=1.0 "
        f"--bos_id=1 --eos_id=2 --pad_id=3 --unk_id=0 --hard_vocab_limit=false"
    )

def create_tokenizers(train_pairs, vocab_size):
    en_corp = WORKDIR / f"train_text_v{vocab_size}.en"
    id_corp = WORKDIR / f"train_text_v{vocab_size}.id"
    with open(en_corp, "w", encoding="utf-8") as f:
        for en,_ in train_pairs: f.write(en+"\n")
    with open(id_corp, "w", encoding="utf-8") as f:
        for _,idn in train_pairs: f.write(idn+"\n")
    en_prefix = WORKDIR / f"en_spm_v{vocab_size}"
    id_prefix = WORKDIR / f"id_spm_v{vocab_size}"
    try:
        train_sentencepiece_model(en_corp, str(en_prefix), vocab_size)
        train_sentencepiece_model(id_corp, str(id_prefix), vocab_size)
        sp_en, sp_id = spm.SentencePieceProcessor(), spm.SentencePieceProcessor()
        sp_en.load(str(en_prefix) + ".model")
        sp_id.load(str(id_prefix) + ".model")
        print(f"[SPM] Model saved: {en_prefix}.model and {id_prefix}.model")
        return sp_en, sp_id
    except Exception as e:
        print("SentencePiece failed:", e)
        raise

# ---------------------------------------------------------
# Dataset & DataLoader
# ---------------------------------------------------------
class TranslationDataset(Dataset):
    def __init__(self, pairs, sp_src, sp_tgt, max_len=MAX_LEN):
        self.data = pairs
        self.sp_src, self.sp_tgt = sp_src, sp_tgt
        self.max_len = max_len
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        en, idn = self.data[idx]
        src_ids = [self.sp_src.bos_id()] + self.sp_src.encode(en) + [self.sp_src.eos_id()]
        tgt_ids = [self.sp_tgt.bos_id()] + self.sp_tgt.encode(idn) + [self.sp_tgt.eos_id()]
        src = torch.tensor(src_ids[:self.max_len], dtype=torch.long)
        tgt = torch.tensor(tgt_ids[:self.max_len], dtype=torch.long)
        return src, tgt

def collate_batch(batch, pad_src, pad_tgt):
    src_batch, tgt_batch = zip(*batch)
    src_lens = [len(s) for s in src_batch]
    src_pad = nn.utils.rnn.pad_sequence(src_batch, padding_value=pad_src, batch_first=True)
    tgt_pad = nn.utils.rnn.pad_sequence(tgt_batch, padding_value=pad_tgt, batch_first=True)
    return src_pad, tgt_pad, src_lens, None

# ---------------------------------------------------------
# Seq2Seq Model Definitions (Encoder, Attention, Decoder)
# ---------------------------------------------------------
class EncoderRNN(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers=1, dropout=0.1, bidirectional=True, pad_idx=3):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx=pad_idx)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, batch_first=True, bidirectional=bidirectional, dropout=dropout if n_layers>1 else 0)
        self.dropout = nn.Dropout(dropout)
        self.bi = 2 if bidirectional else 1
        self.hid_dim = hid_dim
        self.n_layers = n_layers
    def forward(self, src, src_lens):
        embedded = self.dropout(self.embedding(src))
        packed = nn.utils.rnn.pack_padded_sequence(embedded, src_lens, batch_first=True, enforce_sorted=False)
        outputs, hidden = self.rnn(packed)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)
        return outputs, hidden
class AdditiveAttention(nn.Module):
    def __init__(self, enc_dim, dec_dim):
        super().__init__()
        self.W = nn.Linear(enc_dim + dec_dim, dec_dim)
        self.v = nn.Linear(dec_dim, 1, bias=False)
    def forward(self, dec_hidden, enc_outputs, src_mask):
        T = enc_outputs.size(1)
        dec_rep = dec_hidden.unsqueeze(1).repeat(1, T, 1)
        energy = torch.tanh(self.W(torch.cat([dec_rep, enc_outputs], dim=-1)))
        scores = self.v(energy).squeeze(-1)
        scores = scores.masked_fill(src_mask==0, -1e9)
        attn = torch.softmax(scores, dim=-1)
        ctx = torch.bmm(attn.unsqueeze(1), enc_outputs).squeeze(1)
        return ctx, attn
class DecoderRNN(nn.Module):
    def __init__(self, output_dim, emb_dim, enc_hid, dec_hid, dropout=0.1, pad_idx=3):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim, padding_idx=pad_idx)
        self.rnn = nn.GRU(emb_dim + enc_hid, dec_hid, batch_first=True)
        self.fc = nn.Linear(dec_hid + enc_hid, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.attn = AdditiveAttention(enc_hid, dec_hid)
    def forward(self, y_prev, hidden, enc_outputs, src_mask):
        emb = self.dropout(self.embedding(y_prev)).unsqueeze(1)
        dec_hidden = hidden[-1]
        ctx, attn = self.attn(dec_hidden, enc_outputs, src_mask)
        rnn_input = torch.cat([(emb), ctx.unsqueeze(1)], dim=-1)
        out, hidden = self.rnn(rnn_input, hidden)
        logits = self.fc(torch.cat([out.squeeze(1), ctx], dim=-1))
        return logits, hidden, attn
class Seq2SeqAttn(nn.Module):
    def __init__(self, encoder, decoder, enc_bi=True, pad_src=3):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.enc_bi = enc_bi
        self.pad_src = pad_src
    def make_src_mask(self, src):
        return (src != self.pad_src).to(src.device)
    def forward(self, src, src_lens, tgt):
        enc_outputs, enc_hidden = self.encoder(src, src_lens)
        if self.enc_bi:
            enc_hidden = enc_hidden.view(self.encoder.n_layers, 2, enc_hidden.size(1), enc_hidden.size(2)).sum(1)
        src_mask = self.make_src_mask(src)
        B, Tt = tgt.size()
        hidden = enc_hidden
        logits_all = []
        y = tgt[:,0]
        for t in range(1, Tt):
            logits, hidden, _ = self.decoder(y, hidden, enc_outputs, src_mask)
            logits_all.append(logits.unsqueeze(1))
            y = tgt[:,t]
        return torch.cat(logits_all, dim=1)
    @torch.no_grad()
    def translate(self, src, src_lens, max_len=80, bos_tgt=1, eos_tgt=2):
        self.eval()
        enc_outputs, enc_hidden = self.encoder(src, src_lens)
        if self.enc_bi:
            enc_hidden = enc_hidden.view(self.encoder.n_layers, 2, enc_hidden.size(1), enc_hidden.size(2)).sum(1)
        src_mask = self.make_src_mask(src)
        B = src.size(0)
        hidden = enc_hidden
        y = torch.full((B,), bos_tgt, dtype=torch.long, device=src.device)
        out_ids = [y.clone()]
        for _ in range(max_len):
            logits, hidden, _ = self.decoder(y, hidden, enc_outputs, src_mask)
            y = logits.argmax(dim=-1)
            out_ids.append(y.clone())
            if (y == eos_tgt).all(): break
        out = torch.stack(out_ids, dim=1)
        return out

# ---------------------------------------------------------
# Transformer Model Definitions
# ---------------------------------------------------------
class LabelSmoothingLoss(nn.Module):
    def __init__(self, size, smoothing=0.1, pad_idx=3):
        super().__init__()
        self.criterion = nn.KLDivLoss(reduction='batchmean')
        self.smoothing = smoothing
        self.size = size
        self.pad_idx = pad_idx
    def forward(self, x, target):
        true_dist = torch.zeros_like(x)
        true_dist.fill_(self.smoothing / (self.size - 2))
        true_dist.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        true_dist[:, self.pad_idx] = 0
        mask = (target == self.pad_idx).unsqueeze(1).float()
        true_dist = true_dist * (1 - mask)
        return self.criterion(F.log_softmax(x, dim=1), true_dist)
class TransformerMT(nn.Module):
    def __init__(self, src_vocab, tgt_vocab, d_model=256, nhead=8, num_layers=3, dim_ff=512, dropout=0.1, pad_src=3, pad_tgt=3):
        super().__init__()
        self.src_emb = nn.Embedding(src_vocab, d_model, padding_idx=pad_src)
        self.tgt_emb = nn.Embedding(tgt_vocab, d_model, padding_idx=pad_tgt)
        self.pos_enc_src = nn.Parameter(torch.randn(1, 1024, d_model) * 0.01)
        self.pos_enc_tgt = nn.Parameter(torch.randn(1, 1024, d_model) * 0.01)
        self.tr = nn.Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_layers, num_decoder_layers=num_layers, dim_feedforward=dim_ff, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(d_model, tgt_vocab)
        self.pad_src = pad_src
        self.pad_tgt = pad_tgt
        self.d_model = d_model
    def src_mask(self, src):
        return (src != self.pad_src)
    def tgt_mask(self, size):
        mask = torch.triu(torch.ones(size, size, device=DEVICE), diagonal=1).bool()
        return mask
    def forward(self, src, tgt_in):
        B, Ts = src.size()
        _, Tt = tgt_in.size()
        src_e = self.src_emb(src) * math.sqrt(self.d_model) + self.pos_enc_src[:, :Ts, :]
        tgt_e = self.tgt_emb(tgt_in) * math.sqrt(self.d_model) + self.pos_enc_tgt[:, :Tt, :]
        src_key_padding_mask = ~self.src_mask(src)
        tgt_key_padding_mask = (tgt_in == self.pad_tgt)
        causal_mask = self.tgt_mask(Tt)
        out = self.tr(src_e, tgt_e, tgt_mask=causal_mask, src_key_padding_mask=src_key_padding_mask, tgt_key_padding_mask=tgt_key_padding_mask, memory_key_padding_mask=src_key_padding_mask)
        return self.fc(out)
    @torch.no_grad()
    def translate_greedy(self, src, max_len=80, bos_tgt=1, eos_tgt=2):
        self.eval()
        B, Ts = src.size()
        src_e = self.src_emb(src) * math.sqrt(self.d_model) + self.pos_enc_src[:, :Ts, :]
        src_key_padding_mask = (src == self.pad_src)
        memory = self.tr.encoder(src_e, src_key_padding_mask=src_key_padding_mask)
        ys = torch.full((B,1), bos_tgt, dtype=torch.long, device=src.device)
        for _ in range(max_len):
            Tt = ys.size(1)
            tgt_e = self.tgt_emb(ys) * math.sqrt(self.d_model) + self.pos_enc_tgt[:, :Tt, :]
            causal_mask = self.tgt_mask(Tt)
            out = self.tr.decoder(tgt_e, memory, tgt_mask=causal_mask, tgt_key_padding_mask=(ys==self.pad_tgt), memory_key_padding_mask=src_key_padding_mask)
            logits = self.fc(out[:,-1,:])
            next_tok = logits.argmax(dim=-1, keepdim=True)
            ys = torch.cat([ys, next_tok], dim=1)
            if (next_tok.squeeze(-1) == eos_tgt).all(): break
        return ys
    @torch.no_grad()
    def translate_beam(self, src, max_len=80, bos_tgt=1, eos_tgt=2, beam=4):
        self.eval()
        assert src.size(0) == 1, "Beam search implemented for batch=1 for simplicity"
        Ts = src.size(1)
        src_e = self.src_emb(src) * math.sqrt(self.d_model) + self.pos_enc_src[:, :Ts, :]
        src_key_padding_mask = (src == self.pad_src)
        memory = self.tr.encoder(src_e, src_key_padding_mask=src_key_padding_mask)
        beams = [(torch.tensor([[bos_tgt]], device=src.device), 0.0)] # (seq, logprob)
        for _ in range(max_len):
            new_beams = []
            for seq, lp in beams:
                if seq[0,-1].item() == eos_tgt:
                    new_beams.append((seq, lp))
                    continue
                Tt = seq.size(1)
                tgt_e = self.tgt_emb(seq) * math.sqrt(self.d_model) + self.pos_enc_tgt[:, :Tt, :]
                causal_mask = self.tgt_mask(Tt)
                out = self.tr.decoder(tgt_e, memory, tgt_mask=causal_mask, tgt_key_padding_mask=(seq==self.pad_tgt), memory_key_padding_mask=src_key_padding_mask)
                logits = self.fc(out[:,-1,:])
                logp = F.log_softmax(logits, dim=-1).squeeze(0)
                topk = torch.topk(logp, beam)
                for token, token_lp in zip(topk.indices.tolist(), topk.values.tolist()):
                    new_seq = torch.cat([seq, torch.tensor([[token]], device=src.device)], dim=1)
                    new_beams.append((new_seq, lp + token_lp))
            new_beams.sort(key=lambda x: x[1], reverse=True)
            beams = new_beams[:beam]
            if all(seq[0,-1].item() == eos_tgt for seq,_ in beams): break
        best_seq = max(beams, key=lambda x: x[1])[0]
        return best_seq

# ---------------------------------------------------------
# Training & Evaluation Functions
# ---------------------------------------------------------
def epoch_train_transformer(model, dl, optimizer, criterion, scheduler=None, grad_clip=1.0):
    model.train()
    total_loss = 0.0
    for src, tgt, src_lens, _ in tqdm(dl, desc="Transformer Training"):
        src, tgt = src.to(DEVICE), tgt.to(DEVICE)
        optimizer.zero_grad()
        logits = model(src, tgt[:, :-1])
        loss = criterion(logits.reshape(-1, logits.size(-1)), tgt[:, 1:].reshape(-1))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        if scheduler: scheduler.step()
        total_loss += loss.item()
    return total_loss / len(dl)

def epoch_train_seq2seq(model, dl, optimizer, criterion, grad_clip=1.0):
    model.train()
    total_loss = 0.0
    for src, tgt, src_lens, _ in tqdm(dl, desc="RNN+Attn Training"):
        src, tgt = src.to(DEVICE), tgt.to(DEVICE)
        optimizer.zero_grad()
        logits = model(src, src_lens, tgt)
        loss = criterion(logits.reshape(-1, logits.size(-1)), tgt[:,1:].reshape(-1))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dl)

def decode_transformer(model, src, sp_tgt, max_len=80, mode='greedy', beam=4):
    if mode == 'beam': return model.translate_beam(src.to(DEVICE), max_len=max_len, bos_tgt=sp_tgt.bos_id(), eos_tgt=sp_tgt.eos_id(), beam=beam).cpu()
    return model.translate_greedy(src.to(DEVICE), max_len=max_len, bos_tgt=sp_tgt.bos_id(), eos_tgt=sp_tgt.eos_id()).cpu()

def decode_seq2seq(model, src, src_lens, sp_tgt, max_len=80):
    return model.translate(src.to(DEVICE), src_lens, max_len=max_len, bos_tgt=sp_tgt.bos_id(), eos_tgt=sp_tgt.eos_id()).cpu()

def eval_bleu_chrF_with_model(model, dl, decode_fn, sp_src, sp_tgt, max_len=80, n_samples_preview=5, **decode_kwargs):
    refs, hyps, previews = [], [], []
    model.eval()
    with torch.no_grad():
        for i,(src, tgt, src_lens, _) in tqdm(enumerate(dl), total=len(dl), desc="Evaluating"):
            out_ids = decode_fn(model, src, src_lens, sp_tgt, max_len=max_len) if decode_fn == decode_seq2seq else decode_fn(model, src, sp_tgt, max_len=max_len, **decode_kwargs)
            pred_ids = out_ids[0].tolist()
            if len(pred_ids) > 0 and pred_ids[0] == sp_tgt.bos_id(): pred_ids = pred_ids[1:]
            if sp_tgt.eos_id() in pred_ids: pred_ids = pred_ids[:pred_ids.index(sp_tgt.eos_id())]
            hyp = sp_tgt.decode(pred_ids)
            ref_ids = tgt[0].tolist()
            if len(ref_ids) > 0 and ref_ids[0] == sp_tgt.bos_id(): ref_ids = ref_ids[1:]
            if sp_tgt.eos_id() in ref_ids: ref_ids = ref_ids[:ref_ids.index(sp_tgt.eos_id())]
            ref = sp_tgt.decode(ref_ids)
            hyps.append(hyp); refs.append([ref])
            if i < n_samples_preview:
                src_ids = src[0].tolist()
                if len(src_ids) > 0 and src_ids[0] == sp_src.bos_id(): src_ids = src_ids[1:]
                if sp_src.eos_id() in src_ids: src_ids = src_ids[:src_ids.index(sp_src.eos_id())]
                src_txt = sp_src.decode(src_ids)
                previews.append((src_txt, hyp, ref))
    bleu = sacrebleu.corpus_bleu(hyps, refs).score
    chrf = sacrebleu.corpus_chrf(hyps, refs).score
    return bleu, chrf, previews

def analyze_translation_errors(previews, save_path):
    analyses = [{'src': s, 'hyp': h, 'ref': r, 'src_len': len(s.split()), 'hyp_len': len(h.split()), 'ref_len': len(r.split()),
                 'error_type': "Under-translation" if len(h.split()) < len(r.split())*0.7 else "Over-translation" if len(h.split()) > len(r.split())*1.3 else "Normal"}
                for s,h,r in previews]
    pd.DataFrame(analyses).to_csv(save_path, index=False)
    return analyses

def plot_training_curves(history, model_name, save_path):
    epochs = range(1, len(history['train_loss']) + 1)
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    axes[0].plot(epochs, history['train_loss']); axes[0].set_title(f'{model_name} - Training Loss'); axes[0].set_xlabel('Epoch'); axes[0].set_ylabel('Loss'); axes[0].grid(True)
    axes[1].plot(epochs, history['val_bleu']); axes[1].set_title(f'{model_name} - Validation BLEU'); axes[1].set_xlabel('Epoch'); axes[1].set_ylabel('BLEU'); axes[1].grid(True)
    axes[2].plot(epochs, history['val_chrf']); axes[2].set_title(f'{model_name} - Validation chrF'); axes[2].set_xlabel('Epoch'); axes[2].set_ylabel('chrF'); axes[2].grid(True)
    plt.tight_layout(); plt.savefig(save_path, dpi=150, bbox_inches='tight'); plt.close()

Penjelasan:

ode ini adalah inti dari proyek . Isinya adalah:

- Definisi kelas untuk model RNN+Attention dan Transformer.

- Fungsi-fungsi untuk melatih model per epoch.

- Fungsi-fungsi untuk menerjemahkan teks dengan model yang sudah dilatih.

- Fungsi untuk menghitung metrik evaluasi seperti BLEU dan chrF.

# 3: Fungsi Eksperimen Utama

Kode ini berisi fungsi utama (run_experiment) yang menjalankan satu eksperimen lengkap untuk satu ukuran vocabulary tertentu, mulai dari melatih model hingga menyimpan hasilnya.

In [3]:
# ---------------------------------------------------------
# MAIN EXPERIMENT: ABLATION + TRAINING
# ---------------------------------------------------------
def run_experiment(vocab_size, epochs_tr=15, epochs_rnn=15, use_beam_eval=True, beam_size=4):
    print(f"\n{'='*60}\nEXPERIMENT: VOCAB_SIZE = {vocab_size}\n{'='*60}")
    print("[1/6] Membuat tokenizer…")
    sp_en, sp_id = create_tokenizers(train_pairs, vocab_size)
    SRC_VOCAB, TGT_VOCAB = sp_en.get_piece_size(), sp_id.get_piece_size()
    PAD_SRC, EOS_SRC = sp_en.pad_id(), sp_en.eos_id()
    PAD_TGT, BOS_TGT, EOS_TGT = sp_id.pad_id(), sp_id.bos_id(), sp_id.eos_id()
    print(f"[SPM] EN vocab: {SRC_VOCAB} | ID vocab: {TGT_VOCAB}")
    
    print("[2/6] Membuat datasets & loaders…")
    train_ds = TranslationDataset(train_pairs, sp_en, sp_id)
    val_ds = TranslationDataset(val_pairs, sp_en, sp_id)
    test_ds = TranslationDataset(test_pairs, sp_en, sp_id)
    def collate_fn(batch): return collate_batch(batch, PAD_SRC, PAD_TGT)
    BATCH_SIZE = 32
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
    val_dl = DataLoader(val_ds, batch_size=1, shuffle=False, collate_fn=collate_fn)
    test_dl = DataLoader(test_ds, batch_size=1, shuffle=False, collate_fn=collate_fn)

    print(f"[3/6] Melatih Transformer (vocab={vocab_size})…")
    d_model, nhead, num_layers, dim_ff, dropout = 128, 4, 2, 256, 0.1
    transformer = TransformerMT(SRC_VOCAB, TGT_VOCAB, d_model, nhead, num_layers, dim_ff, dropout, PAD_SRC, PAD_TGT).to(DEVICE)
    optim_tr = optim.Adam(transformer.parameters(), lr=1e-4, betas=(0.9, 0.98), eps=1e-9)
    lr_scheduler_tr = optim.lr_scheduler.ReduceLROnPlateau(optim_tr, mode='max', patience=2, factor=0.5, verbose=True)
    criterion_tr = LabelSmoothingLoss(TGT_VOCAB, smoothing=0.1, pad_idx=PAD_TGT)
    tr_history = {'train_loss': [], 'val_bleu': [], 'val_chrf': []}; best_tr_bleu = 0
    for ep in range(1, epochs_tr + 1):
        loss = epoch_train_transformer(transformer, train_dl, optim_tr, criterion_tr)
        bleu, chrf, _ = eval_bleu_chrF_with_model(transformer, val_dl, decode_transformer, sp_en, sp_id, max_len=80, mode=('beam' if use_beam_eval else 'greedy'), beam=beam_size)
        tr_history['train_loss'].append(loss); tr_history['val_bleu'].append(bleu); tr_history['val_chrf'].append(chrf)
        lr_scheduler_tr.step(bleu)
        if bleu > best_tr_bleu:
            best_tr_bleu = bleu
            torch.save(transformer.state_dict(), WORKDIR/f"best_transformer_v{vocab_size}.pt")
        print(f"[Transformer][Epoch {ep:2d}] loss={loss:.4f} | valBLEU={bleu:.2f} | valchrF={chrf:.2f}")

    print(f"[4/6] Melatih RNN+Attention (vocab={vocab_size})…")
    emb_enc, emb_dec, hid, layers, dropout = 128, 128, 128, 1, 0.1
    encoder = EncoderRNN(SRC_VOCAB, emb_enc, hid, layers, dropout, True, PAD_SRC)
    decoder = DecoderRNN(TGT_VOCAB, emb_dec, hid*2, hid, dropout, PAD_TGT)
    seq2seq = Seq2SeqAttn(encoder, decoder, enc_bi=True, pad_src=PAD_SRC).to(DEVICE)
    optim_rnn = optim.Adam(seq2seq.parameters(), lr=3e-4)
    lr_scheduler_rnn = optim.lr_scheduler.ReduceLROnPlateau(optim_rnn, mode='max', patience=2, factor=0.5, verbose=True)
    criterion_rnn = nn.CrossEntropyLoss(ignore_index=PAD_TGT)
    rnn_history = {'train_loss': [], 'val_bleu': [], 'val_chrf': []}; best_rnn_bleu = 0
    for ep in range(1, epochs_rnn + 1):
        loss = epoch_train_seq2seq(seq2seq, train_dl, optim_rnn, criterion_rnn)
        bleu, chrf, _ = eval_bleu_chrF_with_model(seq2seq, val_dl, decode_seq2seq, sp_en, sp_id, max_len=80)
        rnn_history['train_loss'].append(loss); rnn_history['val_bleu'].append(bleu); rnn_history['val_chrf'].append(chrf)
        lr_scheduler_rnn.step(bleu)
        if bleu > best_rnn_bleu:
            best_rnn_bleu = bleu
            torch.save(seq2seq.state_dict(), WORKDIR/f"best_rnn_v{vocab_size}.pt")
        print(f"[RNN+Attn][Epoch {ep:2d}] loss={loss:.4f} | valBLEU={bleu:.2f} | valchrF={chrf:.2f}")

    print(f"[5/6] Evaluasi akhir (vocab={vocab_size})…")
    transformer.load_state_dict(torch.load(WORKDIR/f"best_transformer_v{vocab_size}.pt"))
    seq2seq.load_state_dict(torch.load(WORKDIR/f"best_rnn_v{vocab_size}.pt"))
    tr_bleu, tr_chrf, tr_prev = eval_bleu_chrF_with_model(transformer, test_dl, decode_transformer, sp_en, sp_id, max_len=80, mode=('beam' if use_beam_eval else 'greedy'), beam=beam_size)
    rn_bleu, rn_chrf, rn_prev = eval_bleu_chrF_with_model(seq2seq, test_dl, decode_seq2seq, sp_en, sp_id, max_len=80)
    print(f"\n[FINAL] Transformer : BLEU={tr_bleu:.2f} | chrF={tr_chrf:.2f}")
    print(f"[FINAL] RNN+Attention: BLEU={rn_bleu:.2f} | chrF={rn_chrf:.2f}")
    
    print(f"[6/6] Menyimpan hasil (vocab={vocab_size})…")
    # Perbaikan: Menggunakan try-except untuk mencegah kegagalan saat menyimpan file
    try:
        tr_prev_df = pd.DataFrame(tr_prev, columns=['src', 'hyp', 'ref'])
        tr_prev_df.to_csv(WORKDIR/f"preview_transformer_v{vocab_size}.csv", index=False)
        rn_prev_df = pd.DataFrame(rn_prev, columns=['src', 'hyp', 'ref'])
        rn_prev_df.to_csv(WORKDIR/f"preview_rnn_v{vocab_size}.csv", index=False)
        plot_training_curves(tr_history, f"Transformer_v{vocab_size}", WORKDIR/f"transformer_curves_v{vocab_size}.png")
        plot_training_curves(rnn_history, f"RNN+Attention_v{vocab_size}", WORKDIR/f"rnn_curves_v{vocab_size}.png")
        analyze_translation_errors(tr_prev, WORKDIR/f"error_analysis_transformer_v{vocab_size}.csv")
        analyze_translation_errors(rn_prev, WORKDIR/f"error_analysis_rnn_v{vocab_size}.csv")
    except Exception as e:
        print(f"Peringatan: Gagal menyimpan file CSV. Error: {e}")
        
    return {'vocab_size': vocab_size, 'transformer': {'bleu': tr_bleu, 'chrf': tr_chrf, 'best_val_bleu': best_tr_bleu}, 'rnn': {'bleu': rn_bleu, 'chrf': rn_chrf, 'best_val_bleu': best_rnn_bleu}, 'tr_history': tr_history, 'rnn_history': rnn_history}

Penjelasan:

kode ini mendefinisikan fungsi run_experiment. Fungsi ini sangat penting karena:

- Ini adalah "mesin" yang menjalankan seluruh eksperimen untuk satu set parameter (misalnya, vocab_size=2000).

- Kode ini sudah diperbaiki dengan menambahkan blok try-except saat menyimpan file CSV. Ini memastikan bahwa jika ada masalah saat menyimpan hasil, program tidak akan crash dan masih bisa menyelesaikan eksperimen.

#  4: Menjalankan Studi Ablasi dan Menampilkan Hasil
Kode ini memanggil fungsi-fungsi dari bagian sebelumnya untuk menjalankan studi ablasi secara keseluruhan, mengumpulkan semua hasil, dan menampilkan ringkasan akhir.

In [4]:
def run_ablation_study(vocab_sizes=(2000, 4000), epochs_tr=15, epochs_rnn=15, use_beam_eval=True, beam_size=4):
    print("\n" + "="*80)
    print("MEMULAI STUDI ABLASI: PERBANDINGAN UKURAN VOCABULARY")
    print("="*80)
    all_results = {}
    for vocab_size in vocab_sizes:
        start_time = time.time()
        result = run_experiment(vocab_size, epochs_tr=epochs_tr, epochs_rnn=epochs_rnn, use_beam_eval=use_beam_eval, beam_size=beam_size)
        end_time = time.time()
        result['training_time'] = end_time - start_time
        all_results[vocab_size] = result
        print(f"\n[VOCAB={vocab_size}] Pelatihan selesai dalam {(end_time-start_time)/60:.1f} menit")
    return all_results

def create_ablation_summary(all_results):
    print("\n" + "="*80)
    print("RINGKASAN STUDI ABLASI")
    print("="*80)
    summary_data = [{'vocab_size': v, 'tr_bleu': r['transformer']['bleu'], 'tr_chrf': r['transformer']['chrf'],
                     'rnn_bleu': r['rnn']['bleu'], 'rnn_chrf': r['rnn']['chrf'], 'training_time_min': r['training_time'] / 60}
                    for v, r in all_results.items()]
    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv(WORKDIR/"ablation_summary.csv", index=False)
    print("\nHASIL ABLASI:\n" + summary_df.to_string(index=False, float_format='%.2f'))
    best_tr_idx = summary_df['tr_bleu'].idxmax(); best_rnn_idx = summary_df['rnn_bleu'].idxmax()
    print(f"\nKONFIGURASI TERBAIK:\nTransformer: vocab={summary_df.loc[best_tr_idx, 'vocab_size']:.0f}, BLEU={summary_df.loc[best_tr_idx, 'tr_bleu']:.2f}\nRNN+Attn : vocab={summary_df.loc[best_rnn_idx, 'vocab_size']:.0f}, BLEU={summary_df.loc[best_rnn_idx, 'rnn_bleu']:.2f}")

    fig, axes = plt.subplots(2, 2, figsize=(12, 8)); vocab_sizes = summary_df['vocab_size'].values
    axes[0,0].plot(vocab_sizes, summary_df['tr_bleu'], 'o-', label='Transformer'); axes[0,0].plot(vocab_sizes, summary_df['rnn_bleu'], 's-', label='RNN+Attention'); axes[0,0].set_xlabel('Ukuran Vocabulary'); axes[0,0].set_ylabel('Skor BLEU'); axes[0,0].set_title('Test BLEU vs Ukuran Vocab'); axes[0,0].legend(); axes[0,0].grid(True, alpha=0.3)
    axes[0,1].plot(vocab_sizes, summary_df['tr_chrf'], 'o-', label='Transformer'); axes[0,1].plot(vocab_sizes, summary_df['rnn_chrf'], 's-', label='RNN+Attention'); axes[0,1].set_xlabel('Ukuran Vocabulary'); axes[0,1].set_ylabel('Skor chrF'); axes[0,1].set_title('Test chrF vs Ukuran Vocab'); axes[0,1].legend(); axes[0,1].grid(True, alpha=0.3)
    axes[1,0].bar(vocab_sizes - 50, summary_df['training_time_min'], width=100, alpha=0.7); axes[1,0].set_xlabel('Ukuran Vocabulary'); axes[1,0].set_ylabel('Waktu Pelatihan (menit)'); axes[1,0].set_title('Waktu Pelatihan vs Ukuran Vocab'); axes[1,0].grid(True, alpha=0.3)
    x_pos = np.arange(len(vocab_sizes)); width = 0.35
    axes[1,1].bar(x_pos - width/2, summary_df['tr_bleu'], width, label='Transformer'); axes[1,1].bar(x_pos + width/2, summary_df['rnn_bleu'], width, label='RNN+Attention'); axes[1,1].set_xlabel('Ukuran Vocabulary'); axes[1,1].set_ylabel('Skor BLEU'); axes[1,1].set_title('Perbandingan Model (BLEU)'); axes[1,1].set_xticks(x_pos); axes[1,1].set_xticklabels(vocab_sizes); axes[1,1].legend(); axes[1,1].grid(True, alpha=0.3)
    plt.tight_layout(); plt.savefig(WORKDIR/"ablation_study_results.png", dpi=150, bbox_inches='tight'); plt.close()
    return summary_df

if __name__ == '__main__':
    all_results = run_ablation_study(
        vocab_sizes=(2000, 4000),
        epochs_tr=10,
        epochs_rnn=10,
        use_beam_eval=True,
        beam_size=4
    )
    summary_df = create_ablation_summary(all_results)
    
    print("\n" + "="*80)
    print("CONTOH TERJEMAHAN DARI HASIL TERBAIK")
    print("="*80)
    try:
        best_vocab = int(summary_df.loc[summary_df['tr_bleu'].idxmax(), 'vocab_size'])
        tr_df = pd.read_csv(WORKDIR/f"preview_transformer_v{best_vocab}.csv")
        rnn_df = pd.read_csv(WORKDIR/f"preview_rnn_v{best_vocab}.csv")
        print("\n--- Prediksi Transformer ---")
        print(tr_df.head(5))
        print("\n--- Prediksi RNN+Attention ---")
        print(rnn_df.head(5))
    except Exception as e:
        print(f"Gagal memuat file CSV: {e}")


MEMULAI STUDI ABLASI: PERBANDINGAN UKURAN VOCABULARY

EXPERIMENT: VOCAB_SIZE = 2000
[1/6] Membuat tokenizer…


sentencepiece_trainer.cc(178) LOG(INFO) Running command: --input=/kaggle/working/train_text_v2000.en --model_prefix=/kaggle/working/en_spm_v2000 --vocab_size=2000 --character_coverage=1.0 --bos_id=1 --eos_id=2 --pad_id=3 --unk_id=0 --hard_vocab_limit=false
sentencepiece_trainer.cc(78) LOG(INFO) Starts training with : 
trainer_spec {
  input: /kaggle/working/train_text_v2000.en
  input_format: 
  model_prefix: /kaggle/working/en_spm_v2000
  model_type: UNIGRAM
  vocab_size: 2000
  self_test_sample_size: 0
  character_coverage: 1
  input_sentence_size: 0
  shuffle_input_sentence: 1
  seed_sentencepiece_size: 1000000
  shrinking_factor: 0.75
  max_sentence_length: 4192
  num_threads: 16
  num_sub_iterations: 2
  max_sentencepiece_length: 16
  split_by_unicode_script: 1
  split_by_number: 1
  split_by_whitespace: 1
  split_digits: 0
  pretokenization_delimiter: 
  treat_whitespace_as_suffix: 0
  allow_whitespace_only_pieces: 0
  required_chars: 
  byte_fallback: 0
  vocabulary_output_piece

[SPM] Model saved: /kaggle/working/en_spm_v2000.model and /kaggle/working/id_spm_v2000.model
[SPM] EN vocab: 2000 | ID vocab: 2000
[2/6] Membuat datasets & loaders…
[3/6] Melatih Transformer (vocab=2000)…


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  1] loss=2.3012 | valBLEU=0.00 | valchrF=36.82


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  2] loss=1.9379 | valBLEU=0.00 | valchrF=38.82


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  3] loss=1.8199 | valBLEU=0.00 | valchrF=38.82


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  4] loss=1.7246 | valBLEU=35.36 | valchrF=41.38


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  5] loss=1.6709 | valBLEU=0.00 | valchrF=35.41


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  6] loss=1.6117 | valBLEU=0.00 | valchrF=35.41


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  7] loss=1.5696 | valBLEU=0.00 | valchrF=37.00


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  8] loss=1.5357 | valBLEU=35.36 | valchrF=47.95


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  9] loss=1.4968 | valBLEU=35.36 | valchrF=47.95


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch 10] loss=1.4828 | valBLEU=35.36 | valchrF=47.95
[4/6] Melatih RNN+Attention (vocab=2000)…


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  1] loss=4.9569 | valBLEU=50.00 | valchrF=55.73


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  2] loss=4.0627 | valBLEU=35.36 | valchrF=38.57


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  3] loss=3.5473 | valBLEU=35.36 | valchrF=47.49


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  4] loss=3.1416 | valBLEU=35.36 | valchrF=68.24


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  5] loss=2.8380 | valBLEU=35.36 | valchrF=68.24


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  6] loss=2.6888 | valBLEU=50.00 | valchrF=70.84


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  7] loss=2.5558 | valBLEU=35.36 | valchrF=68.24


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  8] loss=2.4568 | valBLEU=35.36 | valchrF=68.24


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  9] loss=2.3975 | valBLEU=35.36 | valchrF=68.24


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch 10] loss=2.3399 | valBLEU=35.36 | valchrF=68.24
[5/6] Evaluasi akhir (vocab=2000)…


Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]


[FINAL] Transformer : BLEU=53.73 | chrF=53.22
[FINAL] RNN+Attention: BLEU=100.00 | chrF=100.00
[6/6] Menyimpan hasil (vocab=2000)…

[VOCAB=2000] Pelatihan selesai dalam 47.7 menit

EXPERIMENT: VOCAB_SIZE = 4000
[1/6] Membuat tokenizer…
[SPM] Model saved: /kaggle/working/en_spm_v4000.model and /kaggle/working/id_spm_v4000.model
[SPM] EN vocab: 3898 | ID vocab: 4000
[2/6] Membuat datasets & loaders…
[3/6] Melatih Transformer (vocab=4000)…


sentencepiece_trainer.cc(178) LOG(INFO) Running command: --input=/kaggle/working/train_text_v4000.en --model_prefix=/kaggle/working/en_spm_v4000 --vocab_size=4000 --character_coverage=1.0 --bos_id=1 --eos_id=2 --pad_id=3 --unk_id=0 --hard_vocab_limit=false
sentencepiece_trainer.cc(78) LOG(INFO) Starts training with : 
trainer_spec {
  input: /kaggle/working/train_text_v4000.en
  input_format: 
  model_prefix: /kaggle/working/en_spm_v4000
  model_type: UNIGRAM
  vocab_size: 4000
  self_test_sample_size: 0
  character_coverage: 1
  input_sentence_size: 0
  shuffle_input_sentence: 1
  seed_sentencepiece_size: 1000000
  shrinking_factor: 0.75
  max_sentence_length: 4192
  num_threads: 16
  num_sub_iterations: 2
  max_sentencepiece_length: 16
  split_by_unicode_script: 1
  split_by_number: 1
  split_by_whitespace: 1
  split_digits: 0
  pretokenization_delimiter: 
  treat_whitespace_as_suffix: 0
  allow_whitespace_only_pieces: 0
  required_chars: 
  byte_fallback: 0
  vocabulary_output_piece

Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  1] loss=2.5601 | valBLEU=0.00 | valchrF=15.15


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  2] loss=2.0579 | valBLEU=0.00 | valchrF=51.14


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  3] loss=1.9509 | valBLEU=0.00 | valchrF=38.82


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  4] loss=1.8627 | valBLEU=0.00 | valchrF=35.41


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  5] loss=1.7882 | valBLEU=45.18 | valchrF=41.97


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  6] loss=1.7652 | valBLEU=45.18 | valchrF=41.97


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  7] loss=1.7363 | valBLEU=0.00 | valchrF=35.41


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  8] loss=1.6910 | valBLEU=50.00 | valchrF=40.82


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch  9] loss=1.6816 | valBLEU=50.00 | valchrF=53.25


Transformer Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[Transformer][Epoch 10] loss=1.6494 | valBLEU=45.18 | valchrF=42.30
[4/6] Melatih RNN+Attention (vocab=4000)…


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  1] loss=5.1155 | valBLEU=45.18 | valchrF=56.71


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  2] loss=4.1561 | valBLEU=74.01 | valchrF=73.54


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  3] loss=3.6704 | valBLEU=53.73 | valchrF=49.00


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  4] loss=3.2208 | valBLEU=45.18 | valchrF=73.65


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  5] loss=2.8427 | valBLEU=45.18 | valchrF=73.65


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  6] loss=2.5502 | valBLEU=45.18 | valchrF=73.65


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  7] loss=2.4118 | valBLEU=45.18 | valchrF=73.65


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  8] loss=2.2815 | valBLEU=45.18 | valchrF=73.65


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch  9] loss=2.1691 | valBLEU=35.36 | valchrF=65.26


RNN+Attn Training:   0%|          | 0/373 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

[RNN+Attn][Epoch 10] loss=2.1095 | valBLEU=35.36 | valchrF=65.26
[5/6] Evaluasi akhir (vocab=4000)…


Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1488 [00:00<?, ?it/s]


[FINAL] Transformer : BLEU=35.36 | chrF=47.96
[FINAL] RNN+Attention: BLEU=90.36 | chrF=86.59
[6/6] Menyimpan hasil (vocab=4000)…

[VOCAB=4000] Pelatihan selesai dalam 48.8 menit

RINGKASAN STUDI ABLASI

HASIL ABLASI:
 vocab_size  tr_bleu  tr_chrf  rnn_bleu  rnn_chrf  training_time_min
       2000    53.73    53.22    100.00    100.00              47.66
       4000    35.36    47.96     90.36     86.59              48.78

KONFIGURASI TERBAIK:
Transformer: vocab=2000, BLEU=53.73
RNN+Attn : vocab=2000, BLEU=100.00

CONTOH TERJEMAHAN DARI HASIL TERBAIK

--- Prediksi Transformer ---
                                       src                  hyp  \
0           tom is going to do that again.  tom tidak tahu itu.   
1                          there is a cat.      adalah di sini.   
2                     this is interesting.          ini adalah.   
3  how many countries are there in europe?      apakah di mana?   
4                        i can't eat pork.      aku tidak bisa.   

           

Penjelasan:

kode ini adalah bagian eksekusi utama. Bagian ini akan:

- Memanggil fungsi run_ablation_study untuk menjalankan eksperimen pada ukuran vocabulary yang berbeda.

- Menggunakan create_ablation_summary untuk mengumpulkan hasil dari setiap eksperimen.

- Mencetak tabel ringkasan, membuat grafik perbandingan, dan menampilkan contoh hasil terjemahan terbaik.