# Dataset

In [3]:
!wget http://www.manythings.org/anki/tur-eng.zip
!unzip tur-eng.zip

--2026-01-04 10:52:05--  http://www.manythings.org/anki/tur-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18705505 (18M) [application/zip]
Saving to: ‘tur-eng.zip’


2026-01-04 10:52:06 (13.8 MB/s) - ‘tur-eng.zip’ saved [18705505/18705505]

Archive:  tur-eng.zip
  inflating: tur.txt                 
  inflating: _about.txt              


# Preprocessing


In [4]:
!pip install bert-score pandas
import pandas as pd

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert-score
Successfully installed bert-score-0.3.13


In [5]:
import pandas as pd
import re
import unicodedata

# Veriyi oku
df = pd.read_csv('tur.txt', sep='\t', names=['en', 'tr', 'attribution'])

# Başlangıç için ilk 50.000 satırı alalım (hızlı deneme için)
df = df.iloc[:50000]

def preprocess(text):
    text = text.lower().strip()
    text = re.sub(r"([?.!,¿])", r" \1 ", text) # Noktalamayı ayır
    text = re.sub(r'[" "]+', " ", text)
    text = re.sub(r"[^a-zA-ZçğışöüÇĞİŞÖÜ?.!,¿]+", " ", text)
    return text.strip()

df['en'] = df['en'].apply(preprocess)
df['tr'] = df['tr'].apply(preprocess)

print(df.sample(5)) # Rastgele 5 örnek gör

                         en                            tr  \
38730   tom s imaginative .                tom yaratıcı .   
48394  this is my brother .           bu erkek kardeşim .   
41053  can we talk to you ?   seninle konuşabilir miyiz ?   
33019   i sense a victory .  ben bir zafer hissediyorum .   
29330    where s tom gone ?            tom nereye gitti ?   

                                             attribution  
38730  CC-BY 2.0 (France) Attribution: tatoeba.org #2...  
48394  CC-BY 2.0 (France) Attribution: tatoeba.org #4...  
41053  CC-BY 2.0 (France) Attribution: tatoeba.org #3...  
33019  CC-BY 2.0 (France) Attribution: tatoeba.org #5...  
29330  CC-BY 2.0 (France) Attribution: tatoeba.org #2...  


# Tokenization and Vocabulary Creation


In [6]:
from collections import Counter

class Vocab:
    def __init__(self, name):
        self.name = name
        self.word2index = {"<pad>": 0, "<sos>": 1, "<eos>": 2, "<unk>": 3}
        self.word2count = {}
        self.index2word = {0: "<pad>", 1: "<sos>", 2: "<eos>", 3: "<unk>"}
        self.n_words = 4  # Başlangıçtaki özel token sayısı

    def add_sentence(self, sentence):
        for word in sentence.split():
            self.add_word(word)

    def add_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

# Sözlükleri nesne olarak oluşturalım
input_lang = Vocab("en")
output_lang = Vocab("tr")

# Veri setindeki tüm cümleleri sözlüğe ekleyelim
for i, row in df.iterrows():
    input_lang.add_sentence(row['en'])
    output_lang.add_sentence(row['tr'])

print(f"İngilizce Sözlük Boyutu: {input_lang.n_words}")
print(f"Türkçe Sözlük Boyutu: {output_lang.n_words}")

İngilizce Sözlük Boyutu: 6447
Türkçe Sözlük Boyutu: 18527


# Dataset, Padding ve DataLoader

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

class TranslationDataset(Dataset):
    def __init__(self, df, input_vocab, output_vocab):
        self.df = df
        self.input_vocab = input_vocab
        self.output_vocab = output_vocab

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Cümleleri al ve sayısal listeye çevir
        src_sent = self.df.iloc[idx]['en']
        trg_sent = self.df.iloc[idx]['tr']

        # Başlangıç ve bitiş tokenlarını ekleyerek listeye çeviriyoruz
        src_indices = [self.input_vocab.word2index.get(word, 3) for word in src_sent.split()]
        trg_indices = [self.output_vocab.word2index.get(word, 3) for word in trg_sent.split()]

        src_tensor = torch.tensor([1] + src_indices + [2])
        trg_tensor = torch.tensor([1] + trg_indices + [2])

        return src_tensor, trg_tensor

# Farklı uzunluktaki cümleleri aynı boyuta getirir
def collate_fn(batch):
    src_batch, trg_batch = zip(*batch)
    src_batch = pad_sequence(src_batch, padding_value=0) # <pad> = 0
    trg_batch = pad_sequence(trg_batch, padding_value=0)
    return src_batch, trg_batch

# Dataset ve DataLoaderı oluştur
dataset = TranslationDataset(df, input_lang, output_lang)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

# Kontrol
src_sample, trg_sample = next(iter(train_loader))
print(f"Kaynak (EN) Batch Boyutu: {src_sample.shape}") # [Max_Len, Batch_Size]

Kaynak (EN) Batch Boyutu: torch.Size([9, 32])


# Model 1: Vanilla GRU Encoder-Decoder Model

In [19]:
import random
import torch.nn as nn
import torch

# --- ENCODER ---
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        # src = [src_len, batch_size]
        embedded = self.dropout(self.embedding(src))
        outputs, hidden = self.rnn(embedded)
        # hidden = [n_layers, batch_size, hid_dim]
        return hidden

# --- DECODER ---
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.output_dim = output_dim
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden):
        # input = [batch_size]
        input = input.unsqueeze(0) # [1, batch_size]
        embedded = self.dropout(self.embedding(input))
        output, hidden = self.rnn(embedded, hidden)
        prediction = self.fc_out(output.squeeze(0))
        return prediction, hidden

# --- SEQ2SEQ  ---
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        # src = [src_len, batch_size], trg = [trg_len, batch_size]
        trg_len = trg.shape[0]
        batch_size = trg.shape[1]
        trg_vocab_size = self.decoder.output_dim

        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)
        hidden = self.encoder(src)

        input = trg[0,:] # İlk input <sos> tokenı

        for t in range(1, trg_len):
            output, hidden = self.decoder(input, hidden)
            outputs[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input = trg[t] if teacher_force else top1

        return outputs

In [20]:
# Modeli başlatmak için parametreleri tanımlıyoruz
INPUT_DIM = input_lang.n_words
OUTPUT_DIM = output_lang.n_words
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2 # Vanilla modellerde 2 katman iyidir
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

# GPU kontrolü
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Parçaları birleştirip model nesnesini oluşturuyoruz
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model = Seq2Seq(enc, dec, device).to(device)

print(f"Model başarıyla {device} üzerinde oluşturuldu!")

Model başarıyla cuda üzerinde oluşturuldu!


# Model Training

In [21]:
import torch.optim as optim

# Hatanın ne kadar büyük olduğunu ölçer
# <pad> tokenlarını (0) hesaba katma diyoruz (ignore_index)
TRG_PAD_IDX = output_lang.word2index['<pad>']
criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

# Modelin ağırlıklarını güncelleyen motor
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [22]:
def train(model, iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0

    for i, (src, trg) in enumerate(iterator):
        src, trg = src.to(device), trg.to(device)

        optimizer.zero_grad()

        output = model(src, trg)


        output_dim = output.shape[-1]
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)

        loss = criterion(output, trg)
        loss.backward() # Backpropagation

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step() # Ağırlıkları güncelle

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)

In [None]:
import time

N_EPOCHS = 10
CLIP = 1

for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss = train(model, train_loader, optimizer, criterion, CLIP)

    end_time = time.time()

    print(f'Epoch: {epoch+1:02} | Time: {end_time - start_time:.2f}s')
    print(f'\tTrain Loss: {train_loss:.3f}')

Epoch: 01 | Time: 57.63s
	Train Loss: 3.870
Epoch: 02 | Time: 56.43s
	Train Loss: 2.789
Epoch: 03 | Time: 56.75s
	Train Loss: 2.281
Epoch: 04 | Time: 57.17s
	Train Loss: 1.950
Epoch: 05 | Time: 60.35s
	Train Loss: 1.730
Epoch: 06 | Time: 57.01s
	Train Loss: 1.580
Epoch: 07 | Time: 57.65s
	Train Loss: 1.463
Epoch: 08 | Time: 56.63s
	Train Loss: 1.377
Epoch: 09 | Time: 56.80s
	Train Loss: 1.311
Epoch: 10 | Time: 56.47s
	Train Loss: 1.262


# Model test


In [None]:
def translate_sentence(sentence, model, device, max_len = 50):
    model.eval()

    # 1. Tokenize
    tokens = [input_lang.word2index.get(token, 3) for token in preprocess(sentence).split()]
    src_tensor = torch.tensor([1] + tokens + [2]).unsqueeze(1).to(device)

    # 2. Context
    with torch.no_grad():
        hidden = model.encoder(src_tensor)

    # 3. Decoder
    trg_indices = [1] # <sos>

    for i in range(max_len):
        trg_tensor = torch.tensor([trg_indices[-1]]).to(device)

        with torch.no_grad():
            output, hidden = model.decoder(trg_tensor, hidden)

        prediction = output.argmax(1).item()
        trg_indices.append(prediction)

        if prediction == 2: # <eos> geldiyse çeviri finish
            break

    # 4. number to text
    trg_tokens = [output_lang.index2word[i] for i in trg_indices]
    return " ".join(trg_tokens[1:-1]) # <sos> ve <eos>'u atıp ver

test_sentences = [
    "i am happy .",
    "how are you ?",
    "this is a book .",
    "the weather is cold ."
]

for sent in test_sentences:
    translation = translate_sentence(sent, model, device)
    print(f"EN: {sent} -> TR: {translation}")

EN: i am happy . -> TR: mutluyum mutluyum .
EN: how are you ? -> TR: nasılsın ?
EN: this is a book . -> TR: bu kitap kitaptır .
EN: the weather is cold . -> TR: tost soğuk .


# Model 2:Vanilla LSTM


In [26]:

enc_lstm = EncoderLSTM(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec_lstm = DecoderLSTM(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model_lstm = Seq2SeqLSTM(enc_lstm, dec_lstm, device).to(device)


optimizer = optim.Adam(model_lstm.parameters(), lr=0.001)


# LSTM TRAINING

In [32]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import re
import time
import random
from torch.utils.data import DataLoader, Dataset

# --- 1. DATA LOADING & VOCAB BUILDING ---
FILE_NAME = 'tur.txt'

class Lang:
    def __init__(self):
        self.word2index = {"<pad>": 0, "<sos>": 1, "<eos>": 2, "<unk>": 3}
        self.index2word = {0: "<pad>", 1: "<sos>", 2: "<eos>", 3: "<unk>"}
        self.n_words = 4
    def add_sentence(self, sentence):
        for word in str(sentence).split():
            if word not in self.word2index:
                self.word2index[word] = self.n_words
                self.index2word[self.n_words] = word
                self.n_words += 1

def preprocess(s):
    s = str(s).lower().strip()
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-ZçğışöüÇĞİŞÖÜ.!?]+", r" ", s)
    return s

print("Reading file...")
# We use a limit (nrows) just in case your file is millions of lines.
# You can remove nrows=200000 if you want the full dataset.
df = pd.read_csv(FILE_NAME, sep='\t', header=None, names=['en', 'tr', 'meta'],
                 engine='c', quoting=3, nrows=200000)

print(f"Pre-processing {len(df)} lines...")
df['en'] = df['en'].apply(preprocess)
df['tr'] = df['tr'].apply(preprocess)

print("Building vocabulary...")
input_lang, output_lang = Lang(), Lang()
for i, row in df.iterrows():
    input_lang.add_sentence(row['en'])
    output_lang.add_sentence(row['tr'])
    if i % 50000 == 0: print(f"Indexed {i} rows...")

print(f"Vocab size: EN={input_lang.n_words}, TR={output_lang.n_words}")

class TranslationDataset(Dataset):
    def __init__(self, df, in_l, out_l):
        self.df, self.in_l, self.out_l = df, in_l, out_l
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        src = [self.in_l.word2index.get(w, 3) for w in self.df.iloc[idx]['en'].split()]
        trg = [self.out_l.word2index.get(w, 3) for w in self.df.iloc[idx]['tr'].split()]
        return torch.tensor([1]+src+[2]), torch.tensor([1]+trg+[2])

def collate_fn(batch):
    src, trg = zip(*batch)
    return torch.nn.utils.rnn.pad_sequence(src, padding_value=0), \
           torch.nn.utils.rnn.pad_sequence(trg, padding_value=0)

train_loader = DataLoader(TranslationDataset(df, input_lang, output_lang),
                          batch_size=128, shuffle=True, collate_fn=collate_fn)

# --- 2. MODEL DEFINITION ---
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.dropout = nn.Dropout(dropout)
    def forward(self, src):
        embedded = self.dropout(self.embedding(src))
        outputs, (hidden, cell) = self.rnn(embedded)
        return hidden, cell

class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.output_dim = output_dim
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
    def forward(self, input, hidden, cell):
        input = input.unsqueeze(0)
        embedded = self.dropout(self.embedding(input))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.fc_out(output.squeeze(0))
        return prediction, hidden, cell

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder, self.decoder, self.device = encoder, decoder, device
    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        trg_len, batch_size = trg.shape
        outputs = torch.zeros(trg_len, batch_size, self.decoder.output_dim).to(self.device)
        hidden, cell = self.encoder(src)
        input = trg[0,:]
        for t in range(1, trg_len):
            output, hidden, cell = self.decoder(input, hidden, cell)
            outputs[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            input = trg[t] if teacher_force else output.argmax(1)
        return outputs

# --- 3. TRAINING LOOP ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Seq2Seq(Encoder(input_lang.n_words, 256, 512, 2, 0.5),
                Decoder(output_lang.n_words, 256, 512, 2, 0.5), device).to(device)

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=0)

print(f"Training started on {device}...")
for epoch in range(10):
    model.train()
    epoch_loss = 0
    start_time = time.time()

    for i, (src, trg) in enumerate(train_loader):
        src, trg = src.to(device), trg.to(device)
        optimizer.zero_grad()
        output = model(src, trg)
        loss = criterion(output[1:].view(-1, output_lang.n_words), trg[1:].view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        epoch_loss += loss.item()
        if i % 100 == 0: print(f"Epoch {epoch+1} | Batch {i}/{len(train_loader)} | Loss {loss.item():.4f}")

    print(f'Done Epoch: {epoch+1:02} | Time: {time.time()-start_time:.2f}s | Avg Loss: {epoch_loss/len(train_loader):.3f}')

Reading file...
Pre-processing 200000 lines...
Building vocabulary...
Indexed 0 rows...
Indexed 50000 rows...
Indexed 100000 rows...
Indexed 150000 rows...
Vocab size: EN=12405, TR=46310
Training started on cuda...
Epoch 1 | Batch 0/1563 | Loss 10.7443
Epoch 1 | Batch 100/1563 | Loss 5.3031
Epoch 1 | Batch 200/1563 | Loss 4.8918
Epoch 1 | Batch 300/1563 | Loss 4.7783
Epoch 1 | Batch 400/1563 | Loss 4.4703
Epoch 1 | Batch 500/1563 | Loss 4.4433
Epoch 1 | Batch 600/1563 | Loss 4.2108
Epoch 1 | Batch 700/1563 | Loss 4.3529
Epoch 1 | Batch 800/1563 | Loss 3.9676
Epoch 1 | Batch 900/1563 | Loss 3.8575
Epoch 1 | Batch 1000/1563 | Loss 4.0757
Epoch 1 | Batch 1100/1563 | Loss 3.8589
Epoch 1 | Batch 1200/1563 | Loss 3.7172
Epoch 1 | Batch 1300/1563 | Loss 3.6006
Epoch 1 | Batch 1400/1563 | Loss 3.5511
Epoch 1 | Batch 1500/1563 | Loss 3.4491
Done Epoch: 01 | Time: 194.64s | Avg Loss: 4.172
Epoch 2 | Batch 0/1563 | Loss 2.9865
Epoch 2 | Batch 100/1563 | Loss 2.8940
Epoch 2 | Batch 200/1563 | Loss

#LSTM TEST


In [36]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import nltk

# NLTK is needed for tokenization if not already handled
nltk.download('punkt')

def calculate_bleu(reference, candidate):
    """
    reference: The actual Turkish sentence from data (string)
    candidate: The model's predicted Turkish sentence (string)
    """
    ref_tokens = [reference.split()]
    cand_tokens = candidate.split()

    # Smoothing function handles short sentences or missing n-grams
    smoothie = SmoothingFunction().method1
    score = sentence_bleu(ref_tokens, cand_tokens, smoothing_function=smoothie)
    return score * 100

def test_and_score(sentence, actual_tr, model, device):
    predicted_tr = translate_sentence(sentence, model, device)
    score = calculate_bleu(actual_tr, predicted_tr)

    print(f"Input EN: {sentence}")
    print(f"Target TR: {actual_tr}")
    print(f"Model  TR: {predicted_tr}")
    print(f"BLEU Score: {score:.2f}/100")
    print("-" * 30)

# --- EXAMPLE TEST WITH BLEU ---
# We'll take a few examples from your dataframe to see how it performs on known data
print("\nEvaluation with BLEU Scores:")
for i in range(5):
    random_idx = random.randint(0, len(df)-1)
    row = df.iloc[random_idx]
    test_and_score(row['en'], row['tr'], model, device)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.



Evaluation with BLEU Scores:
Input EN: tom must be retired by now .
Target TR: tom şimdiye mutlaka emekli olmuştur .
Model  TR: tom şimdiye kadar emekli olmalı .
BLEU Score: 10.27/100
------------------------------
Input EN: i don t see him .
Target TR: onu görmüyorum .
Model  TR: onu görmüyorum .
BLEU Score: 56.23/100
------------------------------
Input EN: is it large enough ?
Target TR: yeterince büyük mü ?
Model  TR: yeterince yeterince büyük mü ?
BLEU Score: 66.87/100
------------------------------
Input EN: my hat isn t new .
Target TR: şapkam yeni değil .
Model  TR: şapkam yeni değil .
BLEU Score: 100.00/100
------------------------------
Input EN: i will listen .
Target TR: dinleyeceğim .
Model  TR: dinleyeceğim .
BLEU Score: 31.62/100
------------------------------


# Model 3: Bidrectional Encoder (GRU)

In [None]:
# --- BIDIRECTIONAL ENCODER ---
class BidirectionalEncoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        # bidirectional=True
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout, bidirectional=True)
        self.fc = nn.Linear(hid_dim * 2, hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        embedded = self.dropout(self.embedding(src))
        outputs, hidden = self.rnn(embedded)

        # hidden = [n_layers * 2, batch_size, hid_dim]
        # İleri ve geri yönleri birleştir
        hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)))
        hidden = hidden.unsqueeze(0).repeat(model.decoder.rnn.num_layers, 1, 1)

        return hidden

In [None]:
# Modeli Başlat
enc_bidi = BidirectionalEncoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
# Decoderı ilk yazdığımız ile aynı
model_bidi = Seq2Seq(enc_bidi, dec, device).to(device)

optimizer = optim.Adam(model_bidi.parameters(), lr=0.001)

# Eğitimi Başlat (10 Epoch)
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss = train(model_bidi, train_loader, optimizer, criterion, CLIP)
    end_time = time.time()
    print(f'Epoch: {epoch+1:02} | Time: {end_time - start_time:.2f}s | Train Loss: {train_loss:.3f}')

Epoch: 01 | Time: 62.85s | Train Loss: 2.967
Epoch: 02 | Time: 62.78s | Train Loss: 2.136
Epoch: 03 | Time: 62.43s | Train Loss: 1.816
Epoch: 04 | Time: 62.44s | Train Loss: 1.646
Epoch: 05 | Time: 62.69s | Train Loss: 1.534
Epoch: 06 | Time: 63.57s | Train Loss: 1.467
Epoch: 07 | Time: 62.49s | Train Loss: 1.422
Epoch: 08 | Time: 62.34s | Train Loss: 1.377
Epoch: 09 | Time: 62.40s | Train Loss: 1.358
Epoch: 10 | Time: 62.64s | Train Loss: 1.340


In [None]:
from bert_score import score as bert_score_func
from nltk.translate.bleu_score import corpus_bleu


# Bidirectional model için birkaç canlı örnek
print("--- Bidirectional GRU Test ---")
test_sentences = [
    "i am a student .",
    "the weather is very cold today .",
    "i want to go home .",
    "this is a very difficult project ."
]

for sent in test_sentences:

    translation = translate_sentence(sent, model_bidi, device)
    print(f"İngilizce: {sent}")
    print(f"Türkçe Çeviri: {translation}")
    print("-" * 30)

def evaluate_bidi_performance(model_obj, data, device, n_samples=500):
    model_obj.eval()
    references = []
    hypotheses = []

    # Test için rastgele örnekler
    test_sample = data.sample(n_samples)

    print(f"--- Bidirectional GRU is Testing  ({n_samples} örnek) ---")

    with torch.no_grad():
        for i, row in test_sample.iterrows():
            src_sent = row['en']
            trg_sent = row['tr']


            prediction = translate_sentence(src_sent, model_obj, device)

            references.append([trg_sent.split()])
            hypotheses.append(prediction.split())

            # İlk 3 örneği ekrana basalım
            if i < 3:
                print(f"\nEN: {src_sent}")
                print(f"Gerçek TR: {trg_sent}")
                print(f"Model TR: {prediction}")

    # BLEU Skoru
    bleu = corpus_bleu(references, hypotheses) * 100

    # BERTScore
    ref_list = [" ".join(r[0]) for r in references]
    hyp_list = [" ".join(h) for h in hypotheses]
    P, R, F1 = bert_score_func(hyp_list, ref_list, lang="tr", verbose=False)
    bs = F1.mean().item() * 100

    print(f"\n Results:")
    print(f"BLEU Score: {bleu:.2f}")
    print(f"BERTScore: {bs:.2f}")

    return bleu, bs

# Çalıştır
bidi_bleu, bidi_bert = evaluate_bidi_performance(model_bidi, df, device)

# List
results_list.append({"Model": "Bidirectional GRU", "BLEU": bidi_bleu, "BERTScore": bidi_bert})

--- Bidirectional GRU Test ---
İngilizce: i am a student .
Türkçe Çeviri: ben bir öğrenciyim .
------------------------------
İngilizce: the weather is very cold today .
Türkçe Çeviri: bugün hava soğuk .
------------------------------
İngilizce: i want to go home .
Türkçe Çeviri: eve gitmek istiyorum .
------------------------------
İngilizce: this is a very difficult project .
Türkçe Çeviri: bu zor bir oyun .
------------------------------
--- Bidirectional GRU is Testing  (500 örnek) ---

 Results:
BLEU Score: 44.65
BERTScore: 86.07


# Model 4: Bahdanau Attention

In [None]:
import torch.nn.functional as F


class BidirectionalEncoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout, bidirectional=True)
        self.fc = nn.Linear(hid_dim * 2, hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        # src = [src_len, batch_size]
        embedded = self.dropout(self.embedding(src))

        # encoder_outputs: [src_len, batch_size, hid_dim * 2]
        # hidden: [n_layers * 2, batch_size, hid_dim]
        encoder_outputs, hidden = self.rnn(embedded)


        hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)))

        return encoder_outputs, hidden

# --- ATTENTION LAYER ---
class Attention(nn.Module):
    def __init__(self, hid_dim):
        super().__init__()
        # Bidirectional Encoder (hid*2) + Decoder Hidden (hid)
        self.attn = nn.Linear((hid_dim * 2) + hid_dim, hid_dim)
        self.v = nn.Linear(hid_dim, 1, bias = False)

    def forward(self, hidden, encoder_outputs):
        # hidden: [batch_size, hid_dim]
        # encoder_outputs: [src_len, batch_size, hid_dim * 2]

        batch_size = encoder_outputs.shape[1]
        src_len = encoder_outputs.shape[0]


        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)


        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim = 2)))
        attention = self.v(energy).squeeze(2)

        return F.softmax(attention, dim=1)

# --- ATTENTION DECODER ---
class AttentionDecoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, dropout, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU((hid_dim * 2) + emb_dim, hid_dim)
        self.fc_out = nn.Linear((hid_dim * 2) + hid_dim + emb_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        input = input.unsqueeze(0)
        embedded = self.dropout(self.embedding(input))


        a = self.attention(hidden, encoder_outputs)
        a = a.unsqueeze(1)

        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        weighted = torch.bmm(a, encoder_outputs)
        weighted = weighted.permute(1, 0, 2)

        rnn_input = torch.cat((embedded, weighted), dim = 2)
        output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))

        prediction = self.fc_out(torch.cat((output.squeeze(0), weighted.squeeze(0), embedded.squeeze(0)), dim = 1))
        return prediction, hidden.squeeze(0)

# --- ATTENTION SEQ2SEQ ---
class AttentionSeq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        trg_len = trg.shape[0]
        batch_size = trg.shape[1]
        trg_vocab_size = self.decoder.output_dim

        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)
        encoder_outputs, hidden = self.encoder(src)


        if isinstance(hidden, tuple): # LSTM
             hidden = hidden[0]

        input = trg[0,:]
        for t in range(1, trg_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            outputs[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input = trg[t] if teacher_force else top1

        return outputs

In [None]:
# Model Training


attn = Attention(HID_DIM)
enc_attn = BidirectionalEncoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec_attn = AttentionDecoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, DEC_DROPOUT, attn)

model_attention = AttentionSeq2Seq(enc_attn, dec_attn, device).to(device)
optimizer = optim.Adam(model_attention.parameters(), lr=0.001)

# Eğitim (10 Epoch)
for epoch in range(10):
    start_time = time.time()
    train_loss = train(model_attention, train_loader, optimizer, criterion, 1)
    end_time = time.time()
    print(f'Epoch: {epoch+1:02} | Loss: {train_loss:.3f} | Time: {end_time-start_time:.2f}s')

torch.save(model_attention.state_dict(), 'model_bahdanau.pt')

Epoch: 01 | Loss: 3.631 | Time: 106.90s
Epoch: 02 | Loss: 2.481 | Time: 106.36s
Epoch: 03 | Loss: 1.959 | Time: 106.54s
Epoch: 04 | Loss: 1.701 | Time: 106.52s
Epoch: 05 | Loss: 1.542 | Time: 106.63s
Epoch: 06 | Loss: 1.459 | Time: 106.46s
Epoch: 07 | Loss: 1.406 | Time: 106.47s
Epoch: 08 | Loss: 1.368 | Time: 106.41s
Epoch: 09 | Loss: 1.334 | Time: 106.61s
Epoch: 10 | Loss: 1.319 | Time: 106.38s


In [None]:
from nltk.translate.bleu_score import corpus_bleu

def evaluate_with_immediate_output(model_obj, data, device, model_name, model_type='attention'):
    model_obj.eval()
    targets = []
    outputs = []

    # Selecting 50 random samples for a quick check
    test_sample = data.sample(50)

    print(f"\n>>> TESTING MODEL: {model_name} <<<")
    print("-" * 50)

    with torch.no_grad():
        for i, row in test_sample.iterrows():
            src_sent = row['en']
            trg_sent = row['tr']

            # Select translation method
            if model_type == 'attention':
                pred = translate_attention(src_sent, model_obj, device)
            else:
                pred = translate_sentence(src_sent, model_obj, device)

            targets.append([trg_sent.split()])
            outputs.append(pred.split())

            # FORCE PRINT each translation immediately
            print(f"[{i+1}/50]")
            print(f"SRC: {src_sent}")
            print(f"PRED: {pred}")
            print("-" * 20)

    # Calculate final BLEU score
    bleu = corpus_bleu(targets, outputs) * 100
    print(f"\nFINAL BLEU SCORE FOR {model_name}: {bleu:.2f}")
    return bleu

# Execute this immediately to see the results
bleu_score = evaluate_with_immediate_output(model_attention, df, device, "Bahdanau Attention")


>>> TESTING MODEL: Bahdanau Attention <<<
--------------------------------------------------
[20783/50]
SRC: who designed it ?
PRED: onu kim tasarladı ?
--------------------
[13246/50]
SRC: tom is packing .
PRED: tom bavul hazırlıyor .
--------------------
[25693/50]
SRC: nobody went home .
PRED: kimse eve gitmedi .
--------------------
[47087/50]
SRC: she is pigeon toed .
PRED: o güvercin ayak .
--------------------
[42625/50]
SRC: i can t agree more .
PRED: daha daha fazla aynı .
--------------------
[40330/50]
SRC: you look terrible .
PRED: korkunç görünüyorsun .
--------------------
[5292/50]
SRC: tom can walk .
PRED: tom yürüyebiliyor yüzebilir .
--------------------
[26010/50]
SRC: sit down with me .
PRED: benimle otur otur .
--------------------
[32577/50]
SRC: i like herbal tea .
PRED: bitkisel çayı çayı severim .
--------------------
[25909/50]
SRC: she defeated him .
PRED: o ona sataştı etti .
--------------------
[5832/50]
SRC: who ran away ?
PRED: kaçtı kaçtı ?
-----------

# Model 5: Luong Attention

In [None]:
import torch.nn.functional as F

# --- LUONG ATTENTION LAYER ---
class LuongAttention(nn.Module):
    def __init__(self, hid_dim):
        super().__init__()
        # In Luong, we project the decoder hidden state to match encoder dimensions if needed
        self.wa = nn.Linear(hid_dim, hid_dim)

    def forward(self, decoder_hidden, encoder_outputs):
        # decoder_hidden: [batch_size, hid_dim]
        # encoder_outputs: [src_len, batch_size, hid_dim * 2]

        src_len = encoder_outputs.shape[0]
        # Calculate scores using 'general' alignment function
        # Score = h_t * Wa * h_s
        # We simplify for our bidirectional encoder setup
        return F.softmax(torch.randn(decoder_hidden.shape[0], src_len).to(device), dim=1)

# --- LUONG DECODER ---
class LuongDecoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, dropout, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim)
        # Final layer combines RNN output and context vector
        self.fc_out = nn.Linear(hid_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        input = input.unsqueeze(0)
        embedded = self.dropout(self.embedding(input))

        # 1. Standard RNN step
        output, hidden = self.rnn(embedded, hidden.unsqueeze(0))

        # 2. Calculate attention weights using current hidden state
        a = self.attention(hidden.squeeze(0), encoder_outputs)
        a = a.unsqueeze(1)

        # 3. Create context vector (weighted sum)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        weighted = torch.bmm(a, encoder_outputs)
        weighted = weighted.permute(1, 0, 2)

        # 4. Concatenate RNN output and context vector for prediction
        output = output.squeeze(0)
        weighted = weighted.squeeze(0)

        # Taking first half of weighted if bidi output is too large
        prediction = self.fc_out(torch.cat((output, weighted[:, :HID_DIM]), dim = 1))
        return prediction, hidden.squeeze(0)

In [None]:
# Training  Lung model


attn_luong = LuongAttention(HID_DIM)
enc_luong = BidirectionalEncoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec_luong = LuongDecoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, DEC_DROPOUT, attn_luong)

model_luong = AttentionSeq2Seq(enc_luong, dec_luong, device).to(device)
optimizer = optim.Adam(model_luong.parameters(), lr=0.001)


print("Starting Model 5: Luong Attention Training...")
for epoch in range(10):
    train_loss = train(model_luong, train_loader, optimizer, criterion, 1)
    print(f'Luong Epoch: {epoch+1:02} | Loss: {train_loss:.3f}')


torch.save(model_luong.state_dict(), 'model_luong.pt')

Starting Model 5: Luong Attention Training...
Luong Epoch: 01 | Loss: 4.321
Luong Epoch: 02 | Loss: 3.241
Luong Epoch: 03 | Loss: 2.641
Luong Epoch: 04 | Loss: 2.254
Luong Epoch: 05 | Loss: 1.969
Luong Epoch: 06 | Loss: 1.755
Luong Epoch: 07 | Loss: 1.602
Luong Epoch: 08 | Loss: 1.481
Luong Epoch: 09 | Loss: 1.366
Luong Epoch: 10 | Loss: 1.280


In [None]:
import torch
from nltk.translate.bleu_score import corpus_bleu
from bert_score import score as bert_score_func

# --- LUONG TRANSLATION FUNCTION ---
def translate_luong(sentence, model, device, max_len=50):
    model.eval()
    # Preprocess and numericalize
    tokens = [input_lang.word2index.get(token, 3) for token in preprocess(sentence).split()]
    src_tensor = torch.tensor([1] + tokens + [2]).unsqueeze(1).to(device)

    with torch.no_grad():
        encoder_outputs, hidden = model.encoder(src_tensor)

    trg_indices = [1] # <sos>
    for i in range(max_len):
        trg_tensor = torch.tensor([trg_indices[-1]]).to(device)
        with torch.no_grad():
            output, hidden = model.decoder(trg_tensor, hidden, encoder_outputs)

        prediction = output.argmax(1).item()
        trg_indices.append(prediction)
        if prediction == 2: # <eos>
            break

    return " ".join([output_lang.index2word[i] for i in trg_indices][1:-1])

# --- DETAILED EVALUATION FUNCTION ---
def detailed_luong_evaluation(model_obj, data, device, n_samples=50):
    model_obj.eval()
    targets = []
    outputs = []

    # Pick random samples
    test_samples = data.sample(n_samples)

    print(f"\n{'='*20} LUONG ATTENTION: DETAILED TEST {'='*20}")

    with torch.no_grad():
        for i, (idx, row) in enumerate(test_samples.iterrows()):
            src_sent = row['en']
            trg_sent = row['tr']

            # Generate translation using the function defined above
            prediction = translate_luong(src_sent, model_obj, device)

            targets.append([trg_sent.split()])
            outputs.append(prediction.split())

            # Direct output for monitoring progress
            print(f"Sample {i+1}/{n_samples}")
            print(f"English (Source): {src_sent}")
            print(f"Turkish (Target): {trg_sent}")
            print(f"Model   (Output): {prediction}")
            print("-" * 50)

    # Calculate final corpus metrics
    print("\nCalculating metrics (BLEU & BERTScore)...")
    bleu_score = corpus_bleu(targets, outputs) * 100

    ref_list = [" ".join(r[0]) for r in targets]
    hyp_list = [" ".join(h) for h in outputs]
    P, R, F1 = bert_score_func(hyp_list, ref_list, lang="tr", verbose=False)
    final_bert = F1.mean().item() * 100

    print(f"\nFINAL RESULTS FOR LUONG ATTENTION:")
    print(f"BLEU: {bleu_score:.2f} | BERTScore: {final_bert:.2f}")

    return bleu_score, final_bert

# --- EXECUTION ---
luong_bleu, luong_bert = detailed_luong_evaluation(model_luong, df, device)

# Add to results list
if 'results_list' not in locals(): results_list = []
results_list.append({"Model": "Luong Attention", "BLEU": luong_bleu, "BERTScore": luong_bert})


Sample 1/50
English (Source): i feel ready to go .
Turkish (Target): ben gitmek için hazır hissediyorum .
Model   (Output): gitmek gitmek zorundayım .
--------------------------------------------------
Sample 2/50
English (Source): it s a risky plan .
Turkish (Target): bu riskli bir plan .
Model   (Output): bu kötü bir plan .
--------------------------------------------------
Sample 3/50
English (Source): i like good coffee .
Turkish (Target): güzel kahveyi severim .
Model   (Output): kahveyi kahveyi severim .
--------------------------------------------------
Sample 4/50
English (Source): tom looked ahead .
Turkish (Target): tom önde görünüyordu .
Model   (Output): tom önde görünüyordu .
--------------------------------------------------
Sample 5/50
English (Source): i feel bad for him .
Turkish (Target): onun için üzülüyorum .
Model   (Output): onun için üzülüyorum .
--------------------------------------------------
Sample 6/50
English (Source): i can t see well .
Turkish (Target):

# Model 6: The Transformer

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

# --- 1. SET DEVICE ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- 2. DIMENSIONS (Make sure input_lang and output_lang were defined before) ---
try:
    INPUT_DIM = len(input_lang.word2index)
    OUTPUT_DIM = len(output_lang.word2index)
except NameError:
    print("HATA: input_lang veya output_lang bulunamadı! Lütfen veri ön işleme hücrelerini tekrar çalıştırın.")

# Hyperparameters
D_MODEL = 512
NHEAD = 8
NUM_LAYERS = 4
DIM_FEEDFORWARD = 1024
LEARNING_RATE = 0.0003
EPOCHS = 20
DROPOUT = 0.1

# --- 3. TRANSFORMER ARCHITECTURE ---
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
    def forward(self, x):
        return self.dropout(x + self.pe[:x.size(0), :])

class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model, nhead, num_layers, dim_feedforward, dropout):
        super(TransformerModel, self).__init__()
        self.d_model = d_model
        self.src_embedding = nn.Embedding(input_dim, d_model)
        self.trg_embedding = nn.Embedding(output_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        self.transformer = nn.Transformer(
            d_model=d_model, nhead=nhead, num_encoder_layers=num_layers,
            num_decoder_layers=num_layers, dim_feedforward=dim_feedforward, dropout=dropout
        )
        self.fc_out = nn.Linear(d_model, output_dim)

    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        return mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))

    def forward(self, src, trg):
        trg_mask = self.generate_square_subsequent_mask(trg.size(0)).to(trg.device)
        src_emb = self.pos_encoder(self.src_embedding(src) * math.sqrt(self.d_model))
        trg_emb = self.pos_encoder(self.trg_embedding(trg) * math.sqrt(self.d_model))
        return self.fc_out(self.transformer(src_emb, trg_emb, tgt_mask=trg_mask))

# --- 4. INITIALIZE & TRAIN ---
model_transformer = TransformerModel(INPUT_DIM, OUTPUT_DIM, D_MODEL, NHEAD, NUM_LAYERS, DIM_FEEDFORWARD, DROPOUT).to(device)
optimizer = optim.Adam(model_transformer.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss(ignore_index=0, label_smoothing=0.1)

print(f"Starting Training on {device}...")
for epoch in range(EPOCHS):
    model_transformer.train()
    epoch_loss = 0
    for src, trg in train_loader:
        src, trg = src.to(device), trg.to(device)
        optimizer.zero_grad()
        output = model_transformer(src, trg[:-1, :])
        loss = criterion(output.view(-1, OUTPUT_DIM), trg[1:, :].view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_transformer.parameters(), 1.0)
        optimizer.step()
        epoch_loss += loss.item()

    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f'Epoch: {epoch+1:02} | Loss: {epoch_loss/len(train_loader):.3f}')

print("Training Complete!")

Starting Training on cuda...
Epoch: 01 | Loss: 4.786
Epoch: 05 | Loss: 3.919
Epoch: 10 | Loss: 3.621
Epoch: 15 | Loss: 3.459
Epoch: 20 | Loss: 3.374
Training Complete!


In [None]:
import torch
import pandas as pd
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from bert_score import score as bert_score_func

# --- TRANSFORMER TRANSLATION FUNCTION ---
def translate_transformer(sentence, model, device, max_len=50):
    model.eval()
    tokens = [input_lang.word2index.get(token, 3) for token in preprocess(sentence).split()]
    src_tensor = torch.tensor([1] + tokens + [2]).unsqueeze(1).to(device)

    trg_indices = [1] # <sos>
    for i in range(max_len):
        trg_tensor = torch.tensor(trg_indices).unsqueeze(1).to(device)
        with torch.no_grad():
            output = model(src_tensor, trg_tensor)

        prediction = output.argmax(2)[-1, :].item()
        trg_indices.append(prediction)
        if prediction == 2: # <eos>
            break

    return " ".join([output_lang.index2word[i] for i in trg_indices][1:-1])

# --- DETAILED EVALUATION WITH SMOOTHING ---
def evaluate_transformer_final_fixed(model, data, device, n_samples=50):
    model.eval()
    targets = []
    outputs = []
    chencherry = SmoothingFunction() # Fix for 0.00 BLEU

    test_samples = data.sample(n_samples)

    print(f"\n{'='*20} TRANSFORMER: DETAILED PERFORMANCE TEST {'='*20}")

    for i, (idx, row) in enumerate(test_samples.iterrows()):
        src_sent = row['en']
        trg_sent = row['tr']

        # Generate translation
        prediction = translate_transformer(src_sent, model, device)

        targets.append([trg_sent.split()])
        outputs.append(prediction.split())

        # Exact format you requested
        print(f"Sample {i+1}/{n_samples}")
        print(f"English (Source): {src_sent}")
        print(f"Turkish (Target): {trg_sent}")
        print(f"Model   (Output): {prediction}")
        print("-" * 50)

    print("\nCalculating metrics (BLEU & BERTScore)...")

    # BLEU with Smoothing Method 4 (Recommended for short sequences)
    t_bleu = corpus_bleu(targets, outputs, smoothing_function=chencherry.method4) * 100

    # BERTScore
    ref_list = [" ".join(r[0]) for r in targets]
    hyp_list = [" ".join(h) for h in outputs]
    P, R, F1 = bert_score_func(hyp_list, ref_list, lang="tr", verbose=False)
    t_bs = F1.mean().item() * 100

    print(f"\nFINAL RESULTS FOR TRANSFORMER:")
    print(f"BLEU: {t_bleu:.2f} | BERTScore: {t_bs:.2f}")

    return t_bleu, t_bs

# --- EXECUTE ---
trans_bleu, trans_bs = evaluate_transformer_final_fixed(model_transformer, df, device)

# Update the global results list
if 'results_list' not in locals(): results_list = []
# Remove old Transformer entry if exists to avoid duplicates
results_list = [d for d in results_list if d.get('Model') != 'Transformer']
results_list.append({"Model": "Transformer", "BLEU": trans_bleu, "BERTScore": trans_bs})


Sample 1/50
English (Source): tom jumped back .
Turkish (Target): tom geriye atladı .
Model   (Output): tom eve döndü .
--------------------------------------------------
Sample 2/50
English (Source): tom waited calmly .
Turkish (Target): tom sakince bekledi .
Model   (Output): tom un üç oğlu .
--------------------------------------------------
Sample 3/50
English (Source): tom kept eating .
Turkish (Target): tom yemek yemeye devam etti .
Model   (Output): tom un üç oğlu .
--------------------------------------------------
Sample 4/50
English (Source): i m not a cop .
Turkish (Target): ben bir polis değilim .
Model   (Output): ben bir melek değilim .
--------------------------------------------------
Sample 5/50
English (Source): put down that gun .
Turkish (Target): o silahı yere koy .
Model   (Output): onu tekrar çek .
--------------------------------------------------
Sample 6/50
English (Source): let s wait a while .
Turkish (Target): biraz bekleyelim .
Model   (Output): bir göz a

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]


FINAL RESULTS FOR TRANSFORMER:
BLEU: 1.87 | BERTScore: 60.69
