# New

In [1]:
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
import random
from torch.nn.utils.rnn import pad_sequence

# Log in to W&B
wandb.login(key='acdc26d2fc17a56e83ea3ae6c10e496128dee648')

# ---------- Model Definitions ----------
class Encoder(nn.Module):
    def __init__(self, input_dim, embed_dim, hidden_dim, num_layers, cell_type='LSTM', dropout=0.2, bidirectional=False):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, embed_dim, padding_idx=0)
        rnn_cls = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}[cell_type]
        self.rnn = rnn_cls(embed_dim, hidden_dim, num_layers, dropout=dropout, batch_first=True, bidirectional=bidirectional)
        self.cell_type = cell_type
        self.bidirectional = bidirectional

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, hidden = self.rnn(embedded)
        return hidden

class Decoder(nn.Module):
    def __init__(self, output_dim, embed_dim, hidden_dim, num_layers, cell_type='LSTM', dropout=0.2, bidirectional=False):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, embed_dim, padding_idx=0)
        rnn_cls = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}[cell_type]
        self.rnn = rnn_cls(embed_dim, hidden_dim, num_layers, dropout=dropout, batch_first=True, bidirectional=bidirectional)
        self.fc_out = nn.Linear(hidden_dim * (2 if bidirectional else 1), output_dim)
        self.cell_type = cell_type
        self.bidirectional = bidirectional

    def forward(self, input, hidden):
        input = input.unsqueeze(1)
        embedded = self.embedding(input)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc_out(output.squeeze(1))
        return output, hidden

class Seq2Seq(nn.Module):
    def __init__(self, input_dim, output_dim, embed_dim, hidden_dim, enc_layers, dec_layers,
                 cell_type='LSTM', dropout=0.2, bidirectional=False):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(input_dim, embed_dim, hidden_dim, enc_layers, cell_type, dropout, bidirectional)
        self.decoder = Decoder(output_dim, embed_dim, hidden_dim, dec_layers, cell_type, dropout, bidirectional)
        self.cell_type = cell_type

    # ✔ Add this inside the class
    def adjust_hidden_for_decoder(self, encoder_hidden):
        enc_layers = self.encoder.rnn.num_layers
        dec_layers = self.decoder.rnn.num_layers

        def adjust(h):
            if enc_layers == dec_layers:
                return h
            elif enc_layers < dec_layers:
                repeat_h = h[-1].unsqueeze(0).repeat(dec_layers - enc_layers, 1, 1)
                return torch.cat([h, repeat_h], dim=0)
            else:
                return h[-dec_layers:]

        if self.cell_type == 'LSTM':
            h, c = encoder_hidden
            h = adjust(h)
            c = adjust(c)
            return (h, c)
        else:
            h = encoder_hidden
            h = adjust(h)
            return h

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size, trg_len = trg.size()
        outputs = torch.zeros(batch_size, trg_len, self.decoder.fc_out.out_features, device=src.device)
    
        hidden = self.encoder(src)
        # ✔ Use the adjusted hidden
        decoder_hidden = self.adjust_hidden_for_decoder(hidden)
    
        input = trg[:, 0]
        for t in range(1, trg_len):
            output, decoder_hidden = self.decoder(input, decoder_hidden)
            outputs[:, t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input = trg[:, t] if teacher_force else top1
    
        return outputs



# ---------- Utility Functions ----------
def build_vocab(sequences):
    chars = set(ch for seq in sequences for ch in seq)
    stoi = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}
    for ch in sorted(chars):
        stoi[ch] = len(stoi)
    itos = {i: ch for ch, i in stoi.items()}
    return stoi, itos

def encode_sequence(seq, stoi):
    return [stoi.get(c, stoi['<unk>']) for c in seq]

def prepare_batch(pairs, inp_stoi, out_stoi, device):
    src_seq = [torch.tensor(encode_sequence(src, inp_stoi) + [inp_stoi['<eos>']]) for src, _ in pairs]
    trg_seq = [torch.tensor([out_stoi['<sos>']] + encode_sequence(trg, out_stoi) + [out_stoi['<eos>']]) for _, trg in pairs]
    src_batch = pad_sequence(src_seq, batch_first=True, padding_value=inp_stoi['<pad>'])
    trg_batch = pad_sequence(trg_seq, batch_first=True, padding_value=out_stoi['<pad>'])
    return src_batch.to(device), trg_batch.to(device)

def read_dataset(path):
    with open(path, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
        return [(l.split('\t')[1], l.split('\t')[0]) for l in lines if '\t' in l]

def calculate_word_accuracy(preds, targets, ignore_index=0):
    # Get the token predictions
    preds = preds.argmax(dim=-1)  # [batch, seq_len]
    
    # Create a mask where targets are not padding
    mask = targets != ignore_index

    # For word-level accuracy, we check if the entire sequence matches
    # First, apply mask to both preds and targets
    preds_masked = preds * mask
    targets_masked = targets * mask

    # Now compare entire sequences
    # (preds == targets) -> shape [batch, seq_len]
    # .all(dim=1) -> True if all tokens match for a sequence
    sequence_correct = (preds_masked == targets_masked).all(dim=1)
    
    # Calculate word accuracy
    word_accuracy = sequence_correct.float().mean().item() * 100

    # print(f"Word-level Correct Sequences: {sequence_correct.sum().item()}/{sequence_correct.size(0)}")
    return word_accuracy

def evaluate(model, data, src_vocab, tgt_vocab, device, criterion, batch_size):
    model.eval()
    total_loss = 0
    total_acc = 0
    with torch.no_grad():
        for i in range(0, len(data), batch_size):
            batch = data[i:i + batch_size]
            src, trg = prepare_batch(batch, src_vocab, tgt_vocab, device)
            output = model(src, trg)
            loss = criterion(output[:, 1:].reshape(-1, output.shape[-1]), trg[:, 1:].reshape(-1))
            # Updated to word-level accuracy
            acc = calculate_word_accuracy(output[:, 1:], trg[:, 1:])
            total_loss += loss.item()
            total_acc += acc

    return total_loss / len(data), total_acc / (len(data) // batch_size)

# ---------- Train Function ----------


def train():
    wandb.init(config={
        "embed_dim": 128,
        "hidden_dim": 256,
        "enc_layers": 2,
        "dec_layers": 2,
        "cell_type": "LSTM",
        "dropout": 0.2,
        "epochs": 10,
        "batch_size": 64,
        "bidirectional": False,
        "learning_rate": 0.001,
        "optimizer": "adam",
        "teacher_forcing_ratio": 0.5,
        "beam_width": 1
    })
    config = wandb.config
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_data = read_dataset("/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv")
    dev_data = read_dataset("/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv")

    src_vocab, tgt_vocab = build_vocab([src for src, _ in train_data]), build_vocab([tgt for _, tgt in train_data])
    model = Seq2Seq(len(src_vocab[0]), len(tgt_vocab[0]), config.embed_dim, config.hidden_dim,
                    config.enc_layers, config.dec_layers, config.cell_type, config.dropout, config.bidirectional).to(device)

    if config.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == "nadam":
        optimizer = optim.NAdam(model.parameters(), lr=config.learning_rate)
    else:
        raise ValueError("Unsupported optimizer")

    criterion = nn.CrossEntropyLoss(ignore_index=0)

    for epoch in range(config.epochs):
        model.train()
        total_loss = 0
        total_acc = 0
        random.shuffle(train_data)
        # print(len(train_data))
        for i in range(0, len(train_data), config.batch_size):
            batch = train_data[i:i + config.batch_size]
            src, trg = prepare_batch(batch, src_vocab[0], tgt_vocab[0], device)
            # print(src)
            # print(trg)
            
            optimizer.zero_grad()
            output = model(src, trg, teacher_forcing_ratio=config.teacher_forcing_ratio)
            loss = criterion(output[:, 1:].reshape(-1, output.shape[-1]), trg[:, 1:].reshape(-1))
            # print('output',output.shape)   # output torch.Size([32, 13, 67])
            # print('trg',trg.shape) # trg torch.Size([32, 13])
            acc = calculate_word_accuracy(output[:, 1:], trg[:, 1:])
            # print(acc)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_acc += acc

        avg_train_loss = total_loss / len(train_data)
        avg_train_acc = total_acc / (len(train_data) // config.batch_size)
        val_loss, val_acc = evaluate(model, dev_data, src_vocab[0], tgt_vocab[0], device, criterion, config.batch_size)

        wandb.log({
            "Train Loss": avg_train_loss,
            "Train Accuracy": avg_train_acc,
            "Validation Loss": val_loss,
            "Validation Accuracy": val_acc,
            "Epoch": epoch + 1,
            "Learning Rate": config.learning_rate,
            "Teacher Forcing Ratio": config.teacher_forcing_ratio,
            "Optimizer": config.optimizer,
            "Bidirectional": config.bidirectional,
            "Beam Width": config.beam_width
        })

        print(f"Epoch {epoch + 1}/{config.epochs} | Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.2f}% | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    wandb.finish()

# ---------- Sweep Setup ----------
sweep_config = {
    'method': 'random',
    'metric': {'name': 'Validation Loss', 'goal': 'minimize'},
    'parameters': {
        'embed_dim': {'values': [32, 64, 256]},
        'hidden_dim': {'values': [64, 128]},
        'enc_layers': {'values': [3]},
        'dec_layers': {'values': [1]},
        'cell_type': {'values': ['LSTM']},
        'dropout': {'values': [0.2, 0.3]},
        'batch_size': {'value': 32},
        'epochs': {'value': 1},
        'bidirectional': {'values': [False]},
        'learning_rate': {'values': [0.001, 0.002, 0.0001]},
        'optimizer': {'values': ['adam', 'nadam']},
        'teacher_forcing_ratio': {'values': [0.2, 0.5, 0.7]},
        'beam_width': {'values': [1, 3, 5]}
    }
}
# LSTM , GRU, RNN 1 3
# sweep_id = wandb.sweep(sweep_config, project="Vinod_Assignment_3_new")
# wandb.agent(sweep_id, function=train, count=1)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mviinod9[0m ([33mviinod9-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
import pandas as pd

def calculate_word_accuracy_from_ids(preds_ids, targets_ids, ignore_index=0):
    """
    Calculates word-level accuracy given token id tensors directly (both of shape [batch, seq_len]).
    """
    # Create mask where targets are not padding
    mask = targets_ids != ignore_index

    # Mask both predictions and targets
    preds_masked = preds_ids * mask
    targets_masked = targets_ids * mask

    # Compare entire sequences: True if all tokens match in a sequence
    sequence_correct = (preds_masked == targets_masked).all(dim=1)

    # Calculate word accuracy
    word_accuracy = sequence_correct.float().mean().item() * 100

    return word_accuracy

# def predict_and_log_test_examples(model, test_path, src_vocab, tgt_vocab, device, num_examples=50):
#     model.eval()
#     itos = tgt_vocab[1]
#     stoi = src_vocab[0]

#     test_data = read_dataset(test_path)
#     examples = random.sample(test_data, num_examples)
#     predictions_log = []

#     preds_list = []
#     trgs_list = []

#     for src_text, tgt_text in examples:
#         src_tensor = torch.tensor(encode_sequence(src_text, stoi) + [stoi['<eos>']], device=device).unsqueeze(0)
        
#         hidden = model.encoder(src_tensor)
#         decoder_hidden = model.adjust_hidden_for_decoder(hidden)

#         input = torch.tensor([tgt_vocab[0]['<sos>']], device=device)

#         decoded_tokens = []
#         for _ in range(30):  # max decoding length
#             output, decoder_hidden = model.decoder(input, decoder_hidden)
#             top1 = output.argmax(1)
#             if top1.item() == tgt_vocab[0]['<eos>']:
#                 break
#             decoded_tokens.append(top1.item())
#             input = top1

#         prediction = decoded_tokens

#         pred_str = ''.join([itos[idx] for idx in prediction])
#         print(f"Input: {src_text} | Target: {tgt_text} | Prediction: {pred_str}")
#         predictions_log.append(wandb.Html(f"<b>Input:</b> {src_text} &nbsp; <b>Target:</b> {tgt_text} &nbsp; <b>Pred:</b> {pred_str}"))

#         tgt_encoded = [tgt_vocab[0].get(ch, tgt_vocab[0]['<unk>']) for ch in tgt_text] + [tgt_vocab[0]['<eos>']]
#         preds_list.append(torch.tensor(prediction, device=device))
#         trgs_list.append(torch.tensor(tgt_encoded, device=device))

#     # Find the max sequence length among both preds and trgs
#     max_len = max(max([p.size(0) for p in preds_list]), max([t.size(0) for t in trgs_list]))

#     # Pad both preds and targets to max_len
#     preds_padded = pad_sequence([torch.cat([p, torch.full((max_len - p.size(0),), 0, device=device)]) if p.size(0) < max_len else p for p in preds_list], batch_first=True)
#     trgs_padded = pad_sequence([torch.cat([t, torch.full((max_len - t.size(0),), 0, device=device)]) if t.size(0) < max_len else t for t in trgs_list], batch_first=True)

#     # Calculate word accuracy
#     # print(preds_padded.shape)
#     # print(trgs_padded.shape)
#     test_word_acc = calculate_word_accuracy_from_ids(preds_padded, trgs_padded)

#     print(f"Test Word Accuracy on {num_examples} examples: {test_word_acc:.2f}%")

#     wandb.log({
#         "Test Predictions": wandb.Html("<br>".join([str(p) for p in predictions_log])),
#         "Test Word Accuracy": test_word_acc
#     })


def predict_and_log_test_examples_with_csv(model, test_path, src_vocab, tgt_vocab, device, num_examples=50, csv_save_path="predictions.csv"):
    model.eval()
    itos = tgt_vocab[1]
    stoi = src_vocab[0]

    test_data = read_dataset(test_path)
    examples = random.sample(test_data, num_examples)
    predictions_log = []

    preds_list = []
    trgs_list = []

    # ✅ Create a list to store for CSV
    csv_data = []

    for src_text, tgt_text in examples:
        src_tensor = torch.tensor(encode_sequence(src_text, stoi) + [stoi['<eos>']], device=device).unsqueeze(0)
        
        hidden = model.encoder(src_tensor)
        decoder_hidden = model.adjust_hidden_for_decoder(hidden)

        input = torch.tensor([tgt_vocab[0]['<sos>']], device=device)

        decoded_tokens = []
        for _ in range(30):
            output, decoder_hidden = model.decoder(input, decoder_hidden)
            top1 = output.argmax(1)
            if top1.item() == tgt_vocab[0]['<eos>']:
                break
            decoded_tokens.append(top1.item())
            input = top1

        prediction = decoded_tokens
        pred_str = ''.join([itos[idx] for idx in prediction])

        print(f"Input: {src_text} | Target: {tgt_text} | Prediction: {pred_str}")
        
        # ✅ Append data for CSV
        csv_data.append({
            "Input": src_text,
            "Target": tgt_text,
            "Prediction": pred_str
        })

        predictions_log.append(wandb.Html(f"<b>Input:</b> {src_text} &nbsp; <b>Target:</b> {tgt_text} &nbsp; <b>Pred:</b> {pred_str}"))

        tgt_encoded = [tgt_vocab[0].get(ch, tgt_vocab[0]['<unk>']) for ch in tgt_text] + [tgt_vocab[0]['<eos>']]
        preds_list.append(torch.tensor(prediction, device=device))
        trgs_list.append(torch.tensor(tgt_encoded, device=device))
    # Find the max sequence length among both preds and trgs
    max_len = max(max([p.size(0) for p in preds_list]), max([t.size(0) for t in trgs_list]))

    # Pad both preds and targets to max_len
    preds_padded = pad_sequence([torch.cat([p, torch.full((max_len - p.size(0),), 0, device=device)]) if p.size(0) < max_len else p for p in preds_list], batch_first=True)
    trgs_padded = pad_sequence([torch.cat([t, torch.full((max_len - t.size(0),), 0, device=device)]) if t.size(0) < max_len else t for t in trgs_list], batch_first=True)

    # Calculate word accuracy
    # print(preds_padded.shape)
    # print(trgs_padded.shape)
    test_word_acc = calculate_word_accuracy_from_ids(preds_padded, trgs_padded)

    print(f"Test Word Accuracy on {num_examples} examples: {test_word_acc:.2f}%")

    wandb.log({
        "Test Predictions": wandb.Html("<br>".join([str(p) for p in predictions_log])),
        "Test Word Accuracy": test_word_acc
    })

    # ✅ Save to CSV
    df = pd.DataFrame(csv_data)
    df.to_csv(csv_save_path, index=False)
    print(f"✅ Saved predictions to {csv_save_path}")



def train_pred():
    wandb.init(config={
        "embed_dim": 128,
        "hidden_dim": 256,
        "enc_layers": 2,
        "dec_layers": 2,
        "cell_type": "LSTM",
        "dropout": 0.2,
        "epochs": 10,
        "batch_size": 64,
        "bidirectional": False,
        "learning_rate": 0.001,
        "optimizer": "adam",
        "teacher_forcing_ratio": 0.5,
        "beam_width": 1
    })
    config = wandb.config
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_data = read_dataset("/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv")
    dev_data = read_dataset("/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv")

    src_vocab, tgt_vocab = build_vocab([src for src, _ in train_data]), build_vocab([tgt for _, tgt in train_data])
    model = Seq2Seq(len(src_vocab[0]), len(tgt_vocab[0]), config.embed_dim, config.hidden_dim,
                    config.enc_layers, config.dec_layers, config.cell_type, config.dropout, config.bidirectional).to(device)

    if config.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == "nadam":
        optimizer = optim.NAdam(model.parameters(), lr=config.learning_rate)
    else:
        raise ValueError("Unsupported optimizer")

    criterion = nn.CrossEntropyLoss(ignore_index=0)

    for epoch in range(config.epochs):
        model.train()
        total_loss = 0
        total_acc = 0
        random.shuffle(train_data)
        # print(len(train_data))
        for i in range(0, len(train_data), config.batch_size):
            batch = train_data[i:i + config.batch_size]
            src, trg = prepare_batch(batch, src_vocab[0], tgt_vocab[0], device)
            # print(src)
            # print(trg)
            
            optimizer.zero_grad()
            output = model(src, trg, teacher_forcing_ratio=config.teacher_forcing_ratio)
            loss = criterion(output[:, 1:].reshape(-1, output.shape[-1]), trg[:, 1:].reshape(-1))
            # print('output',output.shape)   # output torch.Size([32, 13, 67])
            # print('trg',trg.shape) # trg torch.Size([32, 13])
            acc = calculate_word_accuracy(output[:, 1:], trg[:, 1:])
            # print(acc)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_acc += acc

        avg_train_loss = total_loss / len(train_data)
        avg_train_acc = total_acc / (len(train_data) // config.batch_size)
        val_loss, val_acc = evaluate(model, dev_data, src_vocab[0], tgt_vocab[0], device, criterion, config.batch_size)

        wandb.log({
            "Train Loss": avg_train_loss,
            "Train Accuracy": avg_train_acc,
            "Validation Loss": val_loss,
            "Validation Accuracy": val_acc,
            "Epoch": epoch + 1,
            "Learning Rate": config.learning_rate,
            "Teacher Forcing Ratio": config.teacher_forcing_ratio,
            "Optimizer": config.optimizer,
            "Bidirectional": config.bidirectional,
            "Beam Width": config.beam_width
        })

        print(f"Epoch {epoch + 1}/{config.epochs} | Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.2f}% | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        # At the end of train()
    test_path = "/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"
    # predict_and_log_test_examples(model, test_path, src_vocab, tgt_vocab, device)
    predict_and_log_test_examples_with_csv(model, test_path, src_vocab, tgt_vocab, device, num_examples=50, csv_save_path="predictions_attention.csv")

    
    wandb.finish()

# ---------- Sweep Setup ----------
sweep_config = {
    'method': 'random',
    'metric': {'name': 'Validation Loss', 'goal': 'minimize'},
    'parameters': {
        'embed_dim': {'values': [256]},
        'hidden_dim': {'values': [128]},
        'enc_layers': {'values': [3]},
        'dec_layers': {'values': [1]},
        'cell_type': {'values': ['LSTM']},
        'dropout': {'values': [0.3]},
        'batch_size': {'value': 32},
        'epochs': {'value': 1},
        'bidirectional': {'values': [False]},
        'learning_rate': {'values': [0.001]},
        'optimizer': {'values': ['adam']},
        'teacher_forcing_ratio': {'values': [0.2]},
        'beam_width': {'values': [1]}
    }
}
# LSTM , GRU, RNN 1 3
sweep_id = wandb.sweep(sweep_config, project="Vinod_Assignment_3_new")
wandb.agent(sweep_id, function=train_pred, count=1)

Create sweep with ID: 77bdkd1w
Sweep URL: https://wandb.ai/viinod9-iitm/Vinod_Assignment_3_new/sweeps/77bdkd1w


[34m[1mwandb[0m: Agent Starting Run: imt6v769 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2




Epoch 1/1 | Train Loss: 0.0816, Train Acc: 0.34% | Val Loss: 0.0564, Val Acc: 2.17%
Input: kapdon | Target: कपड़ो | Prediction: कपपुं
Input: bhooli | Target: भूली | Prediction: भुलि
Input: angkor | Target: अंकोर | Prediction: अनुक्र
Input: paathyakram | Target: पाठ्यक्रम | Prediction: पा्त्त्रर
Input: pammi | Target: पम्मी | Prediction: पाममी
Input: divyata | Target: दिव्यता | Prediction: दिवावाा
Input: ear | Target: इअर | Prediction: अर्
Input: lanset | Target: लैंसेट | Prediction: लास्स
Input: siera | Target: सिएरा | Prediction: स्टी
Input: anuprayogon | Target: अनुप्रयोगों | Prediction: अनुर््ुनों
Input: chauka | Target: चौका | Prediction: चुका
Input: mejbani | Target: मेज़बानी | Prediction: मिज्जी
Input: bhrantiyan | Target: भ्रांतियां | Prediction: भ्र्यियों
Input: entone | Target: एंटोन | Prediction: इंट्टन
Input: eliminator | Target: एलिमिनेटर | Prediction: लिमिलामिय
Input: flin | Target: फ्लिन | Prediction: फिलि
Input: garibi | Target: गरीबी | Prediction: जार्ि
Input: alnkaron 

0,1
Beam Width,▁
Epoch,▁
Learning Rate,▁
Teacher Forcing Ratio,▁
Test Word Accuracy,▁
Train Accuracy,▁
Train Loss,▁
Validation Accuracy,▁
Validation Loss,▁

0,1
Beam Width,1
Bidirectional,False
Epoch,1
Learning Rate,0.001
Optimizer,adam
Teacher Forcing Ratio,0.2
Test Word Accuracy,0
Train Accuracy,0.33717
Train Loss,0.08158
Validation Accuracy,2.16759
