In [1]:
import wandb

# Log in to W&B (usually called at the start of the script)
#wandb.login()

# Optionally: You can specify the API key if not logged in yet, or use environment variables for automatic login
wandb.login(key='acdc26d2fc17a56e83ea3ae6c10e496128dee648')

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mviinod9[0m ([33mviinod9-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
import torch
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, input_dim, embed_dim, hidden_dim, num_layers, cell_type='LSTM', dropout=0.2):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, embed_dim, padding_idx=0)

        rnn_cls = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}[cell_type]
        self.rnn = rnn_cls(embed_dim, hidden_dim, num_layers, dropout=dropout, batch_first=True, bidirectional=False)
        self.cell_type = cell_type

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, hidden = self.rnn(embedded)
        return hidden  # hidden: tuple for LSTM, tensor for RNN/GRU


class Decoder(nn.Module):
    def __init__(self, output_dim, embed_dim, hidden_dim, num_layers, cell_type='LSTM', dropout=0.2):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, embed_dim, padding_idx=0)

        rnn_cls = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}[cell_type]
        self.rnn = rnn_cls(embed_dim, hidden_dim, num_layers, dropout=dropout, batch_first=True, bidirectional=False)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.cell_type = cell_type

    def forward(self, input, hidden):
        input = input.unsqueeze(1)  # (batch, 1)
        embedded = self.embedding(input)  # (batch, 1, embed_dim)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc_out(output.squeeze(1))  # (batch, output_dim)
        return output, hidden


class Seq2Seq(nn.Module):
    def __init__(self, input_dim, output_dim, embed_dim, hidden_dim, enc_layers, dec_layers,
                 cell_type='LSTM', dropout=0.2):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(input_dim, embed_dim, hidden_dim, enc_layers, cell_type, dropout)
        self.decoder = Decoder(output_dim, embed_dim, hidden_dim, dec_layers, cell_type, dropout)
        self.cell_type = cell_type

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size, trg_len = trg.size()
        outputs = torch.zeros(batch_size, trg_len, self.decoder.fc_out.out_features, device=src.device)

        hidden = self.encoder(src)

        if self.cell_type == 'LSTM':
            decoder_hidden = (hidden[0][:self.decoder.rnn.num_layers], hidden[1][:self.decoder.rnn.num_layers])
        else:
            decoder_hidden = hidden[:self.decoder.rnn.num_layers]

        input = trg[:, 0]  # <sos>

        for t in range(1, trg_len):
            output, decoder_hidden = self.decoder(input, decoder_hidden)
            outputs[:, t] = output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input = trg[:, t] if teacher_force else top1

        return outputs


In [3]:
import torch
from torch.nn.utils.rnn import pad_sequence

def build_vocab(sequences):
    chars = set(ch for seq in sequences for ch in seq)
    stoi = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}
    for ch in sorted(chars):
        stoi[ch] = len(stoi)
    itos = {i: ch for ch, i in stoi.items()}
    return stoi, itos

def encode_sequence(seq, stoi):
    return [stoi.get(c, stoi['<unk>']) for c in seq]

def prepare_batch(pairs, inp_stoi, out_stoi, device):
    src_seq = [torch.tensor(encode_sequence(src, inp_stoi) + [inp_stoi['<eos>']]) for src, _ in pairs]
    trg_seq = [torch.tensor([out_stoi['<sos>']] + encode_sequence(trg, out_stoi) + [out_stoi['<eos>']]) for _, trg in pairs]
    src_batch = pad_sequence(src_seq, batch_first=True, padding_value=inp_stoi['<pad>'])
    trg_batch = pad_sequence(trg_seq, batch_first=True, padding_value=out_stoi['<pad>'])
    return src_batch.to(device), trg_batch.to(device)


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
#from model import Seq2Seq
#from utils import build_vocab, prepare_batch
import wandb
import random

def read_dataset(path):
    with open(path, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
        return [(l.split('\t')[1], l.split('\t')[0]) for l in lines if '\t' in l]

def calculate_accuracy(preds, targets, ignore_index=0):
    preds = preds.argmax(dim=-1)
    mask = targets != ignore_index
    correct = (preds == targets) & mask
    return correct.sum().item() / mask.sum().item()

def evaluate(model, data, src_vocab, tgt_vocab, device, criterion, batch_size):
    model.eval()
    total_loss = 0
    total_acc = 0
    with torch.no_grad():
        for i in range(0, len(data), batch_size):
            batch = data[i:i + batch_size]
            src, trg = prepare_batch(batch, src_vocab, tgt_vocab, device)
            output = model(src, trg)
            loss = criterion(output[:, 1:].reshape(-1, output.shape[-1]), trg[:, 1:].reshape(-1))
            acc = calculate_accuracy(output[:, 1:], trg[:, 1:])
            total_loss += loss.item()
            total_acc += acc
    return total_loss / len(data), total_acc / (len(data) // batch_size)

def train():
    wandb.init(config={
    "embed_dim": 128,
    "hidden_dim": 256,
    "enc_layers": 2,
    "dec_layers": 2,
    "cell_type": "LSTM",
    "dropout": 0.2,
    "epochs": 10,
    "batch_size": 64})
    config = wandb.config

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_data = read_dataset("/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv")
    dev_data = read_dataset("/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv")

    src_vocab, tgt_vocab = build_vocab([src for src, _ in train_data]), build_vocab([tgt for _, tgt in train_data])
    model = Seq2Seq(len(src_vocab[0]), len(tgt_vocab[0]), config.embed_dim, config.hidden_dim,
                    config.enc_layers, config.dec_layers, config.cell_type, config.dropout).to(device)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    for epoch in range(config.epochs):
        model.train()
        total_loss = 0
        total_acc = 0
        random.shuffle(train_data)

        for i in range(0, len(train_data), config.batch_size):
            batch = train_data[i:i + config.batch_size]
            src, trg = prepare_batch(batch, src_vocab[0], tgt_vocab[0], device)

            optimizer.zero_grad()
            output = model(src, trg)
            loss = criterion(output[:, 1:].reshape(-1, output.shape[-1]), trg[:, 1:].reshape(-1))
            acc = calculate_accuracy(output[:, 1:], trg[:, 1:])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_acc += acc

        avg_train_loss = total_loss / len(train_data)
        avg_train_acc = total_acc / (len(train_data) // config.batch_size)

        val_loss, val_acc = evaluate(model, dev_data, src_vocab[0], tgt_vocab[0], device, criterion, config.batch_size)

        wandb.log({
            "train_loss": avg_train_loss,
            "train_acc": avg_train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc,
            "epoch": epoch + 1
        })

        print(f"Epoch {epoch + 1}/{config.epochs} | "
              f"Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    wandb.finish()


In [5]:
import wandb
#from train import train  # make sure your train function is properly imported

sweep_config = {
    'method': 'grid',
    'metric': {
        'name': 'loss',
        'goal': 'minimize'
    },
    'parameters': {
        'embed_dim': {'values': [32, 64, 256]},
        'hidden_dim': {'values': [64, 128]},
        'enc_layers': {'values': [1, 2]},
        'dec_layers': {'values': [1, 2]},
        'cell_type': {'values': ['LSTM', 'GRU']},
        'dropout': {'values': [0.2, 0.3]},
        'batch_size': {'value': 32},
        'epochs': {'value': 10}
    }
}

sweep_id = wandb.sweep(sweep_config, project="Vinod_Assignment 3")
wandb.agent(sweep_id, function=train, count=1)


Create sweep with ID: z92uehik
Sweep URL: https://wandb.ai/viinod9-iitm/Vinod_Assignment%203/sweeps/z92uehik


[34m[1mwandb[0m: Agent Starting Run: 71lsww45 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.




Epoch 1/10 | Train Loss: 0.0833, Train Acc: 0.2958 | Val Loss: 0.0667, Val Acc: 0.3988
Epoch 2/10 | Train Loss: 0.0561, Train Acc: 0.4763 | Val Loss: 0.0490, Val Acc: 0.5490
Epoch 3/10 | Train Loss: 0.0448, Train Acc: 0.5706 | Val Loss: 0.0413, Val Acc: 0.6111
Epoch 4/10 | Train Loss: 0.0390, Train Acc: 0.6235 | Val Loss: 0.0374, Val Acc: 0.6484
Epoch 5/10 | Train Loss: 0.0355, Train Acc: 0.6591 | Val Loss: 0.0356, Val Acc: 0.6641
Epoch 6/10 | Train Loss: 0.0333, Train Acc: 0.6800 | Val Loss: 0.0342, Val Acc: 0.6821
Epoch 7/10 | Train Loss: 0.0314, Train Acc: 0.6983 | Val Loss: 0.0329, Val Acc: 0.6944
Epoch 8/10 | Train Loss: 0.0302, Train Acc: 0.7093 | Val Loss: 0.0318, Val Acc: 0.7012
Epoch 9/10 | Train Loss: 0.0291, Train Acc: 0.7203 | Val Loss: 0.0310, Val Acc: 0.7082
Epoch 10/10 | Train Loss: 0.0280, Train Acc: 0.7304 | Val Loss: 0.0301, Val Acc: 0.7169


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▄▅▆▇▇▇███
train_loss,█▅▃▂▂▂▁▁▁▁
val_acc,▁▄▆▆▇▇████
val_loss,█▅▃▂▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.73038
train_loss,0.02803
val_acc,0.71686
val_loss,0.03009
