In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import wandb

# Log in to W&B (usually called at the start of the script)
#wandb.login()

# Optionally: You can specify the API key if not logged in yet, or use environment variables for automatic login
wandb.login(key='acdc26d2fc17a56e83ea3ae6c10e496128dee648')

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mviinod9[0m ([33mviinod9-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
import wandb
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Device
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load Dakshina Dataset
def load_data(path):
    df = pd.read_csv(path, sep='\t', header=None, names=['target', 'input'])
    # Convert the input and target columns to strings
    df['input'] = df['input'].astype(str)
    df['target'] = df['target'].astype(str)
    return df


train_df = load_data('/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv')
dev_df = load_data('/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv')
test_df = load_data('/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv')

# Build Vocabularies
class Vocab:
    def __init__(self, texts, specials=['<pad>', '<sos>', '<eos>']):
        chars = set(''.join(texts))
        self.itos = specials + sorted(list(chars))
        self.stoi = {c:i for i,c in enumerate(self.itos)}

    def numericalize(self, text):
        return [self.stoi['<sos>']] + [self.stoi[c] for c in text] + [self.stoi['<eos>']]

    def denumericalize(self, nums):
        return ''.join([self.itos[i] for i in nums if i not in (self.stoi['<pad>'], self.stoi['<sos>'], self.stoi['<eos>'])])

input_vocab = Vocab(train_df['input'])
output_vocab = Vocab(train_df['target'])

# Dataset Class
class TransliterationDataset(Dataset):
    def __init__(self, df, input_vocab, output_vocab, max_len=30):
        self.inputs = df['input'].tolist()
        self.targets = df['target'].tolist()
        self.input_vocab = input_vocab
        self.output_vocab = output_vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        x = self.input_vocab.numericalize(self.inputs[idx])
        y = self.output_vocab.numericalize(self.targets[idx])
        x = x[:self.max_len]
        y = y[:self.max_len]
        return torch.tensor(x), torch.tensor(y)

# Collate Function
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    x_batch, y_batch = zip(*batch)
    x_batch = pad_sequence(x_batch, batch_first=True, padding_value=input_vocab.stoi['<pad>'])
    y_batch = pad_sequence(y_batch, batch_first=True, padding_value=output_vocab.stoi['<pad>'])
    return x_batch, y_batch

# DataLoaders
BATCH_SIZE = 64

train_dataset = TransliterationDataset(train_df, input_vocab, output_vocab)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

dev_dataset = TransliterationDataset(dev_df, input_vocab, output_vocab)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

# Model Classes (Same Encoder, Decoder, Seq2Seq)
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers, cell_type='RNN', dropout=0.3):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.dropout = nn.Dropout(dropout)
        if cell_type == 'RNN':
            self.rnn = nn.RNN(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout if n_layers>1 else 0)
        elif cell_type == 'LSTM':
            self.rnn = nn.LSTM(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout if n_layers>1 else 0)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout if n_layers>1 else 0)
        self.cell_type = cell_type

    def forward(self, src):
        embedded = self.dropout(self.embedding(src))
        outputs, hidden = self.rnn(embedded)
        return hidden

class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers, cell_type='RNN', dropout=0.3):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.dropout = nn.Dropout(dropout)
        if cell_type == 'RNN':
            self.rnn = nn.RNN(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout if n_layers>1 else 0)
        elif cell_type == 'LSTM':
            self.rnn = nn.LSTM(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout if n_layers>1 else 0)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout if n_layers>1 else 0)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.cell_type = cell_type

    def forward(self, input, hidden):
        input = input.unsqueeze(1)
        embedded = self.dropout(self.embedding(input))
        output, hidden = self.rnn(embedded, hidden)
        prediction = self.fc_out(output.squeeze(1))
        return prediction, hidden

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size = src.size(0)
        trg_len = trg.size(1)
        trg_vocab_size = self.decoder.fc_out.out_features

        outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(self.device)

        hidden = self.encoder(src)
        input = trg[:, 0]

        for t in range(1, trg_len):
            output, hidden = self.decoder(input, hidden)
            outputs[:, t] = output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input = trg[:, t] if teacher_force else top1

        return outputs

# Train and Evaluate Functions
def train(model, iterator, optimizer, criterion, clip=1):
    model.train()
    epoch_loss = 0

    for src, trg in iterator:
        src, trg = src.to(DEVICE), trg.to(DEVICE)

        optimizer.zero_grad()
        output = model(src, trg)

        output_dim = output.shape[-1]
        output = output[:,1:].reshape(-1, output_dim)
        trg = trg[:,1:].reshape(-1)

        loss = criterion(output, trg)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)

def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss = 0

    with torch.no_grad():
        for src, trg in iterator:
            src, trg = src.to(DEVICE), trg.to(DEVICE)
            output = model(src, trg, 0)

            output_dim = output.shape[-1]
            output = output[:,1:].reshape(-1, output_dim)
            trg = trg[:,1:].reshape(-1)

            loss = criterion(output, trg)
            epoch_loss += loss.item()

    return epoch_loss / len(iterator)

# WandB Training Loop
def wandb_train():
    wandb.init(project="dakshina_seq2seq")
    config = wandb.config

    encoder = Encoder(len(input_vocab.itos), config.emb_dim, config.hidden_dim, config.n_layers, config.cell_type, config.dropout)
    decoder = Decoder(len(output_vocab.itos), config.emb_dim, config.hidden_dim, config.n_layers, config.cell_type, config.dropout)

    model = Seq2Seq(encoder, decoder, DEVICE).to(DEVICE)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=input_vocab.stoi['<pad>'])

    N_EPOCHS = 2

    for epoch in range(N_EPOCHS):
        train_loss = train(model, train_loader, optimizer, criterion)
        valid_loss = evaluate(model, dev_loader, criterion)

        wandb.log({"train_loss": train_loss, "val_loss": valid_loss, "accuracy": 1-valid_loss})

# Sweep Config
sweep_config = {
    'method': 'random',
    'metric': {'name': 'accuracy', 'goal': 'maximize'},
    'parameters': {
        'emb_dim': {'values': [16, 32, 64, 256]},
        'hidden_dim': {'values': [32, 64, 128, 256]},
        'n_layers': {'values': [1, 2, 3]},
        'cell_type': {'values': ['RNN', 'LSTM', 'GRU']},
        'dropout': {'values': [0.2, 0.3]}
    }
}

# Run Sweep
sweep_id = wandb.sweep(sweep_config, project="dakshina_seq2seq")
wandb.agent(sweep_id, function=wandb_train)


Create sweep with ID: 5mqljubn
Sweep URL: https://wandb.ai/viinod9-iitm/dakshina_seq2seq/sweeps/5mqljubn


[34m[1mwandb[0m: Agent Starting Run: aoznf48g with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	n_layers: 3


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.64857
train_loss,2.50363
val_loss,2.64857


[34m[1mwandb[0m: Agent Starting Run: hgbpt4wm with config:
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	n_layers: 3


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.66023
train_loss,2.53007
val_loss,2.66023


[34m[1mwandb[0m: Agent Starting Run: 7n6gme9h with config:
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	n_layers: 2


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.65843
train_loss,2.54515
val_loss,2.65843


[34m[1mwandb[0m: Agent Starting Run: 4bi61fzj with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	n_layers: 2


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.65789
train_loss,2.53512
val_loss,2.65789


[34m[1mwandb[0m: Agent Starting Run: lvrnrpcq with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 256
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	n_layers: 2


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.65672
train_loss,2.52092
val_loss,2.65672


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: npobkthd with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	n_layers: 3


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.65737
train_loss,2.57425
val_loss,2.65737


[34m[1mwandb[0m: Agent Starting Run: s9jnih7r with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	n_layers: 1


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.66153
train_loss,2.49704
val_loss,2.66153


[34m[1mwandb[0m: Agent Starting Run: blmibw11 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 256
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	n_layers: 2


0,1
accuracy,█▁
train_loss,█▁
val_loss,▁█

0,1
accuracy,-1.66232
train_loss,2.49785
val_loss,2.66232


[34m[1mwandb[0m: Agent Starting Run: 4ru7g64c with config:
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	n_layers: 1


0,1
accuracy,▁█
train_loss,█▁
val_loss,█▁

0,1
accuracy,-1.666
train_loss,2.52912
val_loss,2.666


[34m[1mwandb[0m: Agent Starting Run: hzzyk8nh with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 256
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	n_layers: 1
