In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the Kaggle input directory, printing one
# full path per line. os.walk ignores directory-listing errors by default, so
# a missing '/kaggle/input' simply prints nothing instead of raising.
# `_` receives each directory's list of subdirectory names, which is unused.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the bare file name.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Encoder class
class Encoder(nn.Module):
    """Embed a sequence of token ids and summarise it with a recurrent layer.

    Args:
        input_dim: Number of rows in the embedding table (source vocabulary size).
        emb_dim: Width of each embedding vector.
        hidden_dim: Hidden size of the recurrent layer.
        n_layers: Number of stacked recurrent layers.
        cell_type: Which recurrent layer to build: 'RNN', 'LSTM' or 'GRU'.

    Raises:
        ValueError: If ``cell_type`` is not one of the three supported names.
    """

    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers, cell_type='RNN'):
        super().__init__()

        # The embedding is created before the cell type is validated, so the
        # sub-modules are registered (and initialised) as: embedding, then rnn.
        self.embedding = nn.Embedding(input_dim, emb_dim)

        supported = (('RNN', nn.RNN), ('LSTM', nn.LSTM), ('GRU', nn.GRU))
        layer_cls = next(
            (candidate for label, candidate in supported if cell_type == label),
            None,
        )
        if layer_cls is None:
            raise ValueError("Invalid cell type. Choose from 'RNN', 'LSTM', or 'GRU'.")
        self.rnn = layer_cls(emb_dim, hidden_dim, n_layers, batch_first=True)

        # Kept as plain attributes so a wrapper can compare encoder and decoder.
        self.cell_type = cell_type
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

    def forward(self, src):
        """Return the recurrent layer's final state for ``src``.

        ``src`` holds integer token ids; the recurrent layer is batch-first,
        so a batched input is expected as (batch, src_len). The per-step
        outputs are discarded. The returned state is whatever the recurrent
        layer yields as its second value, which for ``nn.LSTM`` is an
        ``(h_n, c_n)`` tuple.
        """
        token_vectors = self.embedding(src)
        _, final_state = self.rnn(token_vectors)
        return final_state


# Decoder class
class Decoder(nn.Module):
    """Single-step decoder: embed one token per sequence, advance the
    recurrent state, and project the result onto the output vocabulary.

    Args:
        output_dim: Size of the embedding table and of the projection output
            (target vocabulary size).
        emb_dim: Width of each embedding vector.
        hidden_dim: Hidden size of the recurrent layer.
        n_layers: Number of stacked recurrent layers.
        cell_type: Which recurrent layer to build: 'RNN', 'LSTM' or 'GRU'.

    Raises:
        ValueError: If ``cell_type`` is not one of the three supported names.
    """

    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers, cell_type='RNN'):
        super().__init__()

        # Sub-modules are created in the order embedding -> rnn -> fc_out.
        self.embedding = nn.Embedding(output_dim, emb_dim)

        supported = (('RNN', nn.RNN), ('LSTM', nn.LSTM), ('GRU', nn.GRU))
        layer_cls = next(
            (candidate for label, candidate in supported if cell_type == label),
            None,
        )
        if layer_cls is None:
            raise ValueError("Invalid cell type. Choose from 'RNN', 'LSTM', or 'GRU'.")
        self.rnn = layer_cls(emb_dim, hidden_dim, n_layers, batch_first=True)

        # Projects each hidden vector to one score per output token.
        self.fc_out = nn.Linear(hidden_dim, output_dim)

        self.cell_type = cell_type
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

    def forward(self, input, hidden):
        """Run one decoding step.

        Args:
            input: Token ids for the current step, expected as (batch,).
            hidden: Recurrent state to continue from, in the format the
                chosen recurrent layer accepts.

        Returns:
            ``(prediction, hidden)``: the projected scores for this step and
            the updated recurrent state.
        """
        # Insert a length-1 sequence axis so the batch-first layer sees one step.
        step_tokens = input.unsqueeze(1)
        step_vectors = self.embedding(step_tokens)
        step_output, next_hidden = self.rnn(step_vectors, hidden)
        # Drop the length-1 sequence axis again before projecting.
        scores = self.fc_out(step_output.squeeze(1))
        return scores, next_hidden


# Seq2Seq wrapper
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes the target one position at a time.

    Args:
        encoder: Module whose ``forward(src)`` returns the initial decoder
            state. Must expose ``hidden_dim``, ``n_layers`` and ``cell_type``.
        decoder: Module whose ``forward(input, hidden)`` returns
            ``(prediction, hidden)``. Must expose the same three attributes
            plus a linear ``fc_out`` projection.
        device: Stored on ``self.device`` for callers that read it. It is no
            longer used to place the output buffer (see ``forward``).

    Raises:
        AssertionError: If encoder and decoder disagree on hidden size,
            number of layers, or cell type.
    """

    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        # The encoder's final state is fed straight into the decoder, so the
        # two recurrent stacks have to be structurally compatible.
        assert encoder.hidden_dim == decoder.hidden_dim, "Hidden dimensions must match!"
        assert encoder.n_layers == decoder.n_layers, "Number of layers must match!"
        assert encoder.cell_type == decoder.cell_type, "Cell types must match!"

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Decode ``trg`` step by step, conditioned on ``src``.

        Args:
            src: Source token ids, batch on dimension 0.
            trg: Target token ids, shape (batch, trg_len); column 0 is fed to
                the decoder as the first input.
            teacher_forcing_ratio: Probability, drawn independently at every
                step, of feeding the ground-truth token ``trg[:, t]`` as the
                next input instead of the decoder's own argmax. Pass 0 for
                purely greedy decoding.

        Returns:
            Tensor of shape (batch, trg_len, vocab) with the decoder scores
            for positions 1..trg_len-1. Position 0 is never written and stays
            all zeros.
        """
        batch_size = src.size(0)
        trg_len = trg.size(1)
        trg_vocab_size = self.decoder.fc_out.out_features

        # Place the buffer on the inputs' device rather than on `self.device`:
        # that attribute is a constructor-time snapshot and goes stale if the
        # model is later moved with `.to(...)`. Matching the projection
        # layer's dtype avoids silently downcasting float64 logits to float32.
        outputs = torch.zeros(
            batch_size,
            trg_len,
            trg_vocab_size,
            device=src.device,
            dtype=self.decoder.fc_out.weight.dtype,
        )

        hidden = self.encoder(src)

        # First decoder input is the first target column (the <sos> token).
        decoder_input = trg[:, 0]

        for t in range(1, trg_len):
            output, hidden = self.decoder(decoder_input, hidden)
            outputs[:, t] = output
            # One RNG draw per step (including the last) so that seeded runs
            # consume the global generator exactly as before.
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            decoder_input = trg[:, t] if teacher_force else output.argmax(1)

        return outputs


# Example wiring of the model with illustrative hyperparameters.
INPUT_DIM, OUTPUT_DIM = 100, 100  # Latin (source) / Devanagari (target) vocab sizes
EMB_DIM, HIDDEN_DIM, N_LAYERS = 64, 128, 1
CELL_TYPE = 'LSTM'  # Choose 'RNN', 'LSTM' or 'GRU'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Encoder is built before the decoder; both share every size except the vocab.
encoder = Encoder(
    input_dim=INPUT_DIM,
    emb_dim=EMB_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    cell_type=CELL_TYPE,
)
decoder = Decoder(
    output_dim=OUTPUT_DIM,
    emb_dim=EMB_DIM,
    hidden_dim=HIDDEN_DIM,
    n_layers=N_LAYERS,
    cell_type=CELL_TYPE,
)
model = Seq2Seq(encoder=encoder, decoder=decoder, device=DEVICE)
model = model.to(DEVICE)

print(model)

# Optimizer (Adam with its default settings) and loss.
optimizer = optim.Adam(params=model.parameters())
# Targets equal to 0 are excluded from the loss (assumes the padding index is 0).
criterion = nn.CrossEntropyLoss(ignore_index=0)

# Ready for training loop!


Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(100, 64)
    (rnn): LSTM(64, 128, batch_first=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(100, 64)
    (rnn): LSTM(64, 128, batch_first=True)
    (fc_out): Linear(in_features=128, out_features=100, bias=True)
  )
)
