## Importing required libraries

In [1]:
# suppressing warnings
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import random
import math
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
from typing import Iterable, List

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

import torchtext
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torchtext.datasets import multi30k, Multi30k

from torchdata.datapipes.iter import IterableWrapper, Mapper

from nltk.translate.bleu_score import sentence_bleu

!python -m spacy download en_core_web_sm
!python -m spacy download de_core_news_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ------ --------------------------------- 2.1/12.8 MB 13.0 MB/s eta 0:00:01
     ------------ --------------------------- 3.9/12.8 MB 10.2 MB/s eta 0:00:01
     -------------------- ------------------- 6.6/12.8 MB 10.9 MB/s eta 0:00:01
     ---------------------------- ----------- 9.2/12.8 MB 11.2 MB/s eta 0:00:01
     ----------------------------------- --- 11.5/12.8 MB 11.3 MB/s eta 0:00:01
     --------------------------------------- 12.8/12.8 MB 11.1 MB/s eta 0:00:00
[+] Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')
Collecting de-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl 

## Checking if CUDA is available

In [3]:
# use the GPU when PyTorch reports CUDA as available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

Using cuda device


## The Encoder

In [4]:
# an RNN encoder built from the "nn.Embedding", "nn.LSTM" and "nn.Dropout" modules of the "pytorch" library
class Encoder(nn.Module):
    """LSTM encoder: embeds source token indices and returns the final LSTM states.

    Args:
        vocab_len: number of entries in the embedding table.
        emb_dim: size of each token embedding.
        hid_dim: size of the LSTM hidden and cell states.
        n_layers: number of stacked LSTM layers.
        dropout_prob: dropout probability applied to the embeddings and,
            when ``n_layers > 1``, between the stacked LSTM layers.
    """

    def __init__(self, vocab_len, emb_dim, hid_dim, n_layers, dropout_prob):
        super().__init__()

        self.hid_dim = hid_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(vocab_len, emb_dim)
        # nn.LSTM applies its dropout only between stacked layers; with a single
        # layer a non-zero value has no effect and just triggers a UserWarning.
        lstm_dropout = dropout_prob if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=lstm_dropout)
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, input_batch):
        """Encode a batch of source sequences.

        Args:
            input_batch: integer tensor of token indices. The LSTM is built with
                the default ``batch_first=False``, so the layout is
                ``[seq_len, batch_size]`` (or ``[seq_len]`` for one unbatched sequence).

        Returns:
            ``(hidden, cell)``: the final LSTM hidden and cell states, each
            ``[n_layers, batch_size, hid_dim]`` for batched input.
        """
        # The embedding output is already on the same device as the module's
        # parameters, so no transfer to a notebook-level ``device`` is needed.
        embed = self.dropout(self.embedding(input_batch))
        # Only the final states are passed on; the per-step outputs are unused.
        _, (hidden, cell) = self.lstm(embed)

        return hidden, cell

#### Encoder -> example of one forward pass

In [5]:
# toy hyper-parameters for the one-step encoder demo
vocab_len, emb_dim, hid_dim = 8, 10, 8
n_layers, dropout_prob = 1, 0.5

# build the demo encoder and move its parameters to the selected device
encoder_t = Encoder(
    vocab_len=vocab_len,
    emb_dim=emb_dim,
    hid_dim=hid_dim,
    n_layers=n_layers,
    dropout_prob=dropout_prob,
).to(device)

In [6]:
# dummy data: one unbatched source sequence, where 0,3,4,2,1 are vocab indices
# (created directly on the target device; a 1-D tensor needs no transpose)
src_batch = torch.tensor([0, 3, 4, 2, 1], device=device)

# getting the embedding of the text token indices
embedded = encoder_t.embedding(src_batch)
# applying dropout to embedded (already on the module's device, so no extra transfer)
embedded_dropout = encoder_t.dropout(embedded)
# passing through the lstm
outputs, (hidden_t, cell_t) = encoder_t.lstm(embedded_dropout)

# report the full shape of every tensor, not just its first dimension
print(f"Input(src) tensor  [shape -> {tuple(src_batch.shape)}]:-\n", src_batch)
print(f"\nEmbedded tokens  [shape -> {tuple(embedded.shape)}]:-\n", embedded)
print(f"\nAfter dropout:-  [shape -> {tuple(embedded_dropout.shape)}]\n", embedded_dropout)
print(f"\nHidden:-         [shape -> {tuple(hidden_t.shape)}]\n", hidden_t)
print(f"\nCell:-           [shape -> {tuple(cell_t.shape)}]\n", cell_t)
print(f"\nOutput:-         [shape -> {tuple(outputs.shape)}]\n", outputs)

Input(src) tensor  [shape -> 5]:-
 tensor([0, 3, 4, 2, 1], device='cuda:0')

Embedded tokens  [shape -> 5]:-
 tensor([[-0.5747, -1.1600, -0.6047, -1.1889,  0.2840, -0.6863, -0.0109, -0.3284,
         -0.1796,  1.0256],
        [ 1.0830, -0.0276,  0.0984, -0.3963, -1.4614, -1.7912, -0.5361, -0.1614,
         -1.9529, -0.3981],
        [-0.9258, -0.4066,  0.8818, -1.3187,  0.7958,  1.6644,  0.5789, -0.4460,
          0.4834, -0.3851],
        [-1.2413, -1.1788,  2.3537, -0.1122, -2.6231, -1.4404,  0.9435, -0.9235,
         -1.0891,  0.8601],
        [ 0.3792, -0.0135,  0.2998,  1.1062,  0.3078,  0.4981, -0.1278, -0.5539,
          0.0046, -0.4288]], device='cuda:0', grad_fn=<EmbeddingBackward0>)

After dropout:-  [shape -> 5]
 tensor([[-0.0000, -2.3201, -1.2094, -2.3779,  0.5680, -0.0000, -0.0000, -0.6569,
         -0.0000,  2.0512],
        [ 2.1661, -0.0552,  0.0000, -0.0000, -0.0000, -3.5825, -0.0000, -0.0000,
         -0.0000, -0.7961],
        [-1.8515, -0.8132,  1.7636, -2.6375,  0

## The Decoder

In [7]:
# an RNN decoder built from the "nn.Embedding", "nn.Linear", "nn.LSTM", "nn.Dropout", "nn.LogSoftmax" modules of the "pytorch" library
class Decoder(nn.Module):
    """LSTM decoder that predicts the next target token from the previous one.

    Args:
        output_dim: number of entries in the embedding table and size of the
            prediction vector produced per input token.
        emb_dim: size of each token embedding.
        hid_dim: size of the LSTM hidden and cell states.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability applied to the embeddings and, when
            ``n_layers > 1``, between the stacked LSTM layers.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()

        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(output_dim, emb_dim)
        # nn.LSTM applies its dropout only between stacked layers; with a single
        # layer a non-zero value has no effect and just triggers a UserWarning.
        lstm_dropout = dropout if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=lstm_dropout)
        self.fc_out = nn.Linear(hid_dim, output_dim)
        # NOTE: despite the attribute name this is LogSoftmax, so the output
        # holds log-probabilities rather than probabilities.
        self.softmax = nn.LogSoftmax(dim=1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, cell):
        """Run one decoding step.

        Args:
            input: token indices for the current step, ``[batch_size]``.
            hidden: LSTM hidden state handed to ``self.lstm``.
            cell: LSTM cell state handed to ``self.lstm``.

        Returns:
            ``(prediction, hidden, cell)``: ``prediction`` holds the
            log-probabilities ``[batch_size, output_dim]``; ``hidden`` and
            ``cell`` are the updated LSTM states.
        """
        # input = [batch_size]
        input = input.unsqueeze(0)  # input = [1, batch_size] (sequence length of one)
        embedded = self.dropout(self.embedding(input))
        output, (hidden, cell) = self.lstm(embedded, (hidden, cell))
        # drop the length-one sequence axis before projecting to vocabulary scores
        prediction_logit = self.fc_out(output.squeeze(0))
        prediction = self.softmax(prediction_logit)

        return prediction, hidden, cell

#### Decoder -> example of one forward pass

In [8]:
# toy hyper-parameters for the one-step decoder demo
output_dim, emb_dim, hid_dim = 6, 10, 8
n_layers, dropout = 1, 0.5

# build the demo decoder and move its parameters to the selected device
decoder_t = Decoder(
    output_dim=output_dim,
    emb_dim=emb_dim,
    hid_dim=hid_dim,
    n_layers=n_layers,
    dropout=dropout,
).to(device)

In [37]:
# dummy data: a single <bos> token, created directly on the target device
input_t = torch.tensor([0], device=device) # <bos>

# getting the embedding of the input text token <bos>
embedded = decoder_t.embedding(input_t)
# applying dropout to embedded (already on the module's device, so no extra transfer)
embedded_dropout = decoder_t.dropout(embedded)
# passing through the lstm, starting from the states produced by the encoder
# example cell (hidden_t, cell_t), so that cell must have been run first
output, (hidden, cell) = decoder_t.lstm(embedded_dropout, (hidden_t, cell_t))
# passing through the fully connected layer
prediction_logit = decoder_t.fc_out(output)
# applying log-softmax (decoder_t.softmax is an nn.LogSoftmax layer)
prediction = decoder_t.softmax(prediction_logit)

# report the full shape of every tensor, not just its first dimension
print(f"Input(target) tensor  [shape -> {tuple(input_t.shape)}]:-\n", input_t)
print(f"\nEmbedded tokens  [shape -> {tuple(embedded.shape)}]:-\n", embedded)
print(f"\nAfter dropout:-  [shape -> {tuple(embedded_dropout.shape)}]\n", embedded_dropout)
print(f"\nRNN Hidden:-     [shape -> {tuple(hidden.shape)}]\n", hidden)
print(f"\nRNN Cell:-       [shape -> {tuple(cell.shape)}]\n", cell)
print(f"\nRNN Output:-     [shape -> {tuple(output.shape)}]\n", output)
print(f"\nFC layer Out:-   [shape -> {tuple(prediction_logit.shape)}]\n", prediction_logit)
print(f"\nSoftmax to Out:- [shape -> {tuple(prediction.shape)}]\n", prediction)