Extracting the dataset

In [None]:
# !pip install datasets
# from datasets import load_dataset

# dataset = load_dataset("cfilt/iitb-english-hindi")

# with open('eng-hin-train.txt' , 'w+' , encoding = "utf8") as file:
#     for translation_pair in dataset["train"]["translation"]:
#         source_sentence = translation_pair["en"]
#         target_sentence = translation_pair["hi"]
#         file.write(source_sentence.strip() + "\t")
#         file.write(target_sentence.strip() + "\n")



Collecting datasets
  Downloading datasets-2.14.6-py3-none-any.whl (493 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, huggingface-hub, datasets
Successfully installed datasets-2.14.

Downloading readme:   0%|          | 0.00/3.11k [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


Downloading metadata:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/190M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/85.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/500k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1659083 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/520 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2507 [00:00<?, ? examples/s]

In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import numpy as np
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
device

device(type='cpu')

In [None]:
# Reserved vocabulary ids; every Lang instance pre-assigns these three slots.
# NOTE: get_dataloader pads its id matrices with zeros (np.zeros), so padded
# positions carry the same id as SOS_token.
SOS_token = 0
EOS_token = 1
UNK_token = 2

class Lang:
    """Vocabulary of a single language.

    Keeps a word -> id map, the inverse id -> word map and a per-word
    occurrence count. Ids 0, 1 and 2 are pre-assigned to the "SOS", "EOS"
    and "<UNK>" entries, so the first real word receives id 3.
    """

    def __init__(self, name):
        self.name = name
        # 0/1/2 are already taken by SOS, EOS and <UNK>
        self.n_words = 3
        self.index2word = {0: "SOS", 1: "EOS" , 2:"<UNK>"}
        self.word2index = {}
        self.word2count = {}

    def addSentence(self, sentence):
        """Register every token of `sentence`.

        Tokens are obtained by splitting on single spaces, so consecutive
        spaces yield empty-string tokens that are registered like any word.
        """
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Count one occurrence of `word`, giving it the next free id if new."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_id = self.n_words
        self.word2index[word] = new_id
        self.index2word[new_id] = word
        self.word2count[word] = 1
        self.n_words = new_id + 1

In [None]:
# Strip Latin-style accents from a Unicode string, adapted from
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Return `s` in NFD form with generic combining diacritics removed.

    Only code points in the Combining Diacritical Marks block
    (U+0300-U+036F) are dropped, so accented Latin letters collapse to their
    base letter ("é" -> "e"). Script-specific combining signs are kept:
    dropping every nonspacing mark (category 'Mn') would also delete the
    Devanagari vowel signs, virama and nukta and corrupt the Hindi side of
    the corpus.

    Args:
        s: any text string.

    Returns:
        The NFD-normalised string without U+0300-U+036F characters. Despite
        the function name, non-Latin text is not converted to ASCII.
    """
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        # every code point in this block is a nonspacing accent mark
        if not ('\u0300' <= c <= '\u036f')
    )

# Lowercase, trim, and space out sentence-ending punctuation
def normalizeString(s):
    """Normalise one sentence before tokenisation.

    Lowercases and trims `s`, passes it through unicodeToAscii, then inserts
    a space in front of every '.', '!' and '?' so that splitting on spaces
    separates the punctuation from the preceding word.
    """
    lowered = s.lower().strip()
    folded = unicodeToAscii(lowered)
    spaced = re.sub(r"([.!?])", r" \1", folded)
    # The ASCII-only filter re.sub(r"[^a-zA-Z!?]+", r" ", s) is left disabled here.
    return spaced.strip()

In [None]:
def readLangs(lang1, lang2, path, reverse=False):
    """Read a tab-separated parallel corpus and create empty vocabularies.

    Each line of the file is expected to hold ``source<TAB>target``; only the
    first two columns are used and both are passed through normalizeString.

    Args:
        lang1: name given to the language in the first column.
        lang2: name given to the language in the second column.
        path: path of the UTF-8 encoded text file.
        reverse: when True, swap the two columns (and the two Lang objects) so
            the second column becomes the input language.

    Returns:
        (input_lang, output_lang, pairs) where the Lang objects are still
        empty and ``pairs`` is a list of ``[input_sentence, output_sentence]``.
    """
    print(f"Reading lines... from {path}")

    # The context manager closes the file handle as soon as the text is read.
    with open(path, encoding='utf-8') as corpus:
        lines = corpus.read().strip().split('\n')

    pairs = []
    for line in lines:
        columns = line.split('\t')
        if len(columns) < 2:
            # Blank or malformed line: a one-element "pair" would raise an
            # IndexError later when its second sentence is accessed.
            continue
        pairs.append([normalizeString(column) for column in columns[:2]])

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [None]:
# Upper bound (exclusive) on the number of space-separated tokens per sentence.
MAX_LENGTH = 15


def filterPair(p):
    """Tell whether both sentences of pair `p` are shorter than MAX_LENGTH tokens.

    The second sentence is only inspected when the first one passes.
    """
    if len(p[0].split(' ')) >= MAX_LENGTH:
        return False
    return len(p[1].split(' ')) < MAX_LENGTH


def filterPairs(pairs):
    """Return, in their original order, the pairs that filterPair accepts."""
    return list(filter(filterPair, pairs))

## The full process for preparing the data is:

    1. Read text file and split into lines, split lines into pairs

    2. Normalize text, filter by length and content ( optional )

    3. Make word lists from sentences in pairs


In [None]:
# Tab-separated parallel corpus consumed by get_dataloader.
data_path = 'eng-hin-train-100000.txt'


def prepareData(lang1, lang2, path, reverse=False):
    """Load the corpus at `path`, filter it by length and build both vocabularies.

    Args:
        lang1, lang2: names of the languages in the first and second column.
        path: corpus file handed to readLangs.
        reverse: forwarded to readLangs.

    Returns:
        (input_lang, output_lang, pairs) with the vocabularies populated from
        the pairs that survived filterPairs.
    """
    input_lang, output_lang, all_pairs = readLangs(lang1, lang2, path, reverse)
    print("Read %s sentence pairs" % len(all_pairs))

    pairs = filterPairs(all_pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))

    print("Counting words...")
    for sentence_pair in pairs:
        input_lang.addSentence(sentence_pair[0])
        output_lang.addSentence(sentence_pair[1])

    print("Counted words:")
    for vocabulary in (input_lang, output_lang):
        print(vocabulary.name, vocabulary.n_words)

    return input_lang, output_lang, pairs



In [None]:
# input_lang, output_lang, pairs = prepareData('eng', 'hin', data_path)
# print(random.choice(pairs))


# print("Total Sentences = ", len(pairs))
# print(random.choice(pairs))

Reading lines... from eng-hin-train-100000.txt
Read 100000 sentence pairs
Trimmed to 97471 sentence pairs
Counting words...
Counted words:
eng 52137
hin 61973
['tea at the dhaba unning away from school  !', 'सकल स भागना  !']
Total Sentences =  97471
['rasgulla', 'रसगलल']


In [None]:
# print(random.choice(pairs))

["and from that, we 're subtracting 42 .", 'और इस म स ४२ घटा द']


In [None]:
# def saveDataSize(pairs , size):
#   with open(f'eng-hin-train-{size}.txt' , 'w+' , encoding = "utf8") as file:
#     random.shuffle(pairs)
#     totalExamples = min(len(pairs) , size)
#     newPairs = pairs[:totalExamples]
#     for pair in newPairs:
#         source_sentence = pair[0]
#         target_sentence = pair[1]
#         file.write(source_sentence.strip() + "\t")
#         file.write(target_sentence.strip() + "\n")



# # print(random.choice(pairs))

In [None]:
# saveDataSize(pairs , 100000)

# Encoder

In [None]:
class Encoder(nn.Module):
    """Embedding + single-layer GRU encoder.

    The embedding table is learned jointly with the rest of the model;
    dropout is applied to the embeddings before they enter the GRU.
    """

    def __init__(self, input_size, hidden_size, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        # One hidden_size-dimensional vector per source-vocabulary id
        self.embedding_layer = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_vector):
        """Encode a batch of token-id sequences.

        Returns:
            (output, hidden_state): the GRU output at every time step and the
            hidden state after the last time step.
        """
        embedded = self.embedding_layer(input_vector)
        embedded = self.dropout(embedded)
        per_step_output, final_hidden = self.rnn(embedded)
        return per_step_output, final_hidden


# Decoder
Simple Decoder

In the simplest seq2seq decoder we use only the last output of the encoder. This last output is sometimes called the context vector, as it encodes context from the entire sequence. This context vector is used as the initial hidden state of the decoder.

At every step of decoding, the decoder is given an input token and hidden state. The initial input token is the start-of-string <SOS> token, and the first hidden state is the context vector (the encoder’s last hidden state).


In [None]:
class Decoder(nn.Module):
    """GRU decoder without attention.

    Decoding starts from the encoder's final hidden state and always runs for
    MAX_LENGTH steps, producing log-probabilities over the target vocabulary.
    """

    def __init__(self , hidden_size , output_size):
        super(Decoder , self).__init__()
        # Embedding layer for the target language
        self.embedding_layer = nn.Embedding(output_size , hidden_size)
        # Recurrent core; forward() feeds it one token per call
        self.rnn = nn.GRU(hidden_size , hidden_size , batch_first=True)
        # Projects each GRU output onto target-vocabulary scores
        self.outputLayer = nn.Linear(hidden_size , output_size)

    def forward_step(self , input_vector , hidden_state):
        """Run a single decoding step.

        Returns:
            (output, hidden_state): unnormalised vocabulary scores for this
            step and the updated GRU hidden state.
        """
        output = self.embedding_layer(input_vector)
        output = F.relu(output)
        output , hidden_state = self.rnn(output , hidden_state)
        output = self.outputLayer(output)

        return output , hidden_state

    def forward(self , encoder_output , encoder_hidden_state , target_tensor = None):
        """Decode MAX_LENGTH tokens for every sequence in the batch.

        Args:
            encoder_output: encoder outputs; only the batch size (dim 0) and
                the device are read here.
            encoder_hidden_state: used as the initial decoder hidden state.
            target_tensor: optional target ids. When given, column ``i`` is fed
                as the input of step ``i + 1`` (teacher forcing); otherwise the
                decoder feeds back its own most likely token.

        Returns:
            (log_probs, hidden_state, None): log-probabilities for all steps
            concatenated along dim 1, the final hidden state, and ``None`` in
            the slot where AttentionDecoder returns attention weights.
        """
        batch_size = encoder_output.size(0)
        # Every sequence starts from SOS. The tensor is created on the same
        # device as the encoder output so the module does not depend on the
        # notebook-level `device` matching where the model actually lives.
        decoder_input = torch.empty(
            batch_size, 1, dtype=torch.long, device=encoder_output.device
        ).fill_(SOS_token)
        decoder_hidden_state = encoder_hidden_state

        decoder_outputs = []

        for i in range(MAX_LENGTH):
            decoder_output , decoder_hidden_state = self.forward_step(decoder_input , decoder_hidden_state)
            decoder_outputs.append(decoder_output)

            if target_tensor is not None:
                # Teacher forcing: feed the ground-truth token instead of the
                # model's own prediction; unsqueeze restores the step dimension.
                decoder_input = target_tensor[: ,i].unsqueeze(1)
            else:
                # Greedy decoding: keep only the best-scoring token
                # (beam search would keep the top k candidates instead).
                _ , topI = decoder_output.topk(1)
                # topk(1) appends a trailing dim of size 1; dropping it gives
                # one token id per sequence. detach() keeps the fed-back token
                # out of the autograd history.
                decoder_input = topI.squeeze(-1).detach()

        # Concatenate the per-step scores along the step dimension
        decoder_outputs = torch.cat(decoder_outputs , dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)

        return decoder_outputs, decoder_hidden_state, None





# Attention Mechanism



In [None]:
class BahadanuAttention(nn.Module):
    """Additive attention: score = Va(tanh(Wa(query) + Ua(keys)))."""

    def __init__(self, hidden_size):
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, decoder_hidden, encoder_hidden):
        """Weight the encoder states by their relevance to the decoder state.

        Assumes `decoder_hidden` is (batch, 1, hidden) and `encoder_hidden` is
        (batch, seq, hidden), which is how AttentionDecoder.forward_step calls
        it - TODO confirm for other callers.

        Returns:
            (context_vector, probabilisticWeights): the weighted sum of the
            encoder states and the softmax-normalised weights behind it.
        """
        projected_query = self.Wa(decoder_hidden)
        projected_keys = self.Ua(encoder_hidden)
        # The query broadcasts over the sequence axis; Va yields one score
        # per encoder position.
        energies = self.Va(torch.tanh(projected_query + projected_keys))

        # Move the scores from the last axis to a row: (.., seq, 1) -> (.., 1, seq)
        align_scores = energies.squeeze(2).unsqueeze(1)

        # Normalise over the encoder positions
        probabilisticWeights = F.softmax(align_scores, dim=-1)

        # Batched (1 x seq) @ (seq x hidden) product = weighted sum of states
        context_vector = torch.bmm(probabilisticWeights, encoder_hidden)

        return context_vector, probabilisticWeights



class AttentionDecoder(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    At each step the current hidden state queries the encoder outputs; the
    resulting context vector is concatenated with the embedded input token
    and fed to the GRU.
    """

    def __init__(self , hidden_size , output_size , drop_out = 0.1):
        super(AttentionDecoder , self).__init__()

        self.embedding = nn.Embedding(output_size , hidden_size)

        self.simpleAttention = BahadanuAttention(hidden_size)

        # Input is [embedded token ; context vector], hence 2 * hidden_size
        self.rnn =  nn.GRU(2*hidden_size , hidden_size , batch_first=True)

        self.output = nn.Linear(hidden_size , output_size)

        self.dropout = nn.Dropout(drop_out)

    def forward_step(self , input_word , decoder_hidden , encoder_outputs):
        """Run one attention + GRU step.

        The attention keys are the encoder outputs (one per source position)
        rather than a separately stored list of encoder hidden states.

        Returns:
            (output, hidden, attention_weights): unnormalised vocabulary
            scores, the updated hidden state and this step's attention weights.
        """
        embedded_input = self.dropout(self.embedding(input_word))

        # The hidden state acts as the attention query; permute swaps its
        # first two axes so the batch axis comes first, like encoder_outputs.
        hidden_state_as_query = decoder_hidden.permute(1,0,2)

        context_vector , attention_weights = self.simpleAttention(hidden_state_as_query , encoder_outputs)

        input_rnn = torch.cat((embedded_input ,context_vector ) , dim=2)

        output, hidden = self.rnn(input_rnn, decoder_hidden)

        output = self.output(output)

        return output, hidden, attention_weights

    def forward(self , encoder_outputs , encoder_hidden , target_tensor=None):
        """Decode MAX_LENGTH tokens for every sequence in the batch.

        Args:
            encoder_outputs: encoder outputs, used as attention keys/values.
            encoder_hidden: used as the initial decoder hidden state.
            target_tensor: optional target ids. When given, column ``i`` is fed
                as the input of step ``i + 1`` (teacher forcing); otherwise the
                decoder feeds back its own most likely token.

        Returns:
            (log_probs, hidden, attentions): log-probabilities and attention
            weights for all steps, each concatenated along dim 1, plus the
            final hidden state.
        """
        batch_size = encoder_outputs.size(0)
        # Every sequence starts from SOS. The tensor is created on the same
        # device as the encoder outputs so the module does not depend on the
        # notebook-level `device` matching where the model actually lives.
        decoder_input = torch.empty(
            batch_size , 1, dtype = torch.long, device = encoder_outputs.device
        ).fill_(SOS_token)

        decoder_hidden = encoder_hidden
        decoder_outputs = []
        attentions = []

        for i in range(MAX_LENGTH):
            decoder_output , decoder_hidden , attention_weights = self.forward_step(decoder_input , decoder_hidden , encoder_outputs )
            decoder_outputs.append(decoder_output)
            attentions.append(attention_weights)

            if target_tensor is not None:
                # Teacher forcing: feed the target as the next input
                decoder_input = target_tensor[:, i].unsqueeze(1)
            else:
                # Greedy decoding: feed back the best-scoring token; detach()
                # keeps it out of the autograd history.
                _ , topRes = decoder_output.topk(1)
                decoder_input = topRes.squeeze(-1).detach()

        decoder_outputs = torch.cat(decoder_outputs , dim = 1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        attentions = torch.cat(attentions, dim=1)

        return decoder_outputs, decoder_hidden, attentions




# Training and Inference

In [None]:

def indexesFromSentence(lang, sentence):
    """Map each space-separated token of `sentence` to its id in `lang`.

    Tokens missing from ``lang.word2index`` are mapped to UNK_token (the named
    constant replaces the bare literal 2 that duplicated its value).
    """
    return [lang.word2index.get(word , UNK_token) for word in sentence.split(' ')]

def tensorFromSentence(lang, sentence):
    """Encode `sentence` as a (1, length) LongTensor of ids ending with EOS_token.

    The tensor is created on the notebook-level `device`.
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    # Wrapping the list once more yields the leading batch dimension of 1.
    return torch.tensor([token_ids], dtype=torch.long, device=device)

def tensorsFromPair(pair):
    """Turn a (source, target) sentence pair into a pair of id tensors.

    Uses the notebook-level `input_lang` and `output_lang` vocabularies.
    The source sentence is encoded in its original word order; the
    input-reversal variant from the seq2seq paper is not applied here.
    """
    source_sentence = pair[0]
    target_sentence = pair[1]
    return (
        tensorFromSentence(input_lang, source_sentence),
        tensorFromSentence(output_lang, target_sentence),
    )

def get_dataloader(batch_size , max_rows = 50000):
    """Prepare the corpus at `data_path` and wrap it in a shuffling DataLoader.

    Every sentence is converted to ids, terminated with EOS_token and
    right-padded with zeros up to MAX_LENGTH.

    Args:
        batch_size: number of pairs per batch.
        max_rows: currently unused - the random sub-sampling it controlled is
            disabled, so all filtered pairs are kept.

    Returns:
        (input_lang, output_lang, train_dataloader, pairs)
    """
    input_lang, output_lang, pairs = prepareData('eng', 'hin', data_path)

    n = len(pairs)
    print("Final Pairs" , n)

    # Zero-initialised, so positions after EOS stay 0 (the padding value).
    input_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)
    target_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)

    for row, (source, target) in enumerate(pairs):
        source_seq = indexesFromSentence(input_lang, source) + [EOS_token]
        target_seq = indexesFromSentence(output_lang, target) + [EOS_token]
        input_ids[row, :len(source_seq)] = source_seq
        target_ids[row, :len(target_seq)] = target_seq

    dataset = TensorDataset(
        torch.LongTensor(input_ids).to(device),
        torch.LongTensor(target_ids).to(device),
    )

    # RandomSampler reshuffles the example order on every pass.
    loader = DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)
    return input_lang, output_lang, loader, pairs



To train we run the input sentence through the encoder, and keep track of every output and the latest hidden state. Then the decoder is given the <SOS> token as its first input, and the last hidden state of the encoder as its first hidden state.


### “Teacher forcing”
Teacher forcing is the concept of using the real target outputs as each next input, instead of using the decoder’s guess as the next input. Using teacher forcing causes the network to converge faster, but when the trained network is exploited, it may exhibit instability.

You can observe outputs of teacher-forced networks that read with coherent grammar but wander far from the correct translation - intuitively it has learned to represent the output grammar and can “pick up” the meaning once the teacher tells it the first few words, but it has not properly learned how to create the sentence from the translation in the first place.

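Because of the freedom PyTorch’s autograd gives us, we can randomly choose to use teacher forcing or not with a simple if statement. Turn teacher_forcing_ratio up to use more of it. Note that this notebook does not define a teacher_forcing_ratio: `train_epoch` always passes the target tensor to the decoder, so every training step uses teacher forcing.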

In [None]:
def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion):
    """Run one pass over `dataloader` and return the mean batch loss.

    For each batch the target tensor is handed to the decoder, so decoding is
    teacher-forced on every training step.
    """
    optimizers = (encoder_optimizer, decoder_optimizer)
    batch_losses = []

    for input_tensor, target_tensor in dataloader:
        for optimizer in optimizers:
            optimizer.zero_grad()

        encoder_outputs, encoder_hidden = encoder(input_tensor)
        # The targets are supplied explicitly here; evaluation calls the
        # decoder without them.
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)

        # Flatten (batch, steps, vocab) -> (batch * steps, vocab) and the
        # targets to a matching 1-D vector for the criterion.
        vocab_size = decoder_outputs.size(-1)
        loss = criterion(decoder_outputs.view(-1, vocab_size), target_tensor.view(-1))
        loss.backward()

        for optimizer in optimizers:
            optimizer.step()

        batch_losses.append(loss.item())

    # Average over the number of batches
    return sum(batch_losses) / len(dataloader)

In [None]:
import time
import math

def asMinutes(s):
    """Format a duration of `s` seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Report elapsed and estimated remaining time as 'elapsed (- remaining)'.

    Args:
        since: start timestamp as returned by time.time().
        percent: fraction of the work completed so far; the total duration is
            extrapolated as elapsed / percent.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

The whole training process looks like this:

    Start a timer

    Initialize optimizers and criterion

    Create set of training pairs

    Start empty losses array for plotting

Then we call train many times and occasionally print the progress (% of examples, time so far, estimated time) and average loss.

In [None]:
def saveEncDec(encoder , encoder_path , decoder , decoder_path):
    """Serialise the encoder and the decoder with torch.save, logging progress.

    The module objects themselves are saved (not only their state dicts).
    """
    print("Saving ...")
    for module, destination, done_message in (
        (encoder, encoder_path, "Saved Encoder at "),
        (decoder, decoder_path, "Saved Decoder at"),
    ):
        torch.save(module, destination)
        print(done_message, destination)

def loadEncDec(encoder_path , decoder_path):
    """Load an encoder/decoder pair previously written by saveEncDec.

    Args:
        encoder_path: file holding the pickled encoder module.
        decoder_path: file holding the pickled decoder module.

    Returns:
        (encoder, decoder) with their tensors placed on the notebook-level
        `device`.

    SECURITY: torch.load unpickles the file, which can execute arbitrary
    code - only load checkpoints you created or otherwise trust.
    NOTE(review): recent PyTorch releases default torch.load to
    weights_only=True, which rejects whole-module pickles like these -
    confirm against the installed version.
    """
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine still loads in a CPU-only session.
    print("Loading Encoder ...")
    encoder = torch.load(encoder_path, map_location=device)
    print("Loadind Decoder ...")
    decoder = torch.load(decoder_path, map_location=device)
    print("Loading Done")
    return encoder , decoder

In [None]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np

def showPlot(points):
    """Draw `points` as a line chart with a major y-axis tick every 0.2."""
    # plt.subplots() creates the figure itself; a separate plt.figure() call
    # beforehand would only leave an extra, empty figure open.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)


def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
               print_every=100, plot_every=100 , save_every = 5,
               checkpoint_tag="Hindi-iitb-with-attn-100k-gru-data-nonrev"):
    """Train the encoder/decoder pair for `n_epochs` epochs.

    Args:
        train_dataloader: batches of (input_tensor, target_tensor).
        encoder, decoder: the modules to optimise; each gets its own Adam
            optimiser.
        n_epochs: number of passes over the dataloader.
        learning_rate: Adam learning rate for both optimisers.
        print_every: log the mean epoch loss every this many epochs
            (0 or None disables logging).
        plot_every: accepted for backward compatibility; loss plotting is
            disabled, so the value is ignored.
        save_every: checkpoint both modules every this many epochs
            (0 or None disables checkpointing).
        checkpoint_tag: middle part of the checkpoint file names, which are
            ``encoder<tag>-<epoch>-epoch.pt`` and ``decoder<tag>-<epoch>-epoch.pt``.
    """
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # Negative log-likelihood loss: minimising it is maximum-likelihood
    # estimation, i.e. maximising P(dataset | parameters).
    # NOTE: no ignore_index is set, so zero-padded target positions
    # contribute to the loss as well.
    criterion = nn.NLLLoss()

    for epoch in range(1, n_epochs + 1):
        loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        # The truthiness guard avoids a modulo-by-zero when logging is disabled.
        if print_every and epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, epoch / n_epochs),
                                        epoch, epoch / n_epochs * 100, print_loss_avg))

        if save_every and epoch % save_every == 0:
            saveEncDec(encoder , f"encoder{checkpoint_tag}-{epoch}-epoch.pt" , decoder , f"decoder{checkpoint_tag}-{epoch}-epoch.pt")

# Evaluation

Evaluation is mostly the same as training, but there are no targets so we simply feed the decoder’s predictions back to itself for each step. Every time it predicts a word we add it to the output string, and if it predicts the EOS token we stop there. We also store the decoder’s attention outputs for display later.

In [None]:
def evaluate(encoder, decoder, sentence, input_lang, output_lang):
    """Translate one sentence with greedy decoding.

    Args:
        encoder, decoder: trained modules.
        sentence: source sentence, already normalised the same way as the
            training data.
        input_lang, output_lang: vocabularies used to encode the input and to
            turn predicted ids back into words.

    Returns:
        (decoded_words, decoder_attn): the predicted words, ending with
        '<EOS>' when the model emitted the EOS token, and whatever the decoder
        returned in its attention slot (None for a decoder without attention).
    """
    # Inference must run in eval mode, otherwise dropout stays active and the
    # translation becomes random. The previous modes are restored afterwards
    # so a training loop calling this is unaffected.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(input_lang, sentence)

            encoder_outputs, encoder_hidden = encoder(input_tensor)
            # No target is passed: the decoder feeds back its own predictions.
            decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)

            # Greedy choice: keep only the best token at every step.
            # TODO: replace with beam search.
            _, topi = decoder_outputs.topk(1)
            decoded_ids = topi.squeeze()

            decoded_words = []
            for idx in decoded_ids:
                if idx.item() == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                decoded_words.append(output_lang.index2word[idx.item()])
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

    return decoded_words, decoder_attn

In [None]:
def evaluateRandomly(encoder, decoder,  pairs, n=10):
    """Print the model's translation for `n` randomly chosen pairs.

    For every sample the source sentence, the reference translation and the
    model output are printed. Uses the notebook-level `input_lang` and
    `output_lang` vocabularies.
    """
    for _ in range(n):
        sample = random.choice(pairs)
        source_sentence = sample[0]
        print('Input Sentence :: ', source_sentence)
        print('Actual Translated Sentence :: ', sample[1])
        predicted_words, _attention = evaluate(encoder, decoder, source_sentence, input_lang, output_lang)
        print('Translated Sentence :: ', ' '.join(predicted_words))
        print('')

In [None]:
# Width of the embeddings and GRU hidden state used when the models are built.
hidden_size = 128
# Number of sentence pairs per training batch.
batch_size = 64

# Build the vocabularies, the padded id tensors and the shuffling DataLoader.
# NOTE: the second argument is get_dataloader's max_rows, which that function
# currently ignores (its sub-sampling code is commented out).
input_lang, output_lang, train_dataloader , pairs = get_dataloader(batch_size , 100000)


Reading lines... from eng-hin-train-100000.txt
Read 100000 sentence pairs
Trimmed to 97471 sentence pairs
Counting words...
Counted words:
eng 52137
hin 61973
Final Pairs 97471


In [None]:
# Instantiate the encoder and the attention decoder, sized to the two
# vocabularies, and move them to the selected device.
encoder = Encoder(input_lang.n_words, hidden_size).to(device)
decoder = AttentionDecoder(hidden_size, output_lang.n_words).to(device)

In [None]:
# Train for 100 epochs, logging the mean loss after every epoch and
# checkpointing every 4th epoch. plot_every has no effect: plotting is disabled in train.
train(train_dataloader, encoder, decoder, 100, print_every=1, plot_every=1 , save_every = 4)

1m 30s (- 150m 5s) (1 1%) 2.6821
3m 0s (- 147m 31s) (2 2%) 2.2165
4m 30s (- 145m 40s) (3 3%) 2.0013
5m 59s (- 143m 54s) (4 4%) 1.8280
Saving ...
Saved Encoder at  encoderHindi-iitb-with-attn-100k-gru-data-nonrev-4-epoch.pt
Saved Decoder at decoderHindi-iitb-with-attn-100k-gru-data-nonrev-4-epoch.pt
7m 29s (- 142m 21s) (5 5%) 1.6806
8m 59s (- 140m 45s) (6 6%) 1.5529
10m 28s (- 139m 9s) (7 7%) 1.4416
11m 57s (- 137m 36s) (8 8%) 1.3432
Saving ...
Saved Encoder at  encoderHindi-iitb-with-attn-100k-gru-data-nonrev-8-epoch.pt
Saved Decoder at decoderHindi-iitb-with-attn-100k-gru-data-nonrev-8-epoch.pt
13m 27s (- 136m 2s) (9 9%) 1.2563
14m 56s (- 134m 30s) (10 10%) 1.1798
16m 26s (- 132m 58s) (11 11%) 1.1129
17m 55s (- 131m 25s) (12 12%) 1.0534
Saving ...
Saved Encoder at  encoderHindi-iitb-with-attn-100k-gru-data-nonrev-12-epoch.pt
Saved Decoder at decoderHindi-iitb-with-attn-100k-gru-data-nonrev-12-epoch.pt
19m 24s (- 129m 54s) (13 13%) 1.0014
20m 54s (- 128m 23s) (14 14%) 0.9548


In [None]:
# Save the current models.
# NOTE(review): the models built in this notebook are GRU-based and the decoder
# is an AttentionDecoder, yet these file names say "without-attn" and "lstm" -
# confirm the intended names before relying on them.
saveEncDec(encoder , "encoderHindi-iitb-without-attn-100k-lstm-data-nonrev.pt" , decoder , "decoderHindi-iitb-without-attn-100k-lstm-data-nonrev.pt")

Saving ...
Saved Encoder at  encoderHindi-iitb-without-attn-100k-lstm-data-nonrev.pt
Saved Decoder at decoderHindi-iitb-without-attn-100k-lstm-data-nonrev.pt


In [None]:
# Spot-check the in-memory models on 10 random pairs drawn from the training data.
evaluateRandomly(encoder , decoder , n = 10 , pairs=pairs)

Input Sentence ::  surely in it you shall have whatever you choose  !
Actual Translated Sentence ::  कि उसम तमहार लिए वह कछ ह जो तम पसनद करो  ?
Translated Sentence ::  और तम लोग मह फर लो और वह तो ह <EOS>

Input Sentence ::  clip height
Actual Translated Sentence ::  ऊचाई काट
Translated Sentence ::  ऊचाई का उपयोग <EOS>

Input Sentence ::  birthday and anniversary reminder
Actual Translated Sentence ::  जनमदिन और वरषगाठ ससचक
Translated Sentence ::  जनमदिन और वरषगाठ ससचक <EOS>

Input Sentence ::  full of varieties  .
Actual Translated Sentence ::  घनी डालियोवाल;
Translated Sentence ::  रहत रखन की कषमता ह। <EOS>

Input Sentence ::  more than 300 languages are spoken here  .
Actual Translated Sentence ::  यहा ३०० स अधिक भाषाए बोली जाती ह।
Translated Sentence ::  यहा ३०० स अधिक भाषाए लकडी स अधिक ह। <EOS>

Input Sentence ::  daily start time:
Actual Translated Sentence ::  नितय परारभ समयः @info: whatsthis
Translated Sentence ::  (v) दनिक कारड दिखाय <EOS>

Input Sentence ::  tiles' scores
Actu

In [None]:
# Reload the saved checkpoint and translate 30 random training pairs.
# NOTE: the checkpoint paths are absolute under /content (presumably a Colab
# session) - adjust them when running elsewhere.
evaluateRandomly(*loadEncDec("/content/encoderHindi-iitb-without-attn-100k-lstm-data-nonrev.pt" , "/content/decoderHindi-iitb-without-attn-100k-lstm-data-nonrev.pt") , pairs , 30)

Loading Encoder ...
Loadind Decoder ...
Loading Done
Input Sentence ::  orion arm
Actual Translated Sentence ::  शिकारी-हनस भजा
Translated Sentence ::  लत का हो <EOS>

Input Sentence ::  override document restrictions
Actual Translated Sentence ::  दसतावज परतिबध पर लिख
Translated Sentence ::  दसतावज & परिणाम <EOS>

Input Sentence ::  behavior
Actual Translated Sentence ::  वयवहार
Translated Sentence ::  मदरण का आधनिकीकरण <EOS>

Input Sentence ::  revitalize local health traditions and mainstream ayush  .
Actual Translated Sentence ::  सथानीय सवासथय परपरा एव मखयधारा आयष को पनरजीवित करना।
Translated Sentence ::  सथानीय उदयोग म करियानवयन परापत करन की नीति और <EOS>

Input Sentence ::  marathi film industry is also situated in mumbai  .
Actual Translated Sentence ::  मराठी चलचितर उदयोग भी मबई म ही सथित ह।
Translated Sentence ::  मराठी चलचितर उदयोग भी मबई म ही सथित ह। <EOS>

Input Sentence ::  mesh normal
Actual Translated Sentence ::  मश सामानय नटः
Translated Sentence ::  सगरह की परकरिया <E

In [None]:
# Show only a small sample: displaying the whole list would embed every
# sentence pair in the notebook output.
pairs[:5]