In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re  
import random
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np
import os
from torch.utils.data import Dataset
from sacrebleu import corpus_bleu

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
path_zh = '/home/ys2542/Neural-Translation-System/iwslt-zh-en-processed/'
path_vi = '/home/ys2542/Neural-Translation-System/iwslt-vi-en-processed/'
ft_home = '/scratch/ys2542/NLP_FASTTEXT/'
model_path = '/scratch/ys2542/model/'

In [4]:
PAD_token = 0
UNK_token = 1
SOS_token = 2
EOS_token = 3

In [5]:
words_to_load = 100000 
EMBED_SIZE = 300

In [6]:
with open(ft_home + 'wiki-news-300d-1M.vec') as f:
    matrix_size = words_to_load + 4
    loaded_embeddings_ft_en = np.zeros((matrix_size, EMBED_SIZE))
    words_ft_en = {'<pad>': PAD_token, '<unk>': UNK_token, '<sos>': SOS_token, '<eos>': EOS_token,}
    idx2words_ft_en = {PAD_token: '<pad>', UNK_token: '<unk>', SOS_token: '<sos>', EOS_token: '<eos>'}
    ordered_words_ft_en = ['<pad>', '<unk>', '<sos>', '<eos>']
    
    loaded_embeddings_ft_en[0,:] = np.zeros(EMBED_SIZE)
    loaded_embeddings_ft_en[1,:] = np.random.uniform(-1.0, 1.0, EMBED_SIZE)
    loaded_embeddings_ft_en[2,:] = np.random.uniform(-1.0, 1.0, EMBED_SIZE)
    loaded_embeddings_ft_en[3,:] = np.random.uniform(-1.0, 1.0, EMBED_SIZE)
    
    for i, line in enumerate(f):
        if i == 0:
            continue
        if i == words_to_load + 1: 
            break
        s = line.split()
        idx = i + 3
        loaded_embeddings_ft_en[idx, :] = np.asarray(s[1:])
        words_ft_en[s[0]] = idx
        idx2words_ft_en[idx] = s[0]
        ordered_words_ft_en.append(s[0])

In [7]:
with open(ft_home + 'cc.zh.300.vec') as f:
    matrix_size = words_to_load + 4
    loaded_embeddings_ft_zh = np.zeros((matrix_size, EMBED_SIZE))
    words_ft_zh = {'<pad>': PAD_token, '<unk>': UNK_token, '<sos>': SOS_token, '<eos>': EOS_token,}
    idx2words_ft_zh = {PAD_token: '<pad>', UNK_token: '<unk>', SOS_token: '<sos>', EOS_token: '<eos>'}
    ordered_words_ft_zh = ['<pad>', '<unk>', '<sos>', '<eos>']
    
    loaded_embeddings_ft_zh[0,:] = np.zeros(EMBED_SIZE)
    loaded_embeddings_ft_zh[1,:] = np.random.uniform(-1.0, 1.0, EMBED_SIZE)
    loaded_embeddings_ft_zh[2,:] = np.random.uniform(-1.0, 1.0, EMBED_SIZE)
    loaded_embeddings_ft_zh[3,:] = np.random.uniform(-1.0, 1.0, EMBED_SIZE)
    
    for i, line in enumerate(f):
        if i == 0:
            continue
        if i == words_to_load + 1: 
            break
        s = line.split()
        idx = i + 3
        loaded_embeddings_ft_zh[idx, :] = np.asarray(s[1:])
        words_ft_zh[s[0]] = idx
        idx2words_ft_zh[idx] = s[0]
        ordered_words_ft_zh.append(s[0])

In [8]:
lines_zh_train = open(path_zh + 'train.tok.zh', encoding = 'utf-8').read().strip().split('\n')
lines_en_train = open(path_zh + 'train.tok.en', encoding = 'utf-8').read().strip().split('\n')

lines_zh_val = open(path_zh + 'dev.tok.zh', encoding = 'utf-8').read().strip().split('\n')
lines_en_val = open(path_zh + 'dev.tok.en', encoding = 'utf-8').read().strip().split('\n')

lines_zh_test = open(path_zh + 'test.tok.zh', encoding = 'utf-8').read().strip().split('\n')
lines_en_test = open(path_zh + 'test.tok.en', encoding = 'utf-8').read().strip().split('\n')

In [9]:
def clean_lines(lines, lang):
    data = []
    for line in lines:
        if line == '':
            line = ' '
        if lang == 'en':
            line = line.replace("&apos;", "").replace("&quot;", "")
        if line[-1] != ' ':
            line = line + ' '
       
        line = '<sos> ' + line + '<eos>'
        data.append(line)
    return data

In [10]:
train_zh = clean_lines(lines_zh_train, 'zh')
train_en = clean_lines(lines_en_train, 'en')

val_zh = clean_lines(lines_zh_val, 'zh')
val_en = clean_lines(lines_en_val, 'en')

test_zh = clean_lines(lines_zh_test, 'zh')
test_en = clean_lines(lines_en_test, 'en')   

In [11]:
def indexesFromSentence(data, lang):
    indexes = []
    for sentence in data:
        index = []
        for token in sentence.split():
            if lang == 'zh':
                try:
                    index.append(words_ft_zh[token])
                except KeyError:
                    index.append(UNK_token)
            elif lang == 'en':
                try:
                    index.append(words_ft_en[token])
                except KeyError:
                    index.append(UNK_token)
        indexes.append(index)
    return indexes

In [12]:
train_zh_indexes = indexesFromSentence(train_zh, 'zh')
train_en_indexes = indexesFromSentence(train_en, 'en')

val_zh_indexes = indexesFromSentence(val_zh, 'zh')
val_en_indexes = indexesFromSentence(val_en, 'en')

test_zh_indexes = indexesFromSentence(test_zh, 'zh')
test_en_indexes = indexesFromSentence(test_en, 'en')

In [13]:
length_zh = []
for line in train_zh_indexes:
        length_zh.append(len(line))
        
length_zh = sorted(length_zh)
MAX_LENGTH_ZH = length_zh[int(len(train_zh_indexes)*0.99)]
print(MAX_LENGTH_ZH)

69


In [14]:
length_en = []
for line in train_en_indexes:
        length_en.append(len(line))
        
length_zh = sorted(length_en)
MAX_LENGTH_EN = length_en[int(len(train_en_indexes)*0.99)]
print(MAX_LENGTH_EN)

21


In [15]:
length_en = []
for line in val_zh_indexes:
        length_en.append(len(line))
        
length_zh = sorted(length_en)
MAX_LENGTH_EN = length_en[int(len(val_zh_indexes)*0.99)]
print(MAX_LENGTH_EN)

42


In [16]:
length_en = []
for line in val_en_indexes:
        length_en.append(len(line))
        
length_zh = sorted(length_en)
MAX_LENGTH_EN = length_en[int(len(val_en_indexes)*0.99)]
print(MAX_LENGTH_EN)

40


In [17]:
length_en = []
for line in test_zh_indexes:
        length_en.append(len(line))
        
length_zh = sorted(length_en)
MAX_LENGTH_EN = length_en[int(len(test_zh_indexes)*0.99)]
print(MAX_LENGTH_EN)

11


In [18]:
length_en = []
for line in test_en_indexes:
        length_en.append(len(line))
        
length_zh = sorted(length_en)
MAX_LENGTH_EN = length_en[int(len(test_en_indexes)*0.99)]
print(MAX_LENGTH_EN)

12


In [20]:
MAX_LENGTH_ZH = 69
MAX_LENGTH_EN = 40

In [21]:
train_zh_indexes_filtered = []
train_en_indexes_filtered = []
for i in range(len(train_zh_indexes)):
    if len(train_zh_indexes[i]) <= MAX_LENGTH_ZH and len(train_en_indexes[i]) <= MAX_LENGTH_EN:
        train_zh_indexes_filtered.append(train_zh_indexes[i])
        train_en_indexes_filtered.append(train_en_indexes[i])

In [22]:
val_zh_indexes_filtered = []
val_en_indexes_filtered = []
for i in range(len(val_zh_indexes)):
    if len(val_zh_indexes[i]) <= MAX_LENGTH_ZH and len(val_en_indexes[i]) <= MAX_LENGTH_EN:
        val_zh_indexes_filtered.append(val_zh_indexes[i])
        val_en_indexes_filtered.append(val_en_indexes[i])

In [23]:
test_zh_indexes_filtered = []
test_en_indexes_filtered = []
for i in range(len(test_zh_indexes)):
    if len(test_zh_indexes[i]) <= MAX_LENGTH_ZH and len(test_en_indexes[i]) <= MAX_LENGTH_EN:
        test_zh_indexes_filtered.append(test_zh_indexes[i])
        test_en_indexes_filtered.append(test_en_indexes[i])

In [24]:
class VocabDataset(Dataset):
    def __init__(self, data_list1, data_list2):
        
        self.data_list1 = data_list1
        self.data_list2 = data_list2
        
        assert (len(self.data_list1) == len(self.data_list2))

    def __len__(self):
        return len(self.data_list1)
            
    def __getitem__(self, key):        
        return [self.data_list1[key], self.data_list2[key], len(self.data_list1[key]), len(self.data_list2[key])]

In [25]:
def vocab_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all 
    data have the same length
    """
    data_list1 = []
    data_list2 = []
    length_list1 = []
    length_list2 = []
    
    for datum in batch:
        length_list1.append(datum[2])
        length_list2.append(datum[3])
        
        padded_vec1 = np.pad(np.array(datum[0]), 
                                pad_width=((0,MAX_LENGTH_ZH-datum[2])), 
                                mode="constant", constant_values=0)
        padded_vec2 = np.pad(np.array(datum[1]), 
                                pad_width=((0,MAX_LENGTH_EN-datum[3])), 
                                mode="constant", constant_values=0)
        
        data_list1.append(padded_vec1[:MAX_LENGTH_ZH])
        data_list2.append(padded_vec2[:MAX_LENGTH_EN])


    return [torch.from_numpy(np.array(data_list1)).cuda(), torch.from_numpy(np.array(data_list2)).cuda(),
                torch.LongTensor(length_list1).cuda(), torch.LongTensor(length_list2).cuda()]

In [26]:
BATCH_SIZE = 32

train_dataset = VocabDataset(train_zh_indexes_filtered, train_en_indexes_filtered)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

val_dataset = VocabDataset(val_zh_indexes_filtered, val_en_indexes_filtered)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)

test_dataset = VocabDataset(test_zh_indexes_filtered, test_en_indexes_filtered)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=1,
                                           collate_fn=vocab_collate_func,
                                           shuffle=False)

val_loader2 = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=1,
                                           collate_fn=vocab_collate_func,
                                           shuffle=False)

In [27]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embed_size=EMBED_SIZE):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding.from_pretrained(torch.from_numpy(loaded_embeddings_ft_zh).float(), freeze=False)
        self.gru = nn.GRU(embed_size, hidden_size, batch_first=True)

    def forward(self, input, hidden):
        embedded = self.embedding(input)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size, device=device)

In [28]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH_ZH, embed_size=EMBED_SIZE):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        
        self.embedding = nn.Embedding.from_pretrained(torch.from_numpy(loaded_embeddings_ft_en).float(), freeze=False)
        self.attn = nn.Linear(hidden_size + embed_size, self.max_length)
        self.attn_combine = nn.Linear(hidden_size + embed_size, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)

        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded, hidden), 2)), dim=2)
        attn_applied = torch.bmm(attn_weights[0].unsqueeze(1), encoder_outputs).squeeze(1)

        output = torch.cat((embedded[0], attn_applied), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = self.softmax(self.out(output[0]))
        return output, hidden, attn_weights
    
    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size, device=device)

In [29]:
teacher_forcing_ratio = 0.5

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    batch_size, input_length = input_tensor.size()
    _, target_length = target_tensor.size()
    
    encoder_hidden = encoder.initHidden(batch_size)
    encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)
    
    loss = 0

    decoder_input = torch.tensor(np.array([[SOS_token]] * batch_size).reshape(1, batch_size), device=device)
    
    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    if use_teacher_forcing:
        for di in range(target_length):
            
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_output)
        
            
            loss += criterion(decoder_output, target_tensor[:,di])
            decoder_input = target_tensor[:,di].unsqueeze(0) 
            
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_output)
                        
            
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  
            decoder_input = decoder_input.unsqueeze(0)
 
            loss += criterion(decoder_output, target_tensor[:,di])
            

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [30]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
%matplotlib inline

def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)
    
import time
import math

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [33]:
def trainIters(encoder, decoder, n_iters, print_every=100, plot_every=100, learning_rate=0.001):
    start = time.time()
    
    plot_losses_t = []
    print_loss_total_t = 0  
    plot_loss_total_t = 0  
    
    plot_losses_v = []
    print_loss_total_v = 0  
    plot_loss_total_v = 0 
    
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        for i, (data1, data2, length1, length2) in enumerate(train_loader):
            input_tensor = data1
            target_tensor = data2
            loss = train(input_tensor, target_tensor, encoder,
                         decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total_t += loss
            plot_loss_total_t += loss

            if i % print_every == 0:
                print_loss_avg = print_loss_total_t / print_every
                print_loss_total_t = 0
                print('Train %s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

            if i % plot_every == 0:
                plot_loss_avg = plot_loss_total_t / plot_every
                plot_losses_t.append(plot_loss_avg)
                plot_loss_total_t = 0
                
        for i, (data1, data2, length1, length2) in enumerate(val_loader):
            input_tensor = data1
            target_tensor = data2
            loss = train(input_tensor, target_tensor, encoder,
                         decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total_v += loss
            plot_loss_total_v += loss

            if i % print_every == 0:
                print_loss_avg = print_loss_total_v / print_every
                print_loss_total_v = 0
                print('Val %s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

            if i % plot_every == 0:
                plot_loss_avg = plot_loss_total_v / plot_every
                plot_losses_v.append(plot_loss_avg)
                plot_loss_total_v = 0
                
        torch.save(encoder.state_dict(), model_path + "encoder_rnn_attn"+str(hidden_size)+str(iter)+".pth")
        torch.save(decoder.state_dict(), model_path + "decoder_rnn_attn"+str(hidden_size)+str(iter)+".pth")

    return plot_losses_t, plot_losses_v

In [34]:
hidden_size = 300
encoder1 = EncoderRNN(hidden_size).to(device)
decoder1 = AttnDecoderRNN(hidden_size, len(ordered_words_ft_zh)).to(device)

plotloss_t1, plotloss_v1 = trainIters(encoder1, decoder1, 8)

Train 0m 0s (- 0m 5s) (1 12%) 0.1153
Train 1m 9s (- 8m 8s) (1 12%) 3.9951
Train 2m 20s (- 16m 22s) (1 12%) 3.0700
Train 3m 30s (- 24m 36s) (1 12%) 3.0693
Train 4m 42s (- 32m 58s) (1 12%) 2.9614
Train 5m 54s (- 41m 20s) (1 12%) 2.9468
Train 7m 5s (- 49m 41s) (1 12%) 2.8705
Train 8m 17s (- 58m 4s) (1 12%) 2.9036
Train 9m 29s (- 66m 25s) (1 12%) 2.9424
Train 10m 40s (- 74m 44s) (1 12%) 2.8698
Train 11m 52s (- 83m 4s) (1 12%) 2.8363
Train 13m 3s (- 91m 26s) (1 12%) 2.7893
Train 14m 15s (- 99m 46s) (1 12%) 2.7380
Train 15m 27s (- 108m 10s) (1 12%) 2.7281
Train 16m 38s (- 116m 30s) (1 12%) 2.7013
Train 17m 50s (- 124m 54s) (1 12%) 2.6658
Train 19m 2s (- 133m 17s) (1 12%) 2.6631
Train 20m 13s (- 141m 37s) (1 12%) 2.5621
Train 21m 25s (- 149m 58s) (1 12%) 2.6221
Train 22m 37s (- 158m 19s) (1 12%) 2.5598
Train 23m 48s (- 166m 42s) (1 12%) 2.5841
Train 25m 0s (- 175m 4s) (1 12%) 2.5988
Train 26m 12s (- 183m 30s) (1 12%) 2.5894
Train 27m 24s (- 191m 50s) (1 12%) 2.5036
Train 28m 36s (- 200m 12s) 

Train 224m 2s (- 224m 2s) (4 50%) 1.8963
Train 225m 13s (- 225m 13s) (4 50%) 1.9147
Train 226m 25s (- 226m 25s) (4 50%) 1.9360
Train 227m 37s (- 227m 37s) (4 50%) 1.9148
Train 228m 49s (- 228m 49s) (4 50%) 1.9366
Train 230m 0s (- 230m 0s) (4 50%) 1.8476
Train 231m 12s (- 231m 12s) (4 50%) 1.9414
Train 232m 23s (- 232m 23s) (4 50%) 1.8837
Train 233m 34s (- 233m 34s) (4 50%) 1.8676
Train 234m 46s (- 234m 46s) (4 50%) 1.8636
Train 235m 57s (- 235m 57s) (4 50%) 1.8832
Train 237m 9s (- 237m 9s) (4 50%) 1.9297
Train 238m 20s (- 238m 20s) (4 50%) 1.8700
Train 239m 32s (- 239m 32s) (4 50%) 1.8875
Train 240m 43s (- 240m 43s) (4 50%) 1.8513
Train 241m 54s (- 241m 54s) (4 50%) 1.9023
Train 243m 5s (- 243m 5s) (4 50%) 1.9086
Train 244m 17s (- 244m 17s) (4 50%) 1.9014
Train 245m 28s (- 245m 28s) (4 50%) 1.8271
Train 246m 39s (- 246m 39s) (4 50%) 1.8636
Train 247m 51s (- 247m 51s) (4 50%) 1.8893
Train 249m 2s (- 249m 2s) (4 50%) 1.9259
Train 250m 13s (- 250m 13s) (4 50%) 1.8593
Train 251m 24s (- 251

Train 448m 4s (- 64m 0s) (7 87%) 1.7244
Train 449m 15s (- 64m 10s) (7 87%) 1.7989
Train 450m 26s (- 64m 20s) (7 87%) 1.6989
Train 451m 37s (- 64m 31s) (7 87%) 1.7137
Train 452m 49s (- 64m 41s) (7 87%) 1.7732
Train 454m 0s (- 64m 51s) (7 87%) 1.7519
Train 455m 11s (- 65m 1s) (7 87%) 1.7506
Train 456m 22s (- 65m 11s) (7 87%) 1.7273
Train 457m 33s (- 65m 21s) (7 87%) 1.7045
Train 458m 45s (- 65m 32s) (7 87%) 1.7693
Train 459m 56s (- 65m 42s) (7 87%) 1.7036
Train 461m 8s (- 65m 52s) (7 87%) 1.7842
Train 462m 19s (- 66m 2s) (7 87%) 1.7607
Train 463m 31s (- 66m 13s) (7 87%) 1.7952
Train 464m 42s (- 66m 23s) (7 87%) 1.7550
Train 465m 54s (- 66m 33s) (7 87%) 1.8129
Train 467m 5s (- 66m 43s) (7 87%) 1.7732
Train 468m 16s (- 66m 53s) (7 87%) 1.7732
Train 469m 27s (- 67m 3s) (7 87%) 1.7962
Train 470m 39s (- 67m 14s) (7 87%) 1.8423
Train 471m 51s (- 67m 24s) (7 87%) 1.8231
Train 473m 3s (- 67m 34s) (7 87%) 1.8491
Train 474m 15s (- 67m 45s) (7 87%) 1.7743
Train 475m 26s (- 67m 55s) (7 87%) 1.8564
T

In [207]:
hidden_size = 300

encoder3 = EncoderRNN(hidden_size).to(device)
decoder3 = AttnDecoderRNN(hidden_size, len(ordered_words_ft_zh)).to(device)

#encoder3.load_state_dict(torch.load(model_path + "encoder_rnn_attn3008.pth"))
#decoder3.load_state_dict(torch.load(model_path + "decoder_rnn_attn3008.pth"))

encoder3.load_state_dict(torch.load(model_path + "encoder_rnn_atten.pth"))
decoder3.load_state_dict(torch.load(model_path + "decoder_rnn_atten.pth"))


In [196]:
def evaluate(loader, encoder, decoder):
    decoded_words_list = []
    decoder_attentions_list = []
    with torch.no_grad():
        for i, (data1, data2, length1, length2) in enumerate(loader):
            input_tensor = data1
            input_length = input_tensor.size()[0]
            
            encoder_hidden = encoder.initHidden(input_length)

            encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)
            
            decoder_input = torch.tensor(np.array([[SOS_token]]), device=device)

            decoder_hidden = encoder_hidden

            decoded_words = []
            decoder_attentions = torch.zeros(MAX_LENGTH_ZH, MAX_LENGTH_ZH)
            
            for di in range(MAX_LENGTH_EN):
                decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input.reshape(1,1), decoder_hidden, encoder_output)
                decoder_attentions[di] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1) 
                if topi.item() == EOS_token:
                    decoded_words.append('<eos>')
                    break
                else:
                    decoded_words.append(idx2words_ft_en[topi.item()])

                decoder_input = topi.squeeze().detach()
                decoder_input = decoder_input.unsqueeze(0)
                
            decoded_words_list.append(decoded_words)
            decoder_attentions_list.append(decoder_attentions[:di + 1])
                   
        return decoded_words_list, decoder_attentions_list  

In [208]:
predicted_list, attention_list = evaluate(val_loader2, encoder2, decoder2)

In [105]:
#predicted_list_nopad = []
#for ii in range(len(predicted_list)):
    #line = ''
    #for jj in predicted_list[ii]:
        #if jj != '<pad>':
            #line = line + ' ' + jj
    #predicted_list_nopad.append(line)

In [57]:
#for iii in range(len(predicted_list_nopad)):
    #if predicted_list_nopad[iii][-5:] != '<eos>':
        #predicted_list_nopad[iii] = predicted_list_nopad[iii] + ' <eos>'

In [209]:
predicted_list_nopad = []
for ii in range(len(predicted_list)):
    line = ''
    for jj in predicted_list[ii]:
        if jj != '<pad>':
            line = line + ' ' + jj
    predicted_list_nopad.append(line)

for iii in range(len(predicted_list_nopad)):
    if predicted_list_nopad[iii][-5:] == '<eos>':
        predicted_list_nopad[iii] = predicted_list_nopad[iii][5:-5]
    else:
        predicted_list_nopad[iii] = predicted_list_nopad[iii][5:]

label_list = []
for iii in range(len(val_en_indexes_filtered)):
    line = ''
    for jjj in val_en_indexes_filtered[iii]:
        line = line + ' ' + idx2words_ft_en[jjj]
    label_list.append(line[5:-5])

print('bleu score for validation dataset:', corpus_bleu(predicted_list_nopad, [label_list]).score)



bleu score for validation dataset: 3.5405271581042284


In [106]:
#label_list = []
#for iii in range(len(val_en_indexes_filtered)):
    #line = ''
    #for jjj in val_en_indexes_filtered[iii]:
        #line = line + ' ' + idx2words_ft_en[jjj]
    #label_list.append(line)

In [99]:
fp = []
for fp_item in predicted_list_nopad:
    fp.append(fp_item[6:-6])

In [181]:
#print('bleu score for validation dataset:', corpus_bleu(predicted_list_nopad, [label_list]).score)

In [102]:
fl = []
for fl_item in label_list:
    fl.append(fl_item[6:-6])

In [221]:
choice = random.randint(0, len(predicted_list_nopad)-1)
print(predicted_list_nopad[choice])
print(label_list[choice])

> There s a countries in the world .
> Afghanistan looks so different from here in America . 


In [204]:
predicted_list, attention_list = evaluate(test_loader, encoder2, decoder2)

In [205]:
predicted_list_nopad = []
for ii in range(len(predicted_list)):
    line = ''
    for jj in predicted_list[ii]:
        if jj != '<pad>':
            line = line + ' ' + jj
    predicted_list_nopad.append(line)

for iii in range(len(predicted_list_nopad)):
    if predicted_list_nopad[iii][-5:] == '<eos>':
        predicted_list_nopad[iii] = predicted_list_nopad[iii][5:-5]
    else:
        predicted_list_nopad[iii] = predicted_list_nopad[iii][5:]

label_list = []
for iii in range(len(test_en_indexes_filtered)):
    line = ''
    for jjj in test_en_indexes_filtered[iii]:
        line = line + ' ' + idx2words_ft_en[jjj]
    label_list.append(line[5:-5])

print('bleu score for test dataset:', corpus_bleu(predicted_list_nopad, [label_list]).score)



bleu score for test dataset: 4.0543943769984265


In [135]:
#predicted_list_nopad = []
#for ii in range(len(predicted_list)):
    #line = ''
    #for jj in predicted_list[ii]:
        #if jj != '<pad>':
            #line = line + ' ' + jj
    #predicted_list_nopad.append(line)

In [63]:
#for iii in range(len(predicted_list_nopad)):
    #if predicted_list_nopad[iii][-5:] != '<eos>':
        #predicted_list_nopad[iii] = predicted_list_nopad[iii] + ' <eos>'

In [136]:
#label_list = []
#for iii in range(len(test_en_indexes_filtered)):
    #line = ''
    #for jjj in test_en_indexes_filtered[iii]:
        #line = line + ' ' + idx2words_ft_en[jjj]
    #label_list.append(line)

In [203]:
#print('bleu score for test dataset:', corpus_bleu(predicted_list_nopad, [label_list]).score)

In [206]:
choice = random.randint(0, len(predicted_list_nopad)-1)
print(predicted_list_nopad[choice])
print(label_list[choice])

> We love love . . 
> We love innovation . 
