**IMPORTS**


*   Utility imports used to handle characters written in unicode, complex mathematical operations, opening files and handling NumPy arrays 
*   Pytorch imports: autograd for differentiation, Neural Networks and also using the GPU for creating the translation models faster
*   The "sys" import is only used to reset `sys.argv`, so that `argparse` (which supplies the gradient-clipping threshold applied after backpropagation) can parse its arguments inside a notebook

After acquiring all the imports that we need, we ensure that we are using a GPU. Note that this code *needs* a GPU in order to train models. 




In [None]:
import unicodedata
import re
import math
import matplotlib.pyplot as plt
import psutil
import time
import datetime
import random
from random import shuffle
import argparse
import numpy as np
from io import open
import pickle

import torch
from torch.autograd import Variable
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
import torch.cuda
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Replace the process argument list with a single empty entry. The
# argparse.ArgumentParser.parse_args() call further down reads sys.argv by
# default, so this keeps it from seeing whatever arguments the notebook
# process was started with (presumably kernel-launch options).
import sys
sys.argv=['']
del sys

# Global flag read by the tensor and hidden-state helpers below to decide
# whether freshly created tensors are moved to the GPU.
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


Create a Language class which constructs a vocabulary for the two languages from the parallel corpus. The functions are designed to add words from new sentences in the processing dataset and count them.

We also represent each word in the language as a one-hot vector, where the value of one is present at the index of the word. It is therefore necessary to assign an index to each word and to be able to look it up in both directions; this is done by the two dictionaries word2index and index2word.


In [None]:
class Language:
  """Vocabulary of one language of the parallel corpus.

  Holds a two-way mapping between words and integer indices together with
  per-word occurrence counts. Indices 0 and 1 are reserved for the "SOS"
  (start-of-sentence) and "EOS" (end-of-sentence) markers.
  """

  def __init__(self, language):
    self.name = language
    self.word2index = {"SOS": 0, "EOS": 1}
    self.word2count = {}
    self.index2word = {index: token for token, index in self.word2index.items()}
    # Next free index: the two reserved markers are already registered.
    self.n_words = len(self.word2index)

  def countWords(self, word):
    """Increase the occurrence counter of `word` by one."""
    self.word2count[word] = self.word2count.get(word, 0) + 1

  def addWord(self, word):
    """Register `word` under the next free index if it is new.

    Returns `word` unchanged in either case.
    """
    if word in self.word2index:
      return word
    index = self.n_words
    self.word2index[word] = index
    self.index2word[index] = word
    self.n_words = index + 1
    return word

  def addSentence(self, sentence):
    """Register every space-separated word of `sentence`.

    Returns the words joined by single spaces. Empty words at the very
    start of the sentence (produced by leading spaces) are registered in
    the vocabulary but left out of the returned string; empty words
    elsewhere are kept.
    """
    words = [self.addWord(word) for word in sentence.split(' ')]
    first_kept = 0
    while first_kept < len(words) and not words[first_kept]:
      first_kept += 1
    return ' '.join(words[first_kept:])

Before handling the data, we should note that we are going to use a parallel corpus file. This means that we will have a large number of pairs, where a pair is two sentences in two languages with the same meaning.

The processes are:
*   Converting Unicode characters from the corpus files to ASCII characters. (code from https://stackoverflow.com/a/518232/2809427)
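*   Simplifying the strings: converting every letter to lowercase, separating the sentence-final punctuation marks (. ! ?) from the preceding word with a space, and replacing every other run of non-letter characters with a single space.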

In this snippet there is also an extra utility function:
*   Converting a time measurement in seconds to an "hours, minutes, seconds" string. This is essential for reporting how long an epoch takes in a human-friendly way.


In [None]:
def uniToAscii(sentence):
  """Remove combining marks (accents and similar) from `sentence`.

  The text is decomposed with NFD normalization, which splits an accented
  letter into its base character followed by combining marks; every
  character of Unicode category 'Mn' is then dropped. Characters that have
  no such decomposition are left untouched.
  """
  decomposed = unicodedata.normalize('NFD', sentence)
  kept = [char for char in decomposed if unicodedata.category(char) != 'Mn']
  return ''.join(kept)

def normalizeString(s):
  """Bring a raw corpus sentence into the form used for the vocabulary.

  Steps, in order: the literal marker " ##AT##-##AT## " is replaced by a
  space; the text is lowercased, stripped and passed through uniToAscii;
  a space is inserted before each '.', '!' and '?'; every run of
  characters other than ASCII letters and those three marks becomes a
  single space. The result is not stripped again, so it can start or end
  with a space.
  """
  without_marker = re.sub(r" ##AT##-##AT## ", r" ", s)
  ascii_text = uniToAscii(without_marker.lower().strip())
  spaced_punctuation = re.sub(r"([.!?])", r" \1", ascii_text)
  return re.sub(r"[^a-zA-Z.!?]+", r" ", spaced_punctuation)

def toHours(s):
  """Format a duration of `s` seconds as '<hours>h <minutes>m <seconds>s'.

  Fractional seconds are truncated by the integer format specifier.
  """
  whole_minutes = math.floor(s / 60)
  hours, minutes = divmod(whole_minutes, 60)
  seconds = s - whole_minutes * 60
  return '%dh %dm %ds' % (hours, minutes, seconds)

""" Maximum translation length - the resulting translation can be up to 200 
words """
# Passed as `max_length` to evaluate(), which stops decoding after this many
# predicted words even if no end-of-sentence token has been produced.
MAX_TRANSLATION_LENGTH = 200

Then, we open the file located in the given path, process the strings from each line and create the classes according to the order that we want.

In [None]:
def prepareLangs(language1, language2, filepath, reverse_langs):
  """Read the parallel corpus and create the two (still empty) vocabularies.

  Args:
    language1: name given to the language of the first field of each line.
    language2: name given to the language of the second field of each line.
    filepath: path of a UTF-8 text file with one translation pair per
      line, the fields separated by tab characters.
    reverse_langs: if true, the fields of every line are reversed and the
      roles of the two languages are swapped, so `language2` becomes the
      input language.

  Returns:
    A tuple (input_lang, output_lang, pairs). The two Language objects
    contain only the reserved tokens; `pairs` is a list with one list of
    normalized sentences per line of the file.
  """
  print("Reading lines...")

  # Read the whole file inside a context manager so the handle is closed
  # right away instead of being left to the garbage collector.
  with open(filepath, encoding='utf-8') as corpus_file:
    lines = corpus_file.read().strip().split('\n')

  # Every tab-separated field of a line is normalized and kept.
  pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

  if reverse_langs:
    pairs = [list(reversed(p)) for p in pairs]
    input_lang = Language(language2)
    output_lang = Language(language1)
  else:
    input_lang = Language(language1)
    output_lang = Language(language2)

  return input_lang, output_lang, pairs

The prepareData function creates a Language object for each language and fully cleans the data according to the passed arguments. In the end, this function returns both Language objects along with a set of training pairs and a set of test pairs.

This function also prints the sizes of the training and test sets and of both vocabularies. These figures are useful when creating models, for example to judge which training proportion is needed and whether there is a risk of overfitting.

In [None]:
def prepareData(language1, language2, filepath, reverse_langs, train_proportion):
  """Load the corpus, build both vocabularies and split the pairs.

  The pairs returned by prepareLangs are first used to count word
  occurrences, then every sentence is registered in its vocabulary. The
  resulting pairs are shuffled and the first `train_proportion` share of
  them becomes the training set, the rest the test set. Set and vocabulary
  sizes are printed.

  Returns:
    A tuple (input_lang, output_lang, train_translations,
    test_translations).
  """
  input_lang, output_lang, pairs = prepareLangs(
      language1, language2, filepath, reverse_langs)
  print("Read %s sentence pairs" % len(pairs))

  # Pass 1: word frequencies (field 0 -> input side, field 1 -> output side).
  for pair in pairs:
    for position, lang in enumerate((input_lang, output_lang)):
      for word in pair[position].split(' '):
        lang.countWords(word)

  # Pass 2: give every word an index; keep the sentences as 2-tuples.
  pairs = [
      (input_lang.addSentence(pair[0]), output_lang.addSentence(pair[1]))
      for pair in pairs
  ]

  shuffle(pairs)

  split_at = math.floor(len(pairs) * train_proportion)
  train_translations = pairs[:split_at]
  test_translations = pairs[split_at:]

  print("Train translation pairs: %s" % (len(train_translations)))
  print("Test translation pairs: %s" % (len(test_translations)))
  print("Number of words for each language:")
  for lang in (input_lang, output_lang):
    print("%s, %s -> %s" % (lang.name, len(lang.word2count), lang.n_words))
  print()

  return input_lang, output_lang, train_translations, test_translations

Creating the Recurrent Neural Network: Encoder and Decoder. Both components are bidirectional Long Short-Term Memory (LSTM) Neural Networks.

In [None]:
class EncoderRNN(nn.Module):
    """Bidirectional multi-layer LSTM encoder over embedded word indices.

    Args:
        n_input_words: size of the input vocabulary (rows of the embedding).
        hidden_size: embedding dimension and LSTM hidden size.
        layers: number of stacked LSTM layers.
        dropout: dropout probability applied to the embeddings and between
            LSTM layers.
    """

    def __init__(self, n_input_words, hidden_size, layers, dropout=0.1):
        super(EncoderRNN, self).__init__()

        self.directions = 2
        self.n_input_words = n_input_words
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(n_input_words, hidden_size)
        self.dropout = nn.Dropout(dropout)
        self.n_layers = layers
        self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
                            num_layers=layers, dropout=dropout,
                            bidirectional=True, batch_first=False)
        # Not used by forward(); kept so the module has the same set of
        # parameters as before.
        self.fc = nn.Linear(hidden_size*self.directions, hidden_size)

    def forward(self, input_data, h_hidden, c_hidden):
        """Encode a batch of index sequences.

        Args:
            input_data: LongTensor of word indices; because the LSTM is built
                with batch_first=False it is laid out as (seq_len, batch).
            h_hidden, c_hidden: initial hidden and cell states, as produced
                by create_init_hiddens.

        Returns:
            The two values returned by nn.LSTM: the per-step outputs of the
            top layer, and the tuple (h_n, c_n) of final hidden and cell
            states.
        """
        embeddings = self.dropout(self.embedding(input_data))
        step_outputs, final_states = self.lstm(embeddings, (h_hidden, c_hidden))

        return step_outputs, final_states

    def create_init_hiddens(self, batch_size):
        """Return zero-filled initial (hidden, cell) states for `batch_size`.

        Each tensor has shape (layers * directions, batch_size, hidden_size)
        and is created on the device that holds the module's parameters, so
        it always matches the model regardless of any global flag.
        """
        device = next(self.parameters()).device
        shape = (self.n_layers * self.directions, batch_size, self.hidden_size)
        h_hidden = torch.zeros(shape, device=device)
        c_hidden = torch.zeros(shape, device=device)
        return h_hidden, c_hidden

The implementation of the Decoder is similar to the Encoder's. It has extra tools to handle the context (the bridge between the Encoder and the Decoder) and to create scores that allows the code to show the expected translation according to the maximum value of the softmax layer.

In [None]:
class DecoderRNN(nn.Module):
    """Bidirectional multi-layer LSTM decoder with attention over the encoder.

    Args:
        hidden_size: embedding dimension and LSTM hidden size.
        output_size: size of the output vocabulary.
        layers: number of stacked LSTM layers.
        dropout: dropout probability applied to the embeddings and between
            LSTM layers.
    """

    def __init__(self, hidden_size, output_size, layers, dropout = 0.1):
        super(DecoderRNN, self).__init__()

        self.directions = 2
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.n_layers = layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.dropout = nn.Dropout(dropout)

        both_directions = hidden_size * self.directions
        self.score_learner = nn.Linear(both_directions, both_directions)
        self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
                            num_layers=layers, dropout=dropout,
                            bidirectional=True, batch_first=False)
        # Input: attention context concatenated with the decoder state.
        self.context_combiner = nn.Linear(both_directions + both_directions,
                                          hidden_size)
        self.tanh = nn.Tanh()
        self.output = nn.Linear(hidden_size, output_size)
        self.soft = nn.Softmax(dim=1)
        self.log_soft = nn.LogSoftmax(dim=1)

    def forward(self, input_data, h_hidden, c_hidden, encoder_hiddens):
        """Run one decoding step.

        Args:
            input_data: LongTensor of word indices laid out as
                (seq_len, batch); the second dimension is taken as the
                batch size.
            h_hidden, c_hidden: hidden and cell states to start from.
            encoder_hiddens: per-step encoder outputs laid out as
                (src_len, batch, 2 * hidden_size).

        Returns:
            A tuple (pred, states): `pred` holds log-probabilities over the
            output vocabulary with shape (batch, output_size); `states` is
            the (h_n, c_n) tuple returned by the LSTM.
        """
        embedded = self.dropout(self.embedding(input_data))
        batch_size = embedded.shape[1]
        lstm_out, lstm_states = self.lstm(embedded, (h_hidden, c_hidden))

        # Final hidden state of the top layer for both directions, reshaped
        # to a column vector per batch element: (batch, 2 * hidden, 1).
        per_layer = lstm_states[0].view(self.n_layers, self.directions,
                                        lstm_out.shape[1], self.hidden_size)
        top_hidden = per_layer[self.n_layers - 1]
        top_hidden = top_hidden.permute(1, 2, 0).contiguous().view(batch_size, -1, 1)

        # Attention weights: one score per source position, normalized over
        # the source positions (dim 1).
        projected = self.score_learner(encoder_hiddens.permute(1, 0, 2))
        attn_scores = self.soft(torch.bmm(projected, top_hidden))

        # Weighted sum of the encoder outputs: (batch, 2 * hidden, 1).
        context = torch.bmm(encoder_hiddens.permute(1, 2, 0), attn_scores)

        combined = torch.cat((context, top_hidden), dim=1).view(batch_size, -1)
        h_tilde = self.tanh(self.context_combiner(combined))
        pred = self.log_soft(self.output(h_tilde))

        return pred, lstm_states

Now, we move on to training the network on the data. First, we need some functions which transform sentences into input and output tensors. To do that, we first transform each sentence into the list of vocabulary indices of its words (the compact form of its one-hot vector representation).

In [None]:
# Matches the index under which the Language class registers "EOS". It is
# appended to every sentence tensor and also used as the padding value when
# the sentences of a batch are padded to the same length.
EOS_token = 1 # end of sentence token gets index 1

# Maps every space-separated word of a sentence to its vocabulary index
def indexesFromSentence(language, sentence):
  """Return the `language.word2index` entries for the words of `sentence`.

  Raises:
    KeyError: if a word of the sentence is not in the vocabulary.
  """
  vocabulary = language.word2index
  indexes = []
  for word in sentence.split(' '):
    indexes.append(vocabulary[word])
  return indexes

def tensorFromSentence(language, sentence):
  """Return the sentence as a 1-D LongTensor of word indices ending in EOS.

  The tensor is moved to the GPU when the global `use_cuda` flag is set.
  """
  indexes = indexesFromSentence(language, sentence) + [EOS_token]
  tensor = torch.LongTensor(indexes).view(-1)
  return tensor.cuda() if use_cuda else tensor
      
# Builds the (input, target) tensors for one translation pair
def tensorsFromPair(input_lang, output_lang, pair):
  """Convert `pair[0]` with `input_lang` and `pair[1]` with `output_lang`.

  Returns:
    A tuple (input tensor, target tensor), each as produced by
    tensorFromSentence.
  """
  source_sentence, target_sentence = pair[0], pair[1]
  return (tensorFromSentence(input_lang, source_sentence),
          tensorFromSentence(output_lang, target_sentence))

Create a function to distribute into batches our sentence pairs to perform mini-batch gradient descent. Details of this technique can be found here: https://machinelearningmastery.com/gentle-introduction-mini-batch-gradient-descent-configure-batch-size/

In [None]:
def distribute_into_batches(data, input_lang, output_lang, batch_size, shuffle_data=True):
  """Group the sentence pairs of `data` into batches of tensors.

  Args:
    data: list of sentence pairs; shuffled in place when `shuffle_data`
      is true.
    input_lang, output_lang: vocabularies used to convert the sentences.
    batch_size: number of pairs per batch.
    shuffle_data: whether to shuffle `data` before batching.

  Returns:
    A list of (input_variables, target_variables) tuples, each element
    being a list of `batch_size` tensors. Only full batches are built: the
    trailing len(data) % batch_size pairs are left out, and the result is
    empty when `data` holds fewer than `batch_size` pairs.
  """
  if shuffle_data:
    shuffle(data)

  number_of_batches = len(data) // batch_size
  batches = []
  for batch_number in range(number_of_batches):
    start = batch_number * batch_size
    tensor_pairs = [tensorsFromPair(input_lang, output_lang, pair)
                    for pair in data[start:start + batch_size]]
    input_variables = [tensors[0] for tensors in tensor_pairs]
    target_variables = [tensors[1] for tensors in tensor_pairs]
    batches.append((input_variables, target_variables))
  return batches

The function *train_batch* performs a training loop on a single training batch. This means completing a forward pass through the model to:

*   create a predicted translation for each sentence in the batch
*   computing the total loss for the batch
*   back-propagating on the loss to update all of the weight matrices in both the Encoder and the Decoder

The function *train_epoch* applies the *train_batch* function iteratively for each existing training batch. Before computing the loss, the sentences lengths are equalized by padding. This means that End-of-Sentence tokens are added to the shorter sentence until the sentences are of the same length.

In [None]:
def train_batch(input_batch, target_batch, encoder, decoder,
                encoder_optimizer, decoder_optimizer, loss_criterion):
    """Run one optimization step on a single padded batch.

    The decoder is trained with teacher forcing: at every step it receives
    the correct previous target word rather than its own prediction.

    Reads two module-level names: `output_lang` (for the index of "SOS")
    and `args` (for the gradient-clipping threshold `args.clip`).

    Args:
        input_batch: padded input indices, laid out as (src_len, batch).
        target_batch: padded target indices, laid out as (tgt_len, batch).
        encoder, decoder: the two networks.
        encoder_optimizer, decoder_optimizer: their optimizers.
        loss_criterion: loss applied to each step's log-probabilities.

    Returns:
        The summed loss of the batch divided by the target length.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    batch_size = input_batch.shape[1]

    # Create initial hidden state for encoder
    enc_h_hidden, enc_c_hidden = encoder.create_init_hiddens(batch_size)
    # enc_hiddens: per-step encoder outputs; enc_outputs: final (h, c) states.
    enc_hiddens, enc_outputs = encoder(input_batch, enc_h_hidden, enc_c_hidden)

    # First decoder input: one SOS token per sentence, created on the same
    # device as the batch so that it is defined with and without CUDA.
    decoder_input = torch.full((1, batch_size),
                               output_lang.word2index.get("SOS"),
                               dtype=torch.long, device=input_batch.device)

    # The decoder starts from the encoder's final states.
    dec_h_hidden = enc_outputs[0]
    dec_c_hidden = enc_outputs[1]

    loss = 0
    for i in range(target_batch.shape[0]):
        pred, dec_outputs = decoder(decoder_input, dec_h_hidden,
                                    dec_c_hidden, enc_hiddens)

        # Teacher forcing: the next input is the correct current word.
        decoder_input = target_batch[i].view(1, -1)
        dec_h_hidden = dec_outputs[0]
        dec_c_hidden = dec_outputs[1]

        loss += loss_criterion(pred, target_batch[i])

    loss.backward()

    torch.nn.utils.clip_grad_norm_(encoder.parameters(), args.clip)
    torch.nn.utils.clip_grad_norm_(decoder.parameters(), args.clip)
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_batch.shape[0]

def train_epoch(train_batches, encoder, decoder, encoder_optimizer,
                decoder_optimizer, loss_criterion):
    """Train on every batch once and return the mean batch loss.

    Each batch is a pair (input tensors, target tensors); the tensors of
    each side are padded to a common length with the EOS token before
    being passed to train_batch.

    Raises:
        ValueError: if `train_batches` is empty, which happens when the
            training set holds fewer pairs than one batch.
    """
    if not train_batches:
        raise ValueError("train_epoch received no batches; the training set "
                         "is probably smaller than the batch size")

    epoch_loss = 0
    for batch in train_batches:
        padded_input_batch = torch.nn.utils.rnn.pad_sequence(
            batch[0], padding_value=EOS_token)
        padded_target_batch = torch.nn.utils.rnn.pad_sequence(
            batch[1], padding_value=EOS_token)

        epoch_loss += train_batch(padded_input_batch, padded_target_batch,
                                  encoder, decoder, encoder_optimizer,
                                  decoder_optimizer, loss_criterion)

    return epoch_loss / len(train_batches)

The function *test_batch* also creates a predicted translation for each sentence in the batch and computes the total loss for the batch, but does not do backpropagation, because testing results don't interact with weights. They are utilized only to calculate the loss on data which the model has not seen.

Below the *test_batch* function, there is the test_epoch function which applies the *test_batch* function iteratively for each existing test batch.
Before computing the loss, the sentence lengths are equalized by padding. This means that End-of-Sentence tokens are added to the shorter sentences until all sentences are of the same length.

In [None]:
def test_batch(input_batch, target_batch, encoder, decoder, loss_criterion):
    """Compute the loss of a single padded batch without updating weights.

    Unlike training, the decoder is fed its own most likely prediction of
    the previous step (greedy decoding) instead of the correct word.

    Reads the module-level `output_lang` for the index of "SOS".

    Args:
        input_batch: padded input indices, laid out as (src_len, batch).
        target_batch: padded target indices, laid out as (tgt_len, batch).
        encoder, decoder: the two networks.
        loss_criterion: loss applied to each step's log-probabilities.

    Returns:
        The summed loss of the batch divided by the target length.
    """
    batch_size = input_batch.shape[1]

    # Create initial hidden state for encoder
    enc_h_hidden, enc_c_hidden = encoder.create_init_hiddens(batch_size)

    # enc_hiddens: per-step encoder outputs; enc_outputs: final (h, c) states.
    enc_hiddens, enc_outputs = encoder(input_batch, enc_h_hidden, enc_c_hidden)

    # First decoder input: one SOS token per sentence, created on the same
    # device as the batch so that it is defined with and without CUDA.
    decoder_input = torch.full((1, batch_size),
                               output_lang.word2index.get("SOS"),
                               dtype=torch.long, device=input_batch.device)
    dec_h_hidden = enc_outputs[0]
    dec_c_hidden = enc_outputs[1]

    loss = 0
    for i in range(target_batch.shape[0]):
        pred, dec_outputs = decoder(decoder_input, dec_h_hidden,
                                    dec_c_hidden, enc_hiddens)

        # Greedy choice: the most likely word becomes the next input.
        topv, topi = pred.topk(1, dim=1)
        decoder_input = topi.view(1, -1)
        dec_h_hidden = dec_outputs[0]
        dec_c_hidden = dec_outputs[1]

        loss += loss_criterion(pred, target_batch[i])

    return loss.item() / target_batch.shape[0]

def test_epoch(test_batches, encoder, decoder, loss_criterion):

	with torch.no_grad():
		epoch_loss = 0
		for batch in test_batches:
			padded_input_batch = torch.nn.utils.rnn.pad_sequence(
														batch[0], padding_value=EOS_token)
			padded_target_batch = torch.nn.utils.rnn.pad_sequence(
														batch[1], padding_value=EOS_token)
			batch_loss = test_batch(padded_input_batch, padded_target_batch,
			                        encoder, decoder, loss_criterion)
			epoch_loss += batch_loss

	return epoch_loss / len(test_batches)

The *evaluate* function is the one that does the translation. It predicts each output word, one by one, and they are added to the expected translated string until the end-of-sentence token is predicted.

In [None]:
def evaluate(encoder, decoder, sentence, translate_from, translate_to, max_length):
    """Translate one sentence by greedy decoding.

    Args:
        encoder, decoder: the two networks.
        sentence: input sentence; every space-separated word must be in
            `translate_from.word2index` (a KeyError is raised otherwise).
        translate_from: vocabulary of the input language.
        translate_to: vocabulary of the output language.
        max_length: maximum number of words to predict.

    Returns:
        The predicted words joined by single spaces. Decoding stops at the
        first predicted "EOS" token or after `max_length` words.
    """
    with torch.no_grad():
        # Column vector of indices: (sentence length, batch of 1).
        input_variable = tensorFromSentence(translate_from, sentence).view(-1, 1)
        device = input_variable.device

        enc_h_hidden, enc_c_hidden = encoder.create_init_hiddens(1)
        # enc_hiddens: per-step encoder outputs; enc_outputs: final states.
        enc_hiddens, enc_outputs = encoder(input_variable, enc_h_hidden,
                                           enc_c_hidden)

        # Start token, created on the same device as the input so that it
        # is defined with and without CUDA.
        decoder_input = torch.full((1, 1), translate_to.word2index.get("SOS"),
                                   dtype=torch.long, device=device)
        dec_h_hidden = enc_outputs[0]
        dec_c_hidden = enc_outputs[1]

        eos_index = translate_to.word2index.get("EOS")
        decoded_words = []

        for _ in range(max_length):
            pred, dec_outputs = decoder(decoder_input, dec_h_hidden,
                                        dec_c_hidden, enc_hiddens)
            topv, topi = pred.topk(1, dim=1)
            ni = topi.item()
            if ni == eos_index:
                break
            decoded_words.append(translate_to.index2word[ni])

            # The predicted word is the input of the next step.
            decoder_input = torch.full((1, 1), ni, dtype=torch.long,
                                       device=device)
            dec_h_hidden = dec_outputs[0]
            dec_c_hidden = dec_outputs[1]

        return ' '.join(decoded_words)

The function *predict10translations* predicts the translations of 10 random sentences from the test set. They are printed at every epoch while the model is being created through the master function. This helps to visualize the translations and to judge their quality by hand. The printed output has the following structure:

  \> input sentence

  \= correct translation

  < predicted translation


In [None]:
def predict10translations(encoder, decoder, pairs):
    """Print ten randomly drawn pairs together with the model's translation.

    For each sample three lines are printed: '>' with the input sentence,
    '=' with the reference translation and '<' with the prediction,
    followed by an empty line. The vocabularies are taken from the
    module-level `input_lang` and `output_lang`.
    """
    for _ in range(10):
        sample = random.choice(pairs)
        source_sentence, reference = sample[0], sample[1]
        print('>', source_sentence)
        print('=', reference)
        prediction = evaluate(encoder, decoder, source_sentence,
                              input_lang, output_lang,
                              max_length=MAX_TRANSLATION_LENGTH)
        print('<', prediction)
        print('')

The following function completely trains the model and evaluates the progress on the test set after every epoch. It uses stochastic gradient descent to do the optimization and updates the learning rate according to our learning-rate hash map.

After all the epochs are done, this function saves the whole model (not just the dictionary) for future use.

In [None]:
def create_model(epochs, lr_change, lr, train_translations,
                 test_translations, input_lang, output_lang, batch_size,
                 encoder, decoder, loss_criterion):
    """Train the encoder/decoder pair, report progress and save both models.

    For every epoch the learning rate is adjusted, the training set is
    re-batched (and reshuffled in place), one training epoch is run with
    SGD, and the test loss plus ten sample translations are printed. After
    the last epoch the test losses are plotted and both networks are saved
    as whole objects with torch.save.

    Reads the module-level `input_lang_name` and `output_lang_name` for
    the names of the saved files.

    Args:
        epochs: number of passes over the training set.
        lr_change: dict mapping an epoch index (counted from 0) to the
            factor the learning rate is divided by before that epoch.
        lr: initial learning rate.
        train_translations, test_translations: lists of sentence pairs.
        input_lang, output_lang: vocabularies used to build the tensors.
        batch_size: number of pairs per batch.
        encoder, decoder: the two networks; left in eval mode on return.
        loss_criterion: loss used for both the training and the test loss.
    """
    # The test batches never change, so they are built once.
    test_batches = distribute_into_batches(test_translations,
                                           input_lang, output_lang,
                                           batch_size, shuffle_data=False)
    start = time.time()

    epoch_labels = list(range(epochs))
    test_losses = []
    for i in range(epochs):

        if i in lr_change:
            lr /= lr_change.get(i)

        encoder.train()
        decoder.train()
        # Plain SGD keeps no state between steps, so re-creating the
        # optimizers is how the new learning rate is applied.
        encoder_optimizer = optim.SGD(encoder.parameters(), lr=lr)
        decoder_optimizer = optim.SGD(decoder.parameters(), lr=lr)

        batches = distribute_into_batches(train_translations,
                                          input_lang, output_lang, batch_size,
                                          shuffle_data=True)
        train_loss = train_epoch(batches, encoder, decoder, encoder_optimizer,
                                 decoder_optimizer, loss_criterion)

        now = time.time()
        print("Iter: %s \nLearning Rate: %s \nTime: %s \nTrain Loss: %s \n"
              % (i, lr, toHours(now-start), train_loss))

        # Switch dropout off for the evaluation; train() at the top of the
        # loop switches it back on for the next epoch.
        encoder.eval()
        decoder.eval()
        # Use the criterion that was passed in, not a module-level one.
        test_loss = test_epoch(test_batches, encoder, decoder, loss_criterion)
        test_losses.append(test_loss)
        print("Test set loss: %s" % (test_loss))
        predict10translations(encoder, decoder, test_translations)

    plt.plot(epoch_labels, test_losses)
    plt.title('Loss values at every epoch')
    plt.show()

    torch.save(encoder, input_lang_name+output_lang_name+'_enc_weights.pt')
    torch.save(decoder, input_lang_name+output_lang_name+'_dec_weights.pt')

# Providing information for the translation:
*   Shortcode of the two used languages
*   The filepath of the dataset from which we get the translation pairs
*   A boolean variable which determines if we want to reverse the order of the languages in the pairs. Naturally, it is False when translating from English and True when translating to English
*   The proportion of the training data. Generally 0.9





In [None]:
# Short codes of the two languages; passed to prepareData as language names
# and concatenated by create_model to name the saved model files.
input_lang_name = 'en'
output_lang_name = 'de'

# Tab-separated parallel corpus read by prepareLangs.
filepath = ('drive/MyDrive/IndividualProject/ProjectSoftwareArchive/TranslationTrainingFiles/eng-deu.txt')
# False keeps the column order of the file (first column is the input).
reverse_langs = False
# Share of the shuffled pairs used for training; the rest is the test set.
train_proportion = 0.9

Hyperparameters:
*   layers: number of neural network layers in each of the Encoder and the Decoder
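*   hidden_size: number of neurons in the hidden layers. It is the same as the input size of the LSTM. The size has to be 300 when the model is to be combined with the historical embeddings, because those embeddings are 300-dimensional (note that the cell below currently sets a different value).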
*   dropout: probability of an element from a layer to be zeroed. Used to regularize the embeddings.
*   batch_size: number of sentences in a training/test batch
*   epochs: repetitions of training the given data from the corpus file
*   lr: Learning rate at the beginning of training the model
*   lr_change: Hash map representing the changes of the learning rate. 3:2 means that the learning rate is divided by 2 right before training the epoch with index 3 (epochs are counted from 0).
*   criterion: The criterion of loss is the negative log likelihood

After setting the Hyperparameters, we call the prepareData function to obtain the vocabulary of the input and output language and also all the translation pairs separated to the train and test sets









In [None]:
# Hyperparameters
batch_size = 48
epochs = 9
lr = 0.8
# Epoch index (counted from 0) -> factor the learning rate is divided by
# right before that epoch.
lr_change = {3:2, 5:4, 7:10}

layers = 2
# Used both as embedding dimension and as LSTM hidden size.
hidden_size = 512
dropout = 0.2

# The decoder ends in a LogSoftmax layer, which is the input NLLLoss expects.
criterion = nn.NLLLoss()

# Acquiring the languages and the sets
input_lang, output_lang, train_translations, test_translations = prepareData(
    input_lang_name, output_lang_name, filepath, reverse_langs=reverse_langs, 
    train_proportion=train_proportion)

This snippet does the gradient clipping allowing to keep the modifications in backpropagation stable and also creates the model according to the parameters assigned above. Note that this may take up to three hours.

In the output of this snippet, you can manually assess ten predicted translations at every epoch along with both the training and test loss.

In [None]:
print('Train Pairs #')
print(len(train_translations))

"""for gradient clipping from 
https://github.com/pytorch/examples/blob/master/word_language_model/main.py"""
# `args.clip` is read as a module-level name inside train_batch.
parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 RNN/LSTM Language Model')
parser.add_argument('--clip', type=float, default=0.25,
                    help='gradient clipping')
# parse_args() reads sys.argv, which the import cell reset to [''], so the
# default clipping value is always used here.
args = parser.parse_args()

encoder = EncoderRNN(input_lang.n_words, hidden_size, layers=layers, 
                     dropout=dropout)
decoder = DecoderRNN(hidden_size, output_lang.n_words, layers=layers, 
                      dropout=dropout)

# Move both networks to the GPU when one is available.
if use_cuda:
	print('Using cuda')
	encoder = encoder.cuda()
	decoder = decoder.cuda()

# Trains, prints progress for every epoch, plots the test loss and saves
# both networks to disk.
create_model(epochs, lr_change, lr, train_translations, test_translations, 
             input_lang, output_lang, batch_size, encoder, decoder, criterion)

# Loading a saved model

In [None]:
# Load whole pickled modules (not state dicts) and switch them to eval mode.
# NOTE(review): torch.load unpickles arbitrary objects, so only files from a
# trusted source should be loaded; the class definitions above must also be
# available when loading.
# NOTE(review): no map_location is given, so the tensors are restored to the
# device they were saved from - presumably a GPU; confirm before running on
# a CPU-only machine.
encoder = torch.load('drive/MyDrive/IndividualProject/ProjectSoftwareArchive/TranslationModelFiles/French/english-german_encoder_300neurons')
encoder.eval()
decoder = torch.load('drive/MyDrive/IndividualProject/ProjectSoftwareArchive/TranslationModelFiles/French/english-german_decoder_300neurons')
decoder.eval()

# Loading embedding files
Creating the historical language and assigning the embedding to each historical word according to the HistWords files

In [None]:
# --- Source side: replace the encoder embedding with historical vectors ---
# NOTE(review): pickle.load executes code stored in the file; only use
# vocabulary files from a trusted source. The file handle is not closed
# explicitly.
source_vocab_path = 'drive/MyDrive/IndividualProject/en-embs/1990-vocab.pkl'
source_weights_path = 'drive/MyDrive/IndividualProject/en-embs/1990-w.npy'
source_vocab = pickle.load(open(source_vocab_path, 'rb'))
source_weights = np.load(source_weights_path)
print(len(source_vocab))
print(source_weights)
# Words receive the indices 2, 3, ... in the order of the vocabulary file.
# assumes the i-th vocabulary entry belongs to the i-th row of the weight
# matrix and that the vocabulary has no duplicates - TODO confirm
source_lang = Language('hist_en_1990')
for word in source_vocab:
  source_lang.addWord(word)

# Keep the trained vectors of SOS (0) and EOS (1) and put them in front of
# the historical vectors, so that row numbers line up with the Language
# indices. The concatenation requires both to have the same vector size.
# The unconditional .cuda() calls require a GPU.
tokens_tensor = encoder.embedding(torch.LongTensor([0,1]).cuda()).cuda()
hist_tensor = torch.cat((tokens_tensor.cuda(), 
                            torch.FloatTensor(source_weights).cuda()))
encoder.embedding = nn.Embedding.from_pretrained(hist_tensor)
encoder = encoder.cuda()

# --- Target side: same procedure for the decoder embedding ---
target_vocab_path = 'drive/MyDrive/IndividualProject/en-embs/1800-vocab.pkl'
target_weights_path = 'drive/MyDrive/IndividualProject/en-embs/1800-w.npy'
target_vocab = pickle.load(open(target_vocab_path, 'rb'))
target_weights = np.load(target_weights_path)
print(len(target_vocab))
print(target_weights)
target_lang = Language('hist_en_1800')
for word in target_vocab:
  target_lang.addWord(word)

# NOTE(review): only decoder.embedding is replaced; decoder.output keeps the
# size it was trained with, while evaluate() looks predicted indices up in
# target_lang.index2word - confirm that the two index spaces agree.
tokens_tensor = decoder.embedding(torch.LongTensor([0,1]).cuda()).cuda()
tensor_hist_en = torch.cat((tokens_tensor.cuda(), 
                            torch.FloatTensor(target_weights).cuda()))
decoder.embedding = nn.Embedding.from_pretrained(tensor_hist_en)
decoder = decoder.cuda()

print(encoder.embedding)
print(decoder.embedding)
print(target_lang.n_words)

# Creating translation

Please write your sentence that you want to translate. It is preferable to use punctuation.

In [None]:
# The sentence is normalized the same way as the corpus sentences. Every
# resulting word must be present in source_lang.word2index, otherwise the
# index lookup raises a KeyError.
translating_sentence = "george is the king"
translating_sentence = normalizeString(translating_sentence)
evaluate(encoder, decoder, translating_sentence, translate_from = source_lang, 
         translate_to = target_lang, max_length=MAX_TRANSLATION_LENGTH)