In [1]:
%matplotlib inline

## SCAN Add-Prim JUMP Experiment
*************************************************************

Reference: http://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html


**Requirements**

* Python 3.6
* PyTorch 0.4

In [2]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import numpy as np
import pickle
import os
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
print("Device is using", device)

Device is using cpu


Loading data files
==================

In [4]:
# Reserved vocabulary indices for the start- and end-of-sequence markers.
SOS_token = 0
EOS_token = 1
# Task identifier that is interpolated into the name of the data file to read.
TASK_NAME = "addprim-jump"


class Lang:
    """Vocabulary for one side of the data: word <-> index maps plus word counts.

    Indices 0 and 1 are pre-assigned to the "SOS" and "EOS" markers, so the
    first real word receives index 2.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        # Next free index; starts at 2 because SOS and EOS are already present.
        self.n_words = 2

    def addSentence(self, sentence):
        """Register every token of ``sentence`` (split on single spaces)."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Add ``word`` to the vocabulary, or bump its count if already known."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

To read the data file we will split the file into lines, and then split
lines into pairs. 



In [5]:
def readLangs(lang1, lang2, reverse=False, trainOrtest='train',
              data_dir='/Users/Viola/CDS/AAI/Project/SCAN-Learn/data/processed'):
    """Read a tab-separated sentence-pair file and create empty vocabularies.

    The file read is ``<data_dir>/<trainOrtest>-<TASK_NAME>_<lang1>-<lang2>.txt``;
    every line is split on tab characters into one pair.

    Args:
        lang1: Name of the language in the first column.
        lang2: Name of the language in the second column.
        reverse: When True, swap the two columns of every pair and swap the
            roles of the two languages.
        trainOrtest: Prefix of the file name (e.g. ``'train'`` or ``'test'``).
        data_dir: Directory holding the processed data files. Defaults to the
            location this notebook was originally written against.

    Returns:
        ``(input_lang, output_lang, pairs)`` where the two ``Lang`` objects are
        still empty and ``pairs`` is a list of lists of strings.
    """
    print("Reading lines...")

    file_name = '{}-{}_{}-{}.txt'.format(trainOrtest, TASK_NAME, lang1, lang2)

    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(data_dir, file_name), encoding='utf-8') as data_file:
        lines = data_file.read().strip().split('\n')

    # Split every line into its tab-separated fields (no text normalization).
    pairs = [line.split('\t') for line in lines]

    # Reverse pairs if requested, then make the Lang instances.
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [6]:
# Upper bound (exclusive) on the number of space-separated tokens per sentence.
MAX_LENGTH = 50
# PRED_LENGTH = 50


def filterPair(p):
    """Return True when both sentences of pair ``p`` have fewer than MAX_LENGTH tokens."""
    if len(p[0].split(' ')) >= MAX_LENGTH:
        return False
    return len(p[1].split(' ')) < MAX_LENGTH


def filterPairs(pairs):
    """Return the sub-list of ``pairs`` accepted by ``filterPair``, order preserved."""
    return list(filter(filterPair, pairs))

The full process for preparing the data is:

-  Read text file and split into lines, split lines into pairs
-  Keep every pair as read (text normalization and length filtering are not applied in this notebook)
-  Make word lists from sentences in pairs




In [7]:
def prepareData(lang1, lang2, reverse=False, dataFrom='train'):
    """Load a data split and build the input/output vocabularies from it.

    NOTE: ``reverse`` is accepted but not forwarded; ``readLangs`` is always
    called with ``reverse=False``, so pairs keep the (lang1, lang2) order.

    Args:
        lang1: Name of the language in the first column of the data file.
        lang2: Name of the language in the second column of the data file.
        reverse: Currently has no effect (see note above).
        dataFrom: Which split to read; passed to ``readLangs`` as ``trainOrtest``.

    Returns:
        ``(input_lang, output_lang, pairs)`` with both vocabularies populated.
    """
    src_lang, tgt_lang, pairs = readLangs(lang1, lang2, reverse=False, trainOrtest=dataFrom)

    n_pairs = len(pairs)
    print("Read %s sentence pairs" % n_pairs)
    # Length filtering via filterPairs is switched off, so the "trimmed" count
    # reported below is the same as the count read.
    print("Trimmed to %s sentence pairs" % n_pairs)

    print("Counting words...")
    for entry in pairs:
        src_lang.addSentence(entry[0])
        tgt_lang.addSentence(entry[1])

    print("Counted words:")
    for vocab in (src_lang, tgt_lang):
        print(vocab.name, vocab.n_words)

    return src_lang, tgt_lang, pairs


# NOTE: the third positional argument (reverse=True) has no effect here, because
# prepareData always calls readLangs with reverse=False; pairs stay in (in, out) order.
input_lang, output_lang, pairs = prepareData('in', 'out', True)
print(random.choice(pairs))

Reading lines...
Read 37046 sentence pairs
Trimmed to 37046 sentence pairs
Counting words...
Counted words:
in 15
out 8
['walk right twice and look around right', 'I_TURN_RIGHT I_WALK I_TURN_RIGHT I_WALK I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_LOOK']


Model
=================

The model we are using is a GRU encoder-decoder seq2seq model with an attention mechanism. To tackle the zero-shot generalization task, we initialize the encoder's embedding layer with pre-trained word embeddings, taken from either GloVe or Google Word2Vec.

In [8]:
# Which pre-trained vectors to use: 'google' selects the GoogleNews pickle,
# anything else selects the "raw" pickle for the chosen dimensionality.
EMBEDDEING_SOURCE = 'glove'
hidden_size = 300

if EMBEDDEING_SOURCE == 'google':
    emb_path = '/Users/Viola/CDS/AAI/Project/SCAN-Learn/data/emb_pretrained/embedding_GoogleNews300Negative.pkl'
else:
    emb_path = '/Users/Viola/CDS/AAI/Project/SCAN-Learn/data/emb_pretrained/embedding_raw{}d.pkl'.format(hidden_size)

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(emb_path, 'rb') as handle:
    b = pickle.load(handle)  # presumably a mapping from token to vector; verify against the pickle

# One embedding row per input-vocabulary index.
pretrained_emb = np.zeros((input_lang.n_words, hidden_size))
for idx, word in input_lang.index2word.items():
    if word == 'SOS':
        # No lookup for SOS: its row keeps the all-zero initial value.
        continue

    if word == 'EOS' and EMBEDDEING_SOURCE != 'google':
        # Outside the Google source, EOS borrows the vector stored under '.'.
        lookup_key = '.'
    elif word == 'and' and EMBEDDEING_SOURCE == 'google':
        # The Google source is queried with the upper-case key for 'and'.
        lookup_key = 'AND'
    else:
        lookup_key = word

    pretrained_emb[idx] = b[lookup_key]

The Encoder
-----------

The encoder of this seq2seq network is a GRU network. For every input word the encoder
outputs a vector and a hidden state, and uses the hidden state for the
next input word.




In [9]:
# When True, EncoderRNN copies pretrained_emb into its embedding weights.
EMBEDDEING_PRETRAINED = True
# Value assigned to requires_grad on those copied weights (False keeps them frozen).
WEIGHT_UPDATE = False

# Suffix appended to the encoder/decoder file names under saved_models/.
MODEL_VERSION = 'T0.4'

In [10]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder that consumes one token index per call.

    When ``EMBEDDEING_PRETRAINED`` is set, the embedding table is overwritten
    with ``pretrained_emb`` and its ``requires_grad`` flag is set to
    ``WEIGHT_UPDATE``.
    """

    def __init__(self, input_size, hidden_size):
        """Create the embedding (``input_size`` x ``hidden_size``) and the GRU."""
        super().__init__()
        self.hidden_size = hidden_size

        embedding = nn.Embedding(input_size, hidden_size)
        if EMBEDDEING_PRETRAINED:
            pretrained = torch.from_numpy(pretrained_emb)
            embedding.weight.data.copy_(pretrained)
            embedding.weight.requires_grad = WEIGHT_UPDATE
        self.embedding = embedding

        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed one token and advance the GRU by one step.

        Returns the ``(output, hidden)`` pair produced by the GRU.
        """
        # Reshape to (seq_len=1, batch=1, features) as expected by the GRU call.
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(*shape, device=device)

The Decoder
-----------

The decoder is a GRU network with attention mechanism that takes the last output of the encoder and
outputs a sequence of words to create the translation.

First we calculate a set of *attention weights*. These will be multiplied by
the encoder output vectors to create a weighted combination. The result
(called ``attn_applied`` in the code) should contain information about
that specific part of the input sequence, and thus help the decoder
choose the right output words.

Calculating the attention weights is done with another feed-forward
layer ``attn``, using the decoder's input and hidden state as inputs.
Because there are sentences of all sizes in the training data, to
actually create and train this layer we have to choose a maximum
sentence length (input length, for encoder outputs) that it can apply
to. Sentences of the maximum length will use all the attention weights,
while shorter sentences will only use the first few.




In [12]:
class AttnDecoderRNN(nn.Module):
    """GRU decoder with a learned attention over a fixed number of encoder positions.

    The attention layer maps the concatenated (embedded input, hidden state)
    to ``max_length`` scores, so ``encoder_outputs`` passed to ``forward`` must
    have ``max_length`` rows.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        """Build the embedding, attention, combination, dropout, GRU and output layers."""
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        doubled = hidden_size * 2  # width of the two concatenations used in forward()

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attn = nn.Linear(doubled, max_length)
        self.attn_combine = nn.Linear(doubled, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Returns:
            ``(log_probs, hidden, attn_weights)``: log-softmax scores over the
            output vocabulary, the updated hidden state, and the softmax
            attention weights over encoder positions.
        """
        token_emb = self.dropout(self.embedding(input).view(1, 1, -1))

        # Attention scores come from the current input embedding and hidden state.
        attn_scores = self.attn(torch.cat((token_emb[0], hidden[0]), 1))
        attn_weights = F.softmax(attn_scores, dim=1)

        # Weighted sum of the encoder outputs (batched matmul with batch size 1).
        context = torch.bmm(attn_weights.unsqueeze(0),
                            encoder_outputs.unsqueeze(0))

        combined = self.attn_combine(torch.cat((token_emb[0], context[0]), 1))
        gru_input = F.relu(combined.unsqueeze(0))
        gru_output, hidden = self.gru(gru_input, hidden)

        log_probs = F.log_softmax(self.out(gru_output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def initHidden(self):
        """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(*shape, device=device)

Training
========

Preparing Training Data
-----------------------

To train, for each pair we need an input tensor (indexes of the
words in the input sentence) and target tensor (indexes of the words in
the target sentence). While creating these vectors we append the
EOS token to both sequences.




In [13]:
def indexesFromSentence(lang, sentence):
    """Map each space-separated word of ``sentence`` to its index in ``lang.word2index``.

    Raises ``KeyError`` for a word that is not in the vocabulary.
    """
    vocabulary = lang.word2index
    indexes = []
    for token in sentence.split(' '):
        indexes.append(vocabulary[token])
    return indexes


def tensorFromSentence(lang, sentence):
    """Encode ``sentence`` as a column tensor of word indices terminated by EOS.

    Returns a ``torch.long`` tensor of shape (number of words + 1, 1) on ``device``.
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)


def tensorsFromPair(pair):
    """Convert a (source, target) sentence pair into a tuple of index tensors.

    The source is encoded with the global ``input_lang`` and the target with
    the global ``output_lang``.
    """
    source_sentence, target_sentence = pair[0], pair[1]
    return (
        tensorFromSentence(input_lang, source_sentence),
        tensorFromSentence(output_lang, target_sentence),
    )

Training the Model
------------------

To train we run the input sentence through the encoder, and keep track
of every output and the latest hidden state. Then the decoder is given
the ``<SOS>`` token as its first input, and the last hidden state of the
encoder as its first hidden state.

We use teacher forcing to help the model converge faster, and lower the teacher-forcing ratio in later training runs.




In [14]:
# Probability that train() uses teacher forcing for a given training pair.
teacher_forcing_ratio = 0.8


def train(input_tensor, target_tensor, encoder, decoder,
          encoder_optimizer, decoder_optimizer, criterion,
          max_length=MAX_LENGTH):
    """Perform one optimisation step on a single (input, target) pair.

    The input is encoded token by token; the decoder then starts from SOS and
    the encoder's final hidden state. With probability
    ``teacher_forcing_ratio`` the ground-truth token is fed as the next decoder
    input; otherwise the decoder's own top prediction is fed back and decoding
    stops early once it predicts EOS.

    Returns:
        The summed loss divided by the target length, as a Python float.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # --- Encode: collect one output vector per input position. ---
    hidden_state = encoder.initHidden()
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for position in range(input_tensor.size(0)):
        step_output, hidden_state = encoder(input_tensor[position], hidden_state)
        encoder_outputs[position] = step_output[0, 0]

    # --- Decode: start from SOS with the encoder's last hidden state. ---
    target_length = target_tensor.size(0)
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = hidden_state

    # Decide once per pair whether the whole sequence is teacher-forced.
    teacher_forced = random.random() < teacher_forcing_ratio

    loss = 0
    for step in range(target_length):
        decoder_output, decoder_hidden, _attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[step])

        if teacher_forced:
            # Feed the ground-truth token as the next input.
            decoder_input = target_tensor[step]
            continue

        # Feed back the model's own best guess, detached from the graph.
        _top_value, top_index = decoder_output.topk(1)
        decoder_input = top_index.squeeze().detach()
        if decoder_input.item() == EOS_token:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

Helper function for timing




In [15]:
import time
import math


def asMinutes(s):
    """Format a duration of ``s`` seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report elapsed time and a linear estimate of the time remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed so far; must be non-zero.

    Returns:
        A string of the form ``'<elapsed> (- <remaining>)'``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the completed fraction.
    remaining = elapsed / (percent) - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

### Training iteration

In [16]:
def trainIters(encoder, decoder, n_iters, print_every=1000, eval_every=1000, learning_rate=0.001):
    """Train for ``n_iters`` single-pair updates, checkpointing the best model.

    If both checkpoint files for ``MODEL_VERSION`` exist under ``saved_models/``,
    they are loaded and training continues from them. The load rebinds the
    local names only, so the objects passed in by the caller are then left
    untouched. Otherwise the passed-in ``encoder``/``decoder`` are trained.

    Args:
        encoder: Encoder module to train (unless replaced by a checkpoint).
        decoder: Decoder module to train (unless replaced by a checkpoint).
        n_iters: Number of training pairs to sample (with replacement) and train on.
        print_every: Print the average loss every this many iterations.
        eval_every: Evaluate accuracy every this many iterations. Evaluation is
            only attempted on iterations that are also multiples of ``print_every``.
        learning_rate: Learning rate for both Adam optimizers.
    """
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    save_dir = "saved_models"
    encoder_path = os.path.join(save_dir, "encoder_" + MODEL_VERSION)
    decoder_path = os.path.join(save_dir, "decoder_" + MODEL_VERSION)

    # Resume only when the checkpoint is complete; a lone encoder file would
    # otherwise make the decoder load fail.
    if os.path.exists(encoder_path) and os.path.exists(decoder_path):
        encoder = torch.load(encoder_path)
        decoder = torch.load(decoder_path)

    # Baseline accuracy that a new checkpoint has to beat.
    best_test_acc = evaluateAccuracy(encoder, decoder, 500)
    print("Best evaluation accuracy: {0:.2f}%".format(best_test_acc * 100))

    # Skip encoder parameters that are frozen (requires_grad is False).
    trainable_encoder_params = filter(lambda p: p.requires_grad, encoder.parameters())

    encoder_optimizer = optim.Adam(trainable_encoder_params, lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    # `step` rather than `iter`, to avoid shadowing the builtin.
    for step in range(1, n_iters + 1):
        input_tensor, target_tensor = training_pairs[step - 1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg), end=' ')

            if step % eval_every == 0:
                test_acc = evaluateAccuracy(encoder, decoder, 200)
                print('{0:.2f}%'.format(test_acc * 100))

                if test_acc > best_test_acc:
                    # Create the checkpoint directory on first use so that the
                    # save cannot fail with FileNotFoundError.
                    os.makedirs(save_dir, exist_ok=True)
                    with open(encoder_path, "wb") as f:
                        torch.save(encoder, f)
                    with open(decoder_path, "wb") as f:
                        torch.save(decoder, f)
                    print("New best test accuracy! Model Updated!")
                    best_test_acc = test_acc

            else:
                print('')

Evaluation
==========

Evaluation is mostly the same as training, but there are no targets so
we simply feed the decoder's predictions back to itself for each step.
Every time it predicts a word we add it to the output string, and if it
predicts the EOS token we stop there. We also store the decoder's
attention outputs for display later.




In [18]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedily decode ``sentence`` and return the predicted words with attentions.

    Both modules are switched to evaluation mode for the duration of the call,
    so that the decoder's dropout layer is inactive and predictions are
    deterministic. Their previous training/eval mode is restored afterwards.

    Args:
        encoder: Trained encoder module.
        decoder: Trained attention decoder module.
        sentence: Space-separated input sentence using words from ``input_lang``.
        max_length: Maximum number of decoding steps and of encoder positions.

    Returns:
        ``(decoded_words, decoder_attentions)``: the predicted words (ending in
        ``'<EOS>'`` when EOS was predicted) and one row of attention weights per
        decoding step taken.
    """
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()

    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(input_lang, sentence)
            input_length = input_tensor.size()[0]
            encoder_hidden = encoder.initHidden()

            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)
                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

            decoder_hidden = encoder_hidden

            decoded_words = []
            decoder_attentions = torch.zeros(max_length, max_length)

            for di in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                decoder_attentions[di] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                if topi.item() == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                else:
                    decoded_words.append(output_lang.index2word[topi.item()])

                # Feed the greedy prediction back in as the next input.
                decoder_input = topi.squeeze().detach()

            # Keep only the attention rows for the steps actually decoded.
            return decoded_words, decoder_attentions[:di + 1]
    finally:
        # Restore the prior mode so an enclosing training loop keeps dropout on.
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

We can evaluate random sentences from the test set and print out the
input, target, and output to make some subjective quality judgements:




In [19]:
# Load the held-out pairs only. The vocabularies returned here are deliberately
# NOT bound to input_lang/output_lang: pretrained_emb was built from the
# training vocabulary's word indices, so rebinding the globals to a vocabulary
# built from the test file (whose first-seen word order may differ) would pair
# words with the wrong embedding rows.
# NOTE(review): assumes every test-set word also occurs in the training file - confirm.
_eval_input_lang, _eval_output_lang, pairs_eval = prepareData('in', 'out', True, dataFrom='test')
print(random.choice(pairs_eval))

Reading lines...
Read 15412 sentence pairs
Trimmed to 15412 sentence pairs
Counting words...
Counted words:
in 15
out 8
['turn around right twice after jump opposite right twice', 'I_TURN_RIGHT I_TURN_RIGHT I_JUMP I_TURN_RIGHT I_TURN_RIGHT I_JUMP I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT']


In [20]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print input ('>'), reference ('=') and prediction ('<') for ``n`` random evaluation pairs."""
    for _ in range(n):
        sample = random.choice(pairs_eval)
        print('>', sample[0])
        print('=', sample[1])

        predicted_words, _attentions = evaluate(encoder, decoder, sample[0])
        print('<', ' '.join(predicted_words))
        print('')

In [21]:
def evaluateAccuracy(encoder, decoder, n=10):
    """Estimate exact-match accuracy on ``n`` pairs sampled (with replacement) from ``pairs_eval``.

    A prediction counts as correct when, after dropping a trailing ``'<EOS>'``,
    the space-joined output equals the reference sentence.

    Returns:
        The mean of the 0/1 outcomes as a NumPy float.
    """
    outcomes = []
    for _ in range(n):
        sample = random.choice(pairs_eval)
        predicted_words, _attentions = evaluate(encoder, decoder, sample[0])

        # The end marker is not part of the reference sentence.
        if predicted_words[-1] == '<EOS>':
            predicted_words = predicted_words[:-1]

        is_exact_match = ' '.join(predicted_words) == sample[1]
        outcomes.append(1 if is_exact_match else 0)
    return np.array(outcomes).mean()

Training and Evaluating
=======================


The model is initially trained with a high teacher-forcing ratio and a relatively large learning rate. Both the teacher-forcing ratio and the learning rate are lowered in later runs as the model approaches the optimum.

#### The model achieves a 97% accuracy rate on the best test sample evaluation, and is 94% correct on average on the test set.

In [22]:
# Start with a high teacher-forcing ratio; train() reads this module-level value.
teacher_forcing_ratio = 0.8

# Build a fresh encoder and attention decoder sized to the two vocabularies.
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

# First run: 5000 iterations, printing the loss every 50 and evaluating every 500.
trainIters(encoder1, attn_decoder1, 5000, print_every=50, eval_every=500, learning_rate=0.001)

Best evaluation accuracy: 0.00%
0m 33s (- 55m 16s) (50 1%) 1.8531 
0m 37s (- 30m 56s) (100 2%) 1.6460 
0m 41s (- 22m 20s) (150 3%) 1.2627 
0m 45s (- 18m 7s) (200 4%) 1.1962 
0m 49s (- 15m 36s) (250 5%) 1.3049 
0m 52s (- 13m 48s) (300 6%) 1.0255 
0m 56s (- 12m 36s) (350 7%) 1.1453 
1m 1s (- 11m 45s) (400 8%) 1.1230 
1m 5s (- 10m 58s) (450 9%) 1.1119 
1m 8s (- 10m 17s) (500 10%) 0.8724 0.00%
1m 19s (- 10m 46s) (550 11%) 0.9449 
1m 24s (- 10m 16s) (600 12%) 0.8409 
1m 28s (- 9m 49s) (650 13%) 0.9497 
1m 32s (- 9m 27s) (700 14%) 0.8867 
1m 37s (- 9m 12s) (750 15%) 0.7976 
1m 41s (- 8m 52s) (800 16%) 0.8399 
1m 45s (- 8m 34s) (850 17%) 0.7813 
1m 49s (- 8m 16s) (900 18%) 0.7319 
1m 53s (- 8m 2s) (950 19%) 0.7370 
1m 57s (- 7m 49s) (1000 20%) 0.6536 0.00%
2m 8s (- 8m 5s) (1050 21%) 0.7980 
2m 13s (- 7m 52s) (1100 22%) 0.6912 
2m 17s (- 7m 39s) (1150 23%) 0.6949 
2m 21s (- 7m 26s) (1200 24%) 0.6980 
2m 25s (- 7m 16s) (1250 25%) 0.5271 
2m 29s (- 7m 4s) (1300 26%) 0.6871 
2m 32s (- 6m 53s) (13

In [23]:
# Lower the teacher-forcing ratio to 0.5 for this and all subsequent runs.
teacher_forcing_ratio = 0.5
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.001)

Best evaluation accuracy: 13.60%
0m 13s (- 4m 23s) (50 5%) 0.5547 
0m 18s (- 2m 49s) (100 10%) 0.5065 
0m 23s (- 2m 13s) (150 15%) 0.4760 
0m 27s (- 1m 51s) (200 20%) 0.3585 
0m 32s (- 1m 38s) (250 25%) 0.4330 
0m 37s (- 1m 26s) (300 30%) 0.3983 
0m 42s (- 1m 18s) (350 35%) 0.4234 
0m 46s (- 1m 9s) (400 40%) 0.3870 
0m 50s (- 1m 1s) (450 45%) 0.3770 
0m 54s (- 0m 54s) (500 50%) 0.4302 5.50%
1m 3s (- 0m 51s) (550 55%) 0.5413 
1m 8s (- 0m 45s) (600 60%) 0.4528 
1m 12s (- 0m 39s) (650 65%) 0.5108 
1m 18s (- 0m 33s) (700 70%) 0.4360 
1m 22s (- 0m 27s) (750 75%) 0.3269 
1m 27s (- 0m 21s) (800 80%) 0.3529 
1m 32s (- 0m 16s) (850 85%) 0.5202 
1m 37s (- 0m 10s) (900 90%) 0.3848 
1m 42s (- 0m 5s) (950 95%) 0.4396 
1m 47s (- 0m 0s) (1000 100%) 0.5019 12.00%


In [24]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.001)

Best evaluation accuracy: 10.60%
0m 12s (- 4m 0s) (50 5%) 0.5040 
0m 15s (- 2m 21s) (100 10%) 0.4421 
0m 19s (- 1m 47s) (150 15%) 0.4616 
0m 22s (- 1m 28s) (200 20%) 0.6345 
0m 25s (- 1m 15s) (250 25%) 0.4463 
0m 28s (- 1m 6s) (300 30%) 0.4816 
0m 31s (- 0m 59s) (350 35%) 0.5444 
0m 34s (- 0m 52s) (400 40%) 0.4340 
0m 38s (- 0m 46s) (450 45%) 0.3638 
0m 41s (- 0m 41s) (500 50%) 0.4350 9.00%
0m 49s (- 0m 40s) (550 55%) 0.4330 
0m 51s (- 0m 34s) (600 60%) 0.4733 
0m 54s (- 0m 29s) (650 65%) 0.4028 
0m 58s (- 0m 25s) (700 70%) 0.4733 
1m 1s (- 0m 20s) (750 75%) 0.3322 
1m 5s (- 0m 16s) (800 80%) 0.4213 
1m 8s (- 0m 12s) (850 85%) 0.4040 
1m 11s (- 0m 7s) (900 90%) 0.3955 
1m 15s (- 0m 3s) (950 95%) 0.3665 
1m 19s (- 0m 0s) (1000 100%) 0.4379 16.00%
New best test accuracy! Model Updated!


In [25]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.001)

Best evaluation accuracy: 15.60%
0m 12s (- 3m 58s) (50 5%) 0.4148 
0m 15s (- 2m 20s) (100 10%) 0.3887 
0m 18s (- 1m 46s) (150 15%) 0.3530 
0m 22s (- 1m 28s) (200 20%) 0.4633 
0m 25s (- 1m 16s) (250 25%) 0.4489 
0m 28s (- 1m 6s) (300 30%) 0.3755 
0m 31s (- 0m 58s) (350 35%) 0.3400 
0m 34s (- 0m 51s) (400 40%) 0.3629 
0m 38s (- 0m 46s) (450 45%) 0.4588 
0m 41s (- 0m 41s) (500 50%) 0.4215 15.50%
0m 47s (- 0m 38s) (550 55%) 0.3727 
0m 51s (- 0m 34s) (600 60%) 0.4441 
0m 54s (- 0m 29s) (650 65%) 0.3512 
0m 57s (- 0m 24s) (700 70%) 0.3028 
1m 1s (- 0m 20s) (750 75%) 0.3797 
1m 4s (- 0m 16s) (800 80%) 0.3150 
1m 7s (- 0m 11s) (850 85%) 0.4082 
1m 11s (- 0m 7s) (900 90%) 0.3635 
1m 14s (- 0m 3s) (950 95%) 0.3929 
1m 18s (- 0m 0s) (1000 100%) 0.4602 9.00%


In [26]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.001)

Best evaluation accuracy: 12.40%
0m 11s (- 3m 39s) (50 5%) 0.4174 
0m 15s (- 2m 15s) (100 10%) 0.4224 
0m 18s (- 1m 45s) (150 15%) 0.4317 
0m 21s (- 1m 27s) (200 20%) 0.3479 
0m 25s (- 1m 15s) (250 25%) 0.4895 
0m 28s (- 1m 6s) (300 30%) 0.4067 
0m 31s (- 0m 58s) (350 35%) 0.4090 
0m 34s (- 0m 52s) (400 40%) 0.4451 
0m 37s (- 0m 46s) (450 45%) 0.4318 
0m 40s (- 0m 40s) (500 50%) 0.3709 17.00%
New best test accuracy! Model Updated!
0m 47s (- 0m 38s) (550 55%) 0.4098 
0m 50s (- 0m 33s) (600 60%) 0.4016 
0m 53s (- 0m 28s) (650 65%) 0.3762 
0m 56s (- 0m 24s) (700 70%) 0.3307 
1m 0s (- 0m 20s) (750 75%) 0.3702 
1m 4s (- 0m 16s) (800 80%) 0.4648 
1m 7s (- 0m 11s) (850 85%) 0.3732 
1m 11s (- 0m 7s) (900 90%) 0.3948 
1m 14s (- 0m 3s) (950 95%) 0.3703 
1m 18s (- 0m 0s) (1000 100%) 0.2973 18.50%
New best test accuracy! Model Updated!


In [27]:
# Continue training with the learning rate reduced to 0.0005.
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0005)

Best evaluation accuracy: 19.60%
0m 10s (- 3m 23s) (50 5%) 0.3135 
0m 14s (- 2m 6s) (100 10%) 0.3340 
0m 16s (- 1m 35s) (150 15%) 0.2837 
0m 20s (- 1m 20s) (200 20%) 0.3610 
0m 23s (- 1m 11s) (250 25%) 0.2773 
0m 26s (- 1m 2s) (300 30%) 0.2549 
0m 30s (- 0m 56s) (350 35%) 0.2719 
0m 33s (- 0m 50s) (400 40%) 0.3024 
0m 36s (- 0m 45s) (450 45%) 0.3166 
0m 40s (- 0m 40s) (500 50%) 0.2565 19.50%
0m 47s (- 0m 38s) (550 55%) 0.2573 
0m 50s (- 0m 33s) (600 60%) 0.2567 
0m 53s (- 0m 28s) (650 65%) 0.1881 
0m 56s (- 0m 24s) (700 70%) 0.2806 
0m 59s (- 0m 19s) (750 75%) 0.2576 
1m 2s (- 0m 15s) (800 80%) 0.2561 
1m 5s (- 0m 11s) (850 85%) 0.2130 
1m 9s (- 0m 7s) (900 90%) 0.2192 
1m 12s (- 0m 3s) (950 95%) 0.3000 
1m 15s (- 0m 0s) (1000 100%) 0.2279 41.50%
New best test accuracy! Model Updated!


In [28]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0005)

Best evaluation accuracy: 38.00%
0m 10s (- 3m 23s) (50 5%) 0.2492 
0m 13s (- 2m 3s) (100 10%) 0.2154 
0m 16s (- 1m 33s) (150 15%) 0.2003 
0m 19s (- 1m 18s) (200 20%) 0.3465 
0m 22s (- 1m 8s) (250 25%) 0.2395 
0m 26s (- 1m 0s) (300 30%) 0.2626 
0m 29s (- 0m 54s) (350 35%) 0.2409 
0m 32s (- 0m 48s) (400 40%) 0.2556 
0m 35s (- 0m 43s) (450 45%) 0.1621 
0m 38s (- 0m 38s) (500 50%) 0.1853 32.00%
0m 44s (- 0m 36s) (550 55%) 0.2213 
0m 48s (- 0m 32s) (600 60%) 0.2072 
0m 51s (- 0m 27s) (650 65%) 0.1681 
0m 55s (- 0m 23s) (700 70%) 0.1462 
0m 58s (- 0m 19s) (750 75%) 0.1631 
1m 1s (- 0m 15s) (800 80%) 0.2741 
1m 5s (- 0m 11s) (850 85%) 0.2427 
1m 8s (- 0m 7s) (900 90%) 0.2375 
1m 11s (- 0m 3s) (950 95%) 0.2292 
1m 15s (- 0m 0s) (1000 100%) 0.2078 47.50%
New best test accuracy! Model Updated!


In [29]:
# Continue training with the learning rate reduced further to 0.0001.
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 47.00%
0m 10s (- 3m 24s) (50 5%) 0.1710 
0m 14s (- 2m 7s) (100 10%) 0.1293 
0m 17s (- 1m 37s) (150 15%) 0.1526 
0m 20s (- 1m 21s) (200 20%) 0.1156 
0m 23s (- 1m 10s) (250 25%) 0.1291 
0m 26s (- 1m 2s) (300 30%) 0.2149 
0m 30s (- 0m 55s) (350 35%) 0.1689 
0m 33s (- 0m 50s) (400 40%) 0.1578 
0m 37s (- 0m 45s) (450 45%) 0.1050 
0m 39s (- 0m 39s) (500 50%) 0.1290 54.50%
New best test accuracy! Model Updated!
0m 46s (- 0m 38s) (550 55%) 0.1098 
0m 50s (- 0m 33s) (600 60%) 0.2137 
0m 53s (- 0m 28s) (650 65%) 0.1018 
0m 56s (- 0m 24s) (700 70%) 0.1097 
1m 0s (- 0m 20s) (750 75%) 0.1432 
1m 3s (- 0m 15s) (800 80%) 0.1731 
1m 7s (- 0m 11s) (850 85%) 0.0924 
1m 10s (- 0m 7s) (900 90%) 0.1173 
1m 13s (- 0m 3s) (950 95%) 0.0984 
1m 17s (- 0m 0s) (1000 100%) 0.0882 65.50%
New best test accuracy! Model Updated!


In [30]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 65.60%
0m 10s (- 3m 10s) (50 5%) 0.3040 
0m 12s (- 1m 56s) (100 10%) 0.1473 
0m 16s (- 1m 33s) (150 15%) 0.1476 
0m 19s (- 1m 19s) (200 20%) 0.1238 
0m 23s (- 1m 9s) (250 25%) 0.1289 
0m 26s (- 1m 1s) (300 30%) 0.1352 
0m 29s (- 0m 55s) (350 35%) 0.0878 
0m 33s (- 0m 49s) (400 40%) 0.1366 
0m 36s (- 0m 44s) (450 45%) 0.0922 
0m 39s (- 0m 39s) (500 50%) 0.0961 66.50%
New best test accuracy! Model Updated!
0m 45s (- 0m 37s) (550 55%) 0.1116 
0m 48s (- 0m 32s) (600 60%) 0.0782 
0m 51s (- 0m 27s) (650 65%) 0.0799 
0m 54s (- 0m 23s) (700 70%) 0.1063 
0m 57s (- 0m 19s) (750 75%) 0.0846 
1m 1s (- 0m 15s) (800 80%) 0.0765 
1m 4s (- 0m 11s) (850 85%) 0.0897 
1m 8s (- 0m 7s) (900 90%) 0.0961 
1m 11s (- 0m 3s) (950 95%) 0.0900 
1m 14s (- 0m 0s) (1000 100%) 0.0931 65.50%


In [37]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 67.80%
0m 12s (- 4m 4s) (50 5%) 0.1109 
0m 18s (- 2m 46s) (100 10%) 0.0572 
0m 24s (- 2m 16s) (150 15%) 0.1216 
0m 29s (- 1m 58s) (200 20%) 0.0911 
0m 36s (- 1m 48s) (250 25%) 0.0792 
0m 41s (- 1m 36s) (300 30%) 0.1087 
0m 47s (- 1m 27s) (350 35%) 0.0978 
0m 51s (- 1m 17s) (400 40%) 0.1062 
0m 56s (- 1m 9s) (450 45%) 0.0618 
1m 1s (- 1m 1s) (500 50%) 0.1198 67.00%
1m 10s (- 0m 57s) (550 55%) 0.1716 
1m 14s (- 0m 49s) (600 60%) 0.0631 
1m 19s (- 0m 42s) (650 65%) 0.1456 
1m 24s (- 0m 36s) (700 70%) 0.0785 
1m 30s (- 0m 30s) (750 75%) 0.1053 
1m 35s (- 0m 23s) (800 80%) 0.1415 
1m 43s (- 0m 18s) (850 85%) 0.0777 
1m 50s (- 0m 12s) (900 90%) 0.0749 
1m 56s (- 0m 6s) (950 95%) 0.0810 
2m 1s (- 0m 0s) (1000 100%) 0.0740 72.50%
New best test accuracy! Model Updated!


In [38]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 76.20%
0m 10s (- 3m 14s) (50 5%) 0.0545 
0m 13s (- 2m 3s) (100 10%) 0.0679 
0m 16s (- 1m 35s) (150 15%) 0.0638 
0m 20s (- 1m 20s) (200 20%) 0.1739 
0m 23s (- 1m 9s) (250 25%) 0.0644 
0m 26s (- 1m 2s) (300 30%) 0.0681 
0m 30s (- 0m 56s) (350 35%) 0.0525 
0m 33s (- 0m 50s) (400 40%) 0.0829 
0m 37s (- 0m 45s) (450 45%) 0.0930 
0m 40s (- 0m 40s) (500 50%) 0.0723 78.00%
New best test accuracy! Model Updated!
0m 47s (- 0m 38s) (550 55%) 0.0406 
0m 51s (- 0m 34s) (600 60%) 0.0512 
0m 54s (- 0m 29s) (650 65%) 0.0586 
0m 57s (- 0m 24s) (700 70%) 0.0619 
1m 0s (- 0m 20s) (750 75%) 0.0688 
1m 4s (- 0m 16s) (800 80%) 0.0478 
1m 7s (- 0m 11s) (850 85%) 0.0619 
1m 11s (- 0m 7s) (900 90%) 0.1190 
1m 14s (- 0m 3s) (950 95%) 0.0672 
1m 18s (- 0m 0s) (1000 100%) 0.0840 78.00%


In [39]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 78.20%
0m 10s (- 3m 21s) (50 5%) 0.0706 
0m 13s (- 2m 5s) (100 10%) 0.0466 
0m 17s (- 1m 37s) (150 15%) 0.1047 
0m 20s (- 1m 21s) (200 20%) 0.0674 
0m 23s (- 1m 11s) (250 25%) 0.0645 
0m 26s (- 1m 2s) (300 30%) 0.0377 
0m 30s (- 0m 55s) (350 35%) 0.1067 
0m 33s (- 0m 50s) (400 40%) 0.0631 
0m 37s (- 0m 45s) (450 45%) 0.0721 
0m 40s (- 0m 40s) (500 50%) 0.0609 81.00%
New best test accuracy! Model Updated!
0m 46s (- 0m 38s) (550 55%) 0.0748 
0m 50s (- 0m 33s) (600 60%) 0.0523 
0m 53s (- 0m 29s) (650 65%) 0.0454 
0m 57s (- 0m 24s) (700 70%) 0.0466 
1m 0s (- 0m 20s) (750 75%) 0.1134 
1m 4s (- 0m 16s) (800 80%) 0.0669 
1m 7s (- 0m 11s) (850 85%) 0.1493 
1m 11s (- 0m 7s) (900 90%) 0.0557 
1m 14s (- 0m 3s) (950 95%) 0.0483 
1m 17s (- 0m 0s) (1000 100%) 0.0540 75.50%


In [40]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 79.40%
0m 11s (- 3m 45s) (50 5%) 0.0899 
0m 15s (- 2m 18s) (100 10%) 0.0499 
0m 18s (- 1m 44s) (150 15%) 0.0441 
0m 21s (- 1m 27s) (200 20%) 0.0726 
0m 25s (- 1m 17s) (250 25%) 0.1029 
0m 29s (- 1m 8s) (300 30%) 0.0376 
0m 32s (- 1m 0s) (350 35%) 0.0749 
0m 35s (- 0m 53s) (400 40%) 0.0751 
0m 39s (- 0m 48s) (450 45%) 0.0497 
0m 42s (- 0m 42s) (500 50%) 0.0381 81.00%
New best test accuracy! Model Updated!
0m 49s (- 0m 40s) (550 55%) 0.0521 
0m 52s (- 0m 35s) (600 60%) 0.0504 
0m 56s (- 0m 30s) (650 65%) 0.1254 
0m 59s (- 0m 25s) (700 70%) 0.0463 
1m 3s (- 0m 21s) (750 75%) 0.0389 
1m 6s (- 0m 16s) (800 80%) 0.0448 
1m 9s (- 0m 12s) (850 85%) 0.0664 
1m 13s (- 0m 8s) (900 90%) 0.0465 
1m 16s (- 0m 4s) (950 95%) 0.1186 
1m 20s (- 0m 0s) (1000 100%) 0.0349 87.00%
New best test accuracy! Model Updated!


In [42]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 83.60%
0m 11s (- 3m 46s) (50 5%) 0.1510 
0m 17s (- 2m 37s) (100 10%) 0.0343 
0m 23s (- 2m 10s) (150 15%) 0.0724 
0m 27s (- 1m 50s) (200 20%) 0.0492 
0m 33s (- 1m 39s) (250 25%) 0.0383 
0m 38s (- 1m 29s) (300 30%) 0.0704 
0m 43s (- 1m 20s) (350 35%) 0.0745 
0m 49s (- 1m 14s) (400 40%) 0.0785 
0m 54s (- 1m 6s) (450 45%) 0.0571 
0m 57s (- 0m 57s) (500 50%) 0.0434 86.50%
New best test accuracy! Model Updated!
1m 4s (- 0m 52s) (550 55%) 0.0455 
1m 6s (- 0m 44s) (600 60%) 0.0457 
1m 10s (- 0m 37s) (650 65%) 0.0527 
1m 13s (- 0m 31s) (700 70%) 0.0436 
1m 16s (- 0m 25s) (750 75%) 0.0526 
1m 20s (- 0m 20s) (800 80%) 0.0519 
1m 23s (- 0m 14s) (850 85%) 0.0430 
1m 26s (- 0m 9s) (900 90%) 0.0443 
1m 30s (- 0m 4s) (950 95%) 0.1227 
1m 33s (- 0m 0s) (1000 100%) 0.0693 82.00%


In [46]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 82.80%
0m 10s (- 3m 19s) (50 5%) 0.0420 
0m 14s (- 2m 6s) (100 10%) 0.0732 
0m 17s (- 1m 37s) (150 15%) 0.0625 
0m 20s (- 1m 21s) (200 20%) 0.0779 
0m 24s (- 1m 12s) (250 25%) 0.0350 
0m 27s (- 1m 3s) (300 30%) 0.0360 
0m 30s (- 0m 56s) (350 35%) 0.0336 
0m 33s (- 0m 50s) (400 40%) 0.0347 
0m 37s (- 0m 45s) (450 45%) 0.0484 
0m 40s (- 0m 40s) (500 50%) 0.0529 76.50%
0m 46s (- 0m 38s) (550 55%) 0.0331 
0m 50s (- 0m 33s) (600 60%) 0.0791 
0m 53s (- 0m 28s) (650 65%) 0.0445 
0m 56s (- 0m 24s) (700 70%) 0.0436 
1m 0s (- 0m 20s) (750 75%) 0.0241 
1m 4s (- 0m 16s) (800 80%) 0.0468 
1m 10s (- 0m 12s) (850 85%) 0.0320 
1m 16s (- 0m 8s) (900 90%) 0.0414 
1m 22s (- 0m 4s) (950 95%) 0.0460 
1m 27s (- 0m 0s) (1000 100%) 0.0685 85.50%
New best test accuracy! Model Updated!


In [47]:
trainIters(encoder1, attn_decoder1, 1000, print_every=50, eval_every=500, learning_rate=0.0001)

Best evaluation accuracy: 85.20%
0m 11s (- 3m 30s) (50 5%) 0.0499 
0m 15s (- 2m 20s) (100 10%) 0.0294 
0m 18s (- 1m 47s) (150 15%) 0.0349 
0m 22s (- 1m 29s) (200 20%) 0.0820 
0m 25s (- 1m 17s) (250 25%) 0.0819 
0m 29s (- 1m 8s) (300 30%) 0.0286 
0m 32s (- 1m 0s) (350 35%) 0.0530 
0m 35s (- 0m 53s) (400 40%) 0.0284 
0m 39s (- 0m 48s) (450 45%) 0.0458 
0m 43s (- 0m 43s) (500 50%) 0.0331 84.50%
0m 49s (- 0m 40s) (550 55%) 0.0513 
0m 53s (- 0m 35s) (600 60%) 0.0320 
0m 56s (- 0m 30s) (650 65%) 0.0196 
0m 59s (- 0m 25s) (700 70%) 0.0287 
1m 3s (- 0m 21s) (750 75%) 0.0323 
1m 6s (- 0m 16s) (800 80%) 0.0356 
1m 10s (- 0m 12s) (850 85%) 0.0590 
1m 13s (- 0m 8s) (900 90%) 0.0200 
1m 16s (- 0m 4s) (950 95%) 0.0235 
1m 19s (- 0m 0s) (1000 100%) 0.0317 89.00%
New best test accuracy! Model Updated!


In [50]:
# Another 1000-iteration pass over encoder1 / attn_decoder1 with the same
# reporting cadence (every 50) and evaluation cadence (every 500).
trainIters(
    encoder1,
    attn_decoder1,
    1000,
    print_every=50,
    eval_every=500,
    learning_rate=0.0001,
)

Best evaluation accuracy: 84.00%
0m 11s (- 3m 44s) (50 5%) 0.0315 
0m 15s (- 2m 23s) (100 10%) 0.0291 
0m 19s (- 1m 50s) (150 15%) 0.0315 
0m 23s (- 1m 35s) (200 20%) 0.0288 
0m 27s (- 1m 22s) (250 25%) 0.0208 
0m 30s (- 1m 12s) (300 30%) 0.0322 
0m 34s (- 1m 3s) (350 35%) 0.0279 
0m 37s (- 0m 56s) (400 40%) 0.0566 
0m 41s (- 0m 50s) (450 45%) 0.0421 
0m 44s (- 0m 44s) (500 50%) 0.0250 82.50%
0m 51s (- 0m 41s) (550 55%) 0.0236 
0m 54s (- 0m 36s) (600 60%) 0.0788 
0m 57s (- 0m 31s) (650 65%) 0.0255 
1m 1s (- 0m 26s) (700 70%) 0.0219 
1m 4s (- 0m 21s) (750 75%) 0.0409 
1m 7s (- 0m 16s) (800 80%) 0.0365 
1m 11s (- 0m 12s) (850 85%) 0.0433 
1m 14s (- 0m 8s) (900 90%) 0.1026 
1m 18s (- 0m 4s) (950 95%) 0.0254 
1m 21s (- 0m 0s) (1000 100%) 0.0350 89.00%
New best test accuracy! Model Updated!


In [51]:
# Continue training for 1000 iterations at learning rate 0.0001; the cell
# output shows progress every 50 iterations and accuracy at 500 and 1000.
trainIters(
    encoder1,
    attn_decoder1,
    1000,
    print_every=50,
    eval_every=500,
    learning_rate=0.0001,
)

Best evaluation accuracy: 86.20%
0m 10s (- 3m 22s) (50 5%) 0.0298 
0m 14s (- 2m 7s) (100 10%) 0.0551 
0m 17s (- 1m 39s) (150 15%) 0.0286 
0m 21s (- 1m 24s) (200 20%) 0.0217 
0m 24s (- 1m 12s) (250 25%) 0.0567 
0m 27s (- 1m 3s) (300 30%) 0.0374 
0m 30s (- 0m 57s) (350 35%) 0.0187 
0m 34s (- 0m 51s) (400 40%) 0.0219 
0m 37s (- 0m 45s) (450 45%) 0.0279 
0m 40s (- 0m 40s) (500 50%) 0.1717 89.00%
New best test accuracy! Model Updated!
0m 47s (- 0m 38s) (550 55%) 0.0301 
0m 50s (- 0m 33s) (600 60%) 0.0405 
0m 53s (- 0m 28s) (650 65%) 0.0730 
0m 56s (- 0m 24s) (700 70%) 0.0494 
1m 0s (- 0m 20s) (750 75%) 0.0145 
1m 3s (- 0m 15s) (800 80%) 0.0297 
1m 6s (- 0m 11s) (850 85%) 0.0392 
1m 10s (- 0m 7s) (900 90%) 0.1606 
1m 13s (- 0m 3s) (950 95%) 0.0261 
1m 16s (- 0m 0s) (1000 100%) 0.0235 92.50%
New best test accuracy! Model Updated!


In [52]:
# One more 1000-iteration pass at learning rate 0.0001, with progress every
# 50 iterations and evaluation every 500.
trainIters(
    encoder1,
    attn_decoder1,
    1000,
    print_every=50,
    eval_every=500,
    learning_rate=0.0001,
)

Best evaluation accuracy: 91.20%
0m 10s (- 3m 14s) (50 5%) 0.0297 
0m 13s (- 2m 4s) (100 10%) 0.0380 
0m 16s (- 1m 35s) (150 15%) 0.1087 
0m 20s (- 1m 22s) (200 20%) 0.0187 
0m 23s (- 1m 11s) (250 25%) 0.0653 
0m 26s (- 1m 2s) (300 30%) 0.0328 
0m 29s (- 0m 55s) (350 35%) 0.0186 
0m 33s (- 0m 49s) (400 40%) 0.1069 
0m 36s (- 0m 44s) (450 45%) 0.0283 
0m 40s (- 0m 40s) (500 50%) 0.0357 94.50%
New best test accuracy! Model Updated!
0m 46s (- 0m 37s) (550 55%) 0.0383 
0m 49s (- 0m 33s) (600 60%) 0.0294 
0m 53s (- 0m 28s) (650 65%) 0.0522 
0m 56s (- 0m 24s) (700 70%) 0.0463 
0m 59s (- 0m 19s) (750 75%) 0.0224 
1m 2s (- 0m 15s) (800 80%) 0.0259 
1m 6s (- 0m 11s) (850 85%) 0.0217 
1m 9s (- 0m 7s) (900 90%) 0.0387 
1m 12s (- 0m 3s) (950 95%) 0.0247 
1m 16s (- 0m 0s) (1000 100%) 0.0224 88.50%


In [58]:
# Fine-tuning pass: same 1000 iterations and reporting cadence, but with the
# learning rate lowered to 0.00001.
trainIters(
    encoder1,
    attn_decoder1,
    1000,
    print_every=50,
    eval_every=500,
    learning_rate=0.00001,
)

Best evaluation accuracy: 89.00%
0m 10s (- 3m 12s) (50 5%) 0.0151 
0m 13s (- 2m 2s) (100 10%) 0.0146 
0m 17s (- 1m 38s) (150 15%) 0.0244 
0m 20s (- 1m 23s) (200 20%) 0.0313 
0m 24s (- 1m 12s) (250 25%) 0.0262 
0m 27s (- 1m 4s) (300 30%) 0.0173 
0m 31s (- 0m 57s) (350 35%) 0.0223 
0m 34s (- 0m 51s) (400 40%) 0.0244 
0m 37s (- 0m 45s) (450 45%) 0.0184 
0m 41s (- 0m 41s) (500 50%) 0.0348 91.00%
New best test accuracy! Model Updated!
0m 47s (- 0m 38s) (550 55%) 0.0186 
0m 50s (- 0m 33s) (600 60%) 0.0150 
0m 54s (- 0m 29s) (650 65%) 0.0235 
0m 57s (- 0m 24s) (700 70%) 0.0515 
1m 1s (- 0m 20s) (750 75%) 0.0264 
1m 4s (- 0m 16s) (800 80%) 0.0243 
1m 7s (- 0m 11s) (850 85%) 0.0266 
1m 11s (- 0m 7s) (900 90%) 0.0883 
1m 14s (- 0m 3s) (950 95%) 0.0199 
1m 18s (- 0m 0s) (1000 100%) 0.0186 95.00%
New best test accuracy! Model Updated!


In [62]:
# Longer fine-tuning pass: 2000 iterations at learning rate 0.00001, still
# reporting every 50 iterations and evaluating every 500.
trainIters(
    encoder1,
    attn_decoder1,
    2000,
    print_every=50,
    eval_every=500,
    learning_rate=0.00001,
)

Best evaluation accuracy: 94.20%
0m 17s (- 11m 23s) (50 2%) 0.0659 
0m 22s (- 7m 12s) (100 5%) 0.0167 
0m 27s (- 5m 42s) (150 7%) 0.0387 
0m 31s (- 4m 45s) (200 10%) 0.0241 
0m 35s (- 4m 6s) (250 12%) 0.0190 
0m 38s (- 3m 37s) (300 15%) 0.0128 
0m 42s (- 3m 18s) (350 17%) 0.0161 
0m 45s (- 3m 2s) (400 20%) 0.0135 
0m 49s (- 2m 49s) (450 22%) 0.0215 
0m 52s (- 2m 38s) (500 25%) 0.0359 95.00%
New best test accuracy! Model Updated!
1m 2s (- 2m 45s) (550 27%) 0.0276 
1m 8s (- 2m 40s) (600 30%) 0.0195 
1m 13s (- 2m 32s) (650 32%) 0.0350 
1m 17s (- 2m 23s) (700 35%) 0.0115 
1m 22s (- 2m 17s) (750 37%) 0.0160 
1m 26s (- 2m 9s) (800 40%) 0.0248 
1m 31s (- 2m 3s) (850 42%) 0.0615 
1m 35s (- 1m 57s) (900 45%) 0.0832 
1m 40s (- 1m 50s) (950 47%) 0.0383 
1m 45s (- 1m 45s) (1000 50%) 0.0250 97.00%
New best test accuracy! Model Updated!
1m 54s (- 1m 43s) (1050 52%) 0.0843 
1m 58s (- 1m 36s) (1100 55%) 0.0236 
2m 2s (- 1m 30s) (1150 57%) 0.0241 
2m 8s (- 1m 25s) (1200 60%) 0.0171 
2m 12s (- 1m 19s) (

---

### Sample Evaluation

In [63]:
# Load the saved encoder/decoder for MODEL_VERSION from disk and report their
# evaluation accuracy (the cell displays the value returned by evaluateAccuracy).
# NOTE(review): presumably these are the checkpoints written when training prints
# "Model Updated!" -- confirm against trainIters.
# NOTE(review): torch.load unpickles arbitrary objects; only load checkpoints
# that were produced by this notebook.
encoder_ckpt = "saved_models/encoder_" + MODEL_VERSION
decoder_ckpt = "saved_models/decoder_" + MODEL_VERSION

# Check both files up front. Previously only the encoder path was tested, and a
# missing file left encoder2/decoder2 undefined (NameError on a fresh kernel) or
# silently stale from an earlier run of this cell.
missing_ckpts = [path for path in (encoder_ckpt, decoder_ckpt) if not os.path.exists(path)]
if missing_ckpts:
    raise FileNotFoundError("Saved model file(s) not found: " + ", ".join(missing_ckpts))

encoder2 = torch.load(encoder_ckpt)
decoder2 = torch.load(decoder_ckpt)
evaluateAccuracy(encoder2, decoder2, n=2000)

0.93999999999999995

In [64]:
# Print sample predictions from the reloaded models. In the output below each
# sample shows three lines: '>' the input command, '=' the reference action
# sequence, and '<' the model's prediction (ending in <EOS>).
evaluateRandomly(encoder2, decoder2)

> look opposite right twice and jump opposite left twice
= I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_LEFT I_TURN_LEFT I_JUMP I_TURN_LEFT I_TURN_LEFT I_JUMP
< I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_LEFT I_TURN_LEFT I_JUMP I_TURN_LEFT I_TURN_LEFT I_JUMP <EOS>

> turn around right twice after jump left
= I_TURN_LEFT I_JUMP I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT
< I_TURN_LEFT I_JUMP I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT I_TURN_RIGHT <EOS>

> jump left twice and look opposite right thrice
= I_TURN_LEFT I_JUMP I_TURN_LEFT I_JUMP I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_TURN_RIGHT I_LOOK
< I_TURN_LEFT I_JUMP I_TURN_LEFT I_JUMP I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_TURN_RIGHT I_LOOK I_TURN_RIGHT I_TURN_RIGHT I_LOOK <EOS>

> turn opposite left twice after jump twice

---