## Seq2Seq based Q&A


based on bAbl tasks dataset https://research.fb.com/downloads/babi/

# Prepare dataset

Let's download our dataset from http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-1.tar.gz or a backup https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz

In [35]:
import urllib.request as urllib

dwn_url = 'http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-1.tar.gz'
# dwn_url = 'https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz'
urllib.urlretrieve(dwn_url, 'tasks-ds.tar.gz')

HTTPError: HTTP Error 403: Forbidden

Let's extract dataset

In [46]:
import tarfile

tar = tarfile.open('tasks-ds.tar.gz')
print('Files in the tar \n', '\n'.join(tar.getnames()))

Files in the tar 
 tasksv11
tasksv11/en
tasksv11/._LICENSE
tasksv11/LICENSE
tasksv11/README
tasksv11/shuffled
tasksv11/shuffled/qa10_indefinite-knowledge_test.txt
tasksv11/shuffled/qa10_indefinite-knowledge_train.txt
tasksv11/shuffled/qa11_basic-coreference_test.txt
tasksv11/shuffled/qa11_basic-coreference_train.txt
tasksv11/shuffled/qa12_conjunction_test.txt
tasksv11/shuffled/qa12_conjunction_train.txt
tasksv11/shuffled/qa13_compound-coreference_test.txt
tasksv11/shuffled/qa13_compound-coreference_train.txt
tasksv11/shuffled/qa14_time-reasoning_test.txt
tasksv11/shuffled/qa14_time-reasoning_train.txt
tasksv11/shuffled/qa15_basic-deduction_test.txt
tasksv11/shuffled/qa15_basic-deduction_train.txt
tasksv11/shuffled/qa16_basic-induction_test.txt
tasksv11/shuffled/qa16_basic-induction_train.txt
tasksv11/shuffled/qa17_positional-reasoning_test.txt
tasksv11/shuffled/qa17_positional-reasoning_train.txt
tasksv11/shuffled/qa18_size-reasoning_test.txt
tasksv11/shuffled/qa18_size-reasoning_train

Facebook [bAbI](https://research.fb.com/downloads/babi/) project dataset has next format.

```
ID text
ID question[tab]answer[tab]supporting fact IDS.
```

In [121]:
import re


def tokenize(sent):
    '''tokenize inlcuding punctuation'''
    return [x.strip() for x in re.split(r'(\W+)?', sent) if x.strip()]


def parse_stories(lines):
    data = []
    story = []

    for line in lines:
        line = line.decode('utf-8').strip()
        s_id, line = line.split(' ', 1)
        s_id = int(s_id)

        if s_id == 1:
            story = []

        if '\t' in line:
            q, a, sup = line.split('\t')
            q = tokenize(q)

            # Provide all the substories
            substory = [x for x in story if x]

            data.append((substory, q, a))
            story.append('')
        else:
            story.append(tokenize(line))

    return data


from functools import reduce


def get_stories(f):
    '''read lines from file and convert sentences into a single story.'''
    lines = f.readlines()
    data = parse_stories(lines)

    def flatten(data): return reduce(lambda x, y: x + y, data)
    data = [(flatten(story), q, answer) for story, q, answer in data]
    return data

In [129]:
file_to_extract = 'tasksv11/en/qa1_single-supporting-fact_{}.txt'

test = tar.extractfile(file_to_extract.format('test'))
train = tar.extractfile(file_to_extract.format('train'))

In [130]:
test_data = get_stories(test)
train_data = get_stories(train)

  return _compile(pattern, flags).split(string, maxsplit)


In [131]:
import random

random.choice(train_data)

(['Mary',
  'went',
  'back',
  'to',
  'the',
  'hallway',
  '.',
  'Daniel',
  'went',
  'back',
  'to',
  'the',
  'bedroom',
  '.',
  'Sandra',
  'moved',
  'to',
  'the',
  'bathroom',
  '.',
  'Sandra',
  'journeyed',
  'to',
  'the',
  'hallway',
  '.',
  'Mary',
  'went',
  'back',
  'to',
  'the',
  'bedroom',
  '.',
  'Mary',
  'went',
  'back',
  'to',
  'the',
  'garden',
  '.',
  'Sandra',
  'went',
  'back',
  'to',
  'the',
  'bathroom',
  '.',
  'John',
  'went',
  'to',
  'the',
  'office',
  '.'],
 ['Where', 'is', 'John', '?'],
 'office')

In [603]:
vocab = set('#')

for story, q, answer in train_data + test_data:
    vocab |= set(story + q + [answer])

vocab = sorted(vocab)

In [604]:
vocab

['#',
 '.',
 '?',
 'Daniel',
 'John',
 'Mary',
 'Sandra',
 'Where',
 'back',
 'bathroom',
 'bedroom',
 'garden',
 'hallway',
 'is',
 'journeyed',
 'kitchen',
 'moved',
 'office',
 'the',
 'to',
 'travelled',
 'went']

In [605]:
vocab_size = len(vocab)
story_maxlen = max(map(len, (x for x, _, _ in train_data + test_data)))
query_maxlen = max(map(len, (x for _, x, _ in train_data + test_data)))
answer_maxlen = max(map(len, (x for _, _, x in train_data + test_data)))

In [200]:
print('Vocab size:', vocab_size, 'unique words')
print('Story max length:', story_maxlen, 'words')
print('Query max length:', query_maxlen, 'words')
print('Answer max length:', answer_maxlen, 'words')
print('Number of training stories:', len(train_data))
print('Number of test stories:', len(test_data))
print('-')
print('Here\'s what a "story" tuple looks like (input, query, answer):')
print(random.choice(train_data))

Vocab size: 22 unique words
Story max length: 66 words
Query max length: 4 words
Answer max length: 8 words
Number of training stories: 1000
Number of test stories: 1000
-
Here's what a "story" tuple looks like (input, query, answer):
(['John', 'went', 'to', 'the', 'bathroom', '.', 'John', 'moved', 'to', 'the', 'office', '.', 'John', 'went', 'to', 'the', 'hallway', '.', 'Sandra', 'journeyed', 'to', 'the', 'hallway', '.', 'John', 'journeyed', 'to', 'the', 'bathroom', '.', 'Daniel', 'went', 'to', 'the', 'bathroom', '.', 'Sandra', 'moved', 'to', 'the', 'garden', '.', 'John', 'moved', 'to', 'the', 'office', '.', 'Sandra', 'moved', 'to', 'the', 'hallway', '.', 'Daniel', 'went', 'to', 'the', 'kitchen', '.'], ['Where', 'is', 'Daniel', '?'], 'kitchen')


In [561]:
print('Vectorizing the word sequences...')
import numpy as np


def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
    X = []
    Xq = []
    Y = []

    for story, query, answer in data:
        x = [word_idx[w] for w in story]
        xq = [word_idx[w] for w in query]
        y = [word_idx[answer]]
        X.append(x)
        Xq.append(xq)
        Y.append(y)
    return (X, Xq, Y)


# create words index
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

# vrctorize input
inputs_train, queries_train, answers_train = vectorize_stories(
    train_data, word_idx, story_maxlen, query_maxlen)
inputs_test, queries_test, answers_test = vectorize_stories(
    test_data, word_idx, story_maxlen, query_maxlen)

Vectorizing the word sequences...


In [691]:
print(random.choice(inputs_train))

[5, 14, 19, 18, 17, 1, 6, 20, 19, 18, 9, 1, 6, 20, 19, 18, 17, 1, 5, 21, 8, 19, 18, 12, 1, 3, 21, 8, 19, 18, 17, 1, 6, 21, 19, 18, 9, 1, 4, 14, 19, 18, 10, 1, 6, 21, 8, 19, 18, 17, 1, 3, 21, 19, 18, 10, 1, 6, 20, 19, 18, 11, 1]


# Design Model

![End-To-End Memory Networks](http://www.zmonster.me/assets/img/memn2n_single_layer.png)

In [394]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

## Encoder model

In [696]:
class Encoder(nn.Module):

    def __init__(self, input_size, hidden_size, n_layers=1):
        super(Encoder, self).__init__()

        # define hyperparameters
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        # define architecture
        self.embedding = nn.Embedding(22, hidden_size)
#         self.gru = nn.GRU(hidden_size, hidden_size, n_layers)

    def forward(self, input_sequence, hidden_state):
        seq_len = len(input_sequence)

        embedded = F.dropout(self.embedding(input_sequence.unsqueeze(0)), 0.3)
#         output, hidden = self.gru(embedded, hidden_state)
        return embedded  # output, hidden

    def init_hidden(self):
        hidden = Variable(torch.zeros(self.n_layers, 1, self.hidden_size))
        return hidden

## Decoder model

In [789]:
class Decoder(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(Decoder, self).__init__()

        # define hyperparameters
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        # define architecture
        self.fc1 = nn.Linear(input_size * hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, answer):
        answer = answer.view(-1)
        answer = F.dropout(self.fc1(answer), 0.5)
        answer = F.dropout(self.fc2(answer), 0.5)
        answer = self.fc3(answer)
        answer = F.softmax(answer, dim=0)

        return answer

## Network Memory model

In [786]:
class Memory(nn.Module):

    def __init__(self, input_size, hidden_size, n_layers=1):
        super(Memory, self).__init__()

        # define hyperparameters
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        # define architecture
        self.m_encoder = Encoder(input_size, hidden_size, n_layers)
        self.c_encoder = Encoder(input_size, hidden_size, n_layers)
        self.question_encoder = Encoder(input_size, hidden_size, n_layers)

        self.memory = nn.GRU(hidden_size * 2, hidden_size, n_layers)

    def forward(self, fact, query, hidden_state):
        encoded_m = self.m_encoder(fact, hidden_state)
        encoded_question = self.question_encoder(query, hidden_state)

        match = torch.dot(encoded_m, encoded_question)
#         print(encoded_m.size(), encoded_question.size(), match)
        match = F.softmax(match, dim=0)

        encoded_c = self.c_encoder(fact, hidden_state)
        response = torch.add(match, 1, encoded_c)

        answer = torch.cat((response, encoded_question))
        answer = answer.unsqueeze(0).view(fact.size()[0], 1, -1)

        answer, hidden_state = self.memory(answer, hidden_state)

        return answer, hidden_state

## Testing models

In [806]:
def pad(array, max_len):
    return np.pad(array, (0, max_len - len(array) % max_len), 'constant')


m_input_size = 132
m_hidden_size = 5
m_n_layers = 1

case_idx = random.randrange(0, len(train_data))

fact = pad(inputs_train[case_idx], m_input_size)
question = pad(queries_train[case_idx], m_input_size)
answer = answers_train[case_idx]

print('fact', len(inputs_train[3]), 'question', len(question))

memory = Memory(m_input_size, m_hidden_size, m_n_layers)

m_hidden_state = Variable(torch.zeros(m_n_layers, 1, m_hidden_size))

a_output_size = vocab_size

fact = Variable(torch.LongTensor(fact))
question = Variable(torch.LongTensor(question))

prediction_for_answer, hidden_state = memory(fact, question, m_hidden_state)

decoder = Decoder(m_input_size, m_hidden_size, a_output_size, m_n_layers)

predicted_answer = decoder(prediction_for_answer)
_, word_idx = torch.max(predicted_answer.data, 0)
print('expected answer: [', vocab[answer[0]], '], predicted answer:',
      'word idx', word_idx[0], 'word: [', vocab[word_idx[0]], ']', predicted_answer.size())

fact 49 question 132
expected answer: [ kitchen ], predicted answer: word idx 1 word: [ . ] torch.Size([22])


## Training infra

In [807]:
clip = 0.5


def train(fact, question, answer, hidden_state, memory_model, answer_model, memory_optimizer, answer_optimizer, criterion):

    # reset gradients
    memory_optimizer.zero_grad()
    answer_optimizer.zero_grad()
    loss = 0

    # orchestrate model learning
    answer_support, hidden_state = memory_model(fact, question, hidden_state)
    predicted_answer = answer_model(answer_support)

    _, word_idx = torch.max(predicted_answer.data, 0)
#     print('predicted asnwer',
#           word_idx[0], 'excpected answer', answer.data[0], predicted_answer.size())

    # compute loss
    predicted_answer = predicted_answer.view(1, -1)
    loss = criterion(predicted_answer, answer)

    # backpropagation
    loss.backward()
    torch.nn.utils.clip_grad_norm(memory_model.parameters(), clip)
    torch.nn.utils.clip_grad_norm(answer_model.parameters(), clip)
    memory_optimizer.step()
    answer_optimizer.step()

    return loss.data[0]

In [461]:
import time
import math


def as_minutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def time_since(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (as_minutes(s), as_minutes(rs))

## Run Training

In [814]:
# init model parameters
tr_input_size = 132
tr_hidden_size = 5
tr_n_layers = 1
tr_output_size = vocab_size

# init model
memory_model = Memory(tr_input_size, tr_hidden_size, tr_n_layers)
answer_model = Decoder(tr_input_size, tr_hidden_size,
                       tr_output_size, tr_n_layers)

# init optimizers and loss calculator
lr = 0.0001
memory_optimizer = optim.RMSprop(memory_model.parameters(), lr=lr)
answer_optimizer = optim.RMSprop(answer_model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [815]:
# loop through
n_epochs = 2000
n_iterations = len(train_data)
trained_model_path = 'qna-at-epoch-{}.pt'

# keep track of time elapsed and running averages
plot_every = 1000
print_every = 500
save_model_every = 1  # save model every epoch

plot_losses = []
print_loss_total = 0  # reset every print_every
plot_loss_total = 0  # reset every plot_every

In [None]:
print('Is about to start training...')
start = time.time()
step = 1
for epoch in range(1, n_epochs + 1):
    for it in range(n_iterations):
        # randomly select input sequence
        idx = random.randrange(0, n_iterations)

        # convert input sequence to padded array
        fact = pad(inputs_train[idx], tr_input_size)
        question = pad(queries_train[idx], tr_input_size)
        answer = answers_train[idx]

        # wrap into Tensor and  Variable
        fact = Variable(torch.LongTensor(fact))
        question = Variable(torch.LongTensor(question))
        answer = Variable(torch.LongTensor(answer))

        # feed
        tr_hidden_state = Variable(torch.zeros(tr_n_layers, 1, tr_hidden_size))
        loss = train(fact, question, answer, tr_hidden_state, memory_model,
                     answer_model, memory_optimizer, answer_optimizer, criterion)

        # keep track loss
        print_loss_total += loss
        plot_loss_total += loss

        if it == 0:
            continue

        if it % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '%s (s %d in e %d %d%%) %.4f' % (time_since(
                start, step / (n_epochs * n_iterations)), step, epoch, (step / (n_epochs * n_iterations)) * 100, print_loss_avg)
            print(print_summary)

        if it % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_loss_total = 0
            plot_losses.append(plot_loss_avg)
        step += 1

    if epoch % save_model_every == 0:
        torch.save({
            'encoder': memory_model.state_dict(),
            'decoder': answer_model.state_dict(),
            'encoder_optimizer': memory_optimizer.state_dict(),
            'decoder_optimizer': answer_optimizer.state_dict()
        },
            trained_model_path.format(epoch))

Is about to start training...
0m 17s (- 1197m 40s) (s 500 in e 1 0%) 3.0572
0m 54s (- 1201m 38s) (s 1499 in e 2 0%) 5.9910
1m 29s (- 1187m 45s) (s 2498 in e 3 0%) 5.9596
2m 4s (- 1183m 3s) (s 3497 in e 4 0%) 5.9483
2m 39s (- 1176m 33s) (s 4496 in e 5 0%) 5.9447
3m 13s (- 1170m 43s) (s 5495 in e 6 0%) 5.9440
3m 48s (- 1168m 43s) (s 6494 in e 7 0%) 5.9443
4m 23s (- 1167m 46s) (s 7493 in e 8 0%) 5.9452
4m 58s (- 1167m 16s) (s 8492 in e 9 0%) 5.9445
5m 33s (- 1165m 38s) (s 9491 in e 10 0%) 5.9425
6m 8s (- 1163m 24s) (s 10490 in e 11 0%) 5.9424
6m 42s (- 1161m 6s) (s 11489 in e 12 0%) 5.9421
7m 17s (- 1159m 55s) (s 12488 in e 13 0%) 5.9371
8m 3s (- 1186m 55s) (s 13487 in e 14 0%) 5.9384
8m 39s (- 1185m 42s) (s 14486 in e 15 0%) 5.9388
9m 13s (- 1182m 48s) (s 15485 in e 16 0%) 5.9355
9m 48s (- 1179m 39s) (s 16484 in e 17 0%) 5.9364
10m 22s (- 1177m 7s) (s 17483 in e 18 0%) 5.9327
10m 57s (- 1174m 35s) (s 18482 in e 19 0%) 5.9371
11m 31s (- 1172m 13s) (s 19481 in e 20 0%) 5.9343
12m 6s (- 117

95m 19s (- 1071m 53s) (s 163337 in e 164 8%) 5.1429
95m 52s (- 1071m 1s) (s 164336 in e 165 8%) 5.1049
96m 26s (- 1070m 10s) (s 165335 in e 166 8%) 5.1004
97m 0s (- 1069m 20s) (s 166334 in e 167 8%) 5.1177
97m 33s (- 1068m 29s) (s 167333 in e 168 8%) 5.1462
98m 7s (- 1067m 38s) (s 168332 in e 169 8%) 5.1303
98m 40s (- 1066m 47s) (s 169331 in e 170 8%) 5.1426
99m 14s (- 1065m 57s) (s 170330 in e 171 8%) 5.1457
99m 47s (- 1065m 7s) (s 171329 in e 172 8%) 5.1369
100m 20s (- 1064m 17s) (s 172328 in e 173 8%) 5.1642
100m 54s (- 1063m 28s) (s 173327 in e 174 8%) 5.0710
101m 28s (- 1062m 43s) (s 174326 in e 175 8%) 5.1740
102m 1s (- 1061m 53s) (s 175325 in e 176 8%) 5.1576
102m 35s (- 1061m 4s) (s 176324 in e 177 8%) 5.1402
103m 9s (- 1060m 16s) (s 177323 in e 178 8%) 5.1326
103m 42s (- 1059m 27s) (s 178322 in e 179 8%) 5.1231
104m 16s (- 1058m 38s) (s 179321 in e 180 8%) 5.1035
104m 49s (- 1057m 49s) (s 180320 in e 181 9%) 5.1084
105m 23s (- 1057m 1s) (s 181319 in e 182 9%) 5.1450
105m 56s (

181m 57s (- 961m 47s) (s 318182 in e 319 15%) 4.9800
182m 31s (- 961m 8s) (s 319181 in e 320 15%) 4.9563
183m 4s (- 960m 30s) (s 320180 in e 321 16%) 4.9710
183m 38s (- 959m 52s) (s 321179 in e 322 16%) 4.9524
184m 11s (- 959m 14s) (s 322178 in e 323 16%) 4.9525
184m 45s (- 958m 36s) (s 323177 in e 324 16%) 4.9057
185m 18s (- 957m 59s) (s 324176 in e 325 16%) 4.9895
185m 52s (- 957m 20s) (s 325175 in e 326 16%) 4.9402
186m 25s (- 956m 41s) (s 326174 in e 327 16%) 5.0129
186m 59s (- 956m 3s) (s 327173 in e 328 16%) 5.0284
187m 32s (- 955m 25s) (s 328172 in e 329 16%) 4.9411
188m 6s (- 954m 47s) (s 329171 in e 330 16%) 4.8907
188m 39s (- 954m 8s) (s 330170 in e 331 16%) 4.9872
189m 13s (- 953m 31s) (s 331169 in e 332 16%) 4.9670
189m 46s (- 952m 53s) (s 332168 in e 333 16%) 4.9882
190m 20s (- 952m 15s) (s 333167 in e 334 16%) 4.9770
190m 53s (- 951m 37s) (s 334166 in e 335 16%) 4.9771
191m 27s (- 950m 58s) (s 335165 in e 336 16%) 4.9453
192m 0s (- 950m 20s) (s 336164 in e 337 16%) 4.9418

269m 10s (- 866m 30s) (s 474026 in e 475 23%) 4.9862
269m 44s (- 865m 55s) (s 475025 in e 476 23%) 4.9405
270m 17s (- 865m 19s) (s 476024 in e 477 23%) 4.8781
270m 51s (- 864m 44s) (s 477023 in e 478 23%) 4.9175
271m 24s (- 864m 8s) (s 478022 in e 479 23%) 4.9458
271m 58s (- 863m 33s) (s 479021 in e 480 23%) 4.9665
272m 31s (- 862m 57s) (s 480020 in e 481 24%) 4.9322
273m 5s (- 862m 22s) (s 481019 in e 482 24%) 4.9563
273m 38s (- 861m 46s) (s 482018 in e 483 24%) 4.9008
274m 12s (- 861m 11s) (s 483017 in e 484 24%) 4.9177
274m 46s (- 860m 36s) (s 484016 in e 485 24%) 4.9258
275m 19s (- 860m 0s) (s 485015 in e 486 24%) 4.9951
275m 53s (- 859m 25s) (s 486014 in e 487 24%) 4.9452
276m 26s (- 858m 49s) (s 487013 in e 488 24%) 4.9059
277m 0s (- 858m 13s) (s 488012 in e 489 24%) 4.9551
277m 33s (- 857m 37s) (s 489011 in e 490 24%) 4.9020
278m 7s (- 857m 2s) (s 490010 in e 491 24%) 4.9075
278m 40s (- 856m 26s) (s 491009 in e 492 24%) 4.9377
279m 14s (- 855m 50s) (s 492008 in e 493 24%) 4.9206

356m 47s (- 776m 7s) (s 629870 in e 631 31%) 4.9590
357m 21s (- 775m 33s) (s 630869 in e 632 31%) 4.9174
357m 55s (- 774m 59s) (s 631868 in e 633 31%) 4.9412
358m 29s (- 774m 24s) (s 632867 in e 634 31%) 4.9629
359m 2s (- 773m 50s) (s 633866 in e 635 31%) 4.9321
359m 36s (- 773m 15s) (s 634865 in e 636 31%) 4.9000
360m 9s (- 772m 40s) (s 635864 in e 637 31%) 4.9152
360m 43s (- 772m 5s) (s 636863 in e 638 31%) 4.9536
361m 16s (- 771m 30s) (s 637862 in e 639 31%) 4.9199
361m 50s (- 770m 55s) (s 638861 in e 640 31%) 4.9500
362m 23s (- 770m 20s) (s 639860 in e 641 31%) 4.9614
362m 57s (- 769m 45s) (s 640859 in e 642 32%) 4.9054
363m 30s (- 769m 10s) (s 641858 in e 643 32%) 4.9628
364m 4s (- 768m 35s) (s 642857 in e 644 32%) 4.9790
364m 37s (- 768m 0s) (s 643856 in e 645 32%) 4.9832
365m 11s (- 767m 26s) (s 644855 in e 646 32%) 4.9550
365m 44s (- 766m 51s) (s 645854 in e 647 32%) 4.8953
366m 19s (- 766m 19s) (s 646853 in e 648 32%) 4.9277
366m 54s (- 765m 46s) (s 647852 in e 649 32%) 4.9612

## Test trained model

In [571]:
def evaluate(fact, question, memory_model, answer_model, hidden_state, vocab):

    # wrap into Tensor and  Variable
    fact = Variable(torch.LongTensor(fact))
    question = Variable(torch.LongTensor(question))

    # orchestrate model learning
    answer_support, hidden_state = memory_model(fact, question, hidden_state)
    predicted_answer = answer_model(answer_support)

    _, word_idx = torch.max(predicted_answer.data, 0)

    return vocab[word_idx[0]]

In [None]:
# convert input sequence to padded array
idx = random.randrange(0, len(inputs_test))
fact = pad(inputs_test[idx], tr_input_size)
question = pad(queries_test[idx], tr_input_size)

# feed
t_hidden_state = Variable(torch.zeros(tr_n_layers, 1, tr_hidden_size))

predicted_answer = evaluate(
    fact, question, memory_model, answer_model, t_hidden_state, vocab)

print([''.join(vocab[x]) for x in inputs_test[idx]])
print([''.join(vocab[x]) for x in queries_test[idx]])
print('expected answer: [', vocab[answers_test[idx][0]],
      '] and predicted one: [', predicted_answer, ']')