### Dependencies
- keras - obviously
- h5py - for model checkpointing
- keras-tqdm - because my [Jupyter notebook freezes on the default Keras progbar](https://github.com/fchollet/keras/issues/4880). Also, it's awesome

In [1]:
'''Trains a memory network on the bAbI dataset.
References:
- Jason Weston, Antoine Bordes, Sumit Chopra, Tomas Mikolov, Alexander M. Rush,
  "Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks",
  http://arxiv.org/abs/1502.05698
- Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, Rob Fergus,
  "End-To-End Memory Networks",
  http://arxiv.org/abs/1503.08895
Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs.
Time per epoch: 3s on CPU (core i7).
'''

# compat
from __future__ import print_function

# python 
from functools import reduce
import tarfile
import numpy as np
import re

# ML
from keras.models import Sequential, Model
from keras.layers.embeddings import Embedding
from keras.layers import Input, Activation, Dense, Permute, Dropout, add, dot, concatenate
from keras.layers import LSTM
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import ModelCheckpoint

from keras_tqdm import TQDMNotebookCallback

# local libs
import preprocess

Using TensorFlow backend.


In [2]:
# Download the bAbI archive (get_file reuses a cached copy when present).
try:
    path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
except Exception:
    # `except Exception` (rather than a bare `except:`) keeps the manual
    # download hint from being printed on KeyboardInterrupt / SystemExit.
    print('Error downloading dataset, please download it manually:\n'
          '$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz\n'
          '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz')
    raise

challenges = {
    # QA1 with 10,000 samples
    'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt',
    # QA2 with 10,000 samples
    'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt',
}
challenge_type = 'single_supporting_fact_10k'
challenge = challenges[challenge_type]

print('Extracting stories for the challenge:', challenge_type)
# The context manager closes the archive as soon as both splits are parsed.
# The parsed stories are used below with len() and `+`, so they are fully
# materialised before the archive is closed.
with tarfile.open(path) as tar:
    train_stories = preprocess.get_stories(tar.extractfile(challenge.format('train')))
    test_stories = preprocess.get_stories(tar.extractfile(challenge.format('test')))

# Build the combined list once instead of re-concatenating it for every statistic.
all_stories = train_stories + test_stories

# Vocabulary = every token seen in a story, a question or an answer.
vocab = set()
for story, q, answer in all_stories:
    vocab |= set(story + q + [answer])
vocab = sorted(vocab)

# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
story_maxlen = max(len(story) for story, _, _ in all_stories)
query_maxlen = max(len(query) for _, query, _ in all_stories)

print('-')
print('Vocab size:', vocab_size, 'unique words')
print('Story max length:', story_maxlen, 'words')
print('Query max length:', query_maxlen, 'words')
print('Number of training stories:', len(train_stories))
print('Number of test stories:', len(test_stories))
print('-')
print('Here\'s what a "story" tuple looks like (input, query, answer):')
print(train_stories[0])
print('-')
print('Vectorizing the word sequences...')

# Word indices start at 1 because index 0 is reserved (see vocab_size above).
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
inputs_train, queries_train, answers_train = preprocess.vectorize_stories(train_stories,
                                                                          word_idx,
                                                                          story_maxlen,
                                                                          query_maxlen)
inputs_test, queries_test, answers_test = preprocess.vectorize_stories(test_stories,
                                                                       word_idx,
                                                                       story_maxlen,
                                                                       query_maxlen)

Extracting stories for the challenge: single_supporting_fact_10k


  return _compile(pattern, flags).split(string, maxsplit)


-
Vocab size: 22 unique words
Story max length: 68 words
Query max length: 4 words
Number of training stories: 10000
Number of test stories: 1000
-
Here's what a "story" tuple looks like (input, query, answer):
(['Mary', 'moved', 'to', 'the', 'bathroom', '.', 'John', 'went', 'to', 'the', 'hallway', '.'], ['Where', 'is', 'Mary', '?'], 'bathroom')
-
Vectorizing the word sequences...


In [3]:
# Summarise the vectorized tensors: one description line followed by the
# train/test shapes for each of the three tensor groups.
shape_report = (
    ('inputs: integer tensor of shape (samples, max_length)',
     ('inputs_train shape:', inputs_train),
     ('inputs_test shape:', inputs_test)),
    ('queries: integer tensor of shape (samples, max_length)',
     ('queries_train shape:', queries_train),
     ('queries_test shape:', queries_test)),
    ('answers: binary (1 or 0) tensor of shape (samples, vocab_size)',
     ('answers_train shape:', answers_train),
     ('answers_test shape:', answers_test)),
)

print('-')
for description, (train_label, train_tensor), (test_label, test_tensor) in shape_report:
    print(description)
    print(train_label, train_tensor.shape)
    print(test_label, test_tensor.shape)
    print('-')

-
inputs: integer tensor of shape (samples, max_length)
inputs_train shape: (10000, 68)
inputs_test shape: (1000, 68)
-
queries: integer tensor of shape (samples, max_length)
queries_train shape: (10000, 4)
queries_test shape: (1000, 4)
-
answers: binary (1 or 0) tensor of shape (samples, vocab_size)
answers_train shape: (10000, 22)
answers_test shape: (1000, 22)
-


In [4]:

class DeepMemNet:
    '''Memory network for bAbI-style question answering, built with Keras.

    The constructor builds and compiles the model and stores it on
    ``self.model``; nothing else is kept on the instance.

    vocab_size:    number of distinct token indices (including the reserved 0)
    story_maxlen:  length of the padded story sequences
    query_maxlen:  length of the padded question sequences
    embedding_dim: width of the story ("m") and question embeddings
    dropout_rate:  dropout applied after each embedding and after the LSTM
    lstm_units:    size of the LSTM that reduces the response to one vector

    The defaults of the last three reproduce the values that were previously
    hard-coded (64, 0.3 and 32).
    '''

    def __init__(self, vocab_size=22, story_maxlen=68, query_maxlen=4,
                 embedding_dim=64, dropout_rate=0.3, lstm_units=32):
        # placeholders
        input_sequence = Input((story_maxlen,))
        question = Input((query_maxlen,))

        # encoders
        # embed the input sequence into a sequence of vectors
        input_encoder_m = Sequential()
        input_encoder_m.add(Embedding(input_dim=vocab_size,
                                      output_dim=embedding_dim))
        input_encoder_m.add(Dropout(dropout_rate))
        # output: (samples, story_maxlen, embedding_dim)

        # embed the input into a sequence of vectors of size query_maxlen;
        # the width must be query_maxlen so it can be added to `match` below
        input_encoder_c = Sequential()
        input_encoder_c.add(Embedding(input_dim=vocab_size,
                                      output_dim=query_maxlen))
        input_encoder_c.add(Dropout(dropout_rate))
        # output: (samples, story_maxlen, query_maxlen)

        # embed the question into a sequence of vectors
        question_encoder = Sequential()
        question_encoder.add(Embedding(input_dim=vocab_size,
                                       output_dim=embedding_dim,
                                       input_length=query_maxlen))
        question_encoder.add(Dropout(dropout_rate))
        # output: (samples, query_maxlen, embedding_dim)

        # encode input sequence and questions (which are indices)
        # to sequences of dense vectors
        input_encoded_m = input_encoder_m(input_sequence)
        input_encoded_c = input_encoder_c(input_sequence)
        question_encoded = question_encoder(question)

        # compute a 'match' between the first input vector sequence
        # and the question vector sequence
        # shape: `(samples, story_maxlen, query_maxlen)`
        match = dot([input_encoded_m, question_encoded], axes=(2, 2))
        match = Activation('softmax')(match)

        # add the match matrix with the second input vector sequence
        response = add([match, input_encoded_c])  # (samples, story_maxlen, query_maxlen)
        response = Permute((2, 1))(response)  # (samples, query_maxlen, story_maxlen)

        # concatenate the match matrix with the question vector sequence
        answer = concatenate([response, question_encoded])

        # the original paper uses a matrix multiplication for this reduction step.
        # we choose to use a RNN instead.
        answer = LSTM(lstm_units)(answer)  # (samples, lstm_units)

        # one regularization layer -- more would probably be needed.
        answer = Dropout(dropout_rate)(answer)
        answer = Dense(vocab_size)(answer)  # (samples, vocab_size)
        # we output a probability distribution over the vocabulary
        answer = Activation('softmax')(answer)

        # build the final model
        model = Model([input_sequence, question], answer)
        model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                      metrics=['accuracy'])

        self.model = model


In [5]:
# The file name carries no epoch placeholder and save_best_only is False, so
# every epoch overwrites the same checkpoint file.
filepath = 'dmn{:02}.hdf5'.format(0)
checkpointer = ModelCheckpoint(
    filepath=filepath,
    monitor='val_acc',
    verbose=1,
    save_best_only=False,
)

In [6]:
# Build the network with the dimensions measured from the dataset.
dmn = DeepMemNet(
    vocab_size=vocab_size,
    story_maxlen=story_maxlen,
    query_maxlen=query_maxlen,
)

# train
# verbose=0 turns off the built-in Keras progress output; progress is reported
# by TQDMNotebookCallback and checkpoints are written by `checkpointer`.
dmn.model.fit(
    [inputs_train, queries_train],
    answers_train,
    batch_size=32,
    epochs=12,
    validation_data=([inputs_test, queries_test], answers_test),
    verbose=0,
    callbacks=[checkpointer, TQDMNotebookCallback()],
)

Epoch 00000: saving model to dmn00.hdf5


Epoch 00001: saving model to dmn00.hdf5


Epoch 00002: saving model to dmn00.hdf5


Epoch 00003: saving model to dmn00.hdf5


Epoch 00004: saving model to dmn00.hdf5


Epoch 00005: saving model to dmn00.hdf5


Epoch 00006: saving model to dmn00.hdf5


Epoch 00007: saving model to dmn00.hdf5


Epoch 00008: saving model to dmn00.hdf5


Epoch 00009: saving model to dmn00.hdf5


Epoch 00010: saving model to dmn00.hdf5


Epoch 00011: saving model to dmn00.hdf5



<keras.callbacks.History at 0x7f0cbc1bc7b8>

In [239]:
class Choice(object):
    '''A single menu entry: a key, a display name and an optional callback.

    key_or_tup: one of
        - a str, used as the key;
        - a tuple/list of 1 to 3 items ``(key[, name[, callback]])``;
        - another Choice, which is copied (key, name, callback and its
          stored keyword arguments).
    name:       display name (ignored when copied from a Choice or given in
                the tuple/list)
    callback:   callable run when the entry is called (same precedence as name)
    **kwargs:   keyword arguments passed to ``callback`` on every call; when
                copying a Choice they override the copied ones key by key

    Raises ValueError for a tuple/list of any other length or for any other
    type of ``key_or_tup``.
    '''

    def __init__(self, key_or_tup, name=None, callback=None, **kwargs):
        if isinstance(key_or_tup, Choice):
            key = key_or_tup.key
            name = key_or_tup.name
            callback = key_or_tup.callback
            # Carry the source's callback arguments over as well; without this
            # a copied entry would call its callback with no arguments.
            inherited = dict(getattr(key_or_tup, 'kwargs', {}))
            inherited.update(kwargs)
            kwargs = inherited
        elif isinstance(key_or_tup, (tuple, list)):
            if len(key_or_tup) == 3:
                key, name, callback = key_or_tup
            elif len(key_or_tup) == 2:
                key, name = key_or_tup
            elif len(key_or_tup) == 1:
                key = key_or_tup[0]
            else:
                raise ValueError('Invalid menu choice specification list')
        elif isinstance(key_or_tup, str):
            key = key_or_tup
        else:
            raise ValueError('Invalid menu choice specification list')
        self.kwargs = kwargs
        self.key = key
        self.name = name
        self.callback = callback

    def __str__(self):
        # Key right-aligned in two columns, e.g. ' q: Quit'.
        return '{: >2}: {}'.format(self.key, self.name)

    def __call__(self, *args, **kwargs):
        '''Run the callback with the stored keyword arguments.

        Call-time arguments are accepted but ignored. Returns the callback's
        result, or None when no callback is set.
        '''
        if self.callback is not None:
            return self.callback(**self.kwargs)


def return_foo():
    '''Return the constant string 'foo'.'''
    result = 'foo'
    return result

def meprint(foo='nofoo', bar='nobar'):
    '''Print ``foo`` and ``bar`` on one line with a '|' between them.'''
    pieces = (foo, '|', bar)
    print(*pieces)


class Quit(Choice):
    '''Menu entry with key 'q' and name 'Quit' that prints 'Quitting' when called.'''

    def __init__(self):
        super().__init__('q', 'Quit')

    def __call__(self, *args, **kwargs):
        # Accept (and ignore) arbitrary arguments so that a Quit entry can be
        # called anywhere a Choice can, matching Choice.__call__'s signature.
        print('Quitting')


class Menu(Choice):
    '''
    A special choice object which, when called, prints its entries, reads the
    user's selection and runs the selected entry. Because a Menu is itself a
    Choice, it can be listed inside another Menu as a sub-menu.

    key:             key under which this menu is listed in a parent menu
    name:            title printed above the entries
    choices:         optional iterable of Choice objects; a Quit entry is
                     always placed first
    loop_on_invalid: if True, an unknown key makes the menu ask again and an
                     empty reply selects a "Nothing selected" entry; if False,
                     an unknown key runs the Quit entry and the menu returns
    '''

    def __init__(self, key, name=None, choices=None, loop_on_invalid=False):
        self.loop_on_invalid = loop_on_invalid
        self.quit = Quit()
        # Always build a new list (accepting any iterable) so that add() never
        # mutates the sequence the caller passed in.
        self.choices = [self.quit] + list(choices or [])
        # Sets key/name and registers this menu as its own callback.
        super().__init__(key_or_tup=key, name=name, callback=self)

    def get_item(self, key):
        '''Return the entry registered under ``key``, or None if there is none.

        With ``loop_on_invalid`` an empty key yields a throw-away entry that
        prints 'Nothing selected'. For an unknown key 'Invalid entry!' is
        printed and, unless ``loop_on_invalid`` is set, the Quit entry is run.
        '''
        lookup = {choice.key: choice for choice in self.choices}
        if key == '' and self.loop_on_invalid:
            return Choice('', 'Nothing selected', lambda: print('Nothing selected'))
        if key in lookup:
            return lookup[key]
        print('Invalid entry!')
        if not self.loop_on_invalid:
            self.quit()
        # When looping, __call__ asks again and input() prints the prompt.
        return None

    def show_menu(self):
        '''Print the menu title followed by one line per entry.'''
        # Debug trace kept for output compatibility; format() instead of `+`
        # so that a menu without a name (name=None) does not raise TypeError.
        print('calling show_menu{}'.format(self.name))
        print(self.name)
        for choice in self.choices:
            print(choice)

    def add(self, choice):
        '''Append an entry; anything that is not a Choice is converted to one.'''
        # Keep existing Choice objects as they are: re-wrapping would lose the
        # subclass (e.g. a sub-menu) and the stored callback arguments.
        entry = choice if isinstance(choice, Choice) else Choice(choice)
        self.choices.append(entry)

    def user_pick_menu(self):
        '''Prompt the user and return the reply exactly as typed.'''
        return input("Enter menu selection: ")

    def __call__(self, *args, **kwargs):
        '''Show the menu, read a selection and run it.

        Returns whatever the selected entry returns, or None when an invalid
        key was entered and ``loop_on_invalid`` is False. With
        ``loop_on_invalid`` the prompt is repeated until a known key or an
        empty reply is entered.
        '''
        self.show_menu()
        while True:
            reply = self.user_pick_menu()
            choice = self.get_item(reply)
            if choice is not None:
                return choice()
            if not self.loop_on_invalid:
                return None

In [240]:
class Foo:
    '''Minimal callable object: calling an instance prints a fixed message.'''

    def __call__(self):
        print('Foo was called')


f = Foo()

# Calling the Quit entry directly prints 'Quitting'.
q = Quit()
q()

Quitting


In [244]:
# Sample entries for the demo menus: two that return a value, two that print,
# and one whose callback receives stored keyword arguments.
a = Choice('a', name='this is a choice', callback=return_foo)
b = Choice('b', name='this is another choice', callback=lambda: print('choice b'))
c = Choice('c', name='this is 3rd choice', callback=return_foo)
d = Choice('d', name='this is 4th choice', callback=lambda: print('choice d'))
e = Choice('e', name='print 666', callback=meprint, foo='666', bar='bar')

In [245]:
# 'Menu 1' is listed under key 'm' inside 'Menu 2', i.e. it acts as a sub-menu.
m = Menu('m', name='Menu 1', choices=[a, b, e])
m2 = Menu('5', name='Menu 2', choices=[c, d, e, m], loop_on_invalid=True)

In [246]:
# Run 'Menu 2' interactively (blocks on input()) and show what the selected
# entry returned; entries whose callback only prints return None.
r = m2()
print('r:', r)

calling show_menuMenu 2
Menu 2
 q: Quit
 c: this is 3rd choice
 d: this is 4th choice
 e: print 666
 m: Menu 1
Enter menu selection: e
666 | bar
r: None


In [None]:
# Print the repr of `f`.
# NOTE(review): this cell has no execution count, so it was never run in the saved state.
print(f)

In [66]:
# NOTE(review): stale cell — the recorded output 'aa' comes from an earlier
# definition of `f` (execution count 66 precedes the cells above). If `f` is
# the Foo instance defined above, whose __call__ takes no arguments, this
# call raises TypeError on a fresh Restart & Run All. TODO confirm and remove.
f('a')

'aa'

In [80]:
# Run 'Menu 1' on its own (interactive: waits for a selection via input()).
m()

In [104]:
# Menu subclasses Choice, so a menu can be used wherever a Choice is expected.
isinstance(m, Choice)

True