## MemN2N Network - Facebook bAbI Task Dataset

https://research.fb.com/downloads/babi/

https://github.com/facebookarchive/bAbI-tasks

See the following paper for more details about the MemN2N (End-To-End Memory Network) architecture:

https://arxiv.org/abs/1503.08895

In [0]:
import tensorflow as tf

#### Download the data

In [0]:
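#Uncomment to download the file from the command line instead (uses the raw file URL; the GitHub "tree" page URL returns HTML, not the archive)
#!wget https://raw.githubusercontent.com/atulpatelDS/Data_Files/master/Facebook_datasets/tasks_1-20_v1-2.tar.gz --quiet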

In [0]:
import requests

url = "https://raw.githubusercontent.com/atulpatelDS/Data_Files/master/Facebook_datasets/tasks_1-20_v1-2.tar.gz"
target_path = 'tasks_1-20_v1-2.tar.gz'

#Stream the archive to disk in chunks rather than holding it all in memory.
#The context manager releases the connection once the download is finished.
with requests.get(url, stream=True, timeout=60) as response:
    #Fail here with a clear HTTP error instead of silently skipping the write
    #and hitting a confusing error later when the archive is opened.
    response.raise_for_status()
    with open(target_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1 << 16):
            f.write(chunk)

In [0]:
import tarfile

#Peek at the start of the task-1 test split straight from the archive (nothing is unpacked to disk)
sample_member = 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt'

with tarfile.open('tasks_1-20_v1-2.tar.gz') as tar:
    f = tar.extractfile(sample_member)
    #600 is a size hint: reading stops once roughly that many bytes of lines were collected
    print(f.readlines(600))

#### Parse bAbI stories

In [0]:
def parse_stories(lines):
    """Parse bAbI task lines into parallel story / question / answer lists.

    Every input line has the form ``"<id> <text>"``. A line whose text contains
    tabs is a question line (``question<TAB>answer<TAB>supporting ids``); any
    other line is a story sentence. An id of 1 starts a new story. For each
    question, the story text accumulated so far (sentences joined by a single
    space) is recorded, so a story can appear several times with growing length.

    Args:
        lines: iterable of lines, either ``bytes`` (decoded as UTF-8, e.g. a
            file object returned by ``tarfile.extractfile``) or ``str``.
            Blank lines are skipped.

    Returns:
        Tuple ``(stories, questions, answers)`` of three equally long lists of
        strings, one entry per question line.

    Raises:
        ValueError: if a non-blank line has no numeric id followed by text, or
            a question line does not have exactly three tab-separated fields.
    """
    stories = []
    questions = []
    answers = []

    #Sentences of the story currently being read
    story_sentences = []
    for line in lines:
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        line = line.strip()
        if not line:
            #Tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue
        #Get line number and rest of the line
        nid, line = line.split(' ', 1)
        if int(nid) == 1:
            #Start a new story
            story_sentences = []
        if '\t' in line:
            #Question line: snapshot the story read so far
            q, a, _supporting = line.split('\t')
            stories.append(' '.join(story_sentences))
            questions.append(q)
            answers.append(a)
        else:
            story_sentences.append(line)
    return stories, questions, answers

#### Extract the train and test files

In [0]:
#Archive members holding the train / test split of task 1 (single supporting fact)
train_file = 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_train.txt'
test_file = 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt'

#Parse both members while the archive is open; only the parsed text lists are kept
with tarfile.open('tasks_1-20_v1-2.tar.gz') as tar:
    train_stories_txt, train_q_txt, train_a_txt = parse_stories(
        tar.extractfile(train_file))
    test_stories_txt, test_q_txt, test_a_txt = parse_stories(
        tar.extractfile(test_file))

#### Explore dataset

In [0]:
#parse_stories emits one story entry per question, so these are example counts
train_story_count = len(train_stories_txt)
test_story_count = len(test_stories_txt)

print('Number of stories in training data: ', train_story_count)
print('Number of stories in test data: ', test_story_count)

In [0]:
#Lets explore examples
example_num = 9900

print('Story: ', train_stories_txt[example_num])
print('Question: ', train_q_txt[example_num])
print('Answer: ', train_a_txt[example_num])

#### Build Tokenizer

In [0]:
#A single tokenizer (one shared vocabulary) for stories, questions and answers
t = tf.keras.preprocessing.text.Tokenizer()

#Fit on the training texts first, then on the test texts
for corpus in (train_stories_txt, train_q_txt, train_a_txt,
               test_stories_txt, test_q_txt, test_a_txt):
    t.fit_on_texts(corpus)

In [0]:
#Tokenizer word indices start at 1, so add one slot to also cover index 0 (used by padding)
vocab_size = 1 + len(t.word_index)
print(vocab_size)

In [0]:
#### Convert text to numbers using Tokenizer

In [0]:
#Training data: replace every word with its integer index from the tokenizer
train_stories_seq, train_q_seq, train_a_seq = (
    t.texts_to_sequences(texts)
    for texts in (train_stories_txt, train_q_txt, train_a_txt)
)

#Test data: same conversion with the same tokenizer
test_stories_seq, test_q_seq, test_a_seq = (
    t.texts_to_sequences(texts)
    for texts in (test_stories_txt, test_q_txt, test_a_txt)
)

In [0]:
#Max length (in tokens) of story, question and answer across train and test data
story_maxlen = max(len(seq) for seq in train_stories_seq + test_stories_seq)
question_maxlen = max(len(seq) for seq in train_q_seq + test_q_seq)
answer_maxlen = max(len(seq) for seq in train_a_seq + test_a_seq)

print('Max length for ...\nStory: ', story_maxlen, '\nQuestion: ',question_maxlen,'\nAnswer: ',answer_maxlen)

#### Pad the sequences

In [0]:
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

#Stories: bring train and test to the common story length
train_stories_seq, test_stories_seq = (
    pad_sequences(seqs, maxlen=story_maxlen)
    for seqs in (train_stories_seq, test_stories_seq)
)

#Questions: bring train and test to the common question length
train_q_seq, test_q_seq = (
    pad_sequences(seqs, maxlen=question_maxlen)
    for seqs in (train_q_seq, test_q_seq)
)

#Answers: padding could be skipped here because an answer is always a single word
train_a_seq, test_a_seq = (
    pad_sequences(seqs, maxlen=answer_maxlen)
    for seqs in (train_a_seq, test_a_seq)
)

#### Integer to word converter

In [0]:
#Reverse lookup (index -> word); needed to turn a predicted index back into text
int_to_word = {index: word for word, index in t.word_index.items()}
int_to_word[11]

### Define the model layers

#### Input layers

In [0]:
#Define input for story and question
story = tf.keras.layers.Input(shape=(story_maxlen,))
question = tf.keras.layers.Input(shape=(question_maxlen,))

### Build 3 encoders to provide 3 Embeddings

<ol><li>Input Memory (Story)</li>
<li>Output Memory (Story)</li>
<li>Question embedding</li></ol>

<img src="https://raw.githubusercontent.com/atulpatelDS/Machine_Learning/master/Images/MemN2N1.PNG" width="540" height="240" align="left"/>

#### Embedding A for Input memory

In [0]:
#Input-memory encoder (embedding A): embeds each story token into story_maxlen dimensions
a_encoder = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=story_maxlen),
    tf.keras.layers.Dropout(0.3),
])

#output is batch_size x story_maxlen x story_maxlen (embedding size)
a_embedded_output = a_encoder(story)

In [0]:
#Display the input-memory encoder and its output for the story input
a_encoder,a_embedded_output

#### Embedding B for Question

In [0]:
#Question encoder (embedding B): same embedding size as encoder A so the two can be dotted together
b_question_encoder = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size,
                              output_dim=story_maxlen,
                              input_length=question_maxlen),
    tf.keras.layers.Dropout(0.3),
])

#output is batch_size x question_maxlen x story_maxlen (embedding size)
b_question_embeddding_output = b_question_encoder(question)

In [0]:
#Display the question encoder's output for the question input
b_question_embeddding_output

#### Embedding C for Story, to use with Controller

In [0]:
#Output-memory encoder (embedding C): embeds each story token into question_maxlen dimensions
c_encoder = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=question_maxlen),
    tf.keras.layers.Dropout(0.3),
])

#output is batch_size x story_maxlen x question_maxlen (embedding size)
c_embedded_output = c_encoder(story)

In [0]:
#Display the output-memory encoder's output for the story input
c_embedded_output

### Attention layer

#### Alignment Weights

In [0]:
#Match every story position against every question position by contracting the
#shared embedding axis (axis 2 of both tensors)
memory_and_question = [a_embedded_output, b_question_embeddding_output]
match_scores = tf.keras.layers.dot(memory_and_question, axes=(2, 2))

#Normalise the scores with a softmax (applied over the last axis)
#output is batch_size x story_maxlen x question_maxlen
attention_weights = tf.keras.layers.Activation('softmax')(match_scores)

In [0]:
#Display the attention weights tensor
attention_weights

#### Calculate Context Vector / Weighted sum (here we are using Add function)

In [0]:
#Element-wise sum of attention weights and output memory; both are
#batch_size x story_maxlen x question_maxlen, and so is the result
attention_and_memory = [attention_weights, c_embedded_output]
weighted_sum = tf.keras.layers.add(attention_and_memory)

#Swap the two non-batch axes so the result lines up with the question embedding
#Output batch_size x question_maxlen x story_maxlen
swap_axes = tf.keras.layers.Permute((2, 1))
permuted_weighted_sum = swap_axes(weighted_sum)

In [0]:
#Display the weighted sum before and after swapping its non-batch axes
weighted_sum ,permuted_weighted_sum 

<img src="https://raw.githubusercontent.com/atulpatelDS/Machine_Learning/master/Images/MemN2N.PNG" width="540" height="240" align="left"/>

##### Add the context vector to the question embedding (first hop)

In [0]:
#Combine the permuted context with the question embedding (same shape on both sides)
#Output batch_size x query_maxlen x story_maxlen
context_and_question = [permuted_weighted_sum, b_question_embeddding_output]
output_1 = tf.keras.layers.add(context_and_question)
output_1

#### Output using LSTM

In [0]:
#Summarise the combined sequence with an LSTM
final_output = tf.keras.layers.LSTM(32)(output_1)
#Last hidden state - batch_size x 32

#Add dropout
final_output = tf.keras.layers.Dropout(0.3)(final_output)

#Output layer: one probability per vocabulary index.
#vocab_size already includes the extra slot for index 0 (len(word_index) + 1),
#so no further "+ 1" is needed; the extra unit was a class no label can ever take.
answer = tf.keras.layers.Dense(vocab_size, activation='softmax')(final_output)
#Output batch_size x vocab_size

### Build the model

In [0]:
#Two inputs (story, question) -> distribution over answer words
model = tf.keras.models.Model(inputs=[story, question], outputs=answer)

#Answers are integer word indices, hence the sparse categorical loss
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

### Train the model

In [0]:
#Inputs are passed in the same order as the model's inputs: story first, then question
train_inputs = [train_stories_seq, train_q_seq]
validation_inputs = [test_stories_seq, test_q_seq]

#The test split doubles as validation data here
model.fit(
    train_inputs,
    train_a_seq,
    batch_size=32,
    epochs=500,
    validation_data=(validation_inputs, test_a_seq),
)

In [0]:
import os

#Build the path from its parts: in 'models\babi_memn2n.h5' the "\b" is a
#backspace escape, so that literal never pointed inside the models directory.
MODEL_PATH = os.path.join('models', 'babi_memn2n.h5')

#Make sure the target directory exists before writing the model file
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
model.save(MODEL_PATH)

#Reload from the very same path to confirm the saved file is usable
model = tf.keras.models.load_model(MODEL_PATH)

### Model Prediction

#### Prediction function

In [0]:
import numpy as np

def predict_answer(test_num):
    """Predict the answer word for one example of the test set.

    Reads the module-level ``test_stories_seq``, ``test_q_seq``, ``model`` and
    ``int_to_word``.

    Args:
        test_num: index of the example in the padded test arrays.

    Returns:
        The word whose index has the highest predicted probability.
    """
    #Padded story and question sequences of the chosen example
    story_row = test_stories_seq[test_num]
    question_row = test_q_seq[test_num]

    #The model expects a batch axis, so reshape each sequence to batch_size 1
    story_batch = np.reshape(story_row, (1, len(story_row)))
    question_batch = np.reshape(question_row, (1, len(question_row)))

    #Predict, then take the index with the highest probability
    probabilities = model.predict([story_batch, question_batch])
    best_index = np.argmax(probabilities)

    #Convert index to word
    return int_to_word[best_index]

In [0]:
#### Test Predictions

In [0]:
#Fixed example so the output is reproducible; swap in the line below for a random one
test_num = 789
#test_num = np.random.randint(0, len(test_stories_txt))   #random story number e.g. 789, 885 etc

#Show the example text, then the model's answer to its question
print('Story number: ', test_num)
print(test_stories_txt[test_num])
print('\nQuestion: ' + test_q_txt[test_num])
print('Answer: ' + predict_answer(test_num))