In [0]:
# Mount Google Drive inside the Colab VM at /content/drive so the dataset files
# stored there can be opened; this prompts for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
cd /content/drive/My Drive/BAbi

/content/drive/My Drive/BAbi


In [0]:
ls

qa1_single-supporting-fact_test.txt   QAS_BAbi.ipynb
qa1_single-supporting-fact_train.txt  Untitled0.ipynb


In [0]:
from __future__ import print_function

import keras
from keras.models import Sequential, Model
from keras.layers.embeddings import Embedding
from keras.layers import Input, Activation, Dense, Permute, add, Dropout, dot, concatenate
from keras.layers import LSTM, GRU
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import pad_sequences
from keras import backend as K

from functools import reduce
import numpy as np
import re

Using TensorFlow backend.


In [0]:
def tokenize(sent):
  """Split a sentence into word and punctuation tokens.

  Args:
    sent: The sentence to split, as a string.

  Returns:
    A list of non-empty tokens with surrounding whitespace removed; runs of
    non-word characters (e.g. '.', '?') are kept as their own tokens.
  """
  # The capturing group makes re.split keep the separators. The pattern must not
  # be optional: a pattern that can match the empty string makes Python >= 3.7
  # split between every character and yield None for the unmatched group.
  pieces = re.split(r'(\W+)', sent)
  return [piece.strip() for piece in pieces if piece and piece.strip()]

In [0]:
def parse_stories(lines,only_supporting=False):
  """Parse numbered story lines into (story, question, answer) samples.

  Each line starts with an integer id followed by a space. An id of 1 starts a
  new story. A line containing tabs is a question of the form
  "question<TAB>answer<TAB>supporting ids"; any other line is a statement.

  Args:
    lines: Iterable of text lines in the format described above.
    only_supporting: If True, each sample's story holds only the statements
      whose line ids are listed as supporting facts; otherwise it holds every
      statement seen so far in the current story.

  Returns:
    A list of (substory, question_tokens, answer) tuples, where substory is a
    list of tokenized statements and answer is the raw answer string.
  """
  data = []
  story = []
  for line in lines:
    if not line.strip():
      # Tolerate blank lines (e.g. a trailing newline at the end of the file).
      continue
    nid, line = line.split(' ', 1)
    if int(nid) == 1:
      story = []
    if '\t' in line:
      q, a, supporting = line.split('\t')
      q = tokenize(q)
      if only_supporting:
        # Supporting ids are 1-based line ids within the current story.
        supporting = [int(i) for i in supporting.split()]
        substory = [story[i - 1] for i in supporting]
      else:
        # Drop the empty placeholders left behind by earlier questions.
        substory = [x for x in story if x]
      data.append((substory, q, a))
      # Question lines consume a line id too; the placeholder keeps
      # story[i - 1] aligned with line id i for later supporting-fact lookups.
      story.append('')
    else:
      story.append(tokenize(line))
  return data

In [0]:
def get_stories(file,only_supporting=False):
  """Read a story file and return samples with each story flattened.

  Args:
    file: Path of the text file to read.
    only_supporting: Forwarded to parse_stories.

  Returns:
    A list of (story_tokens, question_tokens, answer) tuples, where
    story_tokens is the concatenation of the sample's tokenized statements.
  """
  with open(file) as f:
    data = parse_stories(f.readlines(), only_supporting=only_supporting)
  # A nested comprehension flattens in linear time and also copes with an empty
  # statement list, which reduce() without an initial value rejects.
  return [
      ([token for sentence in story for token in sentence], q, ans)
      for story, q, ans in data
  ]

In [0]:
def vectorize_stories(data,word_int,story_maxlen,query_maxlen):
  """Convert token-level samples into padded index arrays and one-hot answers.

  Args:
    data: Iterable of (story_tokens, question_tokens, answer_word) tuples.
    word_int: Mapping from word to integer index.
    story_maxlen: Length the story index sequences are padded to.
    query_maxlen: Length the question index sequences are padded to.

  Returns:
    A tuple (stories, questions, answers): the first two are the results of
    pad_sequences on the index lists, the third is an array with one row per
    sample of width len(word_int) + 1 holding a 1 at the answer's index.
  """
  encoded_stories, encoded_queries, one_hot_answers = [], [], []
  width = len(word_int) + 1
  for story_tokens, query_tokens, answer_word in data:
    encoded_stories.append([word_int[token] for token in story_tokens])
    encoded_queries.append([word_int[token] for token in query_tokens])
    target = np.zeros(width)
    target[word_int[answer_word]] = 1
    one_hot_answers.append(target)
  stories_padded = pad_sequences(encoded_stories, maxlen=story_maxlen)
  queries_padded = pad_sequences(encoded_queries, maxlen=query_maxlen)
  return (stories_padded, queries_padded, np.array(one_hot_answers))

In [0]:
# Parse the qa1 (single supporting fact) train and test files into
# (story tokens, question tokens, answer) samples.
TRAIN_FILE = 'qa1_single-supporting-fact_train.txt'
TEST_FILE = 'qa1_single-supporting-fact_test.txt'
train_stories = get_stories(TRAIN_FILE)
test_stories = get_stories(TEST_FILE)

  return _compile(pattern, flags).split(string, maxsplit)


In [0]:
# Peek at one parsed sample: (flattened story tokens, question tokens, answer word).
train_stories[0]

(['Mary',
  'moved',
  'to',
  'the',
  'bathroom',
  '.',
  'John',
  'went',
  'to',
  'the',
  'hallway',
  '.'],
 ['Where', 'is', 'Mary', '?'],
 'bathroom')

In [0]:
# Build the sorted list of distinct tokens across stories, questions and answers
# of both splits. Accumulating into a set avoids re-copying an ever-growing list
# on every sample.
unique_tokens = set()
for story, q, a in train_stories + test_stories:
  unique_tokens.update(story, q, [a])
vocab = sorted(unique_tokens)

In [0]:
# One slot more than the number of distinct tokens.
vocab_size = len(vocab) + 1
# Longest story and longest question (in tokens) over both splits; these set the
# padded sequence lengths.
story_lens = [len(story) for story, _, _ in train_stories + test_stories]
story_maxlen = max(story_lens)
query_lens = [len(query) for _, query, _ in train_stories + test_stories]
query_maxlen = max(query_lens)

In [0]:
# Lookup tables between words and integer ids; ids start at 1, so 0 is never
# assigned to a word.
word_int = {word: idx for idx, word in enumerate(vocab, start=1)}
int_word = {idx: word for idx, word in enumerate(vocab, start=1)}

In [0]:
# Vectorize both splits with the same vocabulary and padding lengths.
vectorize_args = (word_int, story_maxlen, query_maxlen)
story_train, question_train, answer_train = vectorize_stories(train_stories, *vectorize_args)
story_test, question_test, answer_test = vectorize_stories(test_stories, *vectorize_args)

In [0]:
# Inspect the vectorized questions: one row of word ids per training sample.
question_train

array([[ 7, 13,  5,  2],
       [ 7, 13,  3,  2],
       [ 7, 13,  3,  2],
       ...,
       [ 7, 13,  5,  2],
       [ 7, 13,  4,  2],
       [ 7, 13,  4,  2]], dtype=int32)

In [0]:
# Report the array shapes of each split as a sanity check.
for title, (stories, questions, answers) in (
    ("Training Data", (story_train, question_train, answer_train)),
    ('Testing Data', (story_test, question_test, answer_test)),
):
  print(title)
  print('story shape', stories.shape)
  print('question shape', questions.shape)
  print('Answer shape', answers.shape)

Training Data
story shape (10000, 68)
question shape (10000, 4)
Answer shape (10000, 22)
Testing Data
story shape (1000, 68)
question shape (1000, 4)
Answer shape (1000, 22)


In [0]:
# Training hyper-parameters: number of epochs, mini-batch size, LSTM units.
epochs, batch_size, lstm_size = 100, 32, 64

In [0]:
# Model inputs: padded word-id sequences for the story and for the question.
input_seq = Input(shape=(story_maxlen,))
input_que = Input(shape=(query_maxlen,))

# Story encoder "m": embeds each story word into 64 dimensions.
input_encoder_m = Embedding(input_dim = vocab_size,output_dim = 64)
input_encoder_m_dropout = Dropout(0.3)

# Story encoder "c": embeds each story word into query_maxlen dimensions so its
# output can later be added to the story/question match matrix.
input_encoder_c = Embedding(input_dim=vocab_size,output_dim = query_maxlen)
input_encoder_c_dropout = Dropout(0.3)

# Question encoder: embeds each question word into 64 dimensions.
que_encoder = Embedding(input_dim = vocab_size,output_dim=64,input_length=query_maxlen)
que_encoder_dropout = Dropout(0.3)

# Apply each encoder followed by its own dropout layer.
input_encoded_m = input_encoder_m(input_seq)        # (batch, story_maxlen, 64)
input_encoded_m = input_encoder_m_dropout(input_encoded_m)
input_encoded_c = input_encoder_c(input_seq)        # (batch, story_maxlen, query_maxlen)
# Use the dropout layer created for encoder "c" (the "m" one was applied here
# before, leaving input_encoder_c_dropout unused).
input_encoded_c = input_encoder_c_dropout(input_encoded_c)
que_encoded = que_encoder(input_que)                # (batch, query_maxlen, 64)
que_encoded = que_encoder_dropout(que_encoded)

In [0]:
# Show the symbolic tensors to check the encoded shapes.
for tensor in (input_encoded_m, input_encoded_c, que_encoded):
  print(tensor)

Tensor("dropout_49/cond/Merge:0", shape=(?, 68, 64), dtype=float32)
Tensor("dropout_49_1/cond/Merge:0", shape=(?, 68, 4), dtype=float32)
Tensor("dropout_51/cond/Merge:0", shape=(?, 4, 64), dtype=float32)


In [0]:
# Score every story position against every question position by taking the dot
# product over the 64-d embedding axis -> (batch, story_maxlen, query_maxlen).
# NOTE(review): the scores are used un-normalised; memory-network formulations
# usually apply a softmax to them — confirm that omitting it is intentional.
match = dot([input_encoded_m, que_encoded],axes=(2, 2))
print(match)

Tensor("dot_13/MatMul:0", shape=(?, 68, 4), dtype=float32)


In [0]:
# Element-wise sum of the match scores and the second story embedding; both are
# (batch, story_maxlen, query_maxlen).
response = add([input_encoded_c,match])
# Swap the last two axes -> (batch, query_maxlen, story_maxlen), giving one row
# per question position like the question encoding.
response = Permute((2,1))(response)
print(response)

Tensor("permute_10/transpose:0", shape=(?, 4, 68), dtype=float32)


In [0]:
# Join along the last axis: for each question position, the story_maxlen response
# features followed by the 64-d question embedding.
answer = concatenate([response,que_encoded])
print(answer.shape)


(?, 4, 132)


In [0]:
# Summarise the joined sequence with an LSTM of lstm_size units.
answer = LSTM(lstm_size)(answer)
answer = Dropout(0.3)(answer)
# Softmax over vocab_size outputs, matching the width of the one-hot answers.
answer = Dense(vocab_size,activation='softmax')(answer) 

In [0]:
# Wire the story and question inputs to the softmax output, then train on the
# training split.
model = Model([input_seq, input_que], answer)
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

model.fit(
    [story_train, question_train],
    answer_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f611b70ce48>

In [0]:
# Print the model's answer next to the expected one for the first ten test samples.
for i in range(10):
  current_inp = test_stories[i]
  story_tokens, question_tokens, expected = current_inp
  current_story, current_question, current_answer = vectorize_stories(
      [current_inp], word_int, story_maxlen, query_maxlen)
  prediction = model.predict([current_story, current_question])
  # A single sample was passed in, so the argmax is the predicted word id.
  current_pred = int_word[np.argmax(prediction)]
  print(' '.join(story_tokens), ' '.join(question_tokens),
        ' Prediction :', current_pred, 'Actual :', expected)

John travelled to the hallway . Mary journeyed to the bathroom . Where is John ?  Prediction : hallway Actual : hallway
John travelled to the hallway . Mary journeyed to the bathroom . Daniel went back to the bathroom . John moved to the bedroom . Where is Mary ?  Prediction : bathroom Actual : bathroom
John travelled to the hallway . Mary journeyed to the bathroom . Daniel went back to the bathroom . John moved to the bedroom . John went to the hallway . Sandra journeyed to the kitchen . Where is Sandra ?  Prediction : kitchen Actual : kitchen
John travelled to the hallway . Mary journeyed to the bathroom . Daniel went back to the bathroom . John moved to the bedroom . John went to the hallway . Sandra journeyed to the kitchen . Sandra travelled to the hallway . John went to the garden . Where is Sandra ?  Prediction : hallway Actual : hallway
John travelled to the hallway . Mary journeyed to the bathroom . Daniel went back to the bathroom . John moved to the bedroom . John went to th