In [3]:
import tensorflow as tf
import pickle
import unicodedata
import re
import numpy as np
import os
import io
import time
from encoder import * 
from decoder import *
from train import *

## Read data

In [4]:
PATH = '../data/'

In [5]:
d_tensor_test = np.load(PATH + 'd_tensor_test.npy',allow_pickle=True)
r_tensor_test = np.load(PATH + 'r_tensor_test.npy',allow_pickle=True)
dia_test = [t[0] for t in d_tensor_test]
aid_test = [t[1] for t in d_tensor_test]
res_test = [t[0] for t in r_tensor_test]
sid_test = [t[1] for t in r_tensor_test]

In [6]:
with open(PATH + 'tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
vocab_size = len(tokenizer.word_index) + 1

In [7]:
BATCH_SIZE = 96
HIDDEN_SIZE = 512
NUM_LAYER = 4
DROP_OUT = 0.2
embedding_dim = 512
speaker_dim = 128
MAXLEN = 50
speakerNum = 14
EPOCHS = 10

## Response with beam search

In [8]:
checkpoint_dir = '../persona_ckpt_512_sp128/'

In [9]:
encoder = Encoder(HIDDEN_SIZE, vocab_size, embedding_dim, NUM_LAYER, BATCH_SIZE)
decoder = Decoder(HIDDEN_SIZE, vocab_size, embedding_dim, speaker_dim,NUM_LAYER)
optimizer = tf.keras.optimizers.Adam()

cp = tf.train.Checkpoint(optimizer=optimizer,
                         encoder=encoder,
                         decoder=decoder)
status = cp.restore(checkpoint_dir + "speaker-add-ckpt-4")
print(status)
train_nn = Train(encoder, decoder, optimizer, tokenizer)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus object at 0xb389947b8>


In [10]:
def convert_to_word(response_sentence,tokenizer):
    sentence = list()
    for idx in response_sentence:
        sentence.append(tokenizer.index_word[idx])
        if(tokenizer.index_word[idx]=='<eos>'):
            break;
    print(' '.join(sentence))

In [11]:
# Converts the unicode file to ascii
def unicode_to_ascii(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')


def preprocess_sentence(w):
    w = unicode_to_ascii(w.lower().strip())

    # creating a space between a word and the punctuation following it
    # eg: "he is a boy." => "he is a boy ."
    # Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)

    # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)

    w = w.rstrip().strip()

    # adding a start and an end token to the sentence
    # so that the model know when to start and stop predicting.
    w = '<sos> ' + w + ' <eos>'
    return w

In [12]:
def conver_to_tensor(sentence,tokenizer):
    sentence = preprocess_sentence(sentence)
    inputs = [tokenizer.word_index[i] for i in sentence.split(' ')]
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=MAXLEN,
                                                         padding='post')
    tensor = tf.convert_to_tensor(inputs,dtype=np.int32)
    return tensor[0]

In [13]:
def response(train_nn,inp,speaker_id,addressee_id=None,max_len = MAXLEN,beam_size = 2):
    generate_size = beam_size
    inputs = np.expand_dims(inp, axis=0)
    s_id = np.expand_dims(speaker_id, axis=0)
    if addressee_id is not None:
        a_id = np.expand_dims(addressee_id, axis=0)
    enc_hidden = [tf.zeros((1, HIDDEN_SIZE)),tf.zeros((1, HIDDEN_SIZE))]
    enc_out, enc_hidden,enc_c = train_nn.encoder(inputs, enc_hidden)
    dec_init_state = [enc_hidden,enc_c]
    dec_input = tf.convert_to_tensor([train_nn.tokenizer.word_index['<sos>']])

    sos = train_nn.tokenizer.word_index['<sos>']
    dec_input = tf.expand_dims(dec_input,0)

    if addressee_id is not None:
        predictions, dec_hidden, dec_c,_ = train_nn.decoder(dec_input,enc_out, dec_init_state,s_id,a_id)
    else:
        predictions, dec_hidden, dec_c,_ = train_nn.decoder(dec_input,enc_out, dec_init_state,s_id)

    dec_init_state = [dec_hidden,dec_c]
    pred_prob,pred_top_k = tf.math.top_k(predictions,k=beam_size)
#     for i in range(pred_prob[0].shape[0]):
#                 print("prob:{},word:{}".format(pred_prob[0][i],tokenizer.index_word[pred_top_k[0][i].numpy()]))
    # beam: (prob,sentence,dec_inp,dec_init,isEnd)
    finished = list() # to store finished sentence
    beam = list()
    for ind, i in enumerate(pred_top_k[0]):
        word_idx = i.numpy()
        if tokenizer.index_word[word_idx]=='<eos>':
            finished.append([pred_prob[0][ind],[sos]+[word_idx],i,dec_init_state,True])
        beam.append([pred_prob[0][ind],[sos]+[i.numpy()],i,dec_init_state,False])
    for t in range(2,max_len):
        tmp_beam = list()
        for ind,b in enumerate(beam):
            if b[-1]:
                continue
            dec_input = tf.expand_dims(tf.convert_to_tensor([b[2]]),0)
            dec_init_state = b[3]
            if addressee_id is not None:
                predictions, dec_hidden, dec_c,_ = train_nn.decoder(dec_input,enc_out, dec_init_state,s_id,a_id)
            else:
                predictions, dec_hidden, dec_c,_ = train_nn.decoder(dec_input,enc_out, dec_init_state,s_id)
            
            dec_init_state = [dec_hidden,dec_c]

            # use beam search to get top k prediction
            pred_prob,pred_top_k = tf.math.top_k(predictions,k=beam_size)
#             for i in range(pred_prob[0].shape[0]):
#                 print("beam:{} prob:{},word:{}".format(ind,pred_prob[0][i],tokenizer.index_word[pred_top_k[0][i].numpy()]))
            beam_prob = beam[ind][0]
            beam_sen = beam[ind][1]
            for ind, i in enumerate(pred_top_k[0]):
                isEnd = False
                word_idx = i.numpy()
                if tokenizer.index_word[word_idx]=='<eos>':
                    isEnd = True
                tmp_beam.append([(beam_prob + pred_prob[0][ind])/(t+1),beam_sen + [word_idx],i,dec_init_state,isEnd])
                
        # select top k candidates combination
        tmp_beam.sort(key=lambda x:x[0],reverse=True)
        tmp_beam = tmp_beam[:generate_size]
        beam = list()
        for ind, b in enumerate(tmp_beam):
            if b[-1]:
                finished.append(b)
                generate_size = generate_size - 1
            else:
                tmp_beam[ind][0] = b[0] * (t+1) # continue to compute probability of whole sentence
                beam.append(tmp_beam[ind])
        
    finished.sort(key=lambda x:x[0],reverse=True)
    print("{} Finished sentence:".format(len(finished)))
    for i in range(len(finished)):
        convert_to_word(finished[i][1],train_nn.tokenizer)
#     response_sentence = finished[:beam_size]
#     print("Response sentence:")
#     for i in range(beam_size):
#         convert_to_word(response_sentence[i][1],train_nn.tokenizer)
    

In [2]:
idx2 = 1291

In [14]:
convert_to_word(dia_test[idx2],tokenizer)
convert_to_word(res_test[idx2],tokenizer)
print("RESPONSE:")
response(train_nn,dia_test[idx2],sid_test[idx2],aid_test[idx2],beam_size=3)
print("addressee:{}".format(aid_test[idx2]))
print("speaker:{}".format(sid_test[idx2]))

<sos> i m being awful ? you re the one who went out of your way to hurt me . <eos>
<sos> because you were being selfish . <eos>
RESPONSE:
3 Finished sentence:
<sos> what ? <eos>
<sos> i m sorry . <eos>
<sos> i m sorry , i m sorry . <eos>
addressee:7
speaker:6


In [15]:
addressee = tf.convert_to_tensor(8)
convert_to_word(dia_test[idx2],tokenizer)
convert_to_word(res_test[idx2],tokenizer)
print("RESPONSE:")
response(train_nn,dia_test[idx2],sid_test[idx2],addressee,beam_size=3)
print("addressee:{}".format(aid_test[idx2]))
print("speaker:{}".format(sid_test[idx2]))

<sos> i m being awful ? you re the one who went out of your way to hurt me . <eos>
<sos> because you were being selfish . <eos>
RESPONSE:
3 Finished sentence:
<sos> i m sorry . <eos>
<sos> i m sorry , i m sorry . <eos>
<sos> i m sorry , i m sorry . i m sorry . <eos>
addressee:7
speaker:6


In [16]:
addressee = tf.convert_to_tensor(10)
convert_to_word(dia_test[idx2],tokenizer)
convert_to_word(res_test[idx2],tokenizer)
print("RESPONSE:")
response(train_nn,dia_test[idx2],sid_test[idx2],addressee,beam_size=3)

<sos> i m being awful ? you re the one who went out of your way to hurt me . <eos>
<sos> because you were being selfish . <eos>
RESPONSE:
3 Finished sentence:
<sos> what ? <eos>
<sos> i m sorry . <eos>
<sos> i m sorry , i m sorry . <eos>


In [17]:
idx = 3198

In [18]:
convert_to_word(dia_test[idx],tokenizer)
convert_to_word(res_test[idx],tokenizer)
print("RESPONSE:")
response(train_nn,dia_test[idx],sid_test[idx],aid_test[idx],beam_size=3)
print("addressee:{}".format(aid_test[idx]))
print("speaker:{}".format(sid_test[idx]))

<sos> well , no , no , wait , wait , wait . all right , i gotta go . just listen . promise me , that you will wait a minute before you call her . <eos>
<sos> ok . why ? <eos>
RESPONSE:
4 Finished sentence:
<sos> hey ! <eos>
<sos> oh , i m sorry . <eos>
<sos> oh , i m sorry , i m sorry . <eos>
<sos> <eos>
addressee:5
speaker:0


In [19]:
addressee = tf.convert_to_tensor(2)
convert_to_word(dia_test[idx],tokenizer)
convert_to_word(res_test[idx],tokenizer)
print("RESPONSE:")
response(train_nn,dia_test[idx],sid_test[idx2],addressee,beam_size=3)

<sos> well , no , no , wait , wait , wait . all right , i gotta go . just listen . promise me , that you will wait a minute before you call her . <eos>
<sos> ok . why ? <eos>
RESPONSE:
3 Finished sentence:
<sos> what ? <eos>
<sos> i m sorry . <eos>
<sos> i m sorry , i m sorry . <eos>


In [20]:
addressee = tf.convert_to_tensor(3)
convert_to_word(dia_test[idx],tokenizer)
convert_to_word(res_test[idx],tokenizer)
print("RESPONSE:")
response(train_nn,dia_test[idx],sid_test[idx2],addressee,beam_size=3)

<sos> well , no , no , wait , wait , wait . all right , i gotta go . just listen . promise me , that you will wait a minute before you call her . <eos>
<sos> ok . why ? <eos>
RESPONSE:
3 Finished sentence:
<sos> what ? <eos>
<sos> i m sorry . <eos>
<sos> i m sorry , i m sorry . <eos>
