In [1]:
import os
import pickle
import argparse
import numpy as np
from model import Options, Seq2SeqAttn
import tensorflow as tf
from tensorflow.contrib.seq2seq import tile_batch
from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
import tensorflow as tf

In [2]:
# Notebook configuration, expressed as command-line options.
# Every option carries a default, so the cell also works with no arguments.
parser = argparse.ArgumentParser()

# ---- Locations of the pre-processed inputs ----
parser.add_argument('--data_path', type=str, default='../pre-data/',
                    help='the directory to the data')
parser.add_argument('--word_embeddings_path', type=str,
                    default='../pre-data/word_embeddings.npy',
                    help='the directory to the pre-trained word embeddings')
parser.add_argument('--VAD_path', type=str,
                    default='../pre-data/VAD.npy',
                    help='the directory to VAD')
parser.add_argument('--tf_path', type=str,
                    default='../pre-data/tf.npy',
                    help='the directory to term frequency')
parser.add_argument('--VAD_loss_path', type=str,
                    default='../pre-data/VAD_loss.npy',
                    help='the directory to VAD loss for each word')
parser.add_argument('--ti_path', type=str,
                    default='../pre-data/mu_li.npy',
                    help='the directory to term importance')

# ---- Training / decoding hyper-parameters ----
parser.add_argument('--num_epochs', type=int, default=1,
                    help='the number of epochs to train the data')
parser.add_argument('--batch_size', type=int, default=64,
                    help='the batch size')
parser.add_argument('--learning_rate', type=float, default=0.0001,
                    help='the learning rate')
parser.add_argument('--beam_width', type=int, default=256,
                    help='the beam width when decoding')

# ---- Model dimensions ----
parser.add_argument('--word_embed_size', type=int, default=256,
                    help='the size of word embeddings')
parser.add_argument('--n_hidden_units_enc', type=int, default=256,
                    help='the number of hidden units of encoder')
parser.add_argument('--n_hidden_units_dec', type=int, default=256,
                    help='the number of hidden units of decoder')
# TODO(author): the original cell carried an open question about attn_depth.
parser.add_argument('--attn_depth', type=int, default=128,
                    help='attention depth')

# ---- Checkpoint locations ----
parser.add_argument('--restore_path', type=str,
                    default='../model_dailydialog_rf',
                    help='the path to restore the trained model')
parser.add_argument('--restore_epoch', type=int, default=5,
                    help='the epoch to restore')
parser.add_argument('--save_path', type=str,
                    default='../model_dailydialog_rf',
                    help='the path to save the trained model to')

# parse_known_args() is used instead of parse_args(): arguments this parser
# does not declare are returned in `unknown` instead of aborting the cell
# (presumably needed because the notebook kernel adds its own argv entries).
args, unknown = parser.parse_known_args()


In [3]:
def read_data(data_path):
    """Load the training and validation splits together with the vocabulary.

    Args:
        data_path: directory containing the `train/` and `validation/`
            sub-directories and the `token2id.pickle` file.

    Returns:
        A tuple `(train_set, valid_set, token2id)`. Each set is a dict with
        the keys 'enc_input', 'dec_input', 'target', 'enc_input_len' and
        'dec_input_len', each mapped to the array stored in `<key>.npy`.
        `token2id` is whatever object was pickled in `token2id.pickle`.
    """
    array_names = ('enc_input', 'dec_input', 'target',
                   'enc_input_len', 'dec_input_len')

    def load_split(split_dir):
        # One `<name>.npy` file per array, loaded in the order listed above.
        return {name: np.load(os.path.join(split_dir, name + '.npy'))
                for name in array_names}

    train_set = load_split(os.path.join(data_path, 'train'))
    valid_set = load_split(os.path.join(data_path, 'validation'))

    vocab_file_path = os.path.join(data_path, 'token2id.pickle')
    with open(vocab_file_path, 'rb') as vocab_file:
        token2id = pickle.load(vocab_file)

    return train_set, valid_set, token2id

In [4]:
# Training/validation arrays and the vocabulary; the padded sequence widths
# are taken from the second axis of the training arrays.
train_set, valid_set, token2id = read_data(args.data_path)
max_uttr_len_enc = train_set['enc_input'].shape[1]
max_uttr_len_dec = train_set['dec_input'].shape[1]

# Pre-computed lookup tables.
word_embeddings = np.load(args.word_embeddings_path)
VAD = np.load(args.VAD_path)

# NOTE: despite its name, `termfreq` is read from --ti_path (term importance),
# not from --tf_path. Both tables below are reshaped into single-column arrays.
termfreq = np.load(args.ti_path).reshape(-1, 1)
VAD_loss = np.load(args.VAD_loss_path).reshape(-1, 1)


In [5]:
# Small fixed sample (the first five training rows) for interactive inspection.
enc_input = train_set['enc_input'][0:5, :]
dec_input = train_set['dec_input'][0:5, :]
target = train_set['target'][0:5, :]

# Encoder lengths of the same five rows.
# This name used to be bound to a tf.placeholder of shape [args.batch_size]
# that was never fed and was rebound to these values immediately afterwards;
# the placeholder only left an unused node in the default graph and did not
# match the five-row sample, so the real lengths are assigned directly.
enc_input_len = train_set['enc_input_len'][0:5]

In [6]:
# Encoder input lengths of the first five training rows (rebinds enc_input_len).
enc_input_len = train_set['enc_input_len'][0:5]

In [7]:
# Build the model in PREDICT mode from the parsed arguments and loaded data.
options = Options(
    mode='PREDICT',
    # vocabulary and special tokens
    vocab_size=len(token2id),
    go_index=token2id['<go>'],
    eos_index=token2id['<eos>'],
    word_embeddings=word_embeddings,
    # padded sequence widths
    max_uttr_len_enc=max_uttr_len_enc,
    max_uttr_len_dec=max_uttr_len_dec,
    # run settings
    num_epochs=args.num_epochs,
    batch_size=args.batch_size,
    learning_rate=args.learning_rate,
    beam_width=args.beam_width,
    # layer sizes
    word_embed_size=args.word_embed_size,
    n_hidden_units_enc=args.n_hidden_units_enc,
    n_hidden_units_dec=args.n_hidden_units_dec,
    attn_depth=args.attn_depth,
)
model = Seq2SeqAttn(options)

# List the names of the variables exposed as model.tvars
# (presumably the trainable variables -- confirm in model.py).
for var in model.tvars:
    print(var.name)


Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0


In [8]:
# Build the model in TRAIN mode; this rebinds `options` and `model`.
options = Options(
    mode='TRAIN',
    # vocabulary and special tokens
    vocab_size=len(token2id),
    go_index=token2id['<go>'],
    eos_index=token2id['<eos>'],
    word_embeddings=word_embeddings,
    # padded sequence widths
    max_uttr_len_enc=max_uttr_len_enc,
    max_uttr_len_dec=max_uttr_len_dec,
    # run settings
    num_epochs=args.num_epochs,
    batch_size=args.batch_size,
    learning_rate=args.learning_rate,
    beam_width=args.beam_width,
    # layer sizes
    word_embed_size=args.word_embed_size,
    n_hidden_units_enc=args.n_hidden_units_enc,
    n_hidden_units_dec=args.n_hidden_units_dec,
    attn_depth=args.attn_depth,
)
model = Seq2SeqAttn(options)

# List the names of the variables exposed as model.tvars
# (presumably the trainable variables -- confirm in model.py).
for var in model.tvars:
    print(var.name)


Building the TensorFlow graph...
TensorFlow session is closed.
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0


In [8]:
# Locate the most recent checkpoint saved under --restore_path.
latest_ckp = tf.train.latest_checkpoint(args.restore_path)

# latest_checkpoint() returns None when it finds no checkpoint. Fail here with
# a clear message instead of handing None to the cells that read the file.
if latest_ckp is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}'.format(args.restore_path))

In [11]:
# Print the name, dtype and shape of each variable stored in the checkpoint
# (all_tensors=False with an empty tensor_name, as recorded in the output).
print_tensors_in_checkpoint_file(latest_ckp, all_tensors=False, tensor_name='')

decoding/attention_v (DT_FLOAT) [128]
decoding/attention_wrapper/gru_cell/candidate/bias (DT_FLOAT) [256]
decoding/attention_wrapper/gru_cell/candidate/kernel (DT_FLOAT) [768,256]
decoding/attention_wrapper/gru_cell/gates/bias (DT_FLOAT) [512]
decoding/attention_wrapper/gru_cell/gates/kernel (DT_FLOAT) [768,512]
decoding/attention_wrapper/my_bahdanau_attention/attention_Wb/kernel (DT_FLOAT) [256,3]
decoding/attention_wrapper/my_bahdanau_attention/query_layer/kernel (DT_FLOAT) [256,128]
decoding/beta1_power (DT_FLOAT) []
decoding/beta2_power (DT_FLOAT) []
decoding/decoding/attention_wrapper/gru_cell/candidate/bias/Adam (DT_FLOAT) [256]
decoding/decoding/attention_wrapper/gru_cell/candidate/bias/Adam_1 (DT_FLOAT) [256]
decoding/decoding/attention_wrapper/gru_cell/candidate/kernel/Adam (DT_FLOAT) [768,256]
decoding/decoding/attention_wrapper/gru_cell/candidate/kernel/Adam_1 (DT_FLOAT) [768,256]
decoding/decoding/attention_wrapper/gru_cell/gates/bias/Adam (DT_FLOAT) [512]
decoding/decoding

---
Prediction

In [27]:
from tensorflow.contrib.seq2seq import sequence_loss

In [23]:
def read_data(data_path):
    """Load the test split together with both vocabulary mappings.

    NOTE: this definition replaces the earlier `read_data` of the same name,
    which returned `(train_set, valid_set, token2id)` instead.

    Args:
        data_path: directory containing the `test/` sub-directory and the
            `token2id.pickle` / `id2token.pickle` files.

    Returns:
        A tuple `(test_set, token2id, id2token)`. `test_set` is a dict with
        the keys 'enc_input', 'dec_input', 'target', 'enc_input_len' and
        'dec_input_len', each mapped to the array stored in `<key>.npy`.
        The two mappings are whatever objects were pickled in their files.
    """
    array_names = ('enc_input', 'dec_input', 'target',
                   'enc_input_len', 'dec_input_len')

    def load_split(split_dir):
        # One `<name>.npy` file per array, loaded in the order listed above.
        return {name: np.load(os.path.join(split_dir, name + '.npy'))
                for name in array_names}

    def load_pickle(file_name):
        with open(os.path.join(data_path, file_name), 'rb') as handle:
            return pickle.load(handle)

    test_set = load_split(os.path.join(data_path, 'test'))
    # Vocabulary lookups in both directions.
    token2id = load_pickle('token2id.pickle')
    id2token = load_pickle('id2token.pickle')
    return test_set, token2id, id2token


In [15]:
# Saved predictions for the test split.
# The path is derived from --data_path instead of being hard-coded, so the
# predictions are read from the same directory tree as the test arrays; with
# the default '../pre-data/' this is still '../pre-data/test/prediction.pickle'.
# NOTE(security): pickle.load can execute arbitrary code -- only load files
# produced by a trusted run.
with open(os.path.join(args.data_path, 'test', 'prediction.pickle'), 'rb') as file:
    prediction = pickle.load(file)

In [17]:
# Number of saved prediction batches; the recorded run shows 28, which matches
# 1807 test rows // batch size 64 (full batches only).
len(prediction) # 28 batches

28

In [19]:
# Axes of one prediction batch: (batch_size, uttr_len_dec, beam_width).
# NOTE(review): the recorded beam axis is 32, while --beam_width defaults to
# 256 in this notebook -- presumably the file was produced with
# beam_width=32; confirm before relying on args.beam_width here.
prediction[0].shape # 64: batch size, 20: decoder utterance length, 32: beam width of the saved run

(64, 20, 32)

In [24]:
# Switch to the test split. Needs the three-value `read_data` (the one that
# also returns id2token); rebinds token2id and both padded-width variables.
test_set, token2id, id2token = read_data(args.data_path)
max_uttr_len_enc = np.shape(test_set['enc_input'])[1]
max_uttr_len_dec = np.shape(test_set['dec_input'])[1]

In [28]:
# Second-axis size of test_set['dec_input'] (padded decoder width).
max_uttr_len_dec

20

In [26]:
# (rows in the test split, padded encoder width).
test_set['enc_input'].shape

(1807, 19)

In [30]:
# The arrays that read_data loads for a split.
test_set.keys()

dict_keys(['enc_input', 'dec_input', 'target', 'enc_input_len', 'dec_input_len'])

In [37]:
# Target token ids of test row `idx`.
# Trailing zeros are presumably padding -- confirm against the preprocessing.
idx = 0
test_set['target'][idx]

array([3237, 2536, 1281, 2918, 1780,   31, 2781, 2912,  272, 1741, 1975,
       2864,   31,   37,    0,    0,    0,    0,    0,    0])

In [40]:
# Id of the '<eos>' token (the same value is passed to Options as eos_index).
token2id['<eos>']

37

In [38]:
# Saved prediction for test row `idx`: idx // batch_size selects the batch,
# idx % batch_size the row inside it. With prediction[0].shape == (64, 20, 32)
# the result is a (20, 32) array.
prediction[idx//args.batch_size][idx%args.batch_size,:,:]

array([[2437, 3219, 3219,  388,  374,  374,  374,  374,  374,  374,  374,
         374,  374,  374,  374,  374,  374,  374,  374,  374,  374,  374,
         374,  374,  374,  374,  374,  374,  374,  374,  374,  374],
       [  37,   37, 2577, 2812,  326,  326,  326,  326,  326,  326,  326,
         326,  326,  326,  326,  326,  326,  326,  326,  326,  326,  326,
         326,  326,  326,  326,  326,  326,  326,  326,  326,  326],
       [  37,   37,   37,   37, 2502, 2502, 2502, 2502, 2502, 2502, 2502,
        2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502,
        2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502],
       [  37,   37,   37,   37, 2502, 2502, 2502, 2502, 2502, 2502, 2502,
        2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502,
        2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502],
       [  37,   37,   37,   37,   37, 2502, 2502, 2502, 2502, 2502, 2502,
        2502, 2502, 2502, 2502, 2502, 2502, 2502, 2502, 25

In [53]:
# Attempt to score one batch of saved predictions with sequence_loss.
#
# NOTE(review): as recorded, this cell fails with
#   AttributeError: 'numpy.ndarray' object has no attribute 'get_shape'
# `logits` below is a NumPy slice of `prediction`, not a tf.Tensor, and it is
# the only NumPy argument passed to sequence_loss.
# NOTE(review): the values stored in `prediction` look like token ids (int32,
# e.g. 37 == '<eos>') rather than per-vocabulary scores. sequence_loss
# presumably needs float logits of shape [batch, time, vocab] -- confirm in the
# TF docs; if so, a loss has to be computed from the model's logits, not from
# this file.
idx = 0
# NOTE(review): the mask is built from the encoder lengths of the whole test
# set, while `target` below is a single batch of decoder targets -- presumably
# this should use test_set['dec_input_len'][idx:idx+args.batch_size]; confirm.
sequence_mask = tf.sequence_mask(test_set['enc_input_len'], maxlen = max_uttr_len_dec, dtype = tf.float32)
weights = sequence_mask# * target_VAD_loss # affective objective function
# sequence_mask: [batch_size, max_len]
# target: [batch_size, max_len] VAD_loss: [batch_size,max_len]
# Third-axis index 0 of the selected prediction batch, for up to batch_size rows.
logits = prediction[idx//args.batch_size][idx%args.batch_size:idx%args.batch_size+args.batch_size,:,0]
target = test_set['target'][idx:idx+args.batch_size]
target = tf.constant(target)
loss = sequence_loss(logits, target, weights)

print(loss)

AttributeError: 'numpy.ndarray' object has no attribute 'get_shape'

In [108]:
# One index per test row: 0 .. N-1. Rebinds `idx` from an int to an array.
idx = np.arange(test_set['dec_input'].shape[0])

In [109]:
# (N - 1) % batch_size. When N is not a multiple of batch_size this is one
# less than the size of the final partial batch (14 for N = 1807 and
# batch_size = 64, where the partial batch holds 15 rows).
left_samples = idx[-1]%args.batch_size

In [115]:
# Random filler indices for the final batch, drawn from [0, N-1-left_samples),
# i.e. from the rows covered by the full batches. The count
# batch_size - left_samples - 1 pads the total to a multiple of batch_size
# (49 extra indices for N = 1807, batch_size = 64: 1807 + 49 = 1856 = 29 * 64).
# NOTE(review): no seed is set, so the filler rows differ between runs.
last_batch_idx = np.random.randint(0,idx[-1]-left_samples,size = args.batch_size - left_samples - 1)

In [116]:
# Length after appending the filler indices to the original ones.
# NOTE(review): with 1807 indices plus 49 filler indices this should be
# (1856,); the recorded (1807,) presumably comes from an earlier version of
# the inputs -- re-run to confirm.
np.concatenate([idx,last_batch_idx]).shape

(1807,)

In [118]:
# Full batches in 1807 test rows at batch size 64.
1807//64

28

In [117]:
# Batches after padding to 1856 (= 1807 + 49) indices at batch size 64.
1856//64

29

In [65]:
# Token for id 31, which appears in the first test target.
id2token[31]

'.'

In [63]:
# Decoder input of the first test row. Compared with test_set['target'][0] it
# holds the same ids shifted right by one, with a leading 38 (presumably the
# '<go>' id -- confirm) and without the trailing 37 ('<eos>').
test_set['dec_input'][0]

array([  38, 3237, 2536, 1281, 2918, 1780,   31, 2781, 2912,  272, 1741,
       1975, 2864,   31,    0,    0,    0,    0,    0,    0])

In [55]:
# NOTE(review): the recorded output for this cell is the array itself rather
# than a shape tuple, so it is stale; re-run to refresh it.
logits.shape

array([[2437,   37,   37, ...,   37,   37,   37],
       [2437, 2437,   37, ...,   37,   37,   37],
       [2776, 2776,  320, ..., 2335, 2335,  592],
       ...,
       [2437,   37,   37, ...,   37,   37,   37],
       [1309, 1043, 1043, ..., 2335, 2335, 2335],
       [ 374,  326, 2502, ...,  508,  508, 2934]], dtype=int32)

In [56]:
# Wrapping the NumPy array in tf.constant yields a Tensor, which does have
# get_shape. NOTE(review): the call parentheses are missing, so this displays
# the bound method (its repr happens to include the shape) instead of
# calling it.
tf.constant(logits).get_shape

<bound method Tensor.get_shape of <tf.Tensor 'Const_5:0' shape=(64, 20) dtype=int32>>

In [51]:
# NOTE(review): the recorded (20,) was produced at execution count 51, before
# the batch-sized `target` of the sequence_loss cell (count 53) -- presumably
# `target` was a single row at that time; re-run to refresh.
target.shape

(20,)

In [21]:
# Encoder lengths of the five sampled training rows.
enc_input_len

array([6, 5, 5, 5, 8])

In [58]:
# NOTE(review): scratch arithmetic; the origin of 1904, 6 and 4 is not shown
# anywhere in this notebook -- document or delete.
(1904-6)/4

474.5