In [1]:
import os
import pickle
import argparse
import numpy as np
from model import Options, Seq2SeqAttn


In [2]:
# Command-line options for the notebook.
# Every row is (flag, value type, default, help text); the rows are registered
# on the parser in the order they are listed here.
parser = argparse.ArgumentParser()

_option_table = [
    # Locations of the pre-processed inputs.
    ('--data_path', str, '../pre-data/',
     'the directory to the data'),
    ('--word_embeddings_path', str, '../pre-data/word_embeddings.npy',
     'the directory to the pre-trained word embeddings'),
    ('--VAD_path', str, '../pre-data/VAD.npy',
     'the directory to VAD'),
    ('--tf_path', str, '../pre-data/tf.npy',
     'the directory to term frequency'),
    ('--VAD_loss_path', str, '../pre-data/VAD_loss.npy',
     'the directory to VAD loss for each word'),
    ('--ti_path', str, '../pre-data/mu_li.npy',
     'the directory to term importance'),

    # Optimisation and model-size hyper-parameters.
    ('--num_epochs', int, 5,
     'the number of epochs to train the data'),
    ('--batch_size', int, 64,
     'the batch size'),
    ('--learning_rate', float, 0.0001,
     'the learning rate'),
    ('--beam_width', int, 32,
     'the beam width when decoding'),
    ('--word_embed_size', int, 256,
     'the size of word embeddings'),
    ('--n_hidden_units_enc', int, 256,
     'the number of hidden units of encoder'),
    ('--n_hidden_units_dec', int, 256,
     'the number of hidden units of decoder'),
    ('--attn_depth', int, 128,
     'attention depth'),

    # Checkpoint directories for the P(T|S) model.
    ('--restore_path_TS', str, '../model_dailydialog_rf_noVAD/model_TS',
     'the path to restore the trained model'),
    ('--save_path_TS', str, '../model_dailydialog_rf_noVAD/model_TS',
     'the path to save the trained model to'),

    # Checkpoint directories for the P(S|T) model.
    ('--restore_path_ST', str, '../model_dailydialog_rf_noVAD/model_ST',
     'the path to restore the trained model'),
    ('--save_path_ST', str, '../model_dailydialog_rf_noVAD/model_ST',
     'the path to save the trained model to'),

    # Epoch whose checkpoint is restored; 0 means start from fresh variables.
    ('--restore_epoch', int, 0,
     'the epoch to restore'),
]

for _flag, _value_type, _default, _help_text in _option_table:
    parser.add_argument(_flag, type = _value_type, default = _default, help = _help_text)

# parse_known_args is used instead of parse_args so that argv entries this
# parser does not define (e.g. whatever the notebook kernel was launched with)
# are collected in `unknown` rather than aborting the cell.
args, unknown = parser.parse_known_args()


In [3]:
def read_data(data_path, batch_size = None):
    """Load the train/validation arrays and the vocabulary mappings.

    Each split is read from ``<data_path>/train`` and ``<data_path>/validation``
    and is padded with randomly re-drawn samples so that its number of rows is
    a multiple of the batch size (i.e. the last batch is always complete).

    Args:
        data_path: directory holding the ``train`` and ``validation``
            sub-directories plus ``token2id.pickle`` and ``id2token.pickle``.
        batch_size: batch size used for padding. Defaults to the module-level
            ``args.batch_size`` when omitted.

    Returns:
        ``(train_set, valid_set, token2id, id2token)`` where each set is a dict
        with the keys ``enc_input``, ``dec_input``, ``target``,
        ``enc_input_len`` and ``dec_input_len``.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size is None:
        batch_size = args.batch_size
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    array_names = ('enc_input', 'dec_input', 'target', 'enc_input_len', 'dec_input_len')

    def load_np_files(path):
        """Load one split and complete its last batch by re-sampling rows."""
        my_set = {name: np.load(os.path.join(path, name + '.npy')) for name in array_names}

        # Count rows directly. The previous code used the last index (n - 1),
        # which left a split with n % batch_size == 1 unpadded.
        n_samples = my_set['dec_input'].shape[0]
        remainder = n_samples % batch_size
        if remainder:
            n_missing = batch_size - remainder
            # Draw the filler rows from the samples that already sit in
            # complete batches; if there is not even one complete batch, draw
            # from everything that is available.
            n_complete = n_samples - remainder
            high = n_complete if n_complete > 0 else n_samples
            filler_idx = np.random.randint(0, high, size = n_missing)
            idx = np.concatenate([np.arange(n_samples), filler_idx])

            # The same index vector is applied to every array so rows stay aligned.
            for name in array_names:
                my_set[name] = my_set[name][idx]
        return my_set

    train_set = load_np_files(os.path.join(data_path, 'train'))
    valid_set = load_np_files(os.path.join(data_path, 'validation'))

    # NOTE: pickle.load can execute arbitrary code; only point data_path at
    # trusted, locally generated files.
    with open(os.path.join(data_path, 'token2id.pickle'), 'rb') as file:
        token2id = pickle.load(file)
    with open(os.path.join(data_path, 'id2token.pickle'), 'rb') as file:
        id2token = pickle.load(file)

    return train_set, valid_set, token2id,id2token

---
Train model maximizing P(T|S)

In [4]:
# Load both splits (read_data pads each to a whole number of batches) and the two vocabulary mappings.
train_set, valid_set, token2id,id2token = read_data(args.data_path)

In [5]:
# Display the shape of the training encoder-input array returned by read_data.
train_set['enc_input'].shape

(14464, 19)

In [6]:
# NOTE(review): scratch arithmetic on the row count displayed above; presumably
# estimating the full corpus size under an 80% training split — TODO confirm.
14464/0.8

18080.0

In [4]:
if __name__ == '__main__':
    # Train the P(T|S) model: build the options from the parsed arguments and
    # the loaded data, construct the network, then restore or initialise its
    # variables before training.
    train_set, valid_set, token2id,id2token = read_data(args.data_path)

    # The second axis of the input arrays gives the utterance lengths handed to the model.
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    # Despite its name, `termfreq` holds the term-importance array (ti_path);
    # --tf_path is not read in this cell.
    termfreq = np.load(args.ti_path).reshape(-1,1)
    VAD_loss = np.load(args.VAD_loss_path).reshape(-1,1)

    options = Options(mode = 'TRAIN',
                      VAD_mode = 'FALSE',
                      num_epochs = args.num_epochs,
                      batch_size = args.batch_size,
                      learning_rate = args.learning_rate,
                      beam_width = args.beam_width,
                      corpus_size = len(token2id),
                      max_uttr_len_enc = max_uttr_len_enc,
                      max_uttr_len_dec = max_uttr_len_dec,
                      go_index = token2id['<go>'],
                      eos_index = token2id['<eos>'],
                      word_embed_size = args.word_embed_size,
                      n_hidden_units_enc = args.n_hidden_units_enc,
                      n_hidden_units_dec = args.n_hidden_units_dec,
                      attn_depth = args.attn_depth,
                      word_embeddings = word_embeddings)
    model_TS = Seq2SeqAttn(options)

    # List the trainable variables of the freshly built model.
    for var in model_TS.tvars:
        print(var.name)

    if args.restore_epoch > 0:
        # The training output reports checkpoints being written as
        # 'model_epoch_NNN.ckpt', so the restore path must use that same name
        # (the previous 'model_TS_epoch_NNN.ckpt' never matches a saved file).
        checkpoint_name = 'model_epoch_{:03d}.ckpt'.format(args.restore_epoch)
        model_TS.restore(os.path.join(args.restore_path_TS, checkpoint_name))
    else:
        model_TS.init_tf_vars()
    model_TS.train(train_set, VAD, termfreq, VAD_loss, args.save_path_TS, args.restore_epoch, valid_set)

Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
TensorFlow variables initialized.
Start to train the model...
Epoch 001/005, valid ppl = None, batch 0001/0226, train loss = 8.112724304199219
Epoch 001/005, valid ppl = None, batch 0002/0226, train loss = 8.112360954284668
Epoch 001/005, valid ppl = None, batch 0003/0226, train loss = 8.082193374633789
Epoch 001/005, valid ppl = None, batch 0004/0226, train loss = 8.0678901672

Epoch 001/005, valid ppl = None, batch 0094/0226, train loss = 5.46762228012085
Epoch 001/005, valid ppl = None, batch 0095/0226, train loss = 5.512917518615723
Epoch 001/005, valid ppl = None, batch 0096/0226, train loss = 5.533533573150635
Epoch 001/005, valid ppl = None, batch 0097/0226, train loss = 5.262310028076172
Epoch 001/005, valid ppl = None, batch 0098/0226, train loss = 5.409660339355469
Epoch 001/005, valid ppl = None, batch 0099/0226, train loss = 5.4105095863342285
Epoch 001/005, valid ppl = None, batch 0100/0226, train loss = 5.375607490539551
Epoch 001/005, valid ppl = None, batch 0101/0226, train loss = 5.4132609367370605
Epoch 001/005, valid ppl = None, batch 0102/0226, train loss = 5.36468505859375
Epoch 001/005, valid ppl = None, batch 0103/0226, train loss = 5.2652268409729
Epoch 001/005, valid ppl = None, batch 0104/0226, train loss = 5.4645209312438965
Epoch 001/005, valid ppl = None, batch 0105/0226, train loss = 5.22594690322876
Epoch 001/005, valid ppl = Non

Epoch 001/005, valid ppl = None, batch 0196/0226, train loss = 4.976428985595703
Epoch 001/005, valid ppl = None, batch 0197/0226, train loss = 4.825370788574219
Epoch 001/005, valid ppl = None, batch 0198/0226, train loss = 4.8680925369262695
Epoch 001/005, valid ppl = None, batch 0199/0226, train loss = 4.914774417877197
Epoch 001/005, valid ppl = None, batch 0200/0226, train loss = 4.950156211853027
Epoch 001/005, valid ppl = None, batch 0201/0226, train loss = 4.8421759605407715
Epoch 001/005, valid ppl = None, batch 0202/0226, train loss = 4.860027313232422
Epoch 001/005, valid ppl = None, batch 0203/0226, train loss = 4.768272876739502
Epoch 001/005, valid ppl = None, batch 0204/0226, train loss = 4.819958209991455
Epoch 001/005, valid ppl = None, batch 0205/0226, train loss = 4.916742324829102
Epoch 001/005, valid ppl = None, batch 0206/0226, train loss = 4.776703357696533
Epoch 001/005, valid ppl = None, batch 0207/0226, train loss = 4.825057029724121
Epoch 001/005, valid ppl =

Epoch 002/005, valid ppl = 120.4233167746116, batch 0061/0226, train loss = 4.760985851287842
Epoch 002/005, valid ppl = 120.4233167746116, batch 0062/0226, train loss = 4.550327301025391
Epoch 002/005, valid ppl = 120.4233167746116, batch 0063/0226, train loss = 4.5599775314331055
Epoch 002/005, valid ppl = 120.4233167746116, batch 0064/0226, train loss = 4.638552188873291
Epoch 002/005, valid ppl = 120.4233167746116, batch 0065/0226, train loss = 4.805595397949219
Epoch 002/005, valid ppl = 120.4233167746116, batch 0066/0226, train loss = 4.583685874938965
Epoch 002/005, valid ppl = 120.4233167746116, batch 0067/0226, train loss = 4.677098274230957
Epoch 002/005, valid ppl = 120.4233167746116, batch 0068/0226, train loss = 4.534418106079102
Epoch 002/005, valid ppl = 120.4233167746116, batch 0069/0226, train loss = 4.717148780822754
Epoch 002/005, valid ppl = 120.4233167746116, batch 0070/0226, train loss = 4.795843601226807
Epoch 002/005, valid ppl = 120.4233167746116, batch 0071/02

Epoch 002/005, valid ppl = 120.4233167746116, batch 0149/0226, train loss = 4.485030174255371
Epoch 002/005, valid ppl = 120.4233167746116, batch 0150/0226, train loss = 4.619609355926514
Epoch 002/005, valid ppl = 120.4233167746116, batch 0151/0226, train loss = 4.292534828186035
Epoch 002/005, valid ppl = 120.4233167746116, batch 0152/0226, train loss = 4.531655311584473
Epoch 002/005, valid ppl = 120.4233167746116, batch 0153/0226, train loss = 4.467715263366699
Epoch 002/005, valid ppl = 120.4233167746116, batch 0154/0226, train loss = 4.598151206970215
Epoch 002/005, valid ppl = 120.4233167746116, batch 0155/0226, train loss = 4.431955814361572
Epoch 002/005, valid ppl = 120.4233167746116, batch 0156/0226, train loss = 4.442891597747803
Epoch 002/005, valid ppl = 120.4233167746116, batch 0157/0226, train loss = 4.369269371032715
Epoch 002/005, valid ppl = 120.4233167746116, batch 0158/0226, train loss = 4.2945427894592285
Epoch 002/005, valid ppl = 120.4233167746116, batch 0159/02

Epoch 003/005, valid ppl = 84.43877547561851, batch 0010/0226, train loss = 4.428177356719971
Epoch 003/005, valid ppl = 84.43877547561851, batch 0011/0226, train loss = 4.275556564331055
Epoch 003/005, valid ppl = 84.43877547561851, batch 0012/0226, train loss = 4.232733726501465
Epoch 003/005, valid ppl = 84.43877547561851, batch 0013/0226, train loss = 4.2764716148376465
Epoch 003/005, valid ppl = 84.43877547561851, batch 0014/0226, train loss = 4.544807434082031
Epoch 003/005, valid ppl = 84.43877547561851, batch 0015/0226, train loss = 4.400261402130127
Epoch 003/005, valid ppl = 84.43877547561851, batch 0016/0226, train loss = 4.475508689880371
Epoch 003/005, valid ppl = 84.43877547561851, batch 0017/0226, train loss = 4.384039878845215
Epoch 003/005, valid ppl = 84.43877547561851, batch 0018/0226, train loss = 4.308535099029541
Epoch 003/005, valid ppl = 84.43877547561851, batch 0019/0226, train loss = 4.146601676940918
Epoch 003/005, valid ppl = 84.43877547561851, batch 0020/02

Epoch 003/005, valid ppl = 84.43877547561851, batch 0097/0226, train loss = 4.14580774307251
Epoch 003/005, valid ppl = 84.43877547561851, batch 0098/0226, train loss = 4.26607608795166
Epoch 003/005, valid ppl = 84.43877547561851, batch 0099/0226, train loss = 4.224373817443848
Epoch 003/005, valid ppl = 84.43877547561851, batch 0100/0226, train loss = 4.204413890838623
Epoch 003/005, valid ppl = 84.43877547561851, batch 0101/0226, train loss = 4.318620681762695
Epoch 003/005, valid ppl = 84.43877547561851, batch 0102/0226, train loss = 4.395260334014893
Epoch 003/005, valid ppl = 84.43877547561851, batch 0103/0226, train loss = 4.173366546630859
Epoch 003/005, valid ppl = 84.43877547561851, batch 0104/0226, train loss = 4.273773670196533
Epoch 003/005, valid ppl = 84.43877547561851, batch 0105/0226, train loss = 4.315649032592773
Epoch 003/005, valid ppl = 84.43877547561851, batch 0106/0226, train loss = 4.341385364532471
Epoch 003/005, valid ppl = 84.43877547561851, batch 0107/0226,

Epoch 003/005, valid ppl = 84.43877547561851, batch 0185/0226, train loss = 4.24221658706665
Epoch 003/005, valid ppl = 84.43877547561851, batch 0186/0226, train loss = 4.1300048828125
Epoch 003/005, valid ppl = 84.43877547561851, batch 0187/0226, train loss = 4.183599948883057
Epoch 003/005, valid ppl = 84.43877547561851, batch 0188/0226, train loss = 4.199120998382568
Epoch 003/005, valid ppl = 84.43877547561851, batch 0189/0226, train loss = 4.23482608795166
Epoch 003/005, valid ppl = 84.43877547561851, batch 0190/0226, train loss = 4.24653959274292
Epoch 003/005, valid ppl = 84.43877547561851, batch 0191/0226, train loss = 4.165822505950928
Epoch 003/005, valid ppl = 84.43877547561851, batch 0192/0226, train loss = 4.1915364265441895
Epoch 003/005, valid ppl = 84.43877547561851, batch 0193/0226, train loss = 4.1828227043151855
Epoch 003/005, valid ppl = 84.43877547561851, batch 0194/0226, train loss = 4.279025554656982
Epoch 003/005, valid ppl = 84.43877547561851, batch 0195/0226, 

Epoch 004/005, valid ppl = 65.74766113002066, batch 0046/0226, train loss = 4.344231128692627
Epoch 004/005, valid ppl = 65.74766113002066, batch 0047/0226, train loss = 4.131371021270752
Epoch 004/005, valid ppl = 65.74766113002066, batch 0048/0226, train loss = 3.975245714187622
Epoch 004/005, valid ppl = 65.74766113002066, batch 0049/0226, train loss = 3.8188607692718506
Epoch 004/005, valid ppl = 65.74766113002066, batch 0050/0226, train loss = 4.053962707519531
Epoch 004/005, valid ppl = 65.74766113002066, batch 0051/0226, train loss = 4.0626678466796875
Epoch 004/005, valid ppl = 65.74766113002066, batch 0052/0226, train loss = 4.026628494262695
Epoch 004/005, valid ppl = 65.74766113002066, batch 0053/0226, train loss = 4.044682025909424
Epoch 004/005, valid ppl = 65.74766113002066, batch 0054/0226, train loss = 4.058419227600098
Epoch 004/005, valid ppl = 65.74766113002066, batch 0055/0226, train loss = 3.986515760421753
Epoch 004/005, valid ppl = 65.74766113002066, batch 0056/0

Epoch 004/005, valid ppl = 65.74766113002066, batch 0133/0226, train loss = 3.9726622104644775
Epoch 004/005, valid ppl = 65.74766113002066, batch 0134/0226, train loss = 3.859510660171509
Epoch 004/005, valid ppl = 65.74766113002066, batch 0135/0226, train loss = 3.8033254146575928
Epoch 004/005, valid ppl = 65.74766113002066, batch 0136/0226, train loss = 4.144413471221924
Epoch 004/005, valid ppl = 65.74766113002066, batch 0137/0226, train loss = 3.952004909515381
Epoch 004/005, valid ppl = 65.74766113002066, batch 0138/0226, train loss = 3.8545608520507812
Epoch 004/005, valid ppl = 65.74766113002066, batch 0139/0226, train loss = 3.9134814739227295
Epoch 004/005, valid ppl = 65.74766113002066, batch 0140/0226, train loss = 3.9692094326019287
Epoch 004/005, valid ppl = 65.74766113002066, batch 0141/0226, train loss = 4.108391284942627
Epoch 004/005, valid ppl = 65.74766113002066, batch 0142/0226, train loss = 4.071457862854004
Epoch 004/005, valid ppl = 65.74766113002066, batch 014

Epoch 004/005, valid ppl = 65.74766113002066, batch 0220/0226, train loss = 3.8423140048980713
Epoch 004/005, valid ppl = 65.74766113002066, batch 0221/0226, train loss = 4.105627536773682
Epoch 004/005, valid ppl = 65.74766113002066, batch 0222/0226, train loss = 3.9324164390563965
Epoch 004/005, valid ppl = 65.74766113002066, batch 0223/0226, train loss = 3.939445972442627
Epoch 004/005, valid ppl = 65.74766113002066, batch 0224/0226, train loss = 4.0268049240112305
Epoch 004/005, valid ppl = 65.74766113002066, batch 0225/0226, train loss = 3.91568922996521
Epoch 004/005, valid ppl = 65.74766113002066, batch 0226/0226, train loss = 3.9184393882751465
Saving the trained model to ../model_dailydialog_rf_noVAD/model_TS/model_epoch_004.ckpt...
Epoch 005/005, valid ppl = 53.108577298873435, batch 0001/0226, train loss = 4.061442852020264
Epoch 005/005, valid ppl = 53.108577298873435, batch 0002/0226, train loss = 4.028983116149902
Epoch 005/005, valid ppl = 53.108577298873435, batch 0003/

Epoch 005/005, valid ppl = 53.108577298873435, batch 0079/0226, train loss = 3.7590394020080566
Epoch 005/005, valid ppl = 53.108577298873435, batch 0080/0226, train loss = 3.6808481216430664
Epoch 005/005, valid ppl = 53.108577298873435, batch 0081/0226, train loss = 3.8811283111572266
Epoch 005/005, valid ppl = 53.108577298873435, batch 0082/0226, train loss = 3.6778974533081055
Epoch 005/005, valid ppl = 53.108577298873435, batch 0083/0226, train loss = 3.758288860321045
Epoch 005/005, valid ppl = 53.108577298873435, batch 0084/0226, train loss = 3.785147190093994
Epoch 005/005, valid ppl = 53.108577298873435, batch 0085/0226, train loss = 3.705564498901367
Epoch 005/005, valid ppl = 53.108577298873435, batch 0086/0226, train loss = 3.9617552757263184
Epoch 005/005, valid ppl = 53.108577298873435, batch 0087/0226, train loss = 3.8303005695343018
Epoch 005/005, valid ppl = 53.108577298873435, batch 0088/0226, train loss = 3.8050694465637207
Epoch 005/005, valid ppl = 53.1085772988734

Epoch 005/005, valid ppl = 53.108577298873435, batch 0165/0226, train loss = 3.7108941078186035
Epoch 005/005, valid ppl = 53.108577298873435, batch 0166/0226, train loss = 3.6251323223114014
Epoch 005/005, valid ppl = 53.108577298873435, batch 0167/0226, train loss = 3.6243302822113037
Epoch 005/005, valid ppl = 53.108577298873435, batch 0168/0226, train loss = 3.7193100452423096
Epoch 005/005, valid ppl = 53.108577298873435, batch 0169/0226, train loss = 3.7072064876556396
Epoch 005/005, valid ppl = 53.108577298873435, batch 0170/0226, train loss = 3.819054365158081
Epoch 005/005, valid ppl = 53.108577298873435, batch 0171/0226, train loss = 3.836606025695801
Epoch 005/005, valid ppl = 53.108577298873435, batch 0172/0226, train loss = 3.889694929122925
Epoch 005/005, valid ppl = 53.108577298873435, batch 0173/0226, train loss = 3.765200138092041
Epoch 005/005, valid ppl = 53.108577298873435, batch 0174/0226, train loss = 3.917965888977051
Epoch 005/005, valid ppl = 53.108577298873435

---
Train model maximizing P(S|T)

In [5]:
def revert(myset, vocab = None):
    """Swap the encoder and decoder roles of a data set.

    The old encoder input becomes the decoder side (``<go>`` prepended for the
    decoder input, ``<eos>`` placed right after the last token for the target)
    and the old decoder input, minus its first column, becomes the encoder
    input.

    Args:
        myset: dict with the keys ``enc_input``, ``dec_input``,
            ``enc_input_len`` and ``dec_input_len`` (2-D id arrays and their
            length vectors).
        vocab: mapping that contains ``'<go>'`` and ``'<eos>'``. Defaults to
            the module-level ``token2id`` when omitted.

    Returns:
        A new dict with the keys ``enc_input``, ``dec_input``, ``target``,
        ``enc_input_len`` and ``dec_input_len``.
    """
    if vocab is None:
        vocab = token2id

    source = np.asarray(myset['enc_input'])

    # Drop the first column of the old decoder input (presumably the leading
    # <go> token — confirm against the pre-processing script).
    enc_input = myset['dec_input'][:,1:]
    dec_input = np.insert(source, 0, vocab['<go>'], axis=1) # add <go> in the beginning of decoder

    # Append one padding column at the END of every row. The previous
    # np.insert(..., -1, ...) inserted it before the last column, so rows
    # without trailing padding got <eos> in front of their final token.
    target = np.insert(source, source.shape[1], 0, axis=1)
    # Id 0 is treated as padding: the first 0 in each row is where <eos> goes.
    # The appended column guarantees every row has at least one 0.
    first_pad = (target == 0).argmax(axis=1)
    target[np.arange(target.shape[0]), first_pad] = vocab['<eos>'] # add <eos> at the end of decoder

    # NOTE(review): the length vectors are swapped unchanged even though <go>
    # is removed from one side and added to the other — confirm this matches
    # the length convention the model expects.
    newset = {
        'enc_input': enc_input,
        'dec_input': dec_input,
        'target': target,
        'enc_input_len': myset['dec_input_len'],
        'dec_input_len': myset['enc_input_len'],
    }
    return newset

In [6]:
if __name__ == '__main__':
    # Train the P(S|T) model: reload the data, swap the encoder/decoder roles
    # with revert(), then build, restore-or-initialise, and train the network.
    train_set, valid_set, token2id,id2token = read_data(args.data_path)
    train_set = revert(train_set)
    valid_set = revert(valid_set)

    # The second axis of the (reverted) input arrays gives the utterance lengths handed to the model.
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    # Despite its name, `termfreq` holds the term-importance array (ti_path);
    # --tf_path is not read in this cell.
    termfreq = np.load(args.ti_path).reshape(-1,1)
    VAD_loss = np.load(args.VAD_loss_path).reshape(-1,1)

    options = Options(mode = 'TRAIN',
                      VAD_mode = 'FALSE',
                      num_epochs = args.num_epochs,
                      batch_size = args.batch_size,
                      learning_rate = args.learning_rate,
                      beam_width = args.beam_width,
                      corpus_size = len(token2id),
                      max_uttr_len_enc = max_uttr_len_enc,
                      max_uttr_len_dec = max_uttr_len_dec,
                      go_index = token2id['<go>'],
                      eos_index = token2id['<eos>'],
                      word_embed_size = args.word_embed_size,
                      n_hidden_units_enc = args.n_hidden_units_enc,
                      n_hidden_units_dec = args.n_hidden_units_dec,
                      attn_depth = args.attn_depth,
                      word_embeddings = word_embeddings)
    model_ST = Seq2SeqAttn(options)

    # List the trainable variables of THIS model. The previous code iterated
    # model_TS.tvars, which reports the other model and fails with a NameError
    # when the P(T|S) cell has not been run in the current kernel.
    for var in model_ST.tvars:
        print(var.name)

    if args.restore_epoch > 0:
        # The training output reports checkpoints being written as
        # 'model_epoch_NNN.ckpt', so the restore path must use that same name
        # (the previous 'model_TS_epoch_NNN.ckpt' never matches a saved file).
        checkpoint_name = 'model_epoch_{:03d}.ckpt'.format(args.restore_epoch)
        model_ST.restore(os.path.join(args.restore_path_ST, checkpoint_name))
    else:
        model_ST.init_tf_vars()
    model_ST.train(train_set, VAD, termfreq, VAD_loss, args.save_path_ST, args.restore_epoch, valid_set)

Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
TensorFlow variables initialized.
Start to train the model...
Epoch 001/005, valid ppl = None, batch 0001/0226, train loss = 8.100800514221191
Epoch 001/005, valid ppl = None, batch 0002/0226, train loss = 8.088526725769043
Epoch 001/005, valid ppl = None, batch 0003/0226, train loss = 8.07837963104248
Epoch 001/005, valid ppl = None, batch 0004/0226, train loss = 8.06023788452

Epoch 001/005, valid ppl = None, batch 0094/0226, train loss = 5.710849285125732
Epoch 001/005, valid ppl = None, batch 0095/0226, train loss = 5.703164100646973
Epoch 001/005, valid ppl = None, batch 0096/0226, train loss = 5.477018356323242
Epoch 001/005, valid ppl = None, batch 0097/0226, train loss = 5.5196943283081055
Epoch 001/005, valid ppl = None, batch 0098/0226, train loss = 5.494857311248779
Epoch 001/005, valid ppl = None, batch 0099/0226, train loss = 5.410406112670898
Epoch 001/005, valid ppl = None, batch 0100/0226, train loss = 5.4671525955200195
Epoch 001/005, valid ppl = None, batch 0101/0226, train loss = 5.65301513671875
Epoch 001/005, valid ppl = None, batch 0102/0226, train loss = 5.409722805023193
Epoch 001/005, valid ppl = None, batch 0103/0226, train loss = 5.5099639892578125
Epoch 001/005, valid ppl = None, batch 0104/0226, train loss = 5.600381851196289
Epoch 001/005, valid ppl = None, batch 0105/0226, train loss = 5.497859477996826
Epoch 001/005, valid ppl =

Epoch 001/005, valid ppl = None, batch 0196/0226, train loss = 4.6891632080078125
Epoch 001/005, valid ppl = None, batch 0197/0226, train loss = 4.946145534515381
Epoch 001/005, valid ppl = None, batch 0198/0226, train loss = 4.542754650115967
Epoch 001/005, valid ppl = None, batch 0199/0226, train loss = 5.057478904724121
Epoch 001/005, valid ppl = None, batch 0200/0226, train loss = 4.782932281494141
Epoch 001/005, valid ppl = None, batch 0201/0226, train loss = 4.879207611083984
Epoch 001/005, valid ppl = None, batch 0202/0226, train loss = 4.903680801391602
Epoch 001/005, valid ppl = None, batch 0203/0226, train loss = 4.968288421630859
Epoch 001/005, valid ppl = None, batch 0204/0226, train loss = 4.864683151245117
Epoch 001/005, valid ppl = None, batch 0205/0226, train loss = 4.814464092254639
Epoch 001/005, valid ppl = None, batch 0206/0226, train loss = 4.7938618659973145
Epoch 001/005, valid ppl = None, batch 0207/0226, train loss = 4.9422078132629395
Epoch 001/005, valid ppl 

Epoch 002/005, valid ppl = 124.91660904510148, batch 0060/0226, train loss = 4.974656105041504
Epoch 002/005, valid ppl = 124.91660904510148, batch 0061/0226, train loss = 4.556333541870117
Epoch 002/005, valid ppl = 124.91660904510148, batch 0062/0226, train loss = 4.797314167022705
Epoch 002/005, valid ppl = 124.91660904510148, batch 0063/0226, train loss = 4.792729377746582
Epoch 002/005, valid ppl = 124.91660904510148, batch 0064/0226, train loss = 4.973660945892334
Epoch 002/005, valid ppl = 124.91660904510148, batch 0065/0226, train loss = 4.637019634246826
Epoch 002/005, valid ppl = 124.91660904510148, batch 0066/0226, train loss = 4.741960525512695
Epoch 002/005, valid ppl = 124.91660904510148, batch 0067/0226, train loss = 4.507218837738037
Epoch 002/005, valid ppl = 124.91660904510148, batch 0068/0226, train loss = 4.5676422119140625
Epoch 002/005, valid ppl = 124.91660904510148, batch 0069/0226, train loss = 4.771228313446045
Epoch 002/005, valid ppl = 124.91660904510148, ba

Epoch 002/005, valid ppl = 124.91660904510148, batch 0147/0226, train loss = 4.547279357910156
Epoch 002/005, valid ppl = 124.91660904510148, batch 0148/0226, train loss = 4.475005626678467
Epoch 002/005, valid ppl = 124.91660904510148, batch 0149/0226, train loss = 4.684288501739502
Epoch 002/005, valid ppl = 124.91660904510148, batch 0150/0226, train loss = 4.713789939880371
Epoch 002/005, valid ppl = 124.91660904510148, batch 0151/0226, train loss = 4.700634479522705
Epoch 002/005, valid ppl = 124.91660904510148, batch 0152/0226, train loss = 4.568606853485107
Epoch 002/005, valid ppl = 124.91660904510148, batch 0153/0226, train loss = 4.605108261108398
Epoch 002/005, valid ppl = 124.91660904510148, batch 0154/0226, train loss = 4.6651530265808105
Epoch 002/005, valid ppl = 124.91660904510148, batch 0155/0226, train loss = 4.494077205657959
Epoch 002/005, valid ppl = 124.91660904510148, batch 0156/0226, train loss = 4.585419654846191
Epoch 002/005, valid ppl = 124.91660904510148, ba

Epoch 003/005, valid ppl = 83.431075629146, batch 0007/0226, train loss = 4.470936298370361
Epoch 003/005, valid ppl = 83.431075629146, batch 0008/0226, train loss = 4.494640350341797
Epoch 003/005, valid ppl = 83.431075629146, batch 0009/0226, train loss = 4.5066022872924805
Epoch 003/005, valid ppl = 83.431075629146, batch 0010/0226, train loss = 4.402885437011719
Epoch 003/005, valid ppl = 83.431075629146, batch 0011/0226, train loss = 4.419730186462402
Epoch 003/005, valid ppl = 83.431075629146, batch 0012/0226, train loss = 4.2721967697143555
Epoch 003/005, valid ppl = 83.431075629146, batch 0013/0226, train loss = 4.315207004547119
Epoch 003/005, valid ppl = 83.431075629146, batch 0014/0226, train loss = 4.440648555755615
Epoch 003/005, valid ppl = 83.431075629146, batch 0015/0226, train loss = 4.403649806976318
Epoch 003/005, valid ppl = 83.431075629146, batch 0016/0226, train loss = 4.283197402954102
Epoch 003/005, valid ppl = 83.431075629146, batch 0017/0226, train loss = 4.34

Epoch 003/005, valid ppl = 83.431075629146, batch 0097/0226, train loss = 4.192875385284424
Epoch 003/005, valid ppl = 83.431075629146, batch 0098/0226, train loss = 4.607680797576904
Epoch 003/005, valid ppl = 83.431075629146, batch 0099/0226, train loss = 3.9367055892944336
Epoch 003/005, valid ppl = 83.431075629146, batch 0100/0226, train loss = 4.16977596282959
Epoch 003/005, valid ppl = 83.431075629146, batch 0101/0226, train loss = 4.288405418395996
Epoch 003/005, valid ppl = 83.431075629146, batch 0102/0226, train loss = 4.270907878875732
Epoch 003/005, valid ppl = 83.431075629146, batch 0103/0226, train loss = 4.383097171783447
Epoch 003/005, valid ppl = 83.431075629146, batch 0104/0226, train loss = 4.226972579956055
Epoch 003/005, valid ppl = 83.431075629146, batch 0105/0226, train loss = 4.235749244689941
Epoch 003/005, valid ppl = 83.431075629146, batch 0106/0226, train loss = 4.397200107574463
Epoch 003/005, valid ppl = 83.431075629146, batch 0107/0226, train loss = 4.2534

Epoch 003/005, valid ppl = 83.431075629146, batch 0187/0226, train loss = 3.9787609577178955
Epoch 003/005, valid ppl = 83.431075629146, batch 0188/0226, train loss = 4.070389747619629
Epoch 003/005, valid ppl = 83.431075629146, batch 0189/0226, train loss = 3.992981433868408
Epoch 003/005, valid ppl = 83.431075629146, batch 0190/0226, train loss = 4.139906406402588
Epoch 003/005, valid ppl = 83.431075629146, batch 0191/0226, train loss = 4.2147440910339355
Epoch 003/005, valid ppl = 83.431075629146, batch 0192/0226, train loss = 4.350514888763428
Epoch 003/005, valid ppl = 83.431075629146, batch 0193/0226, train loss = 4.20426082611084
Epoch 003/005, valid ppl = 83.431075629146, batch 0194/0226, train loss = 4.09444522857666
Epoch 003/005, valid ppl = 83.431075629146, batch 0195/0226, train loss = 4.267040252685547
Epoch 003/005, valid ppl = 83.431075629146, batch 0196/0226, train loss = 4.258049011230469
Epoch 003/005, valid ppl = 83.431075629146, batch 0197/0226, train loss = 4.3402

Epoch 004/005, valid ppl = 63.27359063321028, batch 0048/0226, train loss = 3.9506797790527344
Epoch 004/005, valid ppl = 63.27359063321028, batch 0049/0226, train loss = 4.1176676750183105
Epoch 004/005, valid ppl = 63.27359063321028, batch 0050/0226, train loss = 4.080044269561768
Epoch 004/005, valid ppl = 63.27359063321028, batch 0051/0226, train loss = 3.844287872314453
Epoch 004/005, valid ppl = 63.27359063321028, batch 0052/0226, train loss = 4.2578887939453125
Epoch 004/005, valid ppl = 63.27359063321028, batch 0053/0226, train loss = 4.101673126220703
Epoch 004/005, valid ppl = 63.27359063321028, batch 0054/0226, train loss = 4.101263523101807
Epoch 004/005, valid ppl = 63.27359063321028, batch 0055/0226, train loss = 4.25340461730957
Epoch 004/005, valid ppl = 63.27359063321028, batch 0056/0226, train loss = 3.9019649028778076
Epoch 004/005, valid ppl = 63.27359063321028, batch 0057/0226, train loss = 3.9572482109069824
Epoch 004/005, valid ppl = 63.27359063321028, batch 0058

Epoch 004/005, valid ppl = 63.27359063321028, batch 0135/0226, train loss = 3.7086832523345947
Epoch 004/005, valid ppl = 63.27359063321028, batch 0136/0226, train loss = 4.331571102142334
Epoch 004/005, valid ppl = 63.27359063321028, batch 0137/0226, train loss = 4.098219871520996
Epoch 004/005, valid ppl = 63.27359063321028, batch 0138/0226, train loss = 3.9619061946868896
Epoch 004/005, valid ppl = 63.27359063321028, batch 0139/0226, train loss = 4.119431972503662
Epoch 004/005, valid ppl = 63.27359063321028, batch 0140/0226, train loss = 3.9844970703125
Epoch 004/005, valid ppl = 63.27359063321028, batch 0141/0226, train loss = 3.952735424041748
Epoch 004/005, valid ppl = 63.27359063321028, batch 0142/0226, train loss = 3.7623181343078613
Epoch 004/005, valid ppl = 63.27359063321028, batch 0143/0226, train loss = 3.722766399383545
Epoch 004/005, valid ppl = 63.27359063321028, batch 0144/0226, train loss = 3.9177663326263428
Epoch 004/005, valid ppl = 63.27359063321028, batch 0145/0

Epoch 004/005, valid ppl = 63.27359063321028, batch 0222/0226, train loss = 4.090516567230225
Epoch 004/005, valid ppl = 63.27359063321028, batch 0223/0226, train loss = 3.753840446472168
Epoch 004/005, valid ppl = 63.27359063321028, batch 0224/0226, train loss = 3.9905335903167725
Epoch 004/005, valid ppl = 63.27359063321028, batch 0225/0226, train loss = 3.9092962741851807
Epoch 004/005, valid ppl = 63.27359063321028, batch 0226/0226, train loss = 3.911162853240967
Saving the trained model to ../model_dailydialog_rf_noVAD/model_ST/model_epoch_004.ckpt...
Epoch 005/005, valid ppl = 50.25530429061359, batch 0001/0226, train loss = 3.8758039474487305
Epoch 005/005, valid ppl = 50.25530429061359, batch 0002/0226, train loss = 3.9165537357330322
Epoch 005/005, valid ppl = 50.25530429061359, batch 0003/0226, train loss = 3.8420097827911377
Epoch 005/005, valid ppl = 50.25530429061359, batch 0004/0226, train loss = 3.6246745586395264
Epoch 005/005, valid ppl = 50.25530429061359, batch 0005/

Epoch 005/005, valid ppl = 50.25530429061359, batch 0082/0226, train loss = 4.0378737449646
Epoch 005/005, valid ppl = 50.25530429061359, batch 0083/0226, train loss = 3.884145736694336
Epoch 005/005, valid ppl = 50.25530429061359, batch 0084/0226, train loss = 3.962857246398926
Epoch 005/005, valid ppl = 50.25530429061359, batch 0085/0226, train loss = 3.829669237136841
Epoch 005/005, valid ppl = 50.25530429061359, batch 0086/0226, train loss = 3.600188970565796
Epoch 005/005, valid ppl = 50.25530429061359, batch 0087/0226, train loss = 3.8100950717926025
Epoch 005/005, valid ppl = 50.25530429061359, batch 0088/0226, train loss = 3.7679193019866943
Epoch 005/005, valid ppl = 50.25530429061359, batch 0089/0226, train loss = 3.6132235527038574
Epoch 005/005, valid ppl = 50.25530429061359, batch 0090/0226, train loss = 3.712935447692871
Epoch 005/005, valid ppl = 50.25530429061359, batch 0091/0226, train loss = 3.7653744220733643
Epoch 005/005, valid ppl = 50.25530429061359, batch 0092/0

Epoch 005/005, valid ppl = 50.25530429061359, batch 0169/0226, train loss = 3.5575242042541504
Epoch 005/005, valid ppl = 50.25530429061359, batch 0170/0226, train loss = 3.6985812187194824
Epoch 005/005, valid ppl = 50.25530429061359, batch 0171/0226, train loss = 3.663882255554199
Epoch 005/005, valid ppl = 50.25530429061359, batch 0172/0226, train loss = 3.9472360610961914
Epoch 005/005, valid ppl = 50.25530429061359, batch 0173/0226, train loss = 3.6347970962524414
Epoch 005/005, valid ppl = 50.25530429061359, batch 0174/0226, train loss = 3.746159553527832
Epoch 005/005, valid ppl = 50.25530429061359, batch 0175/0226, train loss = 3.862483024597168
Epoch 005/005, valid ppl = 50.25530429061359, batch 0176/0226, train loss = 3.792515993118286
Epoch 005/005, valid ppl = 50.25530429061359, batch 0177/0226, train loss = 3.736521005630493
Epoch 005/005, valid ppl = 50.25530429061359, batch 0178/0226, train loss = 3.703287363052368
Epoch 005/005, valid ppl = 50.25530429061359, batch 0179