In [1]:
import os
import pickle
import argparse
import numpy as np
from model import Options, Seq2SeqAttn


In [2]:
# Notebook / command-line configuration.
# Every option is described once as (flag, type, default, help) and registered
# below in exactly the order listed here.
_ARG_SPECS = [
    # Locations of the pre-processed data and per-word resources.
    ('--data_path', str, '../pre-data/', 'the directory to the data'),
    ('--word_embeddings_path', str, '../pre-data/word_embeddings.npy',
     'the directory to the pre-trained word embeddings'),
    ('--VAD_path', str, '../pre-data/VAD.npy', 'the directory to VAD'),
    ('--tf_path', str, '../pre-data/tf.npy', 'the directory to term frequency'),
    ('--VAD_loss_path', str, '../pre-data/VAD_loss.npy',
     'the directory to VAD loss for each word'),
    ('--ti_path', str, '../pre-data/mu_li.npy', 'the directory to term importance'),

    # Training and model hyper-parameters.
    ('--num_epochs', int, 5, 'the number of epochs to train the data'),
    ('--batch_size', int, 64, 'the batch size'),
    ('--learning_rate', float, 0.0001, 'the learning rate'),
    ('--beam_width', int, 32, 'the beam width when decoding'),
    ('--word_embed_size', int, 256, 'the size of word embeddings'),
    ('--n_hidden_units_enc', int, 256, 'the number of hidden units of encoder'),
    ('--n_hidden_units_dec', int, 256, 'the number of hidden units of decoder'),
    # NOTE(review): the original author marked attn_depth with a "?" - its
    # intended value/meaning should be confirmed against the model code.
    ('--attn_depth', int, 128, 'attention depth'),

    # Checkpoint locations for the model trained in the P(T|S) cell.
    ('--restore_path_TS', str, '../model_dailydialog_rf_noVAD/model_TS',
     'the path to restore the trained model'),
    ('--save_path_TS', str, '../model_dailydialog_rf_noVAD/model_TS',
     'the path to save the trained model to'),

    # Checkpoint locations for the model trained in the P(S|T) cell.
    ('--restore_path_ST', str, '../model_dailydialog_rf_noVAD/model_ST',
     'the path to restore the trained model'),
    ('--save_path_ST', str, '../model_dailydialog_rf_noVAD/model_ST',
     'the path to save the trained model to'),

    # A value > 0 makes the training cells restore that epoch's checkpoint.
    ('--restore_epoch', int, 0, 'the epoch to restore'),
]

parser = argparse.ArgumentParser()
for _flag, _type, _default, _help in _ARG_SPECS:
    parser.add_argument(_flag, type = _type, default = _default, help = _help)

# parse_known_args() is used instead of parse_args() so that unrecognised argv
# entries are collected in `unknown` rather than aborting (presumably needed
# because the notebook kernel is launched with its own arguments).
args, unknown = parser.parse_known_args()


In [3]:
def read_data(data_path, batch_size = None):
    """Load the train/validation arrays and the vocabulary lookups.

    Reads ``enc_input.npy``, ``dec_input.npy``, ``target.npy``,
    ``enc_input_len.npy`` and ``dec_input_len.npy`` from the ``train`` and
    ``validation`` sub-directories of ``data_path``, plus
    ``token2id.pickle`` and ``id2token.pickle`` from ``data_path`` itself.

    Each split is padded with randomly re-sampled rows so that its number of
    samples is a multiple of the batch size (i.e. the last batch is complete).

    Args:
        data_path: directory containing the two split folders and the pickles.
        batch_size: batch size used for padding; defaults to
            ``args.batch_size`` when omitted.

    Returns:
        ``(train_set, valid_set, token2id, id2token)`` where the two sets are
        dicts keyed by the array names listed above.
    """
    if batch_size is None:
        batch_size = args.batch_size

    def load_np_files(path):
        """Load one split and complete its last batch if necessary."""
        names = ('enc_input', 'dec_input', 'target', 'enc_input_len', 'dec_input_len')
        my_set = {name: np.load(os.path.join(path, name + '.npy')) for name in names}

        # Number of samples that do not fill a whole batch. The remainder is
        # taken on the sample COUNT (not on the last index), so a single
        # left-over sample is padded as well.
        n_samples = my_set['dec_input'].shape[0]
        left_samples = n_samples % batch_size
        if left_samples:
            # Prefer re-sampling from the rows that already form complete
            # batches; when the split is smaller than one batch there are
            # none, so sample from every row instead.
            n_complete = n_samples - left_samples
            high = n_complete if n_complete > 0 else n_samples
            last_batch_idx = np.random.randint(0, high, size = batch_size - left_samples)
            idx = np.concatenate([np.arange(n_samples), last_batch_idx])

            # The same index vector is applied to every array so rows stay aligned.
            my_set = {name: array[idx] for name, array in my_set.items()}
        return my_set

    train_set = load_np_files(os.path.join(data_path, 'train'))
    valid_set = load_np_files(os.path.join(data_path, 'validation'))

    # NOTE: pickle.load executes arbitrary code from the file - only point
    # data_path at trusted, locally generated data.
    with open(os.path.join(data_path, 'token2id.pickle'), 'rb') as file:
        token2id = pickle.load(file)
    with open(os.path.join(data_path, 'id2token.pickle'), 'rb') as file:
        id2token = pickle.load(file)

    return train_set, valid_set, token2id,id2token

---
Train model maximizing P(T|S)

In [4]:
if __name__ == '__main__':
    # Load the (batch-completed) train/validation arrays and vocabulary lookups.
    train_set, valid_set, token2id,id2token = read_data(args.data_path)

    # The second axis of the encoder/decoder input arrays is handed to the
    # model as the maximum utterance length on each side.
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    # Per-word resources stored as .npy files.
    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    # Despite its name, `termfreq` holds term importance: it is loaded from
    # ti_path, not tf_path. Both vectors below are reshaped to a single column.
    termfreq = np.load(args.ti_path).reshape(-1,1)
    VAD_loss = np.load(args.VAD_loss_path).reshape(-1,1)

    # Collect every setting forwarded to Options in one place.
    option_kwargs = {
        'mode': 'TRAIN',
        'VAD_mode': 'FALSE',
        'num_epochs': args.num_epochs,
        'batch_size': args.batch_size,
        'learning_rate': args.learning_rate,
        'beam_width': args.beam_width,
        'corpus_size': len(token2id),
        'max_uttr_len_enc': max_uttr_len_enc,
        'max_uttr_len_dec': max_uttr_len_dec,
        'go_index': token2id['<go>'],
        'eos_index': token2id['<eos>'],
        'word_embed_size': args.word_embed_size,
        'n_hidden_units_enc': args.n_hidden_units_enc,
        'n_hidden_units_dec': args.n_hidden_units_dec,
        'attn_depth': args.attn_depth,
        'word_embeddings': word_embeddings,
    }
    options = Options(**option_kwargs)
    model_TS = Seq2SeqAttn(options)

    # Print the names of the model's variables (model_TS.tvars).
    for var in model_TS.tvars:
        print(var.name)

    # Start from freshly initialised variables unless a positive restore_epoch
    # asks for that epoch's checkpoint to be restored first.
    if args.restore_epoch <= 0:
        model_TS.init_tf_vars()
    else:
        ckpt_name = 'model_TS_epoch_{:03d}.ckpt'.format(args.restore_epoch)
        model_TS.restore(os.path.join(args.restore_path_TS, ckpt_name))

    model_TS.train(train_set, VAD,termfreq, VAD_loss,args.save_path_TS, args.restore_epoch, valid_set)

Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
TensorFlow variables initialized.
Start to train the model...
Epoch 001/005, valid ppl = None, batch 0001/0226, train loss = 8.088951110839844
Epoch 001/005, valid ppl = None, batch 0002/0226, train loss = 8.083484649658203
Epoch 001/005, valid ppl = None, batch 0003/0226, train loss = 8.062005043029785
Epoch 001/005, valid ppl = None, batch 0004/0226, train loss = 8.0533742904

Epoch 001/005, valid ppl = None, batch 0094/0226, train loss = 5.35126256942749
Epoch 001/005, valid ppl = None, batch 0095/0226, train loss = 5.356368541717529
Epoch 001/005, valid ppl = None, batch 0096/0226, train loss = 5.20646858215332
Epoch 001/005, valid ppl = None, batch 0097/0226, train loss = 5.4400954246521
Epoch 001/005, valid ppl = None, batch 0098/0226, train loss = 5.313283920288086
Epoch 001/005, valid ppl = None, batch 0099/0226, train loss = 5.256808757781982
Epoch 001/005, valid ppl = None, batch 0100/0226, train loss = 5.280642032623291
Epoch 001/005, valid ppl = None, batch 0101/0226, train loss = 5.141192436218262
Epoch 001/005, valid ppl = None, batch 0102/0226, train loss = 5.347930431365967
Epoch 001/005, valid ppl = None, batch 0103/0226, train loss = 5.280808448791504
Epoch 001/005, valid ppl = None, batch 0104/0226, train loss = 5.230621337890625
Epoch 001/005, valid ppl = None, batch 0105/0226, train loss = 5.293386459350586
Epoch 001/005, valid ppl = None,

Epoch 001/005, valid ppl = None, batch 0196/0226, train loss = 4.8175225257873535
Epoch 001/005, valid ppl = None, batch 0197/0226, train loss = 4.835391044616699
Epoch 001/005, valid ppl = None, batch 0198/0226, train loss = 4.809834003448486
Epoch 001/005, valid ppl = None, batch 0199/0226, train loss = 4.967055797576904
Epoch 001/005, valid ppl = None, batch 0200/0226, train loss = 4.860891819000244
Epoch 001/005, valid ppl = None, batch 0201/0226, train loss = 5.029396057128906
Epoch 001/005, valid ppl = None, batch 0202/0226, train loss = 4.871562480926514
Epoch 001/005, valid ppl = None, batch 0203/0226, train loss = 4.971390247344971
Epoch 001/005, valid ppl = None, batch 0204/0226, train loss = 4.822817325592041
Epoch 001/005, valid ppl = None, batch 0205/0226, train loss = 4.6669087409973145
Epoch 001/005, valid ppl = None, batch 0206/0226, train loss = 4.762619495391846
Epoch 001/005, valid ppl = None, batch 0207/0226, train loss = 4.831693649291992
Epoch 001/005, valid ppl =

Epoch 002/005, valid ppl = 117.95818713098286, batch 0060/0226, train loss = 4.865610122680664
Epoch 002/005, valid ppl = 117.95818713098286, batch 0061/0226, train loss = 4.695831775665283
Epoch 002/005, valid ppl = 117.95818713098286, batch 0062/0226, train loss = 4.681426048278809
Epoch 002/005, valid ppl = 117.95818713098286, batch 0063/0226, train loss = 4.677123546600342
Epoch 002/005, valid ppl = 117.95818713098286, batch 0064/0226, train loss = 4.523500442504883
Epoch 002/005, valid ppl = 117.95818713098286, batch 0065/0226, train loss = 4.644879341125488
Epoch 002/005, valid ppl = 117.95818713098286, batch 0066/0226, train loss = 4.444182872772217
Epoch 002/005, valid ppl = 117.95818713098286, batch 0067/0226, train loss = 4.511797904968262
Epoch 002/005, valid ppl = 117.95818713098286, batch 0068/0226, train loss = 4.646875381469727
Epoch 002/005, valid ppl = 117.95818713098286, batch 0069/0226, train loss = 4.700347900390625
Epoch 002/005, valid ppl = 117.95818713098286, bat

Epoch 002/005, valid ppl = 117.95818713098286, batch 0147/0226, train loss = 4.598510265350342
Epoch 002/005, valid ppl = 117.95818713098286, batch 0148/0226, train loss = 4.59454870223999
Epoch 002/005, valid ppl = 117.95818713098286, batch 0149/0226, train loss = 4.453695297241211
Epoch 002/005, valid ppl = 117.95818713098286, batch 0150/0226, train loss = 4.5502119064331055
Epoch 002/005, valid ppl = 117.95818713098286, batch 0151/0226, train loss = 4.471029281616211
Epoch 002/005, valid ppl = 117.95818713098286, batch 0152/0226, train loss = 4.533938407897949
Epoch 002/005, valid ppl = 117.95818713098286, batch 0153/0226, train loss = 4.411929130554199
Epoch 002/005, valid ppl = 117.95818713098286, batch 0154/0226, train loss = 4.509737968444824
Epoch 002/005, valid ppl = 117.95818713098286, batch 0155/0226, train loss = 4.575893402099609
Epoch 002/005, valid ppl = 117.95818713098286, batch 0156/0226, train loss = 4.5748467445373535
Epoch 002/005, valid ppl = 117.95818713098286, ba

Epoch 003/005, valid ppl = 83.83633605258021, batch 0007/0226, train loss = 4.269045352935791
Epoch 003/005, valid ppl = 83.83633605258021, batch 0008/0226, train loss = 4.268401622772217
Epoch 003/005, valid ppl = 83.83633605258021, batch 0009/0226, train loss = 4.319323539733887
Epoch 003/005, valid ppl = 83.83633605258021, batch 0010/0226, train loss = 4.469695091247559
Epoch 003/005, valid ppl = 83.83633605258021, batch 0011/0226, train loss = 4.298798084259033
Epoch 003/005, valid ppl = 83.83633605258021, batch 0012/0226, train loss = 4.149795055389404
Epoch 003/005, valid ppl = 83.83633605258021, batch 0013/0226, train loss = 4.16184139251709
Epoch 003/005, valid ppl = 83.83633605258021, batch 0014/0226, train loss = 4.354247570037842
Epoch 003/005, valid ppl = 83.83633605258021, batch 0015/0226, train loss = 4.483072280883789
Epoch 003/005, valid ppl = 83.83633605258021, batch 0016/0226, train loss = 4.58226203918457
Epoch 003/005, valid ppl = 83.83633605258021, batch 0017/0226,

Epoch 003/005, valid ppl = 83.83633605258021, batch 0095/0226, train loss = 4.371263027191162
Epoch 003/005, valid ppl = 83.83633605258021, batch 0096/0226, train loss = 4.300204277038574
Epoch 003/005, valid ppl = 83.83633605258021, batch 0097/0226, train loss = 4.212101459503174
Epoch 003/005, valid ppl = 83.83633605258021, batch 0098/0226, train loss = 4.426673412322998
Epoch 003/005, valid ppl = 83.83633605258021, batch 0099/0226, train loss = 4.291828155517578
Epoch 003/005, valid ppl = 83.83633605258021, batch 0100/0226, train loss = 4.411757946014404
Epoch 003/005, valid ppl = 83.83633605258021, batch 0101/0226, train loss = 4.454925537109375
Epoch 003/005, valid ppl = 83.83633605258021, batch 0102/0226, train loss = 4.405818939208984
Epoch 003/005, valid ppl = 83.83633605258021, batch 0103/0226, train loss = 4.184712886810303
Epoch 003/005, valid ppl = 83.83633605258021, batch 0104/0226, train loss = 4.3431620597839355
Epoch 003/005, valid ppl = 83.83633605258021, batch 0105/02

Epoch 003/005, valid ppl = 83.83633605258021, batch 0183/0226, train loss = 4.142383098602295
Epoch 003/005, valid ppl = 83.83633605258021, batch 0184/0226, train loss = 4.07343864440918
Epoch 003/005, valid ppl = 83.83633605258021, batch 0185/0226, train loss = 4.01580286026001
Epoch 003/005, valid ppl = 83.83633605258021, batch 0186/0226, train loss = 4.2276082038879395
Epoch 003/005, valid ppl = 83.83633605258021, batch 0187/0226, train loss = 4.331422805786133
Epoch 003/005, valid ppl = 83.83633605258021, batch 0188/0226, train loss = 4.254114627838135
Epoch 003/005, valid ppl = 83.83633605258021, batch 0189/0226, train loss = 4.0153961181640625
Epoch 003/005, valid ppl = 83.83633605258021, batch 0190/0226, train loss = 4.291493892669678
Epoch 003/005, valid ppl = 83.83633605258021, batch 0191/0226, train loss = 4.175111293792725
Epoch 003/005, valid ppl = 83.83633605258021, batch 0192/0226, train loss = 4.346847057342529
Epoch 003/005, valid ppl = 83.83633605258021, batch 0193/022

Epoch 004/005, valid ppl = 65.37338690549294, batch 0044/0226, train loss = 4.0045061111450195
Epoch 004/005, valid ppl = 65.37338690549294, batch 0045/0226, train loss = 4.178160190582275
Epoch 004/005, valid ppl = 65.37338690549294, batch 0046/0226, train loss = 4.035097122192383
Epoch 004/005, valid ppl = 65.37338690549294, batch 0047/0226, train loss = 4.045903205871582
Epoch 004/005, valid ppl = 65.37338690549294, batch 0048/0226, train loss = 3.8744428157806396
Epoch 004/005, valid ppl = 65.37338690549294, batch 0049/0226, train loss = 4.166877269744873
Epoch 004/005, valid ppl = 65.37338690549294, batch 0050/0226, train loss = 3.983846664428711
Epoch 004/005, valid ppl = 65.37338690549294, batch 0051/0226, train loss = 4.001969337463379
Epoch 004/005, valid ppl = 65.37338690549294, batch 0052/0226, train loss = 4.2443461418151855
Epoch 004/005, valid ppl = 65.37338690549294, batch 0053/0226, train loss = 4.0518293380737305
Epoch 004/005, valid ppl = 65.37338690549294, batch 0054

Epoch 004/005, valid ppl = 65.37338690549294, batch 0131/0226, train loss = 4.034865856170654
Epoch 004/005, valid ppl = 65.37338690549294, batch 0132/0226, train loss = 3.8768210411071777
Epoch 004/005, valid ppl = 65.37338690549294, batch 0133/0226, train loss = 4.0640764236450195
Epoch 004/005, valid ppl = 65.37338690549294, batch 0134/0226, train loss = 4.001852512359619
Epoch 004/005, valid ppl = 65.37338690549294, batch 0135/0226, train loss = 4.001311779022217
Epoch 004/005, valid ppl = 65.37338690549294, batch 0136/0226, train loss = 3.9492225646972656
Epoch 004/005, valid ppl = 65.37338690549294, batch 0137/0226, train loss = 3.8753762245178223
Epoch 004/005, valid ppl = 65.37338690549294, batch 0138/0226, train loss = 4.077665328979492
Epoch 004/005, valid ppl = 65.37338690549294, batch 0139/0226, train loss = 3.925234079360962
Epoch 004/005, valid ppl = 65.37338690549294, batch 0140/0226, train loss = 3.9260306358337402
Epoch 004/005, valid ppl = 65.37338690549294, batch 014

Epoch 004/005, valid ppl = 65.37338690549294, batch 0218/0226, train loss = 4.09942102432251
Epoch 004/005, valid ppl = 65.37338690549294, batch 0219/0226, train loss = 3.7654833793640137
Epoch 004/005, valid ppl = 65.37338690549294, batch 0220/0226, train loss = 3.7504844665527344
Epoch 004/005, valid ppl = 65.37338690549294, batch 0221/0226, train loss = 3.7017695903778076
Epoch 004/005, valid ppl = 65.37338690549294, batch 0222/0226, train loss = 3.7991750240325928
Epoch 004/005, valid ppl = 65.37338690549294, batch 0223/0226, train loss = 3.9763236045837402
Epoch 004/005, valid ppl = 65.37338690549294, batch 0224/0226, train loss = 3.8859920501708984
Epoch 004/005, valid ppl = 65.37338690549294, batch 0225/0226, train loss = 3.974921464920044
Epoch 004/005, valid ppl = 65.37338690549294, batch 0226/0226, train loss = 3.8759608268737793
Saving the trained model to ../model_dailydialog_rf/model_TS/model_epoch_004.ckpt...
Epoch 005/005, valid ppl = 52.727635435626325, batch 0001/0226,

Epoch 005/005, valid ppl = 52.727635435626325, batch 0078/0226, train loss = 3.628239154815674
Epoch 005/005, valid ppl = 52.727635435626325, batch 0079/0226, train loss = 3.8736884593963623
Epoch 005/005, valid ppl = 52.727635435626325, batch 0080/0226, train loss = 3.725865602493286
Epoch 005/005, valid ppl = 52.727635435626325, batch 0081/0226, train loss = 3.7754743099212646
Epoch 005/005, valid ppl = 52.727635435626325, batch 0082/0226, train loss = 3.911797046661377
Epoch 005/005, valid ppl = 52.727635435626325, batch 0083/0226, train loss = 3.6882331371307373
Epoch 005/005, valid ppl = 52.727635435626325, batch 0084/0226, train loss = 3.854161500930786
Epoch 005/005, valid ppl = 52.727635435626325, batch 0085/0226, train loss = 3.8818423748016357
Epoch 005/005, valid ppl = 52.727635435626325, batch 0086/0226, train loss = 3.867429494857788
Epoch 005/005, valid ppl = 52.727635435626325, batch 0087/0226, train loss = 3.7899303436279297
Epoch 005/005, valid ppl = 52.727635435626325

Epoch 005/005, valid ppl = 52.727635435626325, batch 0164/0226, train loss = 3.8656256198883057
Epoch 005/005, valid ppl = 52.727635435626325, batch 0165/0226, train loss = 3.8069846630096436
Epoch 005/005, valid ppl = 52.727635435626325, batch 0166/0226, train loss = 3.792940855026245
Epoch 005/005, valid ppl = 52.727635435626325, batch 0167/0226, train loss = 3.8331758975982666
Epoch 005/005, valid ppl = 52.727635435626325, batch 0168/0226, train loss = 3.7103023529052734
Epoch 005/005, valid ppl = 52.727635435626325, batch 0169/0226, train loss = 3.838608741760254
Epoch 005/005, valid ppl = 52.727635435626325, batch 0170/0226, train loss = 3.874558925628662
Epoch 005/005, valid ppl = 52.727635435626325, batch 0171/0226, train loss = 3.781325101852417
Epoch 005/005, valid ppl = 52.727635435626325, batch 0172/0226, train loss = 3.63488507270813
Epoch 005/005, valid ppl = 52.727635435626325, batch 0173/0226, train loss = 3.8363711833953857
Epoch 005/005, valid ppl = 52.727635435626325,

---
Train model maximizing P(S|T) (the reverse direction: encoder and decoder sides are swapped)

In [5]:
def revert(myset, vocab = None):
    """Swap the encoder and decoder sides of a data set.

    The new encoder input is the old decoder input without its first column
    (the ``<go>`` token); the new decoder input is the old encoder input with
    ``<go>`` prepended; the new target is the old encoder input with ``<eos>``
    written at the first zero of each row.

    Assumes id 0 is the padding id and does not occur inside an utterance -
    TODO confirm against the pre-processing code.

    Args:
        myset: dict with ``enc_input``, ``dec_input``, ``enc_input_len`` and
            ``dec_input_len`` arrays (2-D inputs, one row per sample).
        vocab: token -> id mapping providing ``'<go>'`` and ``'<eos>'``;
            defaults to the notebook-level ``token2id``.

    Returns:
        A new dict with ``enc_input``, ``dec_input``, ``target``,
        ``enc_input_len`` and ``dec_input_len`` for the reversed direction.
    """
    if vocab is None:
        vocab = token2id

    source = myset['enc_input']

    enc_input = myset['dec_input'][:,1:]  # drop the leading <go> column
    dec_input = np.insert(source, 0, vocab['<go>'], axis=1)  # add <go> in the beginning of decoder

    # Append one zero column at the END of every row. Inserting at index -1
    # would place it before the last column, so a row without padding would
    # get <eos> ahead of its final token.
    target = np.insert(source, source.shape[1], 0, axis=1)
    # Every row now contains at least one zero; argmax on the boolean mask
    # returns the position of the first one.
    eos_pos = (target == 0).argmax(axis=1)
    target[np.arange(target.shape[0]), eos_pos] = vocab['<eos>']  # add <eos> at the end of decoder

    # NOTE(review): the length vectors are swapped unchanged although the new
    # encoder input lost <go> and the new decoder side gained a token -
    # confirm whether a -1/+1 adjustment is expected by the model.
    newset = {
        'enc_input': enc_input,
        'dec_input': dec_input,
        'target': target,
        'enc_input_len': myset['dec_input_len'],
        'dec_input_len': myset['enc_input_len'],
    }
    return newset

In [6]:
if __name__ == '__main__':
    # Reload the data, then swap the encoder/decoder sides so this model is
    # trained in the reverse direction. revert() reads the notebook-level
    # token2id that is (re)assigned on the next line.
    train_set, valid_set, token2id,id2token = read_data(args.data_path)
    train_set = revert(train_set)
    valid_set = revert(valid_set)

    # The second axis of the (swapped) input arrays is handed to the model as
    # the maximum utterance length on each side.
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    termfreq = np.load(args.ti_path) # term importance (loaded from ti_path, not tf_path)
    termfreq = termfreq.reshape(-1,1)
    VAD_loss = np.load(args.VAD_loss_path)
    VAD_loss = VAD_loss.reshape(-1,1)

    options = Options(mode = 'TRAIN',
                      VAD_mode = 'FALSE',
                      num_epochs = args.num_epochs,
                      batch_size = args.batch_size,
                      learning_rate = args.learning_rate,
                      beam_width = args.beam_width,
                      corpus_size = len(token2id),
                      max_uttr_len_enc = max_uttr_len_enc,
                      max_uttr_len_dec = max_uttr_len_dec,
                      go_index = token2id['<go>'],
                      eos_index = token2id['<eos>'],
                      word_embed_size = args.word_embed_size,
                      n_hidden_units_enc = args.n_hidden_units_enc,
                      n_hidden_units_dec = args.n_hidden_units_dec,
                      attn_depth = args.attn_depth,
                      word_embeddings = word_embeddings)
    model_ST = Seq2SeqAttn(options)

    # Print the variables of THIS model. The cell previously iterated over
    # model_TS.tvars, which listed the other model's variables and raised a
    # NameError whenever the P(T|S) cell had not been executed first.
    for var in model_ST.tvars:
        print(var.name)

    if args.restore_epoch > 0:
        # NOTE(review): the checkpoint file name still says "model_TS" although
        # it is looked up under restore_path_ST - confirm it matches the name
        # model_ST.train() uses when saving.
        model_ST.restore(os.path.join(args.restore_path_ST, 'model_TS_epoch_{:03d}.ckpt'.format(args.restore_epoch)))
    else:
        model_ST.init_tf_vars()
    model_ST.train(train_set, VAD,termfreq, VAD_loss,args.save_path_ST, args.restore_epoch, valid_set)

Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
TensorFlow variables initialized.
Start to train the model...
Epoch 001/005, valid ppl = None, batch 0001/0226, train loss = 8.102294921875
Epoch 001/005, valid ppl = None, batch 0002/0226, train loss = 8.092033386230469
Epoch 001/005, valid ppl = None, batch 0003/0226, train loss = 8.078495979309082
Epoch 001/005, valid ppl = None, batch 0004/0226, train loss = 8.0523128509521

Epoch 001/005, valid ppl = None, batch 0094/0226, train loss = 5.711510181427002
Epoch 001/005, valid ppl = None, batch 0095/0226, train loss = 5.53770112991333
Epoch 001/005, valid ppl = None, batch 0096/0226, train loss = 5.62586784362793
Epoch 001/005, valid ppl = None, batch 0097/0226, train loss = 5.600035667419434
Epoch 001/005, valid ppl = None, batch 0098/0226, train loss = 5.453129768371582
Epoch 001/005, valid ppl = None, batch 0099/0226, train loss = 5.494683742523193
Epoch 001/005, valid ppl = None, batch 0100/0226, train loss = 5.484755516052246
Epoch 001/005, valid ppl = None, batch 0101/0226, train loss = 5.590873718261719
Epoch 001/005, valid ppl = None, batch 0102/0226, train loss = 5.380161762237549
Epoch 001/005, valid ppl = None, batch 0103/0226, train loss = 5.536734104156494
Epoch 001/005, valid ppl = None, batch 0104/0226, train loss = 5.453049182891846
Epoch 001/005, valid ppl = None, batch 0105/0226, train loss = 5.478157043457031
Epoch 001/005, valid ppl = Non

Epoch 001/005, valid ppl = None, batch 0196/0226, train loss = 5.065994739532471
Epoch 001/005, valid ppl = None, batch 0197/0226, train loss = 4.82331657409668
Epoch 001/005, valid ppl = None, batch 0198/0226, train loss = 4.943610668182373
Epoch 001/005, valid ppl = None, batch 0199/0226, train loss = 5.0180792808532715
Epoch 001/005, valid ppl = None, batch 0200/0226, train loss = 5.020513534545898
Epoch 001/005, valid ppl = None, batch 0201/0226, train loss = 5.04124641418457
Epoch 001/005, valid ppl = None, batch 0202/0226, train loss = 4.904719829559326
Epoch 001/005, valid ppl = None, batch 0203/0226, train loss = 5.045432090759277
Epoch 001/005, valid ppl = None, batch 0204/0226, train loss = 4.746914863586426
Epoch 001/005, valid ppl = None, batch 0205/0226, train loss = 4.834890365600586
Epoch 001/005, valid ppl = None, batch 0206/0226, train loss = 5.038267612457275
Epoch 001/005, valid ppl = None, batch 0207/0226, train loss = 4.883409023284912
Epoch 001/005, valid ppl = No

Epoch 002/005, valid ppl = 127.79917695464735, batch 0060/0226, train loss = 4.523746967315674
Epoch 002/005, valid ppl = 127.79917695464735, batch 0061/0226, train loss = 4.7913641929626465
Epoch 002/005, valid ppl = 127.79917695464735, batch 0062/0226, train loss = 4.755381107330322
Epoch 002/005, valid ppl = 127.79917695464735, batch 0063/0226, train loss = 4.831333160400391
Epoch 002/005, valid ppl = 127.79917695464735, batch 0064/0226, train loss = 4.732765197753906
Epoch 002/005, valid ppl = 127.79917695464735, batch 0065/0226, train loss = 4.614318370819092
Epoch 002/005, valid ppl = 127.79917695464735, batch 0066/0226, train loss = 4.801609039306641
Epoch 002/005, valid ppl = 127.79917695464735, batch 0067/0226, train loss = 4.583022594451904
Epoch 002/005, valid ppl = 127.79917695464735, batch 0068/0226, train loss = 4.49279260635376
Epoch 002/005, valid ppl = 127.79917695464735, batch 0069/0226, train loss = 4.741464138031006
Epoch 002/005, valid ppl = 127.79917695464735, bat

Epoch 002/005, valid ppl = 127.79917695464735, batch 0147/0226, train loss = 4.57163143157959
Epoch 002/005, valid ppl = 127.79917695464735, batch 0148/0226, train loss = 4.5454277992248535
Epoch 002/005, valid ppl = 127.79917695464735, batch 0149/0226, train loss = 4.521693706512451
Epoch 002/005, valid ppl = 127.79917695464735, batch 0150/0226, train loss = 4.631141185760498
Epoch 002/005, valid ppl = 127.79917695464735, batch 0151/0226, train loss = 4.4507927894592285
Epoch 002/005, valid ppl = 127.79917695464735, batch 0152/0226, train loss = 4.714728355407715
Epoch 002/005, valid ppl = 127.79917695464735, batch 0153/0226, train loss = 4.4280171394348145
Epoch 002/005, valid ppl = 127.79917695464735, batch 0154/0226, train loss = 4.470146179199219
Epoch 002/005, valid ppl = 127.79917695464735, batch 0155/0226, train loss = 4.4625563621521
Epoch 002/005, valid ppl = 127.79917695464735, batch 0156/0226, train loss = 4.662348747253418
Epoch 002/005, valid ppl = 127.79917695464735, bat

Epoch 003/005, valid ppl = 85.06220583927397, batch 0007/0226, train loss = 4.4242730140686035
Epoch 003/005, valid ppl = 85.06220583927397, batch 0008/0226, train loss = 4.253048419952393
Epoch 003/005, valid ppl = 85.06220583927397, batch 0009/0226, train loss = 4.372897148132324
Epoch 003/005, valid ppl = 85.06220583927397, batch 0010/0226, train loss = 4.432713985443115
Epoch 003/005, valid ppl = 85.06220583927397, batch 0011/0226, train loss = 4.4823198318481445
Epoch 003/005, valid ppl = 85.06220583927397, batch 0012/0226, train loss = 4.531274795532227
Epoch 003/005, valid ppl = 85.06220583927397, batch 0013/0226, train loss = 4.567074298858643
Epoch 003/005, valid ppl = 85.06220583927397, batch 0014/0226, train loss = 4.482940196990967
Epoch 003/005, valid ppl = 85.06220583927397, batch 0015/0226, train loss = 4.750920295715332
Epoch 003/005, valid ppl = 85.06220583927397, batch 0016/0226, train loss = 4.32856559753418
Epoch 003/005, valid ppl = 85.06220583927397, batch 0017/02

Epoch 003/005, valid ppl = 85.06220583927397, batch 0095/0226, train loss = 4.3622331619262695
Epoch 003/005, valid ppl = 85.06220583927397, batch 0096/0226, train loss = 4.258184432983398
Epoch 003/005, valid ppl = 85.06220583927397, batch 0097/0226, train loss = 4.359480857849121
Epoch 003/005, valid ppl = 85.06220583927397, batch 0098/0226, train loss = 4.266103267669678
Epoch 003/005, valid ppl = 85.06220583927397, batch 0099/0226, train loss = 4.611269950866699
Epoch 003/005, valid ppl = 85.06220583927397, batch 0100/0226, train loss = 4.320242404937744
Epoch 003/005, valid ppl = 85.06220583927397, batch 0101/0226, train loss = 4.475290775299072
Epoch 003/005, valid ppl = 85.06220583927397, batch 0102/0226, train loss = 4.195134162902832
Epoch 003/005, valid ppl = 85.06220583927397, batch 0103/0226, train loss = 4.245851039886475
Epoch 003/005, valid ppl = 85.06220583927397, batch 0104/0226, train loss = 4.4644775390625
Epoch 003/005, valid ppl = 85.06220583927397, batch 0105/0226

Epoch 003/005, valid ppl = 85.06220583927397, batch 0183/0226, train loss = 4.127874374389648
Epoch 003/005, valid ppl = 85.06220583927397, batch 0184/0226, train loss = 4.377033710479736
Epoch 003/005, valid ppl = 85.06220583927397, batch 0185/0226, train loss = 4.143725395202637
Epoch 003/005, valid ppl = 85.06220583927397, batch 0186/0226, train loss = 4.150585651397705
Epoch 003/005, valid ppl = 85.06220583927397, batch 0187/0226, train loss = 4.170413017272949
Epoch 003/005, valid ppl = 85.06220583927397, batch 0188/0226, train loss = 4.502882957458496
Epoch 003/005, valid ppl = 85.06220583927397, batch 0189/0226, train loss = 3.946021795272827
Epoch 003/005, valid ppl = 85.06220583927397, batch 0190/0226, train loss = 4.278843402862549
Epoch 003/005, valid ppl = 85.06220583927397, batch 0191/0226, train loss = 4.3839430809021
Epoch 003/005, valid ppl = 85.06220583927397, batch 0192/0226, train loss = 4.261743068695068
Epoch 003/005, valid ppl = 85.06220583927397, batch 0193/0226,

Epoch 004/005, valid ppl = 63.882522875832414, batch 0043/0226, train loss = 4.127693176269531
Epoch 004/005, valid ppl = 63.882522875832414, batch 0044/0226, train loss = 4.117112159729004
Epoch 004/005, valid ppl = 63.882522875832414, batch 0045/0226, train loss = 3.8232383728027344
Epoch 004/005, valid ppl = 63.882522875832414, batch 0046/0226, train loss = 4.245189666748047
Epoch 004/005, valid ppl = 63.882522875832414, batch 0047/0226, train loss = 4.254871368408203
Epoch 004/005, valid ppl = 63.882522875832414, batch 0048/0226, train loss = 4.0325212478637695
Epoch 004/005, valid ppl = 63.882522875832414, batch 0049/0226, train loss = 4.139317512512207
Epoch 004/005, valid ppl = 63.882522875832414, batch 0050/0226, train loss = 4.126082420349121
Epoch 004/005, valid ppl = 63.882522875832414, batch 0051/0226, train loss = 4.03937292098999
Epoch 004/005, valid ppl = 63.882522875832414, batch 0052/0226, train loss = 4.113461017608643
Epoch 004/005, valid ppl = 63.882522875832414, ba

Epoch 004/005, valid ppl = 63.882522875832414, batch 0130/0226, train loss = 3.8795652389526367
Epoch 004/005, valid ppl = 63.882522875832414, batch 0131/0226, train loss = 3.9953436851501465
Epoch 004/005, valid ppl = 63.882522875832414, batch 0132/0226, train loss = 4.04533576965332
Epoch 004/005, valid ppl = 63.882522875832414, batch 0133/0226, train loss = 4.009659290313721
Epoch 004/005, valid ppl = 63.882522875832414, batch 0134/0226, train loss = 3.817899465560913
Epoch 004/005, valid ppl = 63.882522875832414, batch 0135/0226, train loss = 4.013462066650391
Epoch 004/005, valid ppl = 63.882522875832414, batch 0136/0226, train loss = 4.020383358001709
Epoch 004/005, valid ppl = 63.882522875832414, batch 0137/0226, train loss = 3.919435501098633
Epoch 004/005, valid ppl = 63.882522875832414, batch 0138/0226, train loss = 4.026352405548096
Epoch 004/005, valid ppl = 63.882522875832414, batch 0139/0226, train loss = 4.019875526428223
Epoch 004/005, valid ppl = 63.882522875832414, ba

Epoch 004/005, valid ppl = 63.882522875832414, batch 0216/0226, train loss = 4.060579299926758
Epoch 004/005, valid ppl = 63.882522875832414, batch 0217/0226, train loss = 3.8574676513671875
Epoch 004/005, valid ppl = 63.882522875832414, batch 0218/0226, train loss = 3.9804866313934326
Epoch 004/005, valid ppl = 63.882522875832414, batch 0219/0226, train loss = 3.777012825012207
Epoch 004/005, valid ppl = 63.882522875832414, batch 0220/0226, train loss = 3.9559366703033447
Epoch 004/005, valid ppl = 63.882522875832414, batch 0221/0226, train loss = 3.919271230697632
Epoch 004/005, valid ppl = 63.882522875832414, batch 0222/0226, train loss = 3.799790382385254
Epoch 004/005, valid ppl = 63.882522875832414, batch 0223/0226, train loss = 3.894270896911621
Epoch 004/005, valid ppl = 63.882522875832414, batch 0224/0226, train loss = 3.6277785301208496
Epoch 004/005, valid ppl = 63.882522875832414, batch 0225/0226, train loss = 3.785543918609619
Epoch 004/005, valid ppl = 63.882522875832414,

Epoch 005/005, valid ppl = 50.497177656448855, batch 0075/0226, train loss = 3.724844455718994
Epoch 005/005, valid ppl = 50.497177656448855, batch 0076/0226, train loss = 4.194393634796143
Epoch 005/005, valid ppl = 50.497177656448855, batch 0077/0226, train loss = 3.9166271686553955
Epoch 005/005, valid ppl = 50.497177656448855, batch 0078/0226, train loss = 3.6815903186798096
Epoch 005/005, valid ppl = 50.497177656448855, batch 0079/0226, train loss = 3.8549563884735107
Epoch 005/005, valid ppl = 50.497177656448855, batch 0080/0226, train loss = 3.816321611404419
Epoch 005/005, valid ppl = 50.497177656448855, batch 0081/0226, train loss = 3.7308919429779053
Epoch 005/005, valid ppl = 50.497177656448855, batch 0082/0226, train loss = 3.949897289276123
Epoch 005/005, valid ppl = 50.497177656448855, batch 0083/0226, train loss = 3.7617337703704834
Epoch 005/005, valid ppl = 50.497177656448855, batch 0084/0226, train loss = 3.5141139030456543
Epoch 005/005, valid ppl = 50.49717765644885

Epoch 005/005, valid ppl = 50.497177656448855, batch 0161/0226, train loss = 3.782019853591919
Epoch 005/005, valid ppl = 50.497177656448855, batch 0162/0226, train loss = 3.752420425415039
Epoch 005/005, valid ppl = 50.497177656448855, batch 0163/0226, train loss = 3.651052713394165
Epoch 005/005, valid ppl = 50.497177656448855, batch 0164/0226, train loss = 3.538804054260254
Epoch 005/005, valid ppl = 50.497177656448855, batch 0165/0226, train loss = 3.6885879039764404
Epoch 005/005, valid ppl = 50.497177656448855, batch 0166/0226, train loss = 3.803370714187622
Epoch 005/005, valid ppl = 50.497177656448855, batch 0167/0226, train loss = 3.754439353942871
Epoch 005/005, valid ppl = 50.497177656448855, batch 0168/0226, train loss = 3.6097159385681152
Epoch 005/005, valid ppl = 50.497177656448855, batch 0169/0226, train loss = 3.7017736434936523
Epoch 005/005, valid ppl = 50.497177656448855, batch 0170/0226, train loss = 3.570158004760742
Epoch 005/005, valid ppl = 50.497177656448855, 