In [1]:
import os
import pickle
import argparse
import numpy as np
from model import Options, Seq2SeqAttn


In [2]:
# Parse the command line arguments.
save_dir = '/Users/yan/Documents/document/EPFL/MA2/semesterprj/code/seq2seq_attn/affect-rich/input/'
output_dir = '/Users/yan/Documents/document/EPFL/MA2/semesterprj/code/seq2seq_attn/affect-rich/output/'

parser = argparse.ArgumentParser()
parser.add_argument('--data_path', type = str, default = save_dir,
                    help = 'the directory to the data')

parser.add_argument('--word_embeddings_path', type = str, default = save_dir+'word_embeddings.npy',
                    help = 'the directory to the pre-trained word embeddings')
parser.add_argument('--VAD_path', type = str, default = save_dir+'VAD.npy',
                    help = 'the directory to VAD')
parser.add_argument('--tf_path', type = str, default = save_dir+'tf.npy',
                    help = 'the directory to term frequency')
parser.add_argument('--VAD_loss_path', type = str, default = save_dir+'VAD_loss.npy',
                    help = 'the directory to VAD loss for each word')
parser.add_argument('--ti_path', type = str, default = save_dir+'mu_li.npy',
                    help = 'the directory to term importance')

parser.add_argument('--num_epochs', type = int, default = 3,
                    help = 'the number of epochs to train the data')
parser.add_argument('--batch_size', type = int, default = 64,
                    help = 'the batch size')
parser.add_argument('--learning_rate', type = float, default = 0.001,
                    help = 'the learning rate')
parser.add_argument('--beam_width', type = int, default = 32,
                    help = 'the beam width when decoding')
parser.add_argument('--word_embed_size', type = int, default = 300,
                    help = 'the size of word embeddings')
parser.add_argument('--n_hidden_units_enc', type = int, default = 256,
                    help = 'the number of hidden units of encoder')
parser.add_argument('--n_hidden_units_dec', type = int, default = 256,
                    help = 'the number of hidden units of decoder')
# ? attn_depth
parser.add_argument('--attn_depth', type = int, default = 128,
                    help = 'attention depth')

parser.add_argument('--restore_path_TS', type = str, default = output_dir+'model_dailydialog_rf_noVAD/model_TS',
                    help = 'the path to restore the trained model')
parser.add_argument('--save_path_TS', type = str, default = output_dir+'/model_dailydialog_rf_noVAD/model_TS',
                    help = 'the path to save the trained model to')

parser.add_argument('--restore_path_ST', type = str, default = output_dir+'model_dailydialog_rf_noVAD/model_ST',
                    help = 'the path to restore the trained model')
parser.add_argument('--save_path_ST', type = str, default = output_dir+'/model_dailydialog_rf_noVAD/model_ST',
                    help = 'the path to save the trained model to')

parser.add_argument('--restore_epoch', type = int, default = 0,
                    help = 'the epoch to restore')

# args = parser.parse_args()
args, unknown = parser.parse_known_args()


In [3]:
def read_data(data_path):
    def load_np_files(path):
        my_set = {}
        my_set['enc_input'] = np.load(os.path.join(path, 'enc_input.npy'))
        my_set['dec_input'] = np.load(os.path.join(path, 'dec_input.npy'))
        my_set['target'] = np.load(os.path.join(path, 'target.npy'))
        my_set['enc_input_len'] = np.load(os.path.join(path, 'enc_input_len.npy'))
        my_set['dec_input_len'] = np.load(os.path.join(path, 'dec_input_len.npy'))
        # to check if or not to complete the last batch
        idx = np.arange(my_set['dec_input'].shape[0])
        left_samples = idx[-1]%args.batch_size
        if left_samples:
            last_batch_idx = np.random.randint(0,idx[-1]-left_samples,size = args.batch_size - left_samples - 1)
            idx = np.concatenate([idx,last_batch_idx])
            
            my_set['enc_input'] = my_set['enc_input'][idx]
            my_set['dec_input'] = my_set['dec_input'][idx]
            my_set['target'] = my_set['target'][idx]
            my_set['enc_input_len'] = my_set['enc_input_len'][idx]
            my_set['dec_input_len'] = my_set['dec_input_len'][idx]
        return my_set
    train_set = load_np_files(os.path.join(data_path, 'train'))
    valid_set = load_np_files(os.path.join(data_path, 'validation'))
    
    with open(os.path.join(data_path, 'token2id.pickle'), 'rb') as file:
        token2id = pickle.load(file)
    with open(os.path.join(data_path, 'id2token.pickle'), 'rb') as file:
        id2token = pickle.load(file)

    return train_set, valid_set, token2id,id2token

---
Train model maximizing P(T|S)

In [4]:
train_set, valid_set, token2id,id2token = read_data(args.data_path)

In [5]:
train_set['enc_input'].shape

(46400, 20)

In [6]:
if __name__ == '__main__':
    train_set, valid_set, token2id,id2token = read_data(args.data_path)
#     train_set['enc_input'] = train_set['enc_input'][:128,]
    
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    termfreq = np.load(args.ti_path) # term importance
    termfreq = termfreq.reshape(-1,1)
    VAD_loss = np.load(args.VAD_loss_path)
    VAD_loss = VAD_loss.reshape(-1,1)
    
    options = Options(mode = 'TRAIN',
                      VAD_mode = False,
                      num_epochs = args.num_epochs,
                      batch_size = args.batch_size,
                      learning_rate = args.learning_rate,
                      beam_width = args.beam_width,
                      corpus_size = len(token2id),
                      max_uttr_len_enc = max_uttr_len_enc,
                      max_uttr_len_dec = max_uttr_len_dec,
                      go_index = token2id['<go>'],
                      eos_index = token2id['<eos>'],
                      word_embed_size = args.word_embed_size,
                      n_hidden_units_enc = args.n_hidden_units_enc,
                      n_hidden_units_dec = args.n_hidden_units_dec,
                      attn_depth = args.attn_depth,
                      word_embeddings = word_embeddings)
    model_TS = Seq2SeqAttn(options)

    for var in model_TS.tvars:
        print(var.name)

    if args.restore_epoch > 0:
        model_TS.restore(os.path.join(args.restore_path_TS, 'model_epoch_{:03d}.ckpt'.format(args.restore_epoch)))
    else:
        model_TS.init_tf_vars()
    model_TS.train(train_set, VAD,termfreq, VAD_loss,args.save_path_TS, args.restore_epoch, valid_set)

Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
TensorFlow variables initialized.
Start to train the model...
Epoch 001/003, valid ppl = None, batch 0001/0725, train loss = 9.997103691101074
Epoch 001/003, valid ppl = None, batch 0002/0725, train loss = 9.832530975341797
Epoch 001/003, valid ppl = None, batch 0003/0725, train loss = 9.66786003112793
Epoch 001/003, valid ppl = None, batch 0004/0725, train loss = 9.470152854919434
Epoch 001/003, valid ppl = None, batch 0005/0725

Epoch 001/003, valid ppl = None, batch 0095/0725, train loss = 4.860218524932861
Epoch 001/003, valid ppl = None, batch 0096/0725, train loss = 5.312703609466553
Epoch 001/003, valid ppl = None, batch 0097/0725, train loss = 4.865604877471924
Epoch 001/003, valid ppl = None, batch 0098/0725, train loss = 4.972722053527832
Epoch 001/003, valid ppl = None, batch 0099/0725, train loss = 5.010200500488281
Epoch 001/003, valid ppl = None, batch 0100/0725, train loss = 4.911144256591797
Epoch 001/003, valid ppl = None, batch 0101/0725, train loss = 5.053136348724365
Epoch 001/003, valid ppl = None, batch 0102/0725, train loss = 4.901992321014404
Epoch 001/003, valid ppl = None, batch 0103/0725, train loss = 4.73117733001709
Epoch 001/003, valid ppl = None, batch 0104/0725, train loss = 4.737037181854248
Epoch 001/003, valid ppl = None, batch 0105/0725, train loss = 4.713125228881836
Epoch 001/003, valid ppl = None, batch 0106/0725, train loss = 4.852753162384033
Epoch 001/003, valid ppl = No

Epoch 001/003, valid ppl = None, batch 0197/0725, train loss = 4.880639553070068
Epoch 001/003, valid ppl = None, batch 0198/0725, train loss = 4.5918498039245605
Epoch 001/003, valid ppl = None, batch 0199/0725, train loss = 4.538132667541504
Epoch 001/003, valid ppl = None, batch 0200/0725, train loss = 4.615043640136719
Epoch 001/003, valid ppl = None, batch 0201/0725, train loss = 4.962582111358643
Epoch 001/003, valid ppl = None, batch 0202/0725, train loss = 4.765137672424316
Epoch 001/003, valid ppl = None, batch 0203/0725, train loss = 4.604966163635254
Epoch 001/003, valid ppl = None, batch 0204/0725, train loss = 4.798954010009766
Epoch 001/003, valid ppl = None, batch 0205/0725, train loss = 4.4516377449035645
Epoch 001/003, valid ppl = None, batch 0206/0725, train loss = 4.682681083679199
Epoch 001/003, valid ppl = None, batch 0207/0725, train loss = 4.773443698883057
Epoch 001/003, valid ppl = None, batch 0208/0725, train loss = 4.884274005889893
Epoch 001/003, valid ppl =

Epoch 001/003, valid ppl = None, batch 0299/0725, train loss = 4.177657127380371
Epoch 001/003, valid ppl = None, batch 0300/0725, train loss = 4.489805698394775
Epoch 001/003, valid ppl = None, batch 0301/0725, train loss = 4.487781524658203
Epoch 001/003, valid ppl = None, batch 0302/0725, train loss = 4.177856922149658
Epoch 001/003, valid ppl = None, batch 0303/0725, train loss = 4.5559844970703125
Epoch 001/003, valid ppl = None, batch 0304/0725, train loss = 4.410938262939453
Epoch 001/003, valid ppl = None, batch 0305/0725, train loss = 4.434031009674072
Epoch 001/003, valid ppl = None, batch 0306/0725, train loss = 4.398378372192383
Epoch 001/003, valid ppl = None, batch 0307/0725, train loss = 4.572534561157227
Epoch 001/003, valid ppl = None, batch 0308/0725, train loss = 4.58293342590332
Epoch 001/003, valid ppl = None, batch 0309/0725, train loss = 4.574490547180176
Epoch 001/003, valid ppl = None, batch 0310/0725, train loss = 4.631105422973633
Epoch 001/003, valid ppl = N

Epoch 001/003, valid ppl = None, batch 0401/0725, train loss = 4.294539451599121
Epoch 001/003, valid ppl = None, batch 0402/0725, train loss = 4.430874347686768
Epoch 001/003, valid ppl = None, batch 0403/0725, train loss = 4.599501609802246
Epoch 001/003, valid ppl = None, batch 0404/0725, train loss = 4.23789119720459
Epoch 001/003, valid ppl = None, batch 0405/0725, train loss = 4.485577583312988
Epoch 001/003, valid ppl = None, batch 0406/0725, train loss = 4.407413959503174
Epoch 001/003, valid ppl = None, batch 0407/0725, train loss = 4.384725093841553
Epoch 001/003, valid ppl = None, batch 0408/0725, train loss = 4.190430641174316
Epoch 001/003, valid ppl = None, batch 0409/0725, train loss = 4.640187740325928
Epoch 001/003, valid ppl = None, batch 0410/0725, train loss = 4.180296897888184
Epoch 001/003, valid ppl = None, batch 0411/0725, train loss = 4.167431354522705
Epoch 001/003, valid ppl = None, batch 0412/0725, train loss = 4.247748851776123
Epoch 001/003, valid ppl = No

Epoch 001/003, valid ppl = None, batch 0503/0725, train loss = 3.933210611343384
Epoch 001/003, valid ppl = None, batch 0504/0725, train loss = 4.342437267303467
Epoch 001/003, valid ppl = None, batch 0505/0725, train loss = 4.288024425506592
Epoch 001/003, valid ppl = None, batch 0506/0725, train loss = 4.032937526702881
Epoch 001/003, valid ppl = None, batch 0507/0725, train loss = 4.192527770996094
Epoch 001/003, valid ppl = None, batch 0508/0725, train loss = 4.427724361419678
Epoch 001/003, valid ppl = None, batch 0509/0725, train loss = 4.183162212371826
Epoch 001/003, valid ppl = None, batch 0510/0725, train loss = 4.315271377563477
Epoch 001/003, valid ppl = None, batch 0511/0725, train loss = 4.247882843017578
Epoch 001/003, valid ppl = None, batch 0512/0725, train loss = 3.93652081489563
Epoch 001/003, valid ppl = None, batch 0513/0725, train loss = 4.247684001922607
Epoch 001/003, valid ppl = None, batch 0514/0725, train loss = 4.323773384094238
Epoch 001/003, valid ppl = No

Epoch 001/003, valid ppl = None, batch 0605/0725, train loss = 4.483853340148926
Epoch 001/003, valid ppl = None, batch 0606/0725, train loss = 4.34060001373291
Epoch 001/003, valid ppl = None, batch 0607/0725, train loss = 4.21736478805542
Epoch 001/003, valid ppl = None, batch 0608/0725, train loss = 4.091705799102783
Epoch 001/003, valid ppl = None, batch 0609/0725, train loss = 4.443647384643555
Epoch 001/003, valid ppl = None, batch 0610/0725, train loss = 4.020394325256348
Epoch 001/003, valid ppl = None, batch 0611/0725, train loss = 4.4254021644592285
Epoch 001/003, valid ppl = None, batch 0612/0725, train loss = 4.19633674621582
Epoch 001/003, valid ppl = None, batch 0613/0725, train loss = 4.156355857849121
Epoch 001/003, valid ppl = None, batch 0614/0725, train loss = 4.003321170806885
Epoch 001/003, valid ppl = None, batch 0615/0725, train loss = 4.279946804046631
Epoch 001/003, valid ppl = None, batch 0616/0725, train loss = 4.3188323974609375
Epoch 001/003, valid ppl = No

Epoch 001/003, valid ppl = None, batch 0707/0725, train loss = 3.967344284057617
Epoch 001/003, valid ppl = None, batch 0708/0725, train loss = 4.226655006408691
Epoch 001/003, valid ppl = None, batch 0709/0725, train loss = 3.892012357711792
Epoch 001/003, valid ppl = None, batch 0710/0725, train loss = 4.30521297454834
Epoch 001/003, valid ppl = None, batch 0711/0725, train loss = 4.096858978271484
Epoch 001/003, valid ppl = None, batch 0712/0725, train loss = 4.060495853424072
Epoch 001/003, valid ppl = None, batch 0713/0725, train loss = 4.069798946380615
Epoch 001/003, valid ppl = None, batch 0714/0725, train loss = 4.172112464904785
Epoch 001/003, valid ppl = None, batch 0715/0725, train loss = 4.1418681144714355
Epoch 001/003, valid ppl = None, batch 0716/0725, train loss = 4.188450336456299
Epoch 001/003, valid ppl = None, batch 0717/0725, train loss = 4.318543434143066
Epoch 001/003, valid ppl = None, batch 0718/0725, train loss = 3.987241506576538
Epoch 001/003, valid ppl = N

Epoch 002/003, valid ppl = 63.3399736056754, batch 0071/0725, train loss = 3.546990394592285
Epoch 002/003, valid ppl = 63.3399736056754, batch 0072/0725, train loss = 3.904982328414917
Epoch 002/003, valid ppl = 63.3399736056754, batch 0073/0725, train loss = 3.938284158706665
Epoch 002/003, valid ppl = 63.3399736056754, batch 0074/0725, train loss = 4.060672760009766
Epoch 002/003, valid ppl = 63.3399736056754, batch 0075/0725, train loss = 3.6677472591400146
Epoch 002/003, valid ppl = 63.3399736056754, batch 0076/0725, train loss = 3.8421823978424072
Epoch 002/003, valid ppl = 63.3399736056754, batch 0077/0725, train loss = 3.832784652709961
Epoch 002/003, valid ppl = 63.3399736056754, batch 0078/0725, train loss = 3.826345205307007
Epoch 002/003, valid ppl = 63.3399736056754, batch 0079/0725, train loss = 3.9570820331573486
Epoch 002/003, valid ppl = 63.3399736056754, batch 0080/0725, train loss = 3.6108031272888184
Epoch 002/003, valid ppl = 63.3399736056754, batch 0081/0725, trai

Epoch 002/003, valid ppl = 63.3399736056754, batch 0159/0725, train loss = 3.904268503189087
Epoch 002/003, valid ppl = 63.3399736056754, batch 0160/0725, train loss = 3.9054453372955322
Epoch 002/003, valid ppl = 63.3399736056754, batch 0161/0725, train loss = 3.865385055541992
Epoch 002/003, valid ppl = 63.3399736056754, batch 0162/0725, train loss = 3.9950742721557617
Epoch 002/003, valid ppl = 63.3399736056754, batch 0163/0725, train loss = 4.017649173736572
Epoch 002/003, valid ppl = 63.3399736056754, batch 0164/0725, train loss = 3.6728906631469727
Epoch 002/003, valid ppl = 63.3399736056754, batch 0165/0725, train loss = 3.9152116775512695
Epoch 002/003, valid ppl = 63.3399736056754, batch 0166/0725, train loss = 3.8423895835876465
Epoch 002/003, valid ppl = 63.3399736056754, batch 0167/0725, train loss = 3.857224941253662
Epoch 002/003, valid ppl = 63.3399736056754, batch 0168/0725, train loss = 3.9080278873443604
Epoch 002/003, valid ppl = 63.3399736056754, batch 0169/0725, tr

Epoch 002/003, valid ppl = 63.3399736056754, batch 0247/0725, train loss = 3.821248769760132
Epoch 002/003, valid ppl = 63.3399736056754, batch 0248/0725, train loss = 3.890195608139038
Epoch 002/003, valid ppl = 63.3399736056754, batch 0249/0725, train loss = 3.747215509414673
Epoch 002/003, valid ppl = 63.3399736056754, batch 0250/0725, train loss = 3.905595541000366
Epoch 002/003, valid ppl = 63.3399736056754, batch 0251/0725, train loss = 3.6937172412872314
Epoch 002/003, valid ppl = 63.3399736056754, batch 0252/0725, train loss = 3.89815354347229
Epoch 002/003, valid ppl = 63.3399736056754, batch 0253/0725, train loss = 4.024538516998291
Epoch 002/003, valid ppl = 63.3399736056754, batch 0254/0725, train loss = 3.8715977668762207
Epoch 002/003, valid ppl = 63.3399736056754, batch 0255/0725, train loss = 4.019964218139648
Epoch 002/003, valid ppl = 63.3399736056754, batch 0256/0725, train loss = 4.136954307556152
Epoch 002/003, valid ppl = 63.3399736056754, batch 0257/0725, train l

Epoch 002/003, valid ppl = 63.3399736056754, batch 0335/0725, train loss = 4.052115440368652
Epoch 002/003, valid ppl = 63.3399736056754, batch 0336/0725, train loss = 3.778634548187256
Epoch 002/003, valid ppl = 63.3399736056754, batch 0337/0725, train loss = 3.4545364379882812
Epoch 002/003, valid ppl = 63.3399736056754, batch 0338/0725, train loss = 3.750166893005371
Epoch 002/003, valid ppl = 63.3399736056754, batch 0339/0725, train loss = 4.0358662605285645
Epoch 002/003, valid ppl = 63.3399736056754, batch 0340/0725, train loss = 3.936466693878174
Epoch 002/003, valid ppl = 63.3399736056754, batch 0341/0725, train loss = 3.6995370388031006
Epoch 002/003, valid ppl = 63.3399736056754, batch 0342/0725, train loss = 3.694521903991699
Epoch 002/003, valid ppl = 63.3399736056754, batch 0343/0725, train loss = 3.867457866668701
Epoch 002/003, valid ppl = 63.3399736056754, batch 0344/0725, train loss = 3.7004921436309814
Epoch 002/003, valid ppl = 63.3399736056754, batch 0345/0725, trai

Epoch 002/003, valid ppl = 63.3399736056754, batch 0423/0725, train loss = 3.7162914276123047
Epoch 002/003, valid ppl = 63.3399736056754, batch 0424/0725, train loss = 3.887932538986206
Epoch 002/003, valid ppl = 63.3399736056754, batch 0425/0725, train loss = 3.770714521408081
Epoch 002/003, valid ppl = 63.3399736056754, batch 0426/0725, train loss = 3.7122435569763184
Epoch 002/003, valid ppl = 63.3399736056754, batch 0427/0725, train loss = 3.833315134048462
Epoch 002/003, valid ppl = 63.3399736056754, batch 0428/0725, train loss = 3.6321356296539307
Epoch 002/003, valid ppl = 63.3399736056754, batch 0429/0725, train loss = 3.685088872909546
Epoch 002/003, valid ppl = 63.3399736056754, batch 0430/0725, train loss = 3.804158926010132
Epoch 002/003, valid ppl = 63.3399736056754, batch 0431/0725, train loss = 3.803469657897949
Epoch 002/003, valid ppl = 63.3399736056754, batch 0432/0725, train loss = 3.7077527046203613
Epoch 002/003, valid ppl = 63.3399736056754, batch 0433/0725, trai

KeyboardInterrupt: 

---
Train model P(S|T)

In [7]:
def revert(myset):
    enc_input = myset['dec_input'][:,1:]
    dec_input =  np.insert(myset['enc_input'], 0, token2id['<go>'], axis=1) # add <go> in the beginning of decoder

    target = np.insert(myset['enc_input'], -1, 0, axis=1) 
    tmp_idx = [np.where(s==0)[0][0] for s in target] 
    target[np.arange(target.shape[0]),tmp_idx] = token2id['<eos>'] # add <eos> at the end of decoder
    
    newset = {}
    
    newset['enc_input'] = enc_input
    newset['dec_input'] = dec_input
    newset['target'] = target
    newset['enc_input_len'] = myset['dec_input_len']
    newset['dec_input_len'] = myset['enc_input_len']
    return newset

In [8]:
if __name__ == '__main__':
    train_set, valid_set, token2id,id2token = read_data(args.data_path)
    train_set = revert(train_set)
    valid_set = revert(valid_set)
#     train_set['enc_input'] = train_set['enc_input'][:128,]
    
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    termfreq = np.load(args.ti_path) # term importance
    termfreq = termfreq.reshape(-1,1)
    VAD_loss = np.load(args.VAD_loss_path)
    VAD_loss = VAD_loss.reshape(-1,1)
    
    options = Options(mode = 'TRAIN',
                      VAD_mode = 'FALSE',
                      num_epochs = args.num_epochs,
                      batch_size = args.batch_size,
                      learning_rate = args.learning_rate,
                      beam_width = args.beam_width,
                      corpus_size = len(token2id),
                      max_uttr_len_enc = max_uttr_len_enc,
                      max_uttr_len_dec = max_uttr_len_dec,
                      go_index = token2id['<go>'],
                      eos_index = token2id['<eos>'],
                      word_embed_size = args.word_embed_size,
                      n_hidden_units_enc = args.n_hidden_units_enc,
                      n_hidden_units_dec = args.n_hidden_units_dec,
                      attn_depth = args.attn_depth,
                      word_embeddings = word_embeddings)
    model_ST = Seq2SeqAttn(options)

    for var in model_ST.tvars:
        print(var.name)

    if args.restore_epoch > 0:
        model_ST.restore(os.path.join(args.restore_path_ST, 'model_epoch_{:03d}.ckpt'.format(args.restore_epoch)))
    else:
        model_ST.init_tf_vars()
    model_ST.train(train_set, VAD,termfreq, VAD_loss,args.save_path_ST, args.restore_epoch, valid_set)

Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
TensorFlow variables initialized.
Start to train the model...
Epoch 001/003, valid ppl = None, batch 0001/0725, train loss = 9.990707397460938
Epoch 001/003, valid ppl = None, batch 0002/0725, train loss = 9.855420112609863
Epoch 001/003, valid ppl = None, batch 0003/0725, train loss = 9.725510597229004
Epoch 001/003, valid ppl = None, batch 0004/0725, train loss = 9.5158042907

Epoch 001/003, valid ppl = None, batch 0094/0725, train loss = 4.883930206298828
Epoch 001/003, valid ppl = None, batch 0095/0725, train loss = 5.187080383300781
Epoch 001/003, valid ppl = None, batch 0096/0725, train loss = 5.111042022705078
Epoch 001/003, valid ppl = None, batch 0097/0725, train loss = 5.306817054748535
Epoch 001/003, valid ppl = None, batch 0098/0725, train loss = 5.258883476257324
Epoch 001/003, valid ppl = None, batch 0099/0725, train loss = 5.0688982009887695
Epoch 001/003, valid ppl = None, batch 0100/0725, train loss = 5.270203113555908
Epoch 001/003, valid ppl = None, batch 0101/0725, train loss = 4.951502799987793
Epoch 001/003, valid ppl = None, batch 0102/0725, train loss = 4.980239391326904
Epoch 001/003, valid ppl = None, batch 0103/0725, train loss = 5.097230911254883
Epoch 001/003, valid ppl = None, batch 0104/0725, train loss = 5.236363887786865
Epoch 001/003, valid ppl = None, batch 0105/0725, train loss = 5.241433620452881
Epoch 001/003, valid ppl = 

Epoch 001/003, valid ppl = None, batch 0196/0725, train loss = 4.815104961395264
Epoch 001/003, valid ppl = None, batch 0197/0725, train loss = 4.779452800750732
Epoch 001/003, valid ppl = None, batch 0198/0725, train loss = 4.60347318649292
Epoch 001/003, valid ppl = None, batch 0199/0725, train loss = 4.8945183753967285
Epoch 001/003, valid ppl = None, batch 0200/0725, train loss = 4.635896682739258
Epoch 001/003, valid ppl = None, batch 0201/0725, train loss = 4.866043567657471
Epoch 001/003, valid ppl = None, batch 0202/0725, train loss = 4.663118362426758
Epoch 001/003, valid ppl = None, batch 0203/0725, train loss = 4.947104454040527
Epoch 001/003, valid ppl = None, batch 0204/0725, train loss = 4.838286399841309
Epoch 001/003, valid ppl = None, batch 0205/0725, train loss = 4.4580078125
Epoch 001/003, valid ppl = None, batch 0206/0725, train loss = 4.3888726234436035
Epoch 001/003, valid ppl = None, batch 0207/0725, train loss = 4.5985822677612305
Epoch 001/003, valid ppl = None

Epoch 001/003, valid ppl = None, batch 0298/0725, train loss = 4.119123935699463
Epoch 001/003, valid ppl = None, batch 0299/0725, train loss = 4.507351875305176
Epoch 001/003, valid ppl = None, batch 0300/0725, train loss = 4.299500942230225
Epoch 001/003, valid ppl = None, batch 0301/0725, train loss = 4.645863056182861
Epoch 001/003, valid ppl = None, batch 0302/0725, train loss = 4.926673412322998
Epoch 001/003, valid ppl = None, batch 0303/0725, train loss = 4.456812381744385
Epoch 001/003, valid ppl = None, batch 0304/0725, train loss = 4.607220649719238
Epoch 001/003, valid ppl = None, batch 0305/0725, train loss = 4.775251388549805
Epoch 001/003, valid ppl = None, batch 0306/0725, train loss = 4.679826736450195
Epoch 001/003, valid ppl = None, batch 0307/0725, train loss = 4.278611183166504
Epoch 001/003, valid ppl = None, batch 0308/0725, train loss = 4.334272861480713
Epoch 001/003, valid ppl = None, batch 0309/0725, train loss = 4.471628189086914
Epoch 001/003, valid ppl = N

Epoch 001/003, valid ppl = None, batch 0400/0725, train loss = 4.366425514221191
Epoch 001/003, valid ppl = None, batch 0401/0725, train loss = 4.580059051513672
Epoch 001/003, valid ppl = None, batch 0402/0725, train loss = 4.464258193969727
Epoch 001/003, valid ppl = None, batch 0403/0725, train loss = 4.154728412628174
Epoch 001/003, valid ppl = None, batch 0404/0725, train loss = 4.295827865600586
Epoch 001/003, valid ppl = None, batch 0405/0725, train loss = 4.284739971160889
Epoch 001/003, valid ppl = None, batch 0406/0725, train loss = 4.624973773956299
Epoch 001/003, valid ppl = None, batch 0407/0725, train loss = 4.4171319007873535
Epoch 001/003, valid ppl = None, batch 0408/0725, train loss = 4.193425178527832
Epoch 001/003, valid ppl = None, batch 0409/0725, train loss = 4.662446975708008
Epoch 001/003, valid ppl = None, batch 0410/0725, train loss = 4.421490669250488
Epoch 001/003, valid ppl = None, batch 0411/0725, train loss = 4.228417873382568
Epoch 001/003, valid ppl = 

Epoch 001/003, valid ppl = None, batch 0502/0725, train loss = 4.189351558685303
Epoch 001/003, valid ppl = None, batch 0503/0725, train loss = 4.440555572509766
Epoch 001/003, valid ppl = None, batch 0504/0725, train loss = 4.108150482177734
Epoch 001/003, valid ppl = None, batch 0505/0725, train loss = 4.246307849884033
Epoch 001/003, valid ppl = None, batch 0506/0725, train loss = 4.512243747711182
Epoch 001/003, valid ppl = None, batch 0507/0725, train loss = 4.431280612945557
Epoch 001/003, valid ppl = None, batch 0508/0725, train loss = 4.123673915863037
Epoch 001/003, valid ppl = None, batch 0509/0725, train loss = 4.5843424797058105
Epoch 001/003, valid ppl = None, batch 0510/0725, train loss = 4.158246994018555
Epoch 001/003, valid ppl = None, batch 0511/0725, train loss = 4.567461013793945
Epoch 001/003, valid ppl = None, batch 0512/0725, train loss = 4.124699115753174
Epoch 001/003, valid ppl = None, batch 0513/0725, train loss = 4.276573657989502
Epoch 001/003, valid ppl = 

Epoch 001/003, valid ppl = None, batch 0604/0725, train loss = 3.890913963317871
Epoch 001/003, valid ppl = None, batch 0605/0725, train loss = 4.296520709991455
Epoch 001/003, valid ppl = None, batch 0606/0725, train loss = 4.056914806365967
Epoch 001/003, valid ppl = None, batch 0607/0725, train loss = 4.381605625152588
Epoch 001/003, valid ppl = None, batch 0608/0725, train loss = 3.9507951736450195
Epoch 001/003, valid ppl = None, batch 0609/0725, train loss = 4.036889553070068
Epoch 001/003, valid ppl = None, batch 0610/0725, train loss = 4.160537242889404
Epoch 001/003, valid ppl = None, batch 0611/0725, train loss = 4.165740489959717
Epoch 001/003, valid ppl = None, batch 0612/0725, train loss = 4.182663917541504
Epoch 001/003, valid ppl = None, batch 0613/0725, train loss = 4.137069225311279
Epoch 001/003, valid ppl = None, batch 0614/0725, train loss = 4.104990482330322
Epoch 001/003, valid ppl = None, batch 0615/0725, train loss = 4.248867988586426
Epoch 001/003, valid ppl = 

Epoch 001/003, valid ppl = None, batch 0706/0725, train loss = 4.336468696594238
Epoch 001/003, valid ppl = None, batch 0707/0725, train loss = 4.109037399291992
Epoch 001/003, valid ppl = None, batch 0708/0725, train loss = 3.847865343093872
Epoch 001/003, valid ppl = None, batch 0709/0725, train loss = 4.2065558433532715
Epoch 001/003, valid ppl = None, batch 0710/0725, train loss = 4.435121059417725
Epoch 001/003, valid ppl = None, batch 0711/0725, train loss = 3.797776460647583
Epoch 001/003, valid ppl = None, batch 0712/0725, train loss = 4.025731086730957
Epoch 001/003, valid ppl = None, batch 0713/0725, train loss = 4.324324607849121
Epoch 001/003, valid ppl = None, batch 0714/0725, train loss = 4.21558141708374
Epoch 001/003, valid ppl = None, batch 0715/0725, train loss = 4.073867321014404
Epoch 001/003, valid ppl = None, batch 0716/0725, train loss = 3.9881317615509033
Epoch 001/003, valid ppl = None, batch 0717/0725, train loss = 4.376705169677734
Epoch 001/003, valid ppl = 

Epoch 002/003, valid ppl = 60.682336799751084, batch 0069/0725, train loss = 3.9014108180999756
Epoch 002/003, valid ppl = 60.682336799751084, batch 0070/0725, train loss = 4.135241985321045
Epoch 002/003, valid ppl = 60.682336799751084, batch 0071/0725, train loss = 3.8885273933410645
Epoch 002/003, valid ppl = 60.682336799751084, batch 0072/0725, train loss = 3.980590343475342
Epoch 002/003, valid ppl = 60.682336799751084, batch 0073/0725, train loss = 4.008225440979004
Epoch 002/003, valid ppl = 60.682336799751084, batch 0074/0725, train loss = 3.6882190704345703
Epoch 002/003, valid ppl = 60.682336799751084, batch 0075/0725, train loss = 3.9059231281280518
Epoch 002/003, valid ppl = 60.682336799751084, batch 0076/0725, train loss = 3.9506139755249023
Epoch 002/003, valid ppl = 60.682336799751084, batch 0077/0725, train loss = 4.029925346374512
Epoch 002/003, valid ppl = 60.682336799751084, batch 0078/0725, train loss = 4.069431304931641
Epoch 002/003, valid ppl = 60.682336799751084

Epoch 002/003, valid ppl = 60.682336799751084, batch 0155/0725, train loss = 3.840520143508911
Epoch 002/003, valid ppl = 60.682336799751084, batch 0156/0725, train loss = 3.990886926651001
Epoch 002/003, valid ppl = 60.682336799751084, batch 0157/0725, train loss = 3.7038075923919678
Epoch 002/003, valid ppl = 60.682336799751084, batch 0158/0725, train loss = 3.7337591648101807
Epoch 002/003, valid ppl = 60.682336799751084, batch 0159/0725, train loss = 3.7498865127563477
Epoch 002/003, valid ppl = 60.682336799751084, batch 0160/0725, train loss = 3.73114013671875
Epoch 002/003, valid ppl = 60.682336799751084, batch 0161/0725, train loss = 3.7093441486358643
Epoch 002/003, valid ppl = 60.682336799751084, batch 0162/0725, train loss = 3.8576250076293945
Epoch 002/003, valid ppl = 60.682336799751084, batch 0163/0725, train loss = 3.779073476791382
Epoch 002/003, valid ppl = 60.682336799751084, batch 0164/0725, train loss = 3.7384777069091797
Epoch 002/003, valid ppl = 60.682336799751084

Epoch 002/003, valid ppl = 60.682336799751084, batch 0241/0725, train loss = 3.9556665420532227
Epoch 002/003, valid ppl = 60.682336799751084, batch 0242/0725, train loss = 4.022710800170898
Epoch 002/003, valid ppl = 60.682336799751084, batch 0243/0725, train loss = 3.693286180496216
Epoch 002/003, valid ppl = 60.682336799751084, batch 0244/0725, train loss = 3.6093757152557373
Epoch 002/003, valid ppl = 60.682336799751084, batch 0245/0725, train loss = 3.6052663326263428
Epoch 002/003, valid ppl = 60.682336799751084, batch 0246/0725, train loss = 3.500948905944824
Epoch 002/003, valid ppl = 60.682336799751084, batch 0247/0725, train loss = 3.6299610137939453
Epoch 002/003, valid ppl = 60.682336799751084, batch 0248/0725, train loss = 4.188615322113037
Epoch 002/003, valid ppl = 60.682336799751084, batch 0249/0725, train loss = 3.683124303817749
Epoch 002/003, valid ppl = 60.682336799751084, batch 0250/0725, train loss = 3.851334571838379
Epoch 002/003, valid ppl = 60.682336799751084,

Epoch 002/003, valid ppl = 60.682336799751084, batch 0327/0725, train loss = 3.662947416305542
Epoch 002/003, valid ppl = 60.682336799751084, batch 0328/0725, train loss = 3.702204942703247
Epoch 002/003, valid ppl = 60.682336799751084, batch 0329/0725, train loss = 3.896033763885498
Epoch 002/003, valid ppl = 60.682336799751084, batch 0330/0725, train loss = 3.7506601810455322
Epoch 002/003, valid ppl = 60.682336799751084, batch 0331/0725, train loss = 3.827775001525879
Epoch 002/003, valid ppl = 60.682336799751084, batch 0332/0725, train loss = 3.6594064235687256
Epoch 002/003, valid ppl = 60.682336799751084, batch 0333/0725, train loss = 3.6649506092071533
Epoch 002/003, valid ppl = 60.682336799751084, batch 0334/0725, train loss = 3.6688544750213623
Epoch 002/003, valid ppl = 60.682336799751084, batch 0335/0725, train loss = 3.7553277015686035
Epoch 002/003, valid ppl = 60.682336799751084, batch 0336/0725, train loss = 3.8314292430877686
Epoch 002/003, valid ppl = 60.68233679975108

Epoch 002/003, valid ppl = 60.682336799751084, batch 0413/0725, train loss = 3.976006507873535
Epoch 002/003, valid ppl = 60.682336799751084, batch 0414/0725, train loss = 3.595658779144287
Epoch 002/003, valid ppl = 60.682336799751084, batch 0415/0725, train loss = 3.7336995601654053
Epoch 002/003, valid ppl = 60.682336799751084, batch 0416/0725, train loss = 3.5273914337158203
Epoch 002/003, valid ppl = 60.682336799751084, batch 0417/0725, train loss = 3.622518539428711
Epoch 002/003, valid ppl = 60.682336799751084, batch 0418/0725, train loss = 3.54213285446167
Epoch 002/003, valid ppl = 60.682336799751084, batch 0419/0725, train loss = 3.9730095863342285
Epoch 002/003, valid ppl = 60.682336799751084, batch 0420/0725, train loss = 3.4994056224823
Epoch 002/003, valid ppl = 60.682336799751084, batch 0421/0725, train loss = 3.7973334789276123
Epoch 002/003, valid ppl = 60.682336799751084, batch 0422/0725, train loss = 3.7495172023773193
Epoch 002/003, valid ppl = 60.682336799751084, b

Epoch 002/003, valid ppl = 60.682336799751084, batch 0499/0725, train loss = 3.8484151363372803
Epoch 002/003, valid ppl = 60.682336799751084, batch 0500/0725, train loss = 3.646993398666382
Epoch 002/003, valid ppl = 60.682336799751084, batch 0501/0725, train loss = 3.8992583751678467
Epoch 002/003, valid ppl = 60.682336799751084, batch 0502/0725, train loss = 3.7215230464935303
Epoch 002/003, valid ppl = 60.682336799751084, batch 0503/0725, train loss = 3.6342031955718994
Epoch 002/003, valid ppl = 60.682336799751084, batch 0504/0725, train loss = 3.85201096534729
Epoch 002/003, valid ppl = 60.682336799751084, batch 0505/0725, train loss = 3.727457284927368
Epoch 002/003, valid ppl = 60.682336799751084, batch 0506/0725, train loss = 3.418001413345337
Epoch 002/003, valid ppl = 60.682336799751084, batch 0507/0725, train loss = 3.8007819652557373
Epoch 002/003, valid ppl = 60.682336799751084, batch 0508/0725, train loss = 3.6050214767456055
Epoch 002/003, valid ppl = 60.682336799751084

Epoch 002/003, valid ppl = 60.682336799751084, batch 0585/0725, train loss = 3.693279504776001
Epoch 002/003, valid ppl = 60.682336799751084, batch 0586/0725, train loss = 3.6365692615509033
Epoch 002/003, valid ppl = 60.682336799751084, batch 0587/0725, train loss = 3.6990599632263184
Epoch 002/003, valid ppl = 60.682336799751084, batch 0588/0725, train loss = 3.7010631561279297
Epoch 002/003, valid ppl = 60.682336799751084, batch 0589/0725, train loss = 3.5476255416870117
Epoch 002/003, valid ppl = 60.682336799751084, batch 0590/0725, train loss = 3.459735870361328
Epoch 002/003, valid ppl = 60.682336799751084, batch 0591/0725, train loss = 3.582854747772217
Epoch 002/003, valid ppl = 60.682336799751084, batch 0592/0725, train loss = 3.621490478515625
Epoch 002/003, valid ppl = 60.682336799751084, batch 0593/0725, train loss = 3.49515962600708
Epoch 002/003, valid ppl = 60.682336799751084, batch 0594/0725, train loss = 3.5325584411621094
Epoch 002/003, valid ppl = 60.682336799751084,

Epoch 002/003, valid ppl = 60.682336799751084, batch 0671/0725, train loss = 3.8508687019348145
Epoch 002/003, valid ppl = 60.682336799751084, batch 0672/0725, train loss = 3.535349130630493
Epoch 002/003, valid ppl = 60.682336799751084, batch 0673/0725, train loss = 3.7330193519592285
Epoch 002/003, valid ppl = 60.682336799751084, batch 0674/0725, train loss = 3.7615115642547607
Epoch 002/003, valid ppl = 60.682336799751084, batch 0675/0725, train loss = 3.425196647644043
Epoch 002/003, valid ppl = 60.682336799751084, batch 0676/0725, train loss = 3.7061569690704346
Epoch 002/003, valid ppl = 60.682336799751084, batch 0677/0725, train loss = 3.5463006496429443
Epoch 002/003, valid ppl = 60.682336799751084, batch 0678/0725, train loss = 3.424635171890259
Epoch 002/003, valid ppl = 60.682336799751084, batch 0679/0725, train loss = 3.7926788330078125
Epoch 002/003, valid ppl = 60.682336799751084, batch 0680/0725, train loss = 3.7452404499053955
Epoch 002/003, valid ppl = 60.6823367997510

Epoch 003/003, valid ppl = 43.60350735564386, batch 0031/0725, train loss = 3.162583827972412
Epoch 003/003, valid ppl = 43.60350735564386, batch 0032/0725, train loss = 3.151074171066284
Epoch 003/003, valid ppl = 43.60350735564386, batch 0033/0725, train loss = 3.3589072227478027
Epoch 003/003, valid ppl = 43.60350735564386, batch 0034/0725, train loss = 3.1130123138427734
Epoch 003/003, valid ppl = 43.60350735564386, batch 0035/0725, train loss = 3.0582072734832764
Epoch 003/003, valid ppl = 43.60350735564386, batch 0036/0725, train loss = 3.298461437225342
Epoch 003/003, valid ppl = 43.60350735564386, batch 0037/0725, train loss = 3.4211044311523438
Epoch 003/003, valid ppl = 43.60350735564386, batch 0038/0725, train loss = 3.0557494163513184
Epoch 003/003, valid ppl = 43.60350735564386, batch 0039/0725, train loss = 3.183049201965332
Epoch 003/003, valid ppl = 43.60350735564386, batch 0040/0725, train loss = 3.3281538486480713
Epoch 003/003, valid ppl = 43.60350735564386, batch 00

Epoch 003/003, valid ppl = 43.60350735564386, batch 0118/0725, train loss = 2.8978986740112305
Epoch 003/003, valid ppl = 43.60350735564386, batch 0119/0725, train loss = 3.0242679119110107
Epoch 003/003, valid ppl = 43.60350735564386, batch 0120/0725, train loss = 3.125211000442505
Epoch 003/003, valid ppl = 43.60350735564386, batch 0121/0725, train loss = 3.386990785598755
Epoch 003/003, valid ppl = 43.60350735564386, batch 0122/0725, train loss = 3.3707919120788574
Epoch 003/003, valid ppl = 43.60350735564386, batch 0123/0725, train loss = 3.1272101402282715
Epoch 003/003, valid ppl = 43.60350735564386, batch 0124/0725, train loss = 3.376210927963257
Epoch 003/003, valid ppl = 43.60350735564386, batch 0125/0725, train loss = 3.1872310638427734
Epoch 003/003, valid ppl = 43.60350735564386, batch 0126/0725, train loss = 3.24088978767395
Epoch 003/003, valid ppl = 43.60350735564386, batch 0127/0725, train loss = 3.3087594509124756
Epoch 003/003, valid ppl = 43.60350735564386, batch 012

Epoch 003/003, valid ppl = 43.60350735564386, batch 0205/0725, train loss = 3.3456578254699707
Epoch 003/003, valid ppl = 43.60350735564386, batch 0206/0725, train loss = 3.242525577545166
Epoch 003/003, valid ppl = 43.60350735564386, batch 0207/0725, train loss = 2.994342565536499
Epoch 003/003, valid ppl = 43.60350735564386, batch 0208/0725, train loss = 3.199608087539673
Epoch 003/003, valid ppl = 43.60350735564386, batch 0209/0725, train loss = 3.1823625564575195
Epoch 003/003, valid ppl = 43.60350735564386, batch 0210/0725, train loss = 3.3520853519439697
Epoch 003/003, valid ppl = 43.60350735564386, batch 0211/0725, train loss = 3.1864640712738037
Epoch 003/003, valid ppl = 43.60350735564386, batch 0212/0725, train loss = 3.069211959838867
Epoch 003/003, valid ppl = 43.60350735564386, batch 0213/0725, train loss = 3.3840091228485107
Epoch 003/003, valid ppl = 43.60350735564386, batch 0214/0725, train loss = 3.132793664932251
Epoch 003/003, valid ppl = 43.60350735564386, batch 021

Epoch 003/003, valid ppl = 43.60350735564386, batch 0292/0725, train loss = 3.0333313941955566
Epoch 003/003, valid ppl = 43.60350735564386, batch 0293/0725, train loss = 2.9093329906463623
Epoch 003/003, valid ppl = 43.60350735564386, batch 0294/0725, train loss = 3.1456286907196045
Epoch 003/003, valid ppl = 43.60350735564386, batch 0295/0725, train loss = 2.994464874267578
Epoch 003/003, valid ppl = 43.60350735564386, batch 0296/0725, train loss = 3.3493905067443848
Epoch 003/003, valid ppl = 43.60350735564386, batch 0297/0725, train loss = 3.2448296546936035
Epoch 003/003, valid ppl = 43.60350735564386, batch 0298/0725, train loss = 3.1779592037200928
Epoch 003/003, valid ppl = 43.60350735564386, batch 0299/0725, train loss = 3.355391263961792
Epoch 003/003, valid ppl = 43.60350735564386, batch 0300/0725, train loss = 3.045433282852173
Epoch 003/003, valid ppl = 43.60350735564386, batch 0301/0725, train loss = 3.065046787261963
Epoch 003/003, valid ppl = 43.60350735564386, batch 03

Epoch 003/003, valid ppl = 43.60350735564386, batch 0379/0725, train loss = 3.3145933151245117
Epoch 003/003, valid ppl = 43.60350735564386, batch 0380/0725, train loss = 3.4441065788269043
Epoch 003/003, valid ppl = 43.60350735564386, batch 0381/0725, train loss = 3.3188514709472656
Epoch 003/003, valid ppl = 43.60350735564386, batch 0382/0725, train loss = 3.2015891075134277
Epoch 003/003, valid ppl = 43.60350735564386, batch 0383/0725, train loss = 3.0426316261291504
Epoch 003/003, valid ppl = 43.60350735564386, batch 0384/0725, train loss = 3.1288721561431885
Epoch 003/003, valid ppl = 43.60350735564386, batch 0385/0725, train loss = 3.166163444519043
Epoch 003/003, valid ppl = 43.60350735564386, batch 0386/0725, train loss = 3.037213087081909
Epoch 003/003, valid ppl = 43.60350735564386, batch 0387/0725, train loss = 3.1841118335723877
Epoch 003/003, valid ppl = 43.60350735564386, batch 0388/0725, train loss = 3.234384536743164
Epoch 003/003, valid ppl = 43.60350735564386, batch 0

Epoch 003/003, valid ppl = 43.60350735564386, batch 0466/0725, train loss = 3.5140397548675537
Epoch 003/003, valid ppl = 43.60350735564386, batch 0467/0725, train loss = 3.065960645675659
Epoch 003/003, valid ppl = 43.60350735564386, batch 0468/0725, train loss = 3.2700235843658447
Epoch 003/003, valid ppl = 43.60350735564386, batch 0469/0725, train loss = 3.018458366394043
Epoch 003/003, valid ppl = 43.60350735564386, batch 0470/0725, train loss = 2.9374191761016846
Epoch 003/003, valid ppl = 43.60350735564386, batch 0471/0725, train loss = 3.0517940521240234
Epoch 003/003, valid ppl = 43.60350735564386, batch 0472/0725, train loss = 3.108652114868164
Epoch 003/003, valid ppl = 43.60350735564386, batch 0473/0725, train loss = 3.1740384101867676
Epoch 003/003, valid ppl = 43.60350735564386, batch 0474/0725, train loss = 3.06602144241333
Epoch 003/003, valid ppl = 43.60350735564386, batch 0475/0725, train loss = 3.1689038276672363
Epoch 003/003, valid ppl = 43.60350735564386, batch 047

Epoch 003/003, valid ppl = 43.60350735564386, batch 0553/0725, train loss = 2.980897903442383
Epoch 003/003, valid ppl = 43.60350735564386, batch 0554/0725, train loss = 3.0589253902435303
Epoch 003/003, valid ppl = 43.60350735564386, batch 0555/0725, train loss = 3.127553939819336
Epoch 003/003, valid ppl = 43.60350735564386, batch 0556/0725, train loss = 3.014857769012451
Epoch 003/003, valid ppl = 43.60350735564386, batch 0557/0725, train loss = 3.015998125076294
Epoch 003/003, valid ppl = 43.60350735564386, batch 0558/0725, train loss = 2.773847818374634
Epoch 003/003, valid ppl = 43.60350735564386, batch 0559/0725, train loss = 3.2930099964141846
Epoch 003/003, valid ppl = 43.60350735564386, batch 0560/0725, train loss = 3.3181660175323486
Epoch 003/003, valid ppl = 43.60350735564386, batch 0561/0725, train loss = 2.977242946624756
Epoch 003/003, valid ppl = 43.60350735564386, batch 0562/0725, train loss = 3.036402702331543
Epoch 003/003, valid ppl = 43.60350735564386, batch 0563/

Epoch 003/003, valid ppl = 43.60350735564386, batch 0640/0725, train loss = 3.004286527633667
Epoch 003/003, valid ppl = 43.60350735564386, batch 0641/0725, train loss = 2.9617421627044678
Epoch 003/003, valid ppl = 43.60350735564386, batch 0642/0725, train loss = 3.2117817401885986
Epoch 003/003, valid ppl = 43.60350735564386, batch 0643/0725, train loss = 3.2269175052642822
Epoch 003/003, valid ppl = 43.60350735564386, batch 0644/0725, train loss = 3.0800302028656006
Epoch 003/003, valid ppl = 43.60350735564386, batch 0645/0725, train loss = 2.9972009658813477
Epoch 003/003, valid ppl = 43.60350735564386, batch 0646/0725, train loss = 3.1627960205078125
Epoch 003/003, valid ppl = 43.60350735564386, batch 0647/0725, train loss = 2.980532646179199
Epoch 003/003, valid ppl = 43.60350735564386, batch 0648/0725, train loss = 3.012706756591797
Epoch 003/003, valid ppl = 43.60350735564386, batch 0649/0725, train loss = 3.0420706272125244
Epoch 003/003, valid ppl = 43.60350735564386, batch 0

Epoch 001, valid ppl = 60.682336799751084
Epoch 002, valid ppl = 43.60350735564386
Epoch 003, valid ppl = 34.67138074459025
