In [1]:
import os
import pickle
import argparse
import numpy as np
from model import Options, Seq2SeqAttn


In [7]:
# Notebook configuration, exposed as command-line style arguments so the same
# code can also be driven from a script.
# NOTE(review): save_dir / output_dir are absolute paths on the original
# author's machine; point them at your own data / checkpoint directories.
save_dir = '/Users/yan/Documents/document/EPFL/MA2/semesterprj/code/seq2seq_attn/affect-rich/input/'
output_dir = '/Users/yan/Documents/document/EPFL/MA2/semesterprj/code/seq2seq_attn/affect-rich/output/'

# Every default below is built with os.path.join instead of string
# concatenation. output_dir already ends with '/', so the previous
# output_dir + '/model_...' produced a '//' in the save paths and made them
# differ textually from the matching restore paths.
model_dir = os.path.join(output_dir, 'model_dailydialog_rf')

parser = argparse.ArgumentParser()

# Input data and pre-computed resources.
parser.add_argument('--data_path', type=str, default=save_dir,
                    help='the directory to the data')
parser.add_argument('--word_embeddings_path', type=str,
                    default=os.path.join(save_dir, 'word_embeddings.npy'),
                    help='the directory to the pre-trained word embeddings')
parser.add_argument('--VAD_path', type=str,
                    default=os.path.join(save_dir, 'VAD.npy'),
                    help='the directory to VAD')
parser.add_argument('--tf_path', type=str,
                    default=os.path.join(save_dir, 'tf.npy'),
                    help='the directory to term frequency')
parser.add_argument('--VAD_loss_path', type=str,
                    default=os.path.join(save_dir, 'VAD_loss.npy'),
                    help='the directory to VAD loss for each word')
parser.add_argument('--ti_path', type=str,
                    default=os.path.join(save_dir, 'mu_li.npy'),
                    help='the directory to term importance')

# Optimisation and model-size hyper-parameters.
parser.add_argument('--num_epochs', type=int, default=3,
                    help='the number of epochs to train the data')
parser.add_argument('--batch_size', type=int, default=64,
                    help='the batch size')
parser.add_argument('--learning_rate', type=float, default=0.001,
                    help='the learning rate')
parser.add_argument('--beam_width', type=int, default=32,
                    help='the beam width when decoding')
parser.add_argument('--word_embed_size', type=int, default=300,
                    help='the size of word embeddings')
parser.add_argument('--n_hidden_units_enc', type=int, default=256,
                    help='the number of hidden units of encoder')
parser.add_argument('--n_hidden_units_dec', type=int, default=256,
                    help='the number of hidden units of decoder')
# NOTE(review): the original author left an open question mark on attn_depth;
# the value 128 has not been justified anywhere in this notebook.
parser.add_argument('--attn_depth', type=int, default=128,
                    help='attention depth')

# Checkpoint locations for the forward model P(T|S).
parser.add_argument('--restore_path_TS', type=str,
                    default=os.path.join(model_dir, 'model_TS'),
                    help='the path to restore the trained model')
parser.add_argument('--save_path_TS', type=str,
                    default=os.path.join(model_dir, 'model_TS'),
                    help='the path to save the trained model to')

# Checkpoint locations for the backward model P(S|T).
parser.add_argument('--restore_path_ST', type=str,
                    default=os.path.join(model_dir, 'model_ST'),
                    help='the path to restore the trained model')
parser.add_argument('--save_path_ST', type=str,
                    default=os.path.join(model_dir, 'model_ST'),
                    help='the path to save the trained model to')

# 0 means "train from scratch"; a positive value selects the checkpoint
# model_epoch_<NNN>.ckpt to resume from (see the training cells).
parser.add_argument('--restore_epoch', type=int, default=0,
                    help='the epoch to restore')

# parse_known_args (rather than parse_args) is used so that argv entries this
# parser does not define are collected in `unknown` instead of aborting;
# presumably needed because the notebook kernel is started with its own
# command-line flags.
args, unknown = parser.parse_known_args()


In [3]:
def read_data(data_path, batch_size=None):
    """Load the train/validation arrays and the vocabulary mappings.

    Each split is read from ``<data_path>/<split>/`` and, when its number of
    samples is not a multiple of the batch size, extended with randomly
    re-sampled rows so that it divides into complete batches.

    Args:
        data_path: directory holding the ``train`` and ``validation``
            sub-directories plus ``token2id.pickle`` and ``id2token.pickle``.
        batch_size: batch size the splits are padded to. Defaults to the
            notebook-level ``args.batch_size``.

    Returns:
        ``(train_set, valid_set, token2id, id2token)``. The two sets are dicts
        with the keys ``enc_input``, ``dec_input``, ``target``,
        ``enc_input_len`` and ``dec_input_len``.

    Raises:
        ValueError: if the batch size is not positive.
    """
    if batch_size is None:
        batch_size = args.batch_size
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got {}'.format(batch_size))

    keys = ('enc_input', 'dec_input', 'target', 'enc_input_len', 'dec_input_len')

    def load_np_files(path):
        """Load one split and complete its last batch if necessary."""
        my_set = {key: np.load(os.path.join(path, key + '.npy')) for key in keys}

        # The remainder is taken from the sample COUNT. The previous code
        # used the last index (count - 1), which skipped the padding whenever
        # count % batch_size == 1.
        n_samples = my_set['dec_input'].shape[0]
        left_samples = n_samples % batch_size
        if n_samples == 0 or left_samples == 0:
            # Nothing to sample from, or already a whole number of batches.
            return my_set

        # Re-sample from the rows that already form complete batches; when
        # the split is smaller than one batch, fall back to all of its rows
        # (np.random.randint(0, 0) would raise).
        n_full = n_samples - left_samples
        high = n_full if n_full > 0 else n_samples
        last_batch_idx = np.random.randint(0, high, size=batch_size - left_samples)
        idx = np.concatenate([np.arange(n_samples), last_batch_idx])

        # The same index vector is applied to every array so rows stay aligned.
        return {key: value[idx] for key, value in my_set.items()}

    train_set = load_np_files(os.path.join(data_path, 'train'))
    valid_set = load_np_files(os.path.join(data_path, 'validation'))

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use vocabulary files that come from a trusted source.
    with open(os.path.join(data_path, 'token2id.pickle'), 'rb') as file:
        token2id = pickle.load(file)
    with open(os.path.join(data_path, 'id2token.pickle'), 'rb') as file:
        id2token = pickle.load(file)

    return train_set, valid_set, token2id, id2token

---
Train model maximizing P(T|S)

In [22]:
# Load both splits and the vocabulary mappings into the notebook namespace so
# they can be inspected interactively (the training cells below reload them).
train_set, valid_set, token2id,id2token = read_data(args.data_path)

In [26]:
# Sanity check: (number of samples after batch padding, padded encoder length).
train_set['enc_input'].shape

(46400, 20)

In [None]:
if __name__ == '__main__':
    # Forward model P(T|S): the data is used exactly as stored on disk.
    train_set, valid_set, token2id, id2token = read_data(args.data_path)

    # The padded utterance lengths are read off the second axis of the inputs.
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    # Pre-computed resources. The two per-word vectors are reshaped into
    # single-column matrices.
    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    termfreq = np.load(args.ti_path).reshape(-1, 1)  # term importance, despite the name
    VAD_loss = np.load(args.VAD_loss_path).reshape(-1, 1)

    # Hyper-parameters that are forwarded unchanged from the parsed arguments.
    hyper_params = {
        name: getattr(args, name)
        for name in ('num_epochs', 'batch_size', 'learning_rate', 'beam_width',
                     'word_embed_size', 'n_hidden_units_enc',
                     'n_hidden_units_dec', 'attn_depth')
    }
    options = Options(mode='TRAIN',
                      VAD_mode=True,  # presumably enables the VAD-based loss; confirm in model.py
                      corpus_size=len(token2id),
                      max_uttr_len_enc=max_uttr_len_enc,
                      max_uttr_len_dec=max_uttr_len_dec,
                      go_index=token2id['<go>'],
                      eos_index=token2id['<eos>'],
                      word_embeddings=word_embeddings,
                      **hyper_params)
    model_TS = Seq2SeqAttn(options)

    # List the trainable variables of the freshly built model.
    for var in model_TS.tvars:
        print(var.name)

    # Start from scratch unless a positive epoch to resume from was requested.
    if args.restore_epoch <= 0:
        model_TS.init_tf_vars()
    else:
        checkpoint_name = 'model_epoch_{:03d}.ckpt'.format(args.restore_epoch)
        model_TS.restore(os.path.join(args.restore_path_TS, checkpoint_name))

    model_TS.train(train_set, VAD, termfreq, VAD_loss,
                   args.save_path_TS, args.restore_epoch, valid_set)

Building the TensorFlow graph...
TensorFlow session is closed.
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
TensorFlow variables initialized.
Start to train the model...
Epoch 001/003, valid ppl = None, batch 0001/0725, train loss = 9.971991539001465
Epoch 001/003, valid ppl = None, batch 0002/0725, train loss = 9.963825225830078
Epoch 001/003, valid ppl = None, batch 0003/0725, train loss = 9.938911437988281
Epoch 001/003, valid ppl = None, batch 0004/0

Epoch 001/003, valid ppl = None, batch 0094/0725, train loss = 6.5913004875183105
Epoch 001/003, valid ppl = None, batch 0095/0725, train loss = 6.527729034423828
Epoch 001/003, valid ppl = None, batch 0096/0725, train loss = 6.566829204559326
Epoch 001/003, valid ppl = None, batch 0097/0725, train loss = 6.553555965423584
Epoch 001/003, valid ppl = None, batch 0098/0725, train loss = 6.611718654632568
Epoch 001/003, valid ppl = None, batch 0099/0725, train loss = 6.594954490661621
Epoch 001/003, valid ppl = None, batch 0100/0725, train loss = 6.489307880401611
Epoch 001/003, valid ppl = None, batch 0101/0725, train loss = 6.447384357452393
Epoch 001/003, valid ppl = None, batch 0102/0725, train loss = 6.396202564239502
Epoch 001/003, valid ppl = None, batch 0103/0725, train loss = 6.54709005355835
Epoch 001/003, valid ppl = None, batch 0104/0725, train loss = 6.341094017028809
Epoch 001/003, valid ppl = None, batch 0105/0725, train loss = 6.305828094482422
Epoch 001/003, valid ppl = N

---
Train model maximizing P(S|T)

In [12]:
def revert(myset):
    """Swap the encoder and decoder roles of a data set.

    The returned set is used to train the backward model P(S|T): the old
    decoder side becomes the encoder input and the old encoder side becomes
    the decoder input / target.

    Relies on the notebook-level ``token2id`` mapping for the ``<go>`` and
    ``<eos>`` ids, and treats id 0 as padding when locating the end of each
    utterance (presumably the pad id; verify against the preprocessing).

    Args:
        myset: dict with the keys ``enc_input``, ``dec_input``,
            ``enc_input_len`` and ``dec_input_len`` (2-D id arrays and their
            length vectors). It is not modified.

    Returns:
        A new dict with the keys ``enc_input``, ``dec_input``, ``target``,
        ``enc_input_len`` and ``dec_input_len``.
    """
    source_ids = myset['enc_input']

    # New encoder input: the old decoder input without its first column
    # (the <go> column, given how dec_input is rebuilt below).
    enc_input = myset['dec_input'][:, 1:]
    # New decoder input: <go> followed by the old encoder tokens.
    dec_input = np.insert(source_ids, 0, token2id['<go>'], axis=1)

    # New target: the old encoder tokens followed by <eos>. A spare zero
    # column is APPENDED so that every row has room for <eos>. The previous
    # np.insert(..., -1, ...) placed that column before the last one, so rows
    # without padding ended up with <eos> in front of their final token.
    target = np.insert(source_ids, source_ids.shape[1], 0, axis=1)
    first_zero = [np.where(row == 0)[0][0] for row in target]
    target[np.arange(target.shape[0]), first_zero] = token2id['<eos>']

    # NOTE(review): the length vectors are swapped as-is. Whether they need a
    # +/-1 adjustment for the dropped / added <go> depends on how the lengths
    # were counted during preprocessing; confirm against the model.
    return {
        'enc_input': enc_input,
        'dec_input': dec_input,
        'target': target,
        'enc_input_len': myset['dec_input_len'],
        'dec_input_len': myset['enc_input_len'],
    }

In [13]:
if __name__ == '__main__':
    # Backward model P(S|T): load the data, then swap the encoder and decoder
    # roles of both splits with revert().
    train_set, valid_set, token2id,id2token = read_data(args.data_path)
    train_set = revert(train_set)
    valid_set = revert(valid_set)

    # The padded utterance lengths are read off the second axis of the
    # (reverted) inputs.
    max_uttr_len_enc = train_set['enc_input'].shape[1]
    max_uttr_len_dec = train_set['dec_input'].shape[1]

    word_embeddings = np.load(args.word_embeddings_path)
    VAD = np.load(args.VAD_path)
    termfreq = np.load(args.ti_path) # term importance
    termfreq = termfreq.reshape(-1,1)
    VAD_loss = np.load(args.VAD_loss_path)
    VAD_loss = VAD_loss.reshape(-1,1)

    # VAD_mode is passed as the boolean False. The previous value was the
    # string 'FALSE', which is non-empty and therefore truthy, while the
    # forward-model cell passes a real boolean (True).
    # NOTE(review): confirm that Options / Seq2SeqAttn test this flag for
    # truthiness and do not compare it against the literal string 'FALSE'.
    options = Options(mode = 'TRAIN',
                      VAD_mode = False,
                      num_epochs = args.num_epochs,
                      batch_size = args.batch_size,
                      learning_rate = args.learning_rate,
                      beam_width = args.beam_width,
                      corpus_size = len(token2id),
                      max_uttr_len_enc = max_uttr_len_enc,
                      max_uttr_len_dec = max_uttr_len_dec,
                      go_index = token2id['<go>'],
                      eos_index = token2id['<eos>'],
                      word_embed_size = args.word_embed_size,
                      n_hidden_units_enc = args.n_hidden_units_enc,
                      n_hidden_units_dec = args.n_hidden_units_dec,
                      attn_depth = args.attn_depth,
                      word_embeddings = word_embeddings)
    model_ST = Seq2SeqAttn(options)

    # List the trainable variables of the freshly built model.
    for var in model_ST.tvars:
        print(var.name)

    # Resume from the requested checkpoint, or initialise from scratch when
    # restore_epoch is 0.
    if args.restore_epoch > 0:
        model_ST.restore(os.path.join(args.restore_path_ST, 'model_epoch_{:03d}.ckpt'.format(args.restore_epoch)))
    else:
        model_ST.init_tf_vars()
    model_ST.train(train_set, VAD,termfreq, VAD_loss,args.save_path_ST, args.restore_epoch, valid_set)

Building the TensorFlow graph...
embedding/embedding:0
encoding/rnn/gru_cell/gates/kernel:0
encoding/rnn/gru_cell/gates/bias:0
encoding/rnn/gru_cell/candidate/kernel:0
encoding/rnn/gru_cell/candidate/bias:0
decoding/memory_layer/kernel:0
decoding/attention_v:0
decoding/my_bahdanau_attention/query_layer/kernel:0
decoding/my_bahdanau_attention/attention_Wb/kernel:0
decoding/attention_wrapper/gru_cell/gates/kernel:0
decoding/attention_wrapper/gru_cell/gates/bias:0
decoding/attention_wrapper/gru_cell/candidate/kernel:0
decoding/attention_wrapper/gru_cell/candidate/bias:0
decoding/dense/kernel:0
decoding/dense/bias:0
Restoring a pre-trained model from /Users/yan/Documents/document/EPFL/MA2/semesterprj/code/seq2seq_attn/affect-rich/output/model_dailydialog_rf/model_ST/model_epoch_003.ckpt...
INFO:tensorflow:Restoring parameters from /Users/yan/Documents/document/EPFL/MA2/semesterprj/code/seq2seq_attn/affect-rich/output/model_dailydialog_rf/model_ST/model_epoch_003.ckpt
Start to train the mod

Epoch 001/003, valid ppl = None, batch 0090/0644, train loss = 3.399214506149292
Epoch 001/003, valid ppl = None, batch 0091/0644, train loss = 3.545828104019165
Epoch 001/003, valid ppl = None, batch 0092/0644, train loss = 3.2699573040008545
Epoch 001/003, valid ppl = None, batch 0093/0644, train loss = 3.2847533226013184
Epoch 001/003, valid ppl = None, batch 0094/0644, train loss = 3.451704740524292
Epoch 001/003, valid ppl = None, batch 0095/0644, train loss = 3.317626476287842
Epoch 001/003, valid ppl = None, batch 0096/0644, train loss = 3.354041814804077
Epoch 001/003, valid ppl = None, batch 0097/0644, train loss = 3.570060968399048
Epoch 001/003, valid ppl = None, batch 0098/0644, train loss = 3.414484977722168
Epoch 001/003, valid ppl = None, batch 0099/0644, train loss = 3.440544605255127
Epoch 001/003, valid ppl = None, batch 0100/0644, train loss = 3.3308064937591553
Epoch 001/003, valid ppl = None, batch 0101/0644, train loss = 3.2305238246917725
Epoch 001/003, valid ppl

Epoch 001/003, valid ppl = None, batch 0191/0644, train loss = 3.3014495372772217
Epoch 001/003, valid ppl = None, batch 0192/0644, train loss = 3.2710516452789307
Epoch 001/003, valid ppl = None, batch 0193/0644, train loss = 3.2774171829223633
Epoch 001/003, valid ppl = None, batch 0194/0644, train loss = 3.3398709297180176
Epoch 001/003, valid ppl = None, batch 0195/0644, train loss = 3.5668599605560303
Epoch 001/003, valid ppl = None, batch 0196/0644, train loss = 3.487105369567871
Epoch 001/003, valid ppl = None, batch 0197/0644, train loss = 3.5610101222991943
Epoch 001/003, valid ppl = None, batch 0198/0644, train loss = 3.3424322605133057
Epoch 001/003, valid ppl = None, batch 0199/0644, train loss = 3.426948308944702
Epoch 001/003, valid ppl = None, batch 0200/0644, train loss = 3.4501280784606934
Epoch 001/003, valid ppl = None, batch 0201/0644, train loss = 3.364928722381592
Epoch 001/003, valid ppl = None, batch 0202/0644, train loss = 3.342785120010376
Epoch 001/003, valid

Epoch 001/003, valid ppl = None, batch 0292/0644, train loss = 3.633577823638916
Epoch 001/003, valid ppl = None, batch 0293/0644, train loss = 3.47641658782959
Epoch 001/003, valid ppl = None, batch 0294/0644, train loss = 3.461153268814087
Epoch 001/003, valid ppl = None, batch 0295/0644, train loss = 3.1805920600891113
Epoch 001/003, valid ppl = None, batch 0296/0644, train loss = 3.3204360008239746
Epoch 001/003, valid ppl = None, batch 0297/0644, train loss = 3.3599531650543213
Epoch 001/003, valid ppl = None, batch 0298/0644, train loss = 3.2904303073883057
Epoch 001/003, valid ppl = None, batch 0299/0644, train loss = 3.482123851776123
Epoch 001/003, valid ppl = None, batch 0300/0644, train loss = 3.323749303817749
Epoch 001/003, valid ppl = None, batch 0301/0644, train loss = 3.425752878189087
Epoch 001/003, valid ppl = None, batch 0302/0644, train loss = 3.3481907844543457
Epoch 001/003, valid ppl = None, batch 0303/0644, train loss = 3.4876575469970703
Epoch 001/003, valid pp

Epoch 001/003, valid ppl = None, batch 0393/0644, train loss = 3.4407825469970703
Epoch 001/003, valid ppl = None, batch 0394/0644, train loss = 3.145026922225952
Epoch 001/003, valid ppl = None, batch 0395/0644, train loss = 3.251859664916992
Epoch 001/003, valid ppl = None, batch 0396/0644, train loss = 3.363626480102539
Epoch 001/003, valid ppl = None, batch 0397/0644, train loss = 3.4544429779052734
Epoch 001/003, valid ppl = None, batch 0398/0644, train loss = 3.33648419380188
Epoch 001/003, valid ppl = None, batch 0399/0644, train loss = 3.394709825515747
Epoch 001/003, valid ppl = None, batch 0400/0644, train loss = 3.5435426235198975
Epoch 001/003, valid ppl = None, batch 0401/0644, train loss = 3.5506417751312256
Epoch 001/003, valid ppl = None, batch 0402/0644, train loss = 3.5132765769958496
Epoch 001/003, valid ppl = None, batch 0403/0644, train loss = 3.3067731857299805
Epoch 001/003, valid ppl = None, batch 0404/0644, train loss = 3.1436033248901367
Epoch 001/003, valid p

Epoch 001/003, valid ppl = None, batch 0494/0644, train loss = 3.367311477661133
Epoch 001/003, valid ppl = None, batch 0495/0644, train loss = 3.4536354541778564
Epoch 001/003, valid ppl = None, batch 0496/0644, train loss = 3.4083495140075684
Epoch 001/003, valid ppl = None, batch 0497/0644, train loss = 3.251432418823242
Epoch 001/003, valid ppl = None, batch 0498/0644, train loss = 3.439368486404419
Epoch 001/003, valid ppl = None, batch 0499/0644, train loss = 3.458909511566162
Epoch 001/003, valid ppl = None, batch 0500/0644, train loss = 3.2603726387023926
Epoch 001/003, valid ppl = None, batch 0501/0644, train loss = 3.4993557929992676
Epoch 001/003, valid ppl = None, batch 0502/0644, train loss = 3.4372639656066895
Epoch 001/003, valid ppl = None, batch 0503/0644, train loss = 3.2193307876586914
Epoch 001/003, valid ppl = None, batch 0504/0644, train loss = 3.578834056854248
Epoch 001/003, valid ppl = None, batch 0505/0644, train loss = 3.3467345237731934
Epoch 001/003, valid 

Epoch 001/003, valid ppl = None, batch 0595/0644, train loss = 3.5746467113494873
Epoch 001/003, valid ppl = None, batch 0596/0644, train loss = 3.2554233074188232
Epoch 001/003, valid ppl = None, batch 0597/0644, train loss = 3.2502455711364746
Epoch 001/003, valid ppl = None, batch 0598/0644, train loss = 3.348299980163574
Epoch 001/003, valid ppl = None, batch 0599/0644, train loss = 3.395965814590454
Epoch 001/003, valid ppl = None, batch 0600/0644, train loss = 3.4007151126861572
Epoch 001/003, valid ppl = None, batch 0601/0644, train loss = 3.510460615158081
Epoch 001/003, valid ppl = None, batch 0602/0644, train loss = 3.3868494033813477
Epoch 001/003, valid ppl = None, batch 0603/0644, train loss = 3.295961380004883
Epoch 001/003, valid ppl = None, batch 0604/0644, train loss = 3.418672800064087
Epoch 001/003, valid ppl = None, batch 0605/0644, train loss = 3.3511343002319336
Epoch 001/003, valid ppl = None, batch 0606/0644, train loss = 3.4673094749450684
Epoch 001/003, valid 

Epoch 002/003, valid ppl = 74.42816363173927, batch 0043/0644, train loss = 3.4286251068115234
Epoch 002/003, valid ppl = 74.42816363173927, batch 0044/0644, train loss = 3.271486282348633
Epoch 002/003, valid ppl = 74.42816363173927, batch 0045/0644, train loss = 3.5112597942352295
Epoch 002/003, valid ppl = 74.42816363173927, batch 0046/0644, train loss = 3.138451337814331
Epoch 002/003, valid ppl = 74.42816363173927, batch 0047/0644, train loss = 3.1375489234924316
Epoch 002/003, valid ppl = 74.42816363173927, batch 0048/0644, train loss = 3.2385849952697754
Epoch 002/003, valid ppl = 74.42816363173927, batch 0049/0644, train loss = 3.3029229640960693
Epoch 002/003, valid ppl = 74.42816363173927, batch 0050/0644, train loss = 3.3642096519470215
Epoch 002/003, valid ppl = 74.42816363173927, batch 0051/0644, train loss = 3.2462403774261475
Epoch 002/003, valid ppl = 74.42816363173927, batch 0052/0644, train loss = 3.5532941818237305
Epoch 002/003, valid ppl = 74.42816363173927, batch 

Epoch 002/003, valid ppl = 74.42816363173927, batch 0130/0644, train loss = 3.351308584213257
Epoch 002/003, valid ppl = 74.42816363173927, batch 0131/0644, train loss = 3.356323719024658
Epoch 002/003, valid ppl = 74.42816363173927, batch 0132/0644, train loss = 3.495096206665039
Epoch 002/003, valid ppl = 74.42816363173927, batch 0133/0644, train loss = 3.273498058319092
Epoch 002/003, valid ppl = 74.42816363173927, batch 0134/0644, train loss = 3.3468122482299805
Epoch 002/003, valid ppl = 74.42816363173927, batch 0135/0644, train loss = 3.281017303466797
Epoch 002/003, valid ppl = 74.42816363173927, batch 0136/0644, train loss = 3.270460605621338
Epoch 002/003, valid ppl = 74.42816363173927, batch 0137/0644, train loss = 3.142301559448242
Epoch 002/003, valid ppl = 74.42816363173927, batch 0138/0644, train loss = 3.3943841457366943
Epoch 002/003, valid ppl = 74.42816363173927, batch 0139/0644, train loss = 3.432379961013794
Epoch 002/003, valid ppl = 74.42816363173927, batch 0140/0

Epoch 002/003, valid ppl = 74.42816363173927, batch 0217/0644, train loss = 3.5206422805786133
Epoch 002/003, valid ppl = 74.42816363173927, batch 0218/0644, train loss = 3.44258975982666
Epoch 002/003, valid ppl = 74.42816363173927, batch 0219/0644, train loss = 3.1083059310913086
Epoch 002/003, valid ppl = 74.42816363173927, batch 0220/0644, train loss = 3.158205032348633
Epoch 002/003, valid ppl = 74.42816363173927, batch 0221/0644, train loss = 3.1169590950012207
Epoch 002/003, valid ppl = 74.42816363173927, batch 0222/0644, train loss = 3.084209442138672
Epoch 002/003, valid ppl = 74.42816363173927, batch 0223/0644, train loss = 3.3966805934906006
Epoch 002/003, valid ppl = 74.42816363173927, batch 0224/0644, train loss = 3.269355297088623
Epoch 002/003, valid ppl = 74.42816363173927, batch 0225/0644, train loss = 3.537851333618164
Epoch 002/003, valid ppl = 74.42816363173927, batch 0226/0644, train loss = 3.446751832962036
Epoch 002/003, valid ppl = 74.42816363173927, batch 0227/

Epoch 002/003, valid ppl = 74.42816363173927, batch 0304/0644, train loss = 3.3232555389404297
Epoch 002/003, valid ppl = 74.42816363173927, batch 0305/0644, train loss = 3.4494121074676514
Epoch 002/003, valid ppl = 74.42816363173927, batch 0306/0644, train loss = 3.0839314460754395
Epoch 002/003, valid ppl = 74.42816363173927, batch 0307/0644, train loss = 3.304769277572632
Epoch 002/003, valid ppl = 74.42816363173927, batch 0308/0644, train loss = 3.2405881881713867
Epoch 002/003, valid ppl = 74.42816363173927, batch 0309/0644, train loss = 3.280585527420044
Epoch 002/003, valid ppl = 74.42816363173927, batch 0310/0644, train loss = 3.426680088043213
Epoch 002/003, valid ppl = 74.42816363173927, batch 0311/0644, train loss = 3.2811625003814697
Epoch 002/003, valid ppl = 74.42816363173927, batch 0312/0644, train loss = 3.2175803184509277
Epoch 002/003, valid ppl = 74.42816363173927, batch 0313/0644, train loss = 3.228987693786621
Epoch 002/003, valid ppl = 74.42816363173927, batch 03

Epoch 002/003, valid ppl = 74.42816363173927, batch 0391/0644, train loss = 3.1517412662506104
Epoch 002/003, valid ppl = 74.42816363173927, batch 0392/0644, train loss = 3.1229474544525146
Epoch 002/003, valid ppl = 74.42816363173927, batch 0393/0644, train loss = 3.393700361251831
Epoch 002/003, valid ppl = 74.42816363173927, batch 0394/0644, train loss = 3.108192205429077
Epoch 002/003, valid ppl = 74.42816363173927, batch 0395/0644, train loss = 3.504847526550293
Epoch 002/003, valid ppl = 74.42816363173927, batch 0396/0644, train loss = 3.410048246383667
Epoch 002/003, valid ppl = 74.42816363173927, batch 0397/0644, train loss = 3.397461175918579
Epoch 002/003, valid ppl = 74.42816363173927, batch 0398/0644, train loss = 3.310055732727051
Epoch 002/003, valid ppl = 74.42816363173927, batch 0399/0644, train loss = 3.1433756351470947
Epoch 002/003, valid ppl = 74.42816363173927, batch 0400/0644, train loss = 3.2876014709472656
Epoch 002/003, valid ppl = 74.42816363173927, batch 0401

Epoch 002/003, valid ppl = 74.42816363173927, batch 0478/0644, train loss = 3.024843692779541
Epoch 002/003, valid ppl = 74.42816363173927, batch 0479/0644, train loss = 3.3270480632781982
Epoch 002/003, valid ppl = 74.42816363173927, batch 0480/0644, train loss = 3.4354851245880127
Epoch 002/003, valid ppl = 74.42816363173927, batch 0481/0644, train loss = 3.422672748565674
Epoch 002/003, valid ppl = 74.42816363173927, batch 0482/0644, train loss = 3.135908365249634
Epoch 002/003, valid ppl = 74.42816363173927, batch 0483/0644, train loss = 3.2078566551208496
Epoch 002/003, valid ppl = 74.42816363173927, batch 0484/0644, train loss = 3.132645845413208
Epoch 002/003, valid ppl = 74.42816363173927, batch 0485/0644, train loss = 3.0957860946655273
Epoch 002/003, valid ppl = 74.42816363173927, batch 0486/0644, train loss = 3.265941858291626
Epoch 002/003, valid ppl = 74.42816363173927, batch 0487/0644, train loss = 3.6352107524871826
Epoch 002/003, valid ppl = 74.42816363173927, batch 048

Epoch 002/003, valid ppl = 74.42816363173927, batch 0565/0644, train loss = 3.057060718536377
Epoch 002/003, valid ppl = 74.42816363173927, batch 0566/0644, train loss = 3.232100248336792
Epoch 002/003, valid ppl = 74.42816363173927, batch 0567/0644, train loss = 3.2810676097869873
Epoch 002/003, valid ppl = 74.42816363173927, batch 0568/0644, train loss = 3.2927322387695312
Epoch 002/003, valid ppl = 74.42816363173927, batch 0569/0644, train loss = 3.5062308311462402
Epoch 002/003, valid ppl = 74.42816363173927, batch 0570/0644, train loss = 3.2119266986846924
Epoch 002/003, valid ppl = 74.42816363173927, batch 0571/0644, train loss = 3.3050920963287354
Epoch 002/003, valid ppl = 74.42816363173927, batch 0572/0644, train loss = 3.5290586948394775
Epoch 002/003, valid ppl = 74.42816363173927, batch 0573/0644, train loss = 3.447561025619507
Epoch 002/003, valid ppl = 74.42816363173927, batch 0574/0644, train loss = 3.2325656414031982
Epoch 002/003, valid ppl = 74.42816363173927, batch 0

KeyboardInterrupt: 