In [1]:
import tensorflow as tf
from model import LanguageModel
import json
import numpy as np
from utils import batchify, get_batch
import time

In [2]:
# Session configuration: allow_growth asks TensorFlow to allocate GPU memory
# on demand instead of reserving it all when the session is created.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)

In [3]:
# Load the two vocabularies (token -> index mappings stored as JSON).
# The encoding is pinned to UTF-8 so that non-ASCII keys decode identically on
# every platform; without it open() falls back to the locale's default encoding.
with open('baomoi_punc/word2idx.json', 'r', encoding='utf-8') as inp:
    word2idx = json.load(inp)
with open('baomoi_punc/char2idx.json', 'r', encoding='utf-8') as inp:
    char2idx = json.load(inp)

In [4]:
# Keyword arguments that are unpacked into LanguageModel(...) in the next cell.
# NOTE(review): the meaning of each key is defined in model.py, which is not
# visible here. drop_i / wdrop / drop_o / drop_e look like dropout rates and
# each char_cnn_layers entry looks like [kernel width, filter count] — confirm.
params = {
    "rnn_layers": [
        {"units": 1024, "input_size": 1024, "drop_i": 0.01, "wdrop": 0.05, "drop_o": 0.01},
        {"units": 1024, "input_size": 1024, "wdrop": 0.05, "drop_o": 0.01},
        {"units": 1024, "input_size": 1024, "drop_o": 0.1, "wdrop": 0.05},
    ],
    # One slot more than the number of known tokens
    # (presumably a reserved padding/unknown index — TODO confirm).
    "vocab_size": len(word2idx) + 1,
    "drop_e": 0.0,
    "char_vocab_size": len(char2idx) + 1,
    "char_cnn_layers": [
        [1, 16],
        [2, 16],
        [3, 32],
        [4, 64],
        [5, 128],
        [6, 256],
        [7, 512],
    ],
    "char_vec_size": 16,
}

In [5]:
# Instantiate the language model for inference only (is_training=False);
# every other constructor argument comes from the params dict.
model = LanguageModel(is_training=False, **params)

In [6]:
# build_model() is defined in model.py (not shown); it has to run before the
# Saver is created because the Saver is given the current global variables.
model.build_model()

# Look the collection up once instead of once per global variable.
lstm_saved_state = tf.get_collection('LSTM_SAVED_STATE')

# Everything in the 'LSTM_SAVED_STATE' collection is left out of the Saver, so
# those variables are not read from the checkpoint.
saver = tf.train.Saver(
    [var for var in tf.global_variables() if var not in lstm_saved_state]
)

# Initialise all variables, including the ones the Saver will not restore.
sess.run(tf.global_variables_initializer())

In [7]:
# Checkpoint prefix to restore the Saver's variables from.
CHECKPOINT_PATH = '11/checkpoints/test/model.cpkt-47181'
saver.restore(sess, CHECKPOINT_PATH)

INFO:tensorflow:Restoring parameters from 11/checkpoints/test/model.cpkt-47181


In [8]:
def _load_array(path):
    """Read one ``.npy`` file through an explicitly opened binary handle."""
    with open(path, 'rb') as handle:
        return np.load(handle)


# Character-level and word-level test data, loaded in that order.
test_char = _load_array('baomoi_punc/test_char.npy')
test_word = _load_array('baomoi_punc/test_word.npy')
# Displayed as the cell output: number of entries along the first axis.
len(test_word)

5835622

In [9]:
# Second argument of batchify(); presumably the batch size — confirm in utils.batchify.
EVAL_BATCH_SIZE = 38

# NOTE: this cell overwrites test_word / test_char with their batchified,
# transposed versions, so it is not idempotent — re-run the loading cell
# before running it a second time.
test_word = batchify(test_word, EVAL_BATCH_SIZE).T
test_char = batchify(test_char, EVAL_BATCH_SIZE).T

In [10]:
# Target placeholders for the forward and backward directions.
fw_iy = tf.placeholder(dtype=tf.int32, shape=[None, None], name='fw_y')
bw_iy = tf.placeholder(dtype=tf.int32, shape=[None, None], name='bw_y')


def _direction_loss(direction_model, targets):
    """Sequence loss of one direction's 'decoder' output against ``targets``.

    The loss is weighted by ``model.seq_masks`` and averaged across both
    timesteps and batch.
    """
    return tf.contrib.seq2seq.sequence_loss(
        logits=direction_model['decoder'],
        targets=targets,
        weights=model.seq_masks,
        average_across_timesteps=True,
        average_across_batch=True)


fw_loss = _direction_loss(model.fw_model, fw_iy)
bw_loss = _direction_loss(model.bw_model, bw_iy)

# Mean of the two directional losses; the name 'test_loss' is attached to the
# add op, the scaling by 0.5 is applied on top of it.
test_loss = 0.5 * tf.add(fw_loss, bw_loss, name='test_loss')

In [11]:
def evaluate_step(test_word, test_char, bptt):
    """Evaluate ``test_loss`` over the whole test set in chunks of ``bptt`` rows.

    Relies on the notebook-level names ``sess``, ``model``, ``test_loss``,
    ``fw_iy``, ``bw_iy`` and on ``utils.get_batch``.

    Args:
        test_word: batchified word data; it is walked along its first axis.
        test_char: batchified character data, passed through to ``get_batch``.
        bptt: step of the walk, also handed to ``get_batch``.

    Returns:
        The per-chunk losses averaged with each chunk weighted by the number
        of target rows it contributed.

    Raises:
        ValueError: if no target rows were evaluated (e.g. empty input).
    """
    start_time = time.time()
    weighted_loss = 0.0
    evaluated_rows = 0
    for i in range(0, len(test_word), bptt):
        (fw_x, fw_y), (bw_x, bw_y) = get_batch(test_word, test_char, bptt, i, evaluate=True)
        chunk_rows = len(fw_y)
        loss = sess.run(
            test_loss,
            feed_dict={
                model.fw_inputs: fw_x,
                model.bw_inputs: bw_x,
                fw_iy: fw_y,
                bw_iy: bw_y,
                # One length per column of fw_y, each equal to its row count.
                model.seq_lens: [fw_y.shape[0]] * fw_y.shape[1],
                # True only for the very first chunk of the pass.
                model.reset_state: i == 0
            }
        )
        # Each chunk's loss is already a mean, so weight it by the chunk size.
        weighted_loss += loss * chunk_rows
        evaluated_rows += chunk_rows
        print("Evaluate loss {}, time {}".format(loss, time.time()-start_time))
    if evaluated_rows == 0:
        raise ValueError("evaluate_step: no target rows to evaluate (empty test data?)")
    # Divide by the weights actually accumulated; len(test_word) is only right
    # when get_batch yields exactly one target row per input row.
    total_loss = weighted_loss / evaluated_rows
    print("Evaluate total loss {}, time {}".format(total_loss, time.time()-start_time))
    return total_loss

In [None]:
# Run the evaluation pass over the batchified test data, 100 rows per chunk.
evaluate_step(test_word, test_char, bptt=100)

Evaluate loss 3.69686222076416, time 0.6846129894256592
Evaluate loss 3.551373243331909, time 0.9483013153076172
Evaluate loss 3.5342602729797363, time 1.2975621223449707
Evaluate loss 3.5873868465423584, time 1.5611605644226074
Evaluate loss 3.602341890335083, time 1.9999737739562988
Evaluate loss 3.626741647720337, time 2.3224804401397705
Evaluate loss 3.456254720687866, time 2.67144513130188
Evaluate loss 3.3528120517730713, time 2.9059252738952637
Evaluate loss 3.467489242553711, time 3.1477320194244385
Evaluate loss 3.506603717803955, time 3.499591827392578
Evaluate loss 3.5235543251037598, time 3.7362241744995117
Evaluate loss 3.436035633087158, time 4.074859380722046
Evaluate loss 3.506998062133789, time 4.3129377365112305
Evaluate loss 3.560364007949829, time 4.651585817337036
Evaluate loss 3.607978582382202, time 4.883833646774292
Evaluate loss 3.5692455768585205, time 5.116603374481201
Evaluate loss 3.567150831222534, time 5.456765413284302
Evaluate loss 3.5863871574401855, t

Evaluate loss 3.476344347000122, time 41.54769444465637
Evaluate loss 3.428722858428955, time 41.78670310974121
Evaluate loss 3.406343460083008, time 42.02679634094238
Evaluate loss 3.460768461227417, time 42.36982178688049
Evaluate loss 3.542360305786133, time 42.61316251754761
Evaluate loss 3.4489026069641113, time 42.93291449546814
Evaluate loss 3.468369245529175, time 43.184099197387695
Evaluate loss 3.471867322921753, time 43.420939207077026
Evaluate loss 3.54521107673645, time 43.69525361061096
Evaluate loss 3.460273265838623, time 43.99702858924866
Evaluate loss 3.636570453643799, time 44.235997915267944
Evaluate loss 3.5865182876586914, time 44.54841065406799
Evaluate loss 3.6147022247314453, time 44.79630947113037
Evaluate loss 3.603364944458008, time 45.12781620025635
Evaluate loss 3.691828727722168, time 45.367536544799805
Evaluate loss 3.696754217147827, time 45.71440005302429
Evaluate loss 3.730435371398926, time 45.95170497894287
Evaluate loss 3.6746490001678467, time 46.