In [13]:
import os
from datetime import datetime
import tensorflow.compat.v1 as tf
import numpy as np
import json
from sklearn.model_selection import train_test_split

In [3]:
# Locations and file names of the preprocessed inputs read by this notebook.
DATA_IN_PATH='./data_in/'
DATA_OUT_PATH='./data_out/'
INPUT_TRAIN_DATA='nsmc_train_input.npy'
LABEL_TRAIN_DATA='nsmc_train_label.npy'
DATA_CONFIGS='data_configs.json'

# Hand np.load the path instead of an open() handle: numpy then opens and
# closes the file itself, so no file descriptor is left dangling.
input_data=np.load(DATA_IN_PATH+INPUT_TRAIN_DATA)
label_data=np.load(DATA_IN_PATH+LABEL_TRAIN_DATA)

# json.load needs a file object; the context manager guarantees it is closed.
with open(DATA_IN_PATH+DATA_CONFIGS,'r') as config_file:
    prepro_configs=json.load(config_file)

In [11]:
# Split settings.
TEST_SPLIT = 0.1        # passed to train_test_split as test_size
RNG_SEED = 13371447     # passed to train_test_split as random_state

# Model / training hyper-parameters.
# presumably the +1 reserves an extra id (e.g. padding) - verify against preprocessing
VOCAB_SIZE = prepro_configs['vocab_size'] + 1
EMB_SIZE = 128
BATCH_SIZE = 16
NUM_EPOCHS = 1

# NOTE(review): only the first element of label_data is used as the label array;
# this assumes the saved labels carry a leading axis of length 1 - confirm.
input_train, input_eval, label_train, label_eval = train_test_split(
    input_data,
    label_data[0],
    test_size=TEST_SPLIT,
    random_state=RNG_SEED,
)

In [21]:
def mapping_fn(X,Y):
    """Package one (features, label) pair in the form model_fn consumes.

    Args:
        X: the feature value(s); stored under the key 'x'.
        Y: the label value(s); passed through untouched.

    Returns:
        A 2-tuple ``({'x': X}, Y)``.
    """
    return {'x': X}, Y

def train_input_fn():
    """Input function for training, built from input_train / label_train.

    Pipeline: slice the arrays, shuffle with a buffer as large as the training
    set, group into batches of BATCH_SIZE, wrap each batch with mapping_fn and
    repeat the whole thing NUM_EPOCHS times.

    Returns:
        The next-element tensors of a one-shot iterator over that pipeline,
        i.e. a ``({'x': ...}, labels)`` pair.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices((input_train, label_train))
        .shuffle(buffer_size=len(input_train))
        .batch(BATCH_SIZE)
        .map(mapping_fn)
        .repeat(count=NUM_EPOCHS)
    )
    return pipeline.make_one_shot_iterator().get_next()

def eval_input_fn():
    """Input function for evaluation, built from input_eval / label_eval.

    The data is batched with the shared BATCH_SIZE constant and wrapped with
    mapping_fn. It is deliberately NOT shuffled: the metrics computed in
    model_fn (loss, accuracy) are aggregates over the whole set, so a
    full-dataset shuffle buffer only costs memory and time.

    Returns:
        The next-element tensors of a one-shot iterator over the evaluation
        pipeline, i.e. a ``({'x': ...}, labels)`` pair.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices((input_eval, label_eval))
        .batch(BATCH_SIZE)
        .map(mapping_fn)
    )
    return pipeline.make_one_shot_iterator().get_next()

In [28]:
def model_fn(features, labels, mode, params):
    """Build the Conv1D binary classifier graph for ``tf.estimator.Estimator``.

    Network: Embedding -> Dropout -> Conv1D(32, k=3, relu) -> GlobalMaxPool1D
    -> Dense(250, relu) -> Dropout -> Dense(1) producing one logit per example.

    Args:
        features: dict whose 'x' entry holds the model input (see mapping_fn).
        labels: label tensor, or None; when present it is reshaped to [-1, 1]
            so it lines up with the single-unit logits.
        mode: a ``tf.estimator.ModeKeys`` value selecting which spec to build.
        params: not used by this model; kept because it is part of the
            signature the Estimator was given.

    Returns:
        tf.estimator.EstimatorSpec with
        - TRAIN: sigmoid cross-entropy loss and an Adam (lr=0.001) train_op,
        - EVAL: the same loss plus an 'acc' accuracy metric,
        - PREDICT: predictions {'prob': sigmoid(logits)}.

    Raises:
        ValueError: if ``mode`` is none of TRAIN / EVAL / PREDICT.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    is_eval = mode == tf.estimator.ModeKeys.EVAL
    is_predict = mode == tf.estimator.ModeKeys.PREDICT

    embedded = tf.keras.layers.Embedding(VOCAB_SIZE, EMB_SIZE)(features['x'])

    # Fix: drive this dropout from the Estimator mode explicitly, exactly like
    # the dropout after the hidden layer. Previously no `training` flag was
    # passed here, so the two dropout layers were controlled differently.
    embedded = tf.keras.layers.Dropout(rate=0.2)(embedded, training=is_training)

    conv = tf.keras.layers.Conv1D(
        filters=32,
        kernel_size=3,
        padding='same',
        activation=tf.nn.relu,
    )(embedded)
    pooled = tf.keras.layers.GlobalMaxPool1D()(conv)

    hidden = tf.keras.layers.Dense(units=250, activation=tf.nn.relu)(pooled)
    hidden = tf.keras.layers.Dropout(rate=0.2)(hidden, training=is_training)
    logits = tf.keras.layers.Dense(units=1)(hidden)

    if labels is not None:
        # Match the [batch, 1] shape of the logits.
        labels = tf.reshape(labels, [-1, 1])

    if is_training or is_eval:
        # Both modes use the same loss; build it once.
        loss = tf.losses.sigmoid_cross_entropy(labels, logits)

        if is_training:
            global_step = tf.train.get_global_step()
            train_op = tf.train.AdamOptimizer(0.001).minimize(loss, global_step)
            return tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss=loss)

        # EVAL: threshold the sigmoid output at 0.5 via rounding.
        pred = tf.nn.sigmoid(logits)
        accuracy = tf.metrics.accuracy(labels, tf.round(pred))
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops={'acc': accuracy})

    if is_predict:
        return tf.estimator.EstimatorSpec(mode=mode, predictions={'prob': tf.nn.sigmoid(logits),})

    # Fail loudly instead of silently returning None for an unknown mode.
    raise ValueError("Unsupported mode for model_fn: {!r}".format(mode))

In [29]:
# Estimator driving model_fn; model_dir is where its checkpoints are written.
est = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="data_out/checkpoint/cnn_model",
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'data_out/checkpoint/cnn_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [30]:
# Remember when training began (naive UTC timestamp); a later cell subtracts
# this from its own utcnow() reading to report the elapsed time.
time_start = datetime.utcnow()
print(
    "Experiment started at {}".format(
        time_start.strftime("%H:%M:%S")
    )
)
print(".....................................")

# Runs until train_input_fn is exhausted.
est.train(input_fn=train_input_fn)



Experiment started at 15:28:12
.....................................
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into data_out/checkpoint/cnn_model\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.695583, step = 0
INFO:tensorflow:global_step/sec: 13.9143
INFO:tensorflow:loss = 0.58177173, step = 100 (7.188 sec)
INFO:tensorflow:global_step/sec: 14.0038
INFO:tensorflow:loss = 0.7051201, step = 200 (7.141 sec)
INFO:tensorflow:global_step/sec: 12.9914
INFO:tensorflow:loss = 0.418813, step = 300 (7.698 sec)
INFO:tensorflow:global_step/sec: 14.1044
INFO:tensorflow:loss = 0.5912791, step = 400 (7.088 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 14.8137
INFO:tensorflow:loss = 0.25539652, step = 7800 (6.750 sec)
INFO:tensorflow:global_step/sec: 14.7861
INFO:tensorflow:loss = 0.44581944, step = 7900 (6.764 sec)
INFO:tensorflow:global_step/sec: 14.82
INFO:tensorflow:loss = 0.42771804, step = 8000 (6.748 sec)
INFO:tensorflow:global_step/sec: 14.7921
INFO:tensorflow:loss = 0.16223654, step = 8100 (6.759 sec)
INFO:tensorflow:global_step/sec: 14.783
INFO:tensorflow:loss = 0.50869465, step = 8200 (6.766 sec)
INFO:tensorflow:global_step/sec: 14.7951
INFO:tensorflow:loss = 0.46364266, step = 8300 (6.758 sec)
INFO:tensorflow:global_step/sec: 14.7234
INFO:tensorflow:loss = 0.67906415, step = 8400 (6.793 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 8438...
INFO:tensorflow:Saving checkpoints for 8438 into data_out/checkpoint/cnn_model\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 8438...
INFO:tensorflow:Loss for final step: 0.36863905.


NameError: name 'datatime' is not defined

In [31]:
# Take the end reading first, then derive the duration from the start
# timestamp recorded by the training cell.
time_end = datetime.utcnow()
time_elapsed = time_end - time_start

print(".....................................")
print(
    "Experiment finished at {}".format(
        time_end.strftime("%H:%M:%S")
    )
)
print("")
print(
    "Experiment elapsed time: {} seconds".format(
        time_elapsed.total_seconds()
    )
)

.....................................
Experiment finished at 15:39:18

Experiment elapsed time: 666.616026 seconds


In [32]:
# Evaluate on the held-out split produced by train_test_split; the result is
# the metrics dict the Estimator logs (acc, loss, global_step).
valid = est.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-01-21T00:42:07Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from data_out/checkpoint/cnn_model\model.ckpt-8438
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.60039s
INFO:tensorflow:Finished evaluation at 2021-01-21-00:42:08
INFO:tensorflow:Saving dict for global step 8438: acc = 0.8265333, global_step = 8438, loss = 0.38181958
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 8438: data_out/checkpoint/cnn_model\model.ckpt-8438


In [33]:
# File names of the preprocessed test inputs and labels under DATA_IN_PATH.
INPUT_TEST_DATA = 'nsmc_test_input.npy'
LABEL_TEST_DATA = 'nsmc_test_label.npy'

# Pass the path rather than an open() handle so numpy opens and closes the
# file itself and no file descriptor is leaked.
test_input_data = np.load(DATA_IN_PATH + INPUT_TEST_DATA)
test_label_data = np.load(DATA_IN_PATH + LABEL_TEST_DATA)

In [34]:
def test_input_fn():
    """Input function over the test arrays (test_input_data / test_label_data).

    The data is batched with the shared BATCH_SIZE constant (previously a
    duplicated literal) and wrapped with mapping_fn; no shuffling or repeat.

    Returns:
        The next-element tensors of a one-shot iterator over the test
        pipeline, i.e. a ``({'x': ...}, labels)`` pair.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices((test_input_data, test_label_data))
        .batch(BATCH_SIZE)
        .map(mapping_fn)
    )
    return pipeline.make_one_shot_iterator().get_next()

In [35]:
# Despite the variable name, this runs evaluate() (metrics), not predict(),
# over the test set supplied by test_input_fn.
predict = est.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-01-21T00:47:12Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from data_out/checkpoint/cnn_model\model.ckpt-8438
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 1.38098s
INFO:tensorflow:Finished evaluation at 2021-01-21-00:47:13
INFO:tensorflow:Saving dict for global step 8438: acc = 0.82738, global_step = 8438, loss = 0.38326234
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 8438: data_out/checkpoint/cnn_model\model.ckpt-8438
