# RNN 분류 모델

In [0]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import json

# 학습 데이터 불러오기

In [0]:
DATA_IN_PATH = '/.data_in/'
DATA_OUT_PATH = '/.data_out/'

TRAIN_INPUT_DATA = 'train_input.npy'
TRAIN_LABEL_DATA = 'train_label.npy'
DATA_CONFIGS = 'data_configs.json'

input_data = np.load(open(DATA_IN_PATH + TRAIN_INPUT_DATA, 'rb'))
label_data = np.load(open(DATA_IN_PATH + TRAIN_LABEL_DATA, 'rb'))

prepro_configs = None

with open(DATA_IN_PATH + DATA_CONFIGS, 'r') as f:
    prepro_configs = json.load(f)

# 학습과 검증 데이터셋 분리

In [0]:
from sklearn.model_selection import train_test_split

TEST_SPLIT = 0.1
RANDOM_SEED = 13371447

train_input, test_input, train_label, test_label = train_test_split(input_data, label_data,
                                                                    test_size=TEST_SPLIT, random_state=RANDOM_SEED)

# 데이터 입력 함수

In [0]:
BATCH_SIZE = 16
EPOCHS = 3

def mapping_fn(X, Y):
    inputs, labels = {'x': X}, Y
    return inputs, labels

def train_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((train_input, train_label))
    dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(mapping_fn)
    dataset = dataset.repeat(count=EPOCHS)
    iterator = dataset.make_one_shot_iterator()
    
    return iterator.get_next()

def eval_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((test_input, test_label))
    dataset = dataset.map(mapping_fn)
    dataset = dataset.batch(BATCH_SIZE)
    iterator = dataset.make_one_shot_iterator()
    
    return iterator.get_next()

# 모델 정의, 학습

In [0]:
VOCAB_SIZE = prepro_configs['vocab_size']+1

WORD_EMBEDDING_DIM = 100
HIDDEN_STATE_DIM = 150
DENSE_FEATURE_DIM = 150

learning_rate = 0.001

In [0]:
def model_fn(features, labels, mode):
    TRAIN = mode == tf.estimator.ModeKeys.TRAIN
    EVAL = mode == tf.estimator.ModeKeys.EVAL
    PREDICT = mode == tf.estimator.ModeKeys.PREDICT
    
    embedding_layer = tf.keras.layers.Embedding(VOCAB_SIZE, WORD_EMBEDDING_DIM)(features['x'])
    embedding_layer = tf.keras.layers.Dropout(0.2)(embedding_layer)
    
    rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [HIDDEN_STATE_DIM, HIDDEN_STATE_DIM]]
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
    outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                       inputs=embedding_layer,
                                       dtype=tf.float32)
    
    outputs = tf.keras.layers.Dropout(0.2)(outputs)
    hidden_layer = tf.keras.layers.Dense(DENSE_FEATURE_DIM, activation=tf.nn.tanh)(outputs[:,-1,:])
    hidden_layer = tf.keras.layers.Dropout(0.2)(hidden_layer)
    logits = tf.keras.layers.Dense(1)(hidden_layer)
    logits = tf.squeeze(logits, axis=-1)
    
    sigmoid_logits = tf.nn.sigmoid(logits)
    
    loss = tf.losses.sigmoid_cross_entropy(labels, logits)
    
    if PREDICT:
        predictions = {'sentiment': sigmoid_logits}
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions)
    
    if EVAL:
        accuracy = tf.metrics.accuracy(labels, tf.round(sigmoid_logits))
        eval_metric_ops = {'acc': accuracy}
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
    
    if TRAIN:
        global_step = tf.train.get_global_step()
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          train_op=train_op,
                                          loss=loss)

In [0]:
if not os.path.exists(DATA_OUT_PATH):
    os.makedirs(DATA_OUT_PATH)
    
est = tf.estimator.Estimator(model_fn, model_dir=DATA_OUT_PATH+'checkpoint/rnn')

In [0]:
os.environ["CUDA_VISIBLE_DEVICES"]="0"

est.train(train_input_fn)

W0806 13:14:19.667761 140526941017984 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/training_util.py:236: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
W0806 13:14:19.773999 140526941017984 deprecation.py:323] From <ipython-input-5-b0057d75b21a>:14: DatasetV1.make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
W0806 13:14:19.981736 140526941017984 deprecation.py:506] From /usr/local/lib/python3.6/dist-p

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7fcea73e8080>

In [0]:
est.evaluate(eval_input_fn)

{'acc': 0.8368, 'global_step': 4221, 'loss': 0.40145606}