In [1]:
import tensorflow as tf
import numpy as np
# Fix TensorFlow's graph-level random seed (777) so that randomly generated
# values are repeatable from one run of the notebook to the next.
tf.set_random_seed(777)  # reproducibility

  from ._conv import register_converters as _register_converters


In [9]:
# RNN hyper-parameters
# -- model dimensions --
input_dim = 5        # size of each input vector fed to the cell
hidden_size = 5      # size of each cell output
num_classes = 5      # number of classes produced per time step

# -- data dimensions --
batch_size = 1       # only one string is fed at a time
sequence_length = 6  # each string is 6 characters long

# -- training --
learning_rate = 0.1

In [3]:
# Build the toy dataset
# Vocabulary: this list is used later to turn an index back into its character.
idx2char = ['h', 'i', 'e', 'l', 'o']

# Input indices spell "hihell"; the targets are the same text shifted by one
# character, "ihello".
x_data = [[0, 1, 0, 2, 3, 3]]
y_data = [[1, 0, 2, 3, 3, 4]]

# One-hot encode every input index: one row per time step, one column per
# vocabulary entry, with a 1 in the column matching the character index.
x_one_hot = [[[1 if column == char_index else 0
               for column in range(len(idx2char))]
              for char_index in sequence]
             for sequence in x_data]

In [4]:
# Graph inputs
# X receives the one-hot encoded characters, Y the integer class labels.
# The batch dimension is declared as None: batch_size is currently 1, but the
# author's intent is that N sequences could be fed as well.
# NOTE(review): other cells build tensors from the fixed batch_size, so confirm
# those before actually feeding a larger batch.
X = tf.placeholder(dtype=tf.float32,
                   shape=[None, sequence_length, input_dim])  # X one-hot
Y = tf.placeholder(dtype=tf.int32,
                   shape=[None, sequence_length])  # Y label

In [5]:
# Recurrent layer definition
# A single LSTM cell with hidden_size units.
cell = tf.contrib.rnn.BasicLSTMCell(
    num_units=hidden_size,
    state_is_tuple=True,
)

# All-zero starting state, built for the fixed batch_size.
initial_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)

# Run the cell over the input sequence held in X; `outputs` holds the cell
# output at every time step, `_states` the final state (unused here).
outputs, _states = tf.nn.dynamic_rnn(
    cell=cell,
    inputs=X,
    initial_state=initial_state,
    dtype=tf.float32,
)

Instructions for updating:
Use the retry module or similar alternatives.


In [11]:
# Cost: define the sequence loss
# Per-element weights: how much each (batch, time step) entry counts towards
# the loss. All ones, so every element counts equally.
weights = tf.ones([batch_size, sequence_length])

# Caution: feeding the raw RNN outputs straight into the loss as logits works
# poorly. Pass them through a fully connected (FC) layer first and use that
# final output as the logits.
#
# NOTE: `outputs` is rebound below from the RNN output to the FC logits.
# Re-running this cell without re-running the RNN cell first would feed the
# logits back in and stack another FC layer on top (the reshape still succeeds
# because hidden_size and num_classes are both 5).

print("RNN output shape before flatten:", outputs.shape)
X_for_fc = tf.reshape(outputs, [-1, hidden_size])  # flatten to (batch * time, hidden_size)
print("RNN output shape after flatten:", X_for_fc.shape)

# Apply the FC layer; activation_fn=None keeps the output linear (raw logits).
outputs = tf.contrib.layers.fully_connected(
    inputs=X_for_fc, num_outputs=num_classes, activation_fn=None)
print("FC output shape after FC layer:", outputs.shape)

# Reshape the FC output back into the 3-D layout that sequence_loss expects.
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])
print("output shape after reshaping. this may be same with RNN output shape before flatten:", outputs.shape)

sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=outputs,
                                                 targets=Y,
                                                 weights=weights)

loss = tf.reduce_mean(sequence_loss)
# Use the learning_rate constant from the hyper-parameter cell instead of a
# second hard-coded 0.1, so that changing the constant actually takes effect.
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

RNN output shape before flatten: (1, 6, 5)
RNN output shape after flatten: (6, 5)
FC output shape after FC layer: (6, 5)
output shape after reshaping. this may be same with RNN output shape before flatten: (1, 6, 5)


In [12]:
# Predicted class index for every time step: argmax over the class axis.
prediction = tf.argmax(outputs, axis=2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The same single sample is fed on every step.
    train_feed = {X: x_one_hot, Y: y_data}
    predict_feed = {X: x_one_hot}

    for step in range(50):
        # One optimisation step; the fetched loss is the value for this step.
        loss_value, _ = sess.run([loss, train], feed_dict=train_feed)
        # Separate run, so the prediction reflects the just-updated weights.
        result = sess.run(prediction, feed_dict=predict_feed)
        print(step, "loss:", loss_value, "prediction:", result, "true Y:", y_data)

        # Map each predicted index back to its character via idx2char.
        result_str = [idx2char[char_index] for char_index in np.squeeze(result)]
        print("\tPrediction str: ", ''.join(result_str))

0 loss: 1.6217622 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
1 loss: 1.5106652 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
2 loss: 1.4487553 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
3 loss: 1.2989862 prediction: [[2 2 2 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  eeelll
4 loss: 1.1089666 prediction: [[1 2 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ieello
5 loss: 0.8703295 prediction: [[1 2 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ieello
6 loss: 0.64302015 prediction: [[1 2 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ieello
7 loss: 0.47926503 prediction: [[1 0 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ihello
8 loss: 0.35349712 prediction: [[1 0 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ihello
9 loss: 0.2693797 prediction: [[1 0 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ihel