<a href="https://colab.research.google.com/github/ttury/Deep-Learning-For-Natural-Language-Processing/blob/master/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional


# Toy input batch laid out as (batch_size, timesteps, input_dim) = (1, 4, 5):
# a single sample made of four timesteps with five features each.
x_train = np.array(
    [[
        [0.1, 4.2, 1.5, 1.1, 2.8],
        [1.0, 3.1, 2.5, 0.7, 1.1],
        [0.3, 2.1, 1.5, 2.1, 0.1],
        [2.2, 1.4, 0.5, 0.9, 1.1],
    ]],
    dtype=np.float32,  # stored as single-precision floats
)
print(x_train.shape)

[[[0.1 4.2 1.5 1.1 2.8]
  [1.  3.1 2.5 0.7 1.1]
  [0.3 2.1 1.5 2.1 0.1]
  [2.2 1.4 0.5 0.9 1.1]]]


In [6]:
# Vanilla RNN

# Three hidden units. return_sequences and return_state are spelled out at
# their default value (False), so the call yields one tensor only.
rnn = SimpleRNN(units=3, return_sequences=False, return_state=False)
hidden_state = rnn(x_train)  # a 3-element hidden vector for the single sample

# Prints a 2D tensor of shape (1, 3).
print(
    'hidden state : {}, shape : {}'.format(
        hidden_state,
        hidden_state.shape,
    )
)

hidden state : [[ 0.9451857  -0.99160755 -0.8591239 ]], shape: (1, 3)


In [11]:
# return_sequences=True makes the layer emit the hidden state of every
# timestep instead of only the final one.
rnn = SimpleRNN(units=3, return_sequences=True)
hidden_states = rnn(x_train)

# Prints a 3D tensor of shape (1, 4, 3).
print(
    'hidden states : {}, shape : {}'.format(
        hidden_states,
        hidden_states.shape,
    )
)

hidden states : [[[ 0.9580028  -0.995145   -0.06869399]
  [ 0.88877547 -0.8864673   0.24821901]
  [ 0.9764063  -0.47579268 -0.63838434]
  [-0.94258726 -0.17061315 -0.8455981 ]]], shape : (1, 4, 3)


In [13]:
# return_state=True adds a second output: the hidden state of the last
# timestep, returned next to the full sequence of hidden states.
rnn = SimpleRNN(units=3, return_state=True, return_sequences=True)
hidden_states, last_state = rnn(x_train)

print(
    'hidden states : {}, shape : {}'.format(
        hidden_states,
        hidden_states.shape,
    )
)
print(
    'last hidden state : {}, shape : {}'.format(
        last_state,
        last_state.shape,
    )
)

hidden states : [[[-0.99864423 -0.24700049  0.9042699 ]
  [-0.98120105  0.9859616   0.41887635]
  [-0.98417175  0.9810889   0.96586406]
  [-0.77088267  0.9594653   0.9841313 ]]], shape : (1, 4, 3)
last hidden state : [[-0.77088267  0.9594653   0.9841313 ]], shape : (1, 3)


In [15]:
# return_state=True returns the hidden state of the last timestep as a second
# output. With return_sequences=False the first output is itself only the
# final hidden state, so `hidden_states` holds a single (1, 3) state here and
# both printed tensors are expected to coincide.
rnn = SimpleRNN(units=3, return_state=True, return_sequences=False)
hidden_states, last_state = rnn(x_train)

print(
    'hidden states : {}, shape : {}'.format(
        hidden_states,
        hidden_states.shape,
    )
)
print(
    'last hidden state : {}, shape : {}'.format(
        last_state,
        last_state.shape,
    )
)

hidden states : [[ 0.45200625 -0.6268765  -0.8058161 ]], shape : (1, 3)
last hidden state : [[ 0.45200625 -0.6268765  -0.8058161 ]], shape : (1, 3)


In [17]:
# LSTM

# For an LSTM, return_state=True yields two extra outputs: the hidden state
# and the cell state of the last timestep.
lstm = LSTM(units=3, return_state=True, return_sequences=False)
hidden_state, last_state, last_cell_state = lstm(x_train)

print(
    'hidden state : {}, shape : {}'.format(
        hidden_state,
        hidden_state.shape,
    )
)
print(
    'last hidden state : {}, shape : {}'.format(
        last_state,
        last_state.shape,
    )
)
print(
    'last cell state : {}, shape : {}'.format(
        last_cell_state,
        last_cell_state.shape,
    )
)

hidden state : [[ 0.02189064 -0.25749445 -0.39719772]], shape : (1, 3)
last hidden state : [[ 0.02189064 -0.25749445 -0.39719772]], shape : (1, 3)
last cell state : [[ 0.04299979 -0.5036748  -0.7481016 ]], shape : (1, 3)


In [20]:
# Same LSTM, now with return_sequences=True: the first output carries the
# hidden state of every timestep, while return_state=True still appends the
# last timestep's hidden state and cell state.
lstm = LSTM(units=3, return_state=True, return_sequences=True)
hidden_states, last_state, last_cell_state = lstm(x_train)

print(
    'hidden states : {}, shape : {}'.format(
        hidden_states,
        hidden_states.shape,
    )
)
print(
    'last hidden state : {}, shape : {}'.format(
        last_state,
        last_state.shape,
    )
)
print(
    'last cell state : {}, shape : {}'.format(
        last_cell_state,
        last_cell_state.shape,
    )
)

hidden states : [[[-0.00822654 -0.20784229 -0.2609498 ]
  [-0.01817766  0.35369754 -0.51294506]
  [-0.05708222  0.4261345  -0.66671336]
  [-0.0560012   0.5194714  -0.65003467]]], shape : (1, 4, 3)
last hidden state : [[-0.0560012   0.5194714  -0.65003467]], shape : (1, 3)
last cell state : [[-1.2356095   0.81141484 -1.5364548 ]], shape : (1, 3)


In [23]:
# Constant initializers: every kernel and recurrent weight starts at 0.1 and
# every bias at 0, instead of being drawn at random.
k_init = tf.keras.initializers.Constant(0.1)  # intended for kernel_initializer
r_init = tf.keras.initializers.Constant(0.1)  # intended for recurrent_initializer
b_init = tf.keras.initializers.Constant(0)    # intended for bias_initializer

# Only the kernel and bias initializer objects are displayed.
print(k_init, b_init)

<tensorflow.python.keras.initializers.initializers_v2.Constant object at 0x7f58e15650d0> <tensorflow.python.keras.initializers.initializers_v2.Constant object at 0x7f58e15656d0>


In [26]:
# Bidirectional LSTM

bilstm = Bidirectional(
    layer=LSTM(
        units=3,
        return_state=True,
        return_sequences=False,
        kernel_initializer=k_init,
        recurrent_initializer=r_init,
        bias_initializer=b_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(x_train)

print(
    'hidden states : {}, shape : {}'.format(
        hidden_states,
        hidden_states.shape,
    )
)
# With return_sequences=False, only two hidden states are concatenated and
# returned: the forward LSTM's state at the last timestep and the backward
# LSTM's state at the first timestep (the one it reaches after reading the
# sequence from the right).
print(
    'forward state : {}, shape : {}'.format(
        forward_h,
        forward_h.shape,
    )
)
print(
    'backward state : {}, shape : {}'.format(
        backward_h,
        backward_h.shape,
    )
)
# With return_state=True, those same two hidden states (forward at the last
# timestep, backward at the first timestep) are also returned separately.

hidden states : [[0.63031393 0.63031393 0.63031393 0.7038734  0.7038734  0.7038734 ]], shape : (1, 6)
forward state : [[0.63031393 0.63031393 0.63031393]], shape : (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape : (1, 3)


In [27]:
bilstm = Bidirectional(
    layer=LSTM(
        units=3,
        return_state=True,
        return_sequences=True,
        kernel_initializer=k_init,
        recurrent_initializer=r_init,
        bias_initializer=b_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(x_train)

print(
    'hidden states : {}, shape : {}'.format(
        hidden_states,
        hidden_states.shape,
    )
)
# With return_sequences=True, every timestep of the output holds the forward
# LSTM's hidden state for that timestep concatenated with the backward LSTM's
# hidden state for the same timestep.
print(
    'forward state : {}, shape : {}'.format(
        forward_h,
        forward_h.shape,
    )
)
print(
    'backward state : {}, shape : {}'.format(
        backward_h,
        backward_h.shape,
    )
)
# With return_state=True, the forward LSTM's hidden state at the last timestep
# and the backward LSTM's hidden state at the first timestep are also returned
# separately.

hidden states : [[[0.35906473 0.35906473 0.35906473 0.7038734  0.7038734  0.7038734 ]
  [0.5511133  0.5511133  0.5511133  0.58863586 0.58863586 0.58863586]
  [0.59115744 0.59115744 0.59115744 0.3951699  0.3951699  0.3951699 ]
  [0.63031393 0.63031393 0.63031393 0.21942244 0.21942244 0.21942244]]], shape : (1, 4, 6)
forward state : [[0.63031393 0.63031393 0.63031393]], shape : (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape : (1, 3)
