In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

In [5]:
# 5차원의 단어 벡터
# 문장의 길이 : 4
train_X = [
    [0.1, 4.2, 1.5, 1.1, 2.8], 
    [1.0, 3.1, 2.5, 0.7, 1.1], 
    [0.3, 2.1, 1.5, 2.1, 0.1], 
    [2.2, 1.4, 0.5, 0.9, 1.1]
    ]
print(np.shape(train_X))

(4, 5)


In [40]:
# RNN은 2D Tensor가 아닌 3D Tensor 입력 받음
# 배치 크기 1을 추가해 3D Tensor로 변경
# batch_size는 한 번에 RNN이 학습하는 데이터의 양, 샘플이 1개 이므로 1
train_X = [
    [[0.1, 4.2, 1.5, 1.1, 2.8],
     [1.0, 3.1, 2.5, 0.7, 1.1], 
     [0.3, 2.1, 1.5, 2.1, 0.1], 
     [2.2, 1.4, 0.5, 0.9, 1.1]]
    ]

# numpy array로 변환
train_X = np.array(train_X, dtype=np.float32)
print(np.shape(train_X))

(1, 4, 5)


## SimpleRNN

In [41]:
print(np.shape(train_X))
train_X

(1, 4, 5)


array([[[0.1, 4.2, 1.5, 1.1, 2.8],
        [1. , 3.1, 2.5, 0.7, 1.1],
        [0.3, 2.1, 1.5, 2.1, 0.1],
        [2.2, 1.4, 0.5, 0.9, 1.1]]], dtype=float32)

In [56]:
# units = hidden_state의 크기
rnn = SimpleRNN(units=3, return_sequences=False, return_state=False)
hidden_state = rnn(train_X)

# 마지막 timestep의 hidden state
# return_sequences=False : 마지막 시점의 hidden state 출력 
print(f'hidden state : {hidden_state}, shape: {hidden_state.shape}')

hidden state : [[-0.974531    0.8907833  -0.01162113]], shape: (1, 3)


In [57]:
# return_sequences=True : 모든 지점의 hidden_state 출력
# 문장의 길이에 맞게 timesteps = 4
# 각 timestep에 해당하는 hidden state 출력
rnn = SimpleRNN(3, return_sequences=True)
hidden_states = rnn(train_X)

print(f'hidden state : {hidden_states}, shape: {hidden_states.shape}')


hidden state : [[[-0.9999829   0.99460703  0.07401923]
  [-0.99997455  0.99846363  0.55431914]
  [-0.99814117  0.8899799  -0.87345374]
  [-0.9985305   0.6077229  -0.13207197]]], shape: (1, 4, 3)


In [52]:
# return_state가 True일 경우에는 return_sequences의 True/False 여부와 상관없이 마지막 시점의 은닉 상태를 출력
# return_sequences가 True이면서, return_state를 True로 할 경우 SimpleRNN은 두 개의 출력 리턴
rnn = SimpleRNN(3, return_sequences=True, return_state=True)
hidden_states, last_state = rnn(train_X)

print(f'hidden states : {hidden_states}, shape: {hidden_states.shape}')
print(f'last hidden state : {last_state}, shape: {last_state.shape}')

hidden states : [[[-0.9999829   0.99460703  0.07401923]
  [-0.99838936  0.99902976  0.9854075 ]
  [-0.8120765   0.96927166  0.51177096]
  [-0.93273544  0.9777664   0.84414005]]], shape: (1, 4, 3)
last hidden state : [[-0.93273544  0.9777664   0.84414005]], shape: (1, 3)


In [53]:
# return_sequences=False : 마지막 hidden state 출력
# return_state=True : 마지막 hidden state 출력
# 두 출력이 동일한 것을 확인
rnn = SimpleRNN(3, return_sequences=False, return_state=True)
hidden_state, last_state = rnn(train_X)

print(f'hidden state : {hidden_state}, shape: {hidden_state.shape}')
print(f'last hidden state : {last_state}, shape: {last_state.shape}')

hidden state : [[-0.97541666  0.84519523 -0.5688779 ]], shape: (1, 3)
last hidden state : [[-0.97541666  0.84519523 -0.5688779 ]], shape: (1, 3)


## LSTM

In [54]:
# SimpleRNN과 달리 3개를 결과를 return
# return_sequences=False : 마지막 hidden state 출력
# return_state=True : 마지막 hidden state, cell state 출력 
lstm = LSTM(3, return_sequences=False, return_state=True)
hidden_state, last_state, last_cell_state = lstm(train_X)

print(f'hidden state : {hidden_state}, shape: {hidden_state.shape}')
print(f'last hidden state : {last_state}, shape: {last_state.shape}')
print(f'last cell state : {last_cell_state}, shape: {last_cell_state.shape}')

hidden state : [[-0.02925253 -0.01001881  0.7136197 ]], shape: (1, 3)
last hidden state : [[-0.02925253 -0.01001881  0.7136197 ]], shape: (1, 3)
last cell state : [[-0.10289744 -0.04061481  1.9114045 ]], shape: (1, 3)


In [60]:
# return_sequences=True : 모든 timestep의 hidden state 출력
lstm = LSTM(3, return_sequences=True, return_state=True)
hidden_states, last_hidden_state, last_cell_state = lstm(train_X)

print(f'hidden states : {hidden_states}, shape: {hidden_states.shape}')
print(f'last hidden state : {last_state}, shape: {last_state.shape}')
print(f'last cell state : {last_cell_state}, shape: {last_cell_state.shape}')

hidden states : [[[ 0.0026663  -0.13464242  0.59659475]
  [ 0.00436406 -0.10592887  0.78714824]
  [-0.03421792 -0.39460936  0.8686176 ]
  [-0.05326659 -0.15813753  0.5846672 ]]], shape: (1, 4, 3)
last hidden state : [[-0.02925253 -0.01001881  0.7136197 ]], shape: (1, 3)
last cell state : [[-0.17729081 -0.49351636  1.6238706 ]], shape: (1, 3)


## Bidirectional(LSTM)

In [61]:
# 츨력되는 hidden state 값 고정
k_init = tf.keras.initializers.Constant(value=0.1)
b_init = tf.keras.initializers.Constant(value=0)
r_init = tf.keras.initializers.Constant(value=0.1)

In [65]:
# Bidirectional은 5개의 값 return
# return_sequences=False : 마지막 hidden state 출력
# return_state=True : 마지막 hidden state, 정방향 LSTM의 hidden state, cell state, 역방향 LSTM의 hidden state, cell state
# hidden_states.shape = (1, 6) : return_sequences=False 인 경우
# 마지막 hidden state는 정방향 LSTM의 마지막 timestep hidden state와 역방향 LSTM 첫 timestep hidden state가 연결된 채 반환
bilstm = Bidirectional(LSTM(3, return_sequences=False, return_state=True, 
                            kernel_initializer=k_init, bias_initializer=b_init, recurrent_initializer=r_init))

hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(f'hidden states : {hidden_states}, shape: {hidden_states.shape}')
print(f'forward state : {forward_h}, shape: {forward_h.shape}')
print(f'backward state : {backward_h}, shape: {backward_h.shape}')

hidden states : [[0.63031393 0.63031393 0.63031393 0.70387346 0.70387346 0.70387346]], shape: (1, 6)
forward state : [[0.63031393 0.63031393 0.63031393]], shape: (1, 3)
backward state : [[0.70387346 0.70387346 0.70387346]], shape: (1, 3)


In [66]:
# return_sequences=True : 모든 timestep의 hidden state 출력 
# 역방향 LSTM의 첫번째 시점의 은닉 상태는 정방향 LSTM의 첫번째 시점의 은닉 상태와 연결
# 0번 째 hidden state = 정방향, 역방향 0번 째 timestep
# 1번 째 hidden state = 정방향, 역방향 1번 째 timestep
bilstm = Bidirectional(LSTM(3, return_sequences=True, return_state=True,
                            kernel_initializer=k_init, bias_initializer=b_init, recurrent_initializer=r_init))

hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(f'hidden states : {hidden_states}, shape: {hidden_states.shape}')
print(f'forward state : {forward_h}, shape: {forward_h.shape}')
print(f'backward state : {backward_h}, shape: {backward_h.shape}')

hidden states : [[[0.35906473 0.35906473 0.35906473 0.70387346 0.70387346 0.70387346]
  [0.5511133  0.5511133  0.5511133  0.5886358  0.5886358  0.5886358 ]
  [0.59115756 0.59115756 0.59115756 0.39516988 0.39516988 0.39516988]
  [0.63031393 0.63031393 0.63031393 0.21942246 0.21942246 0.21942246]]], shape: (1, 4, 6)
forward state : [[0.63031393 0.63031393 0.63031393]], shape: (1, 3)
backward state : [[0.70387346 0.70387346 0.70387346]], shape: (1, 3)
