<a href="https://colab.research.google.com/github/yoooniverse/NLP/blob/main/RNN_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

python으로 RNN 구현

In [None]:
# The code below is pseudocode for illustration only; it does not actually run.

hidden_state_t = 0 # initialize the hidden state to 0 (a zero vector)
for input_t in input_length: # one input is received at every timestep
    output_t = tanh(input_t, hidden_state_t) # combine this timestep's input with the hidden state
    hidden_state_t = output_t # the result becomes the hidden state of the current timestep

In [1]:
import numpy as np

# Dimensions of the toy problem.
timesteps, input_dim, hidden_units = 10, 4, 8

# Input as a 2D tensor: one row of `input_dim` random features per timestep.
inputs = np.random.random(size=(timesteps, input_dim))

# Initial hidden state: a zero vector with one entry per hidden unit.
hidden_state_t = np.zeros(hidden_units)

print('초기 은닉 상태 : ', hidden_state_t)

초기 은닉 상태 :  [0. 0. 0. 0. 0. 0. 0. 0.]


In [2]:
# Randomly initialised parameters of the RNN cell.
# Wx multiplies the input vector, Wh multiplies the previous hidden state,
# and b is the bias added to their sum.
Wx = np.random.random(size=(hidden_units, input_dim))
Wh = np.random.random(size=(hidden_units, hidden_units))
b = np.random.random(size=(hidden_units,))

print('가중치 Wx의 크기(shape) :', Wx.shape)
print('가중치 Wh의 크기(shape) :', Wh.shape)
print('편향의 크기(shape) :', b.shape)

가중치 Wx의 크기(shape) : (8, 4)
가중치 Wh의 크기(shape) : (8, 8)
편향의 크기(shape) : (8,)


In [3]:
# Hidden state produced at each timestep, collected in order.
total_hidden_states = []

for input_t in inputs:
    # h_t = tanh(Wx . x_t + Wh . h_{t-1} + b)
    output_t = np.tanh(Wx.dot(input_t) + Wh.dot(hidden_state_t) + b)

    # Record this timestep's state, then carry it over to the next timestep.
    total_hidden_states.append(output_t.tolist())
    hidden_state_t = output_t

# Stack the per-timestep vectors into a single 2D array (one row per timestep).
total_hidden_states = np.stack(total_hidden_states)

print('모든 시점의 은닉 상태 :')
print(total_hidden_states)

모든 시점의 은닉 상태 :
[[0.84884259 0.927387   0.86936882 0.91087557 0.82041033 0.46424226
  0.86445208 0.57114755]
 [0.99993763 0.99993091 0.99972907 0.99980169 0.99977882 0.99960558
  0.9993874  0.99755357]
 [0.99999918 0.99999254 0.99998702 0.99995991 0.99999566 0.99998361
  0.9999395  0.999875  ]
 [0.99999269 0.99997028 0.99991596 0.99983254 0.99997143 0.99993955
  0.99981116 0.99900576]
 [0.99999866 0.99997547 0.99998363 0.99990926 0.99999209 0.99997924
  0.99993646 0.99986372]
 [0.99999657 0.99998683 0.99995598 0.99995968 0.99998976 0.9999577
  0.99978785 0.99956326]
 [0.99999902 0.99999209 0.99998593 0.99996869 0.99999473 0.99997993
  0.99992049 0.99987201]
 [0.99999872 0.99998707 0.99998293 0.99992643 0.9999907  0.99997753
  0.99993701 0.9998329 ]
 [0.9999884  0.99996329 0.99987489 0.9998329  0.99996391 0.99991895
  0.99971391 0.99852006]
 [0.99999567 0.99995206 0.99995383 0.99977318 0.99997688 0.99995882
  0.99989353 0.9995437 ]]


keras로 RNN 구현하기

In [None]:
from tensorflow.keras.layers import SimpleRNN

# Illustrative snippet only: `model`, `M` and `N` are not defined in this cell.
# Three alternative ways of adding a SimpleRNN layer with `hidden_units` units.
# 1) Give only the number of units.
model.add(SimpleRNN(hidden_units))
# or 2) also declare the input shape as (timesteps, input_dim).
model.add(SimpleRNN(hidden_units, input_shape=(timesteps, input_dim)))
# or 3) pass the sequence length and the input dimension as separate keywords.
model.add(SimpleRNN(hidden_units, input_length=M, input_dim=N))

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN

# One SimpleRNN layer with 3 units; input_shape=(2, 10) is (timesteps, input_dim).
model = Sequential([SimpleRNN(units=3, input_shape=(2, 10))])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 3)                 42        
                                                                 
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [4]:
# One SimpleRNN layer with 3 units; batch_input_shape=(8, 2, 10) also fixes
# the batch size (8) in addition to (timesteps, input_dim) = (2, 10).
model = Sequential([SimpleRNN(units=3, batch_input_shape=(8, 2, 10))])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_1 (SimpleRNN)    (8, 3)                    42        
                                                                 
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Same input specification, but return_sequences=True makes the layer output
# the hidden state of every timestep instead of only the last one.
model = Sequential(
    [SimpleRNN(units=3, return_sequences=True, batch_input_shape=(8, 2, 10))]
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_2 (SimpleRNN)    (8, 2, 3)                 42        
                                                                 
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Stack two SimpleRNN layers with 3 units each.
# The first layer uses return_sequences=True so that the second layer
# receives one vector per timestep rather than only the final state.
# input_shape=(10, 5) declares 10 timesteps of 5 features each; it replaces
# the legacy `input_length=10, input_dim=5` keywords with the equivalent
# (timesteps, input_dim) form used by the other cells of this notebook.
model = Sequential()
model.add(SimpleRNN(3, input_shape=(10, 5), return_sequences=True))
model.add(SimpleRNN(3, return_sequences=True))
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_3 (SimpleRNN)    (None, 10, 3)             27        
                                                                 
 simple_rnn_4 (SimpleRNN)    (None, 10, 3)             21        
                                                                 
Total params: 48
Trainable params: 48
Non-trainable params: 0
_________________________________________________________________


1.   Embedding을 사용하며, 단어 집합(Vocabulary)의 크기가 5,000이고 임베딩 벡터의 차원은 100입니다.
2.   은닉층에서는 Simple RNN을 사용하며, 은닉 상태의 크기는 128입니다.
3. 훈련에 사용하는 모든 샘플의 길이는 30으로 가정합니다.
4. 이진 분류를 수행하는 모델로, 출력층의 뉴런은 1개로 시그모이드 함수를 사용합니다.
5. 은닉층은 1개입니다.



In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Embedding, Dense

# Hyperparameters: vocabulary size, embedding dimension, hidden state size.
vocab_size, embedding_dim, hidden_size = 5000, 100, 128

# Embedding -> SimpleRNN -> single sigmoid unit for binary classification.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SimpleRNN(hidden_size),
    Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 100)         500000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               29312     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 529,441
Trainable params: 529,441
Non-trainable params: 0
_________________________________________________________________


Embedding = 5,000(vocab) * 100(embedding) = 500,000  
Wx = 100(embedding) * 128(hidden) = 12,800  
Wh = 128(hidden) * 128(hidden) = 16,384  
bias(hidden) = 128  
Wy = 128(hidden) * 1(output) = 128  
bias(output) = 1  

total = 500,000 + 12,800 + 16,384 + 128 + 128 + 1 = 529,441

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

In [4]:
# 2D input as a plain nested list: 4 rows (timesteps) of 5 values each.
train_X = [
    [0.1, 4.2, 1.5, 1.1, 2.8],
    [1.0, 3.1, 2.5, 0.7, 1.1],
    [0.3, 2.1, 1.5, 2.1, 0.1],
    [2.2, 1.4, 0.5, 0.9, 1.1],
]
print(np.array(train_X).shape)

(4, 5)


단어 벡터의 차원은 5이고, 문장의 길이가 4인 경우를 가정한 입력입니다. 다시 말해 4번의 시점(timesteps)이 존재하고, 각 시점마다 5차원의 단어 벡터가 입력으로 사용됩니다.

RNN은 2D 텐서가 아니라 3D 텐서를 입력으로 받는다고 언급한 바 있습니다. 따라서 위에서 만든 2D 텐서를 3D 텐서로 변경합니다. 이는 배치 크기 1을 추가해줌으로써 해결합니다.

In [5]:
# Wrap the 4 x 5 matrix in one extra outer list so the array gains a leading
# batch axis of size 1: (batch_size, timesteps, input_dim) == (1, 4, 5).
train_X = np.array(
    [
        [
            [0.1, 4.2, 1.5, 1.1, 2.8],
            [1.0, 3.1, 2.5, 0.7, 1.1],
            [0.3, 2.1, 1.5, 2.1, 0.1],
            [2.2, 1.4, 0.5, 0.9, 1.1],
        ]
    ],
    dtype=np.float32,
)
print(train_X.shape)

(1, 4, 5)


In [6]:
# Apply a 3-unit SimpleRNN with default options directly to train_X
# and show the returned tensor together with its shape.
rnn = SimpleRNN(units=3)
hidden_state = rnn(train_X)

print(
    'hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape)
)

hidden state : [[-0.97659284 -0.11495369 -0.9749877 ]], shape: (1, 3)


In [8]:
# With return_sequences=True the layer returns the hidden state of every timestep.
# The input is a (1, 4, 5) tensor in which 4 is the number of timesteps,
# so the result is a (1, 4, 3) tensor: one 3-dimensional state per timestep.
rnn = SimpleRNN(units=3, return_sequences=True)
hidden_states = rnn(train_X)

print(
    'hidden state : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)

hidden state : [[[ 0.58889574  0.9808168  -0.9569172 ]
  [ 0.92805004  0.73024505 -0.9198242 ]
  [ 0.84581876 -0.5124507  -0.9748819 ]
  [ 0.9905968  -0.67719275 -0.86083585]]], shape: (1, 4, 3)


In [9]:
# return_state=True adds the final hidden state as a second return value,
# next to the per-timestep sequence requested by return_sequences=True.
rnn = SimpleRNN(units=3, return_state=True, return_sequences=True)
hidden_states, last_states = rnn(train_X)

print(
    'hidden state : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)
print(
    'last hidden state : {}, shape: {}'.format(last_states, last_states.shape)
)

hidden state : [[[ 0.45105985 -0.86667144 -0.9897267 ]
  [ 0.7262544  -0.90626967 -0.598509  ]
  [ 0.57387584  0.0099148   0.8488481 ]
  [ 0.9652415   0.32195154 -0.8810624 ]]], shape: (1, 4, 3)
last hidden state : [[ 0.9652415   0.32195154 -0.8810624 ]], shape: (1, 3)


In [10]:
# return_sequences=False with return_state=True: the layer output and the
# returned state are both printed so they can be compared.
rnn = SimpleRNN(units=3, return_state=True, return_sequences=False)
hidden_state, last_state = rnn(train_X)

print(
    'hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape)
)
print(
    'last hidden state : {}, shape: {}'.format(last_state, last_state.shape)
)

hidden state : [[-0.8435092  -0.8995878  -0.42616567]], shape: (1, 3)
last hidden state : [[-0.8435092  -0.8995878  -0.42616567]], shape: (1, 3)




---



LSTM

In [11]:
# An LSTM with return_state=True returns three tensors: the layer output,
# the last hidden state and the last cell state.
lstm = LSTM(units=3, return_state=True, return_sequences=False)
hidden_state, last_state, last_cell_state = lstm(train_X)

print(
    'hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape)
)
print(
    'last hidden state : {}, shape : {}'.format(last_state, last_state.shape)
)
print(
    'last cell state : {}, shape : {}'.format(
        last_cell_state, last_cell_state.shape
    )
)

hidden state : [[-0.09415786 -0.08462715 -0.13076529]], shape : (1, 3)
last hidden state : [[-0.09415786 -0.08462715 -0.13076529]], shape : (1, 3)
last cell state : [[-0.32589608 -0.30287904 -0.3596287 ]], shape : (1, 3)


In [12]:
# Same LSTM call, but return_sequences=True makes the first return value
# contain the hidden state of every timestep.
lstm = LSTM(units=3, return_state=True, return_sequences=True)
hidden_state, last_state, last_cell_state = lstm(train_X)

print(
    'hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape)
)
print(
    'last hidden state : {}, shape : {}'.format(last_state, last_state.shape)
)
print(
    'last cell state : {}, shape : {}'.format(
        last_cell_state, last_cell_state.shape
    )
)

hidden state : [[[-0.23027447  0.56560117 -0.07420314]
  [-0.50826246  0.59377325 -0.13686897]
  [-0.5008584   0.57277244 -0.37630624]
  [-0.5111544   0.6771933  -0.31721458]]], shape : (1, 4, 3)
last hidden state : [[-0.5111544   0.6771933  -0.31721458]], shape : (1, 3)
last cell state : [[-1.3723986  1.9027838 -0.5632471]], shape : (1, 3)


Bidirectional LSTM

In [13]:
#출력되는 은닉 상태의 값을 고정
k_init = tf.keras.initializers.Constant(value=0.1)
b_init = tf.keras.initializers.Constant(value=0)
r_init = tf.keras.initializers.Constant(value=0.1)

In [14]:
# Bidirectional wrapper around a 3-unit LSTM with constant initializers.
# With return_state=True the call returns five tensors: the layer output,
# then the forward hidden/cell states and the backward hidden/cell states.
bilstm = Bidirectional(
    LSTM(
        units=3,
        return_sequences=False,
        return_state=True,
        kernel_initializer=k_init,
        bias_initializer=b_init,
        recurrent_initializer=r_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(
    'hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)
print('forward state : {}, shape: {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape: {}'.format(backward_h, backward_h.shape))

hidden states : [[0.6303138 0.6303138 0.6303138 0.7038734 0.7038734 0.7038734]], shape: (1, 6)
forward state : [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)


총 5개의 값을 반환합니다.  
return_state가 True인 경우에는 출력(hidden_states) 외에 정방향 LSTM의 은닉 상태와 셀 상태, 역방향 LSTM의 은닉 상태와 셀 상태 4가지를 추가로 반환하기 때문입니다.

In [15]:
# Bidirectional 3-unit LSTM with constant initializers, this time with
# return_sequences=True so the first return value covers every timestep.
# The remaining four return values are the forward and backward
# hidden/cell states.
bilstm = Bidirectional(
    LSTM(
        units=3,
        return_sequences=True,
        return_state=True,
        kernel_initializer=k_init,
        bias_initializer=b_init,
        recurrent_initializer=r_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(
    'hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)
print('forward state : {}, shape: {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape: {}'.format(backward_h, backward_h.shape))

hidden states : [[[0.35906473 0.35906473 0.35906473 0.7038734  0.7038734  0.7038734 ]
  [0.55111325 0.55111325 0.55111325 0.58863586 0.58863586 0.58863586]
  [0.59115744 0.59115744 0.59115744 0.3951699  0.3951699  0.3951699 ]
  [0.6303138  0.6303138  0.6303138  0.21942244 0.21942244 0.21942244]]], shape: (1, 4, 6)
forward state : [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)
