```
RNN 층을 추가하는 코드
model.add(SimpleRNN(hidden_size)) #가장 간단한 형태
```

```
# 추가 인자를 사용할 때
model.add(SimpleRNN(hidden_size, input_shape = (timesteps, input_dim))

# 다른 표기
model.add(SimpleRNN(hidden_size, input_length=M, input_dim=N))
```

In [31]:
# @title
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN

In [32]:
model = Sequential()
model.add(SimpleRNN(3, input_shape=(2,10)))
# model.add(SimpleRNN(3, input_length=2, input_dim=10)) 동일

model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_14 (SimpleRNN)   (None, 3)                 42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [33]:
model= Sequential()
model.add(SimpleRNN(3, batch_input_shape=(8, 2, 10)))
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_15 (SimpleRNN)   (8, 3)                    42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [34]:
model= Sequential()
model.add(SimpleRNN(3, batch_input_shape=(8, 2, 10), return_sequences=True))
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_16 (SimpleRNN)   (8, 2, 3)                 42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### 파이썬으로 RNN구현하기


$$ h_t = tanh(W_x X_t + W_h h_{t-1} +b) $$

```
hidden_state = 0 # 초기 은닉 상태를 0으로 초기화
for input_t in input_length: # 각 시점마다 입력을 받는다.
    output_t = tanh(input_t, hidden_state_t) # 각 시점에 대해서 입력과 은닉 상태를 가지고 연산
    hidden_state_t = output_t # 계산 결과는 현재 시점의 은닉 상태과 된다.
```

In [35]:
import numpy as np

timesteps = 10
input_dim = 4
hidden_size = 8

# 입력에 해당되는 2D 텐서
inputs = np.random.random((timesteps, input_dim))

# 초기 은닉 상태는 0벡터로 초기화
hidden_state_t = np.zeros((hidden_size,))

In [36]:
print(hidden_state_t) # 8의 크기를 가지는 hidden state. 현재는 초기 hidden state로 모든 차원이 0의 값을 가짐

[0. 0. 0. 0. 0. 0. 0. 0.]


In [37]:
Wx = np.random.random((hidden_size, input_dim)) #(8,4) 2D텐서 생성 입력에 대한 가중치
Wh = np.random.random((hidden_size, hidden_size)) # (8,8)크기의 2D텐서 생성. hidden state에 대한 가중치
b = np.random.random((hidden_size,)) #(8,)크기의 1D텐서 생성. 편향(bias)

In [38]:
print(np.shape(Wx)) # hidden state x 입력의 차원
print(np.shape(Wh)) # hidden state x hidden state size
print(np.shape(b)) # hidden state size

(8, 4)
(8, 8)
(8,)



$$ h_t = tanh(W_x X_t + W_h h_{t-1} +b) $$

In [40]:
total_hidden_states= []

# 메모리 셀 동작
for input_t in inputs: # 각 시점에 따라서 입력값이 입력이 됨.
    output_t = np.tanh(np.dot(Wx, input_t) + np.dot(Wh, hidden_state_t)+b)
    # Wx* wt + Wh * Ht-1 + b
    total_hidden_states.append(list(output_t)) #각 시점의 은닉 상태의 값을 계속해서 축적
    print(np.shape(total_hidden_states)) # 각 시점 t별 메모리 셀의 출력의 크기는 (timestep, output_dim)
    hidden_state_t = output_t

total_hidden_state = np.stack(total_hidden_states, axis=0)

# (timesteps, output_dim)
print(total_hidden_states)

(1, 8)
(2, 8)
(3, 8)
(4, 8)
(5, 8)
(6, 8)
(7, 8)
(8, 8)
(9, 8)
(10, 8)
[[0.9999968111881906, 0.9999966924256598, 0.9996149026683078, 0.9999993699124726, 0.9994673744777974, 0.9999666334270991, 0.9998946758753974, 0.9999901078179885], [0.9999929819126787, 0.9999966592206626, 0.9990395093830258, 0.9999993471934417, 0.9991806302992977, 0.999936387504811, 0.9997842362166128, 0.9999779585604742], [0.9999951255548077, 0.9999949098835752, 0.9994825972734243, 0.9999989043215116, 0.9992877910068588, 0.9999443384749753, 0.9998789871985915, 0.9999777738045521], [0.9999958069973859, 0.9999960044583834, 0.9995065011223105, 0.9999989085593104, 0.9993374089007864, 0.9999585270130482, 0.9998998419719333, 0.9999770325947879], [0.9999831885188399, 0.9999875817928514, 0.9981987237621114, 0.9999984073716665, 0.998797544647964, 0.9997898753946077, 0.9994555906143813, 0.9999544956232852], [0.9999957536324681, 0.9999958286601807, 0.9993688258093428, 0.9999984958106728, 0.9993983700858782, 0.9999676759382236,

### 더 깊은 RNN

In [41]:
model = Sequential()
model.add(SimpleRNN(hidden_size, input_length =10, input_dim =5, return_sequences=True))
model.add(SimpleRNN(hidden_size, return_sequences = True))

In [42]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_17 (SimpleRNN)   (None, 10, 8)             112       
                                                                 
 simple_rnn_18 (SimpleRNN)   (None, 10, 8)             136       
                                                                 
Total params: 248 (992.00 Byte)
Trainable params: 248 (992.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### BiLSTM

In [None]:
from tensorflow.keras.layers import Bidirectional

In [None]:
timesteps = 10
input_dim = 5

In [None]:
model = Sequential()
model.add(Bidirectional(SimpleRNN(hidden_size, return_sequences=True, input_shape=(timesteps, input_dim))))

In [None]:
model = Sequential()
model.add(Bidirectional(SimpleRNN(hidden_size, return_sequences=True, input_shape=(timesteps, input_dim))))
model.add(Bidirectional(SimpleRNN(hidden_size, return_sequences=True)))
model.add(Bidirectional(SimpleRNN(hidden_size, return_sequences=True)))
model.add(Bidirectional(SimpleRNN(hidden_size, return_sequences=True)))

```
- Embedding을 사용하며, 단어 집합(Vocabulary)의 크기가 5,000이고 임베딩 벡터의 차원은 100입니다.
- 은닉층에서는 Simple RNN을 사용하며, 은닉 상태의 크기는 128입니다.
- 훈련에 사용하는 모든 샘플의 길이는 30으로 가정합니다.
- 이진 분류를 수행하는 모델로, 출력층의 뉴런은 1개로 시그모이드 함수를 사용합니다.
- 은닉층은 1개입니다.
```

```
## GRU 코드
model.add(GRU(hidden_size, input_shape=(timesteps, input_dim)))
```

### 임의의 입력으로 Simple RNN생성

In [43]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

In [45]:
train_x = [[0.1, 4.2, 1.5, 1.1, 2.8],
           [1.0, 3.1, 2.5, 0.7, 1.1],
           [0.3, 2.1, 1.5, 2.1, 0.1],
           [2.2, 1.4, 0.5, 0.9, 1.1]]
print(np.shape(train_x)) # 단어 벡터의 차원은 5, 문장의 길이 4

(4, 5)


In [46]:
train_x = [[[0.1, 4.2, 1.5, 1.1, 2.8],
           [1.0, 3.1, 2.5, 0.7, 1.1],
           [0.3, 2.1, 1.5, 2.1, 0.1],
           [2.2, 1.4, 0.5, 0.9, 1.1]]]
train_x = np.array(train_x, dtype=np.float32)
print(train_x.shape) # (batch_size, timesteps, input_dim)

(1, 4, 5)


In [47]:
rnn = SimpleRNN(3)
# rnn = SimpleRNN(3, return_sequences=False, return_state=False)와 동일
hidden_state = rnn(train_x)

print('hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape))

hidden state : [[-0.7324636   0.95256203 -0.9984306 ]], shape: (1, 3)


In [48]:
rnn = SimpleRNN(3, return_sequences=True)
hidden_states = rnn(train_x)

print('hidden state : {}, shape : {}'.format(hidden_states, hidden_states.shape))

hidden state : [[[-0.999646    0.9999977   0.84889215]
  [-0.99779946  0.999908    0.99986005]
  [-0.9503089   0.99856675  0.99983925]
  [-0.9473149   0.99214023  0.9994461 ]]], shape : (1, 4, 3)


In [50]:
rnn = SimpleRNN(3, return_sequences=True, return_state=True)
hidden_states, last_states = rnn(train_x)

print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('last hidden state : {}, shape : {}'.format(last_states, last_states.shape))

hidden states : [[[-0.46605644 -0.9988273  -0.81044775]
  [ 0.84103954  0.37671164 -0.8500177 ]
  [-0.8516343  -0.97236484 -0.99702954]
  [ 0.8912779   0.6355896  -0.98882437]]], shape: (1, 4, 3)
last hidden state : [[ 0.8912779   0.6355896  -0.98882437]], shape : (1, 3)


In [51]:
rnn = SimpleRNN(3, return_sequences=False, return_state=True)
hidden_state, last_state = rnn(train_x)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape : {}'.format(last_state, last_state.shape))

hidden state : [[0.5467562  0.80450696 0.43750912]], shape : (1, 3)
last hidden state : [[0.5467562  0.80450696 0.43750912]], shape : (1, 3)


### LSTM 이해하기

In [52]:
lstm = LSTM(3, return_sequences=False, return_state=True)
hidden_state, last_state, last_cell_state = lstm(train_x)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape : {}'.format(last_state, last_state.shape))
print('last cell state : {}, shape : {}'.format(last_cell_state, last_cell_state.shape))

hidden state : [[ 0.16912776  0.1167499  -0.36896262]], shape : (1, 3)
last hidden state : [[ 0.16912776  0.1167499  -0.36896262]], shape : (1, 3)
last cell state : [[ 1.3020644  0.3639046 -1.2098768]], shape : (1, 3)


In [53]:
lstm = LSTM(3, return_sequences= True, return_state = True)
hidden_states, last_hidden_state, last_cell_state = lstm(train_x)

print('hidden states : {}, shape : {}'.format(hidden_states, hidden_states.shape))
print('last hidden state : {}, shape : {}'.format(last_hidden_state, last_hidden_state.shape))
print('last cell state : {}, shape : {}'.format(last_cell_state, last_cell_state.shape))

hidden states : [[[ 0.10954388 -0.01204063  0.2143666 ]
  [ 0.06481206 -0.02553005  0.28299806]
  [ 0.16010317 -0.11609687  0.44722036]
  [ 0.08386193 -0.0954783   0.48900056]]], shape : (1, 4, 3)
last hidden state : [[ 0.08386193 -0.0954783   0.48900056]], shape : (1, 3)
last cell state : [[ 0.14065818 -1.177458    0.6832672 ]], shape : (1, 3)


### LSTM 이해하기 2

In [54]:
x = np.array([[1,2, 3],
              [2,3,4],
              [3,4,5],
              [4,5,6],
              [5,6,7],
              [6,7,8],
              [7,8,9],
              [8,9,10],
              [9,10,11],
              [10, 11, 12],
              [20, 30, 40],
              [30, 40, 50],
              [40, 50, 60]])
y = np.array([4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 50, 60, 70])
x.shape, y.shape

((13, 3), (13,))

In [55]:
x = x.reshape((x.shape[0], x.shape[1], 1))
print(x.shape)

(13, 3, 1)


In [56]:
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential()
model.add(keras.layers.LSTM(20, activation='relu', input_shape=(3,1)))
model.add(keras.layers.Dense(5, activation='relu'))
model.add(keras.layers.Dense(1))

In [57]:
model.compile(optimizer='adam', loss='mse')

In [58]:
es = keras.callbacks.EarlyStopping(monitor='loss', patience=10, mode='auto')
model.fit(x, y, epochs=1000, batch_size=1, verbose=1, callbacks=[es])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7a3f95bfbe80>

In [59]:
x_test = np.array([25, 35, 45])
x_test = x_test.reshape((1, 3, 1))

pred = model.predict(x_test)
pred



array([[55.67782]], dtype=float32)