# RNN(Recurrent Neural Network)
- 문장과 같은 문자 데이터의 경우, 여러 개의 연관성이 깊은 Token들로 분할되어 표현된다.
- 즉, 문장을 Input이나 Output으로 사용할 경우, 한 개의 벡터가 아닌 다수의 벡터의 형태로 사용해야 한다.
- 형태
    - Many-to-One : 댓글의 악플 가능성 정도를 측정하는 Sentence Classification
    - One-to-Many : 사진 속 내용을 설명하는 글을 만들어 내는 Image Captioning
    - Many-to-Many (token-by-token) : 문장의 모든 token에 대한 품사를 예측하는 POS Tagging
    - Many-to-Many (Encoder-Decoder) : 입력 문장을 다른 언어의 문장으로 번역해주는 Translation

# 장단기 메모리(Long Short-Term Memory) LSTM
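- 입력 게이트, 망각 게이트, 출력 게이트의 세 가지 게이트로 셀 상태(cell state)를 제어한다.
- 입력 게이트는 새 정보를 얼마나 기록할지, 망각 게이트는 이전 셀 상태를 얼마나 유지할지, 출력 게이트는 셀 상태를 은닉 상태로 얼마나 내보낼지를 결정한다.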

In [12]:
from keras.models import Sequential
from keras.layers import SimpleRNN

# input_shape = (timesteps, input_dim): each sequence has 2 steps of 10 features.
model = Sequential([SimpleRNN(3, input_shape=(2, 10))])
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_10 (SimpleRNN)   (None, 3)                 42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# batch_input_shape = (batch_size, timesteps, input_dim): also pins the batch size to 8.
model = Sequential([SimpleRNN(3, batch_input_shape=(8, 2, 10))])
model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_11 (SimpleRNN)   (8, 3)                    42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
# return_sequences=True returns the hidden state of every timestep, giving an
# output of (batch_size, timesteps, units); False returns only the last timestep.
model = Sequential([
    SimpleRNN(3, batch_input_shape=(8, 2, 10), return_sequences=True),
])
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_12 (SimpleRNN)   (8, 2, 3)                 42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
import numpy as np

# Toy dimensions for a hand-written RNN forward pass.
timesteps, input_dim, hidden_units = 10, 4, 8

# One input sequence: a feature vector of length input_dim for each timestep.
inputs = np.random.random(size=(timesteps, input_dim))

# The hidden state starts out as an all-zero vector of length hidden_units.
hidden_state_t = np.zeros(hidden_units)

print('초기 입력 : ', inputs)
print('초기 은닉 상태 : ', hidden_state_t)

초기 입력 :  [[0.83244596 0.97317203 0.96632453 0.96580037]
 [0.61539427 0.31099811 0.11296515 0.88155465]
 [0.13377058 0.14448534 0.7984165  0.50894758]
 [0.62448034 0.49997953 0.21231498 0.62609543]
 [0.9264382  0.19129316 0.17460837 0.11642955]
 [0.42417816 0.96678961 0.07221818 0.48438649]
 [0.10328317 0.25681741 0.77132073 0.70348754]
 [0.92306642 0.80156061 0.16796697 0.75018003]
 [0.66968235 0.53471724 0.3872255  0.31862822]
 [0.74132204 0.21320109 0.37134942 0.40683184]]
초기 은닉 상태 :  [0. 0. 0. 0. 0. 0. 0. 0.]


In [19]:
# Randomly initialised parameters of a single RNN cell.
Wx = np.random.random(size=(hidden_units, input_dim))  # input-to-hidden weights
Wh = np.random.random(size=(hidden_units, hidden_units))  # hidden-to-hidden weights
b = np.random.random(size=(hidden_units,))  # bias

print('가중치 Wx의 크기(shape) :', Wx.shape)
print('가중치 Wh의 크기(shape) :', Wh.shape)
print('편향의 크기 :', b.shape)

가중치 Wx의 크기(shape) : (8, 4)
가중치 Wh의 크기(shape) : (8, 8)
편향의 크기 : (8,)


In [20]:
# Unroll the cell over time: h_t = tanh(Wx . x_t + Wh . h_(t-1) + b).
total_hidden_states = []

for input_t in inputs:
    output_t = np.tanh(Wx @ input_t + Wh @ hidden_state_t + b)

    # The step output is fed back as the next hidden state and also recorded.
    hidden_state_t = output_t
    total_hidden_states.append(list(hidden_state_t))

In [22]:
# Stack the recorded per-timestep hidden states into one 2-D array
# (one row per timestep, one column per hidden unit).
total_hidden_states = np.stack(total_hidden_states, axis=0)
print(total_hidden_states)

[[0.99057473 0.96683902 0.99020053 0.99798093 0.95961964 0.99150509
  0.99608987 0.98125777]
 [0.99980158 0.99998537 0.99998825 0.99998421 0.99993248 0.99999858
  0.99998302 0.99970768]
 [0.99965459 0.99997954 0.99996549 0.99998439 0.9999151  0.99999775
  0.99994692 0.99985678]
 [0.99984306 0.99998238 0.99998872 0.99998738 0.99994025 0.99999879
  0.99998169 0.99978348]
 [0.99975856 0.99996836 0.9999732  0.99996906 0.99986179 0.99999658
  0.99994636 0.99973029]
 [0.99984072 0.99997243 0.99999154 0.9999866  0.9999502  0.99999891
  0.99998386 0.99975859]
 [0.99973352 0.99998419 0.9999769  0.99998917 0.99993871 0.99999855
  0.99996719 0.99986563]
 [0.99994378 0.99998797 0.99999542 0.99999506 0.99996887 0.99999954
  0.99999297 0.99984421]
 [0.9998493  0.99997581 0.99998453 0.99998722 0.99993149 0.99999851
  0.99997187 0.9998304 ]
 [0.99980578 0.9999796  0.99997947 0.99998312 0.99991012 0.99999801
  0.99996534 0.9998024 ]]


In [None]:
# DRNN (Deep Recurrent Neural Network): two stacked SimpleRNN layers.
model = Sequential()
# First layer: sequences of 10 timesteps with 5 features each. It must return
# the full sequence so that the next recurrent layer receives 3-D input.
model.add(SimpleRNN(hidden_units, input_shape=(10, 5), return_sequences=True))
model.add(SimpleRNN(hidden_units, return_sequences=True))

In [30]:
# 양방향 순환 신경망 (Bidirectional Recurrent Neural Network)
from keras.layers import Bidirectional

timesteps, input_dim = 10, 5

# Wrap the recurrent layer in Bidirectional; the input shape is declared on
# the wrapper because it is the first layer of the model.
model = Sequential([
    Bidirectional(
        SimpleRNN(hidden_units, return_sequences=True),
        input_shape=(timesteps, input_dim),
    ),
])

In [31]:
# Deep bidirectional RNN: three stacked bidirectional layers. Only the first
# layer declares the input shape; every layer returns full sequences.
model = Sequential([
    Bidirectional(
        SimpleRNN(hidden_units, return_sequences=True),
        input_shape=(timesteps, input_dim),
    ),
    Bidirectional(SimpleRNN(hidden_units, return_sequences=True)),
    Bidirectional(SimpleRNN(hidden_units, return_sequences=True)),
])

In [32]:
import numpy as np
import tensorflow as tf
from keras.layers import SimpleRNN, LSTM, Bidirectional

In [34]:
# A single sample (batch of 1) made of 4 timesteps with 5 features each.
train_X = np.asarray(
    [[
        [0.1, 4.2, 1.5, 1.1, 2.8],
        [1.0, 3.1, 2.5, 0.7, 1.1],
        [0.3, 2.1, 1.5, 2.1, 0.1],
        [2.2, 1.4, 0.5, 0.9, 1.1],
    ]],
    dtype=np.float32,
)

train_X.shape  # (batch_size, timesteps, input_dim) = (1, 4, 5)

(1, 4, 5)

In [35]:
# return_sequences is left at its default (False), so calling the layer yields
# a single hidden state per sample: shape (batch_size, units).
rnn = SimpleRNN(3)

hidden_state = rnn(train_X)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))

hidden state : [[-0.81710523  0.97442234  0.40002725]], shape : (1, 3)


In [36]:
# return_sequences=True: the layer returns the hidden state of every timestep,
# so the result has shape (batch_size, timesteps, units).
rnn = SimpleRNN(3, return_sequences=True)
hidden_state = rnn(train_X)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))

hidden state : [[[-0.6452802   0.9657079   0.45361212]
  [ 0.661944   -0.11003119 -0.38735315]
  [-0.92919374 -0.8862208  -0.68701094]
  [-0.5185234   0.98335373 -0.66300523]]], shape : (1, 4, 3)


In [46]:
# return_state=True makes the call return two tensors: the layer output and the
# final hidden state. Without return_sequences the output is already the last
# hidden state, so both printed values are identical.
rnn = SimpleRNN(3, return_state=True)
hidden_state, last_state = rnn(train_X)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape : {}'.format(last_state, last_state.shape))

hidden state : [[-0.9729656  -0.942357    0.89137435]], shape : (1, 3)
last hidden state : [[-0.9729656  -0.942357    0.89137435]], shape : (1, 3)


In [52]:
# return_sequences=True and return_state=True together: the first result holds
# the hidden state of every timestep, the second only the final hidden state
# (equal to the last row of the first result).
rnn = SimpleRNN(3, return_sequences=True, return_state=True)
hidden_state, last_state = rnn(train_X)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape : {}'.format(last_state, last_state.shape))

hidden state : [[[-0.97486466  0.19422278 -0.9977707 ]
  [-0.55663025  0.2638994  -0.38727406]
  [-0.894755   -0.5181923  -0.9285156 ]
  [ 0.9531567  -0.57312834 -0.91599023]]], shape : (1, 4, 3)
last hidden state : [[ 0.9531567  -0.57312834 -0.91599023]], shape : (1, 3)


In [53]:
# An LSTM with return_state=True returns three tensors: the layer output, the
# final hidden state and the final cell state. Without return_sequences the
# output equals the final hidden state.
lstm = LSTM(3, return_state = True)
hidden_state, last_state, last_cell_state = lstm(train_X)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape : {}'.format(last_state, last_state.shape))
print('last cell state : {}, shape : {}'.format(last_cell_state, last_cell_state.shape))

hidden state : [[-0.23971796 -0.19362903 -0.06029086]], shape : (1, 3)
last hidden state : [[-0.23971796 -0.19362903 -0.06029086]], shape : (1, 3)
last cell state : [[-2.101681  -1.1213248 -0.6812372]], shape : (1, 3)


In [54]:
# With return_sequences=True the first result contains the hidden state of
# every timestep; the final hidden state and final cell state are still
# returned separately.
lstm = LSTM(3, return_sequences=True, return_state=True)
hidden_state, last_state, last_cell_state = lstm(train_X)

print('hidden state : {}, shape : {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape : {}'.format(last_state, last_state.shape))
print('last cell state : {}, shape : {}'.format(last_cell_state, last_cell_state.shape))

hidden state : [[[-0.07066065 -0.03245606 -0.20404486]
  [-0.10595854 -0.00335045 -0.4643949 ]
  [-0.22937365 -0.05235196 -0.5731826 ]
  [-0.14736535  0.03584218 -0.58428687]]], shape : (1, 4, 3)
last hidden state : [[-0.14736535  0.03584218 -0.58428687]], shape : (1, 3)
last cell state : [[-0.23852706  0.12280073 -1.2636827 ]], shape : (1, 3)


In [60]:
# Constant initializers make the following outputs reproducible.
k_init = tf.keras.initializers.Constant(0.1)  # kernel (input) weights
b_init = tf.keras.initializers.Constant(1)  # bias
r_init = tf.keras.initializers.Constant(0.1)  # recurrent weights

In [64]:
# Bidirectional LSTM that returns only the final output plus the states of
# both directions. All weights use constant initializers, so the three units
# of each direction produce the same value.
bilstm = Bidirectional(LSTM(3, return_sequences=False, return_state=True,
                            kernel_initializer=k_init, bias_initializer=b_init, recurrent_initializer=r_init))

hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)
# hidden_states         : forward_h and backward_h concatenated -> (batch_size, 2 * units)
# forward_h, forward_c  : final hidden state and cell state of the forward LSTM
# backward_h, backward_c: final hidden state and cell state of the backward LSTM

print('hidden state : {}, shape : {}'.format(hidden_states, hidden_states.shape))
print('forward state : {}, shape : {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape : {}'.format(backward_h, backward_h.shape))
print(forward_c, forward_c.shape)
print(backward_c, backward_c.shape)

hidden state : [[0.8583817  0.8583817  0.8583817  0.89662635 0.89662635 0.89662635]], shape : (1, 6)
forward state : [[0.8583817 0.8583817 0.8583817]], shape : (1, 3)
backward state : [[0.89662635 0.89662635 0.89662635]], shape : (1, 3)
tf.Tensor([[2.7335248 2.7335248 2.7335248]], shape=(1, 3), dtype=float32) (1, 3)
tf.Tensor([[2.8326864 2.8326864 2.8326864]], shape=(1, 3), dtype=float32) (1, 3)


In [65]:
# return_sequences=True and return_state=True: hidden_states now holds one row
# per timestep, each row being the forward and backward hidden states
# concatenated. The forward final state appears in the last row, while the
# backward final state appears in the first row, because the backward LSTM
# finishes at the first timestep.
bilstm = Bidirectional(LSTM(3, return_sequences=True, return_state=True,
                            kernel_initializer=k_init, bias_initializer=b_init, recurrent_initializer=r_init))

hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print('hidden state : {}, shape : {}'.format(hidden_states, hidden_states.shape))
print('forward state : {}, shape : {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape : {}'.format(backward_h, backward_h.shape))

hidden state : [[[0.60378623 0.60378623 0.60378623 0.89662635 0.89662635 0.89662635]
  [0.8135507  0.8135507  0.8135507  0.86469495 0.86469495 0.86469495]
  [0.8438271  0.8438271  0.8438271  0.7678819  0.7678819  0.7678819 ]
  [0.8583817  0.8583817  0.8583817  0.53885394 0.53885394 0.53885394]]], shape : (1, 4, 6)
forward state : [[0.8583817 0.8583817 0.8583817]], shape : (1, 3)
backward state : [[0.89662635 0.89662635 0.89662635]], shape : (1, 3)


In [66]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

In [67]:
text = '''오늘 날씨가 정말 좋다.
집 앞 공원에 사람들이 모여 있다.
아이들이 놀이터에서 신나게 뛰어놀고 있다.'''

In [68]:
# Fit the tokenizer on the corpus to build the word -> index mapping.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
# Word indices start at 1; the extra slot accounts for index 0, which is the
# value used for padding further below.
vocab_size = len(tokenizer.word_index) + 1

print('사전 크기 : ', vocab_size)

사전 크기 :  15


In [69]:
print(tokenizer.word_index)

{'있다': 1, '오늘': 2, '날씨가': 3, '정말': 4, '좋다': 5, '집': 6, '앞': 7, '공원에': 8, '사람들이': 9, '모여': 10, '아이들이': 11, '놀이터에서': 12, '신나게': 13, '뛰어놀고': 14}


In [72]:
# Build next-word training samples: for every line of the corpus, each prefix
# of two or more token ids becomes one sample.
encoded_lines = tokenizer.texts_to_sequences(text.split('\n'))
sequences = [
    token_ids[:end]
    for token_ids in encoded_lines
    for end in range(2, len(token_ids) + 1)
]

print('샘플 개수 : ', len(sequences))

샘플 개수 :  12


In [73]:
print(sequences)

[[2, 3], [2, 3, 4], [2, 3, 4, 5], [6, 7], [6, 7, 8], [6, 7, 8, 9], [6, 7, 8, 9, 10], [6, 7, 8, 9, 10, 1], [11, 12], [11, 12, 13], [11, 12, 13, 14], [11, 12, 13, 14, 1]]


In [75]:
# Length of the longest sample.
max_len = max(map(len, sequences))
print('최대 길이 : ', max_len)

최대 길이 :  6


In [76]:
# Left-pad ('pre') every sample with zeros up to max_len so that the last
# column always holds the word to predict.
sequences = pad_sequences(sequences, maxlen=max_len, padding='pre')

In [77]:
print(sequences)

[[ 0  0  0  0  2  3]
 [ 0  0  0  2  3  4]
 [ 0  0  2  3  4  5]
 [ 0  0  0  0  6  7]
 [ 0  0  0  6  7  8]
 [ 0  0  6  7  8  9]
 [ 0  6  7  8  9 10]
 [ 6  7  8  9 10  1]
 [ 0  0  0  0 11 12]
 [ 0  0  0 11 12 13]
 [ 0  0 11 12 13 14]
 [ 0 11 12 13 14  1]]


In [83]:
# Split every padded sample into its context (all ids but the last) and its
# target (the last id).
sequences = np.array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]

In [84]:
print(X)

[[ 0  0  0  0  2]
 [ 0  0  0  2  3]
 [ 0  0  2  3  4]
 [ 0  0  0  0  6]
 [ 0  0  0  6  7]
 [ 0  0  6  7  8]
 [ 0  6  7  8  9]
 [ 6  7  8  9 10]
 [ 0  0  0  0 11]
 [ 0  0  0 11 12]
 [ 0  0 11 12 13]
 [ 0 11 12 13 14]]


In [85]:
print(y)

[ 3  4  5  7  8  9 10  1 12 13 14  1]


In [86]:
# One-hot encode the target word indices (one column per vocabulary index).
y = to_categorical(y, num_classes=vocab_size)
print(y)

[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [87]:
from keras.models import Sequential
from keras.layers import Embedding, Dense, SimpleRNN

In [89]:
embedding_dim, hidden_units = 10, 32

# Next-word classifier: Embedding -> SimpleRNN -> softmax over the vocabulary.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SimpleRNN(hidden_units),
    Dense(vocab_size, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=200, verbose=2)

Epoch 1/200
1/1 - 0s - loss: 2.7071 - accuracy: 0.1667 - 456ms/epoch - 456ms/step
Epoch 2/200
1/1 - 0s - loss: 2.6955 - accuracy: 0.2500 - 3ms/epoch - 3ms/step
Epoch 3/200
1/1 - 0s - loss: 2.6840 - accuracy: 0.2500 - 2ms/epoch - 2ms/step
Epoch 4/200
1/1 - 0s - loss: 2.6726 - accuracy: 0.2500 - 3ms/epoch - 3ms/step
Epoch 5/200
1/1 - 0s - loss: 2.6613 - accuracy: 0.2500 - 2ms/epoch - 2ms/step
Epoch 6/200
1/1 - 0s - loss: 2.6498 - accuracy: 0.2500 - 2ms/epoch - 2ms/step
Epoch 7/200
1/1 - 0s - loss: 2.6383 - accuracy: 0.3333 - 3ms/epoch - 3ms/step
Epoch 8/200
1/1 - 0s - loss: 2.6266 - accuracy: 0.3333 - 3ms/epoch - 3ms/step
Epoch 9/200
1/1 - 0s - loss: 2.6147 - accuracy: 0.3333 - 3ms/epoch - 3ms/step
Epoch 10/200
1/1 - 0s - loss: 2.6025 - accuracy: 0.3333 - 3ms/epoch - 3ms/step
Epoch 11/200
1/1 - 0s - loss: 2.5900 - accuracy: 0.2500 - 3ms/epoch - 3ms/step
Epoch 12/200
1/1 - 0s - loss: 2.5771 - accuracy: 0.2500 - 3ms/epoch - 3ms/step
Epoch 13/200
1/1 - 0s - loss: 2.5639 - accuracy: 0.2500 -

<keras.src.callbacks.History at 0x28c5ee260>

In [97]:
def sentence_generation(model, tokenizer, current_word, n, input_len=5):
    """Greedily extend a seed text by up to ``n`` predicted words.

    At every step the text generated so far is encoded, left-padded to
    ``input_len`` token ids and fed to the model; the word whose index gets the
    highest score is appended.

    Args:
        model: Trained model whose ``predict`` returns one row of per-index
            scores for each input row.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``index_word``.
        current_word: Seed text that generation starts from.
        n: Maximum number of words to append.
        input_len: Number of token ids fed to the model at each step.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The seed text followed by the generated words, joined by single spaces.
        Fewer than ``n`` words are appended if the model predicts an index
        that has no word (for example the padding index 0).
    """
    generated = [current_word]

    for _ in range(n):
        # Encode and pad everything generated so far.
        context = ' '.join(generated)
        encoded = tokenizer.texts_to_sequences([context])[0]
        encoded = pad_sequences([encoded], maxlen=input_len, padding='pre')

        # Greedy choice: index with the highest score for the single input row.
        scores = model.predict(encoded, verbose=0)
        predicted_index = int(np.argmax(scores, axis=1)[0])

        # Direct index -> word lookup. An unknown index stops generation
        # instead of silently reusing whichever word a scan happened to end on.
        word = tokenizer.index_word.get(predicted_index)
        if word is None:
            break

        generated.append(word)

    return ' '.join(generated)

In [98]:
print(sentence_generation(model, tokenizer, '오늘', 3))

오늘 날씨가 정말 좋다


In [99]:
print(sentence_generation(model, tokenizer, '집', 5))

집 앞 공원에 사람들이 모여 있다


In [100]:
print(sentence_generation(model, tokenizer, '아이들이', 4))

아이들이 놀이터에서 신나게 뛰어놀고 있다


In [101]:
print(sentence_generation(model, tokenizer, '거북이', 3))

거북이 놀이터에서 앞 놀이터에서


In [102]:
print(sentence_generation(model, tokenizer, '빵', 10))

빵 놀이터에서 앞 놀이터에서 신나게 뛰어놀고 있다 사람들이 모여 있다 사람들이
