<a href="https://colab.research.google.com/github/youngyoung-0/colab-notebook/blob/main/python_ML_study/LSTM_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### LSTM / GRU input/output shape
* return_sequences = False, True 일 때의 output 비교
* return_state = False, True 일 때의 internal state output 비교
* Bidirectional LSTM/GRU 의 output 비교

In [32]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Bidirectional, GRU
import numpy as np
import warnings
warnings.filterwarnings(action = 'ignore')

# Toy dimensions shared by every model built in the cells below.
B = 2 # batch size (number of sequences)
T = 5 # time steps per sequence
D = 1 # features per time step
U = 3 # units of each recurrent layer (passed to both LSTM and GRU)

# Random input batch with shape (B, T, D); no seed is set in this cell,
# so the values presumably differ between runs.
X = np.random.randn(B,T,D)
X.shape  # last expression of the cell: the notebook displays the shape

(2, 5, 1)

### LSTM
### return_sequences

* False (default) - last time step의 output만 반환
* True - 모든 timestep의 output을 모두 반환

In [33]:
def lstm(return_sequences=False):
  """Build a single-layer LSTM model and return its prediction for X.

  The model takes inputs of shape (T, D) and has U units; T, D, U and
  the input batch X are the module-level values defined earlier.

  Args:
    return_sequences: forwarded unchanged to the LSTM layer.

  Returns:
    Whatever ``Model.predict(X)`` returns for the freshly built model.
  """
  inputs = Input(shape=(T, D))
  outputs = LSTM(U, return_sequences=return_sequences)(inputs)
  return Model(inputs=inputs, outputs=outputs).predict(X)

# Run the LSTM once per return_sequences setting; print the shape and the
# values of each result under its banner.
for flag, banner in (
    (False, '---- return_sequences = False ----> last timestep의 output만 반환'),
    (True, '---- return_sequences = True ----> 모든 timestep 별 output 출력'),
):
  print(banner)
  lstm_out = lstm(return_sequences=flag)
  print(lstm_out.shape)
  print(lstm_out)

---- return_sequences = False ----> last timestep의 output만 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
(2, 3)
[[-0.02484355 -0.12347738 -0.0867759 ]
 [ 0.19273865  0.04247689  0.15610622]]
---- return_sequences = True ----> 모든 timestep 별 output 출력
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
(2, 5, 3)
[[[-0.03873381  0.01610112  0.03001868]
  [ 0.10739402 -0.07077955 -0.05304502]
  [-0.03218812  0.02301258  0.15669285]
  [ 0.23869507 -0.20741042  0.00774271]
  [ 0.13586992 -0.06171402  0.07172294]]

 [[ 0.11930078 -0.06490424 -0.06535405]
  [-0.00661125  0.01872853  0.06416775]
  [ 0.14169915 -0.07835159 -0.03152498]
  [ 0.21919958 -0.10567214 -0.07780068]
  [ 0.2622038  -0.10244879 -0.0988308 ]]]


### return_state
* False (default) - output만 반환
* True - output, last step의 hidden state, cell state (LSTM의 경우) 반환

In [34]:
def lstm(return_state=False):
  """Build a single-layer LSTM model, predict on X and print the results.

  The model takes inputs of shape (T, D) and has U units; T, D, U and
  the input batch X are the module-level values defined earlier.

  Args:
    return_state: forwarded unchanged to the LSTM layer. When truthy the
      prediction is unpacked into three arrays (labelled o, h, c) and
      each is printed; otherwise the single output o is printed.

  Returns:
    None. The shapes and values are only printed.
  """
  inputs = Input(shape=(T, D))
  outputs = LSTM(U, return_state=return_state)(inputs)
  prediction = Model(inputs=inputs, outputs=outputs).predict(X)

  if not return_state:
    print("o :", prediction.shape)
    print(prediction)
    return

  # With return_state the model yields exactly three arrays.
  output, hidden, cell = prediction
  for label, array in (("o :", output), ("h :", hidden), ("c :", cell)):
    print(label, array.shape)
    print(array)

# Compare what the LSTM yields without and with return_state.
print('---- return_state = False ----> output만 반환')
lstm(return_state=False)

# The banner names the setting actually passed to the call below (True).
print('---- return_state = True ----> output, hidden state, cell state 반환')
lstm(return_state=True)

---- return_state = False ----> output만 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step
o : (2, 3)
[[ 0.07738326  0.09350518 -0.04995616]
 [-0.10126107 -0.08902407 -0.11646794]]
---- return_state = False ----> output, hidden state, cell state 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
o : (2, 3)
[[-0.07340065 -0.09224098  0.05638191]
 [-0.14163929 -0.19141197 -0.08723353]]
h : (2, 3)
[[-0.07340065 -0.09224098  0.05638191]
 [-0.14163929 -0.19141197 -0.08723353]]
c : (2, 3)
[[-0.15675071 -0.20072149  0.09868527]
 [-0.29875726 -0.35763037 -0.20975819]]


### Bidirectional LSTM
* 순방향, 역방향이 concatenate된 output 출력
* hidden state, cell state는 순방향, 역방향 별도 출력

In [35]:
def bi_lstm(return_sequences=False, return_state=False):
  """Build a Bidirectional LSTM model, predict on X and print shapes.

  The wrapped LSTM has U units and the model takes inputs of shape
  (T, D); T, D, U and the input batch X are module-level values.

  Args:
    return_sequences: forwarded unchanged to the wrapped LSTM layer.
    return_state: forwarded unchanged to the wrapped LSTM layer. When
      truthy the prediction is unpacked into five arrays labelled
      o, h1, c1, h2, c2 (per the Keras Bidirectional docs the states
      come forward first, then backward).

  Returns:
    None. Only the array shapes are printed.
  """
  inputs = Input(shape=(T, D))
  recurrent = LSTM(U, return_sequences=return_sequences, return_state=return_state)
  outputs = Bidirectional(recurrent)(inputs)
  prediction = Model(inputs=inputs, outputs=outputs).predict(X)

  if not return_state:
    print("o :", prediction.shape)
    return

  # With return_state the model yields the output plus four state arrays.
  o, h1, c1, h2, c2 = prediction
  labelled = (("o :", o), ("h1 :", h1), ("c1 :", c1), ("h2 :", h2), ("c2 :", c2))
  for label, array in labelled:
    print(label, array.shape)

# Show the Bidirectional LSTM output shapes for three configurations.
print("*** 순방향, 역방향이 concatenate ***")
print("---- return_sequences=False ----> last timestep 의 output 만 반환")
bi_lstm(return_sequences=False, return_state=False)
print()
print("---- return_sequences=True ----> 모든 timestep 별 output 출력")
bi_lstm(return_sequences=True)
print()
# The call below leaves return_sequences at its default (False), so the
# banner states that setting rather than True.
print("---- return_sequences=False, return_state=True")
bi_lstm(return_state=True)

*** 순방향, 역방향이 concatenate ***
---- return_sequences=False ----> last timestep 의 output 만 반환
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 436ms/step
o : (2, 6)

---- return_sequences=True ----> 모든 timestep 별 output 출력
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 470ms/step
o : (2, 5, 6)

---- return_sequences=True, return_state=True
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464ms/step
o : (2, 6)
h1 : (2, 3)
c1 : (2, 3)
h2 : (2, 3)
c2 : (2, 3)


### GRU
* cell state가 없는 것만 LSTM과 차이

In [36]:
def gru(return_sequences=False, return_state=False):
  """Build a single-layer GRU model, predict on X and print shapes.

  The model takes inputs of shape (T, D) and has U units; T, D, U and
  the input batch X are the module-level values defined earlier.

  Args:
    return_sequences: forwarded unchanged to the GRU layer.
    return_state: forwarded unchanged to the GRU layer. When truthy the
      prediction is unpacked into two arrays labelled o and h.

  Returns:
    None. Only the array shapes are printed.
  """
  inputs = Input(shape=(T, D))
  outputs = GRU(U, return_state=return_state, return_sequences=return_sequences)(inputs)
  prediction = Model(inputs=inputs, outputs=outputs).predict(X)

  if not return_state:
    print("o :", prediction.shape)
    return

  # With return_state the model yields the output and one state array.
  output, hidden = prediction
  print("o :", output.shape)
  print("h :", hidden.shape)

# Each entry pairs a banner with the keyword arguments for one gru() run;
# a blank line separates consecutive runs.
for position, (banner, options) in enumerate((
    ("---- Many-to-One output ----",
     {"return_sequences": False, "return_state": False}),
    ("---- Many-to-Many output ----", {"return_sequences": True}),
    ("---- Sequence-to-Vector output ----", {"return_state": True}),
)):
  if position > 0:
    print()
  print(banner)
  gru(**options)

---- Many-to-One output ----
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318ms/step
o : (2, 3)

---- Many-to-Many output ----
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step
o : (2, 5, 3)

---- Sequence-to-Vector output ----
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
o : (2, 3)
h : (2, 3)


### Bidirectional GRU
* cell state 가 없는 것 외에 LSTM과 동일

In [37]:
def bi_gru(return_sequences=False, return_state=False):
    """Build a Bidirectional GRU model, predict on X and print shapes.

    The wrapped GRU has U units and the model takes inputs of shape
    (T, D); T, D, U and the input batch X are module-level values.

    Args:
        return_sequences: forwarded unchanged to the wrapped GRU layer.
        return_state: forwarded unchanged to the wrapped GRU layer. When
            truthy the prediction is unpacked into three arrays labelled
            o, h1, h2 (per the Keras Bidirectional docs the states come
            forward first, then backward).

    Returns:
        None. Only the array shapes are printed.
    """
    inputs = Input(shape=(T, D))
    recurrent = GRU(U, return_state=return_state, return_sequences=return_sequences)
    outputs = Bidirectional(recurrent)(inputs)
    prediction = Model(inputs=inputs, outputs=outputs).predict(X)

    if not return_state:
        print("o :", prediction.shape)
        return

    # With return_state the model yields the output plus two state arrays.
    o, h1, h2 = prediction
    for label, array in (("o :", o), ("h1 :", h1), ("h2 :", h2)):
        print(label, array.shape)

# Three Bidirectional GRU runs, each announced by its banner and separated
# from the previous run by a blank line.
bi_gru_runs = [
    ("---- 순방향, 역방향이 concatenate 된 many-to-one output",
     dict(return_sequences=False, return_state=False)),
    ("---- 순방향, 역방향이 concatenate 된 many-to-many output",
     dict(return_sequences=True)),
    ("---- 순방향, 역방향이 concatenate 된 sequence-to-vector output",
     dict(return_state=True)),
]
for run_index, (banner, options) in enumerate(bi_gru_runs):
    if run_index:
        print()
    print(banner)
    bi_gru(**options)

---- 순방향, 역방향이 concatenate 된 many-to-one output
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381ms/step
o : (2, 6)

---- 순방향, 역방향이 concatenate 된 many-to-many output
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 390ms/step
o : (2, 5, 6)

---- 순방향, 역방향이 concatenate 된 sequence-to-vector output
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 396ms/step
o : (2, 6)
h1 : (2, 3)
h2 : (2, 3)
