In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Bidirectional
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

## visualize model
from keras.utils.vis_utils import plot_model

Using TensorFlow backend.


## Data Preparation

In [2]:
# Toy univariate series: 10, 20, ..., 90.
seq = np.arange(start=10, stop=100, step=10)
print(seq)
# Window configuration used by split_sequence():
#   n_step    - number of past observations in each input window
#   pred_step - forecast step passed to split_sequence()
n_step, pred_step = 3, 1
def split_sequence(series, n_step, pred_step=1):
    """Turn a 1-D series into sliding input windows and matching targets.

    Each sample pairs ``n_step`` consecutive observations with the single
    observation that lies ``pred_step`` steps after the end of that window.

    Parameters
    ----------
    series : array-like, 1-D
        The observations, in time order.
    n_step : int
        Number of observations in each input window (>= 1).
    pred_step : int, optional
        How many steps past the window the target lies (>= 1). With the
        default of 1 the target is the observation immediately after the
        window.

    Returns
    -------
    X : numpy.ndarray
        Input windows, shape ``(samples, n_step)``.
    y : numpy.ndarray
        Targets, shape ``(samples,)``.
        Both arrays are empty when the series is too short for one sample.

    Raises
    ------
    ValueError
        If ``n_step`` or ``pred_step`` is smaller than 1.
    """
    if n_step < 1 or pred_step < 1:
        raise ValueError("n_step and pred_step must both be >= 1")
    series = np.asarray(series)
    # Offset from the start of a window to its target. The original code
    # always used ``n_step`` here, so ``pred_step`` only shortened the loop
    # and never moved the target.
    target_offset = n_step + pred_step - 1
    n_sample = max(series.shape[0] - target_offset, 0)
    x_list = [series[s:s + n_step] for s in range(n_sample)]
    y_list = [series[s + target_offset] for s in range(n_sample)]
    return np.array(x_list), np.array(y_list)
# Build the supervised (window, target) pairs and list them one per line.
X, y = split_sequence(seq, n_step, pred_step)
for window, target in zip(X, y):
    print(window, target)

[10 20 30 40 50 60 70 80 90]
[10 20 30] 40
[20 30 40] 50
[30 40 50] 60
[40 50 60] 70
[50 60 70] 80
[60 70 80] 90


## 9.2.2 Vanilla LSTM

- The key part of the model definition is the shape of the input,
- that is, what the model expects as input for each sample in terms of the number of time steps and the number of features.
- We are working with a univariate series, so the number of features is one, for one variable.
- The number of time steps as input is the number we chose when preparing our dataset as an argument to the `split_sequence()` function.

In [3]:
# Vanilla LSTM: one 50-unit LSTM layer feeding a single-output Dense layer.
# Each sample has shape (n_step, n_feature); the series is univariate, so
# there is exactly one feature per time step.
n_feature = 1
model = Sequential([
    LSTM(50, activation='relu', input_shape=(n_step, n_feature)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

Instructions for updating:
Colocations handled automatically by placer.


- The shape of the input for each sample is specified in the `input_shape` argument on the definition of the first hidden layer.
- We almost always have multiple samples, therefore, the model will expect the input component of training data to have the dimensions or shape: [samples, timesteps, features].
- Our `split_sequence()` function in the previous section outputs X with the shape [samples, timesteps], so we simply reshape it to add one more dimension for the single feature.

In [4]:
# Append a trailing feature axis: [samples, timesteps] -> [samples, timesteps, features].
X_lstm = np.reshape(X, (X.shape[0], n_step, n_feature))
for window, target in zip(X_lstm, y):
    print(window, target)

[[10]
 [20]
 [30]] 40
[[20]
 [30]
 [40]] 50
[[30]
 [40]
 [50]] 60
[[40]
 [50]
 [60]] 70
[[50]
 [60]
 [70]] 80
[[60]
 [70]
 [80]] 90


In [5]:
# Fit the vanilla LSTM on the windowed samples for 200 epochs.
# verbose=0 silences the per-epoch progress log; the History object returned
# by fit() is what the cell displays.
model.fit(X_lstm, y, epochs=200, verbose=0)

Instructions for updating:
Use tf.cast instead.


<keras.callbacks.History at 0x7f614a36f978>

In [6]:
# Out-of-sample check: the last three values of the series, arranged as a
# single sample with layout [samples, timesteps, features].
x_test = np.array([70, 80, 90])
x_test_lstm = np.reshape(x_test, (1, n_step, n_feature))
print(x_test_lstm)

[[[70]
  [80]
  [90]]]


In [7]:
# Forecast the value that follows [70, 80, 90]; the series itself continues
# in steps of 10, so a good forecast is close to 100.
y_hat = model.predict(x_test_lstm)
print(y_hat)

[[102.419624]]


In [8]:
# Write the layer diagram (with shapes and layer names) to 9_2_lstm.png,
# the image embedded in the markdown cell that follows.
plot_model(model, to_file='9_2_lstm.png', show_shapes=True, show_layer_names=True)

### vanilla LSTM model
<img src="./9_2_lstm.png" alt="Drawing" style="width: 600px;"/>

## 9.2.3 Stacked LSTM

- Multiple hidden LSTM layers can be stacked one on top of another in what is referred to as a Stacked LSTM model.
- An LSTM layer requires a three-dimensional input, but by default an LSTM produces a two-dimensional output as an interpretation from the end of the sequence, so its output cannot be fed directly into another LSTM layer.
- We can address this by having the LSTM output a value for each time step in the input data by setting the `return_sequences=True` argument on the layer.
- This allows us to have 3D output from hidden LSTM layer as input to the next. We can therefore define a Stacked LSTM as follows.

In [9]:
# Confirm the training input is 3-D, [samples, timesteps, features], as the LSTM layers expect.
print(X_lstm.shape)

(6, 3, 1)


In [10]:
# Stacked LSTM: the first LSTM returns its output at every time step
# (return_sequences=True) so the second LSTM still receives 3-D input.
model_stack = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(n_step, n_feature)),
    LSTM(50, activation='relu'),
    Dense(1),
])
model_stack.compile(optimizer='adam', loss='mse')

In [11]:
# Fit the stacked LSTM on the same windowed samples; 200 epochs, progress log silenced.
model_stack.fit(X_lstm, y, epochs=200, verbose=0)

<keras.callbacks.History at 0x7f61482c8240>

In [12]:
# Forecast the value after [70, 80, 90] with the stacked model, using the same test window as before.
y_hat_stack = model_stack.predict(x_test_lstm)
print(y_hat_stack)

[[103.53959]]


In [13]:
# Write the stacked model's layer diagram to 9_2_3_lstm.png, the image embedded below.
plot_model(model_stack, to_file='9_2_3_lstm.png', show_shapes=True, show_layer_names=True)

### Stacked LSTM model
<img src="./9_2_3_lstm.png" alt="Drawing" style="width: 600px;"/>

## 9.2.4 Bidirectional LSTM

<img src="./7-23-2019-bidir-lstm.png" alt="Drawing" style="width: 600px;"/>

- On some sequence prediction problems, it can be beneficial to allow the LSTM model to learn the input sequence both forward and backwards and concatenate both interpretations.
- This is called a Bidirectional LSTM. We can implement a Bidirectional LSTM for univariate time series forecasting by wrapping the first hidden layer in a wrapper layer called Bidirectional.
- An example of defining a Bidirectional LSTM to read input both forward and backward is as follows.

In [14]:
# Bidirectional LSTM: the wrapper runs the LSTM over the window forwards and
# backwards and combines the two interpretations before the Dense output layer.
# input_shape is passed to the Bidirectional wrapper, which is the model's
# first layer, instead of to the wrapped LSTM. That way the Sequential model
# knows its input shape at definition time and does not have to wait for
# fit() to be built.
model_bidir = Sequential()
model_bidir.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(n_step, n_feature)))
model_bidir.add(Dense(1))
model_bidir.compile(optimizer='adam', loss='mse')

In [15]:
# Fit the bidirectional LSTM on the windowed samples; 200 epochs, progress log silenced.
model_bidir.fit(X_lstm, y, epochs=200, verbose=0)

<keras.callbacks.History at 0x7f6130267eb8>

In [16]:
# Forecast the value after [70, 80, 90] with the bidirectional model.
yhat_bidir = model_bidir.predict(x_test_lstm)
print(yhat_bidir)

[[100.1807]]


In [17]:
# Write the bidirectional model's layer diagram to 9_2_4_lstm.png, the image embedded below.
plot_model(model_bidir, to_file='9_2_4_lstm.png', show_shapes=True, show_layer_names=True)

### Bidirectional LSTM model
<img src="./9_2_4_lstm.png" alt="Drawing" style="width: 600px;"/>

## 9.2.5 CNN-LSTM

In [58]:
# Longer toy series for the CNN-LSTM section: 10, 20, ..., 290.
# NOTE: this rebinds `seq` from the earlier sections.
seq = np.arange(start=10, stop=300, step=10)
print(seq)

[ 10  20  30  40  50  60  70  80  90 100 110 120 130 140 150 160 170 180
 190 200 210 220 230 240 250 260 270 280 290]


In [59]:
# Use 8-step windows here; each one is later divided into two 4-step
# sub-sequences for the CNN front end.
# NOTE: this rebinds `n_step` and `y` from the earlier sections.
n_step = 8
X4, y = split_sequence(seq, n_step, pred_step)
for window, target in zip(X4, y):
    print(window, target)

[10 20 30 40 50 60 70 80] 90
[20 30 40 50 60 70 80 90] 100
[ 30  40  50  60  70  80  90 100] 110
[ 40  50  60  70  80  90 100 110] 120
[ 50  60  70  80  90 100 110 120] 130
[ 60  70  80  90 100 110 120 130] 140
[ 70  80  90 100 110 120 130 140] 150
[ 80  90 100 110 120 130 140 150] 160
[ 90 100 110 120 130 140 150 160] 170
[100 110 120 130 140 150 160 170] 180
[110 120 130 140 150 160 170 180] 190
[120 130 140 150 160 170 180 190] 200
[130 140 150 160 170 180 190 200] 210
[140 150 160 170 180 190 200 210] 220
[150 160 170 180 190 200 210 220] 230
[160 170 180 190 200 210 220 230] 240
[170 180 190 200 210 220 230 240] 250
[180 190 200 210 220 230 240 250] 260
[190 200 210 220 230 240 250 260] 270
[200 210 220 230 240 250 260 270] 280
[210 220 230 240 250 260 270 280] 290


In [60]:
# reshape from [samples, timesteps] into [samples, subsequences, timesteps, features]
n_feature = 1
n_seq = 2
n_step = 4 # height
X_cl = X4.reshape((X4.shape[0], n_seq, n_step, n_feature))
print(X_cl)

[[[[ 10]
   [ 20]
   [ 30]
   [ 40]]

  [[ 50]
   [ 60]
   [ 70]
   [ 80]]]


 [[[ 20]
   [ 30]
   [ 40]
   [ 50]]

  [[ 60]
   [ 70]
   [ 80]
   [ 90]]]


 [[[ 30]
   [ 40]
   [ 50]
   [ 60]]

  [[ 70]
   [ 80]
   [ 90]
   [100]]]


 [[[ 40]
   [ 50]
   [ 60]
   [ 70]]

  [[ 80]
   [ 90]
   [100]
   [110]]]


 [[[ 50]
   [ 60]
   [ 70]
   [ 80]]

  [[ 90]
   [100]
   [110]
   [120]]]


 [[[ 60]
   [ 70]
   [ 80]
   [ 90]]

  [[100]
   [110]
   [120]
   [130]]]


 [[[ 70]
   [ 80]
   [ 90]
   [100]]

  [[110]
   [120]
   [130]
   [140]]]


 [[[ 80]
   [ 90]
   [100]
   [110]]

  [[120]
   [130]
   [140]
   [150]]]


 [[[ 90]
   [100]
   [110]
   [120]]

  [[130]
   [140]
   [150]
   [160]]]


 [[[100]
   [110]
   [120]
   [130]]

  [[140]
   [150]
   [160]
   [170]]]


 [[[110]
   [120]
   [130]
   [140]]

  [[150]
   [160]
   [170]
   [180]]]


 [[[120]
   [130]
   [140]
   [150]]

  [[160]
   [170]
   [180]
   [190]]]


 [[[130]
   [140]
   [150]
   [160]]

  [[170]
   [180]
   [190]

- We want to reuse the same CNN model when reading in each sub-sequence of data separately.
- This can be achieved by wrapping the entire CNN model in a TimeDistributed wrapper that will apply the entire model once per input, in this case, once per input subsequence.
- The CNN model first has a convolutional layer for reading across the subsequence that requires a number of filters and a kernel size to be specified.
- The number of filters is the number of reads or interpretations of the input sequence.
- The kernel size is the number of time steps included in each read operation of the input sequence.
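- The convolution layer is followed by a max pooling layer that distills the filter maps down to roughly half of their size (here from 3 steps to 1), keeping the most salient features. The result is then flattened into a single one-dimensional vector that serves as one input time step for the LSTM layer.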

In [61]:
# Sanity check: n_step must be the sub-sequence length (4), not the 8-step window length, before the CNN input shape is defined.
print(n_step)

4


In [62]:
# Number of time steps covered by each convolution read.
kernel_size = 2

# CNN front end. Every layer is wrapped in TimeDistributed so the same
# Conv1D -> MaxPooling1D -> Flatten stack is applied to each sub-sequence.
# The leading None in input_shape leaves the number of sub-sequences open.
model_cnnlstm = Sequential([
    TimeDistributed(
        Conv1D(filters=64, kernel_size=kernel_size, activation='relu'),
        input_shape=(None, n_step, n_feature),
    ),
    TimeDistributed(MaxPooling1D()),
    TimeDistributed(Flatten()),
])

In [63]:
# Output side: an LSTM reads the sequence of flattened CNN features (one
# vector per sub-sequence) and a Dense layer emits the single forecast value.
model_cnnlstm.add(LSTM(units=50, activation='relu'))
model_cnnlstm.add(Dense(units=1))
model_cnnlstm.compile(loss='mse', optimizer='adam')

In [64]:
# Fit the CNN-LSTM on the 4-D samples [samples, subsequences, timesteps, features]; 200 epochs, progress log silenced.
model_cnnlstm.fit(X_cl, y, epochs=200, verbose=0)

<keras.callbacks.History at 0x7f60bf7d3550>

In [65]:
# Show per-layer output shapes and parameter counts for the CNN-LSTM.
model_cnnlstm.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_23 (TimeDis (None, None, 3, 64)       192       
_________________________________________________________________
time_distributed_24 (TimeDis (None, None, 1, 64)       0         
_________________________________________________________________
time_distributed_25 (TimeDis (None, None, 64)          0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 50)                23000     
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 51        
Total params: 23,243
Trainable params: 23,243
Non-trainable params: 0
_________________________________________________________________


In [66]:
# Out-of-sample check: the last eight values of the series, arranged as one
# sample with layout [samples, subsequences, timesteps, features].
x_test_cl = np.array([220, 230, 240, 250, 260, 270, 280, 290])
x_test_cl_reshape = np.reshape(x_test_cl, (1, n_seq, n_step, n_feature))
print(x_test_cl_reshape)

[[[[220]
   [230]
   [240]
   [250]]

  [[260]
   [270]
   [280]
   [290]]]]


In [67]:
# Forecast the value after [220, ..., 290]; the series itself continues in
# steps of 10, so a good forecast is close to 300.
yhat_cl = model_cnnlstm.predict(x_test_cl_reshape)
print(yhat_cl)

[[316.13284]]


In [68]:
# Write the CNN-LSTM layer diagram to 9_2_5a_lstm.png, the image embedded below.
plot_model(model_cnnlstm, to_file='9_2_5a_lstm.png', show_shapes=True, show_layer_names=True)

### CNN-LSTM model
<img src="./9_2_5a_lstm.png" alt="Drawing" style="width: 600px;"/>