# LSTM autoencoder for COVID-19 data in UK

In [1]:
# lstm autoencoder to recreate a timeseries
import numpy as np
from tensorflow.keras.models import Sequential, Model 
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.layers import TimeDistributed
'''
A UDF to convert input data into 3-D
array as required for LSTM network.
'''

In [2]:
import pandas as pd
data = pd.read_csv('data.csv')

In [3]:
# define input timeseries
timeseries = np.log(data.values +1)

timesteps = timeseries.shape[0]
n_features = timeseries.shape[1]
timeseries

array([[11.25544923,  3.17805383],
       [11.13996143,  4.12713439],
       [10.87981921,  4.41884061],
       ...,
       [ 3.49650756,  0.        ],
       [ 2.48490665,  1.09861229],
       [ 1.60943791,  0.69314718]])

In [None]:
# create temporal dataset
def temporalize(X, y, lookback):
    output_X = []
    output_y = []
    for i in range(len(X)-lookback-1):
        t = []
        for j in range(1,lookback+1):
            # Gather past records upto the lookback period
            t.append(X[[(i+j+1)], :])
        output_X.append(t)
        output_y.append(y[i+lookback+1])
    return output_X, output_y

In [44]:
timesteps = 7
X, y = temporalize(X = timeseries, y = np.zeros(len(timeseries)), lookback = timesteps)

n_features = 2
X = np.array(X)
X = X.reshape(X.shape[0], timesteps, n_features)

X

array([[[10.87981921,  4.41884061],
        [10.71290551,  4.29045944],
        [10.61432729,  4.49980967],
        ...,
        [10.79771671,  4.35670883],
        [10.68168794,  4.55387689],
        [10.69239936,  4.46590812]],

       [[10.71290551,  4.29045944],
        [10.61432729,  4.49980967],
        [10.79016408,  4.30406509],
        ...,
        [10.68168794,  4.55387689],
        [10.69239936,  4.46590812],
        [10.56790084,  4.52178858]],

       [[10.61432729,  4.49980967],
        [10.79016408,  4.30406509],
        [10.79771671,  4.35670883],
        ...,
        [10.69239936,  4.46590812],
        [10.56790084,  4.52178858],
        [10.63407502,  4.66343909]],

       ...,

       [[ 3.8501476 ,  1.38629436],
        [ 3.71357207,  1.79175947],
        [ 4.07753744,  1.38629436],
        ...,
        [ 3.68887945,  0.        ],
        [ 3.25809654,  1.38629436],
        [ 3.49650756,  0.        ]],

       [[ 3.71357207,  1.79175947],
        [ 4.07753744,  1.38

In [35]:
# define model
model = Sequential()
model.add(LSTM(256, activation='relu', input_shape=(timesteps,n_features), return_sequences=True))
model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(RepeatVector(timesteps))
model.add(LSTM(32, activation='relu', return_sequences=True))
model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(LSTM(256, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(n_features)))
model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 7, 256)            265216    
                                                                 
 lstm_7 (LSTM)               (None, 7, 128)            197120    
                                                                 
 lstm_8 (LSTM)               (None, 7, 64)             49408     
                                                                 
 lstm_9 (LSTM)               (None, 32)                12416     
                                                                 
 repeat_vector_1 (RepeatVect  (None, 7, 32)            0         
 or)                                                             
                                                                 
 lstm_10 (LSTM)              (None, 7, 32)             8320      
                                                      

In [46]:
# fit model
model.fit(X, X, epochs=100, batch_size=16, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f7291710310>

In [47]:
# demonstrate reconstruction
yhat = model.predict(X, verbose=0)
print('---Predicted---')
print(np.round(yhat,3))
print('---Actual---')
print(np.round(X, 3))

---Predicted---
[[[10.856  4.513]
  [10.797  4.539]
  [10.781  4.516]
  ...
  [10.797  4.571]
  [10.793  4.589]
  [10.809  4.599]]

 [[10.804  4.513]
  [10.741  4.536]
  [10.738  4.519]
  ...
  [10.767  4.577]
  [10.757  4.591]
  [10.77   4.598]]

 [[10.807  4.552]
  [10.745  4.574]
  [10.738  4.558]
  ...
  [10.768  4.612]
  [10.756  4.626]
  [10.768  4.632]]

 ...

 [[ 4.17   1.941]
  [ 3.868  1.788]
  [ 3.717  1.588]
  ...
  [ 3.562  0.846]
  [ 3.684 -0.034]
  [ 3.418 -0.058]]

 [[ 4.015  1.78 ]
  [ 3.714  1.642]
  [ 3.604  1.437]
  ...
  [ 3.602  0.394]
  [ 3.556 -0.237]
  [ 3.226  0.487]]

 [[ 3.791  1.582]
  [ 3.57   1.418]
  [ 3.522  1.144]
  ...
  [ 3.535 -0.142]
  [ 3.25   0.225]
  [ 3.088  1.167]]]
---Actual---
[[[10.88   4.419]
  [10.713  4.29 ]
  [10.614  4.5  ]
  ...
  [10.798  4.357]
  [10.682  4.554]
  [10.692  4.466]]

 [[10.713  4.29 ]
  [10.614  4.5  ]
  [10.79   4.304]
  ...
  [10.682  4.554]
  [10.692  4.466]
  [10.568  4.522]]

 [[10.614  4.5  ]
  [10.79   4.304]
 

In [48]:
np.exp(yhat[50])+1

array([[41244.066  ,   129.87068],
       [38290.785  ,   130.26025],
       [38425.438  ,   130.00404],
       [39072.133  ,   130.03923],
       [41369.098  ,   130.0028 ],
       [41018.406  ,   131.87883],
       [41535.44   ,   133.46997]], dtype=float32)

In [49]:
np.exp(X[50])+1

array([[33311.,   115.],
       [33223.,   122.],
       [41261.,   132.],
       [41747.,   104.],
       [44365.,   112.],
       [41500.,   120.],
       [36714.,   105.]])

In [54]:
layer_name = 'lstm_9'
encoder = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
encoder.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6_input (InputLayer)   [(None, 7, 2)]            0         
                                                                 
 lstm_6 (LSTM)               (None, 7, 256)            265216    
                                                                 
 lstm_7 (LSTM)               (None, 7, 128)            197120    
                                                                 
 lstm_8 (LSTM)               (None, 7, 64)             49408     
                                                                 
 lstm_9 (LSTM)               (None, 32)                12416     
                                                                 
Total params: 524,160
Trainable params: 524,160
Non-trainable params: 0
_________________________________________________________________


In [60]:
encode_data = encoder.predict(X[:1])

In [61]:
encode_data

array([[1.8673999e+02, 5.6607903e+01, 0.0000000e+00, 0.0000000e+00,
        6.3717782e-01, 6.8303711e+01, 1.1847310e-15, 6.8894774e-01,
        0.0000000e+00, 0.0000000e+00, 1.0257182e-03, 8.2866745e+01,
        0.0000000e+00, 3.4815879e-18, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 1.4630777e-29, 0.0000000e+00, 2.2294853e+01,
        4.4203594e+01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 3.5062007e-19, 0.0000000e+00, 0.0000000e+00]],
      dtype=float32)