In [1]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, LSTM, Dense, Lambda, Reshape, Dropout
from tensorflow.keras.layers import Bidirectional, RepeatVector, Dot, Activation
from tensorflow.keras.layers import Concatenate, Flatten

from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

import numpy as np
import pandas as pd
from scipy.ndimage.interpolation import shift
import matplotlib.pyplot as plt
from datetime import datetime
from utils import *


Bad key "text.kerning_factor" on line 4 in
/home/zafir/miniconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
train_encoder_input_data = np.load('./data/third-order/Centar/train_encoder_input_data.npy')
train_decoder_input_data = np.load('./data/third-order/Centar/train_decoder_input_data.npy')
train_decoder_target_data = np.load('./data/third-order/Centar/train_decoder_target_data.npy')

valid_encoder_input_data = np.load('./data/third-order/Centar/valid_encoder_input_data.npy')
valid_decoder_input_data = np.load('./data/third-order/Centar/valid_decoder_input_data.npy')
valid_decoder_target_data = np.load('./data/third-order/Centar/valid_decoder_target_data.npy')

test_encoder_input_data = np.load('./data/third-order/Centar/test_encoder_input_data.npy')
test_decoder_input_data = np.load('./data/third-order/Centar/test_decoder_input_data.npy')
test_decoder_target_data = np.load('./data/third-order/Centar/test_decoder_target_data.npy')
test_decoder_target_data = test_decoder_target_data.flatten()

In [3]:
m_train, Tx, encoder_input_dim = train_encoder_input_data.shape
    
Ty, decoder_input_dim = (train_decoder_input_data.shape[1], 
                         train_decoder_input_data.shape[2])

decoder_output_dim = train_decoder_target_data.shape[2]

m_val = valid_encoder_input_data.shape[0]
m_test = test_decoder_input_data.shape[0]

In [4]:
batch_size = 64
epochs = 150
patience = 10

### Standard

First, using the best hyperparameters found during the random search, we fine tune the model.

In [5]:
latent_dim = 64
dense_dropout_rate = 0.1
learning_rate = 0.000219

In [11]:
K.clear_session()

# ------------------- SHARED LAYERS ---------------------
encoder_lstm = LSTM(latent_dim, return_state=True, 
                      name='encoder_lstm')
decoder_lstm = LSTM(latent_dim, return_state=True, 
                    return_sequences=True, name='decoder_lstm')
decoder_dense = Dense(decoder_output_dim, 
                      activation='linear', name='decoder_dense')
dense_dropout = Dropout(rate=dense_dropout_rate, name='dense_dropout')

# Since the best standard model was not stacked and the dense dropout
# rate was 0, we basically can remove the Dropout layers.

# -------------------- TRAIN MODEL ----------------------
encoder_inputs = Input(shape=(Tx, encoder_input_dim), 
                       name='encoder_inputs')

# Obtain the hidden states of the encoder
_, h, c = encoder_lstm(encoder_inputs)

decoder_inputs = Input(shape=(Ty, decoder_input_dim), 
                       name='decoder_inputs')

# Obtain the outputs of the decoder (we don't care about
# the hidden states during training)
x, _, _ = decoder_lstm(decoder_inputs, initial_state=[h, c])
x = dense_dropout(x)
decoder_outputs = decoder_dense(x)

model = Model(inputs=[encoder_inputs, decoder_inputs], 
              outputs=decoder_outputs)
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='mse')

In [12]:
!rm -rf "./logs/standard"

In [13]:
model.fit(x=[train_encoder_input_data, 
            train_decoder_input_data], 
          y=train_decoder_target_data,
          validation_data=([
            valid_encoder_input_data,
            valid_decoder_input_data],
            valid_decoder_target_data),
          batch_size=batch_size,
          epochs=1,
          callbacks=[EarlyStopping(monitor='val_loss', 
                                  patience=patience, 
                                  verbose=1),
                     ModelCheckpoint('./checkpoints/standard.h5', 
                                     save_best_only=True),
                     TensorBoard(log_dir="./logs/standard", 
                                 histogram_freq=1)]
         )

Train on 60677 samples, validate on 3388 samples


<tensorflow.python.keras.callbacks.History at 0x7f80f41c67b8>

Now, load the best (trained) model, obtain the layers and build an inference model

In [5]:
best_model = load_model('./checkpoints/standard.h5')

encoder_lstm = best_model.get_layer('encoder_lstm')
decoder_lstm = best_model.get_layer('decoder_lstm')
decoder_dense = best_model.get_layer('decoder_dense')
dense_dropout = best_model.get_layer('dense_dropout')

In [6]:
K.clear_session()

encoder_inputs = Input(shape=(Tx, encoder_input_dim), 
                       name='encoder_inputs')

decoder_inputs = Input(shape=(Ty, decoder_input_dim), 
                       name='decoder_inputs')

# Obtain the hidden states of the encoder
_, h, c = encoder_lstm(encoder_inputs)

outputs = []
for t in range(Ty):
    if t == 0:
        x = Lambda(lambda z: z[:, t, :])(decoder_inputs)
    else:
        x = Lambda(lambda z: z[:, t, 1:])(decoder_inputs)
        x = Concatenate(axis=-1)([out, x])
    
    x = K.expand_dims(x, axis=1)
    
    # Obtain the output and hidden states of the decoder LSTM 
    out, h, c = decoder_lstm(x, initial_state=[h, c])
    out = dense_dropout(out)
    out = decoder_dense(out)
    out = Flatten()(out)
    outputs.append(out)

inference_model = Model(inputs=[encoder_inputs, decoder_inputs], 
              outputs=outputs, name='inference_model')

In [7]:
y_pred = inference_model.predict([test_encoder_input_data, test_decoder_input_data])
y_pred = format_model_output(y_pred)
loss = K.eval(tf.keras.losses.mean_squared_error(test_decoder_target_data, y_pred))
loss

0.6625962

### Attentive

In [5]:
def one_step_attention(encoder_outputs, h_prev, attention_repeat, 
                       attention_concatenate, attention_dense_1,
                       attention_dense_2, attention_activation,
                       attention_dot):
    
    x = attention_repeat(h_prev)
    x = attention_concatenate([encoder_outputs, x])
    x = attention_dense_1(x)
    energies = attention_dense_2(x)
    alphas = attention_activation(energies)
    context = attention_dot([alphas, encoder_outputs])
    
    return context

In [6]:
def create_attentive_model(encoder_latent_dim, decoder_latent_dim,
                           attention_dense_dim, seq_dropout_rate,
                           dense_dropout_rate, learning_rate):
    K.clear_session()

    # ------------------- SHARED LAYERS ---------------------
    # Encoder layers
    encoder_lstm = Bidirectional(LSTM(encoder_latent_dim, return_sequences=True, 
                                      name='encoder_lstm'), merge_mode='concat')

    # Attention layers
    attention_repeat = RepeatVector(n=Tx, name='attention_repeat')
    attention_concatenate = Concatenate(axis=-1, name='attention_concatenate')
    attention_dense_1 = Dense(attention_dense_dim, activation='tanh', 
                              name='attention_dense_1')
    attention_dense_2 = Dense(1, activation='relu', name='attention_dense_2')
    attention_activation = Activation(softmax, name='attention_activation') 
    attention_dot = Dot(axes=1, name='attention_dot')

    # Decoder layers
    decoder_concatenate = Concatenate(axis=-1, name='decoder_concatenate')
    decoder_lstm = LSTM(decoder_latent_dim, return_state=True, 
                        name='decoder_lstm')
    decoder_dense = Dense(decoder_output_dim, activation='linear',
                          name='decoder_dense')

    seq_dropout = Dropout(rate=seq_dropout_rate, name='seq_dropout')
    dense_dropout = Dropout(rate=dense_dropout_rate, name='dense_dropout')

    # -------------------- TRAIN MODEL ----------------------
    encoder_inputs = Input(shape=(Tx, encoder_input_dim), 
                           name='encoder_inputs')

    x = encoder_lstm(encoder_inputs)
    encoder_outputs = seq_dropout(x)

    decoder_inputs = Input(shape=(Ty, decoder_input_dim), 
                           name='decoder_inputs')
    h0 = Input(shape=(decoder_latent_dim,), name='h0')
    c0 = Input(shape=(decoder_latent_dim,), name='c0')
    h, c = h0, c0

    # Decoder outputs
    outputs = []

    for t in range(Ty):
        context = one_step_attention(encoder_outputs, h, attention_repeat, 
                                     attention_concatenate, attention_dense_1,
                                     attention_dense_2, attention_activation,
                                     attention_dot)

        # Obtain the decoder input at timestamp t
        x = Lambda(lambda z: z[:, t, :])(decoder_inputs)
        decoder_input = Reshape((1, x.shape[1]))(x)

        # Construct the full decoder input by concatenating the input at 
        # timestemp t with the calculated context
        full_decoder_input = decoder_concatenate([decoder_input, context])

        h, _, c = decoder_lstm(full_decoder_input, initial_state=[h, c])
        x = dense_dropout(h)
        decoder_output = decoder_dense(x)

        outputs.append(decoder_output)

    model = Model(inputs=[encoder_inputs, decoder_inputs, h0, c0], 
                  outputs=outputs)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')
    
    return model

First, using the best hyperparameters found during the random search, we fine tune the model.

In [7]:
encoder_latent_dim = 64
decoder_latent_dim = 64
attention_dense_dim = 12
seq_dropout_rate = 0.2
dense_dropout_rate = 0.2
learning_rate = 0.005

In [8]:
h0_train = np.zeros((m_train, decoder_latent_dim))
c0_train = np.zeros((m_train, decoder_latent_dim))

h0_val = np.zeros((m_val, decoder_latent_dim))
c0_val = np.zeros((m_val, decoder_latent_dim))

h0_test = np.zeros((m_test, decoder_latent_dim))
c0_test = np.zeros((m_test, decoder_latent_dim))

# due to the model architecture, we need to transform the output shape and type
train_attentive_decoder_target_data = list(np.swapaxes(
                                              train_decoder_target_data, 0, 1))
valid_attentive_decoder_target_data = list(np.swapaxes(
                                              valid_decoder_target_data, 0, 1))

In [9]:
model = create_attentive_model(encoder_latent_dim, decoder_latent_dim,
                               attention_dense_dim, seq_dropout_rate,
                               dense_dropout_rate, learning_rate)

In [10]:
!rm -rf "./logs/attentive"

In [11]:
model.fit(x=[train_encoder_input_data, 
            train_decoder_input_data,
            h0_train, c0_train], 
          y=train_attentive_decoder_target_data,
          validation_data=([
            valid_encoder_input_data,
            valid_decoder_input_data, 
            h0_val, c0_val],
            valid_attentive_decoder_target_data),
          batch_size=batch_size,
          epochs=1,
          verbose=0,
          callbacks=[EarlyStopping(monitor='val_loss', 
                                  patience=patience, 
                                  verbose=1),
                     LossPrintingCallback(Ty),
                     ModelCheckpoint('./checkpoints/attentive',
                                     save_weights_only=True,
                                     save_best_only=True),
                     TensorBoard(log_dir="./logs/attentive", 
                                 histogram_freq=1)]
         )

[2020-05-26 14:01:55]	epoch: 0	loss: 1.96127	val_loss:1.21645


<tensorflow.python.keras.callbacks.History at 0x7fefa489f470>

Now, load the best (trained) model, obtain the layers and build an inference model

In [12]:
best_model = create_attentive_model(encoder_latent_dim, decoder_latent_dim,
                               attention_dense_dim, seq_dropout_rate,
                               dense_dropout_rate, learning_rate)

best_model.load_weights('./checkpoints/attentive')

# Encoder layers
encoder_lstm = best_model.get_layer('bidirectional')

# Attention layers
attention_repeat = best_model.get_layer('attention_repeat')
attention_concatenate = best_model.get_layer('attention_concatenate')
attention_dense_1 = best_model.get_layer('attention_dense_1')
attention_dense_2 = best_model.get_layer('attention_dense_2')
attention_activation = best_model.get_layer('attention_activation')
attention_dot = best_model.get_layer('attention_dot')

# Decoder layers
decoder_concatenate = best_model.get_layer('decoder_concatenate')
decoder_lstm = best_model.get_layer('decoder_lstm')
decoder_dense = best_model.get_layer('decoder_dense')

seq_dropout = best_model.get_layer('seq_dropout')
dense_dropout = best_model.get_layer('dense_dropout')

## TODO: Inference model