In [None]:
from keras.models import Model
from keras.layers import Input, Flatten
from keras.layers import LSTM, Bidirectional, RepeatVector, Add, TimeDistributed, Reshape,Concatenate, Activation
from keras.layers import Dense, Lambda
import keras.backend as K
import numpy as np
from keras.utils import plot_model
from keras.models import model_from_json
from keras import layers
import tensorflow as tf
from keras.callbacks import ModelCheckpoint,EarlyStopping, TensorBoard
from keras.callbacks import Callback
from keras.optimizers import Adam
import os
import time
from keras.losses import kullback_leibler_divergence
#from keras.utils import multi_gpu_model

from my_classes import DataGenerator
from data_prep import one_hot_decode, get_bars_dataset

from sklearn.model_selection import ParameterGrid

In [None]:
# GPU-only setup: expose just device 0 to TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Ask TensorFlow to grow its GPU memory allocation as needed, then
# register the resulting session as the one the Keras backend uses.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=config)
K.set_session(sess)

### Constants

In [None]:
# --- Problem dimensions (used for the model input/output shapes) ---
n_features = 131
timesteps = 16

# --- Optimiser schedule ---
learning_rate = 0.0001       # Learning rate.
decay_rate = 0.9999          # Learning rate decay per minibatch.
min_learning_rate = 0.00001  # Minimum learning rate.

# --- Network sizes ---
n_encoder_units = 512
n_decoder_units = n_encoder_units  # decoder LSTMs use the same width as the encoder LSTMs
latent_dim = 64

# --- Training setup ---
dropout = 0.3
beta = 0.2
epochs = 20000
batch_size = 64
num_training_samples = 10000
num_validation_samples = 1000
# Whole batches per epoch; any partial trailing batch is dropped.
steps_per_epoch = num_training_samples // batch_size
validation_steps = num_validation_samples // batch_size

# --- Start-of-sequence token: a vector of zeros with only the last index set ---
cardinality = 131
start_of_sequence = np.zeros(cardinality)
start_of_sequence[-1] = 1

### Defining the model

In [None]:
# Returns the training model plus the inference encoder and inference decoder models.
def define_models(n_encoder_units, n_decoder_units, latent_dim, dropout, epsilon_std):
    """Build the recurrent VAE and the two sub-models used at inference time.

    Besides its arguments, the function reads the module-level ``timesteps``,
    ``n_features``, ``beta`` and ``learning_rate`` values.

    Args:
        n_encoder_units: units of each LSTM wrapped by the bidirectional
            encoder layers.
        n_decoder_units: units of each decoder LSTM.
        latent_dim: size of the latent vector ``z``.
        dropout: ``dropout`` argument of the first encoder LSTM and the
            first decoder LSTM.
        epsilon_std: standard deviation of the noise drawn in the sampling
            layer.

    Returns:
        A tuple ``(model, encoder_model, decoder_model)``:

        * ``model`` maps ``[encoder_inputs, decoder_inputs]`` to a softmax
          over ``n_features`` per timestep; it is compiled with ``vae_loss``
          and Adam.
        * ``encoder_model`` maps ``encoder_inputs`` to
          ``[state_decoder_h, state_decoder_c, z]``.
        * ``decoder_model`` maps ``[decinf_input, d_input_state_c,
          d_input_state_h]`` (note: c before h) to
          ``[softmax output, state_h, state_c]``.
    """

    # define training encoder
    encoder_inputs = Input(shape=(timesteps, n_features), name="encoder_inputs")

    encoder0 = Bidirectional(LSTM(n_encoder_units,
                                  dropout=dropout,
                                  return_sequences=True,
                                  unit_forget_bias=True,
                                  name="bidirectional_encoder0"))

    encoder1 = Bidirectional(LSTM(n_encoder_units,
                                  unit_forget_bias=True,
                                  return_state=True,
                                  name="bidirectional_encoder1"))

    # intermediate outputs
    encoder_im_outputs = encoder0(encoder_inputs)

    # final outputs; only the four state tensors are used below
    _, forward_h, forward_c, backward_h, backward_c = encoder1(encoder_im_outputs)

    # element-wise sum (not a concatenation) of the forward/backward h and c states
    state = Add(name='add_states')([forward_h, forward_c, backward_h, backward_c])

    # creating latent vectors
    z_mean = Dense(latent_dim,
                   name="z_mean",
                   kernel_initializer=tf.random_normal_initializer(stddev=0.001),
                   bias_initializer='zeros')(state)

    z_log_var = Dense(latent_dim,
                      name="z_log_var",
                      activation=tf.math.softplus,
                      kernel_initializer=tf.random_normal_initializer(stddev=0.001),
                      bias_initializer='zeros')(state)

    # sampling layer
    def sampling(args):
        """Sampling z from isotropic Gaussian"""
        z_mean, z_log_var = args

        eps = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=epsilon_std)
        # NOTE(review): exp(z_log_var) is used directly as the standard
        # deviation, so despite its name the tensor acts as a log-std.
        # Left unchanged so the forward pass stays compatible with existing
        # checkpoints; the KL term in vae_loss matches this parameterisation.
        return z_mean + K.exp(z_log_var)*eps

    # sampling z
    z = Lambda(sampling, name="z_sample")([z_mean, z_log_var])

    # Initial states for decoder is from z
    state_decoder_h = Dense(n_decoder_units, activation='tanh', name="state_decoder_h")(z)
    state_decoder_c = Dense(n_decoder_units, activation='tanh', name="state_decoder_c")(z)

    # Input to decoder lstm is concatenation of z and inputs
    z_repeated = RepeatVector(timesteps, name="z_repeated")(z)
    decoder_inputs = Input(shape=(timesteps, n_features), name="input_layer_decoder")
    decoder_train_input = Concatenate(axis=2, name="decoder_train_input")([decoder_inputs, z_repeated])

    # training decoder
    decoder_lstm0 = LSTM(n_decoder_units,
                         unit_forget_bias=True,
                         dropout=dropout,
                         return_sequences=True,
                         name="decoder_lstm0")

    decoder_lstm1 = LSTM(n_decoder_units,
                         unit_forget_bias=True,
                         return_sequences=True,
                         return_state=True,
                         name="decoder_lstm1")

    # intermediate outputs
    decoder_im_outputs = decoder_lstm0(decoder_train_input, initial_state=[state_decoder_h, state_decoder_c])
    decoder_outputs, _, _ = decoder_lstm1(decoder_im_outputs)

    # One output projection shared by training and inference.  The inner Dense
    # keeps the name the inference model exposed before
    # ("decoder_inference_dense") and the wrapper keeps "decoder_dense", so
    # both layer names remain available.
    output_dense = Dense(n_features, activation='softmax', name="decoder_inference_dense")
    decoder_dense = TimeDistributed(output_dense, name="decoder_dense")
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # define inference encoder
    encoder_model = Model(encoder_inputs, [state_decoder_h, state_decoder_c, z])

    # define inference decoder
    decinf_input = Input(shape=(1, n_features+latent_dim), name="decinf_inputs")

    # define input states
    # NOTE(review): the state inputs are declared with shape
    # (1, n_decoder_units) and are only fed to decoder_lstm0, while the states
    # returned by this model come from decoder_lstm1, which receives no
    # initial state here.  Confirm this matches what the sampling loop expects.
    dec_input_state_c = Input(shape=(1, n_decoder_units), name="d_input_state_c")
    dec_input_state_h = Input(shape=(1, n_decoder_units), name="d_input_state_h")
    dec_input_states = [dec_input_state_h, dec_input_state_c]

    # intermediate lstm outputs
    decinf_im_outputs = decoder_lstm0(decinf_input,
                                      initial_state=dec_input_states)

    # output is a vector of sequences, needs reshaping
    decinf_im_outputs = Reshape((1, n_decoder_units))(decinf_im_outputs)

    # lstm outputs
    decinf_outputs, state_h, state_c = decoder_lstm1(decinf_im_outputs)
    decoder_states = [state_h, state_c]

    # During inference the decoder outputs one element at a time.  Reuse the
    # trained projection instead of a fresh Dense layer, whose randomly
    # initialised weights would never be trained or loaded from a checkpoint.
    decinf_outputs = output_dense(decinf_outputs)
    decoder_model = Model([decinf_input, dec_input_state_c, dec_input_state_h], [decinf_outputs] + decoder_states)

    def vae_loss(y_true, y_pred):
        """Per-timestep reconstruction cross-entropy plus beta-weighted latent KL.

        The KL term is the closed-form divergence between the Gaussian the
        sampling layer draws from (mean ``z_mean``, std ``exp(z_log_var)``,
        for ``epsilon_std == 1``) and a standard normal prior.  It is computed
        per sample and broadcast over the timestep axis of the cross-entropy.
        """
        xent_loss = K.categorical_crossentropy(y_true, y_pred)
        posterior_var = K.exp(2. * z_log_var)
        kl_loss = 0.5 * K.sum(posterior_var + K.square(z_mean) - 1. - 2. * z_log_var, axis=-1)
        return xent_loss + beta * K.expand_dims(kl_loss, axis=-1)

    optimizer = Adam(lr=learning_rate, amsgrad=True, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss=vae_loss, metrics=['acc'])

    return model, encoder_model, decoder_model

### Generator setup

In [None]:
# Parameters passed to both data generators
params = {'dim': (timesteps, n_features),
          'batch_size': batch_size,
          'shuffle': True}

# Datasets
# The file is read back through .item().get(...), i.e. it holds a pickled
# Python object inside a 0-d array.  NumPy >= 1.16.3 refuses to unpickle
# unless allow_pickle=True is given.  Unpickling can execute arbitrary code,
# so only point this at ID lists produced by this project.
ID_list = np.load("ID_lists/ID_list.npy", allow_pickle=True)
id_splits = ID_list.item()  # unwrap the stored object once

ID_list_dict = {
    "train": id_splits.get("train"),
    "validation": id_splits.get("validation"),
}

# Generators
training_generator = DataGenerator(ID_list_dict['train'], **params)
validation_generator = DataGenerator(ID_list_dict['validation'], **params)

### Training and saving tensorboard

In [None]:
# Build the models; only the training model is needed in this cell.
train, _, _ = define_models(
    n_encoder_units,
    n_decoder_units,
    latent_dim,
    dropout,
    epsilon_std=1.,
)

# Start from previously saved weights.
train.load_weights("weights/512_64/beta0.2-weights-improvement-441-0.99.hdf5")

# TensorBoard logging.
tensorboard_directory = "tb_trained_models/beta0.2"
tensorboard = TensorBoard(log_dir=tensorboard_directory, batch_size=batch_size)

# Checkpointing: a file is written only when val_loss reaches a new minimum;
# the file name records the epoch and the validation accuracy.
filepath = "weights/512_64/beta0.2-weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [tensorboard, checkpoint]

# Train model on dataset
train.fit_generator(
    generator=training_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=epochs,
    callbacks=callbacks_list,
    use_multiprocessing=False,
    verbose=2,
)
