In [None]:
from numpy import array
from numpy import argmax
from numpy import array_equal
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input, Flatten
from keras.layers import LSTM, Bidirectional, RepeatVector, Add, TimeDistributed, Reshape,Concatenate, Activation
from keras.layers import Dense, Lambda
from data_prep011018 import one_hot_decode, get_bars_dataset
import keras.backend as K
import numpy as np
from keras.utils import plot_model
from keras.models import model_from_json
from keras.losses import kullback_leibler_divergence
import os
from keras import layers
import tensorflow as tf
from my_classes import DataGenerator
from keras.callbacks import ModelCheckpoint,EarlyStopping, TensorBoard
from keras.callbacks import LearningRateScheduler, Callback
from keras.optimizers import Adam, Adadelta
from math import exp

In [None]:
# Only for GPU use: restrict this process to the CUDA device with index 1.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Session whose GPU options have `allow_growth` switched on, registered as the
# session the Keras backend will use.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)
K.set_session(sess)

In [None]:
# configure problem
n_features = 131  # size of the feature axis of every timestep
timesteps = 16    # timesteps per sequence

# optimizer settings
learning_rate = 0.0001       # Learning rate.
decay_rate = 0.9999          # Learning rate decay per minibatch.
min_learning_rate = 0.00001  # Minimum learning rate.

# network sizes
n_encoder_units = 1024
n_decoder_units = 1024
latent_dim = 512

dropout_rate = 0.1

# training loop
epochs = 20000
batch_size = 64
num_training_samples = 10000
num_validation_samples = 1000
# Whole batches only: a trailing partial batch is not counted.
steps_per_epoch = num_training_samples // batch_size
validation_steps = num_validation_samples // batch_size

# One-hot vector whose last index is set, used as the start-of-sequence marker.
cardinality = 131
start_of_sequence = np.zeros(cardinality)
start_of_sequence[-1] = 1

In [None]:
# returns train, inference_encoder and inference_decoder models
def define_models(n_encoder_units, n_decoder_units, n_features, timesteps, latent_dim, epsilon_std):
    """Build the training VAE and the two models used for inference.

    Besides its arguments the function reads the module-level names
    ``dropout_rate``, ``learning_rate`` and ``decay_rate``.

    Args:
        n_encoder_units: units of each encoder LSTM (per direction).
        n_decoder_units: units of each decoder LSTM.
        n_features: size of the last axis of the encoder/decoder inputs and of
            the softmax output.
        timesteps: sequence length of the training inputs.
        latent_dim: size of the latent vector ``z``.
        epsilon_std: standard deviation of the noise drawn in the sampling layer.

    Returns:
        A tuple ``(model, encoder_model, decoder_model)``:

        * ``model`` -- compiled training model mapping
          ``[encoder_inputs, decoder_inputs]`` to per-timestep softmax outputs.
        * ``encoder_model`` -- maps ``encoder_inputs`` to
          ``[state_decoder_h, state_decoder_c, z]``.
        * ``decoder_model`` -- maps
          ``[decinf_input, dec_input_state_c, dec_input_state_h]`` (note: c
          before h) to ``[probabilities, state_h, state_c]``.
    """
    # ------------------------------------------------------------------ encoder
    encoder_inputs = Input(shape=(timesteps, n_features), name="encoder_inputs")

    # Two stacked bidirectional LSTMs; dropout is applied on the first one only.
    encoder0 = Bidirectional(LSTM(n_encoder_units,
                                  dropout=dropout_rate,
                                  return_sequences=True),
                             name="bidirectional_encoder0")
    encoder1 = Bidirectional(LSTM(n_encoder_units,
                                  return_state=True),
                             name="bidirectional_encoder1")

    # intermediate outputs
    encoder_im_outputs = encoder0(encoder_inputs)

    # Only the final states are used; the sequence output is discarded.
    _, forward_h, forward_c, backward_h, backward_c = encoder1(encoder_im_outputs)

    # Summing (not concatenating) the four state tensors element-wise.
    state = Add(name='add_states')([forward_h, forward_c, backward_h, backward_c])

    # ------------------------------------------------------------ latent space
    z_mean = Dense(latent_dim,
                   name="z_mean",
                   kernel_initializer=tf.random_normal_initializer(stddev=0.001),
                   bias_initializer='zeros')(state)

    # NOTE(review): softplus keeps this output non-negative, i.e. the posterior
    # variance exp(z_log_var) can never drop below 1. Left unchanged so existing
    # checkpoints keep their meaning -- confirm this is intended.
    z_log_var = Dense(latent_dim,
                      name="z_log_var",
                      activation=tf.math.softplus,
                      kernel_initializer=tf.random_normal_initializer(stddev=0.001),
                      bias_initializer='zeros')(state)

    def sampling(args):
        """Reparameterised draw z = mean + std * eps with eps ~ N(0, epsilon_std)."""
        z_mean, z_log_var = args

        eps = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=epsilon_std)
        # z_log_var is the log *variance* (that is how the KL term below uses
        # it), so the standard deviation is exp(0.5 * z_log_var).
        return z_mean + K.exp(0.5 * z_log_var) * eps

    z = Lambda(sampling, name="z_sample")([z_mean, z_log_var])

    # -------------------------------------------------------- training decoder
    # Initial states for the first decoder LSTM are projected from z.
    state_decoder_h = Dense(n_decoder_units, activation='tanh', name="state_decoder_h")(z)
    state_decoder_c = Dense(n_decoder_units, activation='tanh', name="state_decoder_c")(z)

    # Every decoder timestep sees its input concatenated with z.
    z_repeated = RepeatVector(timesteps, name="z_repeated")(z)
    decoder_inputs = Input(shape=(timesteps, n_features), name="input_layer_decoder")
    decoder_train_input = Concatenate(axis=2, name="decoder_train_input")([decoder_inputs, z_repeated])

    decoder_lstm0 = LSTM(n_decoder_units,
                         return_sequences=True,
                         name="decoder_lstm0")

    decoder_lstm1 = LSTM(n_decoder_units,
                         return_sequences=True,
                         return_state=True,
                         name="decoder_lstm1")

    # intermediate outputs
    decoder_im_outputs = decoder_lstm0(decoder_train_input, initial_state=[state_decoder_h, state_decoder_c])
    decoder_outputs, _, _ = decoder_lstm1(decoder_im_outputs)

    decoder_dense = TimeDistributed(Dense(n_features, activation='softmax'), name="decoder_dense")
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # ------------------------------------------------------- inference encoder
    encoder_model = Model(encoder_inputs, [state_decoder_h, state_decoder_c, z])

    # ------------------------------------------------------- inference decoder
    # One timestep at a time: the input is a feature vector concatenated with z.
    decinf_input = Input(shape=(1, n_features+latent_dim), name="decinf_inputs")

    # define input states
    # NOTE(review): these are declared as (1, n_decoder_units) per sample, while
    # the training initial states are (n_decoder_units,) per sample; the Reshape
    # below compensates for the resulting output shape. Kept as-is because the
    # input shapes are part of decoder_model's interface -- confirm with the
    # inference code.
    dec_input_state_c = Input(shape=(1, n_decoder_units), name="d_input_state_c")
    dec_input_state_h = Input(shape=(1, n_decoder_units), name="d_input_state_h")
    dec_input_states = [dec_input_state_h, dec_input_state_c]

    # intermediate lstm outputs
    decinf_im_outputs = decoder_lstm0(decinf_input,
                                      initial_state=dec_input_states)

    # output is a vector of sequences, needs reshaping
    decinf_im_outputs = Reshape((1, n_decoder_units))(decinf_im_outputs)

    # NOTE(review): the state inputs above seed decoder_lstm0, whereas the
    # states returned here belong to decoder_lstm1 (which receives no initial
    # state). Feeding the outputs back as inputs mixes the two layers' states
    # -- confirm against the decoding loop.
    decinf_outputs, state_h, state_c = decoder_lstm1(decinf_im_outputs)
    decoder_states = [state_h, state_c]

    # Reuse the trained output layer. A separate Dense created here would start
    # from random weights that training never touches, so the inference decoder
    # would not reproduce what the training model learned.
    decinf_outputs = decoder_dense(decinf_outputs)
    decoder_model = Model([decinf_input, dec_input_state_c, dec_input_state_h], [decinf_outputs] + decoder_states)

    # --------------------------------------------------------------------- loss
    def vae_loss(y_true, y_pred):
        """Categorical cross-entropy plus a KL term weighted by 0.9."""
        xent_loss = K.categorical_crossentropy(y_true, y_pred)
        kl_loss = 0.9*(- 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)))
        return xent_loss + kl_loss

    # NOTE(review): Keras applies `decay` as lr / (1 + decay * iterations)
    # (time-based), not as a multiplicative per-step factor, and it has no lower
    # bound. With decay_rate = 0.9999 the rate is roughly halved after the first
    # update -- confirm this is the schedule that is wanted.
    optimizer = Adam(lr=learning_rate, amsgrad=True, decay=decay_rate)
    model.compile(optimizer=optimizer, loss=vae_loss, metrics=['acc'])

    return model, encoder_model, decoder_model


# define model: training network plus the inference encoder / decoder
model_settings = dict(
    n_encoder_units=n_encoder_units,
    n_decoder_units=n_decoder_units,
    n_features=n_features,
    timesteps=timesteps,
    latent_dim=latent_dim,
    epsilon_std=1.,
)
train, infenc, infdec = define_models(**model_settings)

In [None]:
# Parameters forwarded to every DataGenerator
params = {'dim': (timesteps, cardinality),
          'batch_size': batch_size,
          'shuffle': True}

# Datasets
# ID_list.npy holds a dict wrapped in a 0-d object array (hence `.item()`).
# Object arrays are stored with pickle, and NumPy >= 1.16.3 refuses to load
# them unless allow_pickle=True is passed explicitly.
# NOTE: unpickling executes code from the file -- only load files you trust.
ID_list = np.load("ID_list.npy", allow_pickle=True)

# Unpack the stored dict once and keep the two splits used below.
_id_splits = ID_list.item()
ID_list_dict = {}
ID_list_dict["train"] = _id_splits.get("train")
ID_list_dict["validation"] = _id_splits.get("validation")

#### Generators

In [None]:
# Generators: one per split, both built with the shared `params`
training_generator, validation_generator = [
    DataGenerator(ID_list_dict[split], **params)
    for split in ('train', 'validation')
]

#### Callbacks

In [None]:
class ExponentialLRDecay(Callback):
    """Per-minibatch exponential learning-rate decay with a lower bound.

    Before every batch the optimizer's learning rate is set to
    ``max(min_lr, initial_lr * decay_rate ** step)``, where ``step`` is the
    number of updates performed so far.

    The optimizer's built-in ``decay`` is switched off when training starts:
    Keras applies it as ``lr / (1 + decay * iterations)``, which is not the
    multiplicative schedule described by ``decay_rate`` / ``min_learning_rate``
    and would otherwise be applied on top of this one.

    Args:
        initial_lr: learning rate at step 0.
        decay_rate: multiplicative factor applied once per minibatch.
        min_lr: the learning rate is never set below this value.
    """

    def __init__(self, initial_lr, decay_rate, min_lr):
        super(ExponentialLRDecay, self).__init__()
        self.initial_lr = initial_lr
        self.decay_rate = decay_rate
        self.min_lr = min_lr
        self.step = 0

    def on_train_begin(self, logs=None):
        optimizer = self.model.optimizer
        # Neutralise the time-based decay so only this schedule is in effect.
        if hasattr(optimizer, 'decay'):
            K.set_value(optimizer.decay, 0.0)
        # Continue from the optimizer's own update counter (0 for a fresh one).
        self.step = int(K.get_value(optimizer.iterations))

    def on_batch_begin(self, batch, logs=None):
        lr = max(self.min_lr, self.initial_lr * self.decay_rate ** self.step)
        K.set_value(self.model.optimizer.lr, lr)
        self.step += 1


# Keep only the checkpoints that improve the validation loss.
filepath="weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
#earlystopping = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=60, verbose=1, mode='auto')
tensorboard = TensorBoard(log_dir="./", batch_size=batch_size)
lr_decay = ExponentialLRDecay(initial_lr=learning_rate,
                              decay_rate=decay_rate,
                              min_lr=min_learning_rate)

callbacks_list = [checkpoint, tensorboard, lr_decay]

In [None]:
# Disabled draft of a learning-rate schedule: the whole cell is a single string
# literal, so none of it is executed. Gaps to fix before reviving it:
#   * it reads `minimum_learning_rate`, but the config cell defines
#     `min_learning_rate`;
#   * the inner `schedule` function never returns a value;
#   * the decayed value is always `initial_lr * decay_rate`, so the decay
#     would not compound from one call to the next;
#   * nothing calls `step_decay_schedule`, and `lrHistory` is not added to
#     `callbacks_list`.
"""class lr_schedule(Callback):

    def step_decay_schedule(self, initial_lr=learning_rate, decay_rate=decay_rate):
        '''
        Wrapper function to create a LearningRateScheduler with step decay schedule.
        '''
        def schedule(epoch):

            lr = K.eval(self.model.optimizer.lr)

            if lr <= minimum_learning_rate:
                lr = minimum_learning_rate

            else:
                lr = initial_lr * decay_rate

            print("Learning rate", K.eval(self.model.optimizer.lr))
        
        return LearningRateScheduler(schedule)


lrHistory = lr_schedule()"""

#### Loading model

In [None]:
# Restore previously saved weights into the freshly built training model.
# Only the weights are read from this file; the architecture comes from
# define_models above.
checkpoint_file = "weights/weights.1379-3.40.hdf5"
train.load_weights(checkpoint_file)
print("Loaded model from disk")

#### Training

In [None]:
# Train model on dataset, validating after every epoch and running the
# callbacks configured above. verbose=2 prints one line per epoch.
train.fit_generator(
    generator=training_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    verbose=2,
    callbacks=callbacks_list,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    use_multiprocessing=False,
)

In [None]:
# Inert string literal listing reference hyper-parameter values; nothing in it
# is read by the code in this notebook. Several entries differ from the config
# cell (e.g. learning_rate=0.001 here vs 0.0001, batch_size=512 vs 64), and
# gradient clipping / KL annealing listed here are not implemented above.
# NOTE(review): the origin of these values is not stated -- presumably copied
# from another project's defaults; confirm and cite the source.
"""
max_seq_len=32,  # Maximum sequence length. Others will be truncated.
z_size=32,  # Size of latent vector z.
free_bits=0.0,  # Bits to exclude from KL loss per dimension.
max_beta=1.0,  # Maximum KL cost weight, or cost if not annealing.
beta_rate=0.0,  # Exponential rate at which to anneal KL cost.
batch_size=512,  # Minibatch size.
grad_clip=1.0,  # Gradient clipping. Recommend leaving at 1.0.
clip_mode='global_norm',  # value or global_norm.
# If clip_mode=global_norm and global_norm is greater than this value,
# the gradient will be clipped to 0, effectively ignoring the step.
grad_norm_clip_to_zero=10000,
learning_rate=0.001,  # Learning rate.
decay_rate=0.9999,  # Learning rate decay per minibatch.
min_learning_rate=0.00001, # Minimum learning rate.
"""