In [None]:
    import pandas as pd
    import numpy as np
    from sklearn.preprocessing import StandardScaler
    from tensorflow.keras.preprocessing import timeseries_dataset_from_array
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense, Dropout
    from tensorflow.keras.layers import Activation
    import tensorflow as tf

    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error

    from hyperas import optim
    from tensorflow.keras.models import Sequential
    from hyperopt import Trials, tpe, STATUS_OK


In [None]:
def data():
    """Load the differenced series and build windowed train/validation datasets.

    Reads ``differenced_data.csv``, drops the ``Date`` column, takes the first
    70% of rows for training and the next 20% for validation (the final 10% is
    left unused here), standardises both splits with statistics fitted on the
    training rows only, and slices each split into sliding windows.

    Every window spans ``input_width + shift`` consecutive rows. The model
    input is the first ``input_width`` rows of a window and the label is its
    last ``label_width`` rows, i.e. the input sequence moved forward by
    ``shift`` steps.

    NOTE(review): hyperas works from this function's source text, so it takes
    no arguments and keeps its helpers nested inside the body - confirm before
    moving anything out to module level.

    Returns:
        tuple: ``(train_dataset, val_dataset)`` - two batched ``tf.data``
        datasets whose elements are ``(inputs, labels)`` pairs.
    """
    df = pd.read_csv("differenced_data.csv")
    df = df.drop(columns=["Date"])

    # Chronological split by row position; rows past ``val_end`` are the
    # hold-out portion and are deliberately not touched in this function.
    train_end = int(len(df) * .7)
    val_end = int(len(df) * .9)
    train_df = df.iloc[:train_end]
    val_df = df.iloc[train_end:val_end]

    # Fit the scaler on training rows only so validation statistics do not
    # leak into the scaling parameters.
    scaler = StandardScaler()
    scaler.fit(train_df)
    train_scaled = scaler.transform(train_df)
    val_scaled = scaler.transform(val_df)

    # Window geometry.
    input_width = 6
    label_width = 6
    shift = 1
    label_columns = None  # None -> every column is predicted
    batch_size = 32

    column_indeces = {name: i for i, name in enumerate(train_df.columns)}

    window_size = input_width + shift
    input_slice = slice(0, input_width)
    label_slice = slice(window_size - label_width, None)

    def split_window(features):
        """Split a batch of windows into (inputs, labels) along the time axis."""
        inputs = features[:, input_slice, :]
        labels = features[:, label_slice, :]
        if label_columns:
            # Stack the selected columns on the LAST axis so labels stay
            # (batch, time, features); stacking on axis 1 would swap the time
            # and feature axes and contradict the set_shape call below.
            labels = tf.stack(
                [labels[:, :, column_indeces[name]] for name in label_columns],
                axis=-1,
            )

        # Slicing drops static shape information; restore the known time dims.
        inputs.set_shape([None, input_width, None])
        labels.set_shape([None, label_width, None])

        return inputs, labels

    def make_dataset(array):
        """Turn a 2-D (rows, features) array into shuffled, batched windows."""
        array = np.array(array, dtype=np.float64)
        windows = timeseries_dataset_from_array(
            data=array,
            targets=None,
            sequence_length=window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=batch_size,
        )
        return windows.map(split_window)

    # Build each dataset exactly once, through the shared helper, so the
    # window length always follows ``window_size`` instead of a repeated
    # literal.
    train_dataset = make_dataset(train_scaled)
    val_dataset = make_dataset(val_scaled)

    return train_dataset, val_dataset

In [None]:
def create_model(train_dataset, val_dataset):
    """Build, train and score one LSTM candidate for the hyperas search.

    The double-brace expressions below are hyperas templates: hyperas rewrites
    them into sampled hyper-parameter values before this function runs, so the
    function is not meant to be called directly as plain Python.
    NOTE(review): the template positions are kept exactly as they were because
    hyperas presumably derives the reported parameter names from the token in
    front of each template - confirm before moving them into variables.

    Args:
        train_dataset: batched dataset of (inputs, labels) used for fitting.
        val_dataset: batched dataset of (inputs, labels) used for validation
            and early stopping.

    Returns:
        dict: ``loss`` (lowest validation loss seen over the epochs),
        ``status`` (``STATUS_OK``) and ``model`` (the trained Keras model).
    """
    model = Sequential()
    # return_sequences=True keeps one output per time step, so the Dense
    # layers below are applied to every step of the sequence.
    model.add(LSTM({{choice([5, 10, 25, 50, 75, 100])}},
                   return_sequences=True))

    model.add(Dense({{choice([10, 20, 50, 100])}}))
    model.add(Activation('relu'))
    # 12 output units - presumably one per feature column in the data;
    # TODO confirm against the CSV.
    model.add(Dense(12))
    model.compile(loss={{choice(["mae", "mse"])}},
                  optimizer={{choice(["adam", "sgd", "rmsprop"])}},
                  metrics=["mae", "mse"])

    # restore_best_weights asks Keras to roll the model back to the epoch with
    # the lowest val_loss, so the returned model corresponds to the loss that
    # is reported to hyperopt rather than to whatever the last epoch produced.
    # NOTE(review): some Keras versions only restore when stopping actually
    # triggers before the final epoch.
    e_stop = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=3,
        mode="min",
        restore_best_weights=True,
    )

    # The inputs are already-batched tf.data datasets, so neither batch_size
    # nor shuffle is passed: Keras documents that batch_size must not be given
    # for datasets and that shuffle is ignored for them.
    result = model.fit(
        train_dataset,
        epochs=25,
        validation_data=val_dataset,
        verbose=2,
        callbacks=[e_stop],
    )

    validation_loss = np.amin(result.history['val_loss'])
    print('Best Validation loss of epoch:', validation_loss)
    return {'loss': validation_loss, 'status': STATUS_OK, 'model': model}

In [None]:
# Run the hyper-parameter search: up to 5 evaluations of `create_model`,
# driven by the TPE suggestion algorithm, on the datasets built by `data`.
search_trials = Trials()
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    algo=tpe.suggest,
    max_evals=5,
    trials=search_trials,
    notebook_name='HyperParameter_Tuning_v3',
)

# Rebuild the datasets in this namespace so the winning model can be scored
# on the validation split.
train_dataset, val_dataset = data()

print('Evaluation of best performing model:')
val_scores = best_model.evaluate(val_dataset)
print(val_scores)

print("Best Performing Model Hyper-Parameters:")
print(best_run)