In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

from hyperas import optim
from hyperas.distributions import choice, uniform
from hyperopt import Trials, STATUS_OK, tpe


Using TensorFlow backend.


In [2]:
df = pd.read_csv("differenced_data.csv")
df.drop(["Date"], axis=1, inplace=True)

In [3]:
train_df = df[0:int(len(df)*.7)]
val_df = df[int(len(df)*.7):int(len(df)*.9)]
test_df = df[int(len(df)*.9):]

In [15]:
scaler = StandardScaler()
scaler.fit(train_df)

train_scaled = scaler.transform(train_df)
val_scaled = scaler.transform(val_df)
test_scaled = scaler.transform(test_df)

In [5]:
# Various window parameters
input_width = 6
label_width = 6
shift = 1
label_columns = None

column_indeces = {name: i for i, name in enumerate(train_df.columns)}
if label_columns is not None:
    label_column_indeces = {name: i for i, name in enumerate(label_columns)}

window_size = input_width + shift

input_slice = slice(0, input_width)
input_indeces = np.arange(window_size)[input_slice]

label_start = window_size - label_width
label_slice = slice(label_start, None)
label_indeces =  np.arange(window_size)[label_slice]

In [6]:
def split_window(features):
    inputs = features[:, input_slice, :]
    labels = features[:, label_slice, :]
    if label_columns:
        labels = tf.stack([labels[:, :, column_indeces[name]] for name in label_columns], axis=1)

    inputs.set_shape([None, input_width, None])
    labels.set_shape([None, label_width, None])

    return inputs, labels

In [7]:
def make_dataset(data):
    data = np.array(data, dtype=np.float32)
    dset = timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=32
    )

    dset = dset.map(split_window)
    return dset

In [8]:
model_train = make_dataset(train_scaled)
model_val = make_dataset(val_scaled)

In [10]:
    # # X_train = np.array(list(model_train.as_numpy_iterator()))[:,0]
    X_train  = np.array(list(model_train.as_numpy_iterator()))[:,0].flatten()
    # #594 batches with each batch containing an array of (32,6,12)

    # # y_train = np.array(list(model_train.as_numpy_iterator()))[:,1]
    y_train  = np.array(list(model_train.as_numpy_iterator()))[:,1].flatten()

    # # X_val = np.array(list(model_val.as_numpy_iterator()))[:,0]
    X_val = np.array(list(model_val.as_numpy_iterator()))[:,0].flatten()

    # # y_val = np.array(list(model_val.as_numpy_iterator()))[:,1]
    y_val = np.array(list(model_val.as_numpy_iterator()))[:,1].flatten()

In [11]:
def data(X_train, y_train, X_val, y_val):

    return X_train, y_train, X_val, y_val

In [13]:
def create_model(X_train, y_train, X_val, y_val):

    model = Sequential()
    model.add(LSTM({{choice([5, 10, 25, 50, 75, 100])}}, 
            input_shape=(X_train.shape[1], X_train.shape[2]))
            )

    model.add(Dense({{choice([10, 20, 50, 100])}}))
    model.add(Dense(1))
    model.compile(loss={{choice(["mae", "mse"])}}, 
                optimizer={{choice(["adam", "sgd", "rmsprop"])}}, 
                metrics=["mae", "mse"]
                )
    e_stop =  tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, mode="min")

    result = model.fit(X_train, y_train, epochs=25, batch_size=60, validation_split=0.1, verbose=2, callbacks = [e_stop], shuffle=False)

    # print(result.history)

    validation_loss = np.amin(result.history['val_loss'])
    print('Best Validation loss of epoch:', validation_loss)
    return {'loss': validation_loss, 'status': STATUS_OK, 'model':model}

In [16]:
best_run, best_model = optim.minimize(model=create_model,
                                        data=data,
                                        algo=tpe.suggest,
                                        max_evals=5,
                                        trials=Trials(),
                                        notebook_name = 'HyperParameter_Tuning_v2')
                                        
X_train, y_train, X_val, y_val = data()
print('Evaluation of best performing model:')
print(best_model.evaluate(X_val, y_val))
print("Best Performing Model Hyper-Parameters:")
print(best_run)

IndentationError: expected an indented block (<unknown>, line 137)