In [2]:
import os
import time
import requests
import csv
from io import StringIO

import pandas as pd
import numpy as np
import random as rn
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, max_error, r2_score

import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam, SGD, RMSprop, Adamax, Adagrad, Adadelta, Nadam
from keras.layers.core import Dense, Activation, Dropout
from keras.callbacks import EarlyStopping
from keras.wrappers.scikit_learn import KerasRegressor
from keras import backend as K

Using TensorFlow backend.


In [3]:
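# CUSTOM LOSS / METRIC FUNCTIONS written with the Keras backend (rmse and r_square are not built into the default Keras API; mse is, and is repeated here for completeness)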

# SOURCE:
# Simple regression example for Keras (v2.2.2) with Boston housing data
# @author: tobigithub
# Created on Wed Aug 15 18:44:28 2018


# root mean squared error (rmse) 
def rmse(y_true, y_pred):
    """Root mean squared error, reduced over the last axis.

    Args:
        y_true: Ground-truth values (a Keras backend tensor).
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        The square root of the mean squared difference along ``axis=-1``.
    """
    from keras import backend as kb

    squared_error = kb.square(y_pred - y_true)
    mean_squared_error_ = kb.mean(squared_error, axis=-1)
    return kb.sqrt(mean_squared_error_)

# mean squared error (mse) 
def mse(y_true, y_pred):
    """Mean squared error, reduced over the last axis.

    Args:
        y_true: Ground-truth values (a Keras backend tensor).
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        The mean of the squared differences along ``axis=-1``.
    """
    from keras import backend as kb

    residual = y_pred - y_true
    return kb.mean(kb.square(residual), axis=-1)

# coefficient of determination (R^2) 
def r_square(y_true, y_pred):
    """Coefficient of determination (R^2) over all elements of the inputs.

    Args:
        y_true: Ground-truth values (a Keras backend tensor).
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        ``1 - SS_res / (SS_tot + epsilon)``, where both sums of squares are
        taken over every element and the backend epsilon guards against
        division by zero when ``y_true`` is constant.
    """
    from keras import backend as kb

    residual_ss = kb.sum(kb.square(y_true - y_pred))
    centered_truth = y_true - kb.mean(y_true)
    total_ss = kb.sum(kb.square(centered_truth))
    return 1 - residual_ss / (total_ss + kb.epsilon())

In [4]:
# Load the dataset from a CSV file in the working directory.
energyFrame = pd.read_csv("UCI_data.csv")
# Keep a plain NumPy view of the table; later cells slice it positionally
# into features and target.
energy = energyFrame.values

In [5]:
# for reproducibility: seed every random number generator the notebook relies on
np.random.seed(12345)  # NumPy's global RNG
rn.seed(123)           # Python's built-in `random` module

# Keras draws its initial weights from the TensorFlow backend, so NumPy/`random`
# seeds alone do not make training repeatable. The seeding function was renamed
# between TensorFlow 1.x and 2.x, so pick whichever one this install provides.
if hasattr(tf.random, "set_seed"):
    tf.random.set_seed(12345)   # TensorFlow 2.x
else:
    tf.set_random_seed(12345)   # TensorFlow 1.x

# NOTE(review): these seeds apply to this kernel process only. The searches
# below run with n_jobs=-1 in separate worker processes, which presumably do
# not inherit this state - confirm before relying on bit-for-bit reproducibility.

In [6]:
# split the data
# Features: every column except the first and the last; target: the last column.
X = energy[:, 1:-1]
y = energy[:, -1]

# Hold out 20% of the rows as the outer test set. The split is pinned with an
# explicit random_state so that re-running this cell (or running cells out of
# order) always yields the same partition, independent of global RNG state.
X_train_outer, X_test_outer, y_train_outer, y_test_outer = train_test_split(
    X, y, test_size=0.2, random_state=12345
)

# Number of input features; used as the network's input dimension.
n_cols = X_train_outer.shape[1]

In [21]:
# baseline model used for the batch size / number of epochs search

def create_model():
    """Build and compile the baseline network.

    Architecture: one 12-unit ReLU hidden layer fed by ``n_cols`` inputs,
    followed by a single linear output unit. Compiled with the default
    ``"adam"`` optimizer and mean squared error as both loss and tracked metric.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    network = Sequential()
    hidden_layer = Dense(12, activation="relu", input_shape=(n_cols,))
    output_layer = Dense(1, activation="linear")
    network.add(hidden_layer)
    network.add(output_layer)
    network.compile(
        optimizer="adam",
        loss="mean_squared_error",
        metrics=[mse],
    )
    return network

In [22]:
# Wrap the Keras builder as a scikit-learn estimator; batch size and epochs are
# deliberately left unset here because the search below supplies them.
model = KerasRegressor(
    build_fn=create_model,
    verbose=1,
)
# Step names matter: search parameters address the network as `model__<param>`.
pipe = Pipeline(
    steps=[
        ("scale", MinMaxScaler()),
        ("model", model),
    ]
)

In [23]:
# search hyperparameters: batch size and number of epochs

batch_size = [5, 10, 25, 50]
epochs = [25, 50, 100, 250, 500]
param_dict = dict(model__batch_size=batch_size, model__epochs=epochs)

# Randomized search samples only a subset of the 4 x 5 = 20 combinations
# (10 candidates with the default settings), so the sampling is seeded to make
# the set of evaluated candidates repeatable between runs.
grid = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dict,
    n_jobs=-1,  # use all available CPU cores for the cross-validation fits
    cv=3,
    verbose=10,
    random_state=12345,
)
grid_result = grid.fit(X_train_outer, y_train_outer)

# Scores come back negative (higher is better), so the "best" score is the one
# closest to zero.
print("Best score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("Mean test score: %f, Std test score: %f with: %r" % (mean, stdev, param))

Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 13.4min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed: 15.6min
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 26.8min
[Parallel(n_jobs=-1)]: Done  27 out of  30 | elapsed: 65.8min remaining:  7.3min
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed: 107.5min finished


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
...
...
...
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500
Best parameters: -9387.301086 using {'model__epochs': 500, 'model__batch_size': 5}
Mean test score: -9596.318692, Std test score: 717.504311 with: {'model__epochs': 100, 'model__batch_size': 5}
Mean test score: -10981.558695, Std test score: 787.401826 with: {'model__epochs': 25, 'model__batch_size': 50}
Mean test score: -9473.746881, Std test score: 758.600109 with: {'model__epochs': 500, 'model__batch_size': 25}
Mean test score: -9986.960862, Std test score: 765.051480 with: {'model__epochs': 100, 'model__batch_size': 50}
Mean test score: -10075.522709, Std test score: 791.398500 with: {'model__epochs': 50, 'model__batch_size': 10}
Mean test score: -10221.037396, Std test score: 766.353051 with: {'mode

In [29]:
# TUNE LEARNING RATE
# The learning rate to tune depends on which optimizer is chosen and on the results of the batch size / epochs search above.

def create_model(learning_rate=0.01):
    """Build and compile the network with a configurable Adam learning rate.

    Args:
        learning_rate: Step size handed to the ``Adam`` optimizer.

    Returns:
        The compiled Keras ``Sequential`` model: a 12-unit ReLU hidden layer
        fed by ``n_cols`` inputs and a single linear output unit, trained on
        mean squared error (also tracked as a metric).
    """
    network = Sequential()
    hidden_layer = Dense(12, activation="relu", input_shape=(n_cols,))
    output_layer = Dense(1, activation="linear")
    network.add(hidden_layer)
    network.add(output_layer)
    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="mean_squared_error",
        metrics=[mse],
    )
    return network

In [30]:
# Training budget is fixed at 500 epochs with batch size 5 while the learning
# rate is tuned.
model = KerasRegressor(
    build_fn=create_model,
    epochs=500,
    batch_size=5,
    verbose=1,
)
# Step names matter: search parameters address the network as `model__<param>`.
pipe = Pipeline(
    steps=[
        ("scale", MinMaxScaler()),
        ("model", model),
    ]
)

In [35]:
learning_rate = [0.001, 0.01, 0.1, 0.25, 0.5]
param_grid = {"model__learning_rate": learning_rate}
# Exhaustive search over the candidate learning rates with 3-fold CV.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
    verbose=10,
)
grid_result = grid.fit(X_train_outer, y_train_outer)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[column]
    for column in ("mean_test_score", "std_test_score", "params")
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 246.3min finished


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
...
...
...
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500
Best: -9014.195436 using {'model__learning_rate': 0.01}
-9198.298861 (655.965349) with: {'model__learning_rate': 0.001}
-9014.195436 (787.387808) with: {'model__learning_rate': 0.01}
-9297.634027 (762.081967) with: {'model__learning_rate': 0.1}
-9602.858481 (874.751848) with: {'model__learning_rate': 0.25}
-11042.695133 (871.722187) with: {'model__learning_rate': 0.5}


In [7]:
# TUNE NETWORK WEIGHT INITIALISATION

def create_model(init_mode="uniform"):
    """Build and compile the network with a configurable weight initialiser.

    Args:
        init_mode: Name of the Keras kernel initialiser applied to both the
            hidden and the output layer.

    Returns:
        The compiled Keras ``Sequential`` model: a 12-unit ReLU hidden layer
        fed by ``n_cols`` inputs and a single linear output unit, optimised
        with Adam (learning rate 0.01) on mean squared error, which is also
        tracked as a metric.
    """
    network = Sequential()
    hidden_layer = Dense(
        12,
        kernel_initializer=init_mode,
        activation="relu",
        input_shape=(n_cols,),
    )
    output_layer = Dense(1, kernel_initializer=init_mode, activation="linear")
    network.add(hidden_layer)
    network.add(output_layer)
    network.compile(
        optimizer=Adam(learning_rate=0.01),
        loss="mean_squared_error",
        metrics=[mse],
    )
    return network

In [8]:
# Training budget is fixed at 500 epochs with batch size 5 while the weight
# initialiser is tuned.
model = KerasRegressor(
    build_fn=create_model,
    epochs=500,
    batch_size=5,
    verbose=1,
)
# Step names matter: search parameters address the network as `model__<param>`.
pipe = Pipeline(
    steps=[
        ("scale", MinMaxScaler()),
        ("model", model),
    ]
)

In [9]:
init_mode = ["uniform", "normal", "zero", "glorot_normal", "glorot_uniform"]
param_grid = {"model__init_mode": init_mode}
# Exhaustive search over the candidate initialisers with 3-fold CV.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
    verbose=2,
)
grid_result = grid.fit(X_train_outer, y_train_outer)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[column]
    for column in ("mean_test_score", "std_test_score", "params")
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 172.6min finished


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
...
...
...
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500
Best: -8787.795777 using {'model__init_mode': 'glorot_uniform'}
-9050.052618 (777.735861) with: {'model__init_mode': 'uniform'}
-8962.128831 (946.072533) with: {'model__init_mode': 'normal'}
-11007.294947 (843.683725) with: {'model__init_mode': 'zero'}
-8914.576357 (701.921851) with: {'model__init_mode': 'glorot_normal'}
-8787.795777 (633.583423) with: {'model__init_mode': 'glorot_uniform'}


In [17]:
# FINAL MODEL

def create_model():
    """Build and compile the final network with the tuned settings.

    Both layers use the ``"glorot_uniform"`` kernel initialiser and the model
    is optimised with Adam at a learning rate of 0.01.

    Returns:
        The compiled Keras ``Sequential`` model: a 12-unit ReLU hidden layer
        fed by ``n_cols`` inputs and a single linear output unit, trained on
        mean squared error (also tracked as a metric).
    """
    network = Sequential()
    hidden_layer = Dense(
        12,
        kernel_initializer="glorot_uniform",
        activation="relu",
        input_shape=(n_cols,),
    )
    output_layer = Dense(1, kernel_initializer="glorot_uniform", activation="linear")
    network.add(hidden_layer)
    network.add(output_layer)
    network.compile(
        optimizer=Adam(learning_rate=0.01),
        loss="mean_squared_error",
        metrics=[mse],
    )
    return network

In [18]:
# Final estimator: 500 epochs, batch size 5, one log line per epoch (verbose=2).
model = KerasRegressor(
    build_fn=create_model,
    epochs=500,
    batch_size=5,
    verbose=2,
)
# Features are min-max scaled before they reach the network.
final_model = Pipeline(
    steps=[
        ("scale", MinMaxScaler()),
        ("model", model),
    ]
)

In [19]:
# Train the final pipeline on the full outer training split; `result` holds
# whatever `fit` returns and is what the scoring cell below calls `.score` on.
result = final_model.fit(X_train_outer, y_train_outer)

Epoch 1/500
 - 5s - loss: 11075.0773 - mse: 11075.0635
Epoch 2/500
 - 4s - loss: 10434.3489 - mse: 10434.3545
Epoch 3/500
 - 3s - loss: 10137.1994 - mse: 10137.1982
Epoch 4/500
 - 4s - loss: 9939.5530 - mse: 9939.5576
Epoch 5/500
 - 4s - loss: 9835.1738 - mse: 9835.1758
Epoch 6/500
 - 4s - loss: 9771.0881 - mse: 9771.0947
Epoch 7/500
 - 4s - loss: 9712.3982 - mse: 9712.3984
Epoch 8/500
 - 5s - loss: 9672.5989 - mse: 9672.6074
Epoch 9/500
 - 4s - loss: 9619.4734 - mse: 9619.4629
Epoch 10/500
 - 4s - loss: 9594.8808 - mse: 9594.8848
...
...
...
Epoch 490/500
 - 3s - loss: 8545.5179 - mse: 8545.5234
Epoch 491/500
 - 4s - loss: 8548.5375 - mse: 8548.5352
Epoch 492/500
 - 5s - loss: 8556.9566 - mse: 8556.9492
Epoch 493/500
 - 3s - loss: 8556.1504 - mse: 8556.1504
Epoch 494/500
 - 3s - loss: 8540.5815 - mse: 8540.5850
Epoch 495/500
 - 3s - loss: 8545.8279 - mse: 8545.8262
Epoch 496/500
 - 3s - loss: 8552.7196 - mse: 8552.7246
Epoch 497/500
 - 3s - loss: 8553.2256 - mse: 8553.2246
Epoch 498/5

In [22]:
# The estimator's score comes back negative here (see the search outputs), so
# the sign is flipped to report a positive error on each split.
train_mse_score = -result.score(X_train_outer, y_train_outer)
test_mse_score = -result.score(X_test_outer, y_test_outer)

In [23]:
# Report the sign-flipped scores for both splits, two decimals each.
for template, value in (
    ("TRAINING ERROR: %.2f", train_mse_score),
    ("TEST ERROR: %.2f", test_mse_score),
):
    print(template % value)

TRAINING ERROR: 8470.90
TEST ERROR: 8177.98


In [24]:
# Pair the held-out targets with the final pipeline's predictions for them.
y_true, y_pred = y_test_outer, final_model.predict(X_test_outer)

In [25]:
# Evaluate each scikit-learn regression metric on the same (y_true, y_pred) pair.
test_mae, test_mse, test_rsquare, test_max = (
    metric(y_true, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, max_error)
)

In [26]:
# Print the test-set metrics computed above, three decimals each.
print("RESULTS:")
for template, value in (
    ("Mean squared error: %.3f", test_mse),
    ("Mean absolute error: %.3f", test_mae),
    ("R-squared score: %.3f", test_rsquare),
    ("Maximum residual error: %.3f", test_max),
):
    print(template % value)

RESULTS:
Mean squared error: 8177.984
Mean absolute error: 47.415
R-squared score: 0.218
Maximum residual error: 945.965
