In [319]:
# import some libraries
import pandas as pd # read_csv
import numpy as np # fast math
import sklearn.preprocessing as sk_pre #normalizer

import keras #deeplearning library - backend is tensorflow
import sklearn.metrics as mt #MSE metric
import math #sqrt
import matplotlib.pyplot as plt #plot graphs
%matplotlib inline 


In [320]:
# Read the QLD records; the first CSV column supplies the row index.
dataset = pd.read_csv('./data/QLD_all.csv', index_col=0)
# Give the five remaining columns short names, then discard `type`.
dataset.columns = ['region', 'date', 'demand', 'price', 'type']
dataset = dataset.drop(['type'], axis='columns')

In [321]:
# Preview the first rows only instead of rendering the whole frame.
dataset.head(10)

Unnamed: 0,region,date,demand,price
0,QLD1,1999/01/01 00:30,3987.83333,19.34
1,QLD1,1999/01/01 01:00,3783.16667,17.47
2,QLD1,1999/01/01 01:30,3621.33333,17.08
3,QLD1,1999/01/01 02:00,3487.50000,15.84
4,QLD1,1999/01/01 02:30,3386.50000,15.68
5,QLD1,1999/01/01 03:00,3337.50000,15.56
6,QLD1,1999/01/01 03:30,3311.66667,15.79
7,QLD1,1999/01/01 04:00,3298.33333,15.68
8,QLD1,1999/01/01 04:30,3265.00000,16.00
9,QLD1,1999/01/01 05:00,3241.66667,16.15


In [322]:
# Work on first differences of the price, aiming for a stationary series.
# diff() leaves a NaN in the first position, which is sliced off.
price_diff = dataset['price'].diff().iloc[1:]

In [323]:
# Frame the series as a supervised problem: the previous price change is
# the input (`price`) and the current price change is the target (`label`).
supervised_dataset = pd.DataFrame(
    {'price': price_diff.shift(), 'label': price_diff},
    columns=['price', 'label'],
)
# shift() leaves the first input empty; fill it with 0.
supervised_dataset = supervised_dataset.fillna(0, axis=1)


In [324]:
# Show the first five input/target pairs.
supervised_dataset.head(5)


Unnamed: 0,price,label
1,0.0,-1.87
2,-1.87,-0.39
3,-0.39,-1.24
4,-1.24,-0.16
5,-0.16,-0.12


In [325]:
# Inputs as a single-column matrix, targets as a flat vector.
X = supervised_dataset['price'].values.reshape(-1, 1)
y = supervised_dataset['label'].values
# Scaler that maps the fitted value range onto [-1, 1].
scaler = sk_pre.MinMaxScaler(feature_range=(-1, 1))
X.shape

(315585, 1)

In [326]:
# Learn the scaling from the inputs and apply it to inputs and targets alike.
# NOTE(review): the fit covers every row of X, including the rows that the
# later split holds out for testing.
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.transform(y.reshape(X.shape))


In [327]:
# Chronological train/test split: the first `ts_size` samples train the
# model, everything after them is held out for evaluation.
ts_size = 250000

# Refit the scaler on the training rows only, so the value range of the
# held-out period does not leak into preprocessing. This supersedes the
# earlier fit on the full series; the same scaler object is used later to
# invert the forecasts, so both directions stay consistent.
scaler.fit(X[:ts_size])
X_scaled = scaler.transform(X)
y_scaled = scaler.transform(y.reshape(-1, 1))

# Inputs are shaped (samples, 1, 1) for the recurrent layer; targets are
# flattened to one value per sample.
X_train = X_scaled[:ts_size].reshape(-1, 1, 1)
X_test = X_scaled[ts_size:].reshape(-1, 1, 1)
y_train = y_scaled[:ts_size].ravel()
y_test = y_scaled[ts_size:].ravel()

train_size = X_train.shape[0]
test_size = X_test.shape[0]
# Position of the first test sample within the full sample sequence.
test_start_idx = train_size

In [None]:
def fit_lstm(X, y, batch_size, nb_epoch, neurons):
    """Build and train a single-layer stateful LSTM regressor.

    Parameters
    ----------
    X : array-like with a three-element ``shape``
        Training inputs. ``X.shape[1]`` and ``X.shape[2]`` define the
        per-sample input shape of the LSTM layer.
    y : array-like
        Training targets, passed unchanged to ``model.fit``.
    batch_size : int
        Batch size fixed into the layer's ``batch_input_shape`` and used
        for every ``fit`` call.
    nb_epoch : int
        Number of passes over the training data.
    neurons : int
        First argument of the LSTM layer (its number of units).

    Returns
    -------
    The compiled and trained ``keras`` ``Sequential`` model.
    """
    print(X.shape)
    model = keras.models.Sequential()
    # The layer comes from the public `keras.layers` namespace, the same
    # place `Dense` is taken from, rather than the `recurrent` submodule.
    model.add(keras.layers.LSTM(
        neurons,
        batch_input_shape=(batch_size, X.shape[1], X.shape[2]),
        stateful=True,
    ))
    model.add(keras.layers.Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # One epoch per fit call so the recurrent state can be cleared between
    # passes; shuffle=False keeps the samples in their given order.
    for i in range(nb_epoch):
        print('Epoch #', i)
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=5, shuffle=False)
        model.reset_states()
    return model

In [None]:
# Training hyper-parameters.
epoch_num = 20   # passes over the training data
neuron_num = 4   # units in the LSTM layer
batch_size = 1   # samples per batch

# Train the network on the training split.
lstm_model = fit_lstm(
    X_train,
    y_train,
    batch_size=batch_size,
    nb_epoch=epoch_num,
    neurons=neuron_num,
)

(250000, 1, 1)
Epoch # 0
Epoch 1/1


In [None]:
# Forecast the held-out period. The stateful network has its batch size
# fixed at construction, so predict with the same `batch_size` that was
# used for training rather than a literal. X_test already has the
# (samples, 1, 1) layout the model expects.
yhat = lstm_model.predict(X_test, batch_size=batch_size)
X_test.shape

In [None]:
# Turn the flat test targets into a single column.
y_test = np.reshape(y_test, (test_size, 1))

In [None]:
# Undo the scaling of the predicted price changes. The scaler was fitted
# on a single feature, so it is given exactly one column: the forecasts.
predicted_diff = scaler.inverse_transform(yhat.reshape(test_size, 1))[:, 0]

# Each predicted change is added to the price observed at the matching
# test position to turn it back into a price level.
previous_price = dataset.price.values[test_start_idx:test_start_idx + test_size]

# NOTE: this overwrites `yhat`; re-run the forecast cell before running
# this cell a second time, otherwise the inversion is applied twice.
yhat = predicted_diff + previous_price

In [None]:
# Sanity check: size of the price slice that starts at the first test
# position and spans test_size + 1 rows.
dataset['price'].values[test_start_idx:test_start_idx + test_size + 1].shape

In [None]:
# Rows one step after each test input: these hold the prices being forecast.
target_rows = slice(test_start_idx + 1, test_start_idx + test_size + 1)

# Forecasts next to the actual prices, indexed by the matching timestamps.
result = pd.DataFrame(
    {'predicted': yhat, 'expected': dataset.price.values[target_rows]},
    columns=['predicted', 'expected'],
    index=dataset.date.values[target_rows],
)
# Forecasts that are not strictly positive are replaced by 0.
result['predicted_fixed'] = np.where(result['predicted'] > 0, result['predicted'], 0)
result


In [None]:
# Mean of the actual prices over the test period.
result['expected'].mean()

In [None]:
# Variance of the actual prices over the test period.
result['expected'].var()

In [None]:
# Mean of the raw (unclipped) forecasts.
result['predicted'].mean()

In [None]:
# Variance of the raw (unclipped) forecasts.
result['predicted'].var()

In [None]:
# Plot the clipped forecasts on a wide canvas.
plt.rcParams['figure.figsize'] = (20, 10)
result['predicted_fixed'].plot()

# Root-mean-squared error between actual prices and clipped forecasts.
rmse = math.sqrt(
    mt.mean_squared_error(result['expected'], result['predicted_fixed'])
)
rmse

In [None]:
# "Fake" performance report: RMSE restricted to the periods whose actual
# price is below 1000, so the largest prices do not enter the error.
fake_ind = result.expected < 1000
# Boolean selection through .loc; the older .ix indexer is deprecated and
# no longer exists in pandas 1.0 and later.
fake_expected = result.expected.loc[fake_ind].values
fake_predicted = result.predicted_fixed.loc[fake_ind].values

fake_rmse = math.sqrt(mt.mean_squared_error(fake_expected, fake_predicted))
fake_rmse

In [None]:
# Overlay the actual and forecast prices of the filtered periods.
ax = plt.gca()
ax.plot(fake_expected)
ax.plot(fake_predicted)

In [None]:
# Actual prices over the whole test period.
result['expected'].plot()

In [None]:
# Bound that the absolute errors are compared against later on.
threshold = 20
# Absolute gap between the actual price and the raw (unclipped) forecast.
result['difference'] = (result['expected'] - result['predicted']).abs()


In [None]:
# Flag the periods whose absolute error stays strictly below the threshold.
result['thresholded'] = result['difference'].lt(threshold)

In [None]:
# Fraction of test periods whose absolute error is below the threshold.
thresholded_arr = result['thresholded'].values
thresholded_arr.mean()