[View in Colaboratory](https://colab.research.google.com/github/vincentei/predict_power_prices/blob/master/power_prices_RNN_v2.ipynb)

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import datetime as dt

from keras.models import Sequential
from keras.layers import Dense,LSTM,Dropout,Activation,GRU,RNN,TimeDistributed,RepeatVector,SimpleRNN,CuDNNGRU

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error,mean_absolute_error

In [0]:
# When True, the next cell opens the Colab upload dialog; when False that
# cell does nothing.
needUpload = True
# Length of the model's input window in days (multiplied by 24 further down
# to obtain the number of hourly time steps).
days_look_back = 7

In [47]:
if needUpload:
  # google.colab is imported here, inside the guard, so the import is only
  # attempted when an upload is actually requested.
  from google.colab import files

  # The result is used as a mapping: file name -> uploaded content.
  uploaded = files.upload()

  # Report every uploaded file together with the length of its content.
  for fn, content in uploaded.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(content)))

Saving make_prices_lstm2.csv to make_prices_lstm2 (1).csv
User uploaded file "make_prices_lstm2.csv" with length 1117728 bytes


In [0]:
# import the data
# Only the 'price' and 'year' columns are read from the CSV.
# NOTE(review): the recorded output of the upload cell shows the file being
# saved as "make_prices_lstm2 (1).csv"; in that situation this fixed name
# would read a previously uploaded copy - confirm the intended file is used.
df = pd.read_csv('make_prices_lstm2.csv',usecols=['price','year'])

In [49]:
# scale to make easy for RNN
# The unscaled prices are kept in 'price_raw' so that this cell is idempotent:
# re-running it always derives 'price' from the raw values instead of dividing
# an already-scaled column by 100 a second time.
if 'price_raw' not in df.columns:
  df['price_raw'] = df['price']
df['price'] = df['price_raw']/100
df.head(2)

Unnamed: 0,price,year
0,0.3243,2014
1,0.3249,2014


In [50]:
# One row per hour, so dividing the row count by 24 gives the number of days.
print(
  'there are {} hours in the dataset'.format(len(df)),
  'there are {} days in the dataset'.format(len(df)/24),
  sep='\n',
)

there are 38328 hours in the dataset
there are 1597.0 days in the dataset


In [0]:
# Build supervised samples with a sliding window that advances one day
# (24 hourly values) per step. Illustrated with a two-day look back:
#
#   X = [Mon, Tue        Y = [Wed
#        Tue, Wed             Thu
#        Wed, Thu]            Fri]
dataset = df['price'].values
look_back = days_look_back * 24
# Last start index whose look_back window is still followed by a complete
# 24-value target. Stopping here keeps every row of dataY the same length,
# even when the series does not hold a whole number of days (the previous
# bound could emit a final, shorter target and make dataY ragged).
last_start = len(dataset) - look_back - 24
dataX = []
dataY = []
for i in range(0, last_start + 1, 24):
  # input: look_back consecutive values starting at i
  dataX.append(dataset[i:i+look_back])
  # target: the 24 values immediately after that window
  dataY.append(dataset[i+look_back:i+look_back+24])

In [52]:
# to numpy: turn the lists of windows into 2-D arrays and report their shapes
dataX, dataY = np.asarray(dataX), np.asarray(dataY)
print(
  "X has shape {}".format(dataX.shape),
  "Y has shape {}".format(dataY.shape),
  sep="\n",
)

X has shape (1590, 168)
Y has shape (1590, 24)


In [53]:
# check: inspect the last input window (look_back consecutive scaled prices)
dataX[-1]

array([0.4703, 0.38  , 0.32  , 0.2209, 0.2539, 0.35  , 0.3952, 0.5684,
       0.6419, 0.664 , 0.5731, 0.4875, 0.4   , 0.41  , 0.4283, 0.434 ,
       0.3697, 0.4398, 0.4576, 0.5287, 0.5259, 0.6   , 0.55  , 0.524 ,
       0.5809, 0.501 , 0.374 , 0.336 , 0.336 , 0.3226, 0.2601, 0.4477,
       0.3923, 0.3813, 0.34  , 0.321 , 0.32  , 0.324 , 0.274 , 0.251 ,
       0.402 , 0.2368, 0.334 , 0.499 , 0.5588, 0.7957, 0.7748, 0.5756,
       0.424 , 0.574 , 0.39  , 0.3263, 0.338 , 0.387 , 0.48  , 0.5174,
       0.5173, 0.5   , 0.5571, 0.42  , 0.4177, 0.3893, 0.3744, 0.3818,
       0.413 , 0.49  , 0.525 , 0.5967, 0.5812, 0.5538, 0.5617, 0.5873,
       0.3996, 0.364 , 0.3026, 0.3233, 0.3515, 0.3285, 0.3205, 0.3614,
       0.3995, 0.4121, 0.399 , 0.397 , 0.374 , 0.3101, 0.3   , 0.3   ,
       0.3   , 0.349 , 0.38  , 0.4   , 0.4   , 0.4   , 0.4292, 0.5839,
       0.39  , 0.2158, 0.1706, 0.1492, 0.2053, 0.2717, 0.32  , 0.1686,
       0.259 , 0.35  , 0.324 , 0.313 , 0.388 , 0.32  , 0.374 , 0.312 ,
      

In [54]:
# check the Y: the 24 scaled prices that immediately follow the first input window
dataY[0]

array([0.271 , 0.2649, 0.2631, 0.2598, 0.2646, 0.2699, 0.3031, 0.4642,
       0.5796, 0.5782, 0.537 , 0.5451, 0.5646, 0.543 , 0.5111, 0.47  ,
       0.5206, 0.7001, 0.65  , 0.6189, 0.6303, 0.55  , 0.4744, 0.4496])

In [0]:
# split in train and test: the first numtraindays samples are used for
# training, everything after them for testing (chronological, no shuffling)
numtraindays = 3*365
trainX, testX = dataX[:numtraindays], dataX[numtraindays:]
trainY, testY = dataY[:numtraindays], dataY[numtraindays:]

In [56]:
# reshape X into [samples,timestep,features]; Y gets the same 3-D layout
# with 24 time steps and a single feature
trainX = np.reshape(trainX, (numtraindays, look_back, 1))
trainY = np.reshape(trainY, (numtraindays, 24, 1))
print(trainX.shape, trainY.shape, sep="\n")

(1095, 168, 1)
(1095, 24, 1)


In [57]:
# number of samples left over for testing
len(testX)

495

In [58]:
# Same [samples,timestep,features] layout as the training arrays, sized by
# however many samples ended up in the test split.
numtestdays = len(testX)
testX = np.reshape(testX, (numtestdays, look_back, 1))
testY = np.reshape(testY, (numtestdays, 24, 1))
print(testX.shape, testY.shape, sep="\n")

(495, 168, 1)
(495, 24, 1)


In [0]:
def model_GRU(n_neurons, batch_size,n_epoch,dropout,testX):
  """Build, train and score a two-layer GRU sequence model.

  The network is a CuDNNGRU layer whose output is repeated 24 times and fed
  to a second CuDNNGRU that returns a sequence, followed by a per-time-step
  Dense(1) with linear activation. Dropout is applied after each GRU layer.

  Besides its arguments the function reads the notebook-level variables
  ``trainX``, ``trainY``, ``testY`` and ``look_back``.

  Args:
    n_neurons: number of units in each GRU layer.
    batch_size: batch size passed to ``model.fit``.
    n_epoch: number of training epochs.
    dropout: dropout rate used after both GRU layers.
    testX: inputs to predict on; scored against the global ``testY``, so it
      must contain the same number of samples.

  Returns:
    Mean absolute error between ``testY`` and the predictions, multiplied by
    100 (prices were divided by 100 before training, so this undoes that
    scaling).
  """

  # define RNN configuration
  # NOTE(review): this seeds NumPy only; whether that makes the Keras backend
  # deterministic is not visible from here - confirm if reproducibility matters.
  np.random.seed(5)

  # create RNN
  model = Sequential()

  model.add(CuDNNGRU(n_neurons, input_shape=(look_back, 1)))
  model.add(Dropout(dropout))
  # repeat the first GRU's output once per predicted time step
  model.add(RepeatVector(24))

  model.add(CuDNNGRU(n_neurons, return_sequences=True))
  model.add(Dropout(dropout))

  model.add(TimeDistributed(Dense(1,activation = 'linear')))
  model.compile(loss='mean_squared_error', optimizer='adam')

  #print(model.summary())

  model.fit(trainX, trainY, epochs=n_epoch, batch_size=batch_size, verbose=0)

  testPredict = model.predict(testX)

  # Flatten targets and predictions to one column each. Using -1 lets NumPy
  # infer the length; the previous code relied on a variable `days` that is
  # never assigned in any executable cell and raised NameError on a fresh
  # kernel.
  a = testY.reshape(-1,1)
  b = testPredict.reshape(-1,1)
  MAE = mean_absolute_error(a, b)*100

  return MAE

In [60]:
# Short trial run (2 epochs) with one hand-picked configuration.
MAE = model_GRU(n_neurons=20, batch_size=30, n_epoch=2, dropout=0.3, testX=testX)
MAE = np.round(MAE, 2)
print(MAE)

8.35


In [0]:
# Candidate values for the grid search; every combination is trained once.
l_n_neurons = list(range(10, 101, 10))
l_batch_size = [32, 64]
l_n_epoch = [500, 1000]
l_dropout = [
  0, 0.1, 0.2, 0.3, 0.4, 0.5,
  0.6, 0.7, 0.8, 0.9, 0.95,
]

# Much smaller alternative grid, left disabled:
#l_n_neurons = [1,5]
#l_batch_size = [32]
#l_n_epoch = [10,50]
#l_dropout = [0,0.1]

In [0]:
# Exhaustive grid search: train one model per combination and print its MAE.
# The generator walks the combinations in the same order as four nested
# loops (neurons outermost, dropout innermost).
for n_neurons, batch_size, n_epoch, dropout in (
    (n, b, e, d)
    for n in l_n_neurons
    for b in l_batch_size
    for e in l_n_epoch
    for d in l_dropout
):
  MAE = model_GRU(n_neurons, batch_size, n_epoch, dropout, testX)
  print("n_neurons: {}, batch_size: {}, n_epoch: {}, dropout: {}, MAE: {}".format(
      n_neurons, batch_size, n_epoch, dropout, np.round(MAE, 2)))

n_neurons: 10, batch_size: 32, n_epoch: 500, dropout: 0, MAE: 6.23
n_neurons: 10, batch_size: 32, n_epoch: 500, dropout: 0.1, MAE: 6.31


In [24]:
# make predictions
#trainPredict = model.predict(trainX)
#testPredict = model.predict(testX)
#print(testPredict.shape)

(495, 24, 1)


In [0]:
# calc the mean absolute error
#print(testY.shape)
#days = testPredict.shape[0]
#a = testY.reshape(days*24,1)
#b = testPredict.reshape(days*24,1)
#mean_absolute_error(a, b)*100

In [0]:
#mean_squared_error(a, b)

In [29]:
# Disabled experiment kept as a bare string literal (nothing in it executes):
# a stateful GRU variant that is trained one epoch at a time with
# reset_states() after every epoch. Being the only expression in the cell,
# the string itself is echoed as the cell output.
# NOTE(review): the input length here is 48 steps, which differs from
# look_back (days_look_back * 24) used by the active cells - adjust before
# re-enabling.
"""

# define RNN configuration
np.random.seed(5)
n_neurons = 5
batch_size = 73
n_epoch = 100
# create RNN
model = Sequential()
model.add(GRU(n_neurons, batch_input_shape=(batch_size, 48, 1),stateful=True))
#model.add(LSTM(n_neurons, batch_input_shape=(batch_size, 48, 1),stateful=True))
model.add(RepeatVector(24))
model.add(GRU(n_neurons, return_sequences=True,stateful=True))
#model.add(LSTM(n_neurons, return_sequences=True,stateful=True))
model.add(TimeDistributed(Dense(1,activation = 'linear')))
model.compile(loss='mean_squared_error', optimizer='adam')
print(model.summary())
# train RNN
#model.fit(trainX, trainY, epochs=n_epoch, batch_size=batch_size, verbose=2)
# train LSTM
for epoch in range(n_epoch):

  # fit model for one epoch on this sequence
  model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
  model.reset_states()
  
"""  

"\n\n  # define RNN configuration\n  np.random.seed(5)\n  n_neurons = 5\n  batch_size = 73\n  n_epoch = 100\n  # create RNN\n  model = Sequential()\n  model.add(GRU(n_neurons, batch_input_shape=(batch_size, 48, 1),stateful=True))\n  #model.add(LSTM(n_neurons, batch_input_shape=(batch_size, 48, 1),stateful=True))\n  model.add(RepeatVector(24))\n  model.add(GRU(n_neurons, return_sequences=True,stateful=True))\n  #model.add(LSTM(n_neurons, return_sequences=True,stateful=True))\n  model.add(TimeDistributed(Dense(1,activation = 'linear')))\n  model.compile(loss='mean_squared_error', optimizer='adam')\n  print(model.summary())\n  # train RNN\n  #model.fit(trainX, trainY, epochs=n_epoch, batch_size=batch_size, verbose=2)\n  # train LSTM\n  for epoch in range(n_epoch):\n\n    # fit model for one epoch on this sequence\n    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)\n    model.reset_states()\n  \n"

In [0]:
# https://stackoverflow.com/questions/38294046/simple-recurrent-neural-network-input-shape
#http://adventuresinmachinelearning.com/keras-lstm-tutorial/
#np.random.seed(1337)

#sample_size = 256
#x_seed = [1, 0, 0, 0, 0, 0]
#y_seed = [1, 0.8, 0.6, 0, 0, 0]

#x_train = np.array([[x_seed] * sample_size]).reshape(sample_size,len(x_seed),1)
#y_train = np.array([[y_seed]*sample_size]).reshape(sample_size,len(y_seed),1)

#model=Sequential()
#model.add(SimpleRNN(input_dim  =  1, output_dim = 50, return_sequences = True))
#model.add(TimeDistributed(Dense(output_dim = 1, activation  =  "sigmoid")))
#model.compile(loss = "mse", optimizer = "rmsprop")
#print(model.summary())
#model.fit(x_train, y_train, nb_epoch = 10, batch_size = 32)

#print(model.predict(np.array([[[1],[0],[0],[0],[0],[0]]])))
#[[[ 0.87810659]
#[ 0.80646527]
#[ 0.61600274]
#[ 0.01652312]
#[ 0.00930419]
#[ 0.01328572]]]