In [None]:
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit,train_test_split

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
	dataX, dataY = [], []
	for i in range(len(dataset)-look_back-1):
		a = dataset[i:(i+look_back), 0]
		dataX.append(a)
		dataY.append(dataset[i + look_back, 0])
	return numpy.array(dataX), numpy.array(dataY)

In [None]:
# fix random seed for reproducibility
numpy.random.seed(7)

In [None]:
#load the dataset
dataframe = read_csv('scada_data.csv', index_col=['time','turbine_num'],engine='python')
dataset = dataframe.values
dataset = dataset.astype('float32')
print(dataframe)
print(dataset)

                                 wind_speed        kw  ...   rt  eect
time                turbine_num                        ...           
2015-11-01 00:00:00 22             0.148473  0.009655  ...  0.0   0.0
2015-11-01 00:10:00 22             0.125081  0.004962  ...  0.0   0.0
2015-11-01 00:20:00 22             0.121183  0.004913  ...  0.0   0.0
2015-11-01 00:30:00 22             0.137752  0.004454  ...  0.0   0.0
2015-11-01 00:40:00 22             0.171540  0.040889  ...  0.0   0.0
...                                     ...       ...  ...  ...   ...
2015-12-31 23:10:00 21             0.172515  0.069347  ...  0.0   0.0
2015-12-31 23:20:00 21             0.166017  0.056484  ...  0.0   0.0
2015-12-31 23:30:00 21             0.147498  0.018113  ...  0.0   0.0
2015-12-31 23:40:00 21             0.157570  0.002269  ...  0.0   0.0
2015-12-31 23:50:00 21             0.152047  0.007971  ...  0.0   0.0

[17125 rows x 20 columns]
[[0.14847304 0.0096552  0.06469298 ... 0.         0.         0.

In [None]:
#normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

In [None]:
#split into training and test data
train, test = train_test_split(dataset, test_size=0.2)

In [None]:
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [None]:
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
print(trainX)
print(testX)

[[[0.24063517]]

 [[0.482899  ]]

 [[0.68159604]]

 ...

 [[0.20236154]]

 [[0.25040716]]

 [[0.34079802]]]
[[[0.14942996]]

 [[0.49511397]]

 [[0.21701951]]

 ...

 [[0.33916938]]

 [[0.04193811]]

 [[0.59975564]]]


In [None]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=11, batch_size=2, verbose=2)

Epoch 1/11
6849/6849 - 11s - loss: 0.0276 - 11s/epoch - 2ms/step
Epoch 2/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step
Epoch 3/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step
Epoch 4/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step
Epoch 5/11
6849/6849 - 10s - loss: 0.0262 - 10s/epoch - 1ms/step
Epoch 6/11
6849/6849 - 10s - loss: 0.0262 - 10s/epoch - 1ms/step
Epoch 7/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step
Epoch 8/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step
Epoch 9/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step
Epoch 10/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step
Epoch 11/11
6849/6849 - 9s - loss: 0.0262 - 9s/epoch - 1ms/step


<keras.callbacks.History at 0x7fd881996650>

In [None]:
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

In [None]:
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

ValueError: ignored

In [None]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

TypeError: ignored

In [None]:
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

In [None]:
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict

In [1]:
# # plot baseline and predictions
# plt.plot(scaler.inverse_transform(dataset))
# plt.plot(trainPredictPlot)
# plt.plot(testPredictPlot)
# plt.show()