In [144]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout

Using TensorFlow backend.


In [64]:
# Read the semicolon-separated CSV and index the frame by its parsed
# 'datetime' column so rows can later be sliced by date strings.
df = pd.read_csv(
    './data/SR99_VDS1005210_2016_fill.csv',
    sep=';',
    index_col='datetime',
    parse_dates=True,
)

In [152]:
# Chronological split: rows up to and including October 2016 are used for
# training, rows from November 2016 onwards are held out for testing.
train = df.loc[:'2016-10'].copy()
test = df.loc['2016-11':].copy()

# Fit the standardizer on the training rows only; the scaler expects a 2-D
# (n_samples, 1) array, hence the reshape of the 'flow_5' column.
scaler = StandardScaler()
train['flow_scaler'] = scaler.fit_transform(train['flow_5'].values.reshape(-1, 1))

In [137]:
def series2supervised(data, time_steps=20, size=(1, 2), year=2016):
    """Turn a month range of a time-indexed series into sliding-window samples.

    The rows of ``data`` between month ``size[0]`` and month ``size[1]`` of
    ``year`` (both months included) are selected by date-string slicing, and
    every run of ``time_steps + 1`` consecutive values becomes one sample:
    the first ``time_steps`` values are the input, the last one the target.

    Parameters
    ----------
    data : pandas.Series
        One-dimensional series whose index supports date-string slicing
        (e.g. a ``DatetimeIndex``).
    time_steps : int
        Number of past values in each input window.
    size : tuple of (int, int)
        ``(first_month, last_month)``; ``first_month <= last_month``.
    year : int
        Calendar year the months refer to. Defaults to 2016.

    Returns
    -------
    x : numpy.ndarray, shape ``(n_samples, time_steps, 1)``
    y : numpy.ndarray, shape ``(n_samples,)``
        ``n_samples`` is ``len(selected) - time_steps``. When the selection
        holds ``time_steps`` values or fewer, empty arrays with those shapes
        are returned.

    Raises
    ------
    AssertionError
        If the entries of ``size`` are not integers or are out of order.
    """
    assert isinstance(size[0], int) and isinstance(size[1], int), 'size[0] size[1] must be integer'
    assert size[0] <= size[1], 'size[1] not less than size[0]'

    # Zero-pad the month: a hard-coded '0' prefix would turn months 10-12
    # into malformed labels such as '2016-011'.
    start = '%d-%02d' % (year, size[0])
    end = '%d-%02d' % (year, size[1])
    value = data.loc[start:end].values

    n_samples = len(value) - time_steps
    if n_samples <= 0:
        # Not enough data for a single window; keep the output shapes stable.
        return (np.empty((0, time_steps, 1), dtype=value.dtype),
                np.empty((0,), dtype=value.dtype))

    # Each row holds time_steps inputs followed by the value to predict.
    windows = np.array([value[i: i + time_steps + 1] for i in range(n_samples)])
    x = windows[:, :-1].reshape(n_samples, time_steps, 1)
    y = windows[:, -1]
    return x, y

In [141]:
# Training windows are requested for months 1-2 and validation windows for
# months 3-4 of the scaled training series; both use 20 past values as input.
x_train, y_train = series2supervised(
    train['flow_scaler'],
    size=(1, 2),
    time_steps=20,
)
x_valid, y_valid = series2supervised(
    train['flow_scaler'],
    size=(3, 4),
    time_steps=20,
)

In [142]:
# Sampling interval in minutes (60 / interval samples per hour).
interval = 5
# Number of past samples per input window.
# NOTE(review): the series2supervised calls pass a literal 20 rather than
# this variable -- keep them in sync.
time_steps = 20
# One batch covers one week of samples: 7 days * 24 hours * 60 minutes / interval.
batch_size = int(7 * 24 * 60 / interval)

In [150]:
# Network: one LSTM layer with 32 units reading (time_steps, 1) windows,
# followed by a single linear output unit.
model = Sequential([
    LSTM(32, input_shape=x_train.shape[1:]),
    Dense(1, activation='linear'),
])
# Optimize mean absolute error with Adam.
model.compile(optimizer='adam', loss='mae')
# Train for 5 epochs, reporting the validation loss after each one.
model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_valid, y_valid),
    verbose=2,
)

Train on 17260 samples, validate on 17548 samples
Epoch 1/5
3s - loss: 0.8274 - val_loss: 0.7280
Epoch 2/5
3s - loss: 0.6059 - val_loss: 0.4802
Epoch 3/5
3s - loss: 0.3476 - val_loss: 0.2791
Epoch 4/5
3s - loss: 0.2548 - val_loss: 0.2330
Epoch 5/5
3s - loss: 0.2284 - val_loss: 0.2302


<keras.callbacks.History at 0x2ac7050e3c8>

In [153]:
# Standardize the test flows with the already-fitted scaler (transform only,
# no refit); the column is reshaped to (n_samples, 1) as the scaler expects.
test['flow_scaler'] = scaler.transform(test['flow_5'].values.reshape(-1, 1))

In [154]:
# Build 20-step input windows and next-step targets from the scaled test
# series, requesting months 11-12.
x_test, y_test = series2supervised(
    test['flow_scaler'],
    size=(11, 12),
    time_steps=20,
)

In [155]:
# Run the trained model on the test windows; the inputs are standardized,
# so the predictions are in standardized units as well.
predicted = model.predict(x_test)

In [156]:
# Display the raw (still standardized) model predictions.
predicted

array([[-1.45806825],
       [-1.45800209],
       [-1.45288444],
       ..., 
       [-1.34100986],
       [-1.3519634 ],
       [-1.36561632]], dtype=float32)

In [157]:
# Undo the standardization to view the predictions on the scale of 'flow_5'.
scaler.inverse_transform(predicted)

array([[ 59.93305969],
       [ 59.94113159],
       [ 60.56602478],
       ..., 
       [ 74.22654724],
       [ 72.8890686 ],
       [ 71.2219696 ]], dtype=float32)

In [159]:
# Display the test targets (standardized values, 1-D array).
y_test

array([-1.58855397, -1.51484735, -1.57217473, ..., -1.52303698,
       -1.5639851 , -1.71139834])

In [161]:
# Predict on the test windows, map the result back to the scale of 'flow_5',
# and flatten the (n_samples, 1) output to a 1-D array.
predict = scaler.inverse_transform(model.predict(x_test)).flatten()

In [173]:
# Compare the true and predicted flows on the original scale.
# y_test is 1-D, so it is reshaped to a single-feature column before being
# passed to the scaler, which expects (n_samples, n_features) input -- the
# same convention used when the scaler was fitted.
# NOTE(review): print_error is not defined in the cells visible here; it must
# be defined before this cell runs on a fresh kernel.
print_error(scaler.inverse_transform(y_test[:, np.newaxis]).flatten(), predict)

MSE: 1657809.753
RMSE: 1287.560
MAE: 27.838
MAPE: 17.354%


In [166]:
# Number of rows in the scaled test series.
len(test['flow_scaler'].values)

19008

In [172]:
# First 20 test targets mapped back to the original scale. y_test is 1-D and
# is reshaped to a single-feature column because the scaler expects
# (n_samples, n_features) input.
scaler.inverse_transform(y_test[:, np.newaxis]).flatten()[:20]

array([  44.,   53.,   46.,   43.,   47.,   55.,   60.,   48.,   53.,
         52.,   57.,   53.,   72.,   75.,   79.,   73.,   74.,   84.,
         83.,  112.])

In [174]:
# Display the predictions after inverse scaling (1-D array).
predict

array([ 59.93305969,  59.94113159,  60.56602478, ...,  74.22654724,
        72.8890686 ,  71.2219696 ], dtype=float32)