In [1]:
import datetime
import sys
import os

import IPython
import IPython.display
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model
from sklearn.preprocessing import MinMaxScaler

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation

np.set_printoptions(threshold=sys.maxsize)
pd.options.display.max_seq_items = 2000

In [2]:
def train_test_split(df, n_test):
    if len(df) < 8760:
        n_test = round(len(df) * 0.2)
    test_df = df.copy()[-(n_test+71):]
    train_df = df.copy()[:-(len(test_df)-71)]
    return train_df, test_df

In [35]:
def MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72):
    df = preprocess(df, 'Belgium')
    df = df.merge(series_to_supervised(df), how='right', left_index=True, right_index=True)
    train_df, test_df = train_test_split(df, n_test)
    y_scaler = MinMaxScaler()
    y_scaler.fit(train_df[['value']])
    long_scaler = MinMaxScaler()
    test_df[test_df.columns] = long_scaler.fit_transform(test_df)
    train_df[train_df.columns] = long_scaler.fit_transform(train_df)
    #print(train_df.columns)
    tensor_structure = {'X':(range(-T+1, 1), train_df.columns[:5]), 'X2':(range(1, 73), train_df.columns[-1:]), 'static':(None, train_df.columns[5:-1])}
    #tensor_structure = {'X':(range(-24+1, 1), train_df.columns[:1]), 'X2':(range(-168+1, -96+1), train_df.columns[:1]), 'static':(None, train_df.columns[3:])}
    #tensor_structure = {'X':(range(-T+1, 1), train_df.columns)}
    #print(tensor_structure[0])
    train_inputs = TimeSeriesTensor(train_df, 'value', HORIZON, tensor_structure)
    test_inputs = TimeSeriesTensor(test_df, 'value', HORIZON, tensor_structure)
    return train_inputs, test_inputs, y_scaler

In [36]:
GEP1 = pd.read_csv('../data/GEP/Consumption_1H.csv', index_col=0, header=0, names=['value'])
GEP4 = pd.read_csv('../data/GEP/B4_Consumption_1H.csv', index_col=0, header=0, names=['value'])
datasets = [GEP1, GEP4]
names = ['GEP1', 'GEP4']

In [37]:
train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(GEP4, n_test=4380, T=72, HORIZON=72)

range(-71, 1)
Index(['value', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos'],
      dtype='object')
range(1, 73)
Index(['value(t-168)'], dtype='object')
None
Index(['working day', 'week_1', 'week_2', 'week_3', 'week_4', 'week_5',
       'week_6', 'week_7'],
      dtype='object')
range(-71, 1)
Index(['value', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos'],
      dtype='object')
range(1, 73)
Index(['value(t-168)'], dtype='object')
None
Index(['working day', 'week_1', 'week_2', 'week_3', 'week_4', 'week_5',
       'week_6', 'week_7'],
      dtype='object')


In [38]:
train_inputs['static'].shape

(11293, 8)

In [39]:
train_inputs['X'].shape

(11293, 72, 5)

In [40]:
train_inputs['X2'].shape

(11293, 72, 1)

In [None]:
train_inputs['X']

In [11]:
MAX_EPOCHS = 100
BATCHSIZE = 32
patience = 10
HORIZON = 72


FULL_LSTMIMO = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    tf.keras.layers.LSTM(32, input_shape=(HORIZON, 15)),
    # Shape => [batch, time, features]
    tf.keras.layers.Dense(HORIZON)
])


metrics = pd.DataFrame(columns=['mae','mape', 'rmse', 'B'], index=range(28))
from progressbar import ProgressBar
pbar = ProgressBar()
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for i,df in enumerate(datasets):
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72)
    dX_train.append(train_inputs['X'])
    dT_train.append(train_inputs['target'])
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)
#test_inputs = pd.concat(dn_test, axis=1)

# full data LSTM MIMO compilation and fit
FULL_LSTMIMO.compile(optimizer=tf.optimizers.Adam(), loss='mse', metrics=[tf.metrics.MeanSquaredError()])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')
        
history = FULL_LSTMIMO.fit(global_inputs_X, global_inputs_T, batch_size=32, epochs=MAX_EPOCHS,
                      validation_split=0.15,
                      callbacks=[early_stopping], verbose=1)
for i,df in enumerate(datasets):
    FD_predictions = FULL_LSTMIMO.predict(dX_test[i-1]['X'])
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i-1], HORIZON, dX_scaler[i-1])
    mae = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAE')
    mape = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAPE')
    rmse = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'RMSE')
    #print('rmse {}'.format(rmse))
    metrics.loc[i] = pd.Series({'mae':mae, 'mape':mape, 'rmse':rmse, 'B': names[i]})
metrics.to_csv('./results/GEP/global/3days/fulldata_LSTM.csv')

range(-71, 1)
Index(['value'], dtype='object')
None
Index(['working day', 'week_1', 'week_2', 'week_3', 'week_4', 'week_5',
       'week_6', 'week_7', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'value(t-168)'],
      dtype='object')
range(-71, 1)
Index(['value'], dtype='object')
None
Index(['working day', 'week_1', 'week_2', 'week_3', 'week_4', 'week_5',
       'week_6', 'week_7', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'value(t-168)'],
      dtype='object')
range(-71, 1)
Index(['value'], dtype='object')
None
Index(['working day', 'week_1', 'week_2', 'week_3', 'week_4', 'week_5',
       'week_6', 'week_7', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'value(t-168)'],
      dtype='object')
range(-71, 1)
Index(['value'], dtype='object')
None
Index(['working day', 'week_1', 'week_2', 'week_3', 'week_4', 'week_5',
       'week_6', 'week_7', 'fract

ValueError: Error when checking input: expected lstm_input to have shape (72, 15) but got array with shape (72, 1)