In [2]:
import datetime
import sys
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model

# Import mlcompute module to use the optional set_mlc_device API for device selection with ML Compute.
#from tensorflow.python.compiler.mlcompute import mlcompute
# Select the ML Compute device (the call below passes 'any', not 'cpu').
#mlcompute.set_mlc_device(device_name='any') # Available options are 'cpu', 'gpu', and 'any'.

from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model

In [11]:
def train_test_split(df, n_test, horizon):
    """Split ``df`` by row position into a leading train part and a trailing test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split. It is not modified; both parts are sliced from copies.
    n_test : int
        Requested test size. Ignored when ``df`` has fewer than 8760 rows, in
        which case 20% of the row count (rounded) is used instead.
    horizon : int
        Forecast horizon; the test part is lengthened by ``horizon - 1`` rows.

    Returns
    -------
    tuple
        ``(train_df, test_df)``. ``test_df`` is the last
        ``n_test + horizon - 1`` rows and ``train_df`` ends
        ``len(test_df) - horizon + 1`` rows before the end of ``df``, so the
        two parts share ``horizon - 1`` rows when ``df`` is long enough.
    """
    total_rows = len(df)
    if total_rows < 8760:  # presumably one year of hourly samples -- TODO confirm
        n_test = round(total_rows * 0.2)

    tail_len = n_test + horizon - 1
    test_df = df.copy()[-tail_len:]

    # Measured on the actual test slice, which can be shorter than tail_len
    # when df holds fewer rows than requested.
    held_out = len(test_df) - horizon + 1
    train_df = df.copy()[:-held_out]
    return train_df, test_df


def MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72, country='Canada'):
    """Turn one raw series frame into scaled train/test ``TimeSeriesTensor`` inputs.

    Pipeline: right-join ``df`` with ``series_to_supervised(df)`` on the index,
    run ``preprocess(..., country)``, split with ``train_test_split``, min-max
    scale every column using statistics from the training part only, then wrap
    both parts in ``TimeSeriesTensor``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must yield a ``'value'`` column after preprocessing.
    n_test : int
        Requested test size, forwarded to ``train_test_split``.
    T : int
        Number of past steps for the ``'X'`` input (offsets ``-T+1 .. 0``).
    HORIZON : int
        Number of future steps for the ``'X2'`` input (offsets ``1 .. HORIZON``)
        and the horizon passed to ``TimeSeriesTensor``.
    country : str
        Forwarded to ``preprocess``.

    Returns
    -------
    tuple
        ``(train_inputs, test_inputs, y_scaler)`` where ``y_scaler`` is a
        ``MinMaxScaler`` fitted on the unscaled training ``'value'`` column.
    """
    lagged = series_to_supervised(df)
    merged = df.merge(lagged, how='right', left_index=True, right_index=True)
    prepared = preprocess(merged, country)
    train_df, test_df = train_test_split(prepared, n_test, horizon=HORIZON)

    # The target-only scaler must be fitted before the frames are overwritten
    # with scaled values below.
    y_scaler = MinMaxScaler()
    y_scaler.fit(train_df[['value']])

    # Scale all columns; the test part reuses the training min/max.
    all_columns_scaler = MinMaxScaler()
    train_df[train_df.columns] = all_columns_scaler.fit_transform(train_df)
    test_df[test_df.columns] = all_columns_scaler.transform(test_df)

    # 'X' takes the first column over the past window, 'X2' the remaining
    # columns over the forecast window.
    first_column = train_df.columns[:1]
    remaining_columns = train_df.columns[1:]
    tensor_structure = {
        'X': (range(-T+1, 1), first_column),
        'X2': (range(1, HORIZON+1), remaining_columns),
    }

    train_inputs = TimeSeriesTensor(train_df, 'value', HORIZON, tensor_structure)
    test_inputs = TimeSeriesTensor(test_df, 'value', HORIZON, tensor_structure)
    return train_inputs, test_inputs, y_scaler

In [12]:
def build_model(l, drop, n, lr, horizon=None, n_features=14):
    """Build and compile a one- or two-layer LSTM forecaster.

    Parameters
    ----------
    l : int
        Number of LSTM layers; only 1 and 2 are supported.
    drop : float
        Dropout rate applied between the two LSTM layers. Unused when ``l == 1``.
    n : int
        Number of units in each LSTM layer.
    lr : float
        Learning rate for the Adam optimizer.
    horizon : int, optional
        Number of input time steps and of output values. Defaults to the
        notebook-level ``HORIZON`` constant, as in the original implementation.
    n_features : int, optional
        Number of input features per time step (default 14).

    Returns
    -------
    tf.keras.Model
        Sequential model compiled with MSE loss and an MSE metric.

    Raises
    ------
    ValueError
        If ``l`` is neither 1 nor 2.
    """
    # Validate first: previously an unsupported value fell through both
    # branches and failed later with an UnboundLocalError on `model`.
    if l not in (1, 2):
        raise ValueError(f"build_model supports l=1 or l=2 LSTM layers, got {l!r}")

    if horizon is None:
        # Fall back to the notebook global so existing calls keep working.
        horizon = HORIZON
    input_shape = (horizon, n_features)

    if l == 1:
        layers = [
            # Shape [batch, time, features] => [batch, lstm_units]
            tf.keras.layers.LSTM(n, input_shape=input_shape),
            # Shape => [batch, horizon]
            tf.keras.layers.Dense(horizon),
        ]
    else:
        layers = [
            # Shape [batch, time, features] => [batch, time, lstm_units]
            tf.keras.layers.LSTM(n, input_shape=input_shape, return_sequences=True),
            tf.keras.layers.Dropout(drop),
            # Shape => [batch, lstm_units] (last step only)
            tf.keras.layers.LSTM(n),
            # Shape => [batch, horizon]
            tf.keras.layers.Dense(horizon),
        ]

    model = tf.keras.models.Sequential(layers)
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss='mse', optimizer=opt, metrics=['mse'])
    return model

def format_output(df):
    """Pivot a long evaluation frame into wide prediction and actual tables.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with columns ``'timestamp'``, ``'h'`` (string labels
        containing the horizon step number), ``'prediction'`` and ``'actual'``.
        The frame is not modified.

    Returns
    -------
    tuple
        ``(ppivot, apivot)``: predictions and actuals, each indexed by
        ``timestamp`` (converted to datetimes) with one ``h_<step>`` column per
        horizon step.
    """
    # Extract the step number into a new frame rather than overwriting df['h'];
    # mutating the caller's frame made a second call fail on the now-integer
    # column.
    step_number = df['h'].str.extract(r'(\d+)', expand=False).astype(int)
    long_df = df.assign(h=step_number)

    def _to_wide(value_column):
        # Each table gets its OWN index converted to datetimes. The original
        # copied the prediction index onto the actuals table, which mislabels
        # or fails whenever the two pivots do not have the same rows.
        wide = pd.pivot_table(long_df, values=value_column, index=['timestamp'], columns=['h'])
        wide = wide.add_prefix('h_')
        wide.index = pd.to_datetime(wide.index)
        return wide

    ppivot = _to_wide('prediction')
    apivot = _to_wide('actual')
    return ppivot, apivot

def flatten(data):
    """Return a new list holding the elements of every item in ``data``, in order.

    Only one level of nesting is removed; each item of ``data`` must itself be
    iterable.
    """
    return [element for item in data for element in item]


In [13]:
# Read the Columbia residential files Residential_1.csv .. Residential_28.csv,
# one frame per file, labelled 'B1' .. 'B28' in the parallel `names` list.
datasets, names = [], []
for i in range(1, 29):
    filename = f'../data/Columbia_clean/Residential_{i}.csv'
    df = pd.read_csv(filename, index_col=0)
    datasets.append(df)
    names.append(f'B{i}')

In [27]:
# Split the combined London hourly file into one frame per household id
# (column 'LCLid'); `names` holds the matching ids in the same order.
# Note: this rebinds `datasets` and `names`, discarding any earlier contents.
datasets, names = [], []
hourly = pd.read_csv('../data/London_smart_meters/London_hourly_all.csv', index_col='tstp')
for house in hourly['LCLid'].unique():
    temp = hourly[hourly['LCLid'] == house]
    datasets.append(temp)
    names.append(house)

In [36]:
# Restore a previously saved model through the project helper `load_model`.
# The path points at a London run -- presumably a single model shared by all
# households ("global"); TODO confirm.
LSTMIMO = load_model('./models/London_models/global_skilled-frog-284')

Loaded model from disk


In [37]:
# Evaluate the loaded model on the test split of every series in `datasets`
# and collect one row of error metrics per series.
dX_test = []
dX_scaler = []
HORIZON = 72
country = 'UK'
dset = 'London'
run_name = 'scaled_life'
# Size the table from `datasets` instead of a fixed 28 rows, so a shorter list
# does not leave empty rows in the saved CSV and a longer one does not rely on
# rows being appended on the fly.
metrics = pd.DataFrame(columns=['mae', 'mape', 'rmse', 'B'], index=range(len(datasets)))
for i, df in enumerate(datasets):
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(
        df, n_test=4380, T=HORIZON, HORIZON=HORIZON, country=country)
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)

    # Join the two input blocks along the last axis to form the model input.
    concat_input = tf.concat([dX_test[i]['X'], dX_test[i]['X2']], axis=2)
    FD_predictions = LSTMIMO.predict(concat_input)
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i], HORIZON, dX_scaler[i])
    preds, actuals = format_output(FD_eval_df)

    # Start at the first row whose timestamp falls at hour 0, then keep every
    # 24th row -- presumably one forecast per day on hourly data; TODO confirm.
    preds = preds[np.where(preds.index.hour == 0)[0][0]:][::24]
    actuals = actuals[np.where(actuals.index.hour == 0)[0][0]:][::24]

    # Side-by-side actuals/predictions; not used for the metrics below, kept
    # for inspection or per-series export.
    full = actuals.merge(preds, how='inner', left_index=True, right_index=True,
                         suffixes=('_actuals', '_preds'))

    preds = flatten(preds.values.tolist())
    actuals = flatten(actuals.values.tolist())
    mae = validation(preds, actuals, 'MAE')
    mape = validation(preds, actuals, 'MAPE')
    rmse = validation(preds, actuals, 'RMSE')
    metrics.loc[i] = pd.Series({'mae': mae, 'mape': mape, 'rmse': rmse, 'B': names[i]})

In [38]:
# Display the per-series error table (columns: mae, mape, rmse, B).
metrics

Unnamed: 0,mae,mape,rmse,B
0,0.131583,47.6225,0.22736,MAC000020
1,0.0905566,67.3195,0.168249,MAC001814
2,0.0809991,74.1025,0.113524,MAC003721
3,0.148538,40.9636,0.246479,MAC003341
4,0.048963,63.947,0.0722627,MAC001688
...,...,...,...,...
85,0.112996,79.8493,0.256048,MAC003618
86,0.283427,61.4328,0.469732,MAC001611
87,0.326917,85.0468,0.501918,MAC003622
88,0.27822,65.4824,0.407986,MAC002385


In [39]:
# Write the metrics table to the results folder that matches the forecast
# horizon: 72 -> 3days, 24 -> dayahead. Any other HORIZON writes nothing.
if HORIZON == 72:
    out_path = './results/'+dset+'/global/3days/LSTM_'+run_name+'.csv'
    metrics.to_csv(out_path)
elif HORIZON == 24:
    out_path = './results/'+dset+'/global/dayahead/LSTM_'+run_name+'.csv'
    metrics.to_csv(out_path)

In [40]:
# Mean of the 'rmse' column across all rows of the metrics table.
print(metrics['rmse'].mean())

0.3270023862973313


In [41]:
# Mean of the 'mape' column across all rows of the metrics table.
print(metrics['mape'].mean())

80.50297365607194
