In [2]:
import datetime
import sys
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model

# Import mlcompute module to use the optional set_mlc_device API for device selection with ML Compute.
#from tensorflow.python.compiler.mlcompute import mlcompute
# Select CPU device.
#mlcompute.set_mlc_device(device_name='any') # Available options are 'cpu', 'gpu', and 'any'.

from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model

In [14]:
def train_test_split(df, n_test, horizon):
    if len(df) < 8760:
        n_test = round(len(df) * 0.2)
    test_df = df.copy()[-(n_test+horizon-1):]
    train_df = df.copy()[:-(len(test_df)-horizon+1)]
    return train_df, test_df

def MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72, country='Canada'):
    df = df.merge(series_to_supervised(df), how='right', left_index=True, right_index=True)
    df = preprocess(df, country)
    train_df, test_df = train_test_split(df, n_test, horizon=HORIZON)
    y_scaler = MinMaxScaler()
    y_scaler.fit(train_df[['value']])
    long_scaler = MinMaxScaler()
    print(test_df.columns)
    test_df[test_df.columns] = long_scaler.fit_transform(test_df)
    train_df[train_df.columns] = long_scaler.fit_transform(train_df)
    tensor_structure = {'X':(range(-T+1, 1), train_df.columns[:1]), 'X2':(range(1, HORIZON+1), train_df.columns[1:])}
    train_inputs = TimeSeriesTensor(train_df, 'value', HORIZON, tensor_structure)
    test_inputs = TimeSeriesTensor(test_df, 'value', HORIZON, tensor_structure)
    return train_inputs, test_inputs, y_scaler

In [5]:
def build_model(l, drop, n, lr):
    if l==1:
        model = tf.keras.models.Sequential([
            tf.keras.layers.LSTM(n, input_shape=(HORIZON, 14)),
            tf.keras.layers.Dense(HORIZON)
        ])
    elif l==2:
        model = tf.keras.models.Sequential([
            # Shape [batch, time, features] => [batch, time, lstm_units]
            tf.keras.layers.LSTM(n, input_shape=(HORIZON, 14), return_sequences=True),
            tf.keras.layers.Dropout(drop),
            tf.keras.layers.LSTM(n),
            # Shape => [batch, time, features]
            tf.keras.layers.Dense(HORIZON)
        ])
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    # Compile model
    model.compile(loss='mse', optimizer=opt,metrics=['mse'])
    return model

In [7]:
datasets = []
names = []
for i in range(1,29):
    filename = '../data/Columbia_clean/Residential_'+str(i)+'.csv'
    df = pd.read_csv(filename, index_col=0)
    datasets.append(df)
    names.append('B'+str(i))

In [8]:
dset = 'Columbia'
country = 'Canada'
HORIZON = 24


net = 'stlf'
LAYERS = 1
DROPOUT = 0.3
NEURONS = 64
LR = 1e-3
BATCHSIZE = 1500

MAX_EPOCHS = 100
PATIENCE = 10

In [15]:
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for df in tqdm(datasets):
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=HORIZON, HORIZON=HORIZON, country=country)
    dX_train.append(tf.concat([train_inputs['X'],train_inputs['X2']], axis=2))
    dT_train.append(train_inputs['target'])
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)

  0%|          | 0/28 [00:00<?, ?it/s]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


  4%|▎         | 1/28 [00:01<00:48,  1.81s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


  7%|▋         | 2/28 [00:03<00:50,  1.94s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 11%|█         | 3/28 [00:05<00:46,  1.86s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 14%|█▍        | 4/28 [00:07<00:42,  1.77s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 18%|█▊        | 5/28 [00:08<00:39,  1.71s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 21%|██▏       | 6/28 [00:10<00:37,  1.71s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 25%|██▌       | 7/28 [00:12<00:34,  1.64s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 29%|██▊       | 8/28 [00:13<00:32,  1.64s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 32%|███▏      | 9/28 [00:15<00:30,  1.63s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 36%|███▌      | 10/28 [00:17<00:30,  1.68s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')


 39%|███▉      | 11/28 [00:19<00:29,  1.74s/it]

Index(['value', 'value(t-168)', 'fractional hour_sin', 'fractional hour_cos',
       'day of year_sin', 'day of year_cos', 'working day', 'week_1', 'week_2',
       'week_3', 'week_4', 'week_5', 'week_6', 'week_7'],
      dtype='object')





KeyboardInterrupt: 

In [20]:
global_inputs_X[24]

<tf.Tensor: shape=(24, 14), dtype=float64, numpy=
array([[0.05011316, 0.11542192, 0.04393689, 0.99809971, 0.44111344,
        0.80543055, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 1.        ],
       [0.07492726, 0.03516004, 0.08786856, 0.99239908, 0.44111344,
        0.80543055, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 1.        ],
       [0.06894601, 0.04073715, 0.13178977, 0.98289877, 0.44111344,
        0.80543055, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 1.        ],
       [0.06369221, 0.0376657 , 0.1756953 , 0.96959992, 0.44111344,
        0.80543055, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 1.        ],
       [0.05916586, 0.03629163, 0.21957992, 0.95250412, 0.44111344,
        0.80543055, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        

In [16]:
global_inputs_T[0]

<tf.Tensor: shape=(24,), dtype=float64, numpy=
array([0.05011316, 0.07492726, 0.06894601, 0.06369221, 0.05916586,
       0.05536696, 0.05229551, 0.05011316, 0.04881992, 0.07508891,
       0.10515681, 0.05746848, 0.07775622, 0.05803427, 0.04833495,
       0.03556418, 0.04178791, 0.0671678 , 0.21475913, 0.20958616,
       0.09707404, 0.05924669, 0.05148723, 0.0362108 ])>

In [23]:
results = pd.read_csv('./results/Columbia/firm-sea-117_11.csv', index_col = 0)

In [24]:
results

Unnamed: 0,timestamp,h,prediction,actual
1970-01-01 00:00:00.000000000,2017-08-22 12:00:00,t+1,0.465304,0.456369
1970-01-01 00:00:00.000000001,2017-08-22 13:00:00,t+1,0.436799,0.518040
1970-01-01 00:00:00.000000002,2017-08-22 14:00:00,t+1,0.491822,0.419366
1970-01-01 00:00:00.000000003,2017-08-22 15:00:00,t+1,0.544101,0.419366
1970-01-01 00:00:00.000000004,2017-08-22 16:00:00,t+1,0.652825,0.505706
...,...,...,...,...
1970-01-01 00:00:00.000104539,2018-02-19 19:00:00,t+24,1.129356,1.406110
1970-01-01 00:00:00.000104540,2018-02-19 20:00:00,t+24,0.809294,0.357695
1970-01-01 00:00:00.000104541,2018-02-19 21:00:00,t+24,0.558248,0.333026
1970-01-01 00:00:00.000104542,2018-02-19 22:00:00,t+24,0.425174,0.308357
