In [20]:
import numpy as np
import pandas as pd
import math 
import os
import datetime
import matplotlib.pyplot as plt
import tensorflow as tf

from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

In [21]:
# dirs
# NOTE: despite the *_DIR suffix, DATA_DIR is the path of the input CSV file
# itself (it is handed straight to pd.read_csv in the loading cell).
DATA_DIR = "./load.csv"
# Folder holding the per-model prediction CSVs that are read back further down;
# file names are appended by plain string concatenation, so keep the trailing "/".
RESULT_DIR = "./result/"
# Folder created next to RESULT_DIR by the following cell.
TEST_PLOT_DIR = "./plots/"
# Name of the load column inside the CSV.
load_col = 'out.site_energy.total.energy_consumption.kwh'
# MWh
# NOTE(review): the loading cell multiplies this column by 4 / 1e3. If the raw
# values are kWh per 15-minute interval, that yields average MW rather than
# MWh -- confirm the intended unit.

In [22]:
# Make sure both output folders exist before anything tries to use them.
# exist_ok=True makes the call idempotent (safe under Restart & Run All) and
# avoids the check-then-create race of a separate os.path.exists() test.
for out_dir in (RESULT_DIR, TEST_PLOT_DIR):
    os.makedirs(out_dir, exist_ok=True)

In [23]:
# Read the raw readings and show the untouched load column first, so the
# displayed values are the ones stored in the CSV.
raw = pd.read_csv(DATA_DIR)
display(raw[load_col])

# Build the working frame without mutating `raw`:
#   * parse the timestamp strings into datetimes
#   * rescale the load column by 4 / 1e3
data = raw.assign(**{
    'timestamp': pd.to_datetime(raw['timestamp']),
    load_col: raw[load_col].mul(4).div(1e3),
})


0        96783.589350
1        97411.955013
2        98471.266660
3        99516.538686
4        97957.806899
             ...     
35035    98582.780350
35036    98693.526323
35037    97083.021398
35038    96588.769233
35039    96596.935578
Name: out.site_energy.total.energy_consumption.kwh, Length: 35040, dtype: float64

In [24]:
# scaler
# Min-max scale the load column and overwrite it in place; the fitted `scaler`
# is kept because cal_error() uses it later to map values back to original units.
# The column is reshaped to (n_samples, 1), i.e. the scaler is fitted on a
# single feature.
# NOTE(review): the scaler is fitted on the complete series, before the
# train/test split further down, so the min/max of the test periods influence
# the scaling of the training data. Consider fitting on the training part only.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data[load_col].to_numpy().reshape(-1, 1))
data[load_col] = data_scaled

In [25]:
"""
!! parameter settings
n_predict: predict steps
height: final height of the image:
            height * 2 if the n_predict <= width,
            height * 2 + 1 if the n_predict > width
width: width of the image
n_days: use past n days historical time series data as input (number of channel)
n_window_shift: the shift interval of sliding window
"""
# Number of future steps in each target vector. The encoder builds the target
# window in 15-minute increments, so 96 steps span 24 hours.
n_predict = 96
# Each day of the input window is reshaped to a (height, width) image, so
# height * width has to equal the number of readings per day (8 * 12 = 96).
# NOTE(review): the "height * 2" remark in the notes above presumably refers to
# the mirrored (symmetric) image variant; encode() as written does not mirror
# and yields height x width images -- confirm which variant is intended.
height = 8
width = 12
# Number of past days per sample; each day becomes one image channel.
n_days = 3
# Spacing between consecutive sample anchors (passed to pd.date_range as `freq`).
n_window_shift = "15min"

In [26]:
class TimeSeriesImageCoder():
    """Turn a 15-minute load series into image-like tensors for forecasting.

    For every anchor timestamp the encoder takes the ``n_days`` days of
    readings that end at the anchor, reshapes them to
    ``(n_days, height, width)`` (one ``height x width`` image per day) and
    pairs them with the ``n_predict`` readings that follow the anchor.

    Parameters
    ----------
    X : pd.DataFrame
        Must contain a ``'timestamp'`` column and the value column.
    n_predict : int
        Number of 15-minute steps in each target vector.
    height, width : int
        Image size of a single day. Every input window must hold exactly
        ``n_days * height * width`` rows, otherwise ``encode_b`` raises.
    n_days : int
        Number of past days per sample (becomes the channel axis).
    n_window_shift : str
        Pandas frequency string giving the spacing between anchors.
    value_col : str, optional
        Name of the value column. When omitted, the notebook-level
        ``load_col`` variable is used (the original behaviour).

    Attributes set by ``encode_b``/``encode``
    -----------------------------------------
    X_timeseries_flatten : flat input window values, one row per sample
    X_timestamp          : timestamps of every input window
    y_timestamp          : timestamps of every target window
    """

    def __init__(
        self,
        X: pd.DataFrame,
        n_predict: int,
        height: int,
        width: int,
        n_days: int,
        n_window_shift: str,
        value_col: "str | None" = None,
    ) -> None:
        self.X = X
        self.h = height
        self.m = width
        self.d_b = n_days
        self.shift = n_window_shift
        self.n_predict = n_predict
        self.value_col = value_col
        # Readings per day-image, and n_predict rounded up to whole image rows.
        self.Lb = self.h * self.m
        self.Ls = math.ceil(self.n_predict / self.m) * self.m
        self.timestamps = self.generate_timestamps()
        print(f"Lb: {self.Lb}")
        print(f"Ls: {self.Ls}")

    def generate_timestamps(self):
        """Return the anchor timestamps that have a full history and a full target.

        The first anchor leaves room for ``n_days`` days of history, the last
        one leaves room for ``n_predict`` 15-minute steps. (These bounds were
        previously fixed at 3 days / 96 steps regardless of the constructor
        arguments; the results are identical for those default values.)
        """
        start = self.X['timestamp'].min() + DateOffset(days=self.d_b)
        end = self.X['timestamp'].max() - DateOffset(minutes=self.n_predict * 15)
        return pd.date_range(start=start, end=end, freq=self.shift)

    def __make_it_symmetric_3d(self, sets_3d):
        """Mirror every 2-D slice vertically and stack it below the original.

        Input of shape ``(d, h, m)`` becomes ``(d, 2 * h, m)``.
        """
        stack = np.array(sets_3d)
        return np.concatenate((stack, stack[:, ::-1]), axis=1)

    def __make_it_symmetric_2d(self, sets_2d):
        """Mirror a single 2-D array vertically and stack it below the original."""
        reversed_slice_2d = sets_2d[::-1]
        combined_slice_2d = np.concatenate((sets_2d, reversed_slice_2d), axis=0)
        return np.array(combined_slice_2d)

    def encode_b(self):
        """Build the ``(n_days, height, width)`` inputs and flat targets.

        Returns
        -------
        (training_sets, target_sets)
            Arrays of shape ``(n_samples, n_days, height, width)`` and
            ``(n_samples, n_predict)``.

        Raises
        ------
        ValueError
            If a non-empty window does not contain the expected number of
            rows (e.g. gaps in the data or ``height * width`` not matching
            the number of readings per day).
        """
        # Resolved lazily so the notebook-level fallback is only needed when
        # no explicit column name was given.
        col = self.value_col if self.value_col is not None else load_col
        # Hoisted out of the loop: these lookups do not change per anchor.
        stamps = self.X['timestamp']
        values = self.X[col]
        n_train = self.d_b * self.h * self.m

        training_sets = []
        target_sets = []
        self.X_timeseries_flatten = []
        self.X_timestamp = []
        self.y_timestamp = []
        for steps in self.timestamps:
            # Input window: n_days days ending at the anchor, both ends
            # inclusive (n_days days minus one 15-minute step before it).
            training_start_b = steps - DateOffset(days=self.d_b-1, hours=23, minutes=45)
            training_end = steps
            # Target window: the n_predict steps right after the anchor.
            target_start = training_end + DateOffset(minutes=15)
            target_end = steps + DateOffset(minutes=(self.n_predict)*15)

            train_mask = (stamps >= training_start_b) & (stamps <= training_end)
            target_mask = (stamps >= target_start) & (stamps <= target_end)
            # Anchors with no data on either side are skipped, as before.
            if not train_mask.any() or not target_mask.any():
                continue

            train_values = values[train_mask]
            target_values = values[target_mask]
            if len(train_values) != n_train:
                raise ValueError(
                    f"input window ending at {steps} has {len(train_values)} rows, "
                    f"expected n_days*height*width = {n_train}"
                )
            if len(target_values) != self.n_predict:
                raise ValueError(
                    f"target window after {steps} has {len(target_values)} rows, "
                    f"expected n_predict = {self.n_predict}"
                )

            self.X_timeseries_flatten.append(train_values)
            self.X_timestamp.append(stamps[train_mask])
            self.y_timestamp.append(stamps[target_mask])
            training_sets.append(
                train_values.to_numpy().reshape(self.d_b, self.h, self.m)
            )
            # The target stays a flat vector. The former reshape-then-flatten
            # round trip was a no-op for valid sizes and failed whenever
            # n_predict was not a multiple of width.
            target_sets.append(target_values.to_numpy())

        training_sets = np.array(training_sets)
        target_sets = np.array(target_sets)

        self.X_timeseries_flatten = np.array(self.X_timeseries_flatten)
        self.X_timestamp = np.array(self.X_timestamp)
        self.y_timestamp = np.array(self.y_timestamp)
        return training_sets, target_sets

    def encode(self):
        """Return channel-last inputs ``(n_samples, height, width, n_days)`` and targets.

        Raises
        ------
        ValueError
            If no sample window could be built from the data.
        """
        training_sets_b, target_sets = self.encode_b()
        if training_sets_b.ndim != 4:
            raise ValueError("no sample windows could be built from the given data")
        # (samples, days, h, w) -> (samples, h, w, days)
        training_sets_b = np.transpose(training_sets_b, (0, 2, 3, 1))
        return training_sets_b, target_sets

    def __sum_np(self, matrix):
        """Add row ``i`` to row ``-(i + 1)`` and return the sums as one flat list.

        With an odd number of rows the middle row is appended unchanged.
        """
        n_pairs = len(matrix) // 2
        sums = []
        for i in range(n_pairs):
            sums.append(list(map(sum, zip(matrix[i], matrix[-(i + 1)]))))
        if len(matrix) % 2 != 0:
            sums.append(matrix[n_pairs])

        return [num for row in sums for num in row]

    def __x_timeseries_to_image(self, vector):
        """Reshape a flat series to ``(n_days, height, width)`` and mirror each day."""
        matrix_1d = vector.reshape(self.d_b, self.h, self.m)
        image = self.__make_it_symmetric_3d(matrix_1d)
        return image

    def pairwise_sum(self, matrix):
        """Fold every layer of ``matrix`` top-to-bottom and halve the sums.

        For a vertically mirrored layer this recovers the un-mirrored rows,
        flattened to one vector per layer.
        """
        summed_3d_np = np.array([self.__sum_np(layer) for layer in matrix]) / 2
        return summed_3d_np

    def image_shift(self, original_input, new_input):
        """Roll new values into an input image for rolling (multi-step) prediction.

        ``original_input`` is a channel-last batch ``(1, rows, width, n_days)``;
        only its first element is used. The image is unfolded to a flat
        series, ``new_input`` is appended, the oldest values are dropped so
        the length stays the same, and the series is re-encoded as a mirrored
        image of shape ``(1, 2 * height, width, n_days)``.

        NOTE: the unfolding halves the number of rows, so this expects
        vertically mirrored images with ``2 * height`` rows (as produced by
        this method itself). Images taken directly from ``encode()`` have only
        ``height`` rows and will fail in the reshape step.
        """
        # (1, rows, width, channels) -> (1, channels, rows, width)
        channel_first = np.transpose(original_input, (0, 3, 1, 2))
        series = np.array([self.pairwise_sum(channel_first[0])]).flatten()
        # Append the new values and keep the most recent len(series) entries.
        series = np.concatenate([series, new_input], axis=0)[-len(series):]
        image = self.__x_timeseries_to_image(series)
        image = image.reshape(1, *image.shape)
        image = np.transpose(image, (0, 2, 3, 1))
        return image


In [27]:
# Build the encoder from the notebook-level parameters and encode the series.
encoder = TimeSeriesImageCoder(
    X=data,
    n_predict=n_predict,
    height=height,
    width=width,
    n_days=n_days,
    n_window_shift=n_window_shift,
)
encoded_Xb, encoded_y = encoder.encode()

# Independent copies of the encoder's bookkeeping arrays, so later cells can
# slice and rebind them without touching the encoder's own attributes.
X_timeseries, X_timestamp, y_timestamp = (
    np.copy(source)
    for source in (
        encoder.X_timeseries_flatten,
        encoder.X_timestamp,
        encoder.y_timestamp,
    )
)

Lb: 96
Ls: 96


In [28]:
# Sanity check: inputs, targets and the bookkeeping arrays must share the
# same number of samples along the first axis.
for encoded_array in (encoded_Xb, encoded_y, X_timeseries, X_timestamp, y_timestamp):
    print(encoded_array.shape)

(34656, 8, 12, 3)
(34656, 96)
(34656, 288)
(34656, 288)
(34656, 96)


In [29]:
# Hold-out split.
# The samples are cut into N_TEST_BLOCKS equal chunks and the last
# TEST_BLOCK_LEN samples of every chunk become test data; everything else
# stays in encoded_Xb / encoded_y / X_timeseries / X_timestamp / y_timestamp
# as the training portion. The selection is identical to the previous
# slice-and-concatenate loop, but each array is copied once instead of being
# rebuilt 24 times with shifting indices.
# NOTE(review): neighbouring train/test windows overlap in time, because the
# sliding windows are only one shift apart.
N_TEST_BLOCKS = 24
TEST_BLOCK_LEN = 192

MONTH_TIME_STEP = math.floor(encoder.timestamps.shape[0] / N_TEST_BLOCKS)

# This cell rebinds its own inputs, so running it twice would carve test
# blocks out of the already reduced training arrays. Detect that and stop.
if len(encoded_Xb) != len(encoder.X_timeseries_flatten):
    raise RuntimeError(
        "encoded arrays were already split; re-run the encoding cell before "
        "running this cell again"
    )
if MONTH_TIME_STEP < TEST_BLOCK_LEN:
    raise ValueError(
        f"each chunk has {MONTH_TIME_STEP} samples, fewer than the "
        f"{TEST_BLOCK_LEN} samples requested per test block"
    )

# Test block k = the last TEST_BLOCK_LEN samples of chunk k.
test_slices = [
    slice((k + 1) * MONTH_TIME_STEP - TEST_BLOCK_LEN, (k + 1) * MONTH_TIME_STEP)
    for k in range(N_TEST_BLOCKS)
]


def _hold_out(samples):
    """Return (list of test blocks, remaining training samples) for one array."""
    keep = np.ones(len(samples), dtype=bool)
    for block in test_slices:
        keep[block] = False
    return [samples[block] for block in test_slices], samples[keep]


# The test parts stay lists of per-block arrays; the next cell concatenates them.
X_test_b, encoded_Xb = _hold_out(encoded_Xb)
y_test, encoded_y = _hold_out(encoded_y)
X_test_b_flatten, X_timeseries = _hold_out(X_timeseries)
X_test_b_timestamp, X_timestamp = _hold_out(X_timestamp)
y_test_timestamp, y_timestamp = _hold_out(y_timestamp)


In [30]:
# Merge the per-block test lists into single arrays (blocks are stacked along
# the sample axis in chronological order).
# NOTE: each name is rebound from a list of blocks to one array, so this cell
# is meant to run exactly once after the split cell.
X_test_b = np.concatenate(X_test_b)
y_test = np.concatenate(y_test)
X_test_b_flatten = np.concatenate(X_test_b_flatten)
X_test_b_timestamp = np.concatenate(X_test_b_timestamp)
y_test_timestamp = np.concatenate(y_test_timestamp)

In [31]:
# After the split cell, encoded_Xb / encoded_y hold only the samples that were
# not moved to the test set; expose them under explicit training names.
X_train_b, y_train = encoded_Xb, encoded_y

In [32]:
# Shapes of the final train/test arrays followed by the test bookkeeping arrays.
for split_array in (
    X_train_b,
    X_test_b,
    y_train,
    y_test,
    X_test_b_flatten,
    X_test_b_timestamp,
    y_test_timestamp,
):
    print(np.shape(split_array))

(30048, 8, 12, 3)
(4608, 8, 12, 3)
(30048, 96)
(4608, 96)
(4608, 288)
(4608, 288)
(4608, 96)


In [33]:
def _read_result(filename):
    """Load one saved prediction table from RESULT_DIR, first column as index."""
    return pd.read_csv(RESULT_DIR + filename, index_col=0)


# Saved predictions of every model that is compared below.
PhaCIA_TCN = _read_result("PhaCIA_TCN.csv")
ETR = _read_result("etr.csv")
CNN = _read_result("Image_CNN.csv")
SVR = _read_result("SVR.csv")
XGB = _read_result("XGBoost.csv")
Proposed = _read_result("proposed_rolling16.csv")

In [34]:
def _regression_scores(actual, predicted):
    """Return ``(mse, rmse, mae, mape, r2)`` for one pair of value tables."""
    mse = mean_squared_error(actual, predicted)
    return (
        mse,
        math.sqrt(mse),
        mean_absolute_error(actual, predicted),
        mean_absolute_percentage_error(actual, predicted),
        r2_score(actual, predicted),
    )


def cal_error(y_test, y_pred):
    """Print error metrics for a forecast, on the scaled and the original scale.

    Parameters
    ----------
    y_test, y_pred : 2-D array-likes of equal shape
        True and predicted values in the scaled space produced by the
        notebook-level ``scaler``.

    The first report is computed on the inputs as given. The second one is
    computed after mapping both inputs back with ``scaler.inverse_transform``.
    Nothing is returned; the metrics are only printed.

    NOTE(review): MAPE on min-max scaled values can become very large where
    the true value is close to 0 -- interpret the first ``mape`` with care.
    """
    rule = "-" * 86

    # Metrics in the scaled space.
    mse, rmse, mae, mape, r2 = _regression_scores(y_test, y_pred)
    print(rule)
    print(f'mse: {mse:.4f}')
    print(f'rmse: {rmse:.4f}')
    print(f'mae: {mae:.4f}')
    print(f'mape: {mape: .4f}')
    print(f'r2: {r2:.4f}')
    print(rule)

    # Same metrics after undoing the scaling.
    y_test_inv = scaler.inverse_transform(y_test)
    y_pred_inv = scaler.inverse_transform(y_pred)
    mse_inv, rmse_inv, mae_inv, mape_inv, r2_inv = _regression_scores(y_test_inv, y_pred_inv)
    print(rule)
    print(f'mse_inv: {mse_inv:.4f}')
    print(f'rmse_inv: {rmse_inv:.4f}')
    print(f'mae_inv: {mae_inv:.4f}')
    print(f'mape_inv: {mape_inv: .4f}')
    print(f'r2_inv: {r2_inv:.4f}')
    print(rule)