In [793]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import xgboost as xgb
import tensorflow as tf
import os
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor

In [794]:
# Paths used by this notebook.
# NOTE: despite the *_DIR suffix, DATA_DIR and TRAINING_HISTORY_DIR look like
# file paths (.csv / .png); only TEST_PLOT_DIR is a directory.
DATA_DIR = "./load.csv"  # input CSV, read with pd.read_csv
TEST_PLOT_DIR = "./test_plots/svm_xgboost/"  # per-day prediction plots are saved under this directory
TRAINING_HISTORY_DIR = "./training_history/svm_xgboost.png"  # not referenced by the cells visible here

In [795]:
# Make sure every output directory exists before anything is written to it.
# exist_ok=True makes the call idempotent (safe on re-run) and avoids the
# check-then-create race of `if not os.path.exists(...)`. It still raises
# FileExistsError if the path exists but is a regular file, which surfaces a
# broken layout early instead of failing later at save time.
for out_dir in (TEST_PLOT_DIR, "./model", "./training_history"):
    os.makedirs(out_dir, exist_ok=True)

In [796]:
# Read the raw CSV and parse the 'Timestamp' strings
# (year/month/day hour:minute) into real datetimes.
data = pd.read_csv(DATA_DIR).assign(
    Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'], format='%Y/%m/%d %H:%M')
)

# Bucket the rows by calendar day (one group per day of 'Timestamp').
groups = data.groupby(pd.Grouper(freq='D', key='Timestamp'))

In [797]:
# Build the daily-load matrix: one row per day, holding that day's "Load"
# readings in their grouped order.
# A comprehension is used so that no loop variable leaks into the notebook
# namespace; in particular the full `data` DataFrame is not rebound to the
# last day's sub-frame.
daily_loads = [day_frame["Load"].tolist() for _, day_frame in groups]

# Every day must contribute the same number of readings, otherwise the rows
# cannot be stacked into a rectangular 2-D array.
readings_per_day = {len(row) for row in daily_loads}
if len(readings_per_day) > 1:
    raise ValueError(
        f"Days have differing numbers of Load readings: {sorted(readings_per_day)}"
    )

# NOTE: despite its name, train_df is a NumPy array, not a DataFrame.
train_df = np.array(daily_loads)

In [798]:
# Min-max scale the daily-load matrix column by column (i.e. per position
# within the day) and keep the fitted scaler so predictions can be mapped
# back to the original units later.
# NOTE(review): the scaler is fitted on every day, before the train/test split
# further down, so held-out days contribute to the scaling statistics.
scaler = MinMaxScaler()
train_df = scaler.fit_transform(train_df)

In [799]:
# Sanity check: show the scaled daily-load matrix (NumPy abbreviates large arrays).
display(train_df)

array([[0.81048387, 0.70979899, 0.76529338, ..., 0.53458382, 0.59077381,
        0.70172911],
       [0.32258065, 0.24623116, 0.66292135, ..., 0.25791325, 0.41666667,
        0.36023055],
       [0.42204301, 0.40326633, 0.31585518, ..., 0.2919109 , 0.375     ,
        0.41642651],
       ...,
       [0.22849462, 0.56658291, 0.21473159, ..., 0.50410317, 0.51041667,
        0.20461095],
       [0.22849462, 0.56658291, 0.21473159, ..., 0.50410317, 0.51041667,
        0.20461095],
       [0.22849462, 0.56658291, 0.21473159, ..., 0.50410317, 0.51041667,
        0.20461095]])

In [800]:
def create_dataset(data):
    """Turn a per-day matrix into supervised feature/target arrays.

    `data` is indexed by day along its first axis (one row per day).
    For every anchor index ``i`` in ``range(9, len(data) - 1)``:

    * features: rows ``i-2`` and ``i-1`` followed by rows ``i-9`` and ``i-8``
      (the same two days one week earlier), each flattened and concatenated
      into a single 1-D vector;
    * target: row ``i+1``.

    NOTE(review): row ``i`` itself is neither an input nor the target, so the
    forecast skips one day after the most recent input. Confirm this gap is
    intentional and not an off-by-one.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays with one entry per anchor index.
        Both arrays are empty when ``data`` has fewer than 11 rows.
    """
    anchors = range(9, len(data) - 1)

    def window(i):
        # Two most recent days, then the matching two days a week before.
        recent = data[i - 2:i].flatten()
        week_earlier = data[i - 9:i - 7].flatten()
        return np.concatenate([recent, week_earlier])

    features = [window(i) for i in anchors]
    targets = [data[i + 1] for i in anchors]
    return np.array(features), np.array(targets)
# Build the supervised (features, targets) arrays from the scaled daily-load matrix.
X, y = create_dataset(train_df)

In [801]:
# Report the dimensions of the windowed dataset, one array per line.
print(f'X shape: {X.shape}', f'y shape: {y.shape}', sep='\n')

X shape: (355, 384)
y shape: (355, 96)


In [802]:
# First split: keep 40% of the samples for training the SVR and hold out 60%.
# NOTE(review): train_test_split shuffles by default, and consecutive samples
# built by create_dataset share input days, so the resulting subsets are not
# independent in time. Confirm a random split is acceptable here.
X_train, X_test_val, y_train, y_test_val = train_test_split(X, y, test_size=0.6, random_state=35)

# Second split: of the held-out 60%, 70% becomes the validation set (used below
# to fit the residual model) and 30% becomes the final test set.
X_val, X_test, y_val, y_test = train_test_split(X_test_val, y_test_val, test_size=0.3, random_state=42)

In [803]:
# Summarise the shape of every split, one line per array.
print(
    f'X_train shape: {X_train.shape}',
    f'y_train shape: {y_train.shape}',
    f'X_val shape: {X_val.shape}',
    f'y_val shape: {y_val.shape}',
    f'X_test shape: {X_test.shape}',
    f'y_test shape: {y_test.shape}',
    sep='\n',
)

X_train shape: (142, 384)
y_train shape: (142, 96)
X_val shape: (149, 384)
y_val shape: (149, 96)
X_test shape: (64, 384)
y_test shape: (64, 96)


In [804]:
# Stage 1: RBF-kernel support vector regression.
# SVR predicts a single value, so MultiOutputRegressor wraps it to fit one
# independent SVR per target column.
svr = SVR(kernel='rbf', C=10, epsilon=0.05)
multioutput_svr = MultiOutputRegressor(svr)
multioutput_svr.fit(X_train, y_train)

In [805]:
# Predict the validation set with the stage-1 SVR and keep what it got wrong:
# residual = actual - predicted. This is the training target for stage 2.
y_pred_ori = multioutput_svr.predict(X_val)
residual = y_val - y_pred_ori

In [806]:
# Stage 2: gradient-boosted trees trained to predict the SVR's residual.
# Note that `xgboost` here is the fitted estimator instance; the library
# itself is imported as `xgb`.
# NOTE(review): 2000 trees of depth 6 are fitted on the validation split only;
# check for overfitting of the residual model.
xgboost = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=2000,
    max_depth=6,                    # maximum depth of each tree
)

# Fit on the validation inputs with the SVR residuals as targets.
xgboost.fit(X_val, residual)

In [807]:
# Run both stages on the test inputs: the SVR forecast and the XGBoost
# estimate of the SVR's residual for the same samples.
svr_pred = multioutput_svr.predict(X_test)
xgboost_pred = xgboost.predict(X_test)

In [808]:
# Both prediction arrays should have the same shape so they can be added.
print(svr_pred.shape, xgboost_pred.shape, sep='\n')

(64, 96)
(64, 96)


In [809]:
# Baseline evaluation: SVR forecast only, scored on the test set.
# The loss is computed on the MinMax-scaled values, not the original units.
loss = mean_squared_error(y_test, svr_pred)

print("-" * 86)
print(f'Test Loss: {loss:.4f}')
print("-" * 86)

# Undo the scaling so the plots show the original load values.
pred_data = scaler.inverse_transform(svr_pred)
actual_data = scaler.inverse_transform(y_test)

# The time axis is identical for every sample, so build it once.
# It is named `time_steps` rather than `X` so the feature matrix `X` created
# earlier is not overwritten (which would break re-running the split cell).
time_steps = np.arange(1, actual_data.shape[1] + 1)

# Save one predicted-vs-actual figure per test sample.
for i in range(actual_data.shape[0]):
    y_p1 = pred_data[i]
    y_a1 = actual_data[i]
    fig, ax = plt.subplots(figsize=(16, 6))
    ax.set_title(f"Time Series {i+1} prediction result")
    ax.plot(time_steps, y_p1, label='Predict')
    ax.plot(time_steps, y_a1, label='Actual')
    ax.set_ylim(0, 30)  # fixed range so all figures are comparable; values outside it are not visible
    ax.set_xlabel('Time step')
    ax.set_ylabel('Usage (kWh)')
    ax.legend()
    fig.savefig(TEST_PLOT_DIR+f"Day{i+1}_without_residual.png")
    plt.close(fig)  # release the figure; many are created in this loop

--------------------------------------------------------------------------------------
Test Loss: 0.0194
--------------------------------------------------------------------------------------


In [810]:
# Combined evaluation: SVR forecast plus the XGBoost-predicted residual,
# scored on the test set. The loss is on the MinMax-scaled values.
y_pred = svr_pred + xgboost_pred
loss = mean_squared_error(y_test, y_pred)

print("-" * 86)
print(f'Test Loss: {loss:.4f}')
print("-" * 86)

# Undo the scaling so the plots show the original load values.
pred_data = scaler.inverse_transform(y_pred)
actual_data = scaler.inverse_transform(y_test)

# The time axis is identical for every sample, so build it once.
# It is named `time_steps` rather than `X` so the feature matrix `X` created
# earlier is not overwritten (which would break re-running the split cell).
time_steps = np.arange(1, actual_data.shape[1] + 1)

# Save one predicted-vs-actual figure per test sample.
for i in range(actual_data.shape[0]):
    y_p2 = pred_data[i]
    y_a2 = actual_data[i]
    fig, ax = plt.subplots(figsize=(16, 6))
    ax.set_title(f"Time Series {i+1} prediction result")
    ax.plot(time_steps, y_p2, label='Predict')
    ax.plot(time_steps, y_a2, label='Actual')
    ax.set_ylim(0, 30)  # fixed range so all figures are comparable; values outside it are not visible
    ax.set_xlabel('Time step')
    ax.set_ylabel('Usage (kWh)')
    ax.legend()
    fig.savefig(TEST_PLOT_DIR+f"Day{i+1}_with_residual.png")
    plt.close(fig)  # release the figure; many are created in this loop

--------------------------------------------------------------------------------------
Test Loss: 0.0235
--------------------------------------------------------------------------------------
