In [60]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
import os
from scipy.signal import savgol_filter
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.cluster import KMeans

In [61]:
# Window-size constants. None of them is referenced in the visible cells;
# the values match the shapes printed after create_dataset (X has 384
# columns, y has 96 columns).
PREDICT_STEP = 96   # length of one target row
INPUT_STEP = 384    # length of one input row (4 x 96)
N_FEATURE = 1       # presumably the number of input series (only "Load" is read) - TODO confirm

In [62]:
# Input / output locations. Despite the *_DIR suffix, only TEST_PLOT_DIR is a
# directory; the other three are file paths.
DATA_DIR = "./load.csv"  # CSV read with pd.read_csv; must provide 'Timestamp' and 'Load' columns
TEST_PLOT_DIR = "./test_plots/savgol_xgboost_svm/"  # trailing slash matters: file names are appended by string concatenation
MODEL_FILE_DIR = "./model/savgol_xgboost_svm.keras"  # not used in the visible cells
TRAINING_HISTORY_DIR = "./training_history/savgol_xgboost_svm.png"  # not used in the visible cells

In [63]:
# Create every output directory up front. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of `if not os.path.exists(...)`.
for output_dir in (TEST_PLOT_DIR, "./model", "./training_history"):
    os.makedirs(output_dir, exist_ok=True)


In [64]:
# Load the raw readings from the CSV file.
data = pd.read_csv(DATA_DIR)
# Parse the 'Timestamp' column (text such as "2021/01/31 23:45") into datetimes.
data['Timestamp'] = pd.to_datetime(data['Timestamp'], format='%Y/%m/%d %H:%M')

# Group the rows by calendar day (freq='D'); no sorting is performed here.
groups = data.groupby(pd.Grouper(key='Timestamp', freq='D'))

In [65]:
# Build a (n_days, samples_per_day) matrix: one row of "Load" readings per
# calendar day. The loop variable is deliberately NOT called `data`, so the
# DataFrame loaded earlier stays available after this cell runs.
daily_loads = [list(day_frame["Load"]) for _, day_frame in groups]

# Every day must contribute the same number of readings, otherwise the rows
# cannot form a rectangular array. Fail early with a readable message.
samples_per_day = {len(day_load) for day_load in daily_loads}
if len(samples_per_day) > 1:
    raise ValueError(
        f"Days have differing numbers of samples: {sorted(samples_per_day)}"
    )

train_df = np.array(daily_loads)

In [66]:
# Min-max scale each column of the daily matrix. The fitted `scaler` is kept
# because later cells call scaler.inverse_transform on the predictions.
# NOTE(review): the scaler is fitted on every day, before the train/test split
# is made - confirm that this is acceptable for the reported test loss.
scaler = MinMaxScaler()
train_df = scaler.fit_transform(train_df)

In [67]:
# Show the scaled daily matrix (the array repr elides the middle rows/columns).
display(train_df)

array([[0.81048387, 0.70979899, 0.76529338, ..., 0.53458382, 0.59077381,
        0.70172911],
       [0.32258065, 0.24623116, 0.66292135, ..., 0.25791325, 0.41666667,
        0.36023055],
       [0.42204301, 0.40326633, 0.31585518, ..., 0.2919109 , 0.375     ,
        0.41642651],
       ...,
       [0.22849462, 0.56658291, 0.21473159, ..., 0.50410317, 0.51041667,
        0.20461095],
       [0.22849462, 0.56658291, 0.21473159, ..., 0.50410317, 0.51041667,
        0.20461095],
       [0.22849462, 0.56658291, 0.21473159, ..., 0.50410317, 0.51041667,
        0.20461095]])

In [68]:
def create_dataset(data):
    """Turn a per-day matrix into (input, target) samples.

    For every index ``i`` in ``range(9, len(data) - 1)``:

    * input  = rows ``i-2`` and ``i-1`` flattened, followed by rows ``i-9``
      and ``i-8`` flattened (the same two rows, seven rows earlier);
    * target = row ``i+1``.

    NOTE(review): row ``i`` itself is used neither as input nor as target -
    confirm that this one-row gap is intended.

    Args:
        data: 2-D array indexed as ``data[day]``.

    Returns:
        Tuple ``(X, y)`` of arrays. With fewer than 11 rows both are empty
        arrays of shape ``(0,)``.
    """
    anchors = range(9, len(data) - 1)

    inputs = [
        np.concatenate([
            data[anchor - 2:anchor].flatten(),          # the two most recent rows
            data[anchor - 9:anchor - 7].flatten(),      # same two rows, one week back
        ])
        for anchor in anchors
    ]
    targets = [data[anchor + 1] for anchor in anchors]

    return np.array(inputs), np.array(targets)

In [69]:
# Build the supervised samples from the scaled daily matrix and report their
# shapes as a sanity check.
X, y = create_dataset(train_df)
print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')

X shape: (355, 384)
y shape: (355, 96)


In [70]:
# Hold out 15% of the samples for testing; random_state pins the split.
# NOTE(review): consecutive samples from create_dataset share input rows, and
# train_test_split shuffles unless told otherwise - confirm that a random
# (non-chronological) split is intended for this time series.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

In [71]:
# Cluster the training inputs into 4 groups. The cluster count must stay in
# sync with the later cells, which hard-code the suffixes _0 .. _3.
kmeans = KMeans(n_clusters=4, random_state=10, n_init='auto')
kmeans.fit(X_train)

In [72]:
# Cluster index for every training sample.
X_train_label = kmeans.predict(X_train)

# Test samples are assigned with the model fitted on the training inputs only.
X_test_label = kmeans.predict(X_test)

In [73]:
def _split_by_cluster(features, targets, labels, n_clusters=4):
    """Partition paired feature/target rows by cluster label.

    Args:
        features: array whose first axis is the sample axis.
        targets: array with the same number of rows as ``features``.
        labels: 1-D integer array holding one cluster index per row.
        n_clusters: number of clusters; labels run from 0 to n_clusters - 1.

    Returns:
        Tuple ``(feature_parts, target_parts)``: two lists of length
        ``n_clusters`` where entry ``c`` holds the rows whose label is ``c``,
        in their original order.
    """
    feature_parts = [features[labels == cluster] for cluster in range(n_clusters)]
    target_parts = [targets[labels == cluster] for cluster in range(n_clusters)]
    return feature_parts, target_parts


# Training samples, one subset per cluster.
(
    (X_train_0, X_train_1, X_train_2, X_train_3),
    (y_train_0, y_train_1, y_train_2, y_train_3),
) = _split_by_cluster(X_train, y_train, X_train_label)

# Test samples, one subset per cluster.
(
    (X_test_0, X_test_1, X_test_2, X_test_3),
    (y_test_0, y_test_1, y_test_2, y_test_3),
) = _split_by_cluster(X_test, y_test, X_test_label)

In [74]:
def _smooth(values):
    """Return a Savitzky-Golay filtered copy of ``values`` (window_length=30, polyorder=5)."""
    return np.array(savgol_filter(values, 30, 5))


# Smoothed inputs and targets for every training cluster.
X_train_filtered_0, X_train_filtered_1, X_train_filtered_2, X_train_filtered_3 = map(
    _smooth, (X_train_0, X_train_1, X_train_2, X_train_3)
)
y_train_filtered_0, y_train_filtered_1, y_train_filtered_2, y_train_filtered_3 = map(
    _smooth, (y_train_0, y_train_1, y_train_2, y_train_3)
)

# Same treatment for every test cluster.
X_test_filtered_0, X_test_filtered_1, X_test_filtered_2, X_test_filtered_3 = map(
    _smooth, (X_test_0, X_test_1, X_test_2, X_test_3)
)
y_test_filtered_0, y_test_filtered_1, y_test_filtered_2, y_test_filtered_3 = map(
    _smooth, (y_test_0, y_test_1, y_test_2, y_test_3)
)

In [75]:
# Residual = raw signal minus its smoothed version. The residuals of the
# training clusters are what the XGBoost models are fitted on.
X_train_residual_0 = np.subtract(X_train_0, X_train_filtered_0)
X_train_residual_1 = np.subtract(X_train_1, X_train_filtered_1)
X_train_residual_2 = np.subtract(X_train_2, X_train_filtered_2)
X_train_residual_3 = np.subtract(X_train_3, X_train_filtered_3)

y_train_residual_0 = np.subtract(y_train_0, y_train_filtered_0)
y_train_residual_1 = np.subtract(y_train_1, y_train_filtered_1)
y_train_residual_2 = np.subtract(y_train_2, y_train_filtered_2)
y_train_residual_3 = np.subtract(y_train_3, y_train_filtered_3)


# Residuals of the test clusters.
X_test_residual_0 = np.subtract(X_test_0, X_test_filtered_0)
X_test_residual_1 = np.subtract(X_test_1, X_test_filtered_1)
X_test_residual_2 = np.subtract(X_test_2, X_test_filtered_2)
X_test_residual_3 = np.subtract(X_test_3, X_test_filtered_3)

y_test_residual_0 = np.subtract(y_test_0, y_test_filtered_0)
y_test_residual_1 = np.subtract(y_test_1, y_test_filtered_1)
y_test_residual_2 = np.subtract(y_test_2, y_test_filtered_2)
y_test_residual_3 = np.subtract(y_test_3, y_test_filtered_3)


In [76]:
# One XGBoost regressor per cluster, all built from the same hyper-parameters.
# Each model maps the input residual to the target residual of its cluster.
_xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 2500,
    'max_depth': 6,
}

xgboost0 = xgb.XGBRegressor(**_xgb_params)
xgboost1 = xgb.XGBRegressor(**_xgb_params)
xgboost2 = xgb.XGBRegressor(**_xgb_params)
xgboost3 = xgb.XGBRegressor(**_xgb_params)

xgboost0.fit(X_train_residual_0, y_train_residual_0)
xgboost1.fit(X_train_residual_1, y_train_residual_1)
xgboost2.fit(X_train_residual_2, y_train_residual_2)
xgboost3.fit(X_train_residual_3, y_train_residual_3)

In [77]:
# One RBF-kernel SVR template per cluster, each wrapped in a
# MultiOutputRegressor so that every target column gets its own regressor.
svr0, svr1, svr2, svr3 = (
    SVR(kernel='rbf', C=10, epsilon=0.05) for _cluster in range(4)
)
multioutput_svr0, multioutput_svr1, multioutput_svr2, multioutput_svr3 = (
    MultiOutputRegressor(base_svr) for base_svr in (svr0, svr1, svr2, svr3)
)

# Each wrapper learns smoothed input -> smoothed target for its own cluster.
multioutput_svr0.fit(X_train_filtered_0, y_train_filtered_0)
multioutput_svr1.fit(X_train_filtered_1, y_train_filtered_1)
multioutput_svr2.fit(X_train_filtered_2, y_train_filtered_2)
multioutput_svr3.fit(X_train_filtered_3, y_train_filtered_3)

In [79]:
# SVR forecasts of the smoothed target, from the smoothed test inputs.
pred_svr0 = multioutput_svr0.predict(X_test_filtered_0)
pred_svr1 = multioutput_svr1.predict(X_test_filtered_1)
pred_svr2 = multioutput_svr2.predict(X_test_filtered_2)
pred_svr3 = multioutput_svr3.predict(X_test_filtered_3)

# XGBoost forecasts of the target residual, from the test input residuals.
pred_xgboost0 = xgboost0.predict(X_test_residual_0)
pred_xgboost1 = xgboost1.predict(X_test_residual_1)
pred_xgboost2 = xgboost2.predict(X_test_residual_2)
pred_xgboost3 = xgboost3.predict(X_test_residual_3)

In [86]:
# Final forecast per cluster = SVR forecast of the smoothed curve plus the
# XGBoost forecast of the residual (the "compensation").
y_pred_0 = pred_svr0 + pred_xgboost0
y_pred_1 = pred_svr1 + pred_xgboost1
y_pred_2 = pred_svr2 + pred_xgboost2
y_pred_3 = pred_svr3 + pred_xgboost3

# Stack predictions and ground truth in the same cluster order so rows align.
y_pred = np.concatenate([y_pred_0, y_pred_1, y_pred_2, y_pred_3], axis=0)
y_test_clustered = np.concatenate([y_test_0, y_test_1, y_test_2, y_test_3], axis=0)
loss = mean_squared_error(y_test_clustered, y_pred)  # computed on the scaled values

# NOTE(review): the label text is kept unchanged so it matches the recorded
# output; the models evaluated here are SVR and XGBoost.
print("-" * 86)
print(f'Deep Learnong Testing Loss: {loss:.4f}')
print("-" * 86)

# Back to the original units for plotting.
pred_data = scaler.inverse_transform(y_pred)
actual_data = scaler.inverse_transform(y_test_clustered)

# x-axis shared by every plot. Deliberately NOT named `X`: that name holds the
# feature matrix, and overwriting it would break a re-run of the split cell.
time_steps = np.arange(1, actual_data.shape[1] + 1, 1)

# One figure per test sample; "Day" counts rows of the cluster-ordered test
# set, not calendar days.
for i in range(actual_data.shape[0]):
    y_p = pred_data[i]
    y_a = actual_data[i]
    e = mean_squared_error(y_a, y_p)  # per-sample MSE in original units

    fig, ax = plt.subplots(figsize=(16, 6))
    ax.set_title(f"Day {i+1} prediction result Loss:{e}")
    ax.plot(time_steps, y_p, label='Predict')
    ax.plot(time_steps, y_a, label='Actual')
    ax.set_ylim(0, 30)
    ax.set_xlabel('Time step')
    ax.set_ylabel('Usage (kWh)')
    ax.legend()
    fig.savefig(os.path.join(TEST_PLOT_DIR, f"Day{i+1}_with_compensate.png"))
    plt.close(fig)  # release the figure so the loop does not accumulate memory

--------------------------------------------------------------------------------------
Deep Learnong Testing Loss: 0.0208
--------------------------------------------------------------------------------------


In [87]:
# Baseline for comparison: SVR forecast only, without the XGBoost residual
# compensation. This rebinds y_pred_0 .. y_pred_3 and y_pred.
y_pred_0 = pred_svr0
y_pred_1 = pred_svr1
y_pred_2 = pred_svr2
y_pred_3 = pred_svr3

# Stack predictions and ground truth in the same cluster order so rows align.
y_pred = np.concatenate([y_pred_0, y_pred_1, y_pred_2, y_pred_3], axis=0)
y_test_clustered = np.concatenate([y_test_0, y_test_1, y_test_2, y_test_3], axis=0)
loss = mean_squared_error(y_test_clustered, y_pred)  # computed on the scaled values

# NOTE(review): the label text is kept unchanged so it matches the recorded
# output; the model evaluated here is the SVR alone.
print("-" * 86)
print(f'Deep Learnong Testing Loss: {loss:.4f}')
print("-" * 86)

# Back to the original units for plotting.
pred_data = scaler.inverse_transform(y_pred)
actual_data = scaler.inverse_transform(y_test_clustered)

# x-axis shared by every plot. Deliberately NOT named `X`: that name holds the
# feature matrix, and overwriting it would break a re-run of the split cell.
time_steps = np.arange(1, actual_data.shape[1] + 1, 1)

# One figure per test sample; "Day" counts rows of the cluster-ordered test
# set, not calendar days.
for i in range(actual_data.shape[0]):
    y_p = pred_data[i]
    y_a = actual_data[i]
    e = mean_squared_error(y_a, y_p)  # per-sample MSE in original units

    fig, ax = plt.subplots(figsize=(16, 6))
    ax.set_title(f"Day {i+1} prediction result Loss:{e}")
    ax.plot(time_steps, y_p, label='Predict')
    ax.plot(time_steps, y_a, label='Actual')
    ax.set_ylim(0, 30)
    ax.set_xlabel('Time step')
    ax.set_ylabel('Usage (kWh)')
    ax.legend()
    fig.savefig(os.path.join(TEST_PLOT_DIR, f"Day{i+1}_without_compensate.png"))
    plt.close(fig)  # release the figure so the loop does not accumulate memory

--------------------------------------------------------------------------------------
Deep Learnong Testing Loss: 0.0224
--------------------------------------------------------------------------------------
