In [23]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import xgboost as xgb
import tensorflow as tf
import os
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor

In [24]:
# Filesystem locations used by the notebook.
# NOTE: despite the *_DIR suffix, DATA_DIR and TRAINING_HISTORY_DIR hold file
# paths; only TEST_PLOT_DIR is a directory.
TRAINING_HISTORY_DIR = "./training_history/xgboost.png"  # a .png path; not referenced in the visible cells
TEST_PLOT_DIR = "./test_plots/xgboost/"  # folder receiving the per-day prediction plots
DATA_DIR = "./load.csv"  # input CSV read with pandas ('Timestamp' and 'Load' columns are used)

In [25]:
# Make sure every output folder exists before anything is written to it.
# exist_ok=True makes the call idempotent and avoids the check-then-create race
# of `if not os.path.exists(...)`; it also raises if the path exists but is a
# regular file instead of silently skipping it.
for output_dir in (TEST_PLOT_DIR, "./model", "./training_history"):
    os.makedirs(output_dir, exist_ok=True)

In [26]:
# Load the raw series; the cells below use its 'Timestamp' and 'Load' columns.
data = pd.read_csv(DATA_DIR)

# Convert the 'Timestamp' column to datetime format
data['Timestamp'] = pd.to_datetime(data['Timestamp'], format='%Y/%m/%d %H:%M')

# Sort the data chronologically. A stable sort keeps the file order of rows
# that share a timestamp, so an already-sorted CSV is left unchanged, while an
# unsorted one no longer yields days whose readings are out of time order.
data = data.sort_values('Timestamp', kind='mergesort').reset_index(drop=True)

# Largest raw load, captured before the column is overwritten with scaled values.
maxValue = np.max(data['Load'])

# Min-max scale the load; `scaler` is kept so predictions can be mapped back
# to the original units later.
# NOTE(review): the scaler is fitted on the full series before any train/test
# split, so held-out days influence the scaling range - confirm this is intended.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data['Load'].to_numpy().reshape(-1, 1))
print(data_scaled)
# Assign a 1-D array: a DataFrame column is one-dimensional, so do not rely on
# pandas accepting an (n, 1) array here.
data['Load'] = data_scaled.ravel()

# Bucket the rows by calendar day (one group per day).
groups = data.groupby(pd.Grouper(key='Timestamp', freq='D'))

[[0.4454918 ]
 [0.42704918]
 [0.4454918 ]
 ...
 [0.36557377]
 [0.33729508]
 [0.25368852]]


In [27]:
# Stack the daily groups into a matrix with one row per day, each row holding
# that day's 'Load' readings.
# The loop variable is deliberately NOT named `data`: reusing that name would
# rebind the notebook-level DataFrame to the last day's slice and silently
# break any cell that is (re-)run afterwards.
train_df = np.array([day_frame["Load"].to_numpy() for _, day_frame in groups])

# A rectangular matrix requires every day to have the same number of readings.
assert train_df.ndim == 2, "every day must contain the same number of readings"

In [28]:
# Inspect the per-day load matrix (NumPy abbreviates large arrays in the repr).
display(train_df)

array([[0.4454918 , 0.42704918, 0.4454918 , ..., 0.37622951, 0.35942623,
        0.39508197],
       [0.29672131, 0.27581967, 0.41188525, ..., 0.2795082 , 0.31147541,
        0.29795082],
       [0.32704918, 0.32704918, 0.29795082, ..., 0.29139344, 0.3       ,
        0.31393443],
       ...,
       [0.26803279, 0.38032787, 0.2647541 , ..., 0.36557377, 0.33729508,
        0.25368852],
       [0.26803279, 0.38032787, 0.2647541 , ..., 0.36557377, 0.33729508,
        0.25368852],
       [0.26803279, 0.38032787, 0.2647541 , ..., 0.36557377, 0.33729508,
        0.25368852]])

In [29]:
def create_dataset(data, horizon=1):
    """Build supervised (features, target) pairs from a row-per-day matrix.

    For every anchor index ``i`` the feature vector is the concatenation of
    the two rows preceding the anchor (``data[i-2:i]``) and the same two rows
    one week earlier (``data[i-9:i-7]``), each flattened. The target is the
    row ``data[i + horizon]``.

    Parameters
    ----------
    data : array-like
        Matrix whose first axis indexes consecutive days (rows are treated as
        days by the slicing below). Anything ``np.asarray`` accepts.
    horizon : int, default 1
        Offset from the anchor row to the target row. The default reproduces
        the original behaviour, in which row ``i`` itself is neither an input
        nor the target, i.e. there is a one-row gap between the newest input
        row (``i - 1``) and the target (``i + 1``).
        NOTE(review): if the intent is to predict the day immediately after
        the inputs, pass ``horizon=0`` - confirm against how the external
        test set was generated before changing the default.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, 4 * row_size)
        Flattened input windows.
    y : numpy.ndarray, shape (n_samples, ...row shape...)
        Target rows.
        ``n_samples`` is ``max(0, len(data) - 9 - horizon)``. When it is zero
        the returned arrays are empty but still have the correct trailing
        dimensions.

    Raises
    ------
    ValueError
        If ``data`` is zero-dimensional or ``horizon`` is negative.
    """
    data = np.asarray(data)
    if data.ndim == 0:
        raise ValueError("data must have at least one dimension (rows = days)")
    if horizon < 0:
        raise ValueError("horizon must be non-negative")

    recent_days = 2   # rows taken immediately before the anchor
    week = 7          # distance back to the matching rows of the previous week
    first_anchor = week + recent_days  # earliest index with a full week-old window

    X, y = [], []
    for i in range(first_anchor, len(data) - horizon):
        last_two_days = data[i - recent_days:i].flatten()
        last_week_same_days = data[i - week - recent_days:i - week].flatten()

        X.append(np.concatenate([last_two_days, last_week_same_days]))
        y.append(data[i + horizon])

    if not X:
        # Too few rows for a single sample: return empty arrays that still
        # carry the feature/target widths so downstream shape logic works.
        row_shape = data.shape[1:]
        row_size = int(np.prod(row_shape))
        return (
            np.empty((0, 2 * recent_days * row_size), dtype=data.dtype),
            np.empty((0, *row_shape), dtype=data.dtype),
        )

    return np.array(X), np.array(y)

In [30]:
# Turn the per-day matrix into model inputs (X) and targets (y), then report
# the dimensions of both arrays.
X, y = create_dataset(train_df)
print(f'X shape: {X.shape}', f'y shape: {y.shape}', sep='\n')

X shape: (355, 384)
y shape: (355, 96)


In [31]:
# Hold out 15% of the samples; the fixed seed makes the partition repeatable.
# NOTE(review): no `shuffle` argument is passed here, so scikit-learn's default
# applies (presumably a random, non-chronological split) - confirm this is
# acceptable for windows built from overlapping days.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

In [32]:
# Hyper-parameters of the gradient-boosted regressor, gathered in one place so
# they are easy to review and tune.
xgb_params = {
    "objective": 'reg:squarederror',
    "learning_rate": 0.01110442342472048,
    "max_depth": 9,
    "min_child_weight": 9,
    "n_estimators": 126,
}
xgboost = xgb.XGBRegressor(**xgb_params)

# Fit on the training split. Kept as the cell's final expression so the
# notebook renders the fitted estimator.
xgboost.fit(X_train, y_train)

In [47]:
# External evaluation set: each row stores the feature columns followed by the
# target columns (first CSV column is the index).
testing_data = pd.read_csv("test_load_data.csv", index_col=0)

# Derive the column split from the training arrays instead of hard-coding the
# feature width, and fail loudly if the file does not match that layout.
n_features = X_train.shape[1]
n_targets = y_train.shape[1]
if testing_data.shape[1] != n_features + n_targets:
    raise ValueError(
        f"test_load_data.csv has {testing_data.shape[1]} columns, "
        f"expected {n_features} feature + {n_targets} target columns"
    )

# NOTE: this rebinds X_test / y_test, replacing the hold-out portion produced
# by train_test_split with the rows read from the file.
X_test = testing_data.iloc[:, :n_features].to_numpy()
y_test = testing_data.iloc[:, n_features:].to_numpy()
display(X_test.shape)
display(y_test.shape)

(54, 384)

(54, 96)

In [50]:
# Predict a full day (every time step) for each row of the test set.
y_pred = xgboost.predict(X_test)

# Error metrics, computed on the min-max-scaled values (not in kWh).
# NOTE(review): MAPE on scaled data divides by values that can be close to 0
# (the series minimum maps to 0) - consider reporting it on the
# inverse-transformed values instead.
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)


print("-" * 86)
print(f'mse: {mse:.4f}')
print(f'rmse: {rmse:.4f}')
print(f'mae: {mae:.4f}')
print(f'mape: {mape:.4f}')
print("-" * 86)

# Map predictions and ground truth back to the original units. The scaler was
# fitted on a single column, so feed it an (n, 1) array and restore the
# (days, time steps) layout afterwards rather than relying on broadcasting.
pred_data = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)
actual_data = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(y_test.shape)

# Shared x-axis (1-based time-step index). Built once, and under its own name
# so the feature matrix `X` is not overwritten by the plotting loop.
time_steps = np.arange(1, actual_data.shape[1] + 1, 1)

# One figure per test day, saved to TEST_PLOT_DIR and closed immediately so
# open figures do not accumulate in memory.
for day_number, (day_pred, day_actual) in enumerate(zip(pred_data, actual_data), start=1):
    fig, ax = plt.subplots(figsize=(16, 6))
    ax.set_title(f"Xgboost Day {day_number} Prediction")
    ax.plot(time_steps, day_pred, label='Predict')
    ax.plot(time_steps, day_actual, label='Actual')
    ax.set_ylim(0, 30)
    ax.set_xlabel('Time step')
    ax.set_ylabel('Usage (kWh)')
    ax.legend()
    fig.savefig(os.path.join(TEST_PLOT_DIR, f"Day{day_number}.png"))
    plt.close(fig)

--------------------------------------------------------------------------------------
mse: 0.0094
rmse: 0.0968
mae: 0.0690
mape: 0.1861
--------------------------------------------------------------------------------------
