In [4]:
#import lightgbm
import random
import pandas as pd
import numpy as np
import os
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, PredefinedSplit, KFold, TimeSeriesSplit
from sklearn.metrics import make_scorer
import matplotlib.pyplot as plt
from tqdm import tqdm
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.utils.plotting import plot_series

import warnings
# Silence every warning for the rest of the session: no category, message or
# module filter is given, so the 'ignore' action applies to all of them.
warnings.filterwarnings(action='ignore')

def seed_everything(seed):
    """Seed the random number sources used by this notebook.

    Writes ``str(seed)`` to the ``PYTHONHASHSEED`` environment variable and
    seeds both the standard-library ``random`` module and NumPy's global
    generator with the same value.

    Parameters
    ----------
    seed : int
        Seed value applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)

seed_everything(42)  # fix the seed

In [5]:
## Load the preprocessed data.
## Each CSV carries an 'Unnamed: 0' column that is dropped right after reading
## (presumably the index written out when the file was saved -- TODO confirm).
merged_train_df = pd.read_csv('./data/train_preprocessed_holly3_lbw.csv').drop(columns='Unnamed: 0')
merged_test_df = pd.read_csv('./data/test_preprocessed_holly3_lbw.csv').drop(columns='Unnamed: 0')
final_train_df = pd.read_csv('./data/final_train_preprocessed_holly3_lbw.csv').drop(columns='Unnamed: 0')
merged_valid_df = pd.read_csv('./data/valid_preprocessed_holly3_lbw.csv').drop(columns='Unnamed: 0')

## Hyperparameter table for the XGBoost models.
## NOTE(review): this file is tagged 'holly2' while the data files are tagged
## 'holly3' -- confirm the pairing is intended.
xgb_params = pd.read_csv('./parameters/hyperparameter_xgb_holly2_lbw.csv')

In [6]:
def weighted_mse(alpha = 1):
    """Build an asymmetric squared-error objective.

    The returned callable takes ``(label, pred)`` and returns the pair
    ``(grad, hess)`` of a squared error whose weight is ``alpha`` wherever the
    label exceeds the prediction (``label - pred > 0``) and 1 everywhere else.

    Parameters
    ----------
    alpha : number, default 1
        Weight applied to samples where the prediction is below the label.

    Returns
    -------
    callable
        ``weighted_mse_fixed(label, pred) -> (grad, hess)``; both inputs must
        support subtraction and ``.astype`` (e.g. NumPy arrays).
    """
    def weighted_mse_fixed(label, pred):
        error = (label - pred).astype("float")
        # Per-sample weight: alpha where the prediction is too low, 1 otherwise.
        weight = np.where(error > 0, alpha, 1.0)
        # First and second derivative of weight * (label - pred)**2 w.r.t. pred.
        return -2 * weight * error, 2 * weight
    return weighted_mse_fixed

 # Metric used to score the predictions
def SMAPE(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computes ``100 / n * sum(2 * |y_pred - y_true| / (|y_true| + |y_pred|))``
    with ``n = len(y_true)``.  Positions where both values are zero contribute
    0 instead of the NaN a plain 0/0 division produces, so one zero/zero pair
    no longer turns the whole score into NaN.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values, compared position by position.

    Returns
    -------
    float
        The SMAPE score as a percentage.

    Raises
    ------
    ZeroDivisionError
        If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = 2 * np.abs(y_pred - y_true)
    scale = np.abs(y_true) + np.abs(y_pred)

    # Divide only where the denominator is non-zero; every other slot keeps the
    # 0 it was initialised with through `out`.
    ratio = np.divide(abs_error, scale, out=np.zeros_like(abs_error), where=scale != 0)
    return 100/len(y_true) * np.sum(ratio)

In [31]:
preds = np.array([])   # seed-averaged validation predictions of every processed building, concatenated
scores = []            # one validation SMAPE per processed building
seeds = [0,1,2,3,4,5]

# Only building numbers 1 through 9 are processed in this cell.
for i in tqdm(np.arange(1,10,1)):

    # The train/validation slices depend only on the building, so they are
    # built once per building instead of once per seed.
    train_rows = merged_train_df.loc[merged_train_df.building_number == i]
    valid_rows = merged_valid_df.loc[merged_valid_df.building_number == i]
    y_train = train_rows['power_consumption']
    x_train = train_rows.drop(['power_consumption'], axis=1)
    y_test = valid_rows['power_consumption']
    x_test = valid_rows.drop(['power_consumption'], axis=1)

    # Row i-1 of xgb_params is read by position as the settings for building i.
    # Each scalar is fetched on its own so it keeps its column's dtype
    # (pulling the whole row as one Series could turn integer settings into floats).
    row = i - 1
    n_estimators = xgb_params.iloc[row, 7]
    min_child_weight = xgb_params.iloc[row, 2]
    max_depth = xgb_params.iloc[row, 3]
    colsample_bytree = xgb_params.iloc[row, 4]
    subsample = xgb_params.iloc[row, 5]
    alpha = xgb_params.iloc[row, 6]

    seed_preds = {}   # seed -> validation predictions of the model trained with that seed

    for seed in seeds: # one model per seed
        xgb = XGBRegressor(seed = seed, n_estimators = n_estimators, eta = 0.01,
                           min_child_weight = min_child_weight, max_depth = max_depth,
                           colsample_bytree = colsample_bytree, subsample = subsample)

        if alpha != 0:  # a non-zero alpha switches to the weighted_mse objective
            xgb.set_params(objective = weighted_mse(alpha))

        xgb.fit(x_train, y_train)
        y_pred = xgb.predict(x_test)
        seed_preds[seed] = y_pred

    # One column per seed; the prediction for building i is the row-wise mean.
    pred_df = pd.DataFrame(seed_preds)
    pred = pred_df.mean(axis=1)
    pred_np = pred.to_numpy()
    y_test_np = y_test.to_numpy()
    score = SMAPE(y_test_np, pred_np)

    scores.append(score)
    preds = np.append(preds, pred)

100%|██████████| 9/9 [00:43<00:00,  4.79s/it]


In [30]:
i = 55                 # building number evaluated in this cell
seeds = [0,1,2,3,4,5]

# The train/validation slices depend only on the building, so they are built
# once instead of once per seed.
train_rows = merged_train_df.loc[merged_train_df.building_number == i]
valid_rows = merged_valid_df.loc[merged_valid_df.building_number == i]
y_train = train_rows['power_consumption']
x_train = train_rows.drop(['power_consumption'], axis=1)
y_test = valid_rows['power_consumption']
x_test = valid_rows.drop(['power_consumption'], axis=1)

# Row i-1 of xgb_params is read by position as the settings for building i.
# Each scalar is fetched on its own so it keeps its column's dtype
# (pulling the whole row as one Series could turn integer settings into floats).
row = i - 1
n_estimators = xgb_params.iloc[row, 7]
min_child_weight = xgb_params.iloc[row, 2]
max_depth = xgb_params.iloc[row, 3]
colsample_bytree = xgb_params.iloc[row, 4]
subsample = xgb_params.iloc[row, 5]
alpha = xgb_params.iloc[row, 6]

seed_preds = {}   # seed -> validation predictions of the model trained with that seed

for seed in seeds: # one model per seed
    xgb = XGBRegressor(seed = seed, n_estimators = n_estimators, eta = 0.01,
                        min_child_weight = min_child_weight, max_depth = max_depth,
                        colsample_bytree = colsample_bytree, subsample = subsample)

    if alpha != 0:  # a non-zero alpha switches to the weighted_mse objective
        xgb.set_params(objective = weighted_mse(alpha))

    xgb.fit(x_train, y_train)
    y_pred = xgb.predict(x_test)
    seed_preds[seed] = y_pred

# One column per seed; the prediction for building i is the row-wise mean.
pred_df = pd.DataFrame(seed_preds)
pred = pred_df.mean(axis=1)
pred_np = pred.to_numpy()
y_test_np = y_test.to_numpy()
score = SMAPE(y_test_np, pred_np)

print(score)
# The score and predictions of this single-building run are not appended to
# `scores` / `preds`.

1.494984194488569
