In [7]:
#import lightgbm
import random
import pandas as pd
import numpy as np
import os
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import PredefinedSplit, GridSearchCV, TimeSeriesSplit
from sktime.forecasting.model_selection import temporal_train_test_split
from bayes_opt import BayesianOptimization
from sktime.utils.plotting import plot_series
from xgboost import XGBRegressor
import time
from sklearn.metrics import make_scorer

import warnings

# Timestamp (month-day, hour-minute in local time) captured once at start-up;
# it is embedded in the names of the hyperparameter CSV files written below.
now_local = time.localtime(time.time())
t = time.strftime('%m%d-%H%M', now_local)

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

def seed_everything(seed):
    """Seed the random number sources used by this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds both the
    standard-library ``random`` module and NumPy's legacy global generator.

    Args:
        seed: Integer seed applied to every source.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

seed_everything(42) # Fix the random seed so runs are repeatable
# The real objective function is wrapped in a factory that takes `alpha` as an
# argument, so the asymmetry weight can be adjusted easily.
# Custom objective that discourages the model from underestimating.
def weighted_mse(alpha = 1):
    """Build an asymmetric squared-error objective.

    Args:
        alpha: Multiplier applied to the gradient and hessian of samples whose
            residual ``label - pred`` is positive (i.e. under-predictions).

    Returns:
        A callable ``(label, pred) -> (grad, hess)`` returning the per-sample
        first and second derivatives of the weighted squared error.
    """
    def weighted_mse_fixed(label, pred):
        diff = (label - pred).astype("float")
        underestimated = diff > 0
        # Plain squared error has gradient -2*diff and hessian 2; both are
        # scaled by alpha wherever the prediction falls below the label.
        grad = np.where(underestimated, -2*alpha*diff, -2*diff)
        hess = np.where(underestimated, 2*alpha, 2.0)
        return grad, hess

    return weighted_mse_fixed

# Evaluation metric used for scoring
def SMAPE(y_true, y_pred):
    """Symmetric mean absolute percentage error, in percent (0 to 200).

    Both inputs are converted to float ndarrays first, so the comparison is
    positional: two pandas Series carrying different indexes are no longer
    aligned by label (which would produce NaN terms).

    A term whose actual and predicted values are both zero is a perfect
    prediction and contributes 0 instead of the NaN that 0/0 would produce.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        ``100 / n * sum(2 * |y_pred - y_true| / (|y_true| + |y_pred|))``.

    Raises:
        ZeroDivisionError: If ``y_true`` is empty.
    """
    actual = np.asarray(y_true, dtype=float)
    forecast = np.asarray(y_pred, dtype=float)

    numerator = 2 * np.abs(forecast - actual)
    denominator = np.abs(actual) + np.abs(forecast)

    # Divide only where the denominator is non-zero; the remaining positions
    # keep the 0 they were initialised with (actual == forecast == 0).
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 100 / len(actual) * np.sum(terms)

In [27]:
# Table of tuned XGBoost hyperparameters. Rows are written at index
# `building number - 1` (see the `.loc[i-1]` assignment at the end of the loop).
hyperparameters = pd.DataFrame(columns = ['n_estimators', 'eta', 'min_child_weight','max_depth', 'colsample_bytree', 'subsample'])

# List of building numbers. NOTE(review): the name looks like a typo of `index`,
# and only its length is printed -- the loop below iterates over `[1]`, not this list.
idnex = [1,2,7,10,11,12,13,14,15,26,27,28,29,30,31,32,33,34,35,56,59,61,62,63,64,66,67,68,69,72,73,83,87,88,89,91,92,94,99,100]

print(len(idnex))
for i in [1]:
    print(f'building_{i}')
    train_df = pd.read_csv(f'./submit/train_building{i}.csv')
    
    # Separate the target column from the features, then hold out the last
    # 168 rows as the validation set (presumably one week of hourly data -- TODO confirm).
    t_y = train_df['power_consumption']
    t_x = train_df.drop(['power_consumption'], axis=1)
    y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = t_y, X = t_x, test_size = 168)
    
    # Define the objective function
    def xgb_evaluate(n_estimators, eta):
        """Score one (n_estimators, eta) candidate for the current building.

        Fits an XGBRegressor on the training split (x_train / y_train taken
        from the enclosing loop iteration) and returns the negated SMAPE on the
        validation split, because the optimizer below maximizes its target.
        `n_estimators` arrives as a float and is truncated to an int.
        """
        params = {
            'n_estimators': int(n_estimators),
            'eta': eta,
            'objective': 'reg:squarederror',
            
        }
        model = XGBRegressor(**params,tree_method="hist", gpu_id=0)
        model.fit(x_train, y_train)
        y_pred = model.predict(x_valid)
        return -SMAPE(y_valid, y_pred)

    # Create the Bayesian optimization object
    optimizer = BayesianOptimization(
        f=xgb_evaluate,
        pbounds={
            'n_estimators': (50, 10000),
            'eta': (0.001, 0.01)
        },
        random_state=42
    )

    # Run the optimization
    
    optimizer.maximize(init_points=7, n_iter=5)
    # Parameters of the best-scoring probe; n_estimators is cast back to int.
    best_params  = optimizer.max['params']
    best_params['n_estimators'] = int(best_params['n_estimators'])
    # Only 'n_estimators' and 'eta' are provided here; the other columns of
    # this row are not filled in by this cell.
    hyperparameters.loc[i-1] = best_params
    # Written inside the loop, so the CSV is rewritten after every building.
    hyperparameters.to_csv(f'./parameters/hyperparameter_xgb_wsw{t}.csv', index=False)

40
building_1
|   iter    |  target   |    eta    | n_esti... |
-------------------------------------------------
| [0m1        [0m | [0m-5.027   [0m | [0m0.004371 [0m | [0m9.51e+03 [0m |
| [0m2        [0m | [0m-5.128   [0m | [0m0.007588 [0m | [0m6.007e+03[0m |
| [0m3        [0m | [0m-5.448   [0m | [0m0.002404 [0m | [0m1.602e+03[0m |
| [0m4        [0m | [0m-5.07    [0m | [0m0.001523 [0m | [0m8.668e+03[0m |
| [0m5        [0m | [0m-5.108   [0m | [0m0.00641  [0m | [0m7.095e+03[0m |
| [0m6        [0m | [0m-5.085   [0m | [0m0.001185 [0m | [0m9.701e+03[0m |
| [0m7        [0m | [0m-5.062   [0m | [0m0.008492 [0m | [0m2.163e+03[0m |
| [0m8        [0m | [0m-5.132   [0m | [0m0.006163 [0m | [0m9.146e+03[0m |
| [0m9        [0m | [0m-5.146   [0m | [0m0.005031 [0m | [0m9.508e+03[0m |
| [0m10       [0m | [0m-5.066   [0m | [0m0.001912 [0m | [0m5.241e+03[0m |
| [0m11       [0m | [0m-5.185   [0m | [0m0.001051 [0m | [0m

In [33]:
# Grid search over the tree-shape / sampling parameters of XGBoost, keeping the
# n_estimators and eta values already stored in `hyperparameters` fixed.

# GridSearchCV maximizes its score, so SMAPE is registered as "lower is better".
smape_score = make_scorer(SMAPE, greater_is_better=False)

# Create the output directory up front so a missing folder cannot make the
# final save fail after the (long) search has finished.
os.makedirs('./specific', exist_ok=True)

for i in [1]:
    train_df = pd.read_csv(f'./submit/train_building{i}.csv')
    # pretest_df = pd.read_csv(f'./pretest/pretest_building{i}.csv')
    print(f"__________Buildding Number:{i} Hyperparameter Tuning__________")

    # Build the grid inside the loop so that it always uses the values stored
    # for the building being tuned (row index = building number - 1), rather
    # than whatever `i` was left over from a previously executed cell.
    grid = {'n_estimators': [int(hyperparameters.at[i-1, 'n_estimators'])],
            'eta': [hyperparameters.at[i-1, 'eta']],
            'min_child_weight': np.arange(1, 11, 1),
            'max_depth': np.arange(2, 8, 1),
            'colsample_bytree': [0.8, 0.9],
            'subsample': [0.8, 0.9]}

    y = train_df['power_consumption']
    x = train_df.drop(columns=['power_consumption'])

    # Single predefined fold: rows marked -1 are always used for training,
    # the last 168 rows (marked 0) form the validation fold.
    fold_markers = np.append(-np.ones(len(x)-168), np.zeros(168))
    gcv = GridSearchCV(estimator = XGBRegressor(tree_method="hist",  gpu_id=0, seed = 0),
                       param_grid=grid,
                       scoring=smape_score,
                       cv=PredefinedSplit(fold_markers),
                       refit=True, verbose=True)
    gcv.fit(x, y)

    best = gcv.best_estimator_
    params = gcv.best_params_
    # Store the grid-search winner. The original stored `best_params`, a
    # leftover from the Bayesian-optimization cell, which silently discarded
    # the result of this search.
    hyperparameters.loc[i-1] = params

    # Optional hold-out evaluation, currently disabled:
    # x_pretest = pretest_df.drop(columns=['power_consumption'])
    # y_pretest = pretest_df['power_consumption']
    # pred = best.predict(x_pretest)
    print("_____Best Parameters_____",params)
    # print("_____SMAPE Score________", SMAPE(y_pretest, pred))

hyperparameters.to_csv(f'./specific/hyperparameter_xgb_wsw{t}.csv', index=False) # save the tuned parameters


__________Buildding Number:1 Hyperparameter Tuning__________
Fitting 1 folds for each of 240 candidates, totalling 240 fits


KeyboardInterrupt: 

In [None]:
# Alpha search: for every building, compare the plain squared-error model with
# models trained on the asymmetric objective `weighted_mse(alpha)` and keep the
# alpha that gives the lowest validation SMAPE (0 means "plain model wins").

xgb_params = pd.read_csv(f'./parameters/hyperparameter_xgb_wsw{t}.csv')
# NOTE(review): `xgb_params` is loaded but never read below -- the loop uses the
# in-memory `hyperparameters` frame. Confirm which one is the intended source.

# Candidate weights for under-prediction errors.
ALPHA_CANDIDATES = [1, 3, 5, 7, 10, 25, 50, 75, 100]


def _build_xgb(tuned, objective=None):
    """Create an XGBRegressor from one row of tuned hyperparameters.

    Args:
        tuned: Mapping/Series with the keys 'n_estimators', 'eta',
            'min_child_weight', 'max_depth', 'colsample_bytree', 'subsample'.
        objective: Optional custom objective; when omitted the estimator keeps
            its default objective.

    Returns:
        An unfitted XGBRegressor.
    """
    settings = dict(
        seed = 0, tree_method="hist", gpu_id=0,
        # Tree counts and depths must be integers; the cast guards against
        # values that were stored as floats.
        n_estimators = int(tuned['n_estimators']),
        eta = tuned['eta'],
        min_child_weight = tuned['min_child_weight'],
        max_depth = int(tuned['max_depth']),
        colsample_bytree = tuned['colsample_bytree'],
        subsample = tuned['subsample'],
    )
    if objective is not None:
        settings['objective'] = objective
    return XGBRegressor(**settings)


alpha_list = []
smape_list = []
for i in tqdm(range(100)):
    train_df = pd.read_csv(f'./pretest/train_building{i+1}.csv')
    pretest_df = pd.read_csv(f'./pretest/pretest_building{i+1}.csv')

    # Hold out the last 168 rows of the training data for validation.
    t_y = train_df['power_consumption']
    t_x = train_df.drop(['power_consumption'], axis=1)
    y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = t_y, X = t_x, test_size = 168)

    p_y = pretest_df['power_consumption']
    p_x = pretest_df.drop(['power_consumption'], axis=1)

    # `i` is 0-based here and refers to building `i+1`; the tuning cells store
    # each building at row `building - 1`, i.e. row `i`. The original looked up
    # row `i-1`, which is the previous building (and row -1 for the first one).
    tuned = hyperparameters.loc[i]

    # Baseline: default objective, recorded as alpha = 0.
    xgb = _build_xgb(tuned)
    xgb.fit(x_train, y_train ,verbose=False)
    pred0 = xgb.predict(x_valid)
    best_alpha = 0
    score0 = SMAPE(y_valid,pred0)

    for j in ALPHA_CANDIDATES:
        xgb = _build_xgb(tuned, objective=weighted_mse(j))
        xgb.fit(x_train, y_train,verbose=False)

        pred1 = xgb.predict(x_valid)
        score1 = SMAPE(y_valid, pred1)
        # Keep the candidate only when it strictly improves the best score.
        if score1 < score0:
            best_alpha = j
            score0 = score1

    alpha_list.append(best_alpha)
    smape_list.append(score0)

    print("building {} || best score : {} || alpha : {}".format(i+1, score0, best_alpha))


# One alpha per building; requires `hyperparameters` to have exactly one row
# for each of the buildings processed above.
hyperparameters['alpha'] = alpha_list