In [7]:
import pandas as pd
import numpy as np
import optuna
from optuna.integration import XGBoostPruningCallback
from optuna.samplers import TPESampler
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error

In [8]:
def evaluation_metric(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def nrmse(y_true, y_pred):
    rmse_val = np.sqrt(mean_squared_error(y_true, y_pred))
    return rmse_val / (y_true.max() - y_true.min())

def test_error_percentage(score):
    return (score/4104996.104768301) * 100

def rmse(preds, dtrain):
    labels = dtrain.get_label()
    return 'rmse', evaluation_metric(labels, preds), True

def rmse_xgboost(preds, dtrain):
    labels = dtrain.get_label()
    return 'rmse', evaluation_metric(labels, preds)

In [9]:
train = pd.read_csv('train_data_nan_wind.csv', index_col='Date')
test = pd.read_csv('test_data_nan_wind.csv', index_col = 'Date')

train.fillna(-1, inplace=True)
test.fillna(-1, inplace=True)

drop_columns = ['Day', 'Precipitation(mm)',
   'Humidity(%)', 'VaporPressure(hPa)', 'DewPointTemperature(C)',
   'Sunshine(hr)', 'Snowfall(cm)',
   'SnowfallLast3Hours(cm)', 'TotalCloudCoverage(1/10)',
   'MidLowCloudCoverage(1/10)', 'CloudForm', 'LowCloud(100m)',
   'Visibility(10m)', 'GroundState(code)', 'PhenomenonNumber', 'd2m',
   't2m', 'aerosol', 'power', 'Year']

X_train = train.drop(columns = drop_columns)
y_train = train['power']
X_val = test.drop(columns = drop_columns)
y_val = test['power']

In [10]:
# random sampler
sampler = TPESampler(seed=10)

# define function
def objective(trial):

    cbrm_param = {
        'iterations':trial.suggest_int("iterations", 4000, 25000),
        'od_wait':trial.suggest_int('od_wait', 500, 2300),
        'learning_rate' : trial.suggest_uniform('learning_rate',0.01, 1),
        'reg_lambda': trial.suggest_uniform('reg_lambda',1e-5,100),
        'subsample': trial.suggest_uniform('subsample',0,1),
        'random_strength': trial.suggest_uniform('random_strength',10,50),
        'depth': trial.suggest_int('depth',1, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf',1,30),
        'leaf_estimation_iterations': trial.suggest_int('leaf_estimation_iterations',1,15),
        'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
        'colsample_bylevel':trial.suggest_float('colsample_bylevel', 0.4, 1.0),
    }

    # Generate model
    model_cbrm = CatBoostRegressor(**cbrm_param)
    model_cbrm = model_cbrm.fit(X_train, y_train, eval_set=[(X_val, y_val)], 
                           verbose=0, early_stopping_rounds=25)
                           
	# 평가지표 원하는 평가 지표가 있을 시 바꾸어 준다.
    RMSE = evaluation_metric(y_val, model_cbrm.predict(X_val))
    return RMSE

optuna_cbrm = optuna.create_study(direction='minimize', sampler=sampler)
optuna_cbrm.optimize(objective, n_trials=50)

[I 2023-09-07 11:06:57,951] A new study created in memory with name: no-name-cde22d14-7396-49cf-875a-290d7c483809
  'learning_rate' : trial.suggest_uniform('learning_rate',0.01, 1),
  'reg_lambda': trial.suggest_uniform('reg_lambda',1e-5,100),
  'subsample': trial.suggest_uniform('subsample',0,1),
  'random_strength': trial.suggest_uniform('random_strength',10,50),
  'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
[I 2023-09-07 11:06:58,664] Trial 0 finished with value: 1008780.4733906232 and parameters: {'iterations': 20198, 'od_wait': 537, 'learning_rate': 0.6373117525770127, 'reg_lambda': 74.88039076582236, 'subsample': 0.4985070123025904, 'random_strength': 18.991865821233908, 'depth': 3, 'min_data_in_leaf': 23, 'leaf_estimation_iterations': 3, 'bagging_temperature': 0.022561047334047252, 'colsample_bylevel': 0.8112158910206784}. Best is trial 0 with value: 1008780.4733906232.
  'learning_rate' : trial.suggest_uniform('learning_rate',0.01, 1),


  'learning_rate' : trial.suggest_uniform('learning_rate',0.01, 1),
  'reg_lambda': trial.suggest_uniform('reg_lambda',1e-5,100),
  'subsample': trial.suggest_uniform('subsample',0,1),
  'random_strength': trial.suggest_uniform('random_strength',10,50),
  'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
[I 2023-09-07 11:07:08,168] Trial 4 finished with value: 990839.6065273294 and parameters: {'iterations': 11385, 'od_wait': 1859, 'learning_rate': 0.30300208981088195, 'reg_lambda': 88.39364911675383, 'subsample': 0.3255116378322488, 'random_strength': 16.600635908765938, 'depth': 6, 'min_data_in_leaf': 3, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.040235377334273736, 'colsample_bylevel': 0.6304686692153197}. Best is trial 3 with value: 988853.4094403795.
  'learning_rate' : trial.suggest_uniform('learning_rate',0.01, 1),
  'reg_lambda': trial.suggest_uniform('reg_lambda',1e-5,100),
  'subsample': trial.suggest_uniform('subsample',0,1

  'learning_rate' : trial.suggest_uniform('learning_rate',0.01, 1),
  'reg_lambda': trial.suggest_uniform('reg_lambda',1e-5,100),
  'subsample': trial.suggest_uniform('subsample',0,1),
  'random_strength': trial.suggest_uniform('random_strength',10,50),
  'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
[I 2023-09-07 11:07:36,992] Trial 8 finished with value: 992127.8157602778 and parameters: {'iterations': 15709, 'od_wait': 1518, 'learning_rate': 0.48038092494090007, 'reg_lambda': 29.279804700971145, 'subsample': 0.06425106069482445, 'random_strength': 49.152765830305704, 'depth': 6, 'min_data_in_leaf': 15, 'leaf_estimation_iterations': 15, 'bagging_temperature': 0.5795558575717401, 'colsample_bylevel': 0.5909636832873707}. Best is trial 3 with value: 988853.4094403795.
  'learning_rate' : trial.suggest_uniform('learning_rate',0.01, 1),
  'reg_lambda': trial.suggest_uniform('reg_lambda',1e-5,100),
  'subsample': trial.suggest_uniform('subsample',0,

[W 2023-09-07 11:08:51,929] Trial 11 failed with value None.


CatBoostError: 

In [None]:
cbrm_trial = optuna_cbrm.best_trial
cbrm_trial_params = cbrm_trial.params
print('Best Trial: score {},\nparams {}'.format(cbrm_trial.value, cbrm_trial_params))

In [None]:
cbrm_trial_params