## 하이퍼파리미터 튜닝

In [11]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_squared_error

In [5]:
params = {
    "learning_rate": [0.07, 0.05],
    "max_depth": [3, 5, 7],
    "n_estimators": [100, 200],
    "subsample": [0.9, 0.8, 0.7]
}

In [10]:
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# 데이터 로드
def make_dataset():
    dataset = load_diabetes()
    df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    df['target'] = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        df.drop('target', axis=1), df['target'], test_size=0.2, random_state=1004)
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = make_dataset()

def make_dataset2():
    dataset = load_diabetes()
    df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    df['target'] = dataset.target
    return df.drop('target', axis=1), df['target']
X, y = make_dataset2()



### GridSearchCV
- 모든 경우의 수 탐색

In [7]:
from xgboost import XGBRegressor
xgb = XGBRegressor()
grid = GridSearchCV(estimator=xgb, param_grid=params, n_jobs=-1, cv=3)
grid.fit(X, y)

In [8]:
grid.best_params_

{'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.7}

In [12]:
xgb = XGBRegressor(**grid.best_params_)
xgb.fit(X_train, y_train)
pred = xgb.predict(X_test)
mean_squared_error(y_test, pred)

3020.722217577996

### RandomSearch
- N개의 조합만 탐색

In [15]:
xgb = XGBRegressor()
random = RandomizedSearchCV(estimator=xgb, param_distributions=params, n_jobs=-1, cv=3, n_iter=10)
random.fit(X, y)

In [16]:
random.best_params_

{'subsample': 0.7, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.07}

### 평가

In [20]:
# MAE
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_test, pred)

46.60036254197024

In [21]:
# MSE
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, pred)

3020.722217577996

In [25]:
# RMSE
import numpy as np
from sklearn.metrics import root_mean_squared_error
print(root_mean_squared_error(y_test, pred))
print(np.sqrt(mean_squared_error(y_test, pred)))

54.96109731053408
54.96109731053408


In [27]:
# RMSLE
from sklearn.metrics import root_mean_squared_log_error, mean_squared_log_error
print(root_mean_squared_log_error(y_test, pred))
print(np.sqrt(mean_squared_log_error(y_test, pred)))

0.4239413701297469
0.4239413701297469


In [24]:
# R2
from sklearn.metrics import r2_score
r2_score(y_test, pred)

0.5386462845667569