In [1]:
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import pandas as pd
import numpy as np

import optuna
from objective import Objective
# import sys
# sys.path.append('../')
# from src.objective import Objective

KeyboardInterrupt: 

#### 再現性のための乱数シード指定

In [2]:
# Single random seed reused by the train/test split and by the Objective below,
# so that reruns of the notebook are comparable.
SEED: int = 334

#### 学習データの読み込み

In [3]:
# Load the Boston housing dataset and expose it as pandas objects:
# df_X holds the feature columns, df_y the target (named 'PRICE').
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn -- confirm the pinned version.
boston = load_boston()
df_X = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
    dtype=float,
)
df_y = pd.Series(data=boston.target, name='PRICE', dtype=float)

#### データの分割

In [4]:
# Hold out 20% of the rows as a test set; SEED fixes the shuffle so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df_X,
    df_y,
    test_size=0.2,
    random_state=SEED,
)

#### インスタンス化

In [5]:
# Build the Optuna objective from the project-local Objective class, handing it
# the estimator class and the training split.
# NOTE(review): Objective's internals are not visible here; presumably cv=5 means
# 5-fold cross-validation on the training data -- confirm in objective.py.
objective = Objective(
    RandomForestRegressor,
    X_train,
    y_train,
    random_state=SEED,
    cv=5,
)

#### 最適化

In [6]:
# Seed the sampler so the hyperparameter search proposes the same trials on
# every run; without it the study is not reproducible even though SEED is
# applied to the data split and the model. TPESampler is Optuna's default
# sampler, so only the seeding changes.
# direction='minimize' is the default, spelled out because the logged trial
# values are negative and the "best" trial is the most negative one.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=10)

[I 2020-07-23 19:40:34,883] Finished trial#0 with value: -0.8449988821931251 with parameters: {'min_samples_split': 3, 'max_depth': 51, 'n_estimators': 303}. Best is trial#0 with value: -0.8449988821931251.
[I 2020-07-23 19:40:37,438] Finished trial#1 with value: -0.8420195466859287 with parameters: {'min_samples_split': 8, 'max_depth': 380, 'n_estimators': 335}. Best is trial#0 with value: -0.8449988821931251.
[I 2020-07-23 19:40:40,906] Finished trial#2 with value: -0.8450194523671344 with parameters: {'min_samples_split': 4, 'max_depth': 186, 'n_estimators': 434}. Best is trial#2 with value: -0.8450194523671344.
[I 2020-07-23 19:40:41,221] Finished trial#3 with value: -0.8400237369319165 with parameters: {'min_samples_split': 5, 'max_depth': 472, 'n_estimators': 33}. Best is trial#2 with value: -0.8450194523671344.
[I 2020-07-23 19:40:44,287] Finished trial#4 with value: -0.8308163277545966 with parameters: {'min_samples_split': 16, 'max_depth': 411, 'n_estimators': 455}. Best is tr

#### 最適モデルを得る

In [7]:
# Re-create the estimator with the best trial's hyperparameters plus the
# objective's fixed parameters; both mappings are unpacked as keyword arguments.
best_clf = objective.clf(
    **study.best_params,
    **objective.fixed_params,
)

#### スコアで性能評価

In [8]:
# Refit the tuned model on the full training split, then report R^2 on the
# held-out test split.
best_clf.fit(X_train, y_train)
y_pred_on_test = best_clf.predict(X_test)
print(r2_score(y_true=y_test, y_pred=y_pred_on_test))

0.8658682623468608
