In [1]:
from yikit.models import Objective
from yikit.models import SupportVectorRegressor, GBDTRegressor, LinearModelRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_boston
from sklearn.neural_network import MLPRegressor
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor
from kennard_stone import KFold, train_test_split
import optuna
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [2]:
# Seed handed to the tuning Objective as its `random_state`.
SEED = 334

In [3]:
# Load the dataset and wrap features and target in DataFrames.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned scikit-learn version, or plan a dataset swap.
boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)  # feature columns named by the dataset
y = pd.DataFrame(boston.target, columns=['PRICE'])  # target kept as a one-column frame
display(X.head(), y.head())  # preview the first rows of both frames

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


Unnamed: 0,PRICE
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


In [4]:
# Hold out 20% of the rows as the test split and build the 5-fold splitter
# that is later passed as `cv` during tuning.
# Both `train_test_split` and `KFold` here are the `kennard_stone` versions, not scikit-learn's.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
kf = KFold(n_splits=5)

In [5]:
# Standardize the features: fit on the training split only, then reuse the
# fitted scaler for the test split.
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Standardize the target with its own scaler, fitted on the training split.
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

# Candidate models to tune.
# NOTE(review): GBDTRegressor() is listed twice — confirm whether the second
# entry is intentional.
estimators = [
    MLPRegressor(max_iter=1000),
    GBDTRegressor(),
    RandomForestRegressor(),
    SupportVectorRegressor(),
    GBDTRegressor(),
    LinearModelRegressor(),
]
def get_best_estimator(estimator, n_trials=10, n_jobs=1):
    """Tune ``estimator`` with optuna and return a clone set to the best parameters.

    The search uses the notebook-level ``X_train`` / ``y_train`` together with
    the ``kf`` splitter and ``SEED``; the study maximizes the
    ``'neg_mean_squared_error'`` score.

    Parameters
    ----------
    estimator : estimator object
        Model to tune. It is handed to ``Objective`` and cloned for the result.
    n_trials : int, default=10
        Number of trials, forwarded to ``study.optimize``.
    n_jobs : int, default=1
        Number of parallel jobs, forwarded to ``study.optimize``.

    Returns
    -------
    estimator object
        A clone of ``estimator`` with ``objective.fixed_params_`` and the
        study's best parameters applied. This function does not fit it.
    """
    objective = Objective(
        estimator=estimator,
        X=X_train,
        y=y_train.ravel(),  # flatten the target before handing it over
        scoring='neg_mean_squared_error',
        random_state=SEED,
        cv=kf,
    )

    # The objective supplies its own sampler; the score is a negated error,
    # so the study direction is 'maximize'.
    study = optuna.create_study(sampler=objective.sampler, direction='maximize')
    study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)

    # Configure a fresh clone rather than the estimator that was passed in.
    best_estimator_ = clone(estimator)
    best_estimator_.set_params(**objective.fixed_params_, **study.best_params)
    return best_estimator_

# Turn off optuna's default log handler before running the studies.
optuna.logging.disable_default_handler()

# Tune each candidate in turn behind a progress bar; the resulting list is
# the cell's displayed value.
list(map(get_best_estimator, tqdm(estimators)))

  0%|          | 0/6 [00:00<?, ?it/s]



[MLPRegressor(alpha=4.087245269103505e-05, hidden_layer_sizes=173,
              learning_rate_init=0.00040881779824664973, max_iter=1000,
              random_state=RandomState(MT19937) at 0x7F8068919E40),
 GBDTRegressor(colsample_bytree=0.7144499731975308,
               min_child_weight=0.7555608813623558, n_estimators=456,
               num_leaves=9, objective='regression',
               random_state=RandomState(MT19937) at 0x7F804AC4D140,
               subsample=0.8909736279248479),
 RandomForestRegressor(max_depth=92, min_samples_split=5, n_estimators=363,
                       n_jobs=-1,
                       random_state=RandomState(MT19937) at 0x7F804AC4D640),
 SupportVectorRegressor(C=6.244967999525347, epsilon=0.00545957921024158),
 GBDTRegressor(colsample_bytree=0.7144499731975308,
               min_child_weight=0.7555608813623558, n_estimators=456,
               num_leaves=9, objective='regression',
               random_state=RandomState(MT19937) at 0x7F804AC4DA40,

In [6]:
# Re-enable optuna's default log handler, which was disabled before the tuning run.
optuna.logging.enable_default_handler()