# Optuna with SKLearn

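- Note that the test set is used to score every trial during hyperparameter tuning. This is not good practice, because the reported best score is then optimistically biased. See the next notebook for the correct usage.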


In [None]:
import pandas as pd
import optuna

In [None]:
from sklearn.datasets import load_diabetes

# Load the diabetes dataset shipped with scikit-learn and keep the raw
# (unscaled) features and target under underscore-prefixed names.
dataset = load_diabetes()
_X, _y = dataset.data, dataset.target

# Quick sanity check of the array dimensions.
print(_X.shape, _y.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples as a test split; the fixed random_state keeps
# the partition identical from run to run.
_X_train, _X_test, _y_train, _y_test = train_test_split(
    _X,
    _y,
    test_size=0.3,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Feature scaler: statistics are learned from the training split only and
# then applied unchanged to both splits.
scX = StandardScaler().fit(_X_train)
X_train = scX.transform(_X_train)
X_test = scX.transform(_X_test)

# Target scaler: StandardScaler expects 2-D input, so the 1-D target is
# reshaped to a single column for scaling and flattened back afterwards.
scY = StandardScaler().fit(_y_train.reshape(-1, 1))
y_train = scY.transform(_y_train.reshape(-1, 1)).flatten()
y_test = scY.transform(_y_test.reshape(-1, 1)).flatten()

In [None]:
# Plot one 30-bin histogram per column of the scaled training features.
pd.DataFrame(X_train).hist(bins=30, figsize=(15, 10))

In [None]:
# Plot a 30-bin histogram of the scaled training target.
pd.DataFrame(y_train).hist(bins=30, figsize=(5, 5))

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


def objective(trial):
    """Evaluate one Optuna trial and return the value to minimise.

    Two hyperparameters are sampled from ``trial``: ``n_estimators`` as an
    integer in [10, 200] and ``max_depth`` as an integer in [2, 32] on a log
    scale. A ``RandomForestRegressor`` with those settings (and a fixed
    ``random_state``) is fitted on ``X_train``/``y_train``.

    Returns:
        The mean squared error of the fitted model's predictions on
        ``X_test`` against ``y_test``.
    """
    # Dict entries are evaluated top to bottom, so the parameters are
    # suggested in the order n_estimators, then max_depth.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 10, 200),
        "max_depth": trial.suggest_int("max_depth", 2, 32, log=True),
    }
    model = RandomForestRegressor(random_state=42, **params)
    model.fit(X_train, y_train)

    # NOTE: the trial is scored on the test split, so the test data steers
    # the hyperparameter search.
    return mean_squared_error(y_test, model.predict(X_test))

In [None]:
# The objective returns an MSE, so the study must minimise it. "minimize" is
# also Optuna's default direction; it is spelled out here for clarity.
study = optuna.create_study(direction="minimize")

# Run 50 trials of the objective.
study.optimize(objective, n_trials=50)

In [None]:
# Report the hyperparameters of the best trial and the objective value
# (the MSE returned by `objective`) that it achieved.
best_params = study.best_params
print("Best parameters:", best_params)
print("Best MSE:", study.best_value)