In [13]:
import pandas as pd
import optuna

In [14]:
from sklearn.datasets import load_diabetes

# Load the scikit-learn diabetes regression dataset and keep the raw
# feature matrix / target vector under underscore-prefixed names.
dataset = load_diabetes()
_X, _y = dataset.data, dataset.target

# Sanity check: show the shapes of the feature matrix and target vector.
print(_X.shape, _y.shape)

(442, 10) (442,)


In [15]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for the final evaluation; the fixed seed keeps
# the split identical between runs.
_X_train, _X_test, _y_train, _y_test = train_test_split(
    _X,
    _y,
    test_size=0.3,
    random_state=42,
)

In [16]:
from sklearn.preprocessing import StandardScaler

# Feature scaling: statistics are learned on the training split only and then
# applied to both splits, so no information from the test split leaks in.
scX = StandardScaler()
scX.fit(_X_train)
_X_train = scX.transform(_X_train)
_X_test = scX.transform(_X_test)

# Target scaling: StandardScaler expects 2-D input, so the target is reshaped
# to a single column for the scaler and flattened back to 1-D afterwards.
scY = StandardScaler()
scY.fit(_y_train.reshape(-1, 1))
_y_train = scY.transform(_y_train.reshape(-1, 1)).flatten()
_y_test = scY.transform(_y_test.reshape(-1, 1)).flatten()

In [17]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

def objective(trial: optuna.trial.Trial) -> float:
    """Optuna objective: cross-validated MSE of a random forest.

    Samples ``n_estimators`` (20..200) and ``max_depth`` (2..32, log-scaled)
    from ``trial``, evaluates a ``RandomForestRegressor`` with 3-fold
    cross-validation on the (standardized) training split, and returns the
    mean squared error averaged over the folds. Lower is better.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        Mean 3-fold cross-validated MSE on ``_X_train`` / ``_y_train``.
    """
    n_estimators = trial.suggest_int("n_estimators", 20, 200)
    max_depth = trial.suggest_int("max_depth", 2, 32, log=True)

    # Fix the forest's seed so that two trials with the same hyperparameters
    # get the same score; otherwise the optimizer partly chases bootstrap noise.
    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42,
    )

    scores = cross_val_score(
        forest, _X_train, _y_train, cv=3, scoring="neg_mean_squared_error"
    )
    # The scorer returns negated MSE (higher is better); flip the sign so the
    # study can minimize a plain MSE.
    return -scores.mean()

In [18]:
# The objective returns an MSE, so the study minimizes it (stated explicitly
# rather than relying on the default). The sampler is seeded so that the
# sequence of suggested hyperparameters is reproducible across runs.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)

[I 2025-12-25 11:04:20,162] A new study created in memory with name: no-name-6239ef7b-4cd5-43e1-b976-7be851123507


In [22]:
# Run 50 trials of the objective on the existing study.
# NOTE(review): the captured log below starts at "Trial 50", i.e. this cell was
# executed more than once and the new trials were added to the same in-memory
# study. Re-create `study` before re-running this cell to get a clean 50-trial
# search.
study.optimize(objective, n_trials=50)

[I 2025-12-25 11:05:35,128] Trial 50 finished with value: 0.5730606422386085 and parameters: {'n_estimators': 146, 'max_depth': 3}. Best is trial 21 with value: 0.5664930016803386.
[I 2025-12-25 11:05:36,095] Trial 51 finished with value: 0.5817204410519233 and parameters: {'n_estimators': 193, 'max_depth': 6}. Best is trial 21 with value: 0.5664930016803386.
[I 2025-12-25 11:05:37,073] Trial 52 finished with value: 0.5817879726962252 and parameters: {'n_estimators': 180, 'max_depth': 11}. Best is trial 21 with value: 0.5664930016803386.
[I 2025-12-25 11:05:37,994] Trial 53 finished with value: 0.5837761407214794 and parameters: {'n_estimators': 193, 'max_depth': 7}. Best is trial 21 with value: 0.5664930016803386.
[I 2025-12-25 11:05:38,933] Trial 54 finished with value: 0.5846690917402008 and parameters: {'n_estimators': 189, 'max_depth': 8}. Best is trial 21 with value: 0.5664930016803386.
[I 2025-12-25 11:05:39,675] Trial 55 finished with value: 0.5693201711532113 and parameters: {

In [23]:
# Hyperparameters of the best trial recorded in the study.
study.best_params

{'n_estimators': 40, 'max_depth': 5}

In [24]:
# Objective value of the best trial: mean 3-fold cross-validated MSE on the
# standardized training target.
study.best_value

0.5626973622166126

In [25]:
from sklearn.metrics import mean_squared_error

# Refit a forest with the best hyperparameters on the full training split.
# The seed is fixed so the reported test score is reproducible.
forrest = RandomForestRegressor(**study.best_params, random_state=42)
forrest.fit(_X_train, _y_train)

y_pred = forrest.predict(_X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE happens to be
# symmetric, but keeping the documented order avoids silent errors if the
# metric is later swapped for an asymmetric one (e.g. r2_score).
# The value is in standardized-target units because _y_test was scaled by scY.
mse_test = mean_squared_error(_y_test, y_pred)

In [26]:
# Held-out test MSE of the tuned forest (on the standardized target scale).
mse_test

0.45782707160965735