In [6]:
# conda env: pyg (Python3.9.16)
import os
import optuna
from optuna.integration import OptunaSearchCV

import sklearn

# Optuna example
https://github.com/optuna/optuna-examples

### On README Page

In [None]:
def objective(trial):
    x = trial.suggest_float("x", -100, 100)
    return x ** 2


if __name__ == "__main__":
    study = optuna.create_study()
    # The optimization finishes after evaluating 1000 times or 3 seconds.
    study.optimize(objective, n_trials=1000, timeout=3)
    print(f"Best params is {study.best_params} with value {study.best_value}")

### sklearn/sklearn_optuna_search_cv_simple.py
https://github.com/optuna/optuna-examples/blob/main/sklearn/sklearn_optuna_search_cv_simple.py

In [8]:
"""
Optuna example that optimizes a classifier configuration using OptunaSearchCV.

In this example, we optimize a classifier configuration for Iris dataset using OptunaSearchCV.
Classifier is from scikit-learn.

"""

import optuna

from sklearn.datasets import load_iris
from sklearn.svm import SVC


if __name__ == "__main__":
    clf = SVC(gamma="auto")

    param_distributions = {
        "C": optuna.distributions.FloatDistribution(1e-10, 1e10, log=True),
        "degree": optuna.distributions.IntDistribution(1, 5),
    }

    optuna_search = optuna.integration.OptunaSearchCV(
        clf, param_distributions, n_trials=10, timeout=600, verbose=2
    )

    X, y = load_iris(return_X_y=True)
    optuna_search.fit(X, y)

    print("Best trial:")
    trial = optuna_search.study_.best_trial

    print("  Value: ", trial.value)
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

  optuna_search = optuna.integration.OptunaSearchCV(
[I 2024-11-15 18:06:00,034] A new study created in memory with name: no-name-239c6a0d-fa01-46bd-84b4-b33ca0431efd
[I 2024-11-15 18:06:00,042] Trial 0 finished with value: 0.9400000000000001 and parameters: {'C': 1201736.5843739184, 'degree': 3}. Best is trial 0 with value: 0.9400000000000001.
[I 2024-11-15 18:06:00,048] Trial 1 finished with value: 0.9800000000000001 and parameters: {'C': 11.902188878345026, 'degree': 1}. Best is trial 1 with value: 0.9800000000000001.
[I 2024-11-15 18:06:00,055] Trial 2 finished with value: 0.9400000000000001 and parameters: {'C': 152929494.68415025, 'degree': 4}. Best is trial 1 with value: 0.9800000000000001.
[I 2024-11-15 18:06:00,062] Trial 3 finished with value: 0.9400000000000001 and parameters: {'C': 270506035.0077117, 'degree': 1}. Best is trial 1 with value: 0.9800000000000001.
[I 2024-11-15 18:06:00,071] Trial 4 finished with value: 0.9333333333333333 and parameters: {'C': 6.88058242013002

Best trial:
  Value:  0.9800000000000001
  Params: 
    C: 11.902188878345026
    degree: 1


In [9]:
trial

FrozenTrial(number=1, state=TrialState.COMPLETE, values=[0.9800000000000001], datetime_start=datetime.datetime(2024, 11, 15, 18, 6, 0, 42774), datetime_complete=datetime.datetime(2024, 11, 15, 18, 6, 0, 48033), params={'C': 11.902188878345026, 'degree': 1}, user_attrs={'mean_fit_time': 0.0004397392272949219, 'std_fit_time': 9.893193232283176e-06, 'mean_score_time': 0.00020771026611328126, 'std_score_time': 5.299975836195566e-06, 'split0_test_score': 0.9666666666666667, 'split1_test_score': 1.0, 'split2_test_score': 0.9666666666666667, 'split3_test_score': 0.9666666666666667, 'split4_test_score': 1.0, 'mean_test_score': 0.9800000000000001, 'std_test_score': 0.016329931618554516}, system_attrs={'terminator:cv_scores': [0.9666666666666667, 1.0, 0.9666666666666667, 0.9666666666666667, 1.0]}, intermediate_values={}, distributions={'C': FloatDistribution(high=10000000000.0, log=True, low=1e-10, step=None), 'degree': IntDistribution(high=5, log=False, low=1, step=1)}, trial_id=1, value=None)

### sklearn/sklearn_additional_args.py (including cv)

In [None]:
"""
Optuna example that optimizes a classifier configuration for Iris dataset using sklearn.

This example is the same as `sklearn_simple.py` except that it uses a callable class for
implementing the objective function. It takes the Iris dataset by a constructor's argument
instead of loading it in each trial execution. This will speed up the execution of each trial
compared to `sklearn_simple.py`.

"""

import optuna

import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm


class Objective(object):
    def __init__(self, iris):
        self.iris = iris

    def __call__(self, trial):
        x, y = self.iris.data, self.iris.target

        classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
        if classifier_name == "SVC":
            svc_c = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
            classifier_obj = sklearn.svm.SVC(C=svc_c, gamma="auto")
        else:
            rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
            classifier_obj = sklearn.ensemble.RandomForestClassifier(
                max_depth=rf_max_depth, n_estimators=10
            )

        score = sklearn.model_selection.cross_val_score(classifier_obj, x, y, n_jobs=-1, cv=3)
        accuracy = score.mean()
        return accuracy


if __name__ == "__main__":
    # Load the dataset in advance for reusing it each trial execution.
    iris = sklearn.datasets.load_iris()
    objective = Objective(iris)

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    print(study.best_trial)

# Optuna API
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html

### optuna.integration.OptunaSearchCV
https://optuna.readthedocs.io/en/v2.0.0/reference/generated/optuna.integration.OptunaSearchCV.html

In [None]:
import optuna
from sklearn.datasets import load_iris
from sklearn.svm import SVC

clf = SVC(gamma='auto')
param_distributions = {
    'C': optuna.distributions.LogUniformDistribution(1e-10, 1e+10)
}
optuna_search = optuna.integration.OptunaSearchCV(
    clf,
    param_distributions
)
X, y = load_iris(return_X_y=True)
optuna_search.fit(X, y)
y_pred = optuna_search.predict(X)

# stackoverflow
https://stackoverflow.com/questions/63224426/how-can-i-cross-validate-by-pytorch-and-optuna

In [None]:
def objective_cv(trial):

    # Get the MNIST dataset.
    dataset = datasets.MNIST(DIR, train=True, download=True, transform=transforms.ToTensor())

    fold = KFold(n_splits=3, shuffle=True, random_state=0)
    scores = []
    for fold_idx, (train_idx, valid_idx) in enumerate(fold.split(range(len(dataset)))):
        train_data = torch.utils.data.Subset(dataset, train_idx)
        valid_data = torch.utils.data.Subset(dataset, valid_idx)

        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=BATCHSIZE,
            shuffle=True,
        )
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=BATCHSIZE,
            shuffle=True,
        )

        accuracy = objective(trial, train_loader, valid_loader)
        scores.append(accuracy)
    return np.mean(scores)


study = optuna.create_study(direction="maximize")
study.optimize(objective_cv, n_trials=20, timeout=600)

# Framework: Scikit-learn

In [3]:
# 1. Define an objective function to be maximized.
def objective(trail):

    # 2. Suggest values for hyperparameter using a trail object.
    classifier_name = trail.suggest_categorical("classifier", ["SVC", "RandomForest"])
    if classifier_name == "SVC":
        svc_c = trail.suggest_float("svc_c", 1e-10, 1e10, log=True)
        classifier_obj = sklearn.svm.SVC(C=svc_c, gamma="auto")
    else:
        rf_max_depth = trail.suggest_int("rf_max_depth", 2, 32, log=True)
        classifier_obj = sklearn.ensemble.RandomForestClassifier(max_depth=rf_max_depth, n_estimators=10)
    ...
    return accuracy

# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

[I 2024-11-15 17:09:14,287] A new study created in memory with name: no-name-a4cafc78-b8b5-4cec-b2d2-299ddef1f418
[W 2024-11-15 17:09:14,290] Trial 0 failed with parameters: {'classifier': 'SVC', 'svc_c': 2311426.263447518} because of the following error: AttributeError("module 'sklearn' has no attribute 'svm'").
Traceback (most recent call last):
  File "/storage/homefs/yc24j783/miniconda3/envs/pyg/lib/python3.9/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_1533273/2795152918.py", line 8, in objective
    classifier_obj = sklearn.svm.SVC(C=svc_c, gamma="auto")
AttributeError: module 'sklearn' has no attribute 'svm'
[W 2024-11-15 17:09:14,299] Trial 0 failed with value None.


AttributeError: module 'sklearn' has no attribute 'svm'

In [None]:
def objective(trial):
    x = trial.suggest_float("x", -100, 100)
    return x ** 2


if __name__ == "__main__":
    study = optuna.create_study()
    # The optimization finishes after evaluating 1000 times or 3 seconds.
    study.optimize(objective, n_trials=1000, timeout=3)
    print(f"Best params is {study.best_params} with value {study.best_value}")

# Framework: PyTorch

In [None]:
import torch

# 1. Define an objective function to be maximized.
def object(trail):

    # 2. Suggest values for hyperparameter using a trail object.
    n_layers = trail.suggest_int("n_layers", 1, 3)
    layers = []

    in_features = 28 * 28
    for i in range(n_layers):
        out_features = trail.suggest_int(f"n_units_l{i}".format(i), 4, 128)
        layers.append(torch.nn.Linear(in_features, out_features))
        layers.append(torch.nn.ReLU())
        in_features = out_features
    layers.append(torch.nn.Linear(in_features, 10))
    layers.append(torch.nn.Softmax(dim=1))
    model = torch.nn.Sequential(*layers).to(torch.device("cpu"))
    ...
    return accuracy