**Imports**

In [1]:
import time
import numpy as np
import optuna
import optuna.visualization as vis
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


**Load data**

In [2]:
def load_data():
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=42
    )
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = load_data()


**OPTIMIZATIONS**

**Grid Search**

In [None]:
def grid_search(trial):
    n_estimators = trial.suggest_categorical("n_estimators", [50, 100, 200])
    max_depth = trial.suggest_categorical("max_depth", [None, 10, 20])
    min_samples_split = trial.suggest_categorical("min_samples_split", [2, 5, 10])
    
    clf = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, random_state=42
    )
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Random Search**

In [None]:
def random_search(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 200, step=50)
    max_depth = trial.suggest_categorical("max_depth", [None, 10, 20])
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10, step=3)
    
    clf = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, random_state=42
    )
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Bayesian Search**

In [None]:
def bayesian_search(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 200, step=50)
    max_depth = trial.suggest_categorical("max_depth", [None, 10, 20])
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10, step=3)
    
    clf = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, random_state=42
    )
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Base model**

In [6]:
def baseline_model():
    clf = RandomForestClassifier(random_state=42)
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Run the optimizations**

In [None]:
def run_optimization():
    global X_train, X_test, y_train, y_test
    X_train, X_test, y_train, y_test = load_data()
    
    studies = {}
    
    print("Running Baseline Model...")
    start_time = time.time()
    baseline_accuracy = baseline_model()
    end_time = time.time()
    print(f"Baseline Model Accuracy: {baseline_accuracy:.4f}, Time: {end_time - start_time:.2f}s")
    
    for name, objective in zip(["Grid search", "Random search", "Bayesian search"], 
                               [grid_search, random_search, bayesian_search]):
        print(f"Running {name} Optimization...")
        study = optuna.create_study(direction="maximize")
        start_time = time.time()
        study.optimize(objective, n_trials=50)
        end_time = time.time()
        print(f"Best Parameters ({name}): {study.best_params}, Time: {end_time - start_time:.2f}s")
        studies[name] = study
    
    studies["Baseline"] = baseline_accuracy
    return studies


**List of Searches for the plotting**

In [8]:
studies = {
    "grid": optuna.create_study(direction="maximize"),
    "random": optuna.create_study(direction="maximize"),
    "bayesian": optuna.create_study(direction="maximize"),
}

for name, study in studies.items():
    study.study_name = name

[I 2025-03-28 20:08:17,468] A new study created in memory with name: no-name-08b60fc4-c99a-4722-b8db-88f6fa18c596
[I 2025-03-28 20:08:17,469] A new study created in memory with name: no-name-a8b3d80b-255a-4afe-b04d-17e3ec1e29b9
[I 2025-03-28 20:08:17,470] A new study created in memory with name: no-name-83814f83-85fb-4e6c-9a13-4a743f4aed9b


**Plotting**

In [9]:
def plot_results(studies):
    optuna_studies = {name: study for name, study in studies.items() if name != "Baseline"}

    # Ensure study names are correctly assigned (this should already be done)
    for name, study in optuna_studies.items():
        study.study_name = name  

    # Plotting the optimization history (with semicolon to suppress text output)
    vis.plot_optimization_history(list(optuna_studies.values())).show()

    # Printing the best scores (optional, you can suppress this as well if needed)
    best_scores = {name: study.best_value for name, study in optuna_studies.items()}

    print("Best Accuracy Scores:")
    for name, score in best_scores.items():
        print(f"{name}: {score:.4f}")
        
studies = run_optimization()

Running Baseline Model...


[I 2025-03-28 20:08:18,882] A new study created in memory with name: no-name-537809a0-f2d2-4001-87aa-1da2e4281952


Baseline Model Accuracy: 0.9701, Time: 1.39s
Running Grid search Optimization...


[I 2025-03-28 20:08:20,018] Trial 0 finished with value: 0.9728601252609604 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 2}. Best is trial 0 with value: 0.9728601252609604.
[I 2025-03-28 20:08:21,185] Trial 1 finished with value: 0.9575504523312456 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_split': 10}. Best is trial 0 with value: 0.9728601252609604.
[I 2025-03-28 20:08:22,115] Trial 2 finished with value: 0.9631176061238692 and parameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 5}. Best is trial 0 with value: 0.9728601252609604.
[I 2025-03-28 20:08:22,995] Trial 3 finished with value: 0.9631176061238692 and parameters: {'n_estimators': 50, 'max_depth': 20, 'min_samples_split': 5}. Best is trial 0 with value: 0.9728601252609604.
[I 2025-03-28 20:08:23,885] Trial 4 finished with value: 0.9610299234516354 and parameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 10}. Best is trial 0 with value:

Best Parameters (Grid search): {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 2}, Time: 21.78s
Running Random search Optimization...


[I 2025-03-28 20:08:40,896] Trial 0 finished with value: 0.9638135003479471 and parameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 5}. Best is trial 0 with value: 0.9638135003479471.
[I 2025-03-28 20:08:41,131] Trial 1 finished with value: 0.9700765483646486 and parameters: {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2}. Best is trial 1 with value: 0.9700765483646486.
[I 2025-03-28 20:08:41,572] Trial 2 finished with value: 0.964509394572025 and parameters: {'n_estimators': 200, 'max_depth': None, 'min_samples_split': 5}. Best is trial 1 with value: 0.9700765483646486.
[I 2025-03-28 20:08:41,701] Trial 3 finished with value: 0.9631176061238692 and parameters: {'n_estimators': 50, 'max_depth': 20, 'min_samples_split': 5}. Best is trial 1 with value: 0.9700765483646486.
[I 2025-03-28 20:08:41,947] Trial 4 finished with value: 0.9631176061238692 and parameters: {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 8}. Best is trial 1 with val

Best Parameters (Random search): {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 2}, Time: 13.27s
Running Bayesian search Optimization...


[I 2025-03-28 20:08:54,172] Trial 0 finished with value: 0.9700765483646486 and parameters: {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2}. Best is trial 0 with value: 0.9700765483646486.
[I 2025-03-28 20:08:54,614] Trial 1 finished with value: 0.964509394572025 and parameters: {'n_estimators': 200, 'max_depth': None, 'min_samples_split': 5}. Best is trial 0 with value: 0.9700765483646486.
[I 2025-03-28 20:08:54,952] Trial 2 finished with value: 0.9631176061238692 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_split': 5}. Best is trial 0 with value: 0.9700765483646486.
[I 2025-03-28 20:08:55,102] Trial 3 finished with value: 0.9617258176757133 and parameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 8}. Best is trial 0 with value: 0.9700765483646486.
[I 2025-03-28 20:08:55,306] Trial 4 finished with value: 0.9659011830201809 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_split': 5}. Best is trial 0 with val

Best Parameters (Bayesian search): {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 2}, Time: 18.11s


In [10]:
plot_results(studies);        

Best Accuracy Scores:
Grid search: 0.9729
Random search: 0.9708
Bayesian search: 0.9729


In [11]:
def plot_params(studies):
    for name, study in studies.items():
        if name == "Baseline":
            print(f"Baseline Model Accuracy: {study:.4f}")
        else:
            fig = vis.plot_param_importances(study)  # Get the Plotly figure
            fig.update_layout(title_text=f"{name}")
            fig.show()  # Display the figure

plot_params(studies)

Baseline Model Accuracy: 0.9701
