**Imports**

In [28]:
import time
import numpy as np
import optuna
import optuna.visualization as vis
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

**Load data**

In [29]:
def load_data():
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=42
    )
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = load_data()


**OPTIMIZATIONS**

**Grid Search**

In [30]:
def grid_search(trial):
    n_estimators = trial.suggest_categorical("n_estimators", [50, 100, 200])
    max_depth = trial.suggest_categorical("max_depth", [None, 10, 20])
    min_samples_split = trial.suggest_categorical("min_samples_split", [2, 5, 10])
    
    clf = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, random_state=42
    )
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Random Search**

In [31]:
def random_search(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 200, step=50)
    max_depth = trial.suggest_categorical("max_depth", [None, 10, 20])
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10, step=3)
    
    clf = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, random_state=42
    )
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Bayesian Search**

In [32]:
def bayesian_search(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 200, step=50)
    max_depth = trial.suggest_categorical("max_depth", [None, 10, 20])
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10, step=3)
    
    clf = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, random_state=42
    )
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Base model**

In [33]:
def baseline_model():
    clf = RandomForestClassifier(random_state=42)
    return np.mean(cross_val_score(clf, X_train, y_train, cv=3, n_jobs=-1))

**Run the optimizations**

In [34]:
def run_optimization():
    global X_train, X_test, y_train, y_test
    X_train, X_test, y_train, y_test = load_data()
    
    studies = {}
    
    print("Running Baseline Model...")
    start_time = time.time()
    baseline_accuracy = baseline_model()
    end_time = time.time()
    print(f"Baseline Model Accuracy: {baseline_accuracy:.4f}, Time: {end_time - start_time:.2f}s")
    
    for name, objective in zip(["Grid search", "Random search", "Bayesian search"], 
                               [grid_search, random_search, bayesian_search]):
        print(f"Running {name} Optimization...")
        study = optuna.create_study(direction="maximize")
        start_time = time.time()
        study.optimize(objective, n_trials=50)
        end_time = time.time()
        print(f"Best Parameters ({name}): {study.best_params}, Time: {end_time - start_time:.2f}s")
        studies[name] = study
    
    studies["Baseline"] = baseline_accuracy
    return studies


**List of Searches for the plotting**

In [39]:
execution_times = {}

studies = {
    "grid": optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42)),
    "random": optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42)),
    "bayesian": optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42)),
}

for name, study in studies.items():
    study.study_name = name

[I 2025-03-28 21:30:02,792] A new study created in memory with name: no-name-9192873f-6652-4457-974f-4441c1ce84e8
[I 2025-03-28 21:30:02,793] A new study created in memory with name: no-name-e5773ada-3c87-4182-aeb1-f36616c2de5b
[I 2025-03-28 21:30:02,794] A new study created in memory with name: no-name-5dfc403b-49de-4358-b215-fcf409c87486


**Plotting**

In [36]:
def plot_results(studies):
    optuna_studies = {name: study for name, study in studies.items() if name != "Baseline"}

    # Ensure study names are correctly assigned (this should already be done)
    for name, study in optuna_studies.items():
        study.study_name = name  

    # Plotting the optimization history (with semicolon to suppress text output)
    vis.plot_optimization_history(list(optuna_studies.values())).show()

    # Printing the best scores (optional, you can suppress this as well if needed)
    best_scores = {name: study.best_value for name, study in optuna_studies.items()}

    print("Best Accuracy Scores:")
    for name, score in best_scores.items():
        print(f"{name}: {score:.4f}")
        
studies = run_optimization()

Running Baseline Model...


[I 2025-03-28 21:29:08,025] A new study created in memory with name: no-name-0e0a61e0-83d0-4e31-93b6-77c0fa922f49


Baseline Model Accuracy: 0.9701, Time: 1.33s
Running Grid search Optimization...


[I 2025-03-28 21:29:08,924] Trial 0 finished with value: 0.9700765483646486 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_split': 2}. Best is trial 0 with value: 0.9700765483646486.
[I 2025-03-28 21:29:09,761] Trial 1 finished with value: 0.965205288796103 and parameters: {'n_estimators': 50, 'max_depth': 10, 'min_samples_split': 2}. Best is trial 0 with value: 0.9700765483646486.
[I 2025-03-28 21:29:10,824] Trial 2 finished with value: 0.9728601252609604 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 2}. Best is trial 2 with value: 0.9728601252609604.
[I 2025-03-28 21:29:11,868] Trial 3 finished with value: 0.9728601252609604 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 2}. Best is trial 2 with value: 0.9728601252609604.
[I 2025-03-28 21:29:12,683] Trial 4 finished with value: 0.9540709812108559 and parameters: {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 10}. Best is trial 2 with value: 

Best Parameters (Grid search): {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 2}, Time: 17.98s
Running Random search Optimization...


[I 2025-03-28 21:29:26,331] Trial 1 finished with value: 0.9665970772442588 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_split': 2}. Best is trial 1 with value: 0.9665970772442588.

The distribution is specified by [2, 10] and step=3, but the range is not divisible by `step`. It will be replaced by [2, 8].

[I 2025-03-28 21:29:26,644] Trial 2 finished with value: 0.9686847599164926 and parameters: {'n_estimators': 200, 'max_depth': None, 'min_samples_split': 2}. Best is trial 2 with value: 0.9686847599164926.

The distribution is specified by [2, 10] and step=3, but the range is not divisible by `step`. It will be replaced by [2, 8].

[I 2025-03-28 21:29:26,949] Trial 3 finished with value: 0.9617258176757133 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 8}. Best is trial 2 with value: 0.9686847599164926.

The distribution is specified by [2, 10] and step=3, but the range is not divisible by `step`. It will be replaced by [2, 8].

[I

Best Parameters (Random search): {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 2}, Time: 9.19s
Running Bayesian search Optimization...


[I 2025-03-28 21:29:35,482] Trial 0 finished with value: 0.9617258176757133 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 8}. Best is trial 0 with value: 0.9617258176757133.

The distribution is specified by [2, 10] and step=3, but the range is not divisible by `step`. It will be replaced by [2, 8].

[I 2025-03-28 21:29:35,640] Trial 1 finished with value: 0.9700765483646486 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_split': 2}. Best is trial 1 with value: 0.9700765483646486.

The distribution is specified by [2, 10] and step=3, but the range is not divisible by `step`. It will be replaced by [2, 8].

[I 2025-03-28 21:29:35,726] Trial 2 finished with value: 0.9631176061238692 and parameters: {'n_estimators': 50, 'max_depth': 20, 'min_samples_split': 5}. Best is trial 1 with value: 0.9700765483646486.

The distribution is specified by [2, 10] and step=3, but the range is not divisible by `step`. It will be replaced by [2, 8].

[I 20

Best Parameters (Bayesian search): {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 2}, Time: 11.47s


In [37]:
plot_results(studies);        

Best Accuracy Scores:
Grid search: 0.9729
Random search: 0.9708
Bayesian search: 0.9729


In [38]:
def plot_params(studies):
    for name, study in studies.items():
        if name == "Baseline":
            print(f"Baseline Model Accuracy: {study:.4f}")
        else:
            fig = vis.plot_param_importances(study)  # Get the Plotly figure
            fig.update_layout(title_text=f"{name}")
            fig.show()  # Display the figure

plot_params(studies)

Baseline Model Accuracy: 0.9701
