In [37]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from skopt import gp_minimize
from skopt.space import Integer, Categorical
from skopt.utils import use_named_args

# Load the digits dataset
digits = load_digits()

# Split the dataset into a training set and a validation set
X_train, X_val, y_train, y_val = train_test_split(digits.data, digits.target, test_size=0.2, random_state=42)

# Define the search space for the hyperparameters
search_space = [
    Integer(10, 1000, name='n_estimators'),
    Integer(2, 20, name='max_depth'),
    Integer(2, 20, name='min_samples_split'),
    Integer(1, 20, name='min_samples_leaf'),
    Categorical(['auto', 'sqrt'], name='max_features'),
    Categorical([True, False], name='bootstrap')
]
RandomForestClassifier(bootstrap=False, max_depth=5, min_samples_leaf=12,
                       min_samples_split=16, n_estimators=799, random_state=42)
# Define the objective function
@use_named_args(search_space)
def evaluate_model(**params):

    # Initialize the random forest classifier with the given hyperparameters
    rf = RandomForestClassifier(**params, random_state=42)

    # Fit the classifier to the training data
    rf.fit(X_train, y_train)

    # Predict the labels for the validation data
    y_pred = rf.predict(X_val)

    # Compute the F1 score of the predictions
    f1 = f1_score(y_val, y_pred, average='weighted')

    print('----------------------------------------------------------')
    param_txt ='['+str(f1)+']'
    for key in rf.get_params().keys():
        param_txt = param_txt+' | '+str(key)+' = '+str(rf.get_params()[key])
    print(param_txt)

    # Return the negative F1 score (to maximize instead of minimize)
    return -f1

# Run the Bayesian optimization loop
result = gp_minimize(evaluate_model, search_space, n_calls=12, random_state=42, verbose=True)

# Print the best hyperparameters found and their F1 score
print('Best hyperparameters: ', result.x)
print('Best F1 score: ', -result.fun)

# Train the final random forest classifier on the entire training set with the best hyperparameters
rf = RandomForestClassifier(**dict(zip([s.name for s in search_space], result.x)), random_state=42)
rf.fit(digits.data, digits.target)

Iteration No: 1 started. Evaluating function at random point.
----------------------------------------------------------
[0.9362189123471103] | bootstrap = False | ccp_alpha = 0.0 | class_weight = None | criterion = gini | max_depth = 5 | max_features = auto | max_leaf_nodes = None | max_samples = None | min_impurity_decrease = 0.0 | min_impurity_split = None | min_samples_leaf = 12 | min_samples_split = 16 | min_weight_fraction_leaf = 0.0 | n_estimators = 799 | n_jobs = None | oob_score = False | random_state = 42 | verbose = 0 | warm_start = False
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 1.7726
Function value obtained: -0.9362
Current minimum: -0.9362
Iteration No: 2 started. Evaluating function at random point.
----------------------------------------------------------
[0.9390813433404653] | bootstrap = True | ccp_alpha = 0.0 | class_weight = None | criterion = gini | max_depth = 8 | max_features = auto | max_leaf_nodes = None | max_samples = None | min_im

RandomForestClassifier(bootstrap=False, max_depth=11, max_features='sqrt',
                       min_samples_leaf=2, min_samples_split=9, n_estimators=33,
                       random_state=42)