In [1]:
import numpy as np
from sklearn.base import clone
from sklearn.model_selection import KFold
from sklearn.metrics import get_scorer
from joblib import Parallel, delayed
import pandas as pd
import warnings

class CMAEvolutionStrategySearchCV:
    def __init__(self, estimator, param_bounds, n_iter=10, cv=5, scoring=None, refit=True,
                 population_size=None, random_state=None, verbose=0, n_jobs=-1, early_stopping=False,
                 tol=1e-4, max_no_improvement=5):
        """
        Implementation of hyperparameter optimization using the Covariance Matrix Adaptation
        Evolution Strategy (CMA-ES).

        Parameters:
        - estimator: The machine learning estimator to optimize.
        - param_bounds: Dictionary with parameter names as keys and tuples/lists of (min, max) values.
        - n_iter: Number of iterations to perform.
        - cv: Cross-validation splitting strategy.
        - scoring: A string or a scorer callable object/function with signature scorer(estimator, X, y).
        - refit: Refit the estimator with the best found parameters on the whole dataset.
        - population_size: Number of candidate solutions (offspring) in each generation.
        - random_state: Random seed for reproducibility.
        - verbose: Verbosity level.
        - n_jobs: Number of jobs to run in parallel. -1 means using all processors.
        - early_stopping: Whether to stop early if no improvement is seen.
        - tol: Tolerance for early stopping based on the score improvement.
        - max_no_improvement: Maximum number of iterations with no improvement to tolerate.
        """
        self.estimator = estimator
        self.param_bounds = param_bounds
        self.n_iter = n_iter
        self.cv = self._get_cv_object(cv)
        self.scoring = self._validate_scoring(scoring)
        self.refit = refit
        self.population_size = population_size or 4 + int(3 * np.log(len(param_bounds)))
        self.random_state = np.random.RandomState(random_state)
        self.verbose = verbose
        self.best_params_ = None
        self.best_score_ = -np.inf
        self.results_ = []
        self.log_ = []
        self.n_jobs = n_jobs
        self.early_stopping = early_stopping
        self.tol = tol
        self.max_no_improvement = max_no_improvement
        self.no_improvement_count = 0

    def _get_cv_object(self, cv):
        if isinstance(cv, int):
            return KFold(n_splits=cv, shuffle=True, random_state=42)
        elif hasattr(cv, 'split'):
            return cv
        else:
            raise ValueError("cv must be an integer or an initialized cross-validation object")

    def _validate_scoring(self, scoring):
        if isinstance(scoring, str):
            scorer = get_scorer(scoring)
            self.maximize_score = True if 'error' not in scoring and scoring != 'neg_log_loss' else False
            return scorer
        elif callable(scoring):
            self.maximize_score = True  # Assume maximization if custom scorer
            return scoring
        else:
            default_scorer = get_scorer('accuracy')
            self.maximize_score = True
            return default_scorer

    def fit(self, X, y):
        # Initialize CMA-ES parameters
        dim = len(self.param_bounds)
        mean = np.array([(bounds[0] + bounds[1]) / 2 for bounds in self.param_bounds.values()])
        sigma = 0.3 * (np.array([bounds[1] - bounds[0] for bounds in self.param_bounds.values()]))
        cov = np.eye(dim)
        self._init_strategy_parameters(dim)

        # Evolution loop
        for iteration in range(self.n_iter):
            # Sample new candidate solutions
            solutions = []
            for _ in range(self.population_size):
                z = self.random_state.randn(dim)
                try:
                    c_matrix = np.linalg.cholesky(cov)
                except np.linalg.LinAlgError:
                    # Handle non-positive definite covariance matrix
                    cov += 1e-6 * np.eye(dim)
                    c_matrix = np.linalg.cholesky(cov)
                x = mean + sigma * np.dot(c_matrix, z)
                x = self._ensure_bounds(x)
                solutions.append((x, z))

            # Evaluate solutions in parallel
            eval_results = Parallel(n_jobs=self.n_jobs)(
                delayed(self._evaluate_solution)(x_vec, X, y) for x_vec, _ in solutions
            )

            # Extract scores and parameters
            scores = [result[0] for result in eval_results]
            params_list = [result[1] for result in eval_results]

            # Handle NaN scores
            scores = np.array(scores)
            valid_indices = ~np.isnan(scores)
            if not np.any(valid_indices):
                warnings.warn("All candidate solutions resulted in NaN scores. Optimization stopped.")
                break
            scores = scores[valid_indices]
            params_list = [params_list[i] for i in np.where(valid_indices)[0]]
            solutions = [solutions[i] for i in np.where(valid_indices)[0]]

            # Select the top-performing solutions
            if self.maximize_score:
                indices = np.argsort(-scores)  # Descending order
            else:
                indices = np.argsort(scores)  # Ascending order

            selected = [solutions[i] for i in indices[:self.mu]]
            weights = self.weights[:len(selected)]

            # Update mean
            mean_old = mean.copy()
            mean = np.sum([w * x for (x, _), w in zip(selected, weights)], axis=0)

            # Update evolution paths
            y_w = np.sum([w * z for (_, z), w in zip(selected, weights)], axis=0)
            self.p_sigma = (1 - self.cs) * self.p_sigma + np.sqrt(self.cs * (2 - self.cs) * self.mu_eff) * y_w
            norm_p_sigma = np.linalg.norm(self.p_sigma)
            h_sigma_cond = norm_p_sigma / np.sqrt(1 - (1 - self.cs) ** (2 * (iteration + 1))) < \
                (1.4 + 2 / (dim + 1)) * self.chi_N
            self.p_c = (1 - self.cc) * self.p_c + h_sigma_cond * \
                np.sqrt(self.cc * (2 - self.cc) * self.mu_eff) * y_w

            # Update covariance matrix
            rank_one = np.outer(self.p_c, self.p_c)
            rank_mu = np.sum([w * np.outer(z, z) for (_, z), w in zip(selected, weights)], axis=0)
            cov = (1 - self.c1 - self.cmu) * cov + self.c1 * rank_one + self.cmu * rank_mu

            # Ensure covariance matrix is symmetric
            cov = (cov + cov.T) / 2

            # Update step size
            sigma = sigma * np.exp((self.cs / self.damps) * (norm_p_sigma / self.chi_N - 1))

            # Logging
            best_score_iter = scores[indices[0]]
            best_params_iter = params_list[indices[0]]
            self._log_hyperparameters(best_params_iter, best_score_iter)
            if self.verbose > 0:
                print(f"Iteration {iteration + 1}/{self.n_iter}, Best Score: {best_score_iter}")

            # Update best score
            improved = False
            if self.maximize_score:
                if best_score_iter > self.best_score_ + self.tol:
                    self.best_score_ = best_score_iter
                    self.best_params_ = best_params_iter
                    improved = True
            else:
                if best_score_iter < self.best_score_ - self.tol:
                    self.best_score_ = best_score_iter
                    self.best_params_ = best_params_iter
                    improved = True

            if not improved:
                self.no_improvement_count += 1
                if self.verbose > 0:
                    print(f"No improvement in iteration {iteration + 1}. No improvement count: {self.no_improvement_count}")
                if self.early_stopping and self.no_improvement_count >= self.max_no_improvement:
                    if self.verbose > 0:
                        print("Early stopping criteria met. Stopping optimization.")
                    break
            else:
                self.no_improvement_count = 0

        # Refit the estimator with the best found parameters
        if self.refit:
            self.estimator.set_params(**self.best_params_)
            self.estimator.fit(X, y)

        return self

    def _init_strategy_parameters(self, dim):
        # Strategy parameter settings: Selection
        self.mu = self.population_size // 2
        weights = np.log(self.mu + 0.5) - np.log(np.arange(1, self.mu + 1))
        self.weights = weights / np.sum(weights)
        self.mu_eff = np.sum(self.weights) ** 2 / np.sum(self.weights ** 2)

        # Strategy parameter settings: Adaptation
        self.cc = (4 + self.mu_eff / dim) / (dim + 4 + 2 * self.mu_eff / dim)
        self.cs = (self.mu_eff + 2) / (dim + self.mu_eff + 5)
        self.c1 = 2 / ((dim + 1.3) ** 2 + self.mu_eff)
        self.cmu = min(1 - self.c1, 2 * (self.mu_eff - 2 + 1 / self.mu_eff) / ((dim + 2) ** 2 + self.mu_eff))
        self.damps = 1 + 2 * max(0, np.sqrt((self.mu_eff - 1) / (dim + 1)) - 1) + self.cs
        self.chi_N = np.sqrt(dim) * (1 - 1 / (4 * dim) + 1 / (21 * dim ** 2))
        self.p_sigma = np.zeros(dim)
        self.p_c = np.zeros(dim)

    def _ensure_bounds(self, x):
        x_bounded = np.zeros_like(x)
        for i, key in enumerate(self.param_bounds.keys()):
            min_bound, max_bound = self.param_bounds[key]
            x_bounded[i] = np.clip(x[i], min_bound, max_bound)
        return x_bounded

    def _vector_to_params(self, x_vec):
        params = {}
        for i, key in enumerate(self.param_bounds.keys()):
            if isinstance(self.param_bounds[key][0], int) and isinstance(self.param_bounds[key][1], int):
                params[key] = int(round(x_vec[i]))
            else:
                params[key] = x_vec[i]
        return params

    def _evaluate_solution(self, x_vec, X, y):
        """
        Evaluates a single candidate solution over cross-validation folds.

        Returns:
        - avg_score: The average score over CV folds.
        - params: The hyperparameters corresponding to x_vec.
        """
        params = self._vector_to_params(x_vec)
        try:
            scores = Parallel(n_jobs=self.n_jobs)(
                delayed(self._evaluate_fold)(clone(self.estimator), params, X, y, train_idx, val_idx)
                for train_idx, val_idx in self.cv.split(X)
            )
            avg_score = np.mean(scores)
        except Exception as e:
            avg_score = np.nan
            if self.verbose > 1:
                print(f"Exception during evaluation: {e}")
        return avg_score, params

    def _evaluate_fold(self, estimator, params, X, y, train_idx, val_idx):
        """
        Evaluates the estimator on a single cross-validation fold.

        Returns:
        - score: The score on the validation set.
        """
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]
        estimator.set_params(**params)
        estimator.fit(X_train, y_train)
        score = self.scoring(estimator, X_val, y_val)
        return score

    def _log_hyperparameters(self, params, score):
        log_entry = params.copy()
        log_entry['score'] = score
        self.log_.append(log_entry)

    def display_log(self):
        """
        Display the log of hyperparameters and their corresponding scores as a DataFrame.
        """
        return pd.DataFrame(self.log_)

    def save_log(self, filepath='cmaes_search_log.csv'):
        """
        Save the hyperparameter log to a CSV file.
        """
        pd.DataFrame(self.log_).to_csv(filepath, index=False)
        if self.verbose > 0:
            print(f"Hyperparameter log saved to {filepath}")

    def score(self, X, y):
        return self.scoring(self.estimator, X, y)

    def predict(self, X):
        """
        Returns predictions using the refitted estimator.
        """
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """
        Returns probability estimates for the test data X.
        """
        return self.estimator.predict_proba(X)

    def get_params(self, deep=True):
        """
        Get parameters for this estimator.
        """
        return {
            'estimator': self.estimator,
            'param_bounds': self.param_bounds,
            'n_iter': self.n_iter,
            'cv': self.cv,
            'scoring': self.scoring,
            'refit': self.refit,
            'population_size': self.population_size,
            'random_state': self.random_state,
            'verbose': self.verbose,
            'n_jobs': self.n_jobs,
            'early_stopping': self.early_stopping,
            'tol': self.tol,
            'max_no_improvement': self.max_no_improvement
        }

    def set_params(self, **params):
        """
        Set the parameters of this estimator.
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

# Load dataset
X, y = load_iris(return_X_y=True)

# Define parameter bounds
param_bounds = {
    'n_estimators': (10, 200),
    'max_depth': (1, 30),
    'min_samples_split': (2, 20)
}

# Initialize CMAEvolutionStrategySearchCV with enhanced features
cma_es_search = CMAEvolutionStrategySearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_bounds=param_bounds,
    n_iter=50,
    scoring='accuracy',
    cv=5,
    random_state=42,
    verbose=1,
    n_jobs=-1,
    early_stopping=True,
    tol=1e-4,
    max_no_improvement=10
)

# Fit the model
cma_es_search.fit(X, y)

# Display the log of hyperparameters and their scores
print("\n")
display(cma_es_search.display_log())

# Save the log to a CSV file
print("\n")
cma_es_search.save_log('rf_cmaes_search_log.csv')

# Check best parameters and score
print("\nBest Parameters:", cma_es_search.best_params_)
print("Best Score:", cma_es_search.best_score_)

Iteration 1/50, Best Score: 0.9666666666666668
Iteration 2/50, Best Score: 0.9600000000000002
No improvement in iteration 2. No improvement count: 1
Iteration 3/50, Best Score: 0.9666666666666668
No improvement in iteration 3. No improvement count: 2
Iteration 4/50, Best Score: 0.9600000000000002
No improvement in iteration 4. No improvement count: 3
Iteration 5/50, Best Score: 0.9600000000000002
No improvement in iteration 5. No improvement count: 4
Iteration 6/50, Best Score: 0.9666666666666668
No improvement in iteration 6. No improvement count: 5
Iteration 7/50, Best Score: 0.9600000000000002
No improvement in iteration 7. No improvement count: 6
Iteration 8/50, Best Score: 0.9666666666666668
No improvement in iteration 8. No improvement count: 7
Iteration 9/50, Best Score: 0.9666666666666668
No improvement in iteration 9. No improvement count: 8
Iteration 10/50, Best Score: 0.9666666666666668
No improvement in iteration 10. No improvement count: 9
Iteration 11/50, Best Score: 0.96

Unnamed: 0,n_estimators,max_depth,min_samples_split,score
0,53,3,19,0.966667
1,74,8,9,0.96
2,49,3,14,0.966667
3,49,11,12,0.96
4,54,7,15,0.96
5,51,10,20,0.966667
6,27,9,17,0.96
7,55,4,19,0.966667
8,66,10,19,0.966667
9,77,3,20,0.966667




Hyperparameter log saved to rf_cmaes_search_log.csv

Best Parameters: {'n_estimators': 53, 'max_depth': 3, 'min_samples_split': 19}
Best Score: 0.9666666666666668
