In [1]:
!pip install -q gplearn

In [2]:
import numpy as np
import warnings
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report
from gplearn.genetic import SymbolicClassifier

In [3]:
# Monkey-patch BaseEstimator._validate_data to use the newer validate_data
# function instead. The patch is applied only when it is actually needed:
#   * if this scikit-learn build has no public ``validate_data`` function,
#     the import fails and nothing is patched (previously the whole cell
#     raised ImportError);
#   * if BaseEstimator already provides ``_validate_data``, the native
#     implementation is left untouched instead of being overwritten for
#     every estimator in the process.
import sklearn.base

try:
    from sklearn.utils.validation import validate_data as new_validate_data
except ImportError:
    # No public validate_data in this scikit-learn build; keep the name
    # defined so later cells can still reference it.
    new_validate_data = None

if new_validate_data is not None and not hasattr(
    sklearn.base.BaseEstimator, "_validate_data"
):
    # validate_data takes the estimator as its first positional argument, so
    # assigning it on the class makes it usable as a bound method.
    sklearn.base.BaseEstimator._validate_data = new_validate_data

In [4]:
# Helper class that mimics estimator tags: values are readable both as
# attributes and through mapping-style access.
class DummyTags:
    """Simple attribute bag with dict-like read access.

    Every keyword argument passed to the constructor becomes an instance
    attribute. The same values can be read with ``tags[key]`` or
    ``tags.get(key, default)``.
    """

    def __init__(self, **kwargs):
        # Store the keywords straight into the instance namespace.
        vars(self).update(kwargs)

    def __getitem__(self, key):
        """Return the attribute named ``key``; raise ``KeyError`` if it is absent."""
        return vars(self)[key]

    def get(self, key, default=None):
        """Return the attribute named ``key``, or ``default`` if it is absent."""
        return vars(self).get(key, default)

In [5]:
# Replacement __sklearn_tags__ implementation for SymbolicClassifier
def my_sk_tags(self):
    """Build the stand-in tags object reported for the estimator.

    ``self`` is the estimator instance the method is bound to; it is not
    consulted, so every instance reports the same tags.

    Returns:
        DummyTags: the top-level tags, with a nested ``input_tags``
        ``DummyTags`` attribute.
    """
    # Nested input_tags required by some meta-estimators
    nested_input_tags = DummyTags(
        pairwise=False,  # Does not use a pairwise kernel
        sparse=False,    # Does not support sparse input by default
    )

    top_level = {
        "binary_only": False,            # Allow multi-class classification
        "X_types": ["2darray"],          # Expect a 2D array input
        "requires_y": True,              # Requires target values
        "non_deterministic": True,       # The algorithm is stochastic
        "estimator_type": "classifier",  # Specify that this is a classifier
    }

    # input_tags goes last so the attribute order matches setting it after
    # construction.
    return DummyTags(**top_level, input_tags=nested_input_tags)

# Apply the monkey-patch. The function is assigned on the class itself, so
# every SymbolicClassifier instance resolves __sklearn_tags__ to my_sk_tags.
SymbolicClassifier.__sklearn_tags__ = my_sk_tags

In [6]:
# Load a multi-class dataset (Iris) and pull out the feature matrix X and
# the class labels y
data = load_iris()
X, y = data.data, data.target

# Split the dataset into training and testing sets. No test_size is given,
# so train_test_split's default split ratio applies; random_state=42 fixes
# the seed used for the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [7]:
# Wrap the monkey-patched SymbolicClassifier with OneVsRestClassifier.
# The inner estimator is configured with a fixed seed (random_state=42),
# 10 generations and a population of 50.
ovr_classifier = OneVsRestClassifier(
    SymbolicClassifier(random_state=42, generations=10, population_size=50)
)

In [8]:
# Fit the one-vs-rest model on the training split only; the test split is
# kept aside for evaluation
ovr_classifier.fit(X_train, y_train)

In [9]:
# Predict class labels for the held-out test split
y_pred = ovr_classifier.predict(X_test)

In [10]:
# Evaluate the classifier's performance: print the per-class precision,
# recall, F1 and support for the test-set predictions
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.91      0.95        11
           2       0.92      1.00      0.96        12

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38



In [11]:
# Candidate values for hyperparameter tuning (each entry is a discrete list
# of options to sample from, not a continuous distribution).
# When using OneVsRestClassifier, the estimator's parameters must be prefixed with "estimator__".
param_dist = {
    "estimator__generations": [5, 10, 15],
    "estimator__population_size": [20, 50, 100]
}

In [12]:
# Set up a RandomizedSearchCV to tune hyperparameters of the one-vs-rest
# model. param_dist offers 3 x 3 = 9 combinations, of which n_iter=3 are
# sampled; random_state=42 fixes the seed for that sampling.
random_search = RandomizedSearchCV(
    ovr_classifier,
    param_distributions=param_dist,
    n_iter=3,   # Number of parameter settings sampled
    cv=3,       # 3-fold cross-validation
    random_state=42
)

# Fit the random search on the training data only; the test split is not
# used during tuning
random_search.fit(X_train, y_train)

In [13]:
# Evaluate the tuned search object on the test set
print("Best parameters:", random_search.best_params_)
print("Test set score:", random_search.score(X_test, y_test))
# Print the heading and the report with separate calls. Passing both to a
# single print() puts the default " " separator right after the heading's
# trailing newline, which shifts the report's header row one column out of
# alignment with the rows below it.
print("\nClassification Report:")
print(classification_report(y_test, random_search.predict(X_test)))

Best parameters: {'estimator__population_size': 50, 'estimator__generations': 15}
Test set score: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        12

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38

