diff --git a/examples/optimizer/skopt_example.py b/examples/optimizer/skopt_example.py index a63599ef..76245221 100644 --- a/examples/optimizer/skopt_example.py +++ b/examples/optimizer/skopt_example.py @@ -1,30 +1,36 @@ -from sklearn.datasets import load_boston -from sklearn.model_selection import KFold, ShuffleSplit +from sklearn.datasets import load_diabetes +from sklearn.model_selection import ShuffleSplit from photonai.base import Hyperpipe, PipelineElement from photonai.optimization import FloatRange, Categorical -# WE USE THE BOSTON HOUSING DATA FROM SKLEARN -X, y = load_boston(return_X_y=True) +# WE USE THE DIABETES DATA FROM SKLEARN +X, y = load_diabetes(return_X_y=True) # DESIGN YOUR PIPELINE my_pipe = Hyperpipe('skopt_example', optimizer='sk_opt', # which optimizer PHOTONAI shall use, in this case sk_opt - optimizer_params={'n_configurations': 25, 'acq_func': 'LCB', 'acq_func_kwargs': {'kappa': 1.96}}, - metrics=['mean_squared_error', 'pearson_correlation'], - best_config_metric='mean_squared_error', - outer_cv=ShuffleSplit(n_splits=4, test_size=0.2), - inner_cv=KFold(n_splits=3), + optimizer_params={'n_configurations': 25, + 'n_initial_points': 10, + 'base_estimator': 'GP', + 'initial_point_generator': 'grid', + 'acq_func': 'LCB', + 'acq_func_kwargs': {'kappa': 1.96} + }, + metrics=['mean_squared_error', 'mean_absolute_error'], + best_config_metric='mean_absolute_error', + outer_cv=ShuffleSplit(n_splits=3, test_size=0.2), + inner_cv=ShuffleSplit(n_splits=3, test_size=0.3), verbosity=0, project_folder='./tmp/') # ADD ELEMENTS TO YOUR PIPELINE -# first normalize all features +# first scale all features my_pipe += PipelineElement('StandardScaler') # engage and optimize SVR -# linspace and logspace is converted to uniform and log-uniform priors in skopt -my_pipe += PipelineElement('SVR', hyperparameters={'C': FloatRange(1e-3, 100, range_type='logspace'), +# linspace and logspace are converted to uniform and log-uniform priors in skopt +my_pipe += PipelineElement('SVR', hyperparameters={'C': FloatRange(0.1, 100, range_type='linspace'), 'epsilon': FloatRange(1e-3, 10, range_type='logspace'), 'tol': FloatRange(1e-4, 1e-2, range_type='linspace'), 'kernel': Categorical(['linear', 'rbf', 'poly'])}) diff --git a/photonai/optimization/grid_search/grid_search.py b/photonai/optimization/grid_search/grid_search.py index 9f68d7a6..9dca10b9 100644 --- a/photonai/optimization/grid_search/grid_search.py +++ b/photonai/optimization/grid_search/grid_search.py @@ -33,7 +33,7 @@ def __init__(self): def prepare(self, pipeline_elements: list, maximize_metric: bool) -> None: """ - Creates a grid from a list of PipelineElements. + Create a grid from a list of PipelineElements. Hyperparameters can be accessed via pipe_element.hyperparameters. Parameters: @@ -54,7 +54,7 @@ def next_config_generator(self) -> Generator: Generator for new configs - ask method. Returns: - Yields the next config. + Yield the next config. """ for parameters in self.param_grid: @@ -64,8 +64,8 @@ def next_config_generator(self) -> Generator: class RandomGridSearchOptimizer(GridSearchOptimizer): """Random grid search optimizer. - Searches for the best configuration by randomly - testing n points of a grid of possible hyperparameters. + Search for the best configuration by randomly + testing n points in a grid of possible hyperparameters. Example: ``` python @@ -85,10 +85,10 @@ def __init__(self, limit_in_minutes: Union[float, None] = None, n_configurations Parameters: limit_in_minutes: - Total time in minutes. + Total time limit in minutes. n_configurations: - Number of configurations to be calculated. + Maximum number of configurations to be calculated. """ super(RandomGridSearchOptimizer, self).__init__() @@ -126,7 +126,7 @@ def next_config_generator(self) -> Generator: Generator for new configs - ask method. Returns: - Yields the next config. + Yield the next config. """ if self.start_time is None and self.limit_in_minutes is not None: diff --git a/photonai/optimization/random_search/random_search.py b/photonai/optimization/random_search/random_search.py index d66e2cb0..c8599948 100644 --- a/photonai/optimization/random_search/random_search.py +++ b/photonai/optimization/random_search/random_search.py @@ -20,10 +20,10 @@ def __init__(self, limit_in_minutes: Union[float, None] = 60, n_configurations: Parameters: limit_in_minutes: - Total time in minutes. + Total time limit in minutes. n_configurations: - Number of configurations to be calculated. + Maximum number of configurations to be calculated. """ self.pipeline_elements = None @@ -51,7 +51,7 @@ def __init__(self, limit_in_minutes: Union[float, None] = 60, n_configurations: def prepare(self, pipeline_elements: list, maximize_metric: bool) -> None: """ - Initializes grid free random hyperparameter search. + Initialize the grid-free random hyperparameter search. Parameters: pipeline_elements: @@ -70,7 +70,7 @@ def next_config_generator(self) -> Generator: Generator for new configs - ask method. Returns: - Yields the next config. + Yield the next config. """ while True: diff --git a/photonai/optimization/scikit_optimize/sk_opt.py b/photonai/optimization/scikit_optimize/sk_opt.py index 3708dbd7..6815e42a 100644 --- a/photonai/optimization/scikit_optimize/sk_opt.py +++ b/photonai/optimization/scikit_optimize/sk_opt.py @@ -23,7 +23,7 @@ class SkOptOptimizer(PhotonSlaveOptimizer): skopt aims to be accessible and easy to use in many contexts. - Scikit-optimize [usage and implementation details](https://scikit-optimize.github.io/stable/) + Scikit-Optimize's [usage and implementation details](https://scikit-optimize.github.io/stable/) A detailed parameter documentation [here.]( https://scikit-optimize.github.io/stable/modules/generated/skopt.optimizer.Optimizer.html#skopt.optimizer.Optimizer) @@ -43,23 +43,25 @@ def __init__(self, n_configurations: int = 20, n_initial_points: int = 10, limit_in_minutes: Union[float, None] = None, - base_estimator: Union[str, sklearn.base.RegressorMixin] = "ET", + base_estimator: Union[str, sklearn.base.RegressorMixin] = "GP", initial_point_generator: str = "random", acq_func: str = 'gp_hedge', - acq_func_kwargs: dict = None): + acq_optimizer: str = 'auto', + acq_func_kwargs: dict = None, + acq_optimizer_kwargs: dict = None): """ Initialize the object. Parameters: n_configurations: - Number of configurations to be calculated. + Maximum number of configurations to be calculated. n_initial_points: Number of evaluations with initialization points before approximating it with `base_estimator`. limit_in_minutes: - Total time in minutes. + Total time limit in minutes. base_estimator: Estimator for returning std(Y | x) along with E[Y | x]. @@ -70,9 +72,17 @@ def __init__(self, acq_func: Function to minimize over the posterior distribution. + acq_optimizer: + Method to minimize the acquisition function. + The fit model is updated with the optimal value + obtained by optimizing acq_func with acq_optimizer. + acq_func_kwargs: Additional arguments to be passed to the acquisition function. + acq_optimizer_kwargs: + Additional arguments to be passed to the acquisition optimizer. + """ self.metric_to_optimize = '' self.n_configurations = n_configurations @@ -80,7 +90,9 @@ def __init__(self, self.base_estimator = base_estimator self.initial_point_generator = initial_point_generator self.acq_func = acq_func + self.acq_optimizer = acq_optimizer self.acq_func_kwargs = acq_func_kwargs + self.acq_optimizer_kwargs = acq_optimizer_kwargs self.limit_in_minutes = limit_in_minutes self.start_time, self.end_time = None, None @@ -95,7 +107,7 @@ def ask_generator(self) -> Generator: Generator for new configs - ask method. Returns: - Yields the next config. + Yield the next config. """ if self.start_time is None and self.limit_in_minutes is not None: @@ -116,9 +128,9 @@ def ask_generator(self) -> Generator: def prepare(self, pipeline_elements: list, maximize_metric: bool) -> None: """ - Initializes hyperparameter search with scikit-optimize. + Initialize the hyperparameter search with Scikit-Optimize. - Assembles all hyperparameters of the list of PipelineElements + Assemble all hyperparameters of the list of PipelineElements in order to prepare the hyperparameter space. Hyperparameters can be accessed via pipe_element.hyperparameters. @@ -158,8 +170,9 @@ def prepare(self, pipeline_elements: list, maximize_metric: bool) -> None: space.append(skopt_param) if self.constant_dictionary: - msg = "PHOTONAI has detected some one-valued params in your hyperparameters. Pleas use the kwargs for " \ - "constant values. This run ignores following settings: " + str(self.constant_dictionary.keys()) + msg = "PHOTONAI has detected some one-valued params in your hyperparameters. " \ + "Please use the kwargs for constant values. " \ + "This run ignores the setting: {}".format(str(self.constant_dictionary.keys())) logger.warning(msg) warnings.warn(msg) @@ -173,7 +186,9 @@ def prepare(self, pipeline_elements: list, maximize_metric: bool) -> None: n_initial_points=self.n_initial_points, initial_point_generator=self.initial_point_generator, acq_func=self.acq_func, - acq_func_kwargs=self.acq_func_kwargs) + acq_optimizer=self.acq_optimizer, + acq_func_kwargs=self.acq_func_kwargs, + acq_optimizer_kwargs=self.acq_optimizer_kwargs) self.ask = self.ask_generator() def tell(self, config: dict, performance: float) -> None: @@ -208,14 +223,14 @@ def _convert_photonai_to_skopt_space(self, hyperparam: Union[PhotonHyperparam, l elif hyperparam.range_type == 'logspace': return Real(hyperparam.start, hyperparam.stop, name=name, prior='log-uniform') else: - msg = "The hyperparam.range_type "+hyperparam.range_type+" is not supported by scikit-optimize." + msg = "The hyperparam.range_type {} is not supported by Scikit-Optimize.".format(hyperparam.range_type) logger.error(msg) raise ValueError(msg) elif isinstance(hyperparam, IntegerRange): return Integer(hyperparam.start, hyperparam.stop, name=name) - msg = "Cannot convert hyperparameter " + str(hyperparam) + ". " \ - "Supported types: Categorical, IntegerRange, FloatRange, list." + msg = "Cannot convert hyperparameter {}. Supported types: " \ + "Categorical, IntegerRange, FloatRange, list.".format(str(hyperparam)) logger.error(msg) raise ValueError(msg) diff --git a/setup.py b/setup.py index 5465a8c5..50487159 100644 --- a/setup.py +++ b/setup.py @@ -51,8 +51,8 @@ 'prettytable', 'seaborn', 'joblib', - 'dask==2.30.0', - 'distributed==2.30.1', + 'dask', + 'distributed', 'scikit-optimize', 'xlrd'] ) diff --git a/test/optimization_tests/grid_search_tests/test_grid_search.py b/test/optimization_tests/grid_search_tests/test_grid_search.py index 3336b088..9f9e3062 100644 --- a/test/optimization_tests/grid_search_tests/test_grid_search.py +++ b/test/optimization_tests/grid_search_tests/test_grid_search.py @@ -116,5 +116,5 @@ def test_slave_interface(): @staticmethod def test_master_interface(): opt = PhotonMasterOptimizer() - opt.prepare(list(), True, None) + opt.prepare(list(), True, lambda x: x) opt.optimize() diff --git a/test/optimization_tests/nevergrad_tests/test_nevergrad.py b/test/optimization_tests/nevergrad_tests/test_nevergrad.py index 9fe979e4..3933a4f2 100644 --- a/test/optimization_tests/nevergrad_tests/test_nevergrad.py +++ b/test/optimization_tests/nevergrad_tests/test_nevergrad.py @@ -111,7 +111,10 @@ def test_other(self): opt = NevergradOptimizer(facade="NGO", n_configurations=10) pipeline_elements = [PipelineElement('SVC', hyperparameters={'kernel': ["sigmoid", "rbf"], 'C': [0.6], 'coef0': Categorical([0.5])})] - of = lambda x: x ** 2 + + def of(x): + return x ** 2 + with warnings.catch_warnings(record=True) as w: opt.prepare(pipeline_elements=pipeline_elements, maximize_metric=True, objective_function=of) assert any("PHOTONAI has detected some" in s for s in [e.message.args[0] for e in w]) diff --git a/test/optimization_tests/nevergrad_tests/test_nevergrad_not_installed.py b/test/optimization_tests/nevergrad_tests/test_nevergrad_not_installed.py index e8090008..ccadee88 100644 --- a/test/optimization_tests/nevergrad_tests/test_nevergrad_not_installed.py +++ b/test/optimization_tests/nevergrad_tests/test_nevergrad_not_installed.py @@ -11,9 +11,7 @@ def setUp(self) -> None: photonai_ng.__found__ = False def test_imports(self): - """ - Test for ModuleNotFoundError (requirements.txt). - """ + """Test for ModuleNotFoundError (requirements.txt).""" with self.assertRaises(ModuleNotFoundError): NevergradOptimizer() diff --git a/test/optimization_tests/smac_tests/test_smac.py b/test/optimization_tests/smac_tests/test_smac.py index 5922c2ee..4a633967 100644 --- a/test/optimization_tests/smac_tests/test_smac.py +++ b/test/optimization_tests/smac_tests/test_smac.py @@ -135,7 +135,7 @@ def test_further_parameters(self): pipe.add(PipelineElement('StandardScaler')) pipe += PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(5, 30)}) pipe += PipelineElement('SVC', hyperparameters={'kernel': Categorical(["rbf", 'poly']), - 'C': FloatRange(0.5, 200)}, gamma='auto') + 'C': FloatRange(0.5, 200)}, gamma='auto') X, y = self.simple_classification() pipe.fit(X, y) self.assertEqual(len(pipe.results.outer_folds[0].tested_config_list), n_configurations) diff --git a/test/processing_tests/test_permutation_test.py b/test/processing_tests/test_permutation_test.py index 7a321853..40779a26 100644 --- a/test/processing_tests/test_permutation_test.py +++ b/test/processing_tests/test_permutation_test.py @@ -1,4 +1,5 @@ import uuid +import os import numpy as np from bson.objectid import ObjectId from sklearn.datasets import load_breast_cancer @@ -10,6 +11,36 @@ from photonai.helper.photon_base_test import PhotonBaseTest +def create_hyperpipe(): + # this is needed here for the parallelization + from photonai.base import Hyperpipe, PipelineElement, OutputSettings + from photonai.optimization import IntegerRange + from sklearn.model_selection import GroupKFold, KFold + + base_folder = os.path.dirname(os.path.abspath(__file__)) + settings = OutputSettings(mongodb_connect_url='mongodb://localhost:27017/photon_results', save_output=False) + my_pipe = Hyperpipe('permutation_test_1', + optimizer='grid_search', + metrics=['accuracy', 'precision', 'recall'], + best_config_metric='accuracy', + outer_cv=GroupKFold(n_splits=2), + inner_cv=KFold(n_splits=2), + calculate_metrics_across_folds=True, + use_test_set=True, + project_folder=os.path.join(base_folder, "tmp"), + verbosity=0, + output_settings=settings) + + my_pipe += PipelineElement("StandardScaler", hyperparameters={}, + test_disabled=False, with_mean=True, with_std=True) + my_pipe += PipelineElement("PCA", hyperparameters={'n_components': IntegerRange(3, 5)}, + test_disabled=False) + my_pipe += PipelineElement("SVC", hyperparameters={'kernel': ['linear', 'rbf']}, # C': FloatRange(0.1, 5), + gamma='scale', max_iter=1000000) + + return my_pipe + + class PermutationTestTests(PhotonBaseTest): @classmethod @@ -61,39 +92,6 @@ def test_find_reference(self): ObjectId(wizard_obj_id), True) self.assertEqual(latest_item.name, wizard_obj_id) - def create_hyperpipe(self): - # this is needed here for the parallelisation - from photonai.base import Hyperpipe, PipelineElement, OutputSettings - from photonai.optimization import IntegerRange - from sklearn.model_selection import GroupKFold - from sklearn.model_selection import KFold - - settings = OutputSettings(mongodb_connect_url='mongodb://localhost:27017/photon_results') - my_pipe = Hyperpipe('permutation_test_1', - optimizer='grid_search', - metrics=['accuracy', 'precision', 'recall'], - best_config_metric='accuracy', - outer_cv=GroupKFold(n_splits=2), - inner_cv=KFold(n_splits=2), - calculate_metrics_across_folds=True, - use_test_set=True, - verbosity=0, - project_folder=self.tmp_folder_path, - output_settings=settings) - - # Add transformer elements - my_pipe += PipelineElement("StandardScaler", hyperparameters={}, - test_disabled=False, with_mean=True, with_std=True) - - my_pipe += PipelineElement("PCA", hyperparameters={'n_components': IntegerRange(3, 5)}, - test_disabled=False) - - # Add estimator - my_pipe += PipelineElement("SVC", hyperparameters={'kernel': ['linear', 'rbf']}, # C': FloatRange(0.1, 5), - gamma='scale', max_iter=1000000) - - return my_pipe - def create_hyperpipe_no_mongo(self): from photonai.base import Hyperpipe from sklearn.model_selection import KFold @@ -120,7 +118,7 @@ def test_run_parallelized_perm_test(self): X, y = load_breast_cancer(return_X_y=True) my_perm_id = str(uuid.uuid4()) groups = np.random.random_integers(0, 3, (len(y),)) - perm_tester = PermutationTest(self.create_hyperpipe, n_perms=2, n_processes=3, random_state=11, + perm_tester = PermutationTest(create_hyperpipe, n_perms=2, n_processes=3, random_state=11, permutation_id=my_perm_id) perm_tester.fit(X, y, groups=groups) @@ -129,7 +127,7 @@ def test_setup_non_useful_perm_test(self): X, y = np.random.random((200, 5)), np.random.randint(0, 2, size=(200, )) my_perm_id = str(uuid.uuid4()) groups = np.random.random_integers(0, 3, (len(y),)) - perm_tester = PermutationTest(self.create_hyperpipe, n_perms=2, n_processes=3, random_state=11, + perm_tester = PermutationTest(create_hyperpipe, n_perms=2, n_processes=3, random_state=11, permutation_id=my_perm_id) with self.assertRaises(RuntimeError): perm_tester.fit(X, y, groups=groups) @@ -138,7 +136,7 @@ def test_run_perm_test(self): X, y = load_breast_cancer(return_X_y=True) my_perm_id = str(uuid.uuid4()) groups = np.random.random_integers(0, 3, (len(y),)) - perm_tester = PermutationTest(self.create_hyperpipe, n_perms=2, n_processes=1, random_state=11, + perm_tester = PermutationTest(create_hyperpipe, n_perms=2, n_processes=1, random_state=11, permutation_id=my_perm_id) perm_tester.fit(X, y, groups=groups)