count warnings -> content based warnings testing, improve cnn example
lucasplagwitz committed Aug 28, 2020
1 parent f3a0110 commit a0d00b1
Showing 15 changed files with 131 additions and 68 deletions.
examples/neural_networks/dataset.py
@@ -1,3 +1,7 @@
# content by J. Brownlee:
# https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
# HAR-Dataset: https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones

from numpy import dstack
from pandas import read_csv
import numpy as np
@@ -38,7 +42,7 @@ def load_dataset_group(group, prefix=''):


# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
def load_har(prefix=''):
# load all train
trainX, trainy = load_dataset_group('train', prefix + '/')
print(trainX.shape, trainy.shape)
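For orientation, a minimal usage sketch of the renamed loader — this is not part of the diff, the prefix path is hypothetical, and the shapes assume the standard UCI HAR layout (windows of 128 timesteps over 9 inertial channels, 6 activity classes):

from examples.neural_networks.dataset import load_har

# hypothetical local path to the unpacked archive
X, y = load_har(prefix='/path/to/UCI HAR Dataset')
print(X.shape, y.shape)  # roughly (n_windows, 128, 9) plus matching targets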
9 changes: 5 additions & 4 deletions examples/neural_networks/keras_cnn_classification.py
@@ -1,6 +1,7 @@
# cnn model with PHOTONAI
# example: https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
# HARDataset: https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones
# content by J. Brownlee:
# https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
# HAR-Dataset: https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones
# required file: data.py from examples/neural_network

from keras.utils import data_utils
@@ -12,7 +13,7 @@
from keras.layers.convolutional import MaxPooling1D
from sklearn.model_selection import KFold

from examples.neural_networks.data import load_dataset
from examples.neural_networks.dataset import load_har

from photonai.base import Hyperpipe, PipelineElement, OutputSettings
from photonai.optimization import Categorical
@@ -27,7 +28,7 @@
archive_format='zip'
)

X, y = load_dataset(prefix=dataset_path.replace('.zip', ''))
X, y = load_har(prefix=dataset_path.replace('.zip', ''))

n_timesteps, n_features, n_outputs = X.shape[1], X.shape[2], 6
model = Sequential()
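The rest of this file is collapsed in the diff; judging from the cited Brownlee tutorial, the model definition continues roughly as below. This is a sketch, not the committed code — it assumes the tutorial's layer stack and the Dense, Dropout and Flatten imports that the collapsed hunk would carry:

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                 input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))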
87 changes: 71 additions & 16 deletions examples/neural_networks/keras_cnn_optimization.py
@@ -1,7 +1,9 @@
# optimized cnn model with PHOTONAI
# example: https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
# HARDataset: https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones
# required file: data.py from examples/neural_network
# content by J. Brownlee:
# https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
# HAR-Dataset: https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones
# required file: dataset.py from examples/neural_network

import os

from keras.utils import data_utils
@@ -12,12 +14,14 @@
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator

from examples.neural_networks.data import load_dataset
from examples.neural_networks.dataset import load_har

from photonai.base import Hyperpipe, PipelineElement, OutputSettings, PhotonRegistry
from photonai.modelwrapper.keras_base_models import KerasBaseClassifier
from photonai.optimization import IntegerRange
from photonai.optimization import IntegerRange, BooleanSwitch

dataset_path = data_utils.get_file(
fname='UCI HAR Dataset.zip',
@@ -29,23 +33,67 @@
archive_format='zip'
)

X, y = load_dataset(prefix=dataset_path.replace('.zip', ''))
X, y = load_har(prefix=dataset_path.replace('.zip', ''))


# Transformer and Estimator Definition
class MyCnnScaler(BaseEstimator):

def __init__(self, standardize: bool = True,):
# it is important that you name your params the same in the constructor
# stub as well as in your class variables!
self.standardize = standardize

def fit(self, data, targets=None, **kwargs):
"""
Adjust the underlying model or method to the data.
Returns
-------
IMPORTANT: must return self!
"""
return self

def transform(self, X, targets=None, **kwargs):
"""
Apply the method's logic to the data.
"""
# remove overlap
cut = int(X.shape[1] / 2)
longX = X[:, -cut:, :]
# flatten windows
longX = longX.reshape((longX.shape[0] * longX.shape[1], longX.shape[2]))
# flatten train and test
flatX = X.reshape((X.shape[0] * X.shape[1], X.shape[2]))
# standardize
if self.standardize:
s = StandardScaler()
# fit on training data
s.fit(longX)
# apply to training and test data
flatX = s.transform(flatX)
# reshape
flatX = flatX.reshape((X.shape))
return flatX


class MyOptimizedCnnEstimator(KerasBaseClassifier):

def __init__(self, n_filters: int = 64, epochs: int = 10, verbosity: int = 1):
def __init__(self, n_filters: int = 64,
kernel_size: int = 3,
epochs: int = 10,
verbosity: int = 1):
# it is important that you name your params the same in the constructor
# stub as well as in your class variables!
model = self.build_model(n_filters, X.shape[1], X.shape[2], 6)
model = self.build_model(n_filters, kernel_size, X.shape[1], X.shape[2], 6)
super(MyOptimizedCnnEstimator, self).__init__(model=model,
epochs=epochs,
nn_batch_size=32,
multi_class=True,
verbosity=verbosity)

@classmethod
def build_model(cls, n_filters, n_timesteps, n_features, n_outputs):
@staticmethod
def build_model(n_filters, kernel_size, n_timesteps, n_features, n_outputs):
model = Sequential()
model.add(Conv1D(filters=n_filters, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=n_filters, kernel_size=3, activation='relu'))
@@ -65,27 +113,34 @@ def build_model(cls, n_timesteps, n_features, n_outputs):
registry = PhotonRegistry(custom_elements_folder=custom_elements_folder)

# This needs to be done only once on your device
registry.register(photon_name='MyCnnScaler',
class_str='keras_cnn_optimization.MyCnnScaler',
element_type='Transformer')

registry.register(photon_name='MyOptimizedCnnEstimator',
class_str='keras_cnn_optimization.MyOptimizedCnnEstimator',
element_type='Estimator')

# This needs to be done every time you run the script
registry.activate()


# DESIGN YOUR PIPELINE
my_pipe = Hyperpipe('cnn_keras_multiclass_pipe',
optimizer='sk_opt',
optimizer_params={'n_configurations': 10},
optimizer_params={'n_configurations': 30},
metrics=['accuracy'],
best_config_metric='accuracy',
outer_cv=KFold(n_splits=3),
inner_cv=KFold(n_splits=2),
outer_cv=KFold(n_splits=5),
inner_cv=KFold(n_splits=3),
verbosity=1,
output_settings=OutputSettings(project_folder='./tmp/'))

my_pipe += PipelineElement('MyCnnScaler', hyperparameters={'standardize': BooleanSwitch()})

my_pipe += PipelineElement('MyOptimizedCnnEstimator',
hyperparameters={'n_filters': IntegerRange(8, 256)},
epochs=3, verbosity=1)
hyperparameters={'n_filters': IntegerRange(8, 256),
'kernel_size': IntegerRange(2, 11)},
epochs=10, verbosity=0)

# NOW TRAIN YOUR PIPELINE
my_pipe.fit(X, y)
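A quick standalone check of the custom transformer defined above — purely illustrative, with toy sizes: MyCnnScaler fits its scaling statistics on the non-overlapping second half of each window, applies them to the flattened data, and hands back an array of the original (windows, timesteps, channels) shape.

import numpy as np

X_demo = np.random.randn(16, 128, 9)  # hypothetical HAR-like batch
scaler = MyCnnScaler(standardize=True)
X_scaled = scaler.fit(X_demo).transform(X_demo)
assert X_scaled.shape == X_demo.shape  # the transform is shape-preserving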
28 changes: 14 additions & 14 deletions photonai/base/hyperpipe.py
@@ -63,23 +63,23 @@ class OutputSettings:
if the chosen estimators have an attribute 'coef_' or 'feature_importances_'.
* `project_folder` [bool, default=True]:
If True, PHOTON writes a summary_file, the results of the hyperparameter optimization, the best model and the
If True, PHOTONAI writes a summary_file, the results of the hyperparameter optimization, the best model and the
console output to the filesystem into the given project folder.
* `project_folder` [str, default='']:
The output folder in which all files generated by the PHOTON project are saved to.
The output folder in which all files generated by the PHOTONAI project are saved to.
* `result_file_mode` [str, default='best']
The possible save_modes for .photon file.
* `user_id` [str]:
The user name of the according PHOTON Wizard login
The user name of the according PHOTONAI Wizard login
* `wizard_object_id` [str]:
The object id to map the designed pipeline in the PHOTON Wizard to the results in the PHOTON CORE Database
The object id to map the designed pipeline in the PHOTONAI Wizard to the results in the PHOTONAI CORE Database
* `wizard_project_name` [str]:
How the project is titled in the PHOTON Wizard
How the project is titled in the PHOTONAI Wizard
"""
def __init__(self,
mongodb_connect_url: str = None,
@@ -332,10 +332,10 @@ def __init__(self, name,
# check if both calculate_metrics_per_fold and calculate_metrics_across_folds are False
if not calculate_metrics_across_folds and not calculate_metrics_per_fold:
raise NotImplementedError("Apparently, you've set calculate_metrics_across_folds=False and "
"calculate_metrics_per_fold=False. In this case PHOTON does not calculate "
"calculate_metrics_per_fold=False. In this case PHOTONAI does not calculate "
"any metrics which doesn't make any sense. Set at least one to True.")
if inner_cv is None:
msg = "PHOTON requires an inner_cv split. Please enable inner cross-validation. For example: Hyperpipe(..." \
msg = "PHOTONAI requires an inner_cv split. Please enable inner cross-validation. For example: Hyperpipe(..." \
" inner_cv = KFold(n_splits = 3), ...). Ensure you import the cross_validation object first."
logger.error(msg)
raise AttributeError(msg)
@@ -486,7 +486,7 @@ def sanity_check_metrics(self):
if self.best_config_metric is not None:
if isinstance(self.best_config_metric, list):
warning_text = "Best Config Metric must be a single metric given as string, no list. " \
"PHOTON chose the first one from the list of metrics to calculate."
"PHOTONAI chose the first one from the list of metrics to calculate."

self.best_config_metric = self.best_config_metric[0]
logger.warning(warning_text)
Expand All @@ -509,13 +509,13 @@ def sanity_check_metrics(self):
self.metrics = list(filter(None, self.metrics))
else:
error_msg = "No metrics were chosen. Please choose metrics to quantify your performance and set " \
"the best_config_metric that PHOTON optimizes for."
"the best_config_metric that PHOTONAI optimizes for."
logger.error(error_msg)
raise ValueError(error_msg)

if self.best_config_metric is None and self.metrics is not None and len(self.metrics) > 0:
self.best_config_metric = self.metrics[0]
warning_text = "No best config metric was given, so PHOTON chose the first in the list of metrics as " \
warning_text = "No best config metric was given, so PHOTONAI chose the first in the list of metrics as " \
"criteria for choosing the best configuration."
logger.warning(warning_text)
warnings.warn(warning_text)
@@ -673,7 +673,7 @@ def _prepare_result_logging(self, start_time):
if self.permutation_id is not None:
self.results.permutation_id = self.permutation_id

# save wizard information to photon db in order to map results to the wizard design object
# save wizard information to PHOTONAI db in order to map results to the wizard design object
if self.output_settings and hasattr(self.output_settings, 'wizard_object_id'):
if self.output_settings.wizard_object_id:
self.name = self.output_settings.wizard_object_id
@@ -916,7 +916,7 @@ def _input_data_sanity_checks(self, data, targets, **kwargs):
nr_of_nans = len(np.where(nans_in_y == 1)[0])
if nr_of_nans > 0:
logger.info("You have " + str(nr_of_nans) + " Nans in your target vector, "
"PHOTON erases every data item that has a Nan Target")
"PHOTONAI erases every data item that has a Nan Target")
self.data.X = self.data.X[~nans_in_y]
self.data.y = self.data.y[~nans_in_y]
except Exception as e:
@@ -1021,9 +1021,9 @@ def fit(self, data, targets, **kwargs):
self.output_settings._update_settings(self.name, start.strftime("%Y-%m-%d_%H-%M-%S"))

logger.photon_system_log('***************************************************************************************************************')
logger.photon_system_log('PHOTON ANALYSIS: ' + self.name)
logger.photon_system_log('PHOTONAI ANALYSIS: ' + self.name)
logger.photon_system_log('***************************************************************************************************************')
logger.info("Preparing data and PHOTON objects for analysis...")
logger.info("Preparing data and PHOTONAI objects for analysis...")

# loop over outer cross validation
if self.nr_of_processes > 1:
21 changes: 10 additions & 11 deletions test/base_tests/test_hyperpipe.py
@@ -114,26 +114,25 @@ def my_func(X, y, **kwargs):
def test_sanity(self):
# make sure that no metrics means raising an error
with self.assertRaises(ValueError):
hyperpipe = Hyperpipe("hp_name", inner_cv=self.inner_cv_object)
Hyperpipe("hp_name", inner_cv=self.inner_cv_object)

# make sure that if no best config metric is given, PHOTON raises a warning
with warnings.catch_warnings(record=True) as w:
hyperpipe = Hyperpipe("hp_name", inner_cv=self.inner_cv_object, metrics=["accuracy", "f1_score"])
assert len(w) == 1
Hyperpipe("hp_name", inner_cv=self.inner_cv_object, metrics=["accuracy", "f1_score"])
assert any("No best config metric was given" in s for s in [e.message.args[0] for e in w])

with warnings.catch_warnings(record=True) as w:
hyperpipe = Hyperpipe("hp_name", inner_cv=self.inner_cv_object, best_config_metric=["accuracy", "f1_score"])
assert len(w) == 1
Hyperpipe("hp_name", inner_cv=self.inner_cv_object, best_config_metric=["accuracy", "f1_score"])
assert any("Best Config Metric must be a single" in s for s in [e.message.args[0] for e in w])

with self.assertRaises(NotImplementedError):
hyperpipe = Hyperpipe("hp_name", inner_cv=self.inner_cv_object,
best_config_metric='accuracy', metrics=["accuracy"],
calculate_metrics_across_folds=False,
calculate_metrics_per_fold=False)
Hyperpipe("hp_name", inner_cv=self.inner_cv_object,
best_config_metric='accuracy', metrics=["accuracy"],
calculate_metrics_across_folds=False,
calculate_metrics_per_fold=False)

with self.assertRaises(AttributeError):
hyperpipe = Hyperpipe("hp_name",
best_config_metric='accuracy', metrics=["accuracy"])
Hyperpipe("hp_name", best_config_metric='accuracy', metrics=["accuracy"])

data = np.random.random((500, 50))

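This is the pattern that gives the commit its title: instead of counting recorded warnings (assert len(w) == 1), the tests now check the warning text, which stays robust when dependencies emit unrelated warnings into the same catch_warnings block. A minimal sketch of the idiom, with a hypothetical message:

import warnings

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter('always')  # record every warning, including duplicates
    warnings.warn('No best config metric was given, using the first metric.')

# brittle: assert len(w) == 1 fails as soon as anything else warns
# robust: look for the expected message among the recorded warnings
assert any('No best config metric' in str(rec.message) for rec in w)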
4 changes: 2 additions & 2 deletions test/base_tests/test_photon_batch.py
@@ -78,7 +78,7 @@ def test_transform(self):

with warnings.catch_warnings(record=True) as w:
self.neuro_batch.transform('str')
assert len(w) == 1
assert any("Cannot do batching" in s for s in [e.message.args[0] for e in w])

def test_predict(self):
y_predicted = self.neuro_batch.predict(self.data, **self.kwargs)
@@ -87,4 +87,4 @@ def test_predict(self):
self.assertEqual(y_predicted[-1], (self.data.shape[0]/self.batch_size))
with warnings.catch_warnings(record=True) as w:
self.neuro_batch.predict('str')
assert len(w) == 1
assert any("Cannot do batching" in s for s in [e.message.args[0] for e in w])
6 changes: 3 additions & 3 deletions test/base_tests/test_photon_elements.py
@@ -638,7 +638,7 @@ def callback_func(X, y, **kwargs):
self.assertTrue(no_callback_pipe.elements[-1][1] is my_callback)
test_branch.sanity_check_pipeline(no_callback_pipe)
self.assertFalse(no_callback_pipe.elements)
assert len(w) == 1
assert any("Last element of pipeline cannot be callback" in s for s in [e.message.args[0] for e in w])

def test_prepare_pipeline(self):
self.assertEqual(len(self.transformer_branch.elements), 2)
@@ -911,7 +911,7 @@ def test_predict_warning(self):
pe.add(PipelineElement('SVC'))
with warnings.catch_warnings(record=True) as w:
pe.predict([0, 1, 2])
assert len(w) == 1
assert any("There is no predict function" in s for s in [e.message.args[0] for e in w])


class DataFilterTests(unittest.TestCase):
@@ -991,4 +991,4 @@ def test_callback(self):

with warnings.catch_warnings(record=True) as w:
self.callback_branch_pipeline_error.fit(self.X, self.y).predict(self.X)
assert len(w) == 2
assert any("Last element of pipeline cannot be callback" in s for s in [e.message.args[0] for e in w])
2 changes: 1 addition & 1 deletion test/optimization_tests/nevergrad/test_nevergrad.py
@@ -141,7 +141,7 @@ def test_other(self):
of = lambda x: x ** 2
with warnings.catch_warnings(record=True) as w:
opt.prepare(pipeline_elements=pipeline_elements, maximize_metric=True, objective_function=of)
assert len(w) == 1
assert any("PHOTONAI has detected some" in s for s in [e.message.args[0] for e in w])

pipeline_elements = [PipelineElement("SVC", hyperparameters={'C': FloatRange(0.1, 0.5,
range_type='geomspace')})]
@@ -46,7 +46,7 @@ def test_time_limit(self):
for config in self.optimizer.ask:
configs.append(config)
stop = time.time()
self.assertAlmostEqual(stop-start, 3, 2)
self.assertAlmostEqual(stop-start, 3, 1)

def test_run(self):
pass
4 changes: 2 additions & 2 deletions test/optimization_tests/sk_opt/test_sk_opt.py
@@ -26,7 +26,7 @@ def test_empty_hspace(self):
with warnings.catch_warnings(record=True) as w:
self.optimizer.prepare([], True)
self.assertIsNone(self.optimizer.optimizer)
assert len(w) == 1
assert any("Did not find any" in s for s in [e.message.args[0] for e in w])

def test_eliminate_one_value_hyperparams(self):
pipeline_elements = [PipelineElement('PCA', hyperparameters={'n_components': Categorical([5])}),
@@ -36,7 +36,7 @@ def test_eliminate_one_value_hyperparams(self):
'tol': FloatRange(0.1, 1, range_type='logspace')})]
with warnings.catch_warnings(record=True) as w:
self.optimizer.prepare(pipeline_elements, True)
assert len(w) == 1
assert any("PHOTONAI has detected some" in s for s in [e.message.args[0] for e in w])
self.assertIn('SVC__C', self.optimizer.hyperparameter_list)
self.assertIn('SVC__shrinking', self.optimizer.hyperparameter_list)
self.assertNotIn('PCA__n_components', self.optimizer.hyperparameter_list)
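For context on the message matched above (a hedged aside mirroring the test's setup): a hyperparameter with a single candidate value is effectively a constant, so the optimizer eliminates it from the search space and warns — hence the assertion that 'PCA__n_components' is absent while the SVC parameters remain.

from photonai.base import PipelineElement
from photonai.optimization import Categorical, FloatRange

elements = [
    PipelineElement('PCA', hyperparameters={'n_components': Categorical([5])}),  # one value: a constant
    PipelineElement('SVC', hyperparameters={'C': FloatRange(0.1, 1)}),           # a real range: kept
]
# optimizer.prepare(elements, True) would warn ("PHOTONAI has detected some ...")
# and keep only SVC__C in optimizer.hyperparameter_list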