Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/score train #83

Merged
merged 8 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions photonai/base/hyperpipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ def __init__(self, name: Optional[str],
cache_folder: str = None,
nr_of_processes: int = 1,
multi_threading: bool = True,
allow_multidim_targets: bool = False):
allow_multidim_targets: bool = False,
score_train: bool = True):
"""
Initialize the object.

Expand Down Expand Up @@ -420,6 +421,9 @@ def __init__(self, name: Optional[str],
allow_multidim_targets:
Allows multidimensional targets.

score_train:
Metrics for the train-set are only calculated if score_train is True.

"""

self.name = re.sub(r'\W+', '', name)
Expand Down Expand Up @@ -514,6 +518,7 @@ def __init__(self, name: Optional[str],
self.permutation_id = permutation_id
self.allow_multidim_targets = allow_multidim_targets
self.is_final_fit = False
self.score_train = score_train

# ====================== Random Seed ===========================
self.random_state = random_seed
Expand Down Expand Up @@ -939,7 +944,6 @@ def _finalize_optimization(self):
if not feature_importances:
logger.info("No feature importances available for {}!".format(self.optimum_pipe.elements[-1][0]))
else:
self.results.best_config_feature_importances = feature_importances

# write backmapping file only if optimum_pipes inverse_transform works completely.
# restriction: only a faulty inverse_transform is considered, missing ones are further ignored.
Expand Down Expand Up @@ -1085,7 +1089,8 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
cache_folder=self.cache_folder,
cache_updater=self.recursive_cache_folder_propagation,
dummy_estimator=dummy_estimator,
result_obj=outer_fold)
result_obj=outer_fold,
score_train=self.score_train)
# 2. monitor outputs
self.results.outer_folds.append(outer_fold)

Expand Down
29 changes: 22 additions & 7 deletions photonai/processing/inner_folds.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,
training: bool = False,
cache_folder=None,
cache_updater=None,
scorer: Scorer = None):
scorer: Scorer = None,
score_train: bool = True):

self.params = specific_config
self.pipe = pipe_ctor
Expand All @@ -81,6 +82,7 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,

self.raise_error = raise_error
self.training = training
self.score_train = score_train

def fit(self, X, y, **kwargs):
"""Iterates over cross-validation folds and trains the pipeline,
Expand Down Expand Up @@ -136,7 +138,8 @@ def fit(self, X, y, **kwargs):
kwargs_cv_train),
test_data=InnerFoldManager.JobData(test_X, test_y, test,
kwargs_cv_test),
scorer=self.scorer)
scorer=self.scorer,
score_train=self.score_train)

# only for unparallel processing
# inform children in which inner fold we are
Expand Down Expand Up @@ -224,7 +227,8 @@ def compute_learning_curves(self, new_pipe, train_X, train_y, train, kwargs_cv_t
callbacks=self.optimization_constraints,
train_data=self.JobData(train_cut_X, train_cut_y, train_cut, train_cut_kwargs),
test_data=self.JobData(test_X, test_y, test, kwargs_cv_test),
scorer=self.scorer)
scorer=self.scorer,
score_train=self.score_train)
curr_test_cut, curr_train_cut = InnerFoldManager.fit_and_score(job_data)
learning_curves.append([self.cross_validation_infos.learning_curves_cut.values[i], curr_test_cut.metrics,
curr_train_cut.metrics])
Expand All @@ -239,14 +243,15 @@ def __init__(self, X, y, indices, cv_kwargs):

class InnerCVJob:

def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer):
def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer, score_train):
self.pipe = pipe
self.config = config
self.metrics = metrics
self.callbacks = callbacks
self.train_data = train_data
self.test_data = test_data
self.scorer = scorer
self.score_train = score_train

@staticmethod
def update_config_item_with_inner_fold(config_item, fold_cnt, curr_train_fold, curr_test_fold, time_monitor,
Expand Down Expand Up @@ -344,17 +349,27 @@ def fit_and_score(job: InnerCVJob):
# start fitting
pipe.fit(job.train_data.X, job.train_data.y, **job.train_data.cv_kwargs)

logger.debug('Scoring Training Data')
logger.debug('Scoring Test Data')

# score test data
curr_test_fold = InnerFoldManager.score(pipe, job.test_data.X, job.test_data.y, job.metrics,
indices=job.test_data.indices,
scorer=job.scorer,
**job.test_data.cv_kwargs)

logger.debug('Scoring Test Data')
logger.debug('Scoring Training Data')
# score train data
curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics,
scores = {}
for metric in list(curr_test_fold.metrics.keys()):
scores[metric] = 0
curr_train_fold = MDBScoreInformation(metrics=scores,
score_duration=0,
y_pred=list(np.zeros_like(job.train_data.y)),
y_true=list(job.train_data.y),
indices=np.asarray(job.train_data.indices).tolist(),
probabilities=[])
if job.score_train:
curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics,
indices=job.train_data.indices,
training=True,
scorer=job.scorer, **job.train_data.cv_kwargs)
Expand Down
4 changes: 2 additions & 2 deletions photonai/processing/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ class Scorer:
'precision': ('sklearn.metrics', 'precision_score', 'score'),
'recall': ('sklearn.metrics', 'recall_score', 'score'),
'auc': ('sklearn.metrics', 'roc_auc_score', 'score'),
'sensitivity': ('photonai.processing.metrics', 'sensitivity', 'score'),
'sensitivity': ('sklearn.metrics', 'recall_score', 'score'),
'specificity': ('photonai.processing.metrics', 'specificity', 'score'),
'balanced_accuracy': ('photonai.processing.metrics', 'balanced_accuracy', 'score'),
'balanced_accuracy': ('sklearn.metrics', 'balanced_accuracy_score', 'score'),
'categorical_accuracy': ('photonai.processing.metrics', 'categorical_accuracy_score', 'score'),

# Regression
Expand Down
10 changes: 7 additions & 3 deletions photonai/processing/outer_folds.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,16 @@ def __init__(self, pipe,
cache_folder=None,
cache_updater=None,
dummy_estimator=None,
result_obj=None):
result_obj=None,
score_train: bool = True):
self.outer_fold_id = outer_fold_id
self.cross_validation_info = cross_validation_info
self.scorer = Scorer(optimization_info.metrics)
self.optimization_info = optimization_info
self._pipe = pipe
self.copy_pipe_fnc = self._pipe.copy_me
self.dummy_estimator = dummy_estimator
self.score_train = score_train

self.cache_folder = cache_folder
self.cache_updater = cache_updater
Expand Down Expand Up @@ -246,6 +248,7 @@ def fit(self, X, y=None, **kwargs):
indices=self.cross_validation_info.outer_folds[self.outer_fold_id].test_indices,
metrics=self.optimization_info.metrics,
scorer=self.scorer,
score_train=self.score_train,
**self._test_kwargs)

logger.debug('... scoring training data')
Expand All @@ -255,6 +258,7 @@ def fit(self, X, y=None, **kwargs):
metrics=self.optimization_info.metrics,
training=True,
scorer=self.scorer,
score_train=self.score_train,
**self._validation_kwargs)

best_config_performance_mdb.training = train_score_mdb
Expand Down Expand Up @@ -386,7 +390,7 @@ def _fit_dummy(self):
self.dummy_estimator.fit(dummy_y, self._validation_y)
train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y,
metrics=self.optimization_info.metrics,
scorer=self.scorer)
scorer=self.scorer, score_train=self.score_train)

# fill result tree with fold information
inner_fold = MDBInnerFold()
Expand All @@ -396,7 +400,7 @@ def _fit_dummy(self):
test_scores = InnerFoldManager.score(self.dummy_estimator,
self._test_X, self._test_y,
metrics=self.optimization_info.metrics,
scorer=self.scorer)
scorer=self.scorer, score_train=self.score_train)
print_metrics("DUMMY", test_scores.metrics)
inner_fold.validation = test_scores

Expand Down
Loading