Skip to content

Commit

Permalink
add median absolute error to metrics and fix missing kwargs in Defaul…
Browse files Browse the repository at this point in the history
…tPipeline
  • Loading branch information
RLeenings committed Sep 12, 2023
1 parent e042908 commit 126b242
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 3 deletions.
85 changes: 85 additions & 0 deletions examples/advanced/gpboost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# pip install gpboost -U
import gpboost as gpb
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.model_selection import GroupKFold, KFold

from photonai.base import Hyperpipe, PipelineElement
# from gpboost import GPBoostRegressor


class GPBoostDataWrapper(BaseEstimator, RegressorMixin):
    """Estimator wrapper around ``gpb.GPBoostRegressor`` with grouped random effects.

    The cluster label of every sample is handed over through the ``clusters``
    keyword argument of ``fit`` and ``predict``. The wrapper is a regressor
    (gaussian likelihood, continuous predictions), hence ``RegressorMixin``.
    """

    def __init__(self):
        # NOTE(review): presumably this flag makes the surrounding pipeline
        # forward fit/predict keyword arguments (here: ``clusters``) to this
        # element -- confirm against the PHOTONAI documentation.
        self.needs_covariates = True
        # Holds the fitted ``gpb.GPBoostRegressor``; ``None`` until ``fit`` succeeds.
        self.gpboost = None

    def fit(self, X, y, **kwargs):
        """Fit a GPBoost regressor with a grouped gaussian GP model.

        Parameters
        ----------
        X, y
            Training features and targets, passed unchanged to
            ``GPBoostRegressor.fit``.
        **kwargs
            Must contain ``clusters``: one group label per row of ``X``.

        Returns
        -------
        self

        Raises
        ------
        NotImplementedError
            If no ``clusters`` keyword argument is given.
        """
        # Validate first so that a failed call never replaces an already
        # fitted model with a fresh, unfitted one.
        if "clusters" not in kwargs:
            raise NotImplementedError("GPBoost needs clusters")

        group_labels = pd.Series(kwargs["clusters"])
        gp_model = gpb.GPModel(likelihood="gaussian", group_data=group_labels)
        self.gpboost = gpb.GPBoostRegressor()
        self.gpboost.fit(X, y, gp_model=gp_model)
        return self

    def predict(self, X, **kwargs):
        """Predict the response mean for ``X``.

        Parameters
        ----------
        X
            Features to predict on.
        **kwargs
            Must contain ``clusters``: one group label per row of ``X``.

        Returns
        -------
        The ``"response_mean"`` entry of the GPBoost prediction result.

        Raises
        ------
        KeyError
            If no ``clusters`` keyword argument is given.
        """
        group_labels = pd.Series(kwargs["clusters"])
        prediction = self.gpboost.predict(X, group_data_pred=group_labels)
        # GPBoost returns a dict-like result here; only the mean is needed.
        return prediction["response_mean"]

    def save(self):
        """Return ``None``; this wrapper implements no custom persistence."""
        return None


def get_gpboost_pipe(pipe_name, project_folder, split="group"):
    """Build a Hyperpipe that standardizes the features and fits the GPBoost wrapper.

    Parameters
    ----------
    pipe_name
        Name given to the Hyperpipe.
    project_folder
        Passed to the Hyperpipe as its ``project_folder``.
    split
        ``"group"`` selects ``GroupKFold`` for the outer cross-validation;
        any other value selects a plain ``KFold``.

    Returns
    -------
    The configured Hyperpipe (not yet fitted).
    """
    n_folds = 10
    outer_splitter = (
        GroupKFold(n_splits=n_folds) if split == "group" else KFold(n_splits=n_folds)
    )
    tracked_metrics = [
        'mean_absolute_error',
        'mean_squared_error',
        'spearman_correlation',
        'pearson_correlation',
    ]

    hyperpipe = Hyperpipe(
        pipe_name,
        optimizer='grid_search',
        metrics=tracked_metrics,
        best_config_metric='mean_absolute_error',
        outer_cv=outer_splitter,
        inner_cv=KFold(n_splits=n_folds),
        calculate_metrics_across_folds=True,
        use_test_set=True,
        verbosity=1,
        project_folder=project_folder,
    )

    # Preprocessing step: feature standardization.
    hyperpipe += PipelineElement(
        "StandardScaler",
        hyperparameters={},
        test_disabled=True,
        with_mean=True,
        with_std=True,
    )

    # Final estimator: the custom GPBoost wrapper, registered by instance.
    hyperpipe += PipelineElement.create(
        "GPBoost", GPBoostDataWrapper(), hyperparameters={}
    )

    return hyperpipe


def get_mock_data():
    """Create a small random regression data set with cluster labels.

    Returns
    -------
    tuple
        ``(X, y, clst)``: ``X`` is a 200 x 9 integer array with values in
        [0, 10), ``y`` is the row-wise sum of ``X``, and ``clst`` holds 200
        integer cluster labels in [0, 10).
    """
    n_samples, n_features = 200, 9

    # Draw order matters for reproducibility under a fixed global seed:
    # features first, cluster labels second.
    features = np.random.randint(10, size=(n_samples, n_features))
    targets = features.sum(axis=1)
    cluster_labels = np.random.randint(10, size=n_samples)

    return features, targets, cluster_labels


if __name__ == '__main__':
    # Synthetic features, targets and cluster labels for a quick trial run.
    features, targets, clusters = get_mock_data()

    # Directory handed to the Hyperpipe as its project folder.
    output_dir = "/tmp/gpboost_debug"

    # "random" selects the plain KFold outer split, so no groups are required.
    pipe = get_gpboost_pipe("Test_gpboost", output_dir, split="random")
    pipe.fit(features, targets, clusters=clusters)
2 changes: 1 addition & 1 deletion examples/basic/classification_custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
my_pipe = Hyperpipe('basic_svm_pipe',
inner_cv=KFold(n_splits=5),
outer_cv=KFold(n_splits=3),
optimizer='sk_opt',
optimizer='random_grid_search',
optimizer_params={'n_configurations': 15},
metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
best_config_metric='accuracy',
Expand Down
1 change: 1 addition & 0 deletions examples/basic/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from photonai import RegressionPipe

my_pipe = RegressionPipe('diabetes',
best_config_metric='median_absolute_error',
add_default_pipeline_elements=True,
scaling=True,
imputation=False,
Expand Down
4 changes: 2 additions & 2 deletions photonai/base/model_zoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def set_default_pipeline(self, scaling, imputation, imputation_nan_value, featur
logger.photon_system_log("---")
logger.stars()

def fit(self, X=None, y=None):
def fit(self, X=None, y=None, **kwargs):
if (X is not None and self.X_csv_path is not None) or (y is not None and self.y_csv_path is not None):
raise ValueError("You can either give the fit function data or the pipe definition paths "
"to csv files to load data from. Not both.")
Expand All @@ -228,7 +228,7 @@ def fit(self, X=None, y=None):

X = X if X is not None else pd.read_csv(self.X_csv_path, delimiter=self.delimiter)
y = y if y is not None else pd.read_csv(self.y_csv_path, delimiter=self.delimiter)
super().fit(X, y)
super().fit(X, y, **kwargs)


class ClassificationPipe(DefaultPipeline):
Expand Down
1 change: 1 addition & 0 deletions photonai/processing/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Scorer:
# Regression
'mean_squared_error': ('sklearn.metrics', 'mean_squared_error', 'error'),
'mean_absolute_error': ('sklearn.metrics', 'mean_absolute_error', 'error'),
'median_absolute_error': ('sklearn.metrics', 'median_absolute_error', 'error'),
'explained_variance': ('sklearn.metrics', 'explained_variance_score', 'score'),
'r2': ('sklearn.metrics', 'r2_score', 'score'),
'pearson_correlation': ('photonai.processing.metrics', 'pearson_correlation', 'score'),
Expand Down

0 comments on commit 126b242

Please sign in to comment.