From 6f7f38b45c0680cf81e98dcd3b9f24326124854c Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Sun, 4 Aug 2019 18:50:48 +0800 Subject: [PATCH] code optimization --- combo/models/classifier_comb.py | 4 +- combo/models/classifier_dcs.py | 10 ++-- combo/models/classifier_stacking.py | 3 +- combo/models/cluster_comb.py | 3 +- combo/models/detector_comb.py | 2 - combo/models/detector_lscp.py | 4 -- combo/models/score_comb.py | 3 +- combo/test/test_utility.py | 71 ------------------------ combo/utils/data.py | 2 - combo/utils/utility.py | 86 +---------------------------- 10 files changed, 15 insertions(+), 173 deletions(-) diff --git a/combo/models/classifier_comb.py b/combo/models/classifier_comb.py index e9f8a37..72a21fc 100644 --- a/combo/models/classifier_comb.py +++ b/combo/models/classifier_comb.py @@ -11,9 +11,11 @@ from sklearn.utils import check_X_y from sklearn.utils import column_or_1d +from pyod.utils.utility import check_parameter + from .base import BaseAggregator from .score_comb import average, maximization, majority_vote, median -from ..utils.utility import check_parameter + from ..utils.utility import score_to_proba diff --git a/combo/models/classifier_dcs.py b/combo/models/classifier_dcs.py index 1a8fac0..ed1a9c3 100644 --- a/combo/models/classifier_dcs.py +++ b/combo/models/classifier_dcs.py @@ -15,7 +15,7 @@ from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets -from ..utils.utility import check_parameter +from pyod.utils.utility import check_parameter from .base import BaseAggregator @@ -61,17 +61,17 @@ def __init__(self, base_estimators, local_region_size=30, threshold=None, # validate input parameters if not isinstance(local_region_size, int): raise ValueError('local_region_size must be an integer variable') - check_parameter(local_region_size, low=1, include_left=True, - param_name='n_folds') + check_parameter(local_region_size, low=2, include_left=True, + param_name='local_region_size') self.local_region_size = local_region_size if threshold is not None: warnings.warn( - "Stacking does not support threshold setting option. " + "DCS does not support threshold setting option. " "Please set the threshold in classifiers directly.") if pre_fitted is not None: - warnings.warn("Stacking does not support pre_fitted option.") + warnings.warn("DCS does not support pre_fitted option.") def fit(self, X, y): """Fit classifier. diff --git a/combo/models/classifier_stacking.py b/combo/models/classifier_stacking.py index b390faf..3c89383 100644 --- a/combo/models/classifier_stacking.py +++ b/combo/models/classifier_stacking.py @@ -15,7 +15,8 @@ from sklearn.utils import check_X_y from sklearn.utils.validation import check_is_fitted -from ..utils.utility import check_parameter +from pyod.utils.utility import check_parameter + from ..utils.utility import list_diff from .base import BaseAggregator diff --git a/combo/models/cluster_comb.py b/combo/models/cluster_comb.py index 02ca746..4acbbe0 100644 --- a/combo/models/cluster_comb.py +++ b/combo/models/cluster_comb.py @@ -11,9 +11,10 @@ from sklearn.utils.validation import check_is_fitted from sklearn.utils.testing import assert_equal +from pyod.utils.utility import check_parameter + from .base import BaseAggregator from .score_comb import majority_vote -from ..utils.utility import check_parameter OFFSET_FACTOR = 1000000 diff --git a/combo/models/detector_comb.py b/combo/models/detector_comb.py index 5184011..b4ff082 100644 --- a/combo/models/detector_comb.py +++ b/combo/models/detector_comb.py @@ -14,8 +14,6 @@ from .base import BaseAggregator from .score_comb import average, maximization, median -from ..utils.utility import check_parameter -from ..utils.utility import score_to_proba class SimpleDetectorAggregator(BaseAggregator): diff --git a/combo/models/detector_lscp.py b/combo/models/detector_lscp.py index 059de7e..15523fb 100644 --- a/combo/models/detector_lscp.py +++ b/combo/models/detector_lscp.py @@ -10,13 +10,9 @@ from sklearn.utils import check_array from sklearn.utils import column_or_1d from sklearn.utils.validation import check_is_fitted -from pyod.utils.utility import standardizer from pyod.models.lscp import LSCP as PyOD_LSCP from .base import BaseAggregator -from .score_comb import average, maximization, median -from ..utils.utility import check_parameter -from ..utils.utility import score_to_proba class LSCP(BaseAggregator): diff --git a/combo/models/score_comb.py b/combo/models/score_comb.py index b2bd4a5..0654d8e 100644 --- a/combo/models/score_comb.py +++ b/combo/models/score_comb.py @@ -15,7 +15,8 @@ from sklearn.utils.random import sample_without_replacement from sklearn.utils.testing import assert_equal from sklearn.utils.multiclass import check_classification_targets -from ..utils.utility import check_parameter + +from pyod.utils.utility import check_parameter def _aom_moa_helper(mode, scores, n_buckets, method, bootstrap_estimators, diff --git a/combo/test/test_utility.py b/combo/test/test_utility.py index 51a8788..30c98e0 100644 --- a/combo/test/test_utility.py +++ b/combo/test/test_utility.py @@ -18,7 +18,6 @@ # if combo is installed, no need to use the following line sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from utils.utility import check_parameter from utils.utility import standardizer from utils.utility import get_label_n from utils.utility import precision_n_scores @@ -30,76 +29,6 @@ from utils.utility import score_to_proba -class TestParameters(unittest.TestCase): - def setUp(self): - pass - - def test_check_parameter_range(self): - # verify parameter type correction - with assert_raises(TypeError): - check_parameter('f', 0, 100) - - with assert_raises(TypeError): - check_parameter(1, 'f', 100) - - with assert_raises(TypeError): - check_parameter(1, 0, 'f') - - with assert_raises(TypeError): - check_parameter(argmaxn(value_list=[1, 2, 3], n=1), 0, 100) - - # if low and high are both unset - with assert_raises(ValueError): - check_parameter(50) - - # if low <= high - with assert_raises(ValueError): - check_parameter(50, 100, 99) - - with assert_raises(ValueError): - check_parameter(50, 100, 100) - - # check one side - with assert_raises(ValueError): - check_parameter(50, low=100) - with assert_raises(ValueError): - check_parameter(50, high=0) - - assert_equal(True, check_parameter(50, low=10)) - assert_equal(True, check_parameter(50, high=100)) - - # if check fails - with assert_raises(ValueError): - check_parameter(-1, 0, 100) - - with assert_raises(ValueError): - check_parameter(101, 0, 100) - - with assert_raises(ValueError): - check_parameter(0.5, 0.2, 0.3) - - # if check passes - assert_equal(True, check_parameter(50, 0, 100)) - - assert_equal(True, check_parameter(0.5, 0.1, 0.8)) - - # if includes left or right bounds - with assert_raises(ValueError): - check_parameter(100, 0, 100, include_left=False, - include_right=False) - assert_equal(True, check_parameter(0, 0, 100, include_left=True, - include_right=False)) - assert_equal(True, check_parameter(0, 0, 100, include_left=True, - include_right=True)) - assert_equal(True, check_parameter(100, 0, 100, include_left=False, - include_right=True)) - assert_equal(True, check_parameter(100, 0, 100, include_left=True, - include_right=True)) - - def tearDown(self): - pass - - class TestScaler(unittest.TestCase): def setUp(self): diff --git a/combo/utils/data.py b/combo/utils/data.py index dc5a3d2..6280622 100644 --- a/combo/utils/data.py +++ b/combo/utils/data.py @@ -20,8 +20,6 @@ from sklearn.metrics import roc_auc_score from sklearn.metrics import f1_score -from .utility import check_parameter - MAX_INT = np.iinfo(np.int32).max diff --git a/combo/utils/utility.py b/combo/utils/utility.py index 7994373..fa34da8 100644 --- a/combo/utils/utility.py +++ b/combo/utils/utility.py @@ -7,7 +7,6 @@ import numpy as np from numpy import percentile -import numbers import sklearn from sklearn.metrics import precision_score @@ -20,94 +19,11 @@ from sklearn.utils import check_random_state from sklearn.utils.random import sample_without_replacement -MAX_INT = np.iinfo(np.int32).max -MIN_INT = -1 * MAX_INT +from pyod.utils.utility import check_parameter # TODO: change outlier detection tasks to model combination -def check_parameter(param, low=MIN_INT, high=MAX_INT, param_name='', - include_left=False, include_right=False): - """Check if an input is within the defined range. - - Parameters - ---------- - param : int, float - The input parameter to check. - - low : int, float - The lower bound of the range. - - high : int, float - The higher bound of the range. - - param_name : str, optional (default='') - The name of the parameter. - - include_left : bool, optional (default=False) - Whether includes the lower bound (lower bound <=). - - include_right : bool, optional (default=False) - Whether includes the higher bound (<= higher bound). - - Returns - ------- - within_range : bool or raise errors - Whether the parameter is within the range of (low, high) - - """ - - # param, low and high should all be numerical - if not isinstance(param, (numbers.Integral, np.integer, np.float)): - raise TypeError('{param_name} is set to {param} Not numerical'.format( - param=param, param_name=param_name)) - - if not isinstance(low, (numbers.Integral, np.integer, np.float)): - raise TypeError('low is set to {low}. Not numerical'.format(low=low)) - - if not isinstance(high, (numbers.Integral, np.integer, np.float)): - raise TypeError('high is set to {high}. Not numerical'.format( - high=high)) - - # at least one of the bounds should be specified - if low is MIN_INT and high is MAX_INT: - raise ValueError('Neither low nor high bounds is undefined') - - # if wrong bound values are used - if low > high: - raise ValueError( - 'Lower bound > Higher bound') - - # value check under different bound conditions - if (include_left and include_right) and (param < low or param > high): - raise ValueError( - '{param_name} is set to {param}. ' - 'Not in the range of [{low}, {high}].'.format( - param=param, low=low, high=high, param_name=param_name)) - - elif (include_left and not include_right) and ( - param < low or param >= high): - raise ValueError( - '{param_name} is set to {param}. ' - 'Not in the range of [{low}, {high}).'.format( - param=param, low=low, high=high, param_name=param_name)) - - elif (not include_left and include_right) and ( - param <= low or param > high): - raise ValueError( - '{param_name} is set to {param}. ' - 'Not in the range of ({low}, {high}].'.format( - param=param, low=low, high=high, param_name=param_name)) - - elif (not include_left and not include_right) and ( - param <= low or param >= high): - raise ValueError( - '{param_name} is set to {param}. ' - 'Not in the range of ({low}, {high}).'.format( - param=param, low=low, high=high, param_name=param_name)) - else: - return True - def check_detector(detector): """Checks if fit and decision_function methods exist for given detector