code optimization
yzhao062 committed Aug 4, 2019
1 parent 2b47b14 commit 6f7f38b
Showing 10 changed files with 15 additions and 173 deletions.
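Every file below follows one theme: combo had been carrying its own copy of check_parameter in combo/utils/utility.py, and this commit deletes that vendored copy, its tests, and several unused imports, redirecting all call sites to the equivalent helper that pyod (already a combo dependency) ships. The recurring import swap, shown here as an illustrative sketch rather than a literal diff line:

# Before (vendored helper, deleted by this commit):
#   from ..utils.utility import check_parameter
# After (same helper, taken from the existing pyod dependency):
from pyod.utils.utility import check_parameter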
4 changes: 3 additions & 1 deletion combo/models/classifier_comb.py
@@ -11,9 +11,11 @@
 from sklearn.utils import check_X_y
 from sklearn.utils import column_or_1d

+from pyod.utils.utility import check_parameter
+
 from .base import BaseAggregator
 from .score_comb import average, maximization, majority_vote, median
-from ..utils.utility import check_parameter
+
 from ..utils.utility import score_to_proba

10 changes: 5 additions & 5 deletions combo/models/classifier_dcs.py
@@ -15,7 +15,7 @@
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets

-from ..utils.utility import check_parameter
+from pyod.utils.utility import check_parameter

 from .base import BaseAggregator

@@ -61,17 +61,17 @@ def __init__(self, base_estimators, local_region_size=30, threshold=None,
         # validate input parameters
         if not isinstance(local_region_size, int):
             raise ValueError('local_region_size must be an integer variable')
-        check_parameter(local_region_size, low=1, include_left=True,
-                        param_name='n_folds')
+        check_parameter(local_region_size, low=2, include_left=True,
+                        param_name='local_region_size')
         self.local_region_size = local_region_size

         if threshold is not None:
             warnings.warn(
-                "Stacking does not support threshold setting option. "
+                "DCS does not support threshold setting option. "
                 "Please set the threshold in classifiers directly.")

         if pre_fitted is not None:
-            warnings.warn("Stacking does not support pre_fitted option.")
+            warnings.warn("DCS does not support pre_fitted option.")

     def fit(self, X, y):
         """Fit classifier.
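Besides renaming the copy-pasted "Stacking" warnings and the mislabeled 'n_folds' parameter name, this hunk tightens the lower bound on local_region_size from 1 to 2. A minimal sketch of the new validation behavior, assuming pyod's check_parameter (which returns True in range and raises ValueError otherwise):

from pyod.utils.utility import check_parameter

# A 30-sample local region satisfies the new bound [2, inf).
check_parameter(30, low=2, include_left=True,
                param_name='local_region_size')  # returns True

# A one-sample region is now rejected, and under the correct name.
try:
    check_parameter(1, low=2, include_left=True,
                    param_name='local_region_size')
except ValueError as err:
    print(err)  # e.g. "local_region_size is set to 1. Not in the range of [2, 2147483647)."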
3 changes: 2 additions & 1 deletion combo/models/classifier_stacking.py
@@ -15,7 +15,8 @@
 from sklearn.utils import check_X_y
 from sklearn.utils.validation import check_is_fitted

-from ..utils.utility import check_parameter
+from pyod.utils.utility import check_parameter
+
 from ..utils.utility import list_diff
 from .base import BaseAggregator

3 changes: 2 additions & 1 deletion combo/models/cluster_comb.py
@@ -11,9 +11,10 @@
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.testing import assert_equal

+from pyod.utils.utility import check_parameter
+
 from .base import BaseAggregator
 from .score_comb import majority_vote
-from ..utils.utility import check_parameter

 OFFSET_FACTOR = 1000000

2 changes: 0 additions & 2 deletions combo/models/detector_comb.py
@@ -14,8 +14,6 @@

 from .base import BaseAggregator
 from .score_comb import average, maximization, median
-from ..utils.utility import check_parameter
-from ..utils.utility import score_to_proba


 class SimpleDetectorAggregator(BaseAggregator):
4 changes: 0 additions & 4 deletions combo/models/detector_lscp.py
@@ -10,13 +10,9 @@
 from sklearn.utils import check_array
 from sklearn.utils import column_or_1d
 from sklearn.utils.validation import check_is_fitted
 from pyod.utils.utility import standardizer
 from pyod.models.lscp import LSCP as PyOD_LSCP

 from .base import BaseAggregator
-from .score_comb import average, maximization, median
-from ..utils.utility import check_parameter
-from ..utils.utility import score_to_proba
-

 class LSCP(BaseAggregator):
3 changes: 2 additions & 1 deletion combo/models/score_comb.py
@@ -15,7 +15,8 @@
 from sklearn.utils.random import sample_without_replacement
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.multiclass import check_classification_targets
-from ..utils.utility import check_parameter
+
+from pyod.utils.utility import check_parameter


 def _aom_moa_helper(mode, scores, n_buckets, method, bootstrap_estimators,
71 changes: 0 additions & 71 deletions combo/test/test_utility.py
@@ -18,7 +18,6 @@
 # if combo is installed, no need to use the following line
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

-from utils.utility import check_parameter
 from utils.utility import standardizer
 from utils.utility import get_label_n
 from utils.utility import precision_n_scores
@@ -30,76 +29,6 @@
 from utils.utility import score_to_proba


-class TestParameters(unittest.TestCase):
-    def setUp(self):
-        pass
-
-    def test_check_parameter_range(self):
-        # verify parameter type correction
-        with assert_raises(TypeError):
-            check_parameter('f', 0, 100)
-
-        with assert_raises(TypeError):
-            check_parameter(1, 'f', 100)
-
-        with assert_raises(TypeError):
-            check_parameter(1, 0, 'f')
-
-        with assert_raises(TypeError):
-            check_parameter(argmaxn(value_list=[1, 2, 3], n=1), 0, 100)
-
-        # if low and high are both unset
-        with assert_raises(ValueError):
-            check_parameter(50)
-
-        # if low <= high
-        with assert_raises(ValueError):
-            check_parameter(50, 100, 99)
-
-        with assert_raises(ValueError):
-            check_parameter(50, 100, 100)
-
-        # check one side
-        with assert_raises(ValueError):
-            check_parameter(50, low=100)
-        with assert_raises(ValueError):
-            check_parameter(50, high=0)
-
-        assert_equal(True, check_parameter(50, low=10))
-        assert_equal(True, check_parameter(50, high=100))
-
-        # if check fails
-        with assert_raises(ValueError):
-            check_parameter(-1, 0, 100)
-
-        with assert_raises(ValueError):
-            check_parameter(101, 0, 100)
-
-        with assert_raises(ValueError):
-            check_parameter(0.5, 0.2, 0.3)
-
-        # if check passes
-        assert_equal(True, check_parameter(50, 0, 100))
-
-        assert_equal(True, check_parameter(0.5, 0.1, 0.8))
-
-        # if includes left or right bounds
-        with assert_raises(ValueError):
-            check_parameter(100, 0, 100, include_left=False,
-                            include_right=False)
-        assert_equal(True, check_parameter(0, 0, 100, include_left=True,
-                                           include_right=False))
-        assert_equal(True, check_parameter(0, 0, 100, include_left=True,
-                                           include_right=True))
-        assert_equal(True, check_parameter(100, 0, 100, include_left=False,
-                                           include_right=True))
-        assert_equal(True, check_parameter(100, 0, 100, include_left=True,
-                                           include_right=True))
-
-    def tearDown(self):
-        pass
-
-
 class TestScaler(unittest.TestCase):

     def setUp(self):
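The deleted TestParameters class covered only the vendored helper, so its guarantees now rest on pyod's own test suite. A spot-check, not part of the commit, that the swapped-in implementation honors the same contract the removed tests asserted:

from pyod.utils.utility import check_parameter

assert check_parameter(50, 0, 100) is True     # strictly inside (0, 100)
assert check_parameter(0.5, 0.1, 0.8) is True  # floats are accepted

try:
    check_parameter(101, 0, 100)               # above the high bound
except ValueError:
    pass                                       # raises, as the old tests expected

try:
    check_parameter('f', 0, 100)               # non-numeric parameter
except TypeError:
    pass                                       # type checking is preserved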
2 changes: 0 additions & 2 deletions combo/utils/data.py
@@ -20,8 +20,6 @@
 from sklearn.metrics import roc_auc_score
 from sklearn.metrics import f1_score

-from .utility import check_parameter
-
 MAX_INT = np.iinfo(np.int32).max


86 changes: 1 addition & 85 deletions combo/utils/utility.py
@@ -7,7 +7,6 @@

 import numpy as np
 from numpy import percentile
-import numbers

 import sklearn
 from sklearn.metrics import precision_score
@@ -20,94 +19,11 @@
 from sklearn.utils import check_random_state
 from sklearn.utils.random import sample_without_replacement

-MAX_INT = np.iinfo(np.int32).max
-MIN_INT = -1 * MAX_INT
+from pyod.utils.utility import check_parameter


 # TODO: change outlier detection tasks to model combination

-def check_parameter(param, low=MIN_INT, high=MAX_INT, param_name='',
-                    include_left=False, include_right=False):
-    """Check if an input is within the defined range.
-
-    Parameters
-    ----------
-    param : int, float
-        The input parameter to check.
-
-    low : int, float
-        The lower bound of the range.
-
-    high : int, float
-        The higher bound of the range.
-
-    param_name : str, optional (default='')
-        The name of the parameter.
-
-    include_left : bool, optional (default=False)
-        Whether includes the lower bound (lower bound <=).
-
-    include_right : bool, optional (default=False)
-        Whether includes the higher bound (<= higher bound).
-
-    Returns
-    -------
-    within_range : bool or raise errors
-        Whether the parameter is within the range of (low, high)
-    """
-
-    # param, low and high should all be numerical
-    if not isinstance(param, (numbers.Integral, np.integer, np.float)):
-        raise TypeError('{param_name} is set to {param} Not numerical'.format(
-            param=param, param_name=param_name))
-
-    if not isinstance(low, (numbers.Integral, np.integer, np.float)):
-        raise TypeError('low is set to {low}. Not numerical'.format(low=low))
-
-    if not isinstance(high, (numbers.Integral, np.integer, np.float)):
-        raise TypeError('high is set to {high}. Not numerical'.format(
-            high=high))
-
-    # at least one of the bounds should be specified
-    if low is MIN_INT and high is MAX_INT:
-        raise ValueError('Neither low nor high bounds is undefined')
-
-    # if wrong bound values are used
-    if low > high:
-        raise ValueError(
-            'Lower bound > Higher bound')
-
-    # value check under different bound conditions
-    if (include_left and include_right) and (param < low or param > high):
-        raise ValueError(
-            '{param_name} is set to {param}. '
-            'Not in the range of [{low}, {high}].'.format(
-                param=param, low=low, high=high, param_name=param_name))
-
-    elif (include_left and not include_right) and (
-            param < low or param >= high):
-        raise ValueError(
-            '{param_name} is set to {param}. '
-            'Not in the range of [{low}, {high}).'.format(
-                param=param, low=low, high=high, param_name=param_name))
-
-    elif (not include_left and include_right) and (
-            param <= low or param > high):
-        raise ValueError(
-            '{param_name} is set to {param}. '
-            'Not in the range of ({low}, {high}].'.format(
-                param=param, low=low, high=high, param_name=param_name))
-
-    elif (not include_left and not include_right) and (
-            param <= low or param >= high):
-        raise ValueError(
-            '{param_name} is set to {param}. '
-            'Not in the range of ({low}, {high}).'.format(
-                param=param, low=low, high=high, param_name=param_name))
-    else:
-        return True


 def check_detector(detector):
     """Checks if fit and decision_function methods exist for given detector
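The deleted function appears to be the same implementation that pyod ships (the two projects share an author), so redirecting the import is transparent to callers. A drop-in check under that assumption; the parameter name below is hypothetical:

from pyod.utils.utility import check_parameter

# Same keyword signature as the deleted local copy.
check_parameter(5, low=1, high=10, param_name='n_clusters',
                include_left=True, include_right=True)  # returns True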
