Improve parameter check functionality

yzhao062 · Jun 4, 2018 · 5391b8d · 5391b8d
1 parent 941d9cd
commit 5391b8d
Show file tree

Hide file tree

Showing 6 changed files with 113 additions and 38 deletions.
diff --git a/pyod/models/abod.py b/pyod/models/abod.py
@@ -11,7 +11,7 @@
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted
 from .base import BaseDetector
-from ..utils.utility import check_parameter_range
+from ..utils.utility import check_parameter
 
 
 def _calculate_wocs(curr_pt, X, X_ind):
@@ -137,7 +137,7 @@ def _fit_fast(self):
         """
 
         # make sure the n_neighbors is in the range
-        check_parameter_range(self.n_neighbors, 1, self.n_train_)
+        check_parameter(self.n_neighbors, 1, self.n_train_)
 
         self.tree_ = KDTree(self.X_train_)
 

diff --git a/pyod/models/combination.py b/pyod/models/combination.py
@@ -10,7 +10,7 @@
 from sklearn.utils import shuffle
 from sklearn.utils.random import sample_without_replacement
 from sklearn.utils.testing import assert_equal
-from ..utils.utility import check_parameter_range
+from ..utils.utility import check_parameter
 
 
 def aom(scores, n_buckets, method='static', replace=False, random_state=None):
@@ -49,7 +49,7 @@ def aom(scores, n_buckets, method='static', replace=False, random_state=None):
     #       for now it is fixed to n_estimators/2
     scores = check_array(scores)
     n_estimators = scores.shape[1]
-    check_parameter_range(n_buckets, 2, n_estimators)  # range check
+    check_parameter(n_buckets, 2, n_estimators, param_name='n_buckets')
 
     scores_aom = np.zeros([scores.shape[0], n_buckets])
 
@@ -102,7 +102,8 @@ def aom(scores, n_buckets, method='static', replace=False, random_state=None):
 
 def moa(scores, n_buckets, method='static', replace=False, random_state=None):
     """
-    Maximization of Average - An ensemble method for combining multiple detectors
+    Maximization of Average - An ensemble method for combining multiple
+    detectors
 
     First dividing detectors into subgroups, take the average score as the
     subgroup score.
@@ -136,7 +137,7 @@ def moa(scores, n_buckets, method='static', replace=False, random_state=None):
     #       for now it is fixed to n_estimators/2
     scores = check_array(scores)
     n_estimators = scores.shape[1]
-    check_parameter_range(n_buckets, 2, n_estimators)  # range check
+    check_parameter(n_buckets, 2, n_estimators, param_name='n_buckets')
 
     scores_aom = np.zeros([scores.shape[0], n_buckets])
 

diff --git a/pyod/models/hbos.py b/pyod/models/hbos.py
@@ -7,7 +7,7 @@
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted
 
-from ..utils.utility import check_parameter_range
+from ..utils.utility import check_parameter
 
 from .base import BaseDetector
 
@@ -46,8 +46,8 @@ def __init__(self, n_bins=10, alpha=0.1, tol=0.5, contamination=0.1):
         self.alpha = alpha
         self.tol = tol
 
-        check_parameter_range(alpha, 0, 1)
-        check_parameter_range(tol, 0, 1)
+        check_parameter(alpha, 0, 1, param_name='alpha')
+        check_parameter(tol, 0, 1, param_name='tol')
 
     def fit(self, X, y=None):
 

diff --git a/pyod/test/test_stat_models.py b/pyod/test/test_stat_models.py
@@ -20,7 +20,7 @@
 from sklearn.metrics import precision_score
 
 from pyod.utils.data import generate_data
-from pyod.utils.utility import check_parameter_range
+from pyod.utils.utility import check_parameter
 from pyod.utils.utility import standardizer
 from pyod.utils.utility import get_label_n
 from pyod.utils.utility import precision_n_scores

diff --git a/pyod/test/test_utility.py b/pyod/test/test_utility.py
@@ -23,7 +23,7 @@
 from pyod.utils.data import visualize
 from pyod.utils.data import evaluate_print
 
-from pyod.utils.utility import check_parameter_range
+from pyod.utils.utility import check_parameter
 from pyod.utils.utility import standardizer
 from pyod.utils.utility import get_label_n
 from pyod.utils.utility import precision_n_scores
@@ -95,27 +95,61 @@ class TestParameters(unittest.TestCase):
     def setUp(self):
         pass
 
-    def test_check_para_range(self):
+    def test_check_parameter_range(self):
+        # verify parameter type correction
+        with assert_raises(TypeError):
+            check_parameter('f', 0, 100)
+
+        with assert_raises(TypeError):
+            check_parameter(argmaxn(), 0, 100)
+
+        # if low and high are both unset
         with assert_raises(ValueError):
-            check_parameter_range(50)
+            check_parameter(50)
 
+        # low must be strictly smaller than high
         with assert_raises(ValueError):
-            check_parameter_range(50, 100, 99)
+            check_parameter(50, 100, 99)
 
         with assert_raises(ValueError):
-            check_parameter_range(50, 100, 100)
+            check_parameter(50, 100, 100)
 
+        # check one side
+        with assert_raises(ValueError):
+            check_parameter(50, low=100)
         with assert_raises(ValueError):
-            check_parameter_range(-1, 0, 100)
+            check_parameter(50, high=0)
 
+        assert_equal(True, check_parameter(50, low=10))
+        assert_equal(True, check_parameter(50, high=100))
+
+        # if check fails
         with assert_raises(ValueError):
-            check_parameter_range(101, 0, 100)
+            check_parameter(-1, 0, 100)
 
-        # verify parameter type correction
-        with assert_raises(TypeError):
-            check_parameter_range('f', 0, 100)
+        with assert_raises(ValueError):
+            check_parameter(101, 0, 100)
+
+        with assert_raises(ValueError):
+            check_parameter(0.5, 0.2, 0.3)
 
-        assert_equal(True, check_parameter_range(50, 0, 100))
+        # if check passes
+        assert_equal(True, check_parameter(50, 0, 100))
+
+        assert_equal(True, check_parameter(0.5, 0.1, 0.8))
+
+        # check inclusive and exclusive bounds
+        with assert_raises(ValueError):
+            check_parameter(100, 0, 100, include_left=False,
+                            include_right=False)
+        assert_equal(True, check_parameter(0, 0, 100, include_left=True,
+                                           include_right=False))
+        assert_equal(True, check_parameter(0, 0, 100, include_left=True,
+                                           include_right=True))
+        assert_equal(True, check_parameter(100, 0, 100, include_left=False,
+                                           include_right=True))
+        assert_equal(True, check_parameter(100, 0, 100, include_left=True,
+                                           include_right=True))
 
     def tearDown(self):
         pass

diff --git a/pyod/utils/utility.py b/pyod/utils/utility.py
@@ -6,43 +6,83 @@
 from __future__ import print_function
 
 import numpy as np
+import numbers
 from scipy.stats import scoreatpercentile
 from sklearn.metrics import precision_score
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import column_or_1d
 from sklearn.utils.validation import check_consistent_length
 
+MAX_INT = np.iinfo(np.int32).max
+MIN_INT = -1 * MAX_INT
 
-def check_parameter_range(param, low=None, high=None):
+
+def check_parameter(param, low=MIN_INT, high=MAX_INT, param_name='',
+                    include_left=False, include_right=False):
     """
     check if input parameter is with in the range low and high
 
-    :param param: the input parameter to check
+    :param param: The input parameter to check
     :type param: int, float
 
-    :param low: lower bound of the range
+    :param low: The lower bound of the range
     :type low: int, float
 
-    :param high: higher bound of the range
+    :param high: The higher bound of the range
     :type high: int, float
 
-    :return: whether the parameter is within the range of (low, high)
-    :rtype: bool
+    :param param_name: The name of the parameter
+    :type param_name: str, optional (default='')
+
+    :param include_left: Whether the lower bound is inclusive (low <= param)
+    :type include_left: bool, optional (default=False)
+
+    :param include_right: Whether the higher bound is inclusive (param <= high)
+    :type include_right: bool, optional (default=False)
+
+    :return: True if the parameter is within the range; otherwise raises
+    :rtype: bool
     """
-    if low is None or high is None:
-        raise ValueError('either low or high bounds is undefined')
 
-    if low is not None and high is not None:
-        if low >= high:
-            raise ValueError('low is equal or larger than high')
+    if not isinstance(param, (numbers.Integral, np.integer, np.float)):
+        raise TypeError(
+            '{param_name} is set to {param}. '
+            'Not numerical'.format(param=param,
+                                   param_name=param_name))
+
+    if low is MIN_INT and high is MAX_INT:
+        raise ValueError('Neither low nor high bounds is undefined')
+
+    if low >= high:
+        raise ValueError(
+            'Lower bound is equal or larger than the higher bound')
+
+    if (include_left and include_right) and (param < low or param > high):
+        raise ValueError(
+            '{param_name} is set to {param}. '
+            'Not in the range of {low} and {high}'.format(
+                param=param, low=low, high=high, param_name=param_name))
 
-    if not isinstance(param, int) and not isinstance(param, float):
-        raise TypeError('{param} is not numerical'.format(param=param))
+    elif (include_left and not include_right) and (
+            param < low or param >= high):
+        raise ValueError(
+            '{param_name} is set to {param}. '
+            'Not in the range of {low} and {high}'.format(
+                param=param, low=low, high=high, param_name=param_name))
+
+    elif (not include_left and include_right) and (
+            param <= low or param > high):
+        raise ValueError(
+            '{param_name} is set to {param}. '
+            'Not in the range of {low} and {high}'.format(
+                param=param, low=low, high=high, param_name=param_name))
 
-    if param < low or param > high:
+    elif (not include_left and not include_right) and (
+            param <= low or param >= high):
         raise ValueError(
-            '{param} is not in the range of {low} and {high}'.format(
-                param=param, low=low, high=high))
+            '{param_name} is set to {param}. '
+            'Not in the range of {low} and {high}'.format(
+                param=param, low=low, high=high, param_name=param_name))
     else:
         return True
 
@@ -148,7 +188,7 @@ def argmaxn(value_list, n, order='desc'):
     length = len(value_list)
 
     # validate the choice of n
-    check_parameter_range(n, 1, length)
+    check_parameter(n, 1, length)
 
     # for the smallest n, flip the value
     if order != 'desc':