Skip to content

Commit

Permalink
Improve parameter check functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhao062 authored and yuezhao@cs.toronto.edu committed Jun 4, 2018
1 parent 941d9cd commit 5391b8d
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 38 deletions.
4 changes: 2 additions & 2 deletions pyod/models/abod.py
Expand Up @@ -11,7 +11,7 @@
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from .base import BaseDetector
from ..utils.utility import check_parameter_range
from ..utils.utility import check_parameter


def _calculate_wocs(curr_pt, X, X_ind):
Expand Down Expand Up @@ -137,7 +137,7 @@ def _fit_fast(self):
"""

# make sure the n_neighbors is in the range
check_parameter_range(self.n_neighbors, 1, self.n_train_)
check_parameter(self.n_neighbors, 1, self.n_train_)

self.tree_ = KDTree(self.X_train_)

Expand Down
9 changes: 5 additions & 4 deletions pyod/models/combination.py
Expand Up @@ -10,7 +10,7 @@
from sklearn.utils import shuffle
from sklearn.utils.random import sample_without_replacement
from sklearn.utils.testing import assert_equal
from ..utils.utility import check_parameter_range
from ..utils.utility import check_parameter


def aom(scores, n_buckets, method='static', replace=False, random_state=None):
Expand Down Expand Up @@ -49,7 +49,7 @@ def aom(scores, n_buckets, method='static', replace=False, random_state=None):
# for now it is fixed to n_estimators/2
scores = check_array(scores)
n_estimators = scores.shape[1]
check_parameter_range(n_buckets, 2, n_estimators) # range check
check_parameter(n_buckets, 2, n_estimators, param_name='n_buckets')

scores_aom = np.zeros([scores.shape[0], n_buckets])

Expand Down Expand Up @@ -102,7 +102,8 @@ def aom(scores, n_buckets, method='static', replace=False, random_state=None):

def moa(scores, n_buckets, method='static', replace=False, random_state=None):
"""
Maximization of Average - An ensemble method for combining multiple detectors
Maximization of Average - An ensemble method for combining multiple
detectors
First dividing detectors into subgroups, take the average score as the
subgroup score.
Expand Down Expand Up @@ -136,7 +137,7 @@ def moa(scores, n_buckets, method='static', replace=False, random_state=None):
# for now it is fixed to n_estimators/2
scores = check_array(scores)
n_estimators = scores.shape[1]
check_parameter_range(n_buckets, 2, n_estimators) # range check
check_parameter(n_buckets, 2, n_estimators, param_name='n_buckets')

scores_aom = np.zeros([scores.shape[0], n_buckets])

Expand Down
6 changes: 3 additions & 3 deletions pyod/models/hbos.py
Expand Up @@ -7,7 +7,7 @@
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from ..utils.utility import check_parameter_range
from ..utils.utility import check_parameter

from .base import BaseDetector

Expand Down Expand Up @@ -46,8 +46,8 @@ def __init__(self, n_bins=10, alpha=0.1, tol=0.5, contamination=0.1):
self.alpha = alpha
self.tol = tol

check_parameter_range(alpha, 0, 1)
check_parameter_range(tol, 0, 1)
check_parameter(alpha, 0, 1, param_name='alpha')
check_parameter(tol, 0, 1, param_name='tol')

def fit(self, X, y=None):

Expand Down
2 changes: 1 addition & 1 deletion pyod/test/test_stat_models.py
Expand Up @@ -20,7 +20,7 @@
from sklearn.metrics import precision_score

from pyod.utils.data import generate_data
from pyod.utils.utility import check_parameter_range
from pyod.utils.utility import check_parameter
from pyod.utils.utility import standardizer
from pyod.utils.utility import get_label_n
from pyod.utils.utility import precision_n_scores
Expand Down
56 changes: 45 additions & 11 deletions pyod/test/test_utility.py
Expand Up @@ -23,7 +23,7 @@
from pyod.utils.data import visualize
from pyod.utils.data import evaluate_print

from pyod.utils.utility import check_parameter_range
from pyod.utils.utility import check_parameter
from pyod.utils.utility import standardizer
from pyod.utils.utility import get_label_n
from pyod.utils.utility import precision_n_scores
Expand Down Expand Up @@ -95,27 +95,61 @@ class TestParameters(unittest.TestCase):
def setUp(self):
pass

def test_check_parameter_range(self):
    """Exercise check_parameter: type checks, bound checks, inclusiveness."""
    # Non-numeric input must be rejected with TypeError. Pass real
    # non-numeric objects so the error comes from check_parameter itself
    # and not from evaluating the argument expression.
    with assert_raises(TypeError):
        check_parameter('f', 0, 100)

    with assert_raises(TypeError):
        check_parameter([1, 2], 0, 100)

    with assert_raises(TypeError):
        check_parameter(None, 0, 100)

    # low and high both left at their defaults
    with assert_raises(ValueError):
        check_parameter(50)

    # low must be strictly smaller than high
    with assert_raises(ValueError):
        check_parameter(50, 100, 99)

    with assert_raises(ValueError):
        check_parameter(50, 100, 100)

    # only one side of the range is given
    with assert_raises(ValueError):
        check_parameter(50, low=100)
    with assert_raises(ValueError):
        check_parameter(50, high=0)

    assert_equal(True, check_parameter(50, low=10))
    assert_equal(True, check_parameter(50, high=100))

    # value outside the range
    with assert_raises(ValueError):
        check_parameter(-1, 0, 100)

    with assert_raises(ValueError):
        check_parameter(101, 0, 100)

    with assert_raises(ValueError):
        check_parameter(0.5, 0.2, 0.3)

    # value inside the range
    assert_equal(True, check_parameter(50, 0, 100))

    assert_equal(True, check_parameter(0.5, 0.1, 0.8))

    # bounds are exclusive by default
    with assert_raises(ValueError):
        check_parameter(0, 0, 100)
    with assert_raises(ValueError):
        check_parameter(100, 0, 100)

    # explicit inclusion / exclusion of the left and right bounds
    with assert_raises(ValueError):
        check_parameter(100, 0, 100, include_left=False,
                        include_right=False)
    with assert_raises(ValueError):
        check_parameter(0, 0, 100, include_left=False,
                        include_right=True)
    with assert_raises(ValueError):
        check_parameter(100, 0, 100, include_left=True,
                        include_right=False)
    assert_equal(True, check_parameter(0, 0, 100, include_left=True,
                                       include_right=False))
    assert_equal(True, check_parameter(0, 0, 100, include_left=True,
                                       include_right=True))
    assert_equal(True, check_parameter(100, 0, 100, include_left=False,
                                       include_right=True))
    assert_equal(True, check_parameter(100, 0, 100, include_left=True,
                                       include_right=True))

def tearDown(self):
pass
Expand Down
74 changes: 57 additions & 17 deletions pyod/utils/utility.py
Expand Up @@ -6,43 +6,83 @@
from __future__ import print_function

import numpy as np
import numbers
from scipy.stats import scoreatpercentile
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import column_or_1d
from sklearn.utils.validation import check_consistent_length

# Sentinel defaults meaning "this bound was not supplied by the caller".
MAX_INT = np.iinfo(np.int32).max
MIN_INT = -1 * MAX_INT


def check_parameter(param, low=MIN_INT, high=MAX_INT, param_name='',
                    include_left=False, include_right=False):
    """
    Check if an input parameter is within the range bounded by low and high.

    :param param: The input parameter to check
    :type param: int, float

    :param low: The lower bound of the range
    :type low: int, float

    :param high: The higher bound of the range
    :type high: int, float

    :param param_name: The name of the parameter, used in error messages
    :type param_name: str, optional (default='')

    :param include_left: Whether includes the lower bound (lower bound <=)
    :type include_left: bool, optional (default=False)

    :param include_right: Whether includes the higher bound (<= higher bound)
    :type include_right: bool, optional (default=False)

    :return: True if the parameter is within the requested range
    :rtype: bool

    :raises TypeError: if ``param`` is not a real number
    :raises ValueError: if neither bound is supplied, if ``low >= high``,
        or if ``param`` lies outside the requested range (NaN included)
    """
    # numbers.Real covers Python ints and floats; the NumPy abstract scalar
    # types are listed explicitly so NumPy integers and floats of any width
    # are accepted without relying on the removed ``np.float`` alias.
    if not isinstance(param, (numbers.Real, np.integer, np.floating)):
        raise TypeError(
            '{param_name} is set to {param}. '
            'Not numerical'.format(param=param,
                                   param_name=param_name))

    # Compare by value: identity of int objects is an implementation detail.
    if low == MIN_INT and high == MAX_INT:
        raise ValueError('Neither low nor high bounds is defined')

    if low >= high:
        raise ValueError(
            'Lower bound is equal or larger than the higher bound')

    # Phrase the test as "is inside" so that NaN, for which every
    # comparison is False, is reported as out of range.
    above_low = (low <= param) if include_left else (low < param)
    below_high = (param <= high) if include_right else (param < high)

    if not (above_low and below_high):
        raise ValueError(
            '{param_name} is set to {param}. '
            'Not in the range of {low} and {high}'.format(
                param=param, low=low, high=high, param_name=param_name))

    return True

Expand Down Expand Up @@ -148,7 +188,7 @@ def argmaxn(value_list, n, order='desc'):
length = len(value_list)

# validate the choice of n
check_parameter_range(n, 1, length)
check_parameter(n, 1, length)

# for the smallest n, flip the value
if order != 'desc':
Expand Down

0 comments on commit 5391b8d

Please sign in to comment.