diff --git a/pyod/models/base.py b/pyod/models/base.py
index a3ab07772..bc3b7a438 100644
--- a/pyod/models/base.py
+++ b/pyod/models/base.py
@@ -27,9 +27,7 @@
 
 
 def _first_and_last_element(arr):
-    """
-    Returns first and last element of numpy array or sparse matrix.
-
+    """Returns first and last element of numpy array or sparse matrix.
     See sklearn/base.py for more information.
 
     """
@@ -44,8 +42,7 @@ def _first_and_last_element(arr):
 
 
 def clone(estimator, safe=True):
-    """
-    Constructs a new estimator with the same parameters.
+    """Constructs a new estimator with the same parameters.
 
     Clone does a deep copy of the model in an estimator
     without actually copying attached data. It yields a new estimator
@@ -139,8 +136,7 @@ def clone(estimator, safe=True):
 
 
 def _pprint(params, offset=0, printer=repr):
-    """
-    Pretty print the dictionary 'params'
+    """Pretty print the dictionary 'params'
 
     See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
     and sklearn/base.py for more information.
@@ -194,8 +190,7 @@ def _pprint(params, offset=0, printer=repr):
 
 @six.add_metaclass(abc.ABCMeta)
 class BaseDetector(object):
-    """
-    Abstract class for all outlier detection algorithms.
+    """Abstract class for all outlier detection algorithms.
 
     :param contamination: The amount of contamination of the data set, i.e.
         the proportion of outliers in the data set. Used when fitting to
@@ -214,10 +209,11 @@ def __init__(self, contamination=0.1):
 
     @abc.abstractmethod
     def decision_function(self, X):
-        """
-        Predict anomaly score of X of the base classifiers. The anomaly score
-        of an input sample is computed based on different detector algorithms.
-        For consistency, outliers have larger anomaly scores.
+        """Predict anomaly score of X of the base classifiers.
+
+        The anomaly score of an input sample is computed based on different
+        detector algorithms. For consistency, outliers are assigned with
+        larger anomaly scores.
 
         :param X: The training input samples. Sparse matrices are accepted only
             if they are supported by the base estimator.
@@ -230,8 +226,7 @@ def decision_function(self, X):
 
     @abc.abstractmethod
     def fit(self, X, y=None):
-        """
-        Fit detector.
+        """Fit detector.
 
         :param X: The training input samples. Sparse matrices are accepted only
             if they are supported by the base estimator.
@@ -243,8 +238,8 @@ def fit(self, X, y=None):
         pass
 
     def fit_predict(self, X, y=None):
-        """
-        Fit detector and predict if a particular sample is an outlier or not.
+        """Fit detector and predict if a particular sample is an outlier or
+        not.
 
         :param X: The input samples
         :type X: numpy array of shape (n_samples, n_features)
@@ -259,8 +254,7 @@ def fit_predict(self, X, y=None):
         return self.labels_
 
     def predict(self, X):
-        """
-        Predict if a particular sample is an outlier or not.
+        """Predict if a particular sample is an outlier or not.
 
         :param X: The input samples
         :type X: numpy array of shape (n_samples, n_features)
@@ -277,8 +271,7 @@ def predict(self, X):
         return (pred_score > self.threshold_).astype('int').ravel()
 
     def predict_proba(self, X, method='linear'):
-        """
-        Predict the probability of a sample being outlier. Two approaches
+        """Predict the probability of a sample being outlier. Two approaches
         are possible:
 
         1. simply use Min-max conversion to linearly transform the outlier
@@ -329,8 +322,7 @@ def predict_proba(self, X, method='linear'):
                              'is not a valid probability conversion method')
 
     def predict_rank(self, X):
-        """
-        Predict the outlyingness rank of a sample in a fitted model. The
+        """Predict the outlyingness rank of a sample in a fitted model. The
         method is specifically for combining various outlier detectors.
 
         :param X: The input samples
@@ -358,8 +350,8 @@ def predict_rank(self, X):
         return ranks_norm
 
     def fit_predict_evaluate(self, X, y):
-        """
-        Fit the detector, predict on samples, and evaluate the model
+        """Fit the detector, predict on samples, and evaluate the model by
+        ROC and Precision @ rank n
 
         :param X: The input samples
         :type X: numpy array of shape (n_samples, n_features)
@@ -381,8 +373,8 @@ def fit_predict_evaluate(self, X, y):
         return roc, prec_n
 
     def _set_n_classes(self, y):
-        """
-        Set the number of classes if y is presented, which is not expected.
+        """Set the number of classes if y is presented, which is not expected.
+        It could be useful for multi-class outlier detection.
 
         :param y: Ground truth
         :type y: numpy array of shape (n_samples,)
@@ -396,10 +388,10 @@ def _set_n_classes(self, y):
                 "y should not be presented in unsupervised learning.")
 
     def _process_decision_scores(self):
-        """
-        Internal function to calculate key attributes:
-        threshold: used to decide the binary label
-        labels_: binary labels of training data
+        """Internal function to calculate key attributes:
+
+        - threshold: used to decide the binary label
+        - labels_: binary labels of training data
 
         :return: self
         :rtype: object
@@ -419,8 +411,7 @@ def _process_decision_scores(self):
 
     # noinspection PyMethodParameters
     def _get_param_names(cls):
-        """
-        Get parameter names for the estimator
+        """Get parameter names for the estimator
 
        See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
         and sklearn/base.py for more information.
diff --git a/pyod/utils/utility.py b/pyod/utils/utility.py
index d2bfb5858..4766c48c0 100644
--- a/pyod/utils/utility.py
+++ b/pyod/utils/utility.py
@@ -117,10 +117,9 @@ def standardizer(X_train, X_test):
 
 
 def score_to_label(pred_scores, outlier_perc=0.1):
-    """
-    turn raw outlier outlier scores to binary labels (0 or 1)
+    """Turn raw outlier outlier scores to binary labels (0 or 1)
 
-    :param pred_scores: raw outlier decision_scores_
+    :param pred_scores: raw outlier scores
     :param outlier_perc: percentage of outliers
     :return: binary labels (1 stands for outlier)
     :rtype: int
@@ -157,7 +156,7 @@ def get_label_n(y, y_pred, n=None):
     n outlier scores.
 
     Example y: [0,1,1,0,0,0]
-            labels_: [0.1, 0.5, 0.3, 0.2, 0.7]
+            y_pred: [0.1, 0.5, 0.3, 0.2, 0.7]
             return [0, 1, 0, 0, 1]
 
     :param y: ground truth