Improve documentation

yzhao062 · Jun 7, 2018 · f9d5829 · f9d5829
1 parent 724e372
commit f9d5829
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 37 deletions.
diff --git a/pyod/models/base.py b/pyod/models/base.py
@@ -27,9 +27,7 @@
 
 
 def _first_and_last_element(arr):
-    """
-    Returns first and last element of numpy array or sparse matrix.
-
+    """Returns first and last element of numpy array or sparse matrix.
     See sklearn/base.py for more information.
     """
 
@@ -44,8 +42,7 @@ def _first_and_last_element(arr):
 
 
 def clone(estimator, safe=True):
-    """
-    Constructs a new estimator with the same parameters.
+    """Constructs a new estimator with the same parameters.
 
     Clone does a deep copy of the model in an estimator
     without actually copying attached data. It yields a new estimator
@@ -139,8 +136,7 @@ def clone(estimator, safe=True):
 
 
 def _pprint(params, offset=0, printer=repr):
-    """
-    Pretty print the dictionary 'params'
+    """Pretty print the dictionary 'params'
 
     See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
     and sklearn/base.py for more information.
@@ -194,8 +190,7 @@ def _pprint(params, offset=0, printer=repr):
 
 @six.add_metaclass(abc.ABCMeta)
 class BaseDetector(object):
-    """
-    Abstract class for all outlier detection algorithms.
+    """Abstract class for all outlier detection algorithms.
 
     :param contamination: The amount of contamination of the data set,
         i.e. the proportion of outliers in the data set. Used when fitting to
@@ -214,10 +209,11 @@ def __init__(self, contamination=0.1):
 
     @abc.abstractmethod
     def decision_function(self, X):
-        """
-        Predict anomaly score of X of the base classifiers. The anomaly score
-        of an input sample is computed based on different detector algorithms.
-        For consistency, outliers have larger anomaly scores.
+        """Predict anomaly score of X of the base classifiers.
+
+        The anomaly score of an input sample is computed based on different
+        detector algorithms. For consistency, outliers are assigned
+        larger anomaly scores.
 
         :param X: The training input samples. Sparse matrices are accepted only
             if they are supported by the base estimator.
@@ -230,8 +226,7 @@ def decision_function(self, X):
 
     @abc.abstractmethod
     def fit(self, X, y=None):
-        """
-        Fit detector.
+        """Fit detector.
 
         :param X: The training input samples. Sparse matrices are accepted only
             if they are supported by the base estimator.
@@ -243,8 +238,8 @@ def fit(self, X, y=None):
         pass
 
     def fit_predict(self, X, y=None):
-        """
-        Fit detector and predict if a particular sample is an outlier or not.
+        """Fit detector and predict if a particular sample is an outlier or
+        not.
 
         :param X: The input samples
         :type X: numpy array of shape (n_samples, n_features)
@@ -259,8 +254,7 @@ def fit_predict(self, X, y=None):
         return self.labels_
 
     def predict(self, X):
-        """
-        Predict if a particular sample is an outlier or not.
+        """Predict if a particular sample is an outlier or not.
 
         :param X: The input samples
         :type X: numpy array of shape (n_samples, n_features)
@@ -277,8 +271,7 @@ def predict(self, X):
         return (pred_score > self.threshold_).astype('int').ravel()
 
     def predict_proba(self, X, method='linear'):
-        """
-        Predict the probability of a sample being outlier. Two approaches
+        """Predict the probability of a sample being outlier. Two approaches
         are possible:
 
         1. simply use Min-max conversion to linearly transform the outlier
@@ -329,8 +322,7 @@ def predict_proba(self, X, method='linear'):
                              'is not a valid probability conversion method')
 
     def predict_rank(self, X):
-        """
-        Predict the outlyingness rank of a sample in a fitted model. The
+        """Predict the outlyingness rank of a sample in a fitted model. The
         method is specifically for combining various outlier detectors.
 
         :param X: The input samples
@@ -358,8 +350,8 @@ def predict_rank(self, X):
         return ranks_norm
 
     def fit_predict_evaluate(self, X, y):
-        """
-        Fit the detector, predict on samples, and evaluate the model
+        """Fit the detector, predict on samples, and evaluate the model by
+        ROC and Precision @ rank n.
 
         :param X: The input samples
         :type X: numpy array of shape (n_samples, n_features)
@@ -381,8 +373,8 @@ def fit_predict_evaluate(self, X, y):
         return roc, prec_n
 
     def _set_n_classes(self, y):
-        """
-        Set the number of classes if y is presented, which is not expected.
+        """Set the number of classes if y is presented, which is not expected.
+        It could be useful for multi-class outlier detection.
 
         :param y: Ground truth
         :type y: numpy array of shape (n_samples,)
@@ -396,10 +388,10 @@ def _set_n_classes(self, y):
                 "y should not be presented in unsupervised learning.")
 
     def _process_decision_scores(self):
-        """
-        Internal function to calculate key attributes:
-        threshold: used to decide the binary label
-        labels_: binary labels of training data
+        """Internal function to calculate key attributes:
+
+        - threshold: used to decide the binary label
+        - labels_: binary labels of training data
 
         :return: self
         :rtype: object
@@ -419,8 +411,7 @@ def _process_decision_scores(self):
 
     # noinspection PyMethodParameters
     def _get_param_names(cls):
-        """
-        Get parameter names for the estimator
+        """Get parameter names for the estimator
 
         See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
         and sklearn/base.py for more information.

diff --git a/pyod/utils/utility.py b/pyod/utils/utility.py
@@ -117,10 +117,9 @@ def standardizer(X_train, X_test):
 
 
 def score_to_label(pred_scores, outlier_perc=0.1):
-    """
-    turn raw outlier outlier scores to binary labels (0 or 1)
+    """Turn raw outlier scores to binary labels (0 or 1)
 
-    :param pred_scores: raw outlier decision_scores_
+    :param pred_scores: raw outlier scores
     :param outlier_perc: percentage of outliers
     :return: binary labels (1 stands for outlier)
     :rtype: int
@@ -157,7 +156,7 @@ def get_label_n(y, y_pred, n=None):
     n outlier scores.
 
     Example y: [0,1,1,0,0,0]
-            labels_: [0.1, 0.5, 0.3, 0.2, 0.7]
+            y_pred: [0.1, 0.5, 0.3, 0.2, 0.7]
             return [0, 1, 0, 0, 1]
 
     :param y: ground truth