Skip to content

Commit

Permalink
Improve documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhao062 authored and yuezhao@cs.toronto.edu committed Jun 7, 2018
1 parent 724e372 commit f9d5829
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 37 deletions.
57 changes: 24 additions & 33 deletions pyod/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@


def _first_and_last_element(arr):
"""
Returns first and last element of numpy array or sparse matrix.
"""Returns first and last element of numpy array or sparse matrix.
See sklearn/base.py for more information.
"""

Expand All @@ -44,8 +42,7 @@ def _first_and_last_element(arr):


def clone(estimator, safe=True):
"""
Constructs a new estimator with the same parameters.
"""Constructs a new estimator with the same parameters.
Clone does a deep copy of the model in an estimator
without actually copying attached data. It yields a new estimator
Expand Down Expand Up @@ -139,8 +136,7 @@ def clone(estimator, safe=True):


def _pprint(params, offset=0, printer=repr):
"""
Pretty print the dictionary 'params'
"""Pretty print the dictionary 'params'
See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
and sklearn/base.py for more information.
Expand Down Expand Up @@ -194,8 +190,7 @@ def _pprint(params, offset=0, printer=repr):

@six.add_metaclass(abc.ABCMeta)
class BaseDetector(object):
"""
Abstract class for all outlier detection algorithms.
"""Abstract class for all outlier detection algorithms.
:param contamination: The amount of contamination of the data set,
i.e. the proportion of outliers in the data set. Used when fitting to
Expand All @@ -214,10 +209,11 @@ def __init__(self, contamination=0.1):

@abc.abstractmethod
def decision_function(self, X):
"""
Predict anomaly score of X of the base classifiers. The anomaly score
of an input sample is computed based on different detector algorithms.
For consistency, outliers have larger anomaly scores.
"""Predict anomaly score of X of the base classifiers.
The anomaly score of an input sample is computed based on different
detector algorithms. For consistency, outliers are assigned
larger anomaly scores.
:param X: The training input samples. Sparse matrices are accepted only
if they are supported by the base estimator.
Expand All @@ -230,8 +226,7 @@ def decision_function(self, X):

@abc.abstractmethod
def fit(self, X, y=None):
"""
Fit detector.
"""Fit detector.
:param X: The training input samples. Sparse matrices are accepted only
if they are supported by the base estimator.
Expand All @@ -243,8 +238,8 @@ def fit(self, X, y=None):
pass

def fit_predict(self, X, y=None):
"""
Fit detector and predict if a particular sample is an outlier or not.
"""Fit detector and predict if a particular sample is an outlier or
not.
:param X: The input samples
:type X: numpy array of shape (n_samples, n_features)
Expand All @@ -259,8 +254,7 @@ def fit_predict(self, X, y=None):
return self.labels_

def predict(self, X):
"""
Predict if a particular sample is an outlier or not.
"""Predict if a particular sample is an outlier or not.
:param X: The input samples
:type X: numpy array of shape (n_samples, n_features)
Expand All @@ -277,8 +271,7 @@ def predict(self, X):
return (pred_score > self.threshold_).astype('int').ravel()

def predict_proba(self, X, method='linear'):
"""
Predict the probability of a sample being an outlier. Two approaches
"""Predict the probability of a sample being an outlier. Two approaches
are possible:
1. simply use Min-max conversion to linearly transform the outlier
Expand Down Expand Up @@ -329,8 +322,7 @@ def predict_proba(self, X, method='linear'):
'is not a valid probability conversion method')

def predict_rank(self, X):
"""
Predict the outlyingness rank of a sample in a fitted model. The
"""Predict the outlyingness rank of a sample in a fitted model. The
method is specifically for combining various outlier detectors.
:param X: The input samples
Expand Down Expand Up @@ -358,8 +350,8 @@ def predict_rank(self, X):
return ranks_norm

def fit_predict_evaluate(self, X, y):
"""
Fit the detector, predict on samples, and evaluate the model
"""Fit the detector, predict on samples, and evaluate the model by
ROC and Precision @ rank n
:param X: The input samples
:type X: numpy array of shape (n_samples, n_features)
Expand All @@ -381,8 +373,8 @@ def fit_predict_evaluate(self, X, y):
return roc, prec_n

def _set_n_classes(self, y):
"""
Set the number of classes if y is presented, which is not expected.
"""Set the number of classes if y is presented, which is not expected.
It could be useful for multi-class outlier detection.
:param y: Ground truth
:type y: numpy array of shape (n_samples,)
Expand All @@ -396,10 +388,10 @@ def _set_n_classes(self, y):
"y should not be presented in unsupervised learning.")

def _process_decision_scores(self):
"""
Internal function to calculate key attributes:
threshold: used to decide the binary label
labels_: binary labels of training data
"""Internal function to calculate key attributes:
- threshold: used to decide the binary label
- labels_: binary labels of training data
:return: self
:rtype: object
Expand All @@ -419,8 +411,7 @@ def _process_decision_scores(self):

# noinspection PyMethodParameters
def _get_param_names(cls):
"""
Get parameter names for the estimator
"""Get parameter names for the estimator
See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
and sklearn/base.py for more information.
Expand Down
7 changes: 3 additions & 4 deletions pyod/utils/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,9 @@ def standardizer(X_train, X_test):


def score_to_label(pred_scores, outlier_perc=0.1):
"""
turn raw outlier scores to binary labels (0 or 1)
"""Turn raw outlier scores to binary labels (0 or 1)
:param pred_scores: raw outlier decision_scores_
:param pred_scores: raw outlier scores
:param outlier_perc: percentage of outliers
:return: binary labels (1 stands for outlier)
:rtype: int
Expand Down Expand Up @@ -157,7 +156,7 @@ def get_label_n(y, y_pred, n=None):
n outlier scores.
Example y: [0,1,1,0,0]
labels_: [0.1, 0.5, 0.3, 0.2, 0.7]
y_pred: [0.1, 0.5, 0.3, 0.2, 0.7]
return [0, 1, 0, 0, 1]
:param y: ground truth
Expand Down

0 comments on commit f9d5829

Please sign in to comment.