Merge pull request #270 from yzhao062/development

V0.8.6
yzhao062 · Jan 12, 2021 · 885f73c · 885f73c
2 parents beaa254 + 6dc35b7
commit 885f73c
Show file tree

Hide file tree

Showing 10 changed files with 34 additions and 28 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -114,7 +114,10 @@ v<0.8.4>, <11/02/2020> -- Add support for Tensorflow 2.
 v<0.8.4>, <11/12/2020> -- Merge PR #!02 for categorical data generation.
 v<0.8.5>, <12/22/2020> -- Refactor test from sklearn to numpy
 v<0.8.5>, <12/22/2020> -- Refactor COPOD for consistency
-v<0.8.5>, <12/22/2020> -- Refactor due to sklearn 0.24 (issue # 265)
+v<0.8.5>, <12/22/2020> -- Refactor due to sklearn 0.24 (issue #265)
+v<0.8.6>, <01/09/2021> -- Improve COF speed (PR #159)
+v<0.8.6>, <01/10/2021> -- Fix LMDD parameter inconsistency.
+v<0.8.6>, <01/12/2021> -- Add option to specify feature names in copod explanation plot (PR #261).
 
 
 

diff --git a/README.rst b/README.rst
@@ -104,14 +104,6 @@ PyOD is featured for:
 * **Compatible with both Python 2 & 3**.
 
 
-**Note on Python 2.7**\ :
-The maintenance of Python 2.7 will be stopped by January 1, 2020 (see `official announcement <https://github.com/python/devguide/pull/344>`_).
-To be consistent with the Python change and PyOD's dependent libraries, e.g., scikit-learn, we will
-stop supporting Python 2.7 in the near future (dates are still to be decided). We encourage you to use
-Python 3.5 or newer for the latest functions and bug fixes. More information can
-be found at `Moving to require Python 3 <https://python3statement.org/>`_.
-
-
 **API Demo**\ :
 
 
@@ -210,6 +202,7 @@ be found at `Moving to require Python 3 <https://python3statement.org/>`_.
 
 * Python 2.7, 3.5, 3.6, or 3.7
 * combo>=0.0.8
+* joblib
 * numpy>=1.13
 * numba>=0.35
 * pandas>=0.25

diff --git a/docs/index.rst b/docs/index.rst
@@ -108,13 +108,6 @@ PyOD is featured for:
 * **Compatible with both Python 2 & 3**.
 
 
-**Note on Python 2.7**\ :
-The maintenance of Python 2.7 will be stopped by January 1, 2020 (see `official announcement <https://github.com/python/devguide/pull/344>`_)
-To be consistent with the Python change and PyOD's dependent libraries, e.g., scikit-learn, we will
-stop supporting Python 2.7 in the near future (dates are still to be decided). We encourage you to use
-Python 3.5 or newer for the latest functions and bug fixes. More information can
-be found at `Moving to require Python 3 <https://python3statement.org/>`_.
-
 **API Demo**\ :
 
 

diff --git a/docs/install.rst b/docs/install.rst
@@ -32,6 +32,7 @@ Alternatively, you could clone and run setup.py file:
 
 * Python 2.7, 3.5, 3.6, or 3.7
 * combo>=0.0.8
+* joblib
 * numpy>=1.13
 * numba>=0.35
 * pandas>=0.25

diff --git a/pyod/models/cof.py b/pyod/models/cof.py
@@ -137,8 +137,7 @@ def _cof(self, X):
         dist_matrix = np.array(distance_matrix(X, X))
         sbn_path_index, ac_dist, cof_ = [], [], []
         for i in range(X.shape[0]):
-            sbn_path = sorted(range(len(dist_matrix[i])),
-                              key=dist_matrix[i].__getitem__)
+            sbn_path = np.argsort(dist_matrix[i])
             sbn_path_index.append(sbn_path[1: self.n_neighbors_ + 1])
             cost_desc = []
             for j in range(self.n_neighbors_):

diff --git a/pyod/models/copod.py b/pyod/models/copod.py
@@ -117,7 +117,7 @@ def decision_function(self, X):
             decision_scores_ = self.O.sum(axis=1).to_numpy()
         return decision_scores_.ravel()
 
-    def explain_outlier(self, ind, cutoffs=None):  # pragma: no cover
+    def explain_outlier(self, ind, cutoffs=None, feature_names=None):  # pragma: no cover
         """Plot dimensional outlier graph for a given data
             point within the dataset.
         Parameters
@@ -129,6 +129,10 @@ def explain_outlier(self, ind, cutoffs=None):  # pragma: no cover
         cutoffs : list of floats in (0., 1), optional (default=[0.95, 0.99])
             The significance cutoff bands of the dimensional outlier graph.
         
+        feature_names: list of strings
+            The display names of all columns of the dataset,
+            to show on the x-axis of the plot.
+
         Returns
         -------
         Plot : matplotlib plot
@@ -146,7 +150,15 @@ def explain_outlier(self, ind, cutoffs=None):  # pragma: no cover
         plt.ylim([0, int(self.O.max().max()) + 1])
         plt.ylabel('Dimensional Outlier Score')
         plt.xlabel('Dimension')
-        plt.xticks(range(1, self.O.shape[1] + 1))
+
+        ticks = range(1, self.O.shape[1] + 1)
+        if feature_names is not None:
+            assert len(feature_names) == len(ticks), \
+                "Length of feature_names does not match dataset dimensions."
+            plt.xticks(ticks, labels=feature_names)
+        else:
+            plt.xticks(ticks)
+
         plt.yticks(range(0, int(self.O.max().max()) + 1))
         label = 'Outlier' if self.labels_[ind] == 1 else 'Inlier'
         plt.title('Outlier Score Breakdown for Data #{index} ({label})'.format(

diff --git a/pyod/models/lmdd.py b/pyod/models/lmdd.py
@@ -110,11 +110,12 @@ class LMDD(BaseDetector):
     def __init__(self, contamination=0.1, n_iter=50, dis_measure='aad',
                  random_state=None):
         super(LMDD, self).__init__(contamination=contamination)
+        self.n_iter, self.n_iter_ = n_iter, n_iter
+        self.dis_measure, self.dis_measure_ = dis_measure, dis_measure
+
         self.random_state_, self.dis_measure_ = _check_params(n_iter,
                                                               dis_measure,
                                                               random_state)
-        self.n_iter_ = n_iter
-        self.decision_scores_ = None
 
     def fit(self, X, y=None):
         """Fit detector. y is ignored in unsupervised methods.

diff --git a/pyod/models/loci.py b/pyod/models/loci.py
@@ -212,7 +212,7 @@ def _calculate_decision_score(self, X):
                     outlier_scores[p_ix] = mdef / sigma_mdef
                     if mdef > (self.threshold_ * sigma_mdef):
                         break
-        return outlier_scores
+        return np.asarray(outlier_scores)
 
     def fit(self, X, y=None):
         """Fit the model using X as training data.
@@ -232,8 +232,7 @@ def fit(self, X, y=None):
         """
         X = check_array(X)
         self._set_n_classes(y)
-        outlier_scores = self._calculate_decision_score(X)
-        self.decision_scores_ = np.array(outlier_scores)
+        self.decision_scores_ = self._calculate_decision_score(X)
         self.labels_ = (self.decision_scores_ > self.threshold_).astype(
             'int').ravel()
 
@@ -246,5 +245,4 @@ def fit(self, X, y=None):
     def decision_function(self, X):
         check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
         X = check_array(X)
-        outlier_scores = self._calculate_decision_score(X)
-        return np.array(outlier_scores)
+        return self._calculate_decision_score(X)
diff --git a/pyod/version.py b/pyod/version.py
@@ -20,4 +20,4 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
-__version__ = '0.8.5'  # pragma: no cover
+__version__ = '0.8.6'  # pragma: no cover
diff --git a/temp_text.txt b/temp_text.txt
@@ -0,0 +1,6 @@
+**Note on Python 2.7**\ :
+The maintenance of Python 2.7 will be stopped by January 1, 2020 (see `official announcement <https://github.com/python/devguide/pull/344>`_).
+To be consistent with the Python change and PyOD's dependent libraries, e.g., scikit-learn, we will
+stop supporting Python 2.7 in the near future (dates are still to be decided). We encourage you to use
+Python 3.5 or newer for the latest functions and bug fixes. More information can
+be found at `Moving to require Python 3 <https://python3statement.org/>`_.