Commit
Showing 4 changed files with 148 additions and 2 deletions.
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
make clean
make html
cd ..
cp examples/*.png docs/figs/
@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
"""Compare selected classification algorithms and their combinations by
plotting decision boundaries and the number of decision errors.
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import os
import sys

# temporary solution for relative imports in case combo is not installed
# if combo is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# suppress warnings for clean output
import warnings

warnings.filterwarnings("ignore")
import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager

# Import all models
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from combo.models.classifier_comb import SimpleClassifierAggregator
from combo.models.stacking import Stacking

# Define the number of inliers and outliers
n_samples = 300
outliers_fraction = 0.5
clusters_separation = [3]

# Compare the given classifiers under the given settings
# Initialize the data
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
n_inliers = int((1. - outliers_fraction) * n_samples)
n_outliers = int(outliers_fraction * n_samples)
ground_truth = np.zeros(n_samples, dtype=int)
ground_truth[-n_outliers:] = 1
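# the first n_inliers samples (the two Gaussian clusters generated below) are
# class 0; the last n_outliers samples (uniform noise) are class 1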

# Show the statistics of the data
print('Number of inliers: %i' % n_inliers)
print('Number of outliers: %i' % n_outliers)
print(
    'Ground truth shape is {shape}. Outliers are 1 and inliers are 0.\n'.format(
        shape=ground_truth.shape))
print(ground_truth, '\n')

random_state = np.random.RandomState(42)

# base estimators fed to the combination methods defined below
base_estimators = [LogisticRegression(), GaussianNB(), SVC(probability=True),
                   KNeighborsClassifier()]

# Define eight classification models and combination methods to be compared
classifiers = {
    # 'Decision Tree': DecisionTreeClassifier(),
    'Logistic Regression': LogisticRegression(),
    # 'Ada': AdaBoostClassifier(random_state=random_state),
    # 'Random Forest': RandomForestClassifier(random_state=random_state),
    'Gaussian': GaussianNB(),
    'SVM': SVC(probability=True),
    'kNN': KNeighborsClassifier(),
    'Average': SimpleClassifierAggregator(base_estimators=base_estimators,
                                          method='average'),
    'Max': SimpleClassifierAggregator(base_estimators=base_estimators,
                                      method='maximization'),
    'Stacking': Stacking(base_clfs=base_estimators, shuffle_data=True),
    'Stacking_RF': Stacking(base_clfs=base_estimators, shuffle_data=True,
                            meta_clf=RandomForestClassifier(
                                random_state=random_state))
}

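# 'Average' and 'Max' combine the base estimators' predicted probabilities;
# the Stacking variants train a meta classifier on the base estimators'
# outputs (a random forest in the case of 'Stacking_RF')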
# Show all classifiers
for i, clf in enumerate(classifiers.keys()):
    print('Model', i + 1, clf)

# Fit the models with the generated data and
# compare model performances
for i, offset in enumerate(clusters_separation):
    np.random.seed(42)
    # Data generation
    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
    X = np.r_[X1, X2]
    # Add outliers
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]

    # Fit the models
    plt.figure(figsize=(15, 8))
    for i, (clf_name, clf) in enumerate(classifiers.items()):
        print()
        print(i + 1, 'fitting', clf_name)

        # fit the model and get the predicted probability of class 1
        clf.fit(X, ground_truth)
        scores_pred = clf.predict_proba(X)[:, 1] * -1
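        # probabilities are negated so a high class-1 probability maps to a
        # low score; the percentile threshold computed below is used as the
        # contour level for the plotted decision boundary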

        y_pred = clf.predict(X)
        threshold = percentile(scores_pred, 100 * outliers_fraction)
        n_errors = (y_pred != ground_truth).sum()
        # plot the level lines and the points
        Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] * -1
        Z = Z.reshape(xx.shape)
        subplot = plt.subplot(2, 4, i + 1)
        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                         cmap=plt.cm.Blues_r)
        a = subplot.contour(xx, yy, Z, levels=[threshold],
                            linewidths=2, colors='red')
        subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                         colors='orange')
        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
                            s=20, edgecolor='k')
        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
                            s=20, edgecolor='k')
        subplot.axis('tight')
        subplot.legend(
            [a.collections[0], b, c],
            ['learned boundary', 'class 0', 'class 1'],
            prop=matplotlib.font_manager.FontProperties(size=10),
            loc='lower right')
        subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
        subplot.set_xlim((-7, 7))
        subplot.set_ylim((-7, 7))
    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    plt.suptitle("Model Combination")
    plt.savefig('ALL.png', dpi=300)
    plt.show()
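For quick experimentation with the same combination API outside this plotting script, a minimal sketch follows. It is not part of the commit: the dataset (load_breast_cancer) and the train/test split are illustrative assumptions, while SimpleClassifierAggregator, base_estimators, and method='average' come directly from the example above.

# minimal sketch (not part of this commit): dataset choice is illustrative
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

from combo.models.classifier_comb import SimpleClassifierAggregator

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# average the predicted probabilities of the base estimators
clf = SimpleClassifierAggregator(
    base_estimators=[LogisticRegression(), GaussianNB(),
                     KNeighborsClassifier()],
    method='average')
clf.fit(X_train, y_train)
print('test accuracy:', (clf.predict(X_test) == y_test).mean())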