Skip to content

Commit

Permalink
update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhao062 committed Jul 29, 2019
1 parent e148b1b commit 79525e2
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 2 deletions.
5 changes: 3 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,9 @@ Some of the methods are tasks specific:
`compare_selected_classifiers.py <https://github.com/yzhao062/combo/blob/master/examples/compare_selected_classifiers.py>`_\).


.. figure:: figs/ALL.png
:alt: Comparison of selected models
.. image:: https://raw.githubusercontent.com/yzhao062/combo/master/examples/ALL.png
:target: https://raw.githubusercontent.com/yzhao062/combo/master/examples/ALL.png
:alt: Comparison of Selected Models

-----

Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ Some of the methods are tasks specific:

.. image:: https://raw.githubusercontent.com/yzhao062/combo/master/examples/ALL.png
:target: https://raw.githubusercontent.com/yzhao062/combo/master/examples/ALL.png
:alt: Comparison of Selected Models


----
Expand Down
5 changes: 5 additions & 0 deletions docs/rebuilt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
make clean
make html
cd ..
cp examples/*.png /docs/figs/
139 changes: 139 additions & 0 deletions examples/temp_do_not_use.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
"""Compare all detection algorithms by plotting decision boundaries and
the number of decision boundaries.
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import os
import sys

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

# supress warnings for clean output
import warnings

warnings.filterwarnings("ignore")
import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager

# Import all models
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from combo.models.classifier_comb import SimpleClassifierAggregator
from combo.models.stacking import Stacking

# Define the number of inliers and outliers
n_samples = 300
outliers_fraction = 0.5
clusters_separation = [3]

# Compare given detectors under given settings
# Initialize the data
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
n_inliers = int((1. - outliers_fraction) * n_samples)
n_outliers = int(outliers_fraction * n_samples)
ground_truth = np.zeros(n_samples, dtype=int)
ground_truth[-n_outliers:] = 1

# Show the statics of the data
print('Number of inliers: %i' % n_inliers)
print('Number of outliers: %i' % n_outliers)
print(
'Ground truth shape is {shape}. Outlier are 1 and inliers are 0.\n'.format(
shape=ground_truth.shape))
print(ground_truth, '\n')

random_state = np.random.RandomState(42)

classifiers = [LogisticRegression(), GaussianNB(), SVC(probability=True),
KNeighborsClassifier()]

# Define nine outlier detection tools to be compared
classifiers = {
# 'Decision Tree': DecisionTreeClassifier(),
'Logistic Regression': LogisticRegression(),
# 'Ada': AdaBoostClassifier(random_state=random_state),
# 'Random Forest': RandomForestClassifier(random_state=random_state),
'Gaussian': GaussianNB(),
'SVM': SVC(probability=True),
'kNN': KNeighborsClassifier(),
'Average': SimpleClassifierAggregator(base_estimators=classifiers,
method='average'),
'Max': SimpleClassifierAggregator(base_estimators=classifiers,
method='maximization'),
'Stacking': Stacking(base_clfs=classifiers, shuffle_data=True),
'Stacking_RF': Stacking(base_clfs=classifiers, shuffle_data=True,
meta_clf=RandomForestClassifier(
random_state=random_state))
}

# Show all detectors
for i, clf in enumerate(classifiers.keys()):
print('Model', i + 1, clf)

# Fit the models with the generated data and
# compare model performances
for i, offset in enumerate(clusters_separation):
np.random.seed(42)
# Data generation
X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
X = np.r_[X1, X2]
# Add outliers
X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]

# Fit the model
plt.figure(figsize=(15, 8))
for i, (clf_name, clf) in enumerate(classifiers.items()):
print()
print(i + 1, 'fitting', clf_name)
# fit the data and tag outliers

clf.fit(X, ground_truth)
scores_pred = clf.predict_proba(X)[:, 1] * -1

y_pred = clf.predict(X)
threshold = percentile(scores_pred, 100 * outliers_fraction)
n_errors = (y_pred != ground_truth).sum()
# plot the levels lines and the points
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] * -1
Z = Z.reshape(xx.shape)
subplot = plt.subplot(2, 4, i + 1)
subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
cmap=plt.cm.Blues_r)
a = subplot.contour(xx, yy, Z, levels=[threshold],
linewidths=2, colors='red')
subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
colors='orange')
b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
s=20, edgecolor='k')
c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
s=20, edgecolor='k')
subplot.axis('tight')
subplot.legend(
[a.collections[0], b, c],
['learned boundary', 'class 0', 'class 1'],
prop=matplotlib.font_manager.FontProperties(size=10),
loc='lower right')
subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
subplot.set_xlim((-7, 7))
subplot.set_ylim((-7, 7))
plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
plt.suptitle("Model Combination")
plt.savefig('ALL.png', dpi=300)
plt.show()

0 comments on commit 79525e2

Please sign in to comment.