Commit

update examples
yzhao062 committed Jul 29, 2019
1 parent 79525e2 commit 3194f85
Showing 5 changed files with 150 additions and 17 deletions.
Binary file modified docs/figs/framework_demo.png
Binary file modified docs/figs/framework_demo.pptx
Binary file modified examples/ALL.png
135 changes: 135 additions & 0 deletions examples/compare_selected_classifiers.py
@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
"""Compare select algorithms by plotting decision boundaries and
the number of decision boundaries.
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import os
import sys

# temporary solution for relative imports in case combo is not installed
# if combo is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# suppress warnings for clean output
import warnings

warnings.filterwarnings("ignore")
import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager

# Import all models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from combo.models.classifier_comb import SimpleClassifierAggregator
from combo.models.stacking import Stacking

# Define the numbers of class 0 and class 1 samples
n_samples = 300
class1_fraction = 0.5
clusters_separation = [3]

# Compare the given classifiers under the settings above
# Initialize the data
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
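# (xx, yy) is a 100x100 grid over [-7, 7]^2; each classifier's class-1
# probability is evaluated on it for the contour plots below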
n_class0 = int((1. - class1_fraction) * n_samples)
n_class1 = int(class1_fraction * n_samples)
ground_truth = np.zeros(n_samples, dtype=int)
ground_truth[-n_class1:] = 1

# Show the statistics of the data
print('Number of Class 0: %i' % n_class0)
print('Number of Class 1: %i' % n_class1)
print('Ground truth shape is {shape}.\n'.format(shape=ground_truth.shape))
print(ground_truth, '\n')

random_state = np.random.RandomState(42)

# Base classifiers fed into each combination method below
base_classifiers = [LogisticRegression(), GaussianNB(), SVC(probability=True),
                    KNeighborsClassifier()]

# Define some combination methods to be compared
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Gaussian NB': GaussianNB(),
    'Support Vector Machine': SVC(probability=True),
    'k Nearest Neighbors': KNeighborsClassifier(),
    'Simple Average': SimpleClassifierAggregator(
        base_estimators=base_classifiers, method='average'),
    'Simple Maximization': SimpleClassifierAggregator(
        base_estimators=base_classifiers, method='maximization'),
    'Stacking': Stacking(base_estimators=base_classifiers, shuffle_data=True),
    'Stacking_RF': Stacking(base_estimators=base_classifiers,
                            shuffle_data=True,
                            meta_clf=RandomForestClassifier(
                                random_state=random_state))
}

# Show all classifiers
for i, clf_name in enumerate(classifiers.keys()):
    print('Model', i + 1, clf_name)

# Fit the models with the generated data and
# compare model performances
for offset in clusters_separation:
    np.random.seed(42)
    # Data generation: two Gaussian blobs form class 0
    X1 = 0.3 * np.random.randn(n_class0 // 2, 2) - offset
    X2 = 0.3 * np.random.randn(n_class0 // 2, 2) + offset
    X = np.r_[X1, X2]
    # Add class 1: points scattered uniformly over the plane
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_class1, 2))]

    # Fit each model and plot its decision surface
    plt.figure(figsize=(15, 8))
    for i, (clf_name, clf) in enumerate(classifiers.items()):
        print()
        print(i + 1, 'fitting', clf_name)
        # fit the classifier; class-1 probabilities are negated so that
        # confident class-1 points receive the lowest scores

        clf.fit(X, ground_truth)
        scores_pred = clf.predict_proba(X)[:, 1] * -1

        y_pred = clf.predict(X)
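        # threshold the negated scores at the class-1 fraction percentile;
        # the contour calls below split the plot at this value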
        threshold = percentile(scores_pred, 100 * class1_fraction)
        n_errors = (y_pred != ground_truth).sum()
        # plot the level lines and the points
        Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] * -1
        Z = Z.reshape(xx.shape)
        subplot = plt.subplot(2, 4, i + 1)
        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                         cmap=plt.cm.Blues_r)
        a = subplot.contour(xx, yy, Z, levels=[threshold],
                            linewidths=2, colors='red')
        subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                         colors='orange')
        b = subplot.scatter(X[:-n_class1, 0], X[:-n_class1, 1], c='white',
                            s=20, edgecolor='k')
        c = subplot.scatter(X[-n_class1:, 0], X[-n_class1:, 1], c='black',
                            s=20, edgecolor='k')
        subplot.axis('tight')
        subplot.legend(
            [a.collections[0], b, c],
            ['learned boundary', 'class 0', 'class 1'],
            prop=matplotlib.font_manager.FontProperties(size=10),
            loc='lower right')
        subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
        subplot.set_xlim((-7, 7))
        subplot.set_ylim((-7, 7))
    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    plt.suptitle("Model Combination")
plt.savefig('ALL.png', dpi=300)
plt.show()
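
For quick reference, a minimal sketch of the combo API that the example
above exercises. It assumes only calls already present in this commit
(SimpleClassifierAggregator with base_estimators and method='average',
plus fit/predict/predict_proba); the toy dataset from sklearn's
make_classification is illustrative and not part of the commit.

# minimal sketch: average the outputs of two base classifiers
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from combo.models.classifier_comb import SimpleClassifierAggregator

X, y = make_classification(n_samples=200, n_features=2, n_informative=2,
                           n_redundant=0, random_state=42)
clf = SimpleClassifierAggregator(
    base_estimators=[LogisticRegression(), GaussianNB()],
    method='average')
clf.fit(X, y)
print(clf.predict(X)[:5])        # hard labels
print(clf.predict_proba(X)[:5])  # averaged class probabilities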
32 changes: 15 additions & 17 deletions examples/temp_do_not_use.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-"""Compare all detection algorithms by plotting decision boundaries and
+"""Compare select algorithms by plotting decision boundaries and
 the number of decision boundaries.
 """
 # Author: Yue Zhao <zhaoy@cmu.edu>
@@ -11,8 +11,8 @@
 import os
 import sys
 
-# temporary solution for relative imports in case pyod is not installed
-# if pyod is installed, no need to use the following line
+# temporary solution for relative imports in case combo is not installed
+# if combo is installed, no need to use the following line
 sys.path.append(
     os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

@@ -51,11 +51,9 @@
 ground_truth[-n_outliers:] = 1
 
 # Show the statistics of the data
-print('Number of inliers: %i' % n_inliers)
-print('Number of outliers: %i' % n_outliers)
-print(
-    'Ground truth shape is {shape}. Outlier are 1 and inliers are 0.\n'.format(
-        shape=ground_truth.shape))
+print('Number of Class 0: %i' % n_inliers)
+print('Number of Class 1: %i' % n_outliers)
+print('Ground truth shape is {shape}.\n'.format(shape=ground_truth.shape))
 print(ground_truth, '\n')
 
 random_state = np.random.RandomState(42)
@@ -69,15 +67,15 @@
     'Logistic Regression': LogisticRegression(),
     # 'Ada': AdaBoostClassifier(random_state=random_state),
     # 'Random Forest': RandomForestClassifier(random_state=random_state),
-    'Gaussian': GaussianNB(),
-    'SVM': SVC(probability=True),
-    'kNN': KNeighborsClassifier(),
-    'Average': SimpleClassifierAggregator(base_estimators=classifiers,
-                                          method='average'),
-    'Max': SimpleClassifierAggregator(base_estimators=classifiers,
-                                      method='maximization'),
-    'Stacking': Stacking(base_clfs=classifiers, shuffle_data=True),
-    'Stacking_RF': Stacking(base_clfs=classifiers, shuffle_data=True,
+    'Gaussian NB': GaussianNB(),
+    'Support Vector Machine': SVC(probability=True),
+    'k Nearest Neighbors': KNeighborsClassifier(),
+    'Simple Average': SimpleClassifierAggregator(base_estimators=classifiers,
+                                                 method='average'),
+    'Simple Maximization': SimpleClassifierAggregator(
+        base_estimators=classifiers, method='maximization'),
+    'Stacking': Stacking(base_estimators=classifiers, shuffle_data=True),
+    'Stacking_RF': Stacking(base_estimators=classifiers, shuffle_data=True,
                             meta_clf=RandomForestClassifier(
                                 random_state=random_state))
 }
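
The rename in the hunk above (base_clfs to base_estimators) matches the
Stacking signature used in the new example. A minimal sketch under that
assumption; the make_classification dataset is illustrative only:

# minimal sketch: stack two base classifiers under a meta-classifier
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from combo.models.stacking import Stacking

X, y = make_classification(n_samples=200, random_state=42)
clf = Stacking(base_estimators=[LogisticRegression(), GaussianNB()],
               shuffle_data=True,
               meta_clf=RandomForestClassifier(random_state=42))
clf.fit(X, y)
print(clf.predict(X)[:5])  # meta-classifier predictions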
