/
example.py
201 lines (150 loc) · 6.53 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# -*- coding: utf-8 -*-
"""Utility functions for running examples
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: BSD 2 clause
from __future__ import division
from __future__ import print_function
import matplotlib.pyplot as plt
from .data import check_consistent_shape
from .data import get_outliers_inliers
def visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True,
              save_figure=False):  # pragma: no cover
    """Draw a 2x2 grid comparing ground truth and predictions of a detector.

    The top row shows the training set (ground truth, then prediction) and
    the bottom row shows the test set in the same order. Inliers and
    outliers are drawn as separate scatter series using the first two
    feature columns. Internal use only.

    Parameters
    ----------
    clf_name : str
        The name of the detector. Used in the figure title and, when
        ``save_figure`` is True, as the stem of the output file name.

    X_train : numpy array of shape (n_samples, 2)
        The training samples. Must have exactly two features.

    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.

    X_test : numpy array of shape (n_samples, 2)
        The test samples.

    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.

    y_train_pred : list or array of shape (n_samples,)
        The predicted binary labels of the training samples (one label per
        training sample; it is split with the same helper as ``y_train``).

    y_test_pred : list or array of shape (n_samples,)
        The predicted binary labels of the test samples.

    show_figure : bool, optional (default=True)
        If set to True, show the figure.

    save_figure : bool, optional (default=False)
        If set to True, save the figure as ``'<clf_name>.png'`` (dpi=300),
        relative to the current working directory.

    Raises
    ------
    ValueError
        If ``X_train`` does not have exactly two columns.
        ``check_consistent_shape`` may raise as well; its checks are not
        defined in this function.
    """

    def _draw_panel(inlier_pts, outlier_pts, panel_title,
                    inlier_color='blue', outlier_color='orange'):
        """Scatter inliers and outliers onto the current axes.

        Parameters
        ----------
        inlier_pts : numpy array of shape (n_samples, n_features)
            Inliers; columns 0 and 1 are plotted.

        outlier_pts : numpy array of shape (n_samples, n_features)
            Outliers; columns 0 and 1 are plotted.

        panel_title : str
            Subplot title.

        inlier_color : str, optional (default='blue')
            The color of inliers.

        outlier_color : str, optional (default='orange')
            The color of outliers.
        """
        plt.axis("equal")
        plt.scatter(inlier_pts[:, 0], inlier_pts[:, 1], label='inliers',
                    color=inlier_color, s=40)
        # Outliers use a slightly larger triangle marker so they stand out.
        plt.scatter(outlier_pts[:, 0], outlier_pts[:, 1], label='outliers',
                    color=outlier_color, s=50, marker='^')
        plt.title(panel_title, fontsize=15)
        plt.xticks([])
        plt.yticks([])
        # loc=3 is matplotlib's numeric code for 'lower left'.
        plt.legend(loc=3, prop={'size': 10})

    # check input data shapes are consistent
    (X_train, y_train, X_test, y_test,
     y_train_pred, y_test_pred) = check_consistent_shape(
        X_train, y_train, X_test, y_test, y_train_pred, y_test_pred)

    n_features = X_train.shape[1]
    if n_features != 2:
        raise ValueError("Input data has to be 2-d for visualization. The "
                         "input data has {shape}.".format(shape=X_train.shape))

    # One row per panel:
    # (subplot position, samples, labels, title, inlier color, outlier color)
    panel_specs = (
        (221, X_train, y_train, 'Train Set Ground Truth', 'blue', 'orange'),
        (222, X_train, y_train_pred, 'Train Set Prediction', 'blue',
         'orange'),
        (223, X_test, y_test, 'Test Set Ground Truth', 'green', 'red'),
        (224, X_test, y_test_pred, 'Test Set Prediction', 'green', 'red'),
    )

    # Split every data set into outliers / inliers before any figure is
    # created, so that a failure here does not leave an empty figure behind.
    panels = []
    for position, samples, labels, title, in_color, out_color in panel_specs:
        outlier_pts, inlier_pts = get_outliers_inliers(samples, labels)
        panels.append(
            (position, inlier_pts, outlier_pts, title, in_color, out_color))

    # plot ground truth vs. predicted results
    fig = plt.figure(figsize=(12, 10))
    plt.suptitle("Demo of {clf_name} Detector".format(clf_name=clf_name),
                 fontsize=15)

    for position, inlier_pts, outlier_pts, title, in_color, out_color in \
            panels:
        fig.add_subplot(position)
        _draw_panel(inlier_pts, outlier_pts, title,
                    inlier_color=in_color, outlier_color=out_color)

    if save_figure:
        plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)

    if show_figure:
        plt.show()
def data_visualize(X_train, y_train, show_figure=True,
                   save_figure=False):  # pragma: no cover
    """Utility function for visualizing the synthetic samples generated by
    generate_data_cluster function.

    All clusters are drawn onto a single set of axes; each cluster gets its
    own pair of inlier / outlier colors.

    Parameters
    ----------
    X_train : sequence of numpy arrays
        The training samples, one array per cluster (at most 5 clusters).
        Columns 0 and 1 of each cluster are plotted.
        NOTE(review): presumably each array has shape
        (n_samples, n_features) -- confirm against the data generator.

    y_train : sequence of lists or arrays
        The ground truth of training samples; ``y_train[i]`` holds the
        labels of ``X_train[i]``.

    show_figure : bool, optional (default=True)
        If set to True, show the figure.

    save_figure : bool, optional (default=False)
        If set to True, save the figure as ``'data_clusters.png'``
        (dpi=300), relative to the current working directory.

    Raises
    ------
    ValueError
        If more clusters are passed than there are colors available (5).
    """

    def _plot(X_inliers, X_outliers, inlier_color='blue',
              outlier_color='orange'):
        """Internal method to scatter inliers and outliers of one cluster.

        Parameters
        ----------
        X_inliers : numpy array of shape (n_samples, n_features)
            Inliers.

        X_outliers : numpy array of shape (n_samples, n_features)
            Outliers.

        inlier_color : str, optional (default='blue')
            The color of inliers.

        outlier_color : str, optional (default='orange')
            The color of outliers.
        """
        plt.axis("equal")
        plt.scatter(X_inliers[:, 0], X_inliers[:, 1], label='inliers',
                    color=inlier_color, s=40)
        plt.scatter(X_outliers[:, 0], X_outliers[:, 1],
                    label='outliers', color=outlier_color, s=50, marker='^')
        plt.xticks([])
        plt.yticks([])
        plt.legend(loc='best', prop={'size': 10})

    in_colors = ['blue', 'green', 'purple', 'brown', 'black']
    out_colors = ['red', 'orange', 'grey', 'violet', 'pink']

    # Validate with an explicit exception rather than ``assert``: asserts are
    # removed under ``python -O``, and the limit is tied to the palette size
    # so the two cannot drift apart.
    n_clusters = len(X_train)
    if n_clusters > len(in_colors):
        raise ValueError(
            "data_visualize supports at most {limit} clusters; got "
            "{n_clusters}.".format(limit=len(in_colors),
                                   n_clusters=n_clusters))

    plt.figure(figsize=(13, 10))
    plt.suptitle("Demo of Generating Data in Clusters", fontsize=15)

    for i, cluster in enumerate(X_train):
        X_train_outliers, X_train_inliers = get_outliers_inliers(cluster,
                                                                 y_train[i])
        _plot(X_train_inliers, X_train_outliers,
              inlier_color=in_colors[i],
              outlier_color=out_colors[i])

    if save_figure:
        # savefig requires a target file name; calling it without one raises
        # TypeError. Use the same resolution as ``visualize``.
        plt.savefig('data_clusters.png', dpi=300)

    if show_figure:
        plt.show()