-
Notifications
You must be signed in to change notification settings - Fork 325
/
Copy pathstream_based_sampling.py
47 lines (39 loc) · 1.58 KB
/
stream_based_sampling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
In this example the use of ActiveLearner is demonstrated in a stream-based sampling setting.
"""
import numpy as np
from modAL.models import ActiveLearner
from modAL.uncertainty import classifier_uncertainty
from sklearn.ensemble import RandomForestClassifier
# Seed the global NumPy RNG so the sampled indices are reproducible between runs.
np.random.seed(0)

# creating the image: a zero-filled canvas with a rectangle of ones inside it
im_width = 500
im_height = 500
im = np.zeros((im_height, im_width))
# axis 0 has length im_height and axis 1 has length im_width, so each slice
# bound must be derived from the extent of its own axis
im[100:im_height - 1 - 100, 100:im_width - 1 - 100] = 1

# create the data to stream from: one (axis-0 index, axis-1 index) pair per
# pixel, with the axis-0 index varying fastest
X_full = np.transpose(
    [np.tile(np.arange(im.shape[0]), im.shape[1]),
     np.repeat(np.arange(im.shape[1]), im.shape[0])]
)

# map the intensity values against the grid with a single fancy-indexing
# lookup instead of a per-pixel Python loop
y_full = im[X_full[:, 0], X_full[:, 1]]

# assembling initial training set
n_initial = 5
# an int population samples from np.arange(len(X_full)) without first
# converting a range object into an array
initial_idx = np.random.choice(len(X_full), size=n_initial, replace=False)
X_train, y_train = X_full[initial_idx], y_full[initial_idx]
# build the active learner from the initial labelled sample, wrapping a
# 10-tree random forest as the underlying estimator
learner = ActiveLearner(
    X_training=X_train,
    y_training=y_train,
    estimator=RandomForestClassifier(n_estimators=10),
)
"""
Instances are drawn at random one at a time. If an instance's uncertainty
reaches the threshold, its label is requested and shown to the learner. The
process continues until the learner reaches a previously defined accuracy.
"""
# learning until the accuracy reaches a given threshold
n_samples = len(X_full)
# The learner is only modified by teach(), so the score is evaluated once up
# front and re-evaluated only after a new label has been taught
# (assumes score() is deterministic for an unchanged learner).
accuracy = learner.score(X_full, y_full)
while accuracy < 0.7:
    # draw one random instance from the stream; passing an int avoids
    # rebuilding a full index array on every iteration
    stream_idx = np.random.choice(n_samples)
    x_query = X_full[stream_idx].reshape(1, -1)
    # request the label only when the uncertainty reaches the threshold
    if classifier_uncertainty(learner, x_query) >= 0.4:
        learner.teach(x_query, y_full[stream_idx].reshape(-1, ))
        accuracy = learner.score(X_full, y_full)