tests/example_tests/stream_based_sampling.py

"""
In this example the use of ActiveLearner is demonstrated in a pool-based sampling setting.
"""

import numpy as np
from modAL.models import ActiveLearner
from modAL.uncertainty import classifier_uncertainty
from sklearn.ensemble import RandomForestClassifier

np.random.seed(0)

# creating the image
im_width = 500
im_height = 500
im = np.zeros((im_height, im_width))
im[100:im_width - 1 - 100, 100:im_height - 1 - 100] = 1

# create the data to stream from
X_full = np.transpose(
    [np.tile(np.asarray(range(im.shape[0])), im.shape[1]),
     np.repeat(np.asarray(range(im.shape[1])), im.shape[0])]
)
# map the intensity values against the grid
y_full = np.asarray([im[P[0], P[1]] for P in X_full])

# assembling initial training set
n_initial = 5
initial_idx = np.random.choice(range(len(X_full)), size=n_initial, replace=False)
X_train, y_train = X_full[initial_idx], y_full[initial_idx]

# initialize the learner
learner = ActiveLearner(
    estimator=RandomForestClassifier(n_estimators=10),
    X_training=X_train, y_training=y_train
)

"""
The instances are randomly selected one by one, if an instance's uncertainty
is above a threshold, the label is requested and shown to the learner. The
process is continued until the learner reaches a previously defined accuracy.
"""

# learning until the accuracy reaches a given threshold
while learner.score(X_full, y_full) < 0.7:
    stream_idx = np.random.choice(range(len(X_full)))
    if classifier_uncertainty(learner, X_full[stream_idx].reshape(1, -1)) >= 0.4:
        learner.teach(X_full[stream_idx].reshape(1, -1), y_full[stream_idx].reshape(-1, ))