-
Notifications
You must be signed in to change notification settings - Fork 325
/
Copy pathranked_batch_mode.py
79 lines (60 loc) · 2.39 KB
/
ranked_batch_mode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from functools import partial
import numpy as np
from modAL.batch import uncertainty_batch_sampling
from modAL.models import ActiveLearner
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
# Fix the global NumPy seed so every run draws the same random numbers.
RANDOM_STATE_SEED = 123
np.random.seed(RANDOM_STATE_SEED)

# Load the iris dataset and split it into its feature matrix and labels.
iris = load_iris()
X_raw, y_raw = iris['data'], iris['target']
# Project the raw features onto two principal components, reusing the shared
# seed for the transformer's random state.
pca = PCA(n_components=2, random_state=RANDOM_STATE_SEED)
transformed_iris = pca.fit_transform(X_raw)

# Keep each component as its own array (intended for plotting).
x_component = transformed_iris[:, 0]
y_component = transformed_iris[:, 1]
# Pick the initial labeled examples by drawing 3 row indices at random.
# Despite its name, n_labeled_examples holds the total number of rows in X_raw.
n_labeled_examples = X_raw.shape[0]

# np.random.randint excludes `high`, so the upper bound must be the row count
# itself: passing `n + 1` could return index `n`, which is out of bounds for
# X_raw and y_raw. Indices are drawn with replacement, so a row may repeat.
training_indices = np.random.randint(low=0, high=n_labeled_examples, size=3)
X_train = X_raw[training_indices]
y_train = y_raw[training_indices]

# Every row that was not drawn above forms the pool we will query from.
X_pool = np.delete(X_raw, training_indices, axis=0)
y_pool = np.delete(y_raw, training_indices, axis=0)
# Bind the batch size once so the query strategy is always called with
# n_instances=BATCH_SIZE (3 samples per query).
BATCH_SIZE = 3
preset_batch = partial(
    uncertainty_batch_sampling,
    n_instances=BATCH_SIZE,
)
# Cold start: build a learner with no initial training data, query it on the
# full dataset, then teach it the labels of whatever it selected.
learner = ActiveLearner(
    estimator=KNeighborsClassifier(n_neighbors=3),
    query_strategy=preset_batch,
)
cold_start_idx, cold_start_inst = learner.query(X_raw)
learner.teach(X=X_raw[cold_start_idx], y=y_raw[cold_start_idx])
# Build the learner used for the experiment, seeded with the labeled examples.
# This rebinds `learner`, replacing the cold-start instance created earlier.
learner = ActiveLearner(
    estimator=KNeighborsClassifier(n_neighbors=3),
    query_strategy=preset_batch,
    X_training=X_train,
    y_training=y_train,
)

# Baseline before any queries: predictions and score over the whole dataset.
predictions = learner.predict(X_raw)
unqueried_score = learner.score(X_raw, y_raw)
# Pool-based sampling: run N_RAW_SAMPLES // BATCH_SIZE query rounds (floor
# division drops any leftover partial batch).
N_RAW_SAMPLES = 20
N_QUERIES = N_RAW_SAMPLES // BATCH_SIZE

for index in range(N_QUERIES):
    # Ask the learner which pool rows it wants labeled next.
    query_index, query_instance = learner.query(X_pool)

    # Teach our ActiveLearner model the records it has requested.
    X, y = X_pool[query_index], y_pool[query_index]
    learner.teach(X=X, y=y)

    # Remove the queried instances from the unlabeled pool so they cannot be
    # selected again in a later round.
    X_pool = np.delete(X_pool, query_index, axis=0)
    y_pool = np.delete(y_pool, query_index)

    # Re-score on the full dataset after each batch; only the value from the
    # last round is still bound once the loop ends.
    model_accuracy = learner.score(X_raw, y_raw)

# Predictions over the full dataset from the learner after all query rounds.
predictions = learner.predict(X_raw)