-
Notifications
You must be signed in to change notification settings - Fork 325
/
Copy pathcustom_query_strategies.py
58 lines (51 loc) · 2.01 KB
/
custom_query_strategies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
from modAL.models import ActiveLearner
from modAL.uncertainty import classifier_margin, classifier_uncertainty
from modAL.utils.combination import make_linear_combination, make_product
from modAL.utils.selection import multi_argmax
from sklearn.datasets import make_blobs
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
# Synthetic dataset: 1000 two-feature samples drawn around three fixed
# cluster centers; random_state is pinned so the blobs are the same each run.
centers = np.asarray([[-2, 3], [0.5, 5], [1, 1.5]])
X, y = make_blobs(
    n_samples=1000,
    n_features=2,
    centers=centers,
    cluster_std=0.7,
    random_state=0,
)
# initial training data: 20 distinct rows of the pool.
# np.random.choice samples WITH replacement by default, which could put the
# same row into the seed set more than once; replace=False guarantees that
# the learners start from 20 unique labelled instances.
initial_idx = np.random.choice(len(X), size=20, replace=False)
X_training, y_training = X[initial_idx], y[initial_idx]
# Baseline learner: a Gaussian-process classifier with a scaled RBF kernel,
# seeded with the initial training set. No query_strategy is passed here, so
# whatever default modAL's ActiveLearner provides is used.
learner = ActiveLearner(
    estimator=GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0)),
    X_training=X_training,
    y_training=y_training,
)
# Two composite utility measures built from the same pair of base measures.
#
# linear_combination(classifier, X) evaluates to
#     1.0 * classifier_uncertainty + 1.0 * classifier_margin
linear_combination = make_linear_combination(
    classifier_uncertainty,
    classifier_margin,
    weights=[1.0, 1.0],
)

# product(classifier, X) evaluates to
#     (classifier_uncertainty ** 0.5) * (classifier_margin ** 0.1)
product = make_product(
    classifier_uncertainty,
    classifier_margin,
    exponents=[0.5, 0.1],
)
# Custom query strategy driven by the linear combination of classifier
# uncertainty and classifier margin defined above.
def custom_query_strategy(classifier, X, n_instances=1):
    """Select the pool rows with the highest combined utility.

    Args:
        classifier: The learner/estimator handed to the utility measure.
        X: Pool of candidate instances to score.
        n_instances: How many top-scoring instances to select.

    Returns:
        The result of ``multi_argmax`` applied to the per-row utility
        scores for the ``n_instances`` largest values.
    """
    scores = linear_combination(classifier, X)
    return multi_argmax(scores, n_instances=n_instances)
# Learner that queries with custom_query_strategy; same estimator
# configuration and initial training set as the baseline learner.
custom_query_learner = ActiveLearner(
    estimator=GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0)),
    query_strategy=custom_query_strategy,
    X_training=X_training,
    y_training=y_training,
)
# pool-based sampling
# Each round asks the learner for 2 instances, teaches it their labels, and
# then removes those rows from the pool. Without the removal the learner
# could be handed (and re-taught) rows it has already been given labels for.
# X and y are never modified: np.delete returns new arrays.
n_queries = 20
X_pool, y_pool = X, y
for _ in range(n_queries):
    # query_instance (the selected rows themselves) is not needed here;
    # the rows are looked up again through query_idx below.
    query_idx, query_instance = custom_query_learner.query(X_pool, n_instances=2)
    custom_query_learner.teach(
        # Reshape using the pool's own feature count rather than a literal 2.
        X=X_pool[query_idx].reshape(-1, X_pool.shape[1]),
        y=y_pool[query_idx].reshape(-1),
    )
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx, axis=0)