In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

from sklearn.datasets import load_digits
from sklearn.semi_supervised import LabelSpreading
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Load the handwritten-digits dataset that ships with scikit-learn.
digits = load_digits()

# Fixed seed so the shuffled order below is reproducible across runs.
rng = np.random.RandomState(42)

# One index per sample, shuffled in place; later cells slice this ordering.
indices = np.arange(digits.data.shape[0])
rng.shuffle(indices)

In [5]:
# Work on the first 330 shuffled samples only. The same row selection is
# applied to features, targets and images so the three arrays stay aligned.
X, y, images = (
    source[indices[:330]]
    for source in (digits.data, digits.target, digits.images)
)

In [6]:
# Size of the working subset, read off the target vector.
n_total_samples = y.shape[0]
# Number of leading samples whose labels are known before any querying.
n_labeled_points = 40
# Upper bound on the number of fit/query rounds of the active-learning loop.
max_iterations = 5

In [7]:
# Every sample after the first `n_labeled_points` starts out unlabeled.
unlabeled_indices = np.arange(n_labeled_points, n_total_samples)

# Figure handle bound to `f` for the active-learning loop's subplots.
# NOTE(review): the output captured for this cell is an empty figure
# ("0 Axes"), so the notebook appears to render the figure at the end of this
# cell, before anything has been drawn on it - confirm with the backend in use.
f = plt.figure()

<Figure size 432x288 with 0 Axes>

In [8]:
# Active-learning loop: fit LabelSpreading on the currently labeled samples,
# report its quality on the still-unlabeled ones, then "query" the true labels
# of the 5 samples the model is least certain about, and repeat.
#
# NOTE: this cell advances `unlabeled_indices` and `n_labeled_points`. Re-run
# the cells that initialise them before re-running this one, otherwise the
# loop continues from where the previous run stopped.

# Create the figure in the same cell that draws on it. A figure created in an
# earlier cell is rendered (empty) and closed by the inline backend at the end
# of that cell, so subplots added to it later would never be displayed.
f = plt.figure()

for i in range(max_iterations):
    if len(unlabeled_indices) == 0:
        print("No unlabeled items left to label.")
        break

    # Hide the labels of every sample that has not been queried yet
    # (-1 marks a sample as unlabeled for LabelSpreading).
    y_train = np.copy(y)
    y_train[unlabeled_indices] = -1

    lp_model = LabelSpreading(gamma=0.25, max_iter=20)
    lp_model.fit(X, y_train)

    # Compare the transduced labels with the held-back ground truth.
    predicted_labels = lp_model.transduction_[unlabeled_indices]
    true_labels = y[unlabeled_indices]

    cm = confusion_matrix(true_labels, predicted_labels, labels=lp_model.classes_)

    print(f"Iteration {i} {70*'_'}")
    print(f"Label Spreading model: {n_labeled_points} labeled & {n_total_samples-n_labeled_points} unlabeled ({n_total_samples} total)")

    print(classification_report(true_labels, predicted_labels))

    print("Confusion matrix")
    print(cm)

    # Entropy of each sample's label distribution. The transpose puts classes
    # on axis 0, which is the axis `stats.entropy` reduces over, so the result
    # holds one entropy value per sample.
    pred_entropies = stats.entropy(lp_model.label_distributions_.T)

    # Rank all samples from most to least uncertain, keep only those that are
    # still unlabeled, and select (up to) the 5 most uncertain ones.
    uncertainty_index = np.argsort(pred_entropies)[::-1]
    uncertainty_index = uncertainty_index[
        np.isin(uncertainty_index, unlabeled_indices)
    ][:5]

    # The figure is a 5x5 grid, so only the first 5 iterations are drawn.
    if i < 5:
        # Caption the row with the number of labels this model was fit with.
        f.text(.05, (1-(i+1)*.183), f"model {(i+1)}\n\nfit with\n{n_labeled_points} labels")

        for index, image_index in enumerate(uncertainty_index):
            sub = f.add_subplot(5, 5, index+1+(5*i))
            sub.imshow(images[image_index], cmap=plt.cm.gray_r, interpolation="none")
            sub.set_title(f"predict: {lp_model.transduction_[image_index]}\ntrue: {y[image_index]}", size=10)
            sub.axis("off")

    # "Query" the selected samples: they leave the unlabeled pool, so the next
    # fit sees their true labels. The boolean mask keeps the remaining order.
    unlabeled_indices = unlabeled_indices[
        ~np.isin(unlabeled_indices, uncertainty_index)
    ]
    n_labeled_points += len(uncertainty_index)

f.suptitle("Active learning with Label Propagation.\nRows show 5 most uncertain labels to learn with the next model.", y=1.15)
# Adjust the figure we drew on explicitly instead of pyplot's "current" figure.
f.subplots_adjust(left=0.2, bottom=0.03, right=0.9, top=0.9, wspace=0.2, hspace=0.85)
plt.show()

Iteration 0 ______________________________________________________________________
Label Spreading model: 40 labeled & 290 unlabeled (330 total)
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        30
           1       0.88      0.65      0.75        23
           2       0.93      0.93      0.93        27
           3       0.79      0.96      0.87        27
           4       0.97      0.97      0.97        37
           5       0.93      0.71      0.81        38
           6       1.00      0.97      0.98        29
           7       0.83      1.00      0.91        25
           8       0.68      0.68      0.68        22
           9       0.64      0.72      0.68        32

    accuracy                           0.86       290
   macro avg       0.86      0.86      0.86       290
weighted avg       0.87      0.86      0.86       290

Confusion matrix
[[30  0  0  0  0  0  0  0  0  0]
 [ 0 15  0  0  0  0  0  0  7  1]
 [ 0  0 25  2

<Figure size 432x288 with 0 Axes>