In [None]:
!pip install setfit==0.5.0
!pip install small-text[transformers]==1.3.0
!pip install datasets

In [None]:
from datasets import load_dataset
# Load small slices of the comparison dataset: the first 1000 training rows and
# the first 100 test rows.
train_dataset, test_dataset = (
    load_dataset('CarperAI/openai_summarize_comparisons', split=split_spec)
    for split_spec in ('train[:1000]', 'test[:100]')
)

In [3]:
import numpy as np
def preprocess_data(data, seed=None):
  """Turn pairwise comparisons into a binary text-classification set.

  For every row a fair coin decides which summary is kept: the 'chosen' one
  (label 0) or the 'rejected' one (label 1).

  Args:
    data: sequence of mappings with 'chosen' and 'rejected' keys; must
      support len() and integer indexing.
    seed: optional int. When given, the coin flips come from a private
      np.random.RandomState(seed), making the result reproducible and
      leaving NumPy's global random state untouched. When None (default),
      the global np.random state is used, exactly as before.

  Returns:
    (summaries, labels): two lists of equal length; labels[i] is 0 when
    summaries[i] is the chosen summary of row i and 1 when it is the
    rejected one.
  """
  # Both the np.random module and a RandomState instance expose .choice().
  rng = np.random if seed is None else np.random.RandomState(seed)
  summaries = []
  labels = []
  for i in range(len(data)):
    row = data[i]
    a = rng.choice(2)
    summaries.append(row['chosen'] if a == 0 else row['rejected'])
    labels.append(a)
  return summaries, labels

In [4]:
# Build (text, label) lists for both splits. Order matters: each call draws from
# NumPy's random state inside preprocess_data, so swapping these two lines would
# change which summary/label every row receives.
train_summaries, train_labels = preprocess_data(train_dataset)
test_summaries, test_labels = preprocess_data(test_dataset)

In [5]:
from small_text import TextDataset
import numpy as np

# Two classes: 0 = chosen summary, 1 = rejected summary.
num_classes = 2
target_labels = np.arange(num_classes)

# Wrap the raw text/label lists of each split in a small-text TextDataset.
train, test = (
    TextDataset.from_arrays(texts, np.array(labels), target_labels=target_labels)
    for texts, labels in (
        (train_summaries, train_labels),
        (test_summaries, test_labels),
    )
)



In [16]:
from small_text.integrations.transformers.classifiers.setfit import SetFitModelArguments
from small_text.integrations.transformers.classifiers.factories import SetFitClassificationFactory

sentence_transformer_model_name = 'sentence-transformers/all-mpnet-base-v2'
num_classes = 2

# SetFit model arguments and the factory used to create a classifier for the
# classification task.
setfit_model_args = SetFitModelArguments(sentence_transformer_model_name)
clf_factory = SetFitClassificationFactory(setfit_model_args, num_classes)
# Optional, currently disabled factory argument:
#   classification_kwargs={'trainer_kwargs': {'num_iterations': 10}}

In [11]:
import gc
import torch
import numpy as np
from sklearn.metrics import accuracy_score

from small_text import (
    PoolBasedActiveLearner, 
    random_initialization_balanced,
    BreakingTies,
    SubsamplingQueryStrategy
)

In [17]:

# Number of query/retrain rounds performed by different_strategy().
# With 20 initial samples and 20 samples per query (as used below), 4 rounds end
# at 100 labeled samples; use 9 rounds to reach 200.
num_queries = 4  # 9 for 200 samples

def initialize_active_learner(y_train, init_samples):
    """Select the initially labeled samples for an active-learning run.

    Args:
        y_train: label array of the pool; indexed with the selected indices.
        init_samples: number of samples requested from
            `random_initialization_balanced` (presumably balanced across
            classes, per its name).

    Returns:
        (indices, labels): the selected pool indices and `y_train` at those
        indices.
    """
    seed_indices = random_initialization_balanced(y_train, n_samples=init_samples)
    return seed_indices, y_train[seed_indices]

def evaluate(active_learner, train, test):
    """Print train and test accuracy of the learner's current classifier.

    Args:
        active_learner: object exposing `classifier.predict(dataset)`.
        train: dataset with ground-truth labels in `.y`; callers in this
            notebook pass the currently labeled subset of the pool.
        test: held-out dataset with ground-truth labels in `.y`.

    Returns:
        The accuracy on `test`, as computed by `accuracy_score`.
    """
    y_pred_train = active_learner.classifier.predict(train)
    y_pred_test = active_learner.classifier.predict(test)

    # scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, so the
    # numbers are unchanged, but the documented order keeps this safe to adapt
    # to asymmetric metrics (precision, recall, ...).
    train_acc = accuracy_score(train.y, y_pred_train)
    test_acc = accuracy_score(test.y, y_pred_test)

    print('Train accuracy:', train_acc)
    print('Test accuracy:', test_acc)

    return test_acc

# Initial labeled set: 20 pool indices and their labels. Computed once at
# notebook level and read as globals by different_strategy(), so every strategy
# run in this kernel session starts from the same initial samples.
x_indices_initial, y_initial = initialize_active_learner(train.y, 20)

def different_strategy(strategy, num, n_queries=None, num_validation=5):
  """Run one pool-based active-learning experiment with a given query strategy.

  Relies on notebook-level state: `clf_factory`, `train`, `test`,
  `x_indices_initial`, `y_initial` and (as a default) `num_queries`.

  Args:
    strategy: query strategy instance; it is wrapped in
      SubsamplingQueryStrategy before use.
    num: number of samples requested from the pool in each query round.
    n_queries: number of query/retrain rounds. Defaults to the notebook-level
      `num_queries` when None.
    num_validation: `np.arange(num_validation)` is passed as the second
      positional argument of `active_learner.update`. In small-text this is
      `indices_validation`, i.e. positions within the labeled set that are
      used as a validation split -- NOTE(review): confirm against the
      installed small-text version. Pass None to call `update(y)` without
      explicit validation indices. The default of 5 reproduces the previous
      hard-coded behaviour.

  Returns:
    (results_setfit, labeled_indices): the list of test accuracies (one after
    initialization plus one per round) and the array of all labeled pool
    indices, most recently queried first.
  """
  if n_queries is None:
    n_queries = num_queries  # notebook-level default

  # Wrap whichever strategy the caller supplied (not necessarily BreakingTies).
  query_strategy = SubsamplingQueryStrategy(strategy)
  labeled_indices = x_indices_initial

  active_learner = PoolBasedActiveLearner(clf_factory, query_strategy, train)
  active_learner.initialize_data(x_indices_initial, y_initial)

  # Baseline accuracy after training on the initial samples only.
  results_setfit = [evaluate(active_learner, train[labeled_indices], test)]

  for i in range(n_queries):
    q_indices = active_learner.query(num_samples=num)
    # The pool is fully labeled, so the "annotation" is a simple lookup.
    y = train.y[q_indices]
    # Hand the new labels to the learner, which trains the new model.
    if num_validation is None:
      active_learner.update(y)
    else:
      active_learner.update(y, np.arange(num_validation))
    labeled_indices = np.concatenate([q_indices, labeled_indices])

    # Release memory held by the previous model before evaluating.
    gc.collect()
    torch.cuda.empty_cache()

    print('---------------')
    print('Iteration #{:d} ({} samples)'.format(i, len(labeled_indices)))
    results_setfit.append(evaluate(active_learner, train[labeled_indices], test))
  return results_setfit, labeled_indices

In [18]:
# Run the experiment with the BreakingTies strategy, querying 20 samples per round.
# `result` is the list of test accuracies, `labeled` the final labeled pool indices.
result, labeled = different_strategy(BreakingTies(), 20)

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num examples = 800
  Num epochs = 1
  Total optimization steps = 25
  Total train batch size = 32


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/25 [00:00<?, ?it/s]

Train accuracy: 1.0
Test accuracy: 0.52


model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num examples = 1400
  Num epochs = 1
  Total optimization steps = 44
  Total train batch size = 32


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/44 [00:00<?, ?it/s]

---------------
Iteration #0 (40 samples)
Train accuracy: 0.9
Test accuracy: 0.56


model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num examples = 2200
  Num epochs = 1
  Total optimization steps = 69
  Total train batch size = 32


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/69 [00:00<?, ?it/s]

---------------
Iteration #1 (60 samples)
Train accuracy: 0.9166666666666666
Test accuracy: 0.55


model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num examples = 3000
  Num epochs = 1
  Total optimization steps = 94
  Total train batch size = 32


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/94 [00:00<?, ?it/s]

---------------
Iteration #2 (80 samples)
Train accuracy: 0.925
Test accuracy: 0.58


model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num examples = 3800
  Num epochs = 1
  Total optimization steps = 119
  Total train batch size = 32


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/119 [00:00<?, ?it/s]

---------------
Iteration #3 (100 samples)
Train accuracy: 0.94
Test accuracy: 0.61
