In [1]:
!pip install transformers datasets transformers[torch] accelerate>=0.20.11

In [25]:
import os
import time

from torchsummary import summary
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from tqdm.auto import tqdm
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import swin_t, swin_b

from datasets import load_dataset, load_metric
from transformers import TrainingArguments, Trainer, AutoTokenizer, AutoConfig, AutoModelForImageClassification, pipeline, AutoFeatureExtractor

#### –§—É–Ω–∫—Ü–∏–∏ –¥–ª—è –∑–∞–º–µ—Ä–∞ –º–æ–¥–µ–ª–µ–π

In [3]:
def get_model_size(model):
  dummy_input = torch.randn(1, 3, 224, 224)
  size = sum(torch.nn.utils.parameters_to_vector(model.parameters()).size() * 4) / (1024 * 1024)
  return size


def print_size_of_model(model, label=""):
    torch.save(model.state_dict(), "temp.p")
    size=os.path.getsize("temp.p")
    print("model: ",label,' \t','Size (MB):', round(size/1024/1024, 2))
    os.remove('temp.p')


def performance_test(model, criterion, test_loader, device):
    model.to(device)
    model.eval()
    test_loss = 0
    total = 0
    correct_top1=0
    correct_top5=0
    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            test_loss += criterion(outputs, labels).item()

            # Top-1 –∏ Top-5 accuracy
            _, predicted = outputs.topk(5, dim=1)
            total += labels.size(0)
            correct_top1 += predicted[:, 0].eq(labels).sum().item()
            correct_top5 += predicted.eq(labels.view(-1, 1)).sum().item()

        test_loss /= len(test_loader)
        top1_accuracy = correct_top1 / total
        top5_accuracy = correct_top5 / total

        print(f'Loss: {test_loss:.2f}, acc@1: {top1_accuracy}, acc@5: {top5_accuracy}')

## –°–æ–∑–¥–∞–¥–∏–º –∫–∞—Å—Ç–æ–º–Ω—ã–π Trainer –¥–ª—è –¥–∏—Å—Ç–∏–ª–ª—è—Ü–∏–∏ –∑–Ω–∞–Ω–∏–π

---



1. –û–ø—Ä–µ–¥–µ–ª–∏–º –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã Œ± and T

Œ± - –Ω–∞ —Å–∫–æ–ª—å–∫–æ –±–æ–ª—å—à–µ –º—ã —Ö–æ—Ç–∏–º –æ—Ä–∏–µ–Ω—Ç–∏—Ä–æ–≤–∞—Ç—å—Å—è –Ω–∞ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è –º–æ–¥–µ–ª–∏-—É—á–∏—Ç–µ–ª—è, –∏ –º–µ–Ω—å—à–µ –Ω–∞ –º–æ–¥–µ–ª—å-—Å—Ç—É–¥–µ–Ω—Ç–∞  
T - –∫–∞–∫ —Å–∏–ª—å–Ω–æ –¥–æ–ª–∂–Ω–æ –±—ã—Ç—å —Å–≥–ª–∞–∂–µ–Ω–æ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –≤–µ—Ä–æ—è—Ç–Ω–æ—Å—Ç–µ–π –∫–ª–∞—Å—Å–æ–≤

2. –í –∫–∞—á–µ—Å—Ç–≤–µ –º–æ–¥–µ–ª–∏-—É—á–∏—Ç–µ–ª—è –±—É–¥–µ–º –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å BERT-base.

3. –ù–æ–≤–∞—è –ª–æ—Å—Å-—Ñ—É–Ω–∫—Ü–∏—è –±—É–¥–µ—Ç —Å–æ–≤–º–µ—â–∞—Ç—å –≤ —Å–µ–±–µ –∫—Ä–æ—Å—Å-—ç–Ω—Ç—Ä–æ–ø–∏—é –∏ –ª–æ—Å—Å –¥–∏—Å—Ç–∏–ª–ª—è—Ü–∏–∏

–ß—Ç–æ–±—ã –¥–æ–±–∞–≤–∏—Ç—å –Ω–∞—à–∏ –≥–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ —Å–æ–∑–¥–∞—Ç—å –∫–ª–∞—Å—Å TrainingArguments –∏ –≤–∫–ª—é—á–∏—Ç—å –∏—Ö –≤ –Ω–µ–≥–æ –∫–∞–∫ –∞—Ç—Ä–∏–±—É—Ç—ã

In [4]:
class KnowledgeDistillationTrainingArguments(TrainingArguments):
  def __init__(self, *args, alpha=0.5, temperature=2.0, **kwargs):
    super().__init__(*args, **kwargs)

    self.alpha = alpha
    self.temperature = temperature

# –ù–∞–ø–∏—à–µ–º –ª–æ—Å—Å-—Ñ—É–Ω–∫—Ü–∏—é –¥–ª—è –¥–∏—Å—Ç–∏–ª–ª—è—Ü–∏–∏ –∑–Ω–∞–Ω–∏–π
–°–æ–∑–¥–∞–¥–∏–º –Ω–∞—Å–ª–µ–¥–Ω–∏–∫–∞ –∫–ª–∞—Å—Å–∞ Trainer –∏ –ø–µ—Ä–µ–ø–∏—à–µ–º compute_loss()



In [27]:
class KnowledgeDistillationTrainer(Trainer):
  def __init__(self, *args, feature_extractor=None, teacher_model=None, **kwargs):
    super().__init__(*args, **kwargs)
    self.teacher_model = teacher_model
    self.feature_extractor=feature_extractor

  def compute_loss(self, model, inputs, return_outputs=False):
    #Extract cross-entropy loss and logits from student
    inputs = self.feature_extractor(images=inputs, return_tensors="pt")
    outputs_student = model(**inputs)
    loss_ce = outputs_student.loss
    logits_student = outputs_student.logits

    # Extract logits from teacher
    outputs_teacher = self.teacher_model(**inputs)
    logits_teacher = outputs_teacher.logits

    #Computing distillation loss by Softening probabilities
    loss_fct = nn.KLDivLoss(reduction="batchmean")
    #The reduction=batchmean argument in nn.KLDivLoss() specifies that we average the losses over the batch dimension.
    loss_kd = self.args.temperature ** 2 * loss_fct(
                F.log_softmax(logits_student / self.args.temperature, dim=-1),
                F.softmax(logits_teacher / self.args.temperature, dim=-1))

    # Return weighted student loss
    loss = self.args.alpha * loss_ce + (1. - self.args.alpha) * loss_kd
    return (loss, outputs_student) if return_outputs else loss


## –í—ã–±–∏—Ä–∞–µ–º –º–æ–¥–µ–ª—å-—É—á–µ–Ω–∏–∫–∞

–ö–∞–∫ –≤—ã–±—Ä–∞—Ç—å –ø–æ–¥—Ö–æ–¥—è—â—É—é –º–æ–¥–µ–ª—å-—É—á–µ–Ω–∏–∫–∞?
1. –ú–µ–Ω—å—à–∞—è –º–æ–¥–µ–ª—å —á–µ–º —É—á–∏—Ç–µ–ª—å —á—Ç–æ–±—ã —É–º–µ–Ω—å—à–∏—Ç—å –æ–±—ä–µ–º –∑–∞–Ω–∏–º–∞–µ–º–æ–π –ø–∞–º—è—Ç–∏ –∏ —É–≤–µ–ª–∏—á–∏—Ç—å RPS

2. –î–∏—Å—Ç–∏–ª–ª—è—Ü–∏—è –∑–Ω–∞–Ω–∏–π —Ä–∞–±–æ—Ç–∞–µ—Ç –ª—É—á—à–µ, –∫–æ–≥–¥–∞ –º–æ–¥–µ–ª—å-—É—á–∏—Ç–µ–ª—å –∏ —É—á–µ–Ω–∏–∫ –æ–¥–Ω–æ–≥–æ —Ç–∏–ø–∞ (BERT –∏ RoBERTa –º–æ–≥—É—Ç –∏–º–µ—Ç—å —Ä–∞–∑–Ω—É—é –¥–ª–∏–Ω–Ω—É —ç–º–µ–±–¥–¥–∏–Ω–≥–æ–≤ –Ω–∞ –≤—ã—Ö–æ–¥–µ, —á—Ç–æ —Å–æ–∑–¥–∞–µ—Ç —Å–ª–æ–∂–Ω–æ—Å—Ç–∏ –¥–ª—è —É—á–µ–Ω–∏–∫–∞ –º–∏–º–∏–∫—Ä–∏—Ä–æ–≤–∞—Ç—å –ø–æ–¥ —É—á–∏—Ç–µ–ª—è)

–í –∫–∞—á–µ—Å—Ç–≤–µ –ø—Ä–∏–º–µ—Ä–∞ –Ω–∞ —Ä–æ–ª—å –º–æ–¥–µ–ª–∏-—É—á–µ–Ω–∏–∫–∞ –≤–æ–∑—å–º–µ–º DistilBERT.

### –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞—Ç–∞—Å–µ—Ç–∞

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp /content/drive/MyDrive/compression/archive.zip ./
# !mkdir data
!unzip archive.zip -d data/

In [36]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

test_dataset = datasets.ImageFolder(Path('data/'), transform=transform, target_transform=int())

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [38]:
test_dataset.labels

AttributeError: ignored

# –û–ø—Ä–µ–¥–µ–ª–∏–º –º–µ—Ç—Ä–∏–∫—É –∫–æ—Ç–æ—Ä–æ–π –±—É–¥–µ–º –∑–∞–º–µ—Ä—è—Ç—å —Ç–æ—á–Ω–æ—Å—Ç—å

In [11]:
accuracy_score = load_metric("accuracy")

def compute_metrics(pred):
  predictions, labels = pred
  predictions = np.argmax(predictions, axis=1)
  return accuracy_score.compute(predictions=predictions, references=labels)

  accuracy_score = load_metric("accuracy")


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

–í —ç—Ç–æ–π —Ñ—É–Ω–∫—Ü–∏–∏ –ø—Ä–æ–≥–Ω–æ–∑—ã –æ—Ç –≥–æ–ª–æ–≤—ã –º–æ–¥–µ–ª–∏ –ø–æ—Å—Ç—É–ø–∞—é—Ç –≤ —Ñ–æ—Ä–º–µ –ª–æ–≥–∏—Ç–æ–≤, –ø–æ—ç—Ç–æ–º—É –º—ã –∏—Å–ø–æ–ª—å–∑—É–µ–º —Ñ—É–Ω–∫—Ü–∏—é np.argmax(), —á—Ç–æ–±—ã –Ω–∞–π—Ç–∏ –Ω–∞–∏–±–æ–ª–µ–µ –¥–æ—Å—Ç–æ–≤–µ—Ä–Ω—ã–π –ø—Ä–æ–≥–Ω–æ–∑ –∫–ª–∞—Å—Å–∞ –∏ —Å—Ä–∞–≤–Ω–∏—Ç—å –µ–≥–æ —Å ground truth –º–µ—Ç–∫–æ–π.

# –û–ø—Ä–µ–¥–µ–ª–∏–º –∞—Ä–≥—É–º–µ–Ω—Ç—ã –¥–ª—è —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∏

In [12]:
batch_size = 48
finetuned_student_ckpt = "swin-student"

In [13]:
student_training_args = KnowledgeDistillationTrainingArguments(
    output_dir=finetuned_student_ckpt, evaluation_strategy = "epoch",
    num_train_epochs=1, learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size, alpha=1, weight_decay=0.01)

## –î–∞–≤–∞–π—Ç–µ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä—É–µ–º –º–æ–¥–µ–ª—å —É—á–µ–Ω–∏–∫–∞, –Ω–æ –ø–µ—Ä–µ–¥ —ç—Ç–∏–º –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–∏–º –µ–π —Å–ª–æ–≤–∞—Ä–∏ —Å –∫–∞–∂–¥—ã–º –Ω–∞–º–µ—Ä–µ–Ω–∏–µ–º –∏ –µ–≥–æ –∏–¥–µ–Ω—Ç–∏—Ñ–∏–∫–∞—Ç–æ—Ä–æ–º.

In [14]:
swin_ckpt = "microsoft/swin-tiny-patch4-window7-224"
pipe = pipeline("image-classification", model=swin_ckpt)

# id2label = pipe.model.config.id2label
# label2id = pipe.model.config.label2id

Downloading (‚Ä¶)lve/main/config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/113M [00:00<?, ?B/s]

Downloading (‚Ä¶)rocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [26]:
feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/swin-base-patch4-window7-224")

Downloading (‚Ä¶)rocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]



In [15]:
num_labels = len(test_dataset.classes)
student_config = (AutoConfig.from_pretrained(swin_ckpt, num_labels=num_labels,
                                            #  id2label=id2label,
                                            #  label2id=label2id
                                             ))

In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def student_init():
  return (AutoModelForImageClassification.from_pretrained(swin_ckpt, config=student_config).to(device))

## –ó–∞–≥—Ä—É–∑–∏–º –ø—Ä–µ–¥–æ–±—É—á–µ–Ω–Ω—ã–µ –≤–µ—Å–∞ –º–æ–¥–µ–ª–∏-—É—á–∏—Ç–µ–ª—è –∏ –Ω–∞—á–Ω–µ–º –¥–æ–æ–±—É—á–µ–Ω–∏–µ –º–æ–¥–µ–ª–∏-—É—á–µ–Ω–∏–∫–∞

In [20]:
teacher_checkpoint = "microsoft/swin-base-patch4-window7-224"
teacher_model = AutoModelForImageClassification.from_pretrained(teacher_checkpoint, num_labels=num_labels).to(device)

In [28]:
# –ù–∞—á–∏–Ω–∞–µ–º fine-tuning —É—á–µ–Ω–∏–∫–∞
swin_trainer = KnowledgeDistillationTrainer(model_init=student_init,
                                            feature_extractor=feature_extractor,
        teacher_model=teacher_model, args=student_training_args,
        train_dataset=test_dataset, # eval_dataset=test_dataset,
        compute_metrics=compute_metrics)

swin_trainer.train()

TypeError: ignored

## –°—Ä–∞–≤–Ω–∏–º –º–æ–¥–µ–ª–∏ —É—á–∏—Ç–µ–ª—è –∏ —É—á–µ–Ω–∏–∫–∞



–°–æ—Ö—Ä–∞–Ω–∏–º –º–æ–¥–µ–ª–∏ —É—á–∏—Ç–µ–ª—è –∏ —É—á–µ–Ω–∏–∫–∞, –∞ –∑–∞—Ç–µ–º –≤—ã—á–∏—Å–ª–∏–º —Ä–∞–∑–º–µ—Ä—ã –º–æ–¥–µ–ª–µ–π –≤ MB.

In [None]:
teacher_model.save_pretrained("teacher_model")
swin_trainer.save_model('student_model')

raw_student = AutoModelForImageClassification.from_pretrained(swin_ckpt, config=student_config)
raw_student.save_pretrained("raw_student_model")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'classifier.weight', 'pre_classifier.

In [None]:
def compute_parameters(model_path):
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
  parameters = model.num_parameters()
  return parameters

In [None]:
teacher_model_parameters = compute_parameters(model_path="/content/teacher_model")
print("Teacher Model: ", teacher_model_parameters)

Teacher Model:  109598359


In [None]:
student_model_parameters = compute_parameters(model_path="/content/student_model")
print("Student Model: ", student_model_parameters)

Student Model:  67069591


In [None]:
decrease = (teacher_model_parameters - student_model_parameters) / teacher_model_parameters
print(f'–ú–æ–¥–µ–ª—å —Å—Ç—É–¥–µ–Ω—Ç–∞ –∏–º–µ–µ—Ç –Ω–∞ {decrease*100:.2f} % –º–µ–Ω—å—à–µ –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤, —á–µ–º –º–æ–¥–µ–ª—å —É—á–∏—Ç–µ–ª—è')

–ú–æ–¥–µ–ª—å —Å—Ç—É–¥–µ–Ω—Ç–∞ –∏–º–µ–µ—Ç –Ω–∞ 38.80 % –º–µ–Ω—å—à–µ –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤, —á–µ–º –º–æ–¥–µ–ª—å —É—á–∏—Ç–µ–ª—è


In [None]:
!ls /content/student_model -al --block-size=MB

total 270MB
drwxr-xr-x 2 root root   1MB Sep 27 14:52 .
drwxr-xr-x 1 root root   1MB Sep 27 14:52 ..
-rw-r--r-- 1 root root   1MB Sep 27 14:52 config.json
-rw-r--r-- 1 root root 269MB Sep 27 14:52 pytorch_model.bin
-rw-r--r-- 1 root root   1MB Sep 27 14:52 special_tokens_map.json
-rw-r--r-- 1 root root   1MB Sep 27 14:52 tokenizer_config.json
-rw-r--r-- 1 root root   1MB Sep 27 14:52 tokenizer.json
-rw-r--r-- 1 root root   1MB Sep 27 14:52 training_args.bin
-rw-r--r-- 1 root root   1MB Sep 27 14:52 vocab.txt


In [None]:
!ls /content/teacher_model -al --block-size=MB

total 439MB
drwxr-xr-x 2 root root   1MB Sep 27 14:52 .
drwxr-xr-x 1 root root   1MB Sep 27 14:52 ..
-rw-r--r-- 1 root root   1MB Sep 27 14:52 config.json
-rw-r--r-- 1 root root 439MB Sep 27 14:52 pytorch_model.bin


–í—ã–ø–æ–ª–Ω–∏–º –∑–∞–º–µ—Ä —Å—Ä–µ–¥–Ω–µ–π —Å–∫–æ—Ä–æ—Å—Ç–∏ –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞ —É –æ–±–æ–∏—Ö –º–æ–¥–µ–ª–µ–π –Ω–∞ –æ–¥–∏–Ω–∞–∫–æ–≤—ã—Ö –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö

In [None]:
sample_input = clinc['train']['text'][101]

print(clinc['train']['text'][101])
print(clinc['train']['intent'][101])

complete a transaction from savings to checking of $20000
133


In [None]:
pipe = pipeline("text-classification", model="/content/teacher_model", tokenizer='bert-base-uncased')

#WARMUP
for _ in range(10):
  _ = pipe(sample_input)

#INFERENCE
start = time.time()
for _ in range(100):
  _ = pipe(sample_input)
total_time_teacher_model = time.time() - start
print("–û–±—â–µ–µ –≤—Ä–µ–º—è –æ–±—Ä–∞–±–æ—Ç–∫–∏ 100 –∑–∞–ø—Ä–æ—Å–æ–≤ –º–æ–¥–µ–ª—å—é-—É—á–∏—Ç–µ–ª–µ–º:", total_time_teacher_model)

Downloading (‚Ä¶)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (‚Ä¶)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (‚Ä¶)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (‚Ä¶)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

–û–±—â–µ–µ –≤—Ä–µ–º—è –æ–±—Ä–∞–±–æ—Ç–∫–∏ 100 –∑–∞–ø—Ä–æ—Å–æ–≤ –º–æ–¥–µ–ª—å—é-—É—á–∏—Ç–µ–ª–µ–º: 14.024583339691162


In [None]:
from tqdm.notebook import tqdm

# –ü–û–î–°–ß–ï–¢ –ú–ï–¢–†–ò–ö
data_test_X = clinc['test']['text'][::50]
data_test_y = clinc['test']['intent'][::50]
model_preds = []

for i in tqdm(data_test_X):
    model_preds.append(label2id[pipe(i)[0]['label']])

accuracy_score.compute(predictions=model_preds, references=data_test_y)

  0%|          | 0/110 [00:00<?, ?it/s]

{'accuracy': 0.8363636363636363}

In [None]:
pipe = pipeline("text-classification", model="/content/student_model", tokenizer="distilbert-base-uncased")

#WARMUP
for _ in range(10):
  _ = pipe(sample_input)

#INFERENCE
start = time.time()
for _ in range(100):
  _ = pipe(sample_input)
total_time_student_model = time.time()-start

print("–û–±—â–µ–µ –≤—Ä–µ–º—è –æ–±—Ä–∞–±–æ—Ç–∫–∏ 100 –∑–∞–ø—Ä–æ—Å–æ–≤ –º–æ–¥–µ–ª—å—é-—Å—Ç—É–¥–µ–Ω—Ç–æ–º:", total_time_student_model)

–û–±—â–µ–µ –≤—Ä–µ–º—è –æ–±—Ä–∞–±–æ—Ç–∫–∏ 100 –∑–∞–ø—Ä–æ—Å–æ–≤ –º–æ–¥–µ–ª—å—é-—Å—Ç—É–¥–µ–Ω—Ç–æ–º: 7.051018714904785


In [None]:
decrease_in_time = (total_time_teacher_model - total_time_student_model) / total_time_teacher_model

print(f'–ú–æ–¥–µ–ª—å —Å—Ç—É–¥–µ–Ω—Ç–∞ –∫–ª–∞—Å—Å–∏—Ñ–∏—Ü–∏—Ä—É–µ—Ç –±—ã—Å—Ç—Ä–µ–µ –Ω–∞ {decrease_in_time*100:.2f} %')

–ú–æ–¥–µ–ª—å —Å—Ç—É–¥–µ–Ω—Ç–∞ –∫–ª–∞—Å—Å–∏—Ñ–∏—Ü–∏—Ä—É–µ—Ç –±—ã—Å—Ç—Ä–µ–µ –Ω–∞ 49.72 %


In [None]:
# –ü–û–î–°–ß–ï–¢ –ú–ï–¢–†–ò–ö
data_test_X = clinc['test']['text'][::50]
data_test_y = clinc['test']['intent'][::50]
model_preds = []

for i in tqdm(data_test_X):
    model_preds.append(label2id[pipe(i)[0]['label']])

accuracy_score.compute(predictions=model_preds, references=data_test_y)

  0%|          | 0/110 [00:00<?, ?it/s]

{'accuracy': 0.5727272727272728}

In [None]:
# Raw student-model
pipe = pipeline("text-classification", model="/content/raw_student_model", tokenizer="distilbert-base-uncased")

# –ü–û–î–°–ß–ï–¢ –ú–ï–¢–†–ò–ö
data_test_X = clinc['test']['text'][::50]
data_test_y = clinc['test']['intent'][::50]
model_preds = []

for i in tqdm(data_test_X):
    model_preds.append(label2id[pipe(i)[0]['label']])

accuracy_score.compute(predictions=model_preds, references=data_test_y)

  0%|          | 0/110 [00:00<?, ?it/s]

{'accuracy': 0.0}

In [None]:
model_preds