## Try this Notebook in Google Colab

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/10FZr5ORlgIo3f2UCg_QoVHmKI0qbh_WS?usp=sharing)

# 🏃‍♂ TL;DR

# 💪 Full example
**To demonstrate, we will fine-tune a small BERT model on the "emotion" subset of the [Tweet Eval](https://huggingface.co/datasets/tweet_eval) dataset.**

## ⬇️ Install dependencies

For torch, it is recommended to follow the installation instructions at https://pytorch.org/get-started/locally/.  
If torch is already installed, we will use that version; otherwise, we will just install the CPU version for now.

In [None]:
# Install the notebook's dependencies with bounded versions. Requirement specs
# that contain `<` or `>` are quoted so the shell does not read them as redirections.
! pip install --quiet "numpy>=1.0.0,<2.0.0" "pandas>=1.0.0,<2.0.0" scikit-learn shap==0.40.0 "tokenizers<0.13.0" "datasets>=2.2.1,<2.3.0" "transformers>=4.19.0,<4.20.0"
# torch is installed in its own step, constrained to the 1.x series (>=1.2.0).
! pip install --quiet "torch>=1.2.0,<2.0.0"

In [None]:
# Print the installed-package lines that mention "torch" to see which version is present.
! pip freeze | grep 'torch'

## 🛠 Finetune the model

In [None]:
import os
import random

import numpy as np
import pandas as pd

from IPython.display import display, HTML

import datasets
from datasets import load_dataset, load_metric
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

### 📦 Load the dataset and metric

In [None]:
# Experiment configuration: a task label, the dataset and subset to load, and
# the pretrained checkpoint to fine-tune.
TASK = "classification"  # used below as part of the training output directory name
DATASET = "tweet_eval"
SUBSET = "emotion"
MODEL_CHECKPOINT = "google/bert_uncased_L-2_H-128_A-2"

# Load the selected dataset subset and the accuracy metric used by compute_metrics.
dataset = load_dataset(DATASET, SUBSET)
metric = load_metric("accuracy")

In [None]:
def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level ``metric``.

    Args:
        eval_pred: A pair ``(predictions, labels)``; the predictions are
            reduced to class ids by taking the argmax along axis 1.

    Returns:
        Whatever ``metric.compute`` returns for the predicted ids and labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return metric.compute(predictions=predicted_ids, references=references)

def show_random_elements(dataset, num_examples=10):
    """Display distinct, randomly chosen rows of ``dataset`` as an HTML table.

    Columns whose feature type is ``datasets.ClassLabel`` are rendered with
    their class names instead of integer ids.

    Args:
        dataset: Object supporting ``len()``, indexing with a list of row
            indices, and a ``features`` mapping of column name to feature type.
        num_examples: Number of rows to show; must not exceed ``len(dataset)``.

    Raises:
        AssertionError: If ``num_examples`` is larger than the dataset.
        ValueError: If ``num_examples`` is negative, or is too large while
            assertions are disabled (raised by ``random.sample``).
    """
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    # random.sample draws unique indices in a single call. It replaces the
    # manual "retry until unseen" loop, which rescanned the pick list on every
    # draw and could never terminate for an oversized request under -O.
    picks = random.sample(range(len(dataset)), num_examples)

    df = pd.DataFrame(dataset[picks])
    for column, feature in dataset.features.items():
        if isinstance(feature, datasets.ClassLabel):
            # Swap integer class ids for their human-readable names.
            df[column] = df[column].transform(lambda i: feature.names[i])
    display(HTML(df.to_html()))

In [None]:
# Preview a random sample of training rows (num_examples defaults to 10).
show_random_elements(dataset["train"])

In [None]:
# Load the tokenizer that belongs to the checkpoint, requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
# Sanity check: encode a sentence pair; as the cell's last expression the result is shown.
tokenizer("Hello, this one sentence!", "And this sentence goes with it.")

In [None]:
# Column names holding the model input. The second key stays None because only
# a single text column is used here.
sentence1_key = "text"
sentence2_key = None

# Print the first training example using whichever columns are configured.
if sentence2_key is not None:
    print(f"Sentence 1: {dataset['train'][0][sentence1_key]}")
    print(f"Sentence 2: {dataset['train'][0][sentence2_key]}")
else:
    print(f"Sentence: {dataset['train'][0][sentence1_key]}")

### 🤖 Preprocess the dataset

In [None]:
def preprocess_function(examples):
    """Tokenize ``examples`` using the configured sentence column(s).

    The text under ``sentence1_key`` is always passed to the tokenizer; the
    text under ``sentence2_key`` is passed as a second positional argument
    only when that key is not ``None``. Inputs are truncated to 256 tokens.

    Args:
        examples: Mapping from column name to the text(s) to tokenize.

    Returns:
        The tokenizer's output for the selected column(s).
    """
    text_columns = [examples[sentence1_key]]
    if sentence2_key is not None:
        text_columns.append(examples[sentence2_key])
    return tokenizer(*text_columns, truncation=True, max_length=256)

In [None]:
# Try the preprocessing on the first five training examples.
preprocess_function(dataset['train'][:5])

In [None]:
# Apply the tokenization to the whole dataset, handing preprocess_function batches of examples.
encoded_dataset = dataset.map(preprocess_function, batched=True)

### ⚙️ Load the model checkpoint, setup training arguments

In [None]:
# Read the class count and class names from the training split's `label` feature.
num_labels = dataset['train'].features['label'].num_classes
labels = dataset['train'].features['label'].names
# Class name -> integer id (by position in `labels`), and the inverse lookup.
label2id = {name: idx for idx, name in enumerate(labels)}
id2label = {idx: name for name, idx in label2id.items()}

In [None]:
# Load the checkpoint's config with the dataset's label mappings attached,
# then load the sequence-classification model using that config.
config = AutoConfig.from_pretrained(MODEL_CHECKPOINT, label2id=label2id, id2label=id2label)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT, config=config)

In [None]:
# Training hyperparameters shared by the arguments below.
metric_name = "accuracy"
batch_size = 64
epochs = 5
# Last path component of the checkpoint id, i.e. the name without the "google/" prefix.
model_name = MODEL_CHECKPOINT.split("/")[-1]

args = TrainingArguments(
    # Output directory name combines the model, dataset, subset and task labels.
    output_dir=f"{model_name}-finetuned-{DATASET}-{SUBSET}-{TASK}",
    # Log every 20 steps.
    logging_strategy="steps",
    logging_steps=20,
    # Evaluate and save on the same schedule: once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-5,
    # The same per-device batch size is used for training and evaluation.
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    warmup_ratio=0.3,
    weight_decay=0.01,
    # Ask the Trainer to load the best model at the end, judged by `metric_name`.
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    push_to_hub=False,
    # Empty list: no reporting integrations are requested.
    report_to=[],
)

### ⚡️ Train!

In [None]:
# Assemble the Trainer from the model, the training arguments, the encoded
# train/validation splits, the tokenizer and the metric function.
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[],  # no additional callbacks are registered here
)

In [None]:
# Start training with the Trainer configured above.
trainer.train()

## Evaluation

In [None]:
from transformers import pipeline

# Switch the model to evaluation mode before running inference.
model = model.eval()
# Wrap the model and tokenizer in a text-classification pipeline and classify one example tweet.
classifier = pipeline(task="text-classification", model=model, tokenizer=tokenizer)
classifier("Did we miss the fact that #BurkeRamsey swung &amp;hit his sister #JonBenet in the face with a golf club previously out of a fit of ?")