# Try this Notebook in Google Colab

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truefoundry/mlfoundry-examples/blob/main/examples/huggingface_transformers/tweet_eval_emotion_text_classification.ipynb)

**If you are running on Google Colab, it is recommended to choose a GPU runtime**

**Runtime > Change Runtime Type > GPU**

---

# 🏃‍♂ TL;DR

### 🪄 MlFoundry can automatically log metrics and model checkpoints from HuggingFace Transformers 🤗 Trainer
**Just attach the callback and let it do the rest!**

```python
# Make sure to login via `mlfoundry login` command or set API key in `MLF_API_KEY`
# import os
# os.environ.setdefault('MLF_API_KEY', '<your API key here>')

from transformers import TrainingArguments, Trainer
from mlfoundry.integrations.transformers import MlFoundryTrainerCallback, LogModelStrategy

mlf_cb = MlFoundryTrainerCallback(
    project_name="huggingface",
    run_name="my-hf-run",
    flatten_params=True,
    log_model_strategy=LogModelStrategy.BEST_PLUS_LATEST,
)

args = TrainingArguments(..., report_to=[])
trainer = Trainer(..., args=args, callbacks=[mlf_cb])
trainer.train()
```

**You can also create the callback from a pre-initialised run**

```python
from transformers import TrainingArguments, Trainer
import mlfoundry as mlf
from mlfoundry.integrations.transformers import MlFoundryTrainerCallback, LogModelStrategy

client = mlf.get_client(api_key="...")
run = client.create_run(project_name="huggingface", run_name="my-hf-run")

mlf_cb = MlFoundryTrainerCallback.from_run(
    run=run,
    auto_end_run=False,
    flatten_params=True,
    log_model_strategy=LogModelStrategy.BEST_PLUS_LATEST,
)

args = TrainingArguments(..., report_to=[])
trainer = Trainer(..., args=args, callbacks=[mlf_cb])
trainer.train()

run.end()
```

---

# 💪 Full example
**To demonstrate, we will fine-tune a small BERT model on the "emotion" subset of the [Tweet Eval](https://huggingface.co/datasets/tweet_eval) dataset**

## ⬇️ Install dependencies

For torch, it is recommended to follow the instructions at https://pytorch.org/get-started/locally/  
If torch is already installed, we will use that version; otherwise, we will just install the CPU version for now.

In [None]:
! pip install --quiet "numpy>=1.0.0,<2.0.0" "pandas>=1.0.0,<2.0.0" scikit-learn "tokenizers>=0.12.0,<1.0.0" "datasets>=2.2.1,<3.0.0" "transformers>=4.19.0,<5.0.0"
! pip install --quiet "torch>=1.2.0,<2.0.0"
! pip install --quiet -U "mlfoundry>=0.4.6,<0.5.0"

In [None]:
! pip freeze | grep 'torch'

## 🔑 Login to MLFoundry

In [None]:
import mlfoundry as mlf

# No API key is passed here; as noted in the TL;DR snippet, log in with
# `mlfoundry login` or set `MLF_API_KEY` before running this cell.
client = mlf.get_client()

## 🛠 Finetune the model

In [None]:
import os
import getpass
import urllib.parse
import random

import numpy as np
import pandas as pd

from IPython.display import display, HTML

import datasets
from datasets import load_dataset, load_metric
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

import mlfoundry as mlf
from mlfoundry.integrations.transformers import MlFoundryTrainerCallback, LogModelStrategy

### 📦 Load the dataset and metric

In [None]:
# Names used to pick the dataset/model and to build the output directory,
# project name and run name further down in this notebook.
TASK = "classification"
DATASET = "tweet_eval"
SUBSET = "emotion"
MODEL_CHECKPOINT = "google/bert_uncased_L-2_H-128_A-2"

# Load the chosen subset of the dataset and the accuracy metric that
# `compute_metrics` uses.
dataset = load_dataset(DATASET, SUBSET)
metric = load_metric("accuracy")

In [None]:
def compute_metrics(eval_pred):
    """Score one batch of evaluation output with the module-level ``metric``.

    Args:
        eval_pred: A pair that unpacks into ``(predictions, labels)``.
            The predictions are reduced with an argmax over axis 1, so they
            are presumably per-class scores of shape (examples, classes) --
            confirm against the Trainer that calls this.

    Returns:
        Whatever ``metric.compute`` returns for the predicted class ids and
        the reference labels.
    """
    scores, references = eval_pred
    # The highest-scoring column of each row is taken as the predicted class.
    predicted_ids = np.argmax(scores, axis=1)
    return metric.compute(predictions=predicted_ids, references=references)

def show_random_elements(dataset, num_examples=10):
    """Display ``num_examples`` distinct, randomly chosen rows as an HTML table.

    Columns whose feature type is a ``datasets.ClassLabel`` are shown with
    their class names instead of integer ids.

    Args:
        dataset: Object supporting ``len()``, indexing with a list of integer
            positions, and a ``features`` mapping of column name to feature
            type (presumably a ``datasets.Dataset`` -- confirm against caller).
        num_examples: Number of distinct rows to show; must not exceed
            ``len(dataset)``.

    Raises:
        AssertionError: If ``num_examples`` is larger than the dataset.
        ValueError: Raised by ``random.sample`` if ``num_examples`` is
            negative (or too large when asserts are disabled).
    """
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    # random.sample draws unique indices in a single call. This replaces a
    # manual retry-on-duplicate loop that scanned the list of picks on every
    # draw and could spin forever if the assert above was stripped.
    picks = random.sample(range(len(dataset)), num_examples)

    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            # Replace integer label ids with their human-readable names.
            df[column] = df[column].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))

In [None]:
# Preview random rows of the training split (default: 10 examples).
show_random_elements(dataset["train"])

In [None]:
# Load the tokenizer that matches the model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
# Bare expression: as the last statement of the cell, the notebook shows the
# tokenizer's output for this sample sentence pair.
tokenizer("Hello, this one sentence!", "And this sentence goes with it.")

In [None]:
# Column names holding the input text. `sentence2_key` is None here, so only
# the single-sentence branch below (and in `preprocess_function`) is taken.
sentence1_key, sentence2_key = "text", None
# Print the first training example's text column(s) as a sanity check.
if sentence2_key is None:
    print(f"Sentence: {dataset['train'][0][sentence1_key]}")
else:
    print(f"Sentence 1: {dataset['train'][0][sentence1_key]}")
    print(f"Sentence 2: {dataset['train'][0][sentence2_key]}")

### 🤖 Preprocess the dataset

In [None]:
def preprocess_function(examples):
    """Tokenize the text column(s) of ``examples`` with the module-level tokenizer.

    The first text column (``sentence1_key``) is always passed to the
    tokenizer; the second (``sentence2_key``) is passed as well when it is
    not ``None``. Truncation is enabled with a maximum length of 256.

    Args:
        examples: Mapping indexed by column name (a single row or a batch).

    Returns:
        The tokenizer's output for the selected column(s).
    """
    text_columns = [examples[sentence1_key]]
    if sentence2_key is not None:
        text_columns.append(examples[sentence2_key])
    return tokenizer(*text_columns, truncation=True, max_length=256)

In [None]:
# Try the preprocessing on the first five training rows; the notebook shows
# the result of this last expression.
preprocess_function(dataset['train'][:5])

In [None]:
# Apply the preprocessing to the loaded dataset in batches.
encoded_dataset = dataset.map(preprocess_function, batched=True)

### ⚙️ Load the model checkpoint, setup training arguments

In [None]:
# Class metadata taken from the training split's "label" feature.
num_labels = dataset['train'].features['label'].num_classes
labels = dataset['train'].features['label'].names
# Forward map (class name -> position in `labels`) and its inverse.
label2id = {name: index for index, name in enumerate(labels)}
id2label = {index: name for name, index in label2id.items()}

In [None]:
# Load the checkpoint's config with the label<->id maps attached, then build
# the sequence-classification model from that config.
config = AutoConfig.from_pretrained(MODEL_CHECKPOINT, label2id=label2id, id2label=id2label)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT, config=config)

In [None]:
# Training hyperparameters; `model_name` is the checkpoint id without its
# organisation prefix (the part after the last "/").
metric_name = "accuracy"
batch_size = 64
epochs = 5
model_name = MODEL_CHECKPOINT.split("/")[-1]

args = TrainingArguments(
    output_dir=f"{model_name}-finetuned-{DATASET}-{SUBSET}-{TASK}",
    # Log every 20 steps; evaluate and save once per epoch.
    logging_strategy="steps",
    logging_steps=20,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    warmup_ratio=0.3,
    weight_decay=0.01,
    # The best checkpoint is selected by `metric_name` ("accuracy").
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    push_to_hub=False,
    # Empty list: presumably turns off the Trainer's built-in reporting
    # integrations so only the MlFoundry callback logs -- confirm against the
    # TrainingArguments documentation.
    report_to=[],
)

### ⚡️ Attach our callback and train!

In [None]:
# Create the run; underscores in the project and run names are replaced with
# hyphens.
run = client.create_run(
    project_name=f"{DATASET}-{SUBSET}-{TASK}".replace("_", "-"),
    run_name=f"{model_name}-finetuned-hf".replace("_", "-")
)
# Build the Trainer callback from the existing run. `auto_end_run=False`
# because the run is ended explicitly with `run.end()` later in the notebook.
mlf_cb = MlFoundryTrainerCallback.from_run(
    run=run,
    auto_end_run=False,
    flatten_params=True,
    log_model_strategy=LogModelStrategy.BEST_PLUS_LATEST,
)

In [None]:
# Assemble the Trainer: train on the "train" split, evaluate on "validation",
# score with `compute_metrics`, and attach the MlFoundry callback.
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[mlf_cb],
)

In [None]:
# Run training with the arguments and callback configured above.
trainer.train()

### ✏️ Note down the run FQN before we finish
Don't worry, you can get this anytime from https://app.truefoundry.com/ 

In [None]:
# Keep the run's FQN so the logged model can be fetched again below.
RUN_FQN = run.fqn
print(RUN_FQN) # fqn looks like "<username>/<project_name>/<run_name>"
# The callback was created with auto_end_run=False, so end the run here.
run.end()

# ⬇️ Load the logged HuggingFace Transformers 🤗 model

In [None]:
import mlfoundry as mlf
from mlfoundry.integrations.transformers import HF_MODEL_PATH
# Import every transformers class this section uses, so it can run on its own
# (e.g. in a fresh kernel) without relying on the imports of the training
# section.
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer, pipeline

# RUN_FQN is the "<username>/<project_name>/<run_name>" string recorded when
# the run was ended; in a fresh session, assign it before running this cell.
run = mlf.get_client().get_run(RUN_FQN)
# Download the logged HuggingFace model artifact and keep its local path.
local_path = run.download_artifact(HF_MODEL_PATH)

In [None]:
# Reload the tokenizer from the downloaded artifact and print its output for
# a sample sentence pair as a quick check.
tokenizer = AutoTokenizer.from_pretrained(local_path, use_fast=True)
print(tokenizer("Hello, this one sentence!", "And this sentence goes with it."))

# Reload the config and model from the same local path, then switch the model
# to evaluation mode.
config = AutoConfig.from_pretrained(local_path)
model = AutoModelForSequenceClassification.from_pretrained(local_path, config=config)
model = model.eval()

In [None]:
# Wrap the reloaded model and tokenizer in a text-classification pipeline.
classifier = pipeline(task="text-classification", model=model, tokenizer=tokenizer)

In [None]:
# Classify one sample tweet; the notebook shows the pipeline's result.
classifier("Did we miss the fact that #BurkeRamsey swung &amp;hit his sister #JonBenet in the face with a golf club previously out of a fit of ?")