# Fine-tuning a model with the Trainer API or Keras (From Lab3)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [1]:
!pip install datasets evaluate transformers[sentencepiece]

[0m

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Base checkpoint name, used for the tokenizer here and for the model later on.
checkpoint = "bert-base-uncased"
# "sst2" configuration of the "glue" dataset.
raw_datasets = load_dataset("glue", "sst2")
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the ``sentence`` field of ``example`` with the notebook's tokenizer.

    Truncation is enabled; no padding is requested at this stage.
    """
    sentences = example["sentence"]
    return tokenizer(sentences, truncation=True)


# Tokenize every split; batched=True passes batches of examples to tokenize_function.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# Fixed-seed 100-example subsets of the train and validation splits,
# small enough to run quickly (e.g. on CPU).
small_train_dataset, small_eval_dataset = (
    tokenized_datasets[split_name].shuffle(seed=42).select(range(100))
    for split_name in ("train", "validation")
)

# Collator built around the same tokenizer that produced the encodings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

In [3]:
! pip install transformers[torch]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m

In [4]:
from transformers import AutoModelForSequenceClassification

# Load the checkpoint as a sequence-classification model configured for two labels.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

In [5]:
import numpy as np
import evaluate

def compute_metrics(eval_preds):
    """Compute the GLUE SST-2 metric for one evaluation pass.

    Args:
        eval_preds: A ``(logits, labels)`` pair; it is unpacked into exactly
            those two values.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max predictions
        scored against ``labels``.
    """
    # Load the metric only on the first call and cache it on the function,
    # so repeated evaluations (one per epoch here) do not reload it.
    metric = getattr(compute_metrics, "_metric", None)
    if metric is None:
        metric = evaluate.load("glue", "sst2")
        compute_metrics._metric = metric
    logits, labels = eval_preds
    # Predicted class = index of the largest logit along the last axis.
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [6]:
# Customize your training arguments
from transformers import TrainingArguments

import inspect

# Transformers renamed `evaluation_strategy` to `eval_strategy`. Pick whichever
# keyword the installed TrainingArguments accepts so this cell runs on both
# older and newer releases.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="bert-finetuned-sst2",
    # Run evaluation once per epoch.
    **{_eval_strategy_kwarg: "epoch"},
)

In [7]:
from transformers import Trainer

# Bundle the model, training arguments, data subsets, collator and metric function.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,  # small subset, e.g. when running on CPU
    eval_dataset=small_eval_dataset,  # small subset, e.g. when running on CPU
    data_collator=data_collator,
    # NOTE(review): newer Transformers releases appear to prefer
    # `processing_class=` over `tokenizer=` — confirm against the installed version.
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [8]:
# Fine-tune the model; as the last expression, the returned TrainOutput is shown as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.641175,0.57
2,No log,0.54837,0.76
3,No log,0.494756,0.77


TrainOutput(global_step=39, training_loss=0.4971670493101462, metrics={'train_runtime': 6.2855, 'train_samples_per_second': 47.729, 'train_steps_per_second': 6.205, 'total_flos': 4409165732400.0, 'train_loss': 0.4971670493101462, 'epoch': 3.0})

In [9]:
# Persist the fine-tuned model under a local directory so it can be reloaded later.
trainer.save_model(output_dir="saved_models/bert_sst2")

In [10]:
# Drop the in-memory objects so the steps below rely only on what was saved to disk.
del model, trainer

# Reload the model from the directory written by save_model.
model = AutoModelForSequenceClassification.from_pretrained(
    "saved_models/bert_sst2",
    num_labels=2,
)

# Rebuild the Trainer around the reloaded model with the same configuration as before.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,  # small subset, e.g. when running on CPU
    eval_dataset=small_eval_dataset,  # small subset, e.g. when running on CPU
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [11]:
# Run the reloaded model over the evaluation subset.
predictions = trainer.predict(small_eval_dataset)
# Sanity check: shapes of the prediction array and of the label array.
print(predictions.predictions.shape, predictions.label_ids.shape)
# Predicted class per example = index of the largest value along the last axis.
preds = predictions.predictions.argmax(axis=-1)

(100, 2) (100,)


In [12]:
import evaluate
# Load the GLUE SST-2 metric and score the arg-max predictions against the reference labels.
metric = evaluate.load("glue", "sst2")
# Last expression of the cell, so the resulting metric dict is displayed as the output.
metric.compute(predictions=preds, references=predictions.label_ids)

{'accuracy': 0.77}