# Fine Tuning Models - Part I
> An introduction to fine-tuning using built-in datasets

In this notebook, we'll use the Hugging Face fine-tuning tutorial (https://huggingface.co/transformers/training.html) as a guide for our work.  The notebook cells are copied here, and our job is to figure out what's going on in each of them.  To show our understanding, we'll fill in each of the numbered headers below.

# 1.  ?

In [None]:
! pip install transformers
! pip install datasets

# 2. ?

In [None]:
import numpy as np

from datasets import load_dataset, load_metric
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

# 3. ?

In [None]:
# Load the dataset identified by the name "imdb" through the `datasets` library.
# NOTE(review): presumably the data is downloaded and cached on first use — confirm.
raw_datasets = load_dataset("imdb")
# Bare expression: the notebook displays the type of the object that was returned.
type(raw_datasets)

# 4. ?

In [None]:
# Load the tokenizer associated with the "bert-base-cased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
# Bare expression: the notebook displays the tokenizer's `name_or_path` attribute.
tokenizer.name_or_path

# 5. ?

In [None]:
def tokenize_function(examples):
    """Tokenize the "text" field of ``examples`` with the notebook's tokenizer.

    The tokenizer is called with padding set to "max_length" and with
    truncation enabled; its result is returned unchanged.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True, padding="max_length")

# Run tokenize_function over raw_datasets and keep the result.
# NOTE(review): batched=True presumably passes batches of examples (not single
# examples) to the function, and the map covers every split — confirm.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# 6. ?

In [None]:
# Keep handles on the complete tokenized "train" and "test" splits.
full_train_dataset = tokenized_datasets["train"]
full_eval_dataset = tokenized_datasets["test"]

# Derive small subsets from them: shuffle with a fixed seed (42), then select
# the rows at indices 0-999 of the shuffled split.
small_train_dataset = full_train_dataset.shuffle(seed=42).select(range(1000))
small_eval_dataset = full_eval_dataset.shuffle(seed=42).select(range(1000))

# 7. ?

In [None]:
# Load the "bert-base-cased" checkpoint as a sequence-classification model
# configured for two labels (num_labels=2).
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
# Bare expression: the notebook displays the model's `name_or_path` attribute.
model.name_or_path

# 8. ?

In [None]:
# Build the training configuration. "test_trainer" is the only value supplied,
# so every other setting is left at whatever the library defaults to.
# NOTE(review): the positional argument is presumably the output directory — confirm.
training_args = TrainingArguments("test_trainer")
# Bare expression: the notebook displays the resulting configuration object.
training_args

# 9. ?

In [None]:
# Bundle the model, the training configuration and the two small dataset
# subsets into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
)

In [None]:
# Start training with the `trainer` built in the previous cell.
trainer.train()

# 10. ?

In [None]:
# Load the "accuracy" metric object that compute_metrics calls `.compute` on.
# NOTE(review): newer `datasets` releases deprecate `load_metric` in favour of
# the separate `evaluate` package — confirm against the installed version.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level ``metric``.

    ``eval_pred`` is unpacked into a pair ``(scores, gold)``. The predicted
    class for each row is the index of the largest score along the last
    axis; those predictions and ``gold`` are handed to ``metric.compute``,
    whose result is returned as-is.
    """
    scores, gold = eval_pred
    return metric.compute(
        predictions=np.argmax(scores, axis=-1),
        references=gold,
    )

In [None]:
# Create a new Trainer over the same model, arguments and datasets, this time
# also passing compute_metrics so evaluation can score predictions with it.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)
# Bare expression: runs evaluation and the notebook displays what it returns.
trainer.evaluate()