In [None]:
import numpy as np
import pandas as pd

import datasets
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import TrainingArguments, Trainer

import evaluate

In [None]:
# Tokenizer loaded from the "bert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def tokenize(texts):
    """Run the module-level ``tokenizer`` over the ``"text"`` field of ``texts``.

    Args:
        texts: Mapping with a ``"text"`` entry, as handed over by
            ``Dataset.map`` (a single example, or a batch when the caller
            maps with ``batched=True``).

    Returns:
        The tokenizer output for that text. Sequences are truncated and padded
        with ``padding="max_length"`` so rows share one fixed length.
    """
    raw_text = texts["text"]
    encoded = tokenizer(raw_text, truncation=True, padding="max_length")
    return encoded

In [None]:
def _load_tokenized_split(split):
    """Load one split of ``dair-ai/emotion`` and tokenize it with ``tokenize``.

    Args:
        split: Name of the split to load, e.g. ``"train"`` or ``"validation"``.

    Returns:
        The loaded dataset with the tokenizer's output columns added and the
        raw ``"text"`` column removed; all other columns are kept.
    """
    return datasets.load_dataset("dair-ai/emotion", split=split).map(
        tokenize,
        # Hand the tokenizer whole batches instead of one example per call;
        # padding is to a fixed length, so the per-row output is unchanged.
        batched=True,
        # Drop the raw text once it has been encoded.
        remove_columns=["text"],
    )


# Both splits go through the exact same preprocessing.
train_dset = _load_tokenized_split("train")
validation_dset = _load_tokenized_split("validation")

In [None]:
# Accuracy takes no averaging argument, so it is kept separate from the
# metrics that need one.
accuracy_metric = evaluate.load("accuracy")

# Precision / recall / F1 default to average="binary", which raises on
# multiclass labels; they are loaded individually so that `average` can be
# passed to them without also being forwarded to accuracy.
AVERAGING = "macro"
averaged_metrics = [evaluate.load(name) for name in ("recall", "precision", "f1")]


def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged recall / precision / F1.

    Args:
        eval_pred: Pair ``(logits, labels)`` as supplied by ``Trainer``;
            the class axis of ``logits`` is the last one.

    Returns:
        dict with the keys ``"accuracy"``, ``"recall"``, ``"precision"`` and
        ``"f1"``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit along the last axis.
    predictions = np.argmax(logits, axis=-1)

    results = accuracy_metric.compute(predictions=predictions, references=labels)
    for averaged_metric in averaged_metrics:
        results.update(
            averaged_metric.compute(
                predictions=predictions,
                references=labels,
                average=AVERAGING,
            )
        )
    return results

In [None]:
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version before upgrading.
training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",  # run evaluation at the end of every epoch
    num_train_epochs=5,
    logging_steps=1,  # log the training loss at every optimisation step
    per_device_train_batch_size=32,
)

# Size the classification head from the dataset instead of hard-coding it:
# the previous value (8) did not match the 6 classes of dair-ai/emotion.
# Assumes the "label" column is a `datasets.ClassLabel` feature.
label_feature = train_dset.features["label"]
id2label = dict(enumerate(label_feature.names))
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=label_feature.num_classes,
    # Store readable class names in the model config.
    id2label=id2label,
    label2id=label2id,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dset,
    eval_dataset=validation_dset,
    compute_metrics=compute_metrics,
)

trainer.train()