In [None]:
cd ..

In [None]:
# import datasets
from transformers import AutoModelForSequenceClassification, AutoTokenizer, set_seed

from tklearn import datasets
from tklearn.metrics import TextClassificationMetric
from tklearn.nn.trainer import Trainer
from tklearn.nn.evaluator import Evaluator
from tklearn.nn.callbacks import ProgbarLogger
from tklearn.config import config, config_scope

In [None]:
# Settings stored under the 'emotion/Trainer' key: train for two epochs.
config['emotion/Trainer'] = dict(epochs=2)

In [None]:
# Pretrained tokenizer for the "bert-base-uncased" checkpoint; `tokenize` below
# calls it on every batch of raw text.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def tokenize(texts):
    """Tokenize one batch of examples with the notebook-level ``tokenizer``.

    Args:
        texts: A batch indexable by ``"text"`` whose value exposes
            ``.tolist()`` (presumably a column of raw strings - TODO confirm
            against what the dataset's batched ``map`` passes in).

    Returns:
        Whatever ``tokenizer`` returns for the batch when called with
        ``padding="max_length"`` and ``truncation=True``.
    """
    sentences = texts["text"].tolist()
    encoded = tokenizer(sentences, padding="max_length", truncation=True)
    return encoded

In [None]:
# Training split: load, tokenize in batches, rename 'label' -> 'labels'
# (presumably the argument name the model expects - confirm), then drop the
# raw text column.
train_dset = datasets.load_dataset('hf', 'dair-ai/emotion', split="train")
train_dset = train_dset.map(tokenize, batched=True)
train_dset = train_dset.rename_column('label', 'labels')
train_dset = train_dset.remove_columns(["text"])

# Optional quick run: uncomment to keep only the first 500 examples as a
# Python list.
# train_dset = train_dset[:500].to_pylist()

In [None]:
# Validation split, preprocessed with the same steps as the training split:
# batched tokenization, 'label' -> 'labels', raw text column removed.
valid_dset = datasets.load_dataset('hf', "dair-ai/emotion", split="validation")
valid_dset = valid_dset.map(tokenize, batched=True)
valid_dset = valid_dset.rename_column('label', 'labels')
valid_dset = valid_dset.remove_columns(["text"])

In [None]:
# Sanity check: display the sizes of the training and validation splits.
tuple(len(split) for split in (train_dset, valid_dset))

In [None]:
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=8,
)

In [None]:
callbacks = [ProgbarLogger()]

with config_scope('emotion'):
    trainer = Trainer(model, callbacks=callbacks)

In [None]:
metric = TextClassificationMetric(num_labels=8)

evaluator = Evaluator(valid_dset, metric=metric, postprocessor='argmax')

In [None]:
# Fine-tune the model on the training split, passing the validation-split
# evaluator to the trainer.
trainer.fit(train_dset, evaluator=evaluator)

In [None]:
# Test split, preprocessed with the same steps as the other splits:
# batched tokenization, 'label' -> 'labels', raw text column removed.
test_dset = datasets.load_dataset('hf', "dair-ai/emotion", split="test")
test_dset = test_dset.map(tokenize, batched=True)
test_dset = test_dset.rename_column('label', 'labels')
test_dset = test_dset.remove_columns(["text"])

# NOTE(review): this rebinds `evaluator`, replacing the validation evaluator.
# Re-running the training cell after this one would therefore evaluate on the
# test split; restart the kernel and run top to bottom to avoid that.
evaluator = Evaluator(test_dset, postprocessor='argmax', metric=metric)

In [None]:
# Evaluate the trained model on the test split, using the evaluator that the
# previous cell rebound to `test_dset`.
evaluator.evaluate(trainer)