# One-vs-Rest Classifier

This notebook implements a one-vs-rest classifier: it fine-tunes one BERT model per label to tell whether a sentence contains problematic metaphors.

<div hidden>
TODO: extend data3/data.json with better data in the same format that actually makes sense.
</div>

## Imports and Setup

In [1]:
%pip install transformers -Uqq
%pip install sklearn -Uqq
%pip install datasets -Uqq
%pip install torch -Uqq
%pip install numpy -Uqq
%pip install evaluate -Uqq

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import evaluate
import numpy as np
import torch
from datasets import Dataset, load_dataset
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    EvalPrediction,
    Trainer,
    TrainingArguments,
)
import os



In [3]:
# Switch off parallelism in the Hugging Face tokenizers library for this kernel.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

## Loading Dataset

In [4]:
# Load the annotated sentences from the local JSON file; `field="data"` selects
# the top-level "data" key that holds the records.
dataset = load_dataset("json", data_files="data/data.json", field="data")
# Display the resulting DatasetDict (a single "train" split).
dataset

Found cached dataset json (/home/xt0r3/.cache/huggingface/datasets/json/default-6d19e6e1597cb322/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'agency', 'humanComparison', 'hyperbole', 'historyComparison', 'unjustClaims', 'deepSounding', 'sceptics', 'deEmphasize', 'performanceNumber', 'inscrutable'],
        num_rows: 791
    })
})

In [5]:
# Peek at the first three rows: one text column plus one boolean column per label.
dataset["train"][0:3]

{'text': ['A new vision of artificial intelligence for the people',
  'The gig workers fighting back against the algorithms',
  'How the AI industry profits from catastrophe'],
 'agency': [False, True, False],
 'humanComparison': [False, True, False],
 'hyperbole': [False, True, True],
 'historyComparison': [False, False, False],
 'unjustClaims': [False, False, False],
 'deepSounding': [False, False, False],
 'sceptics': [False, False, False],
 'deEmphasize': [False, False, False],
 'performanceNumber': [False, False, False],
 'inscrutable': [False, False, False]}

In [6]:
# Every column except the raw text is a binary target; one classifier is trained per target.
labels = [column for column in dataset["train"].features if column != "text"]

# Number of fine-tuning epochs for each per-label classifier.
num_epochs = dict(
    agency=6,
    humanComparison=2,
    hyperbole=2,
    historyComparison=2,
    unjustClaims=5,
    deepSounding=2,
    sceptics=2,
    deEmphasize=7,
    performanceNumber=2,
    inscrutable=2,
)

labels

['agency',
 'humanComparison',
 'hyperbole',
 'historyComparison',
 'unjustClaims',
 'deepSounding',
 'sceptics',
 'deEmphasize',
 'performanceNumber',
 'inscrutable']

## Preprocess Data, Create Train/Test Split

In [7]:
# Fixed seed so the train/test split is reproducible across kernel restarts.
SPLIT_SEED = 42

# Maps each label name to a DatasetDict with "train" and "test" splits that
# contain only the text and that label (renamed to "labels").
processed_dataset = {}
for label in labels:
    # Drop every other target column so the projected dataset is a binary task.
    other_labels = [name for name in labels if name != label]
    projected_dataset = (
        dataset["train"]
        .remove_columns(other_labels)
        .rename_column(label, "labels")
        # Stratified splitting needs a ClassLabel column, so encode the booleans.
        .class_encode_column("labels")
    )
    # Hold out 20% of the rows, keeping the positive/negative ratio in both splits.
    processed_dataset[label] = projected_dataset.train_test_split(
        test_size=0.2, stratify_by_column="labels", seed=SPLIT_SEED
    )

Loading cached processed dataset at /home/xt0r3/.cache/huggingface/datasets/json/default-6d19e6e1597cb322/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-6b1473f7e44d070a.arrow
Loading cached processed dataset at /home/xt0r3/.cache/huggingface/datasets/json/default-6d19e6e1597cb322/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-ba94b829f3d9f6c3.arrow
Loading cached processed dataset at /home/xt0r3/.cache/huggingface/datasets/json/default-6d19e6e1597cb322/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7c2366939237c140.arrow
Loading cached processed dataset at /home/xt0r3/.cache/huggingface/datasets/json/default-6d19e6e1597cb322/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-b853de5887066954.arrow
Loading cached processed dataset at /home/xt0r3/.cache/huggingface/datasets/json/default-6d19e6e1597cb322/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51

In [8]:
# Tokenizer for the cased BERT base vocabulary.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


def preprocess_data(examples):
    """Tokenize the ``text`` entries of a batch.

    Args:
        examples: Mapping with a ``"text"`` entry holding the sentence(s) to encode.

    Returns:
        The tokenizer output for those sentences, truncated and padded with
        ``padding="max_length"`` so every row has the same length.
    """
    sentences = examples["text"]
    encoded = tokenizer(sentences, truncation=True, padding="max_length")
    return encoded

In [9]:
# Tokenize both splits of every per-label dataset in batches and drop the raw
# text column, leaving the tokenizer outputs and "labels".
tokenized_dataset = {
    label_name: split_pair.map(preprocess_data, batched=True, remove_columns="text")
    for label_name, split_pair in processed_dataset.items()
}

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

Map:   0%|          | 0/632 [00:00<?, ? examples/s]

Map:   0%|          | 0/159 [00:00<?, ? examples/s]

### Verify dataset

In [10]:
# Take the first training row of the "agency" task as a sanity-check sample;
# the cells below reuse `example`.
example = tokenized_dataset["agency"]["train"][0]
# Show which columns survived tokenization.
print(example.keys())

dict_keys(['labels', 'input_ids', 'token_type_ids', 'attention_mask'])


In [11]:
# Decode the token ids back to text to check the special tokens and padding.
tokenizer.decode(example["input_ids"])

'[CLS] AI researchers want to study AI the same way social scientists study humans [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]

In [12]:
# Encoded class id of the sample for the "agency" task.
example["labels"]

0

## Define Metrics

In [13]:
# Evaluation metrics, keyed by the name `compute_metrics` reports them under.
# The "f1" key must match `metric_name`, which selects the best checkpoint.
metrics = {
    "accuracy": evaluate.load("accuracy"),
    "precision": evaluate.load("precision"),
    "recall": evaluate.load("recall"),
    "f1": evaluate.load("f1"),
}

In [14]:
def compute_metrics(eval_pred):
    """Score one evaluation pass with every entry of the module-level ``metrics``.

    Args:
        eval_pred: A pair ``(logits, labels)``; the predicted class of each row
            is the argmax of its logits over the last axis.

    Returns:
        A dict mapping each metric name to a single value. When a metric
        returns a dict, its last value is used; otherwise the returned object
        itself is used. A metric returning an empty dict contributes no entry.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)

    scores = {}
    for key, scorer in metrics.items():
        outcome = scorer.compute(predictions=predicted_classes, references=references)
        candidates = list(outcome.values()) if isinstance(outcome, dict) else [outcome]
        if candidates:
            scores[key] = candidates[-1]
    return scores

## Train the Model

In [19]:
# Per-device batch size, used for both training and evaluation.
batch_size = 1  # TODO: increase if we have more data
# Metric used to pick the best checkpoint; must be a key returned by compute_metrics.
metric_name = "f1"

In [20]:
# Uploading stays off until the classifier is good enough to publish. When True,
# the Trainer pushes to the Hub every time a checkpoint is saved.
PUSH_TO_HUB = False

# Only the "agency" classifier is trained for now; iterate over `labels`
# instead to train one model per label.
for label in ["agency"]:
    print(f"training model for {label}")

    # Start from the `xt0r3/aihype_article_bert_fine_tune` checkpoint with a two-class head.
    model = AutoModelForSequenceClassification.from_pretrained(
        "xt0r3/aihype_article_bert_fine_tune",
        num_labels=2,
    )

    training_args = TrainingArguments(
        f"aihype_{label}-vs-rest",  # output directory, one per label
        evaluation_strategy="epoch",
        # Checkpoints are saved on the same schedule as evaluation so the best
        # one can be restored at the end.
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=num_epochs[label],
        report_to="none",
        load_best_model_at_end=True,
        metric_for_best_model=metric_name,
        push_to_hub=PUSH_TO_HUB,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset[label]["train"],
        eval_dataset=tokenized_dataset[label]["test"],
        compute_metrics=compute_metrics,
    )

    trainer.train()

training model for agency


loading configuration file config.json from cache at /home/xt0r3/.cache/huggingface/hub/models--xt0r3--aihype_article_bert_fine_tune/snapshots/f77d86d05db534f13f5369d55df707e549baea8b/config.json
Model config BertConfig {
  "_name_or_path": "xt0r3/aihype_article_bert_fine_tune",
  "architectures": [
    "BertForNextSentencePrediction"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 28996
}

loading weights file pytorch_model.bin from cache at /home/xt0r3/.cache/huggingface

Epoch,Training Loss,Validation Loss,Accuracy,Presicion,Recall,F1
1,1.1162,1.167958,0.761006,0.0,0.0,0.0
2,0.8014,1.476343,0.72956,0.439024,0.473684,0.455696
3,0.5139,1.59997,0.761006,0.5,0.342105,0.40625
4,0.0486,1.921659,0.742138,0.457143,0.421053,0.438356
5,0.0238,2.070165,0.748428,0.470588,0.421053,0.444444
6,0.0003,2.131654,0.742138,0.459459,0.447368,0.453333


***** Running Evaluation *****
  Num examples = 159
  Batch size = 1
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to aihype_agency-vs-rest/checkpoint-632
Configuration saved in aihype_agency-vs-rest/checkpoint-632/config.json
Model weights saved in aihype_agency-vs-rest/checkpoint-632/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 159
  Batch size = 1
Saving model checkpoint to aihype_agency-vs-rest/checkpoint-1264
Configuration saved in aihype_agency-vs-rest/checkpoint-1264/config.json
Model weights saved in aihype_agency-vs-rest/checkpoint-1264/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 159
  Batch size = 1
Saving model checkpoint to aihype_agency-vs-rest/checkpoint-1896
Configuration saved in aihype_agency-vs-rest/checkpoint-1896/config.json
Model weights saved in aihype_agency-vs-rest/checkpoint-1896/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 159
  Batch size = 1
Saving model checkpoi

## Upload the Model

In [17]:
# agency-vs-rest/checkpoint-263: 0.75 precision, 0.85 recall
#

In [18]:
# trainer.push_to_hub()