## Setup and Imports

In [1]:
experiment = 'IHC-lora'

In [2]:
import os

COLAB = False
if 'google.colab' in str(get_ipython()):
    COLAB = True

if COLAB:
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    repo_path = '/content/drive/Othercomputers/My Mac/266-implicit-hate-speech-detection'

    hf_token = userdata.get('hf_token')

else:
    repo_path = '..'

!python -m pip install transformers accelerate datasets evaluate peft bitsandbytes tqdm

data_path = os.path.join(repo_path, 'data/processed')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset

from transformers import (
    BertForSequenceClassification,
    BertConfig,
    BertTokenizer,
    EvalPrediction,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    BitsAndBytesConfig
)

from peft import (
    PeftModel,
    PeftConfig,
    PeftType,
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model
)

import accelerate

import evaluate
from datasets import load_dataset
from datetime import datetime
from sklearn.metrics import classification_report
import time
import math

import bitsandbytes as bnb

In [4]:
# Path Definitions
exp_dir = os.path.join(repo_path, 'experiments', experiment)

model_dir = os.path.join(repo_path, f'models/hateBERT-{experiment}')
model_target = 'GroNLP/hateBERT'

train_file = os.path.join(data_path, 'ihc/ihc_train.csv')
val_file = os.path.join(data_path, 'ihc/ihc_val.csv')
test_file = os.path.join(data_path, 'ihc/ihc_test.csv')

results_file = os.path.join(exp_dir, 'results.csv')
metrics_file = os.path.join(exp_dir, 'metrics.csv')

## Load Data/Model/Tokenizer

In [5]:
data = load_dataset(
    "csv",
    data_files = {
        "train": train_file,
        "val": val_file,
    }
)

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = BertTokenizer.from_pretrained(model_target, token=hf_token, max_length=512)

# set padding_side and truncation side to 'left', following hateBERT procedure
tokenizer.padding_side = 'left'
tokenizer.truncation_side = 'left'

data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    padding = 'max_length',
    max_length = 512,
)

## Preprocess Data

In [7]:
def preprocess(example):
    encoded = tokenizer(
        example['cleaned_text'],
        add_special_tokens=True,
        padding='max_length'
    )

    return encoded

In [8]:
processed = data.map(preprocess)
processed.set_format("torch")

In [9]:
processed

DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'cleaned_text', 'label_name', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 15036
    })
    val: Dataset({
        features: ['id', 'text', 'cleaned_text', 'label_name', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3222
    })
})

## Define model

In [10]:
peft_config = LoraConfig(
    task_type="SEQ_CLS",
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

In [11]:
model = BertForSequenceClassification.from_pretrained(
    model_target,
    num_labels=3,
    output_attentions=False,
    output_hidden_states=False,
    token=hf_token,
#    quantization_config=bnb_config
)

model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at GroNLP/hateBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [12]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 297,219 || all params: 109,781,766 || trainable%: 0.27073621679578375



## Train setup

In [13]:
batch_size = 20
metric_name = "f1"

args = TrainingArguments(
    model_dir,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=10,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    push_to_hub=False,
)

In [14]:
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions,
            tuple) else p.predictions

    y_pred = np.argmax(preds, axis=1).flatten()
    y_true = p.label_ids

    result = classification_report(y_pred, y_true, output_dict=True)
    result['f1'] = result['weighted avg']['f1-score']
    return result

## Train

In [15]:
trainer = Trainer(
    model,
    args,
    train_dataset=processed['train'],
    eval_dataset=processed['val'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


## Run Fine-tuning

In [16]:
start = time.time()
trainer.train()
end = time.time()

print(f"Total training time: ~{(end - start) // 60} minutes")

Epoch,Training Loss,Validation Loss,0,1,2,Accuracy,Macro avg,Weighted avg,F1
1,0.8359,0.806134,"{'precision': 1.0, 'recall': 0.6188702669149596, 'f1-score': 0.7645705521472391, 'support': 3222}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}",0.61887,"{'precision': 0.3333333333333333, 'recall': 0.2062900889716532, 'f1-score': 0.2548568507157464, 'support': 3222}","{'precision': 1.0, 'recall': 0.6188702669149596, 'f1-score': 0.7645705521472392, 'support': 3222}",0.764571
2,0.8013,0.771069,"{'precision': 0.9839518555667001, 'recall': 0.62784, 'f1-score': 0.7665559679624927, 'support': 3125}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.048826291079812206, 'recall': 0.5360824742268041, 'f1-score': 0.08950086058519795, 'support': 97}",0.625078,"{'precision': 0.34425938221550406, 'recall': 0.3879741580756013, 'f1-score': 0.28535227618256354, 'support': 3222}","{'precision': 0.9557994099567596, 'recall': 0.6250775915580384, 'f1-score': 0.7461728688266771, 'support': 3222}",0.746173
3,0.7496,0.707289,"{'precision': 0.854062186559679, 'recall': 0.712254286909243, 'f1-score': 0.776738882554162, 'support': 2391}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.4328638497652582, 'recall': 0.5547533092659447, 'f1-score': 0.4862869198312237, 'support': 831}",0.671633,"{'precision': 0.4289753454416458, 'recall': 0.42233586539172924, 'f1-score': 0.4210086007951286, 'support': 3222}","{'precision': 0.7454290959711739, 'recall': 0.6716325263811297, 'f1-score': 0.7018271566004806, 'support': 3222}",0.701827
4,0.7125,0.693646,"{'precision': 0.8385155466399198, 'recall': 0.7365638766519824, 'f1-score': 0.7842401500938087, 'support': 2270}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.504225352112676, 'recall': 0.5640756302521008, 'f1-score': 0.5324739712444224, 'support': 952}",0.685599,"{'precision': 0.44758029958419865, 'recall': 0.4335465023013611, 'f1-score': 0.4389047071127437, 'support': 3222}","{'precision': 0.7397432731483196, 'recall': 0.6855990068280571, 'f1-score': 0.7098511363555667, 'support': 3222}",0.709851
5,0.7031,0.687623,"{'precision': 0.8335005015045135, 'recall': 0.7442901925660547, 'f1-score': 0.7863733144073811, 'support': 2233}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.5286384976525822, 'recall': 0.5692618806875632, 'f1-score': 0.5481986368062317, 'support': 989}",0.690565,"{'precision': 0.4540463330523652, 'recall': 0.4378506910845393, 'f1-score': 0.44485731707120424, 'support': 3222}","{'precision': 0.7399224376281758, 'recall': 0.6905648665425201, 'f1-score': 0.7132650722759296, 'support': 3222}",0.713265
6,0.698,0.683574,"{'precision': 0.816950852557673, 'recall': 0.7569702602230484, 'f1-score': 0.7858176555716353, 'support': 2152}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.5671361502347417, 'recall': 0.5644859813084112, 'f1-score': 0.565807962529274, 'support': 1070}",0.693048,"{'precision': 0.46136233426413825, 'recall': 0.4404854138438199, 'f1-score': 0.45054187270030316, 'support': 3222}","{'precision': 0.7339894213082824, 'recall': 0.6930477963997517, 'f1-score': 0.7127542255420491, 'support': 3222}",0.712754
7,0.6868,0.678819,"{'precision': 0.8304914744232698, 'recall': 0.75, 'f1-score': 0.7881960970966205, 'support': 2208}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.539906103286385, 'recall': 0.5670611439842209, 'f1-score': 0.5531505531505532, 'support': 1014}",0.692427,"{'precision': 0.45679919256988494, 'recall': 0.4390203813280736, 'f1-score': 0.4471155500823912, 'support': 3222}","{'precision': 0.7390409572498368, 'recall': 0.6924270639354438, 'f1-score': 0.7142245944394783, 'support': 3222}",0.714225
8,0.6793,0.679754,"{'precision': 0.8019057171514543, 'recall': 0.7709739633558341, 'f1-score': 0.7861356932153393, 'support': 2074}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.6028169014084507, 'recall': 0.5592334494773519, 'f1-score': 0.5802078626299141, 'support': 1148}",0.695531,"{'precision': 0.4682408728533017, 'recall': 0.44340247094439533, 'f1-score': 0.4554478519484178, 'support': 3222}","{'precision': 0.7309702855955982, 'recall': 0.6955307262569832, 'f1-score': 0.7127635176994894, 'support': 3222}",0.712764
9,0.6876,0.675588,"{'precision': 0.8244734202607823, 'recall': 0.7579529737206085, 'f1-score': 0.7898150372327647, 'support': 2169}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.5605633802816902, 'recall': 0.5669515669515669, 'f1-score': 0.5637393767705382, 'support': 1053}",0.695531,"{'precision': 0.4616789335141575, 'recall': 0.4416348468907252, 'f1-score': 0.4511848046677676, 'support': 3222}","{'precision': 0.7382234909938723, 'recall': 0.6955307262569832, 'f1-score': 0.7159299750146627, 'support': 3222}",0.71593
10,0.6815,0.676016,"{'precision': 0.8149448345035105, 'recall': 0.7625527921163773, 'f1-score': 0.787878787878788, 'support': 2131}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}","{'precision': 0.5755868544600939, 'recall': 0.5618698441796517, 'f1-score': 0.5686456400742116, 'support': 1091}",0.6946,"{'precision': 0.4635105629878682, 'recall': 0.4414742120986763, 'f1-score': 0.4521748093176665, 'support': 3222}","{'precision': 0.7338959343708701, 'recall': 0.6945996275605214, 'f1-score': 0.7136443483211242, 'support': 3222}",0.713644


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 0.6188702669149596, 'f1-score': 0.7645705521472391, 'support': 3222}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a val

Total training time: ~59.0 minutes


In [17]:
trainer.evaluate()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 0.6188702669149596, 'f1-score': 0.7645705521472391, 'support': 3222}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a val

{'eval_loss': 0.8061335682868958,
 'eval_0': {'precision': 1.0,
  'recall': 0.6188702669149596,
  'f1-score': 0.7645705521472391,
  'support': 3222},
 'eval_1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0},
 'eval_2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0},
 'eval_accuracy': 0.6188702669149596,
 'eval_macro avg': {'precision': 0.3333333333333333,
  'recall': 0.2062900889716532,
  'f1-score': 0.2548568507157464,
  'support': 3222},
 'eval_weighted avg': {'precision': 1.0,
  'recall': 0.6188702669149596,
  'f1-score': 0.7645705521472392,
  'support': 3222},
 'eval_f1': 0.7645705521472392,
 'eval_runtime': 31.2711,
 'eval_samples_per_second': 103.034,
 'eval_steps_per_second': 5.181,
 'epoch': 10.0}

## Save best model checkpoint

In [18]:
trainer.save_model(os.path.join(model_dir, 'final_model'))