## Setup and Imports

In [2]:
experiment = 'IHC-lora-EDA'

In [1]:
import os

COLAB = False
if 'google.colab' in str(get_ipython()):
    COLAB = True

if COLAB:
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    repo_path = '/content/drive/Othercomputers/My Mac/266-implicit-hate-speech-detection'

    hf_token = userdata.get('hf_token')

else:
    repo_path = '..'

!python -m pip install transformers accelerate datasets evaluate peft bitsandbytes tqdm

data_path = os.path.join(repo_path, 'data/processed')
aug_path = os.path.join(repo_path, 'data/easy_data_augmentation')

Mounted at /content/drive
Collecting accelerate
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.10.0-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [3]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset

from transformers import (
    BertForSequenceClassification,
    BertConfig,
    BertTokenizer,
    EvalPrediction,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    BitsAndBytesConfig
)

from peft import (
    PeftModel,
    PeftConfig,
    PeftType,
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model
)

import accelerate

import evaluate
from datasets import load_dataset
from datetime import datetime
from sklearn.metrics import classification_report
import time
import math

import bitsandbytes as bnb

In [4]:
# Path Definitions
exp_dir = os.path.join(repo_path, 'experiments', experiment)

model_dir = os.path.join(repo_path, f'models/hateBERT-{experiment}')
model_target = 'GroNLP/hateBERT'

train_file = os.path.join(aug_path, 'ihc/ihc_train.csv')
val_file = os.path.join(data_path, 'ihc/ihc_val.csv')
test_file = os.path.join(data_path, 'ihc/ihc_test.csv')

results_file = os.path.join(exp_dir, 'results.csv')
metrics_file = os.path.join(exp_dir, 'metrics.csv')

## Load Data/Model/Tokenizer

In [7]:
data = load_dataset(
    "csv",
    data_files = {
        "train": train_file,
    }
)

val = load_dataset(
    'csv',
    data_files = {
        "val": val_file,
    }
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating val split: 0 examples [00:00, ? examples/s]

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = BertTokenizer.from_pretrained(model_target, token=hf_token, max_length=512)

# set padding_side and truncation side to 'left', following hateBERT procedure
tokenizer.padding_side = 'left'
tokenizer.truncation_side = 'left'

data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    padding = 'max_length',
    max_length = 512,
)

tokenizer_config.json:   0%|          | 0.00/151 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

## Preprocess Data

In [9]:
def preprocess(example):
    encoded = tokenizer(
        example['cleaned_text'],
        add_special_tokens=True,
        padding='max_length'
    )

    return encoded

In [10]:
processed = data.map(preprocess)
processed.set_format("torch")

processed_val = val.map(preprocess)
processed_val.set_format("torch")

Map:   0%|          | 0/14910 [00:00<?, ? examples/s]

Map:   0%|          | 0/3222 [00:00<?, ? examples/s]

In [12]:
processed

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'id', 'text', 'cleaned_text', 'label_name', 'label', 'orig_id', 'orig_cleaned_text', 'aug_method', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 14910
    })
})

## Define model

In [13]:
peft_config = LoraConfig(
    task_type="SEQ_CLS",
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

In [14]:
model = BertForSequenceClassification.from_pretrained(
    model_target,
    num_labels=3,
    output_attentions=False,
    output_hidden_states=False,
    token=hf_token,
#    quantization_config=bnb_config
)

model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at GroNLP/hateBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [15]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 297,219 || all params: 109,781,766 || trainable%: 0.27073621679578375



## Train setup

In [16]:
batch_size = 20
metric_name = "f1"

args = TrainingArguments(
    model_dir,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=10,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    push_to_hub=False,
)

In [17]:
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions,
            tuple) else p.predictions

    y_pred = np.argmax(preds, axis=1).flatten()
    y_true = p.label_ids

    result = classification_report(y_pred, y_true, output_dict=True)
    result['f1'] = result['weighted avg']['f1-score']
    return result

## Train

In [18]:
trainer = Trainer(
    model,
    args,
    train_dataset=processed['train'],
    eval_dataset=processed_val['val'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


## Run Fine-tuning

In [19]:
start = time.time()
trainer.train()
end = time.time()

print(f"Total training time: ~{(end - start) // 60} minutes")

Epoch,Training Loss,Validation Loss,0,1,2,Accuracy,Macro avg,Weighted avg,F1
1,0.9879,0.862544,"{'precision': 0.3204613841524574, 'recall': 0.710789766407119, 'f1-score': 0.4417559626685102, 'support': 899}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}","{'precision': 0.7896713615023474, 'recall': 0.3628127696289905, 'f1-score': 0.49719184156074486, 'support': 2318}",0.459342,"{'precision': 0.37004424855160156, 'recall': 0.3578675120120365, 'f1-score': 0.3129826014097517, 'support': 3222}","{'precision': 0.6575273123263502, 'recall': 0.45934202358783366, 'f1-score': 0.4809526068208558, 'support': 3222}",0.480953
2,0.6626,0.846779,"{'precision': 0.46940822467402205, 'recall': 0.7506014434643143, 'f1-score': 0.5775995063252083, 'support': 1247}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7}","{'precision': 0.7455399061032864, 'recall': 0.4034552845528455, 'f1-score': 0.5235740191229804, 'support': 1968}",0.536934,"{'precision': 0.4049827102591028, 'recall': 0.38468557600571995, 'f1-score': 0.3670578418160629, 'support': 3222}","{'precision': 0.6370498421414564, 'recall': 0.5369335816263191, 'f1-score': 0.543345826822334, 'support': 3222}",0.543346
3,0.6332,0.821139,"{'precision': 0.5571715145436309, 'recall': 0.7683264177040111, 'f1-score': 0.6459302325581395, 'support': 1446}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 6}","{'precision': 0.7220657276995305, 'recall': 0.4344632768361582, 'f1-score': 0.5425044091710758, 'support': 1770}",0.583489,"{'precision': 0.42641241408105374, 'recall': 0.4009298981800564, 'f1-score': 0.3961448805764051, 'support': 3222}","{'precision': 0.6467182954867347, 'recall': 0.5834885164494104, 'f1-score': 0.5879105898547095, 'support': 3222}",0.587911
4,0.6335,0.78718,"{'precision': 0.6474423269809428, 'recall': 0.7648104265402843, 'f1-score': 0.7012493210211841, 'support': 1688}","{'precision': 0.018404907975460124, 'recall': 0.2727272727272727, 'f1-score': 0.034482758620689655, 'support': 11}","{'precision': 0.6657276995305165, 'recall': 0.46552856204858833, 'f1-score': 0.5479134466769706, 'support': 1523}",0.621664,"{'precision': 0.4438583114956398, 'recall': 0.5010220871053818, 'f1-score': 0.4278818421062815, 'support': 3222}","{'precision': 0.65393804727391, 'recall': 0.6216635630043451, 'f1-score': 0.6264929681929275, 'support': 3222}",0.626493
5,0.5988,0.772432,"{'precision': 0.6399197592778335, 'recall': 0.7852307692307692, 'f1-score': 0.7051671732522797, 'support': 1625}","{'precision': 0.04294478527607362, 'recall': 0.4117647058823529, 'f1-score': 0.07777777777777779, 'support': 17}","{'precision': 0.7004694835680751, 'recall': 0.47215189873417723, 'f1-score': 0.5640831758034026, 'support': 1580}",0.629733,"{'precision': 0.4611113427073274, 'recall': 0.5563824579490998, 'f1-score': 0.44900937561115334, 'support': 3222}","{'precision': 0.6664622762922817, 'recall': 0.6297330850403476, 'f1-score': 0.6326723452906744, 'support': 3222}",0.632672
6,0.5888,0.758833,"{'precision': 0.664493480441324, 'recall': 0.7948410317936413, 'f1-score': 0.723845943731221, 'support': 1667}","{'precision': 0.03067484662576687, 'recall': 0.35714285714285715, 'f1-score': 0.056497175141242945, 'support': 14}","{'precision': 0.7070422535211267, 'recall': 0.48864373783257625, 'f1-score': 0.577897160399079, 'support': 1541}",0.646493,"{'precision': 0.4674035268627392, 'recall': 0.5468758755896915, 'f1-score': 0.45274675975718104, 'support': 3222}","{'precision': 0.6820894451969287, 'recall': 0.6464928615766604, 'f1-score': 0.6511426669233096, 'support': 3222}",0.651143
7,0.5747,0.760903,"{'precision': 0.6544633901705116, 'recall': 0.8065512978986403, 'f1-score': 0.722591362126246, 'support': 1618}","{'precision': 0.0736196319018405, 'recall': 0.42857142857142855, 'f1-score': 0.1256544502617801, 'support': 28}","{'precision': 0.7258215962441315, 'recall': 0.49048223350253806, 'f1-score': 0.5853843241196517, 'support': 1576}",0.648665,"{'precision': 0.48463487277216116, 'recall': 0.5752016533242023, 'f1-score': 0.47787671216922595, 'support': 3222}","{'precision': 0.6843196619087183, 'recall': 0.6486654252017381, 'f1-score': 0.6502907645376061, 'support': 3222}",0.650291
8,0.5725,0.759434,"{'precision': 0.649949849548646, 'recall': 0.8069738480697385, 'f1-score': 0.72, 'support': 1606}","{'precision': 0.0736196319018405, 'recall': 0.42857142857142855, 'f1-score': 0.1256544502617801, 'support': 28}","{'precision': 0.7258215962441315, 'recall': 0.48677581863979846, 'f1-score': 0.5827365246890311, 'support': 1588}",0.645872,"{'precision': 0.4831303592315393, 'recall': 0.5741070317603217, 'f1-score': 0.47613032498360375, 'support': 3222}","{'precision': 0.6823356619813958, 'recall': 0.6458721291123526, 'f1-score': 0.6471830930519898, 'support': 3222}",0.647183
9,0.569,0.753614,"{'precision': 0.6629889669007021, 'recall': 0.8065893837705919, 'f1-score': 0.7277731902009358, 'support': 1639}","{'precision': 0.0736196319018405, 'recall': 0.42857142857142855, 'f1-score': 0.1256544502617801, 'support': 28}","{'precision': 0.7220657276995305, 'recall': 0.4945337620578778, 'f1-score': 0.5870229007633587, 'support': 1555}",0.6527,"{'precision': 0.48622477550069104, 'recall': 0.5765648581332994, 'f1-score': 0.48015018040869156, 'support': 3222}","{'precision': 0.6863787936115059, 'recall': 0.6527001862197392, 'f1-score': 0.6546117920650795, 'support': 3222}",0.654612
10,0.5694,0.758584,"{'precision': 0.6484453360080241, 'recall': 0.8066126013724267, 'f1-score': 0.718932443703086, 'support': 1603}","{'precision': 0.0736196319018405, 'recall': 0.41379310344827586, 'f1-score': 0.125, 'support': 29}","{'precision': 0.7248826291079812, 'recall': 0.48553459119496856, 'f1-score': 0.5815442561205273, 'support': 1590}",0.644631,"{'precision': 0.48231586567261525, 'recall': 0.5686467653385571, 'f1-score': 0.4751588999412044, 'support': 3222}","{'precision': 0.6809919997603062, 'recall': 0.6446306641837368, 'f1-score': 0.6457880429819012, 'support': 3222}",0.645788


Trainer is attempting to log a value of "{'precision': 0.3204613841524574, 'recall': 0.710789766407119, 'f1-score': 0.4417559626685102, 'support': 899}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7896713615023474, 'recall': 0.3628127696289905, 'f1-score': 0.49719184156074486, 'support': 2318}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.37004424855160156, 'recall': 0.3578675120120365, 'f1-score': 0.3129826014097517,

Total training time: ~59.0 minutes


In [20]:
trainer.evaluate()

Trainer is attempting to log a value of "{'precision': 0.6629889669007021, 'recall': 0.8065893837705919, 'f1-score': 0.7277731902009358, 'support': 1639}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0736196319018405, 'recall': 0.42857142857142855, 'f1-score': 0.1256544502617801, 'support': 28}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7220657276995305, 'recall': 0.4945337620578778, 'f1-score': 0.5870229007633587, 'support': 1555}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.48622477550069104, 'recall': 0.5

{'eval_loss': 0.7536143660545349,
 'eval_0': {'precision': 0.6629889669007021,
  'recall': 0.8065893837705919,
  'f1-score': 0.7277731902009358,
  'support': 1639},
 'eval_1': {'precision': 0.0736196319018405,
  'recall': 0.42857142857142855,
  'f1-score': 0.1256544502617801,
  'support': 28},
 'eval_2': {'precision': 0.7220657276995305,
  'recall': 0.4945337620578778,
  'f1-score': 0.5870229007633587,
  'support': 1555},
 'eval_accuracy': 0.6527001862197392,
 'eval_macro avg': {'precision': 0.48622477550069104,
  'recall': 0.5765648581332994,
  'f1-score': 0.48015018040869156,
  'support': 3222},
 'eval_weighted avg': {'precision': 0.6863787936115059,
  'recall': 0.6527001862197392,
  'f1-score': 0.6546117920650795,
  'support': 3222},
 'eval_f1': 0.6546117920650795,
 'eval_runtime': 31.7673,
 'eval_samples_per_second': 101.425,
 'eval_steps_per_second': 5.1,
 'epoch': 10.0}

## Save best model checkpoint

In [21]:
trainer.save_model(os.path.join(model_dir, 'final_model'))