## Setup and Imports

In [1]:
experiment = 'IHC-lora-back-translation'

In [2]:
import os

COLAB = False
if 'google.colab' in str(get_ipython()):
    COLAB = True

if COLAB:
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    repo_path = '/content/drive/Othercomputers/My Mac/266-implicit-hate-speech-detection'

    hf_token = userdata.get('hf_token')

else:
    repo_path = '..'

!python -m pip install transformers accelerate datasets evaluate peft bitsandbytes tqdm

data_path = os.path.join(repo_path, 'data/processed')
aug_path = os.path.join(repo_path, 'data/back_translation')

Mounted at /content/drive
Collecting accelerate
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m48.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.10.0-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [3]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset

from transformers import (
    BertForSequenceClassification,
    BertConfig,
    BertTokenizer,
    EvalPrediction,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    BitsAndBytesConfig
)

from peft import (
    PeftModel,
    PeftConfig,
    PeftType,
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model
)

import accelerate

import evaluate
from datasets import load_dataset
from datetime import datetime
from sklearn.metrics import classification_report
import time
import math

import bitsandbytes as bnb

In [4]:
# Path Definitions
exp_dir = os.path.join(repo_path, 'experiments', experiment)

model_dir = os.path.join(repo_path, f'models/hateBERT-{experiment}')
model_target = 'GroNLP/hateBERT'

train_file = os.path.join(aug_path, 'ihc/ihc_train.csv')
val_file = os.path.join(data_path, 'ihc/ihc_val.csv')
test_file = os.path.join(data_path, 'ihc/ihc_test.csv')

results_file = os.path.join(exp_dir, 'results.csv')
metrics_file = os.path.join(exp_dir, 'metrics.csv')

## Load Data/Model/Tokenizer

In [5]:
data = load_dataset(
    "csv",
    data_files = {
        "train": train_file,
    }
)

val = load_dataset(
    'csv',
    data_files = {
        "val": val_file,
    }
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating val split: 0 examples [00:00, ? examples/s]

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = BertTokenizer.from_pretrained(model_target, token=hf_token, max_length=512)

# set padding_side and truncation side to 'left', following hateBERT procedure
tokenizer.padding_side = 'left'
tokenizer.truncation_side = 'left'

data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    padding = 'max_length',
    max_length = 512,
)

tokenizer_config.json:   0%|          | 0.00/151 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

## Preprocess Data

In [7]:
def preprocess(example):
    encoded = tokenizer(
        example['cleaned_text'],
        add_special_tokens=True,
        padding='max_length'
    )

    return encoded

In [8]:
processed = data.map(preprocess)
processed.set_format("torch")

processed_val = val.map(preprocess)
processed_val.set_format("torch")

Map:   0%|          | 0/14910 [00:00<?, ? examples/s]

Map:   0%|          | 0/3222 [00:00<?, ? examples/s]

In [9]:
processed

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'index', 'id', 'text', 'cleaned_text', 'label_name', 'label', 'orig_id', 'orig_cleaned_text', 'aug_method', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 14910
    })
})

## Define model

In [10]:
peft_config = LoraConfig(
    task_type="SEQ_CLS",
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

In [11]:
model = BertForSequenceClassification.from_pretrained(
    model_target,
    num_labels=3,
    output_attentions=False,
    output_hidden_states=False,
    token=hf_token,
#    quantization_config=bnb_config
)

model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at GroNLP/hateBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [12]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 297,219 || all params: 109,781,766 || trainable%: 0.27073621679578375



## Train setup

In [13]:
batch_size = 20
metric_name = "f1"

args = TrainingArguments(
    model_dir,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=10,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    push_to_hub=False,
)

In [14]:
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions,
            tuple) else p.predictions

    y_pred = np.argmax(preds, axis=1).flatten()
    y_true = p.label_ids

    result = classification_report(y_pred, y_true, output_dict=True)
    result['f1'] = result['weighted avg']['f1-score']
    return result

## Train

In [15]:
trainer = Trainer(
    model,
    args,
    train_dataset=processed['train'],
    eval_dataset=processed_val['val'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


## Run Fine-tuning

In [16]:
start = time.time()
trainer.train()
end = time.time()

print(f"Total training time: ~{(end - start) // 60} minutes")

Epoch,Training Loss,Validation Loss,0,1,2,Accuracy,Macro avg,Weighted avg,F1
1,1.0,0.858321,"{'precision': 0.40320962888665995, 'recall': 0.715939447907391, 'f1-score': 0.5158806544754573, 'support': 1123}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3}","{'precision': 0.7474178403755869, 'recall': 0.3797709923664122, 'f1-score': 0.5036380892122746, 'support': 2096}",0.496586,"{'precision': 0.3835424897540823, 'recall': 0.36523681342460107, 'f1-score': 0.339839581229244, 'support': 3222}","{'precision': 0.6267511504242549, 'recall': 0.4965859714463066, 'f1-score': 0.5074361917954271, 'support': 3222}",0.507436
2,0.6596,0.843819,"{'precision': 0.4879638916750251, 'recall': 0.7519319938176198, 'f1-score': 0.5918491484184916, 'support': 1294}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}","{'precision': 0.7389671361502348, 'recall': 0.40840685002594707, 'f1-score': 0.5260695187165776, 'support': 1927}",0.546245,"{'precision': 0.4089770092750866, 'recall': 0.3867796146145223, 'f1-score': 0.3726395557116897, 'support': 3222}","{'precision': 0.6379313926719382, 'recall': 0.5462445685909373, 'f1-score': 0.5523242584172481, 'support': 3222}",0.552324
3,0.6265,0.810201,"{'precision': 0.5737211634904714, 'recall': 0.7771739130434783, 'f1-score': 0.6601269474899019, 'support': 1472}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}","{'precision': 0.7276995305164319, 'recall': 0.44311034877072614, 'f1-score': 0.550817341862118, 'support': 1749}",0.595593,"{'precision': 0.4338068980023011, 'recall': 0.4067614206047348, 'f1-score': 0.40364809645067323, 'support': 3222}","{'precision': 0.657127259941407, 'recall': 0.595592799503414, 'f1-score': 0.600585474122278, 'support': 3222}",0.600585
4,0.6212,0.777485,"{'precision': 0.6118355065195586, 'recall': 0.7922077922077922, 'f1-score': 0.6904357668364459, 'support': 1540}","{'precision': 0.012269938650306749, 'recall': 0.5, 'f1-score': 0.02395209580838323, 'support': 4}","{'precision': 0.7295774647887324, 'recall': 0.46305125148986886, 'f1-score': 0.5665329930732774, 'support': 1678}",0.620422,"{'precision': 0.45122763665286597, 'recall': 0.5850863478992203, 'f1-score': 0.4269736185727022, 'support': 3222}","{'precision': 0.6724105356021771, 'recall': 0.6204220980757293, 'f1-score': 0.6250804629696834, 'support': 3222}",0.62508
5,0.5856,0.75716,"{'precision': 0.619358074222668, 'recall': 0.7972885732730794, 'f1-score': 0.6971493084956252, 'support': 1549}","{'precision': 0.006134969325153374, 'recall': 0.25, 'f1-score': 0.011976047904191616, 'support': 4}","{'precision': 0.7323943661971831, 'recall': 0.46734571599760333, 'f1-score': 0.570592538405267, 'support': 1669}",0.625698,"{'precision': 0.45262913658166815, 'recall': 0.504878096423561, 'f1-score': 0.4265726316016946, 'support': 3222}","{'precision': 0.6771497188179119, 'recall': 0.6256983240223464, 'f1-score': 0.6307421258999786, 'support': 3222}",0.630742
6,0.5749,0.753603,"{'precision': 0.6058174523570712, 'recall': 0.8074866310160428, 'f1-score': 0.6922636103151862, 'support': 1496}","{'precision': 0.018404907975460124, 'recall': 0.3333333333333333, 'f1-score': 0.03488372093023256, 'support': 9}","{'precision': 0.7502347417840376, 'recall': 0.46534653465346537, 'f1-score': 0.5744069015097053, 'support': 1717}",0.623836,"{'precision': 0.4581523673721897, 'recall': 0.5353888330009472, 'f1-score': 0.43385141091837465, 'support': 3222}","{'precision': 0.6811364384050745, 'recall': 0.6238361266294227, 'f1-score': 0.6276216525177079, 'support': 3222}",0.627622
7,0.5629,0.747373,"{'precision': 0.6108324974924775, 'recall': 0.810918774966711, 'f1-score': 0.6967963386727689, 'support': 1502}","{'precision': 0.03680981595092025, 'recall': 0.4, 'f1-score': 0.06741573033707865, 'support': 15}","{'precision': 0.7502347417840376, 'recall': 0.4686217008797654, 'f1-score': 0.5768953068592056, 'support': 1705}",0.627871,"{'precision': 0.4659590184091451, 'recall': 0.5598468252821589, 'f1-score': 0.44703579195635107, 'support': 3222}","{'precision': 0.6819282412212133, 'recall': 0.6278708876474239, 'f1-score': 0.6304177016873063, 'support': 3222}",0.630418
8,0.5634,0.736409,"{'precision': 0.6399197592778335, 'recall': 0.8070841239721696, 'f1-score': 0.7138461538461539, 'support': 1581}","{'precision': 0.03067484662576687, 'recall': 0.45454545454545453, 'f1-score': 0.05747126436781609, 'support': 11}","{'precision': 0.7361502347417841, 'recall': 0.48098159509202454, 'f1-score': 0.5818181818181819, 'support': 1630}",0.640906,"{'precision': 0.46891494688179486, 'recall': 0.5808703912032162, 'f1-score': 0.4510452000107173, 'support': 3222}","{'precision': 0.6865224845934967, 'recall': 0.6409062693978895, 'f1-score': 0.6448127217574338, 'support': 3222}",0.644813
9,0.5571,0.745764,"{'precision': 0.6078234704112337, 'recall': 0.8150638870208473, 'f1-score': 0.6963516230968112, 'support': 1487}","{'precision': 0.03680981595092025, 'recall': 0.375, 'f1-score': 0.0670391061452514, 'support': 16}","{'precision': 0.7549295774647887, 'recall': 0.4677137870855148, 'f1-score': 0.5775862068965517, 'support': 1719}",0.627561,"{'precision': 0.46652095460898096, 'recall': 0.552592558035454, 'f1-score': 0.44699231204620476, 'support': 3222}","{'precision': 0.6834718811976075, 'recall': 0.62756052141527, 'f1-score': 0.6298628736494273, 'support': 3222}",0.629863
10,0.5601,0.740183,"{'precision': 0.623370110330993, 'recall': 0.8124183006535948, 'f1-score': 0.7054483541430193, 'support': 1530}","{'precision': 0.03680981595092025, 'recall': 0.4, 'f1-score': 0.06741573033707865, 'support': 15}","{'precision': 0.7455399061032864, 'recall': 0.47346451997614786, 'f1-score': 0.5791393143690736, 'support': 1677}",0.634078,"{'precision': 0.4685732774617332, 'recall': 0.5619609402099143, 'f1-score': 0.4506677996163906, 'support': 3222}","{'precision': 0.684226827616665, 'recall': 0.6340782122905028, 'f1-score': 0.6367361415241503, 'support': 3222}",0.636736


Trainer is attempting to log a value of "{'precision': 0.40320962888665995, 'recall': 0.715939447907391, 'f1-score': 0.5158806544754573, 'support': 1123}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7474178403755869, 'recall': 0.3797709923664122, 'f1-score': 0.5036380892122746, 'support': 2096}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.3835424897540823, 'recall': 0.36523681342460107, 'f1-score': 0.339839581229244,

Total training time: ~46.0 minutes


In [17]:
trainer.evaluate()

Trainer is attempting to log a value of "{'precision': 0.6399197592778335, 'recall': 0.8070841239721696, 'f1-score': 0.7138461538461539, 'support': 1581}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.03067484662576687, 'recall': 0.45454545454545453, 'f1-score': 0.05747126436781609, 'support': 11}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7361502347417841, 'recall': 0.48098159509202454, 'f1-score': 0.5818181818181819, 'support': 1630}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.46891494688179486, 'recall': 

{'eval_loss': 0.7364094257354736,
 'eval_0': {'precision': 0.6399197592778335,
  'recall': 0.8070841239721696,
  'f1-score': 0.7138461538461539,
  'support': 1581},
 'eval_1': {'precision': 0.03067484662576687,
  'recall': 0.45454545454545453,
  'f1-score': 0.05747126436781609,
  'support': 11},
 'eval_2': {'precision': 0.7361502347417841,
  'recall': 0.48098159509202454,
  'f1-score': 0.5818181818181819,
  'support': 1630},
 'eval_accuracy': 0.6409062693978895,
 'eval_macro avg': {'precision': 0.46891494688179486,
  'recall': 0.5808703912032162,
  'f1-score': 0.4510452000107173,
  'support': 3222},
 'eval_weighted avg': {'precision': 0.6865224845934967,
  'recall': 0.6409062693978895,
  'f1-score': 0.6448127217574338,
  'support': 3222},
 'eval_f1': 0.6448127217574338,
 'eval_runtime': 23.8986,
 'eval_samples_per_second': 134.82,
 'eval_steps_per_second': 6.779,
 'epoch': 10.0}

## Save best model checkpoint

In [18]:
trainer.save_model(os.path.join(model_dir, 'final_model'))