In [1]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    get_scheduler,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from torch.optim import AdamW
from datasets import load_dataset
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report
from torch.nn import CrossEntropyLoss

In [3]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

Using device: mps


In [4]:
class FocalLoss(CrossEntropyLoss):
    def __init__(self, alpha=1.0, gamma=3.0, reduction='mean', ignore_index=-100, label_smoothing=0.1):
        super().__init__(weight=None, reduction=reduction, ignore_index=ignore_index, label_smoothing=label_smoothing)
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        ce_loss = super().forward(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss

In [5]:
dataset = load_dataset('csv', data_files={'train': 'bias_in_bios_gender_dataset.csv'})
dataset = dataset['train'].train_test_split(test_size=0.2)

In [6]:
model_name = 'NLP-LTU/bertweet-large-sexism-detector'
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [7]:
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=256)

In [8]:
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

Map: 100%|██████████| 8000/8000 [00:00<00:00, 13136.96 examples/s]
Map: 100%|██████████| 2000/2000 [00:00<00:00, 17490.04 examples/s]


In [9]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average='macro')
    cm = confusion_matrix(labels, predictions)
    report = classification_report(labels, predictions, target_names=['unbiased', 'biased'], output_dict=True)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", report)
    return {'accuracy': acc, 'f1_macro': f1}

In [10]:
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

In [11]:
for name, param in model.named_parameters():
    if 'layer' in name and int(name.split('.')[3]) < 8:
        param.requires_grad = False
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=Tru

In [12]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        class_weights = torch.tensor([1.0, 1.5]).to(logits.device)  # Boost biased if under-recalled
        loss_fn = FocalLoss(alpha=1.0, gamma=3.0)
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

In [13]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=8,  # Small for memory on large data/MPS; increase to 16+ if possible
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='f1_macro',
    greater_is_better=True,
    learning_rate=1e-5,
    fp16=False,
    bf16=True
)

In [None]:
optimizer = AdamW(model.parameters(), lr=training_args.learning_rate)
num_training_steps = len(tokenized_datasets['train']) // training_args.per_device_train_batch_size * training_args.num_train_epochs
scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=training_args.warmup_steps, num_training_steps=num_training_steps)

In [15]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    optimizers=(optimizer, scheduler),
)

  trainer = CustomTrainer(


In [16]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,0.0559,0.021511,0.8895,0.889477
2,0.0124,0.022086,0.8965,0.896468
3,0.0082,0.018014,0.9075,0.907435
4,0.0022,0.022716,0.9085,0.908465
5,0.0016,0.024404,0.908,0.907976


Confusion Matrix:
 [[875 113]
 [108 904]]
Classification Report:
 {'unbiased': {'precision': 0.8901322482197355, 'recall': 0.8856275303643725, 'f1-score': 0.8878741755454084, 'support': 988.0}, 'biased': {'precision': 0.8888888888888888, 'recall': 0.8932806324110671, 'f1-score': 0.8910793494332183, 'support': 1012.0}, 'accuracy': 0.8895, 'macro avg': {'precision': 0.8895105685543121, 'recall': 0.8894540813877199, 'f1-score': 0.8894767624893134, 'support': 2000.0}, 'weighted avg': {'precision': 0.889503108398327, 'recall': 0.8895, 'f1-score': 0.8894959935326402, 'support': 2000.0}}
Confusion Matrix:
 [[914  74]
 [133 879]]
Classification Report:
 {'unbiased': {'precision': 0.8729703915950334, 'recall': 0.9251012145748988, 'f1-score': 0.8982800982800982, 'support': 988.0}, 'biased': {'precision': 0.9223504721930745, 'recall': 0.8685770750988142, 'f1-score': 0.8946564885496183, 'support': 1012.0}, 'accuracy': 0.8965, 'macro avg': {'precision': 0.897660431894054, 'recall': 0.89683914483685

TrainOutput(global_step=5000, training_loss=0.04366940427981317, metrics={'train_runtime': 8021.8569, 'train_samples_per_second': 4.986, 'train_steps_per_second': 0.623, 'total_flos': 1.863862726656e+16, 'train_loss': 0.04366940427981317, 'epoch': 5.0})

In [17]:
trainer.save_model('./fine_tuned_bertTweetClassifier')

In [29]:
new_model_path='./fine_tuned_bertTweetClassifier'

In [None]:
modelUd = AutoModelForSequenceClassification.from_pretrained(new_model_path)
tokenizerUd = AutoTokenizer.from_pretrained(new_model_path)

In [31]:
modelUd.config.id2label = {0: 'Unbiased', 1: 'Biased'}
modelUd.config.label2id = {'Unbiased': 0, 'Biased': 1}

In [32]:
modelUd.save_pretrained('./fine_tuned_bias_classifier_remapped')
tokenizerUd.save_pretrained('./fine_tuned_bias_classifier_remapped')

('./fine_tuned_bias_classifier_remapped/tokenizer_config.json',
 './fine_tuned_bias_classifier_remapped/special_tokens_map.json',
 './fine_tuned_bias_classifier_remapped/vocab.json',
 './fine_tuned_bias_classifier_remapped/merges.txt',
 './fine_tuned_bias_classifier_remapped/added_tokens.json',
 './fine_tuned_bias_classifier_remapped/tokenizer.json')

In [33]:
classifier2 = pipeline('text-classification', model='./fine_tuned_bias_classifier_remapped', tokenizer='./fine_tuned_bias_classifier_remapped')

Device set to use mps:0


In [18]:
from transformers import pipeline
classifier1 = pipeline('text-classification', model='./fine_tuned_bertTweetClassifier', device=0 if device.type == 'mps' else -1)


Device set to use mps:0


In [19]:
print(classifier1("Only Men can become actors in America not women.Women cannot get good roles in movies made in america. Women are expected to stick to working in kitchen more often"))  # Biased test
print(classifier1("John is a successful actor in America, but women like Mary cannot become actors because only men are suited for the role"))
print(classifier1("American actors are diverse."))  # Unbiased test
print(classifier1("Only women can work as nurses and cook and househelps")) 
print(classifier1("Men are always strong and women are always weak, this is the truth and women should accept it")) # Biased test


[{'label': 'sexist', 'score': 0.9065775275230408}]
[{'label': 'sexist', 'score': 0.9001874327659607}]
[{'label': 'not sexist', 'score': 0.8941965699195862}]
[{'label': 'sexist', 'score': 0.7785928249359131}]
[{'label': 'sexist', 'score': 0.8025675415992737}]


In [34]:

print(classifier2("Only Men can become actors in America not women.Women cannot get good roles in movies made in america. Women are expected to stick to working in kitchen more often"))  # Biased test
print(classifier2("John is a successful actor in America, but women like Mary cannot become actors because only men are suited for the role"))
print(classifier2("American actors are diverse."))  # Unbiased test
print(classifier2("Only women can work as nurses and cook and househelps")) 
print(classifier2("Men are always strong and women are always weak, this is the truth and women should accept it")) # Biased test

[{'label': 'Biased', 'score': 0.9065775275230408}]
[{'label': 'Biased', 'score': 0.9001874327659607}]
[{'label': 'Unbiased', 'score': 0.8941965699195862}]
[{'label': 'Biased', 'score': 0.7785928249359131}]
[{'label': 'Biased', 'score': 0.8025675415992737}]


In [20]:
print(classifier1("She graduated with honors in 2012. Having more than 5 years of diverse experiences, especially in NURSE PRACTITIONER, Pamela A Boyd affiliates with many hospitals including Eastside Medical Center, Clearview Regional Medical Center, and cooperates with other doctors and specialists in many medical groups including Greater Gwinnett Internal Medicine Associates LLC, Grayson Primary Care LLC. Call Pamela A Boyd on phone number (678) 225-4999 for more information and advises or to book an appointment."))  # Biased test
print(classifier1("Prior to joining UCL, he has worked as a Catastrophe Risk Modeller at AIR Worldwide and as a Research Associate at the University of California, Irvine. His recent research focuses on developing methods and tools for multi-hazard risk and resilience assessment of critical infrastructure in developing countries."))  # Unbiased test

[{'label': 'not sexist', 'score': 0.951004147529602}]
[{'label': 'sexist', 'score': 0.9647285342216492}]


In [21]:
print(classifier1("Mr. Chad Allen Walker's NPI Number is #1730145368 and has been listed in the NPI registry for 10 years. Mr. Chad Allen Walker's practice location is listed as: 300 W 10Th Ave Columbus, OH 43210-1280 and can be reached via phone at (614) 366-7594."))

[{'label': 'sexist', 'score': 0.904565155506134}]


In [41]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer,pipeline
import torch
model = AutoModelForSequenceClassification.from_pretrained('NLP-LTU/bertweet-large-sexism-detector')
tokenizer = AutoTokenizer.from_pretrained('NLP-LTU/bertweet-large-sexism-detector') 
classifier2 = pipeline("text-classification", model=model, tokenizer=tokenizer)

# label_pred = 'not sexist' if prediction == 0 else 'sexist' 




Device set to use mps:0


In [42]:
prediction=classifier2("Every woman wants to be a model. It's codeword for 'I get everything for free and people want me' ")
print(prediction)

[{'label': 'sexist', 'score': 0.9985693693161011}]


In [43]:
print(classifier2("Only Men can become actors in America not women.Women cannot get good roles in movies made in america. Women are expected to stick to working in kitchen more often"))  # Biased test
print(classifier2("John is a successful actor in America, but women like Mary cannot become actors because only men are suited for the role"))
print(classifier2("American actors are diverse."))  # Unbiased test
print(classifier2("Only women can work as nurses and cook and househelps")) 
print(classifier2("Men are always strong and women are always weak, this is the truth and women should accept it")) # Biased test

[{'label': 'sexist', 'score': 0.987933874130249}]
[{'label': 'sexist', 'score': 0.9904969930648804}]
[{'label': 'not sexist', 'score': 0.9995484948158264}]
[{'label': 'sexist', 'score': 0.995790958404541}]
[{'label': 'sexist', 'score': 0.9985925555229187}]


In [44]:
print(classifier2("She graduated with honors in 2012. Having more than 5 years of diverse experiences, especially in NURSE PRACTITIONER, Pamela A Boyd affiliates with many hospitals including Eastside Medical Center, Clearview Regional Medical Center, and cooperates with other doctors and specialists in many medical groups including Greater Gwinnett Internal Medicine Associates LLC, Grayson Primary Care LLC. Call Pamela A Boyd on phone number (678) 225-4999 for more information and advises or to book an appointment."))  # Biased test
print(classifier2("Prior to joining UCL, he has worked as a Catastrophe Risk Modeller at AIR Worldwide and as a Research Associate at the University of California, Irvine. His recent research focuses on developing methods and tools for multi-hazard risk and resilience assessment of critical infrastructure in developing countries."))  # Unbiased test

[{'label': 'not sexist', 'score': 0.9984695315361023}]
[{'label': 'not sexist', 'score': 0.9992302656173706}]


In [28]:
print(classifier1("Only a man can act well in films , not women"))  # Biased (stereotype)
print(classifier1("Women can only cook"))  # Unbiased
print(classifier1("only men can become engineers"))  # Biased (StereoSet-like)


[{'label': 'sexist', 'score': 0.9370615482330322}]
[{'label': 'sexist', 'score': 0.9157705307006836}]
[{'label': 'sexist', 'score': 0.5840442776679993}]


In [27]:
print(classifier1("Math is only for men"))  # Unbiased (anti-stereotype)
print(classifier1("Math is  for everyone")) 
print(classifier1("Math is only for women")) 

[{'label': 'sexist', 'score': 0.7287744879722595}]
[{'label': 'not sexist', 'score': 0.6844674944877625}]
[{'label': 'sexist', 'score': 0.8942158818244934}]
