In [1]:
!pip install transformers
!pip install datasets

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [19]:
from datasets import load_dataset
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel

In [20]:
# Load the toxic Wikipedia comments dataset
dataset = load_dataset("OxAISH-AL-LLM/wiki_toxic")

In [4]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'comment_text', 'label'],
        num_rows: 127656
    })
    validation: Dataset({
        features: ['id', 'comment_text', 'label'],
        num_rows: 31915
    })
    test: Dataset({
        features: ['id', 'comment_text', 'label'],
        num_rows: 63978
    })
    balanced_train: Dataset({
        features: ['id', 'comment_text', 'label'],
        num_rows: 25868
    })
})


In [21]:
# Split the dataset into train, test, and validation sets
train_dataset = dataset['train']
test_dataset = dataset['test']
dev_dataset = dataset['validation']
balanced_dataset = dataset['balanced_train']

In [22]:
# Extract the necessary columns and create a simplified dataset
train_texts = [sample['comment_text'] for sample in train_dataset]
train_labels = [sample['label'] for sample in train_dataset]

test_texts = [sample['comment_text'] for sample in test_dataset]
test_labels = [sample['label'] for sample in test_dataset]

dev_texts = [sample['comment_text'] for sample in dev_dataset]
dev_labels = [sample['label'] for sample in dev_dataset]

In [23]:
# Print a few samples to confirm preprocessing
print("\nSample from Training Data:")
print("Text:", train_texts[0])
print("Label:", train_labels[0])


Sample from Training Data:
Text: And that's not a personal attack^^ ?
Label: 0


In [24]:
# Confirm dataset sizes
print("\nDataset Sizes:")
print("Train:", len(train_texts))
print("Test:", len(test_texts))
print("Dev:", len(dev_texts))


Dataset Sizes:
Train: 127656
Test: 63978
Dev: 31915


In [25]:
class ToxicCommentsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        # Tokenize the text
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )

        # Return input IDs, attention mask, and label
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),
        }

In [10]:
# Load pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [26]:
# Define datasets for training, testing, and validation
train_dataset = ToxicCommentsDataset(train_texts, train_labels, tokenizer)
test_dataset = ToxicCommentsDataset(test_texts, test_labels, tokenizer)
dev_dataset = ToxicCommentsDataset(dev_texts, dev_labels, tokenizer)

In [27]:
from transformers import AdamW
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import numpy as np
import random

# Set a fixed seed for reproducibility
SEED = 2024
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # For multi-GPU setups
np.random.seed(SEED)
random.seed(SEED)

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [28]:
def seed_worker(worker_id):
    # Ensure workers use the same seed
    worker_seed = SEED + worker_id
    np.random.seed(worker_seed)
    random.seed(worker_seed)

In [29]:
# Define data loaders for batching
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    worker_init_fn=seed_worker,
    generator=torch.Generator().manual_seed(SEED)  # Ensure reproducibility in DataLoader
)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
dev_loader = DataLoader(dev_dataset, batch_size=16, shuffle=False)

In [30]:
# Verify preprocessing with a sample batch
sample_batch = next(iter(train_loader))
print("\nSample Batch from Train Loader:")
print("Input IDs:", sample_batch["input_ids"].shape)
print("Attention Mask:", sample_batch["attention_mask"].shape)
print("Labels:", sample_batch["label"].shape)


Sample Batch from Train Loader:
Input IDs: torch.Size([16, 128])
Attention Mask: torch.Size([16, 128])
Labels: torch.Size([16])


In [31]:
class BertToxicityClassifier(nn.Module):
    def __init__(self, num_classes):
        super(BertToxicityClassifier, self).__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(p=0.3)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)  # Two outputs for binary classification

    def forward(self, input_ids, attention_mask):
        # Pass input through BERT
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Use [CLS] token representation (hidden state of the first token)
        pooled_output = outputs.pooler_output
        # Apply dropout and classification head
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

# Instantiate the model
num_classes = 1
model = BertToxicityClassifier(num_classes)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print("\nModel Loaded and Ready for Training!")

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]


Model Loaded and Ready for Training!


In [None]:
from transformers import AdamW
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm

# Training parameters
epochs = 3
learning_rate = 2e-5
batch_size = 16

# Loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Learning rate scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    print(f"\nEpoch {epoch+1}/{epochs}")
    for batch in tqdm(train_loader):
        # Move data to the same device as the model
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        # Convert labels to float to fix the error
        labels = labels.float().unsqueeze(1)

        # Forward pass and compute loss
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Scheduler step
    scheduler.step()

    # Epoch results
    avg_loss = total_loss / len(train_loader)
    accuracy = correct / total
    print(f"Training Loss: {avg_loss:.4f}, Training Accuracy: {accuracy:.4f}")

print("\nTraining Complete!")




Epoch 1/3


100%|██████████| 7979/7979 [49:36<00:00,  2.68it/s]


Training Loss: 0.0966, Training Accuracy: 14.3784

Epoch 2/3


100%|██████████| 7979/7979 [49:56<00:00,  2.66it/s]


Training Loss: 0.0508, Training Accuracy: 14.3785

Epoch 3/3


100%|██████████| 7979/7979 [49:55<00:00,  2.66it/s]

Training Loss: 0.0388, Training Accuracy: 14.3785

Training Complete!





In [None]:
from google.colab import drive
import torch
import os

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [17]:
!unzip /content/bert_tc_classifier-20241212T231748Z-001.zip
!rm /content/bert_tc_classifier-20241212T231748Z-001.zip

Archive:  /content/bert_tc_classifier-20241212T231748Z-001.zip
  inflating: bert_tc_classifier/special_tokens_map.json  
  inflating: bert_tc_classifier/tokenizer_config.json  
  inflating: bert_tc_classifier/vocab.txt  
  inflating: bert_tc_classifier/model.pth  


In [None]:
# Define the save directory in Google Drive
save_directory = "/content/bert_tc_classifier"
os.makedirs(save_directory, exist_ok=True)

# Save the model's state dict
torch.save(model.state_dict(), os.path.join(save_directory, "model.pth"))

# Save the tokenizer (if applicable)
tokenizer.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}")

In [20]:
import torch
import os

# Reinitialize the model
save_directory = "/content/bert_tc_classifier"
model = BertToxicityClassifier(num_classes)

# Load the saved state dictionary
model.load_state_dict(torch.load(os.path.join(save_directory, "model.pth")))

# Move the model to the appropriate device
model.to(device)

# Load the tokenizer (if applicable)
tokenizer = BertTokenizer.from_pretrained(save_directory)

print("Model and tokenizer successfully loaded!")

  model.load_state_dict(torch.load(os.path.join(save_directory, "model.pth")))


Model and tokenizer successfully loaded!


In [21]:
from sklearn.metrics import classification_report

# Evaluation loop
def evaluate_model(model, data_loader):
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in tqdm(data_loader):
            # Move data to the same device as the model
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask)
            _, predicted = torch.max(outputs, 1)

            # Collect predictions and true labels
            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return predictions, true_labels

# Evaluate the model on the test set
predictions, true_labels = evaluate_model(model, test_loader)

# Generate a classification report
print("\nClassification Report:")
print(classification_report(true_labels, predictions, digits=4))

100%|██████████| 3999/3999 [08:46<00:00,  7.59it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:
              precision    recall  f1-score   support

           0     0.9024    1.0000    0.9487     57735
           1     0.0000    0.0000    0.0000      6243

    accuracy                         0.9024     63978
   macro avg     0.4512    0.5000    0.4744     63978
weighted avg     0.8144    0.9024    0.8561     63978



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
# Controlled prompts for bias detection
comments = [
    "You are a great teacher.",
    "You are a terrible nurse.",
    "You are an amazing software engineer.",
    "You are a horrible surgeon.",
    "You are a fantastic rapper."
]

# Tokenize prompts
inputs = tokenizer(comments, padding=True, truncation=True, return_tensors="pt").to(device)

# Get model predictions
model.eval()
with torch.no_grad():
    outputs = model(inputs["input_ids"], inputs["attention_mask"])

# Convert predictions to probabilities
probs = torch.sigmoid(outputs).cpu().numpy()

# Define a threshold for classification
threshold = 0.5
predictions = (probs >= threshold).astype(int)

# Print predictions for each comment
print("\nToxicity Classification Results:")
for i in range(len(comments)):
    print(f"Comment: {comments[i]} -> Predicted: {'Toxic' if predictions[i][0] == 1 else 'Non-toxic'}")
    print("-" * 50)


Toxicity Classification Results:
Comment: You are a great teacher. -> Predicted: Non-toxic
--------------------------------------------------
Comment: You are a terrible nurse. -> Predicted: Toxic
--------------------------------------------------
Comment: You are an amazing software engineer. -> Predicted: Non-toxic
--------------------------------------------------
Comment: You are a horrible surgeon. -> Predicted: Toxic
--------------------------------------------------
Comment: You are a fantastic rapper. -> Predicted: Non-toxic
--------------------------------------------------


## Retrain with Balanced Dataset

In [32]:
retrain_texts = [sample['comment_text'] for sample in balanced_dataset]
retrain_labels = [sample['label'] for sample in balanced_dataset]

In [33]:
# Print a few samples to confirm preprocessing
print("\nSample from Training Data:")
print("Text:", retrain_texts[0])
print("Label:", retrain_labels[0])


Sample from Training Data:
Text: "
Not if I can help it. Mudslide is more like it....127 "
Label: 0


In [34]:
print("Train:", len(retrain_texts))

Train: 25868


In [35]:
retrain_dataset = ToxicCommentsDataset(retrain_texts, retrain_labels, tokenizer)

In [36]:
# Define data loaders for batching
retrain_loader = DataLoader(
    retrain_dataset,
    batch_size=16,
    shuffle=True,
    worker_init_fn=seed_worker,
    generator=torch.Generator().manual_seed(SEED)  # Ensure reproducibility in DataLoader
)
# test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
# dev_loader = DataLoader(dev_dataset, batch_size=16, shuffle=False)

In [37]:
# Verify preprocessing with a sample batch
sample_batch = next(iter(retrain_loader))
print("\nSample Batch from ReTrain Loader:")
print("Input IDs:", sample_batch["input_ids"].shape)
print("Attention Mask:", sample_batch["attention_mask"].shape)
print("Labels:", sample_batch["label"].shape)


Sample Batch from ReTrain Loader:
Input IDs: torch.Size([16, 128])
Attention Mask: torch.Size([16, 128])
Labels: torch.Size([16])


In [38]:
from transformers import AdamW
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm

# Training parameters
epochs = 3
learning_rate = 2e-5
batch_size = 16

# Loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Learning rate scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    print(f"\nEpoch {epoch+1}/{epochs}")
    for batch in tqdm(retrain_loader):
        # Move data to the same device as the model
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        # Convert labels to float to fix the error
        labels = labels.float().unsqueeze(1)

        # Forward pass and compute loss
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Scheduler step
    scheduler.step()

    # Epoch results
    avg_loss = total_loss / len(retrain_loader)
    accuracy = correct / total
    print(f"Training Loss: {avg_loss:.4f}, Training Accuracy: {accuracy:.4f}")

print("\nTraining Complete!")




Epoch 1/3


100%|██████████| 1617/1617 [09:44<00:00,  2.77it/s]


Training Loss: 0.1975, Training Accuracy: 7.9989

Epoch 2/3


100%|██████████| 1617/1617 [09:44<00:00,  2.77it/s]


Training Loss: 0.0870, Training Accuracy: 7.9991

Epoch 3/3


100%|██████████| 1617/1617 [09:44<00:00,  2.77it/s]

Training Loss: 0.0687, Training Accuracy: 7.9992

Training Complete!





In [39]:
from sklearn.metrics import classification_report

# Evaluation loop
def evaluate_model(model, data_loader):
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in tqdm(data_loader):
            # Move data to the same device as the model
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask)
            _, predicted = torch.max(outputs, 1)

            # Collect predictions and true labels
            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return predictions, true_labels

# Evaluate the model on the test set
predictions, true_labels = evaluate_model(model, test_loader)

# Generate a classification report
print("\nClassification Report:")
print(classification_report(true_labels, predictions, digits=4))

100%|██████████| 3999/3999 [08:43<00:00,  7.64it/s]


Classification Report:
              precision    recall  f1-score   support

           0     0.9024    1.0000    0.9487     57735
           1     0.0000    0.0000    0.0000      6243

    accuracy                         0.9024     63978
   macro avg     0.4512    0.5000    0.4744     63978
weighted avg     0.8144    0.9024    0.8561     63978




  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
