# Hate Speech Detection — Research‐Level Notebook

This notebook implements a robust, research‐oriented pipeline for hate speech detection across HASOC 2019, OLID (A/B/C), and two distant datasets. It incorporates:

- **BERT fine-tuning** with transfer learning
- **Adversarial training (FGM)** for robustness
- **MixUp data augmentation** in embedding space
- **Class‐balance weighting** to counter dataset imbalance
- **Early stopping & checkpointing** for best‐model selection
- **Attention‐based explainability**
- **Comprehensive EDA & visualization**

## 1. Setup & Configuration

In [12]:
# Cell 1: Install & import dependencies
# !pip install transformers torch pandas scikit-learn openpyxl emoji matplotlib seaborn

import os, re, json, time
import pandas as pd
import numpy as np
import emoji
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
SEED = 42
BATCH_SIZE = 16
EPOCHS = 5
LR = 2e-5
EPSILON = 0.1        # for FGM
ALPHA = 0.2          # for MixUp
PATIENCE = 2         # early stopping
MAX_LEN = 128
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
OUTPUT_DIR = 'outputs/'

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Reproducibility
np.random.seed(SEED)
torch.manual_seed(SEED)
if DEVICE.type=='cuda': torch.cuda.manual_seed_all(SEED)

print(f"Using device: {DEVICE}")

Using device: cuda


## 2. Exploratory Data Analysis (EDA)

In [13]:
# Cell 2: Visualize dataset balance
def plot_label_dist(df, title):
    sns.countplot(x='label', data=df)
    plt.title(title)
    plt.show()

# After loading each source, call:
# plot_label_dist(df_olid_train, 'OLID Train Label Distribution')

## 3. Data Loading & Preprocessing

### 3.1 Cleaning Function

In [14]:
# Cell 3: Text cleaning — anonymize, remove URLs, emojis, punctuation
import string

def clean_text(text: str) -> str:
    text = re.sub(r"@\w+", "@USER", text)
    text = re.sub(r"http\S+|www\.[^ ]+|URL", "", text)
    text = emoji.replace_emoji(text, replace="")
    text = text.translate(str.maketrans('', '', string.punctuation))
    return re.sub(r"\s+", " ", text).strip().lower()

### 3.2 Load Functions

In [15]:
# Cell 4: OLID loader
def load_olid(path: str, task: str):
    df = pd.read_csv(path, sep='\t').dropna(subset=['tweet'])
    # Map subtasks and filter
    if task=='a': df = df[['id','tweet','subtask_a']].rename(columns={'subtask_a':'label'});
    elif task=='b': df=df[df['subtask_a']=='OFF'][['id','tweet','subtask_b']].rename(columns={'subtask_b':'label'});
    else: df=df[df['subtask_b']=='TIN'][['id','tweet','subtask_c']].rename(columns={'subtask_c':'label'});
    # Convert to numeric
    mapping = {'OFF':1,'NOT':0,'TIN':1,'UNT':0,'IND':0,'GRP':1,'OTH':2}
    df['label'] = df['label'].map(mapping)
    df['tweet'] = df['tweet'].apply(clean_text)
    return df.dropna()

In [16]:
# Cell 5: HASOC loader
def load_hasoc(path: str):
    df = pd.read_csv(path, sep='\t').dropna(subset=['tweet','HS'])
    df = df[['id','tweet','HS']].rename(columns={'HS':'label'})
    df['label'] = (df['label']=='HATE').astype(int)
    df['tweet'] = df['tweet'].apply(clean_text)
    return df

In [17]:
# Cell 6: Distant dataset loader
def load_distant(path: str, sheet: str, col_map: dict, label_map: dict=None):
    df = pd.read_excel(path, sheet_name=sheet)
    df = df.rename(columns=col_map)[['id','tweet','label']]
    if label_map: df['label']=df['label'].map(label_map)
    df['tweet']=df['tweet'].astype(str).apply(clean_text)
    return df.dropna()

### 3.3 Combine & Split

In [18]:
# Cell 7: Load all sources and inspect
data_dir=os.path.join(os.getcwd(), 'OlidPreprcessed', 'OLID_Tain_ATUSER_URL_EmojiRemoved_Pedro.txt')
# OLID
olid_train = load_olid(data_dir+'OLID_Tain_ATUSER_URL_EmojiRemoved_Pedro.txt','a')
hasoc = load_hasoc(data_dir+'hasoc2019_en_train.tsv')
# Distant
distA = load_distant(data_dir+'task_a_part8.xlsx','Sheet1',{'tweet_text':'tweet','hate_flag':'label'},{'hate':1,'no_hate':0})
distB = load_distant(data_dir+'task_b_distant.xlsx','Sheet1',{'text':'tweet','offensive':'label'},{0:0,1:1})
distC = load_distant(data_dir+'task_c_distant_ann.xlsx','Sheet1',{'msg':'tweet','class':'label'},{'IND':0,'GRP':1,'OTH':2})

# Concatenate binary-class sources
all_binary = pd.concat([olid_train, hasoc, distA, distB], ignore_index=True)
train_df, val_df = train_test_split(all_binary, test_size=0.15, stratify=all_binary['label'], random_state=SEED)
print(f"Binary—Train: {train_df.shape}, Val: {val_df.shape}")

# Keep distC separate for multiclass experiments

FileNotFoundError: [Errno 2] No such file or directory: '/OlidPreprcessed/OLID_Tain_ATUSER_URL_EmojiRemoved_Pedro.txtOLID_Tain_ATUSER_URL_EmojiRemoved_Pedro.txt'

## 4. Dataset & MixUp Augmentation

In [None]:
# Cell 8: Dataset class with MixUp
TOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased')

class HateSpeechDataset(Dataset):
    def __init__(self, df, mixup=False):
        self.texts, self.labels = df['tweet'].tolist(), df['label'].tolist()
        self.mixup, self.alpha = mixup, ALPHA

    def __len__(self): return len(self.texts)

    def __getitem__(self, idx):
        enc = TOKENIZER(self.texts[idx], padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors='pt')
        x1, m1, y1 = enc['input_ids'].squeeze(), enc['attention_mask'].squeeze(), torch.tensor(self.labels[idx],dtype=torch.float)
        if self.mixup:
            j = np.random.randint(len(self.texts))
            enc2 = TOKENIZER(self.texts[j], padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors='pt')
            x2, m2, y2 = enc2['input_ids'].squeeze(), enc2['attention_mask'].squeeze(), torch.tensor(self.labels[j],dtype=torch.float)
            lam = np.random.beta(self.alpha,self.alpha)
            x1 = (lam*x1 + (1-lam)*x2).long()
            m1 = torch.max(m1,m2)
            y1 = lam*y1 + (1-lam)*y2
        return {'input_ids':x1,'attention_mask':m1,'labels':y1}

# Loaders
train_loader = DataLoader(HateSpeechDataset(train_df,mixup=True), batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(HateSpeechDataset(val_df), batch_size=BATCH_SIZE)

## 5. Model, Loss Function & FGM

In [None]:
# Cell 9: Initialize model & weighted loss
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=2, output_attentions=True).to(DEVICE)

# Compute class weights to address imbalance
weights = train_df['label'].value_counts(normalize=True).sort_index().rpow(-1).values
class_weights = torch.tensor(weights, dtype=torch.float).to(DEVICE)
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

optimizer = AdamW(model.parameters(), lr=LR)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=len(train_loader)*EPOCHS)

# FGM utilities
def fgm_attack(epsilon=EPSILON):
    emb = model.bert.embeddings.word_embeddings
    if emb.weight.grad is None: return None
    delta = epsilon * emb.weight.grad.sign()
    emb.weight.data.add_(delta)
    return delta

def fgm_restore(delta):
    model.bert.embeddings.word_embeddings.weight.data.sub_(delta)

## 6. Training Loop with Early Stopping

In [None]:
# Cell 10: Train + validate with checkpointing
best_loss, patience_counter = float('inf'), 0
history = {'train_loss':[], 'val_loss':[], 'val_acc':[]}

for epoch in range(EPOCHS):
    # Training
    model.train(); total_loss=0
    for batch in train_loader:
        ids, mask, labels = batch['input_ids'].to(DEVICE), batch['attention_mask'].to(DEVICE), batch['labels'].to(DEVICE).long()
        optimizer.zero_grad()
        out = model(ids, attention_mask=mask, labels=labels)
        loss = out.loss; loss.backward(retain_graph=True)
        total_loss += loss.item()
        # FGM adversarial step
        delta = fgm_attack()
        if delta is not None:
            out2 = model(ids, attention_mask=mask, labels=labels)
            out2.loss.backward()
            fgm_restore(delta)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step(); scheduler.step()
    avg_train = total_loss/len(train_loader)
    history['train_loss'].append(avg_train)
    # Validation
    model.eval(); val_loss, correct, total = 0,0,0
    with torch.no_grad():
        for batch in val_loader:
            ids, mask, labels = batch['input_ids'].to(DEVICE), batch['attention_mask'].to(DEVICE), batch['labels'].to(DEVICE).long()
            out = model(ids, attention_mask=mask, labels=labels)
            val_loss += out.loss.item()
            preds = out.logits.argmax(dim=1)
            correct += (preds==labels).sum().item(); total+=labels.size(0)
    avg_val = val_loss/len(val_loader); val_acc=correct/total
    history['val_loss'].append(avg_val); history['val_acc'].append(val_acc)
    print(f"Epoch {epoch+1}/{EPOCHS}: Train={avg_train:.4f}, Val={avg_val:.4f}, Acc={val_acc:.4f}")
    # Early stopping
    if avg_val < best_loss:
        best_loss, patience_counter = avg_val, 0
        torch.save(model.state_dict(), os.path.join(OUTPUT_DIR,'best_model.pt'))
    else:
        patience_counter+=1
        if patience_counter>=PATIENCE:
            print("Early stopping triggered."); break

## 7. Evaluation & Detailed Metrics

In [None]:
# Cell 11: Load best model
model.load_state_dict(torch.load(os.path.join(OUTPUT_DIR,'best_model.pt')))

# Evaluate on validation
y_true,y_pred = [],[]
model.eval()
with torch.no_grad():
    for batch in val_loader:
        ids,mask,labels = batch['input_ids'].to(DEVICE),batch['attention_mask'].to(DEVICE),batch['labels'].to(DEVICE).long()
        out = model(ids, attention_mask=mask)
        preds = out.logits.argmax(dim=1).cpu().numpy()
        y_pred.extend(preds); y_true.extend(labels.cpu().numpy())

# Classification report
print(classification_report(y_true,y_pred))
# Confusion matrix
cm=confusion_matrix(y_true,y_pred)
plt.figure(figsize=(5,4)); sns.heatmap(cm,annot=True,fmt='d'); plt.title('Val Confusion Matrix'); plt.show()

## 8. Visualize Training Curves

In [None]:
# Cell 12: Plot loss & accuracy
epochs_ran = len(history['train_loss'])
plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
plt.plot(history['train_loss'], label='Train'); plt.plot(history['val_loss'], label='Val'); plt.title('Loss'); plt.legend()
plt.subplot(1,2,2)
plt.plot(history['val_acc'], label='Val Acc'); plt.title('Accuracy'); plt.legend()
plt.show()

## 9. Explainability via Attention

In [None]:
def visualize_attention(text, top_k=20):
    enc = TOKENIZER.encode_plus(text, return_tensors='pt', max_length=MAX_LEN, padding='max_length', truncation=True)
    inputs = {k:v.to(DEVICE) for k,v in enc.items()}
    model.eval()
    with torch.no_grad():
        out = model(**inputs, output_attentions=True)
    attns = torch.stack(out.attentions)  # layers x batch x heads x seq x seq
    cls_attn = attns[:,:, :, 0, :].mean(dim=[0,1]).cpu().numpy()
    tokens = TOKENIZER.convert_ids_to_tokens(enc['input_ids'][0])
    idxs=[i for i,m in enumerate(enc['attention_mask'][0]) if m==1]
    tokens=[tokens[i] for i in idxs]; scores=cls_attn[idxs]
    # Show top_k tokens
    top_idx = np.argsort(scores)[-top_k:]
    plt.figure(figsize=(8,2))
    sns.barplot(x=[tokens[i] for i in top_idx], y=scores[top_idx])
    plt.xticks(rotation=45); plt.title(text[:50]); plt.show()

# Example
visualize_attention(val_df['tweet'].iloc[0])

## 10. Next Steps & Research Directions

```markdown
- Incorporate **domain adaptation** across datasets.  
- Explore **GAN-based text generators** for stronger adversarial examples.  
- Evaluate on **multilingual** hate speech corpora.  
- Tune **thresholds** for precision/recall trade‑offs in application settings.  
- Package into a reusable **Python module** and **CLI tool** for deployment.  

## 11. Testing on Test Sets

In [None]:
# Cell 14: Evaluate on OLID Subtask A (OFF vs. NOT)
print("=== Subtask A Evaluation ===")
df_testA = load_olid(paths['olid_A'], 'a')
test_loaderA = DataLoader(HateSpeechDataset(df_testA), batch_size=BATCH_SIZE)
y_true, y_pred = [], []
model.eval()
with torch.no_grad():
    for batch in test_loaderA:
        ids = batch['input_ids'].to(DEVICE)
        mask = batch['attention_mask'].to(DEVICE)
        out = model(ids, attention_mask=mask)
        preds = out.logits.argmax(dim=1).cpu().numpy()
        y_pred.extend(preds)
        y_true.extend(batch['labels'].cpu().numpy().astype(int))
print(classification_report(y_true, y_pred))
sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d')
plt.title('Subtask A Confusion Matrix')
plt.show()

In [None]:
# Cell 15: Evaluate on OLID Subtask B (Targeted vs. Untargeted)
print("=== Subtask B Evaluation ===")
df_testB = load_olid(paths['olid_B'], 'b')
test_loaderB = DataLoader(HateSpeechDataset(df_testB), batch_size=BATCH_SIZE)
y_true, y_pred = [], []
model.eval()
with torch.no_grad():
    for batch in test_loaderB:
        ids = batch['input_ids'].to(DEVICE)
        mask = batch['attention_mask'].to(DEVICE)
        out = model(ids, attention_mask=mask)
        preds = out.logits.argmax(dim=1).cpu().numpy()
        y_pred.extend(preds)
        y_true.extend(batch['labels'].cpu().numpy().astype(int))
print(classification_report(y_true, y_pred))
sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d')
plt.title('Subtask B Confusion Matrix')
plt.show()

In [None]:
# Cell 16: Evaluate on Distant Subtask C (IND/GRP/OTH)
print("=== Subtask C Evaluation ===")
# Load or reload a model trained with three labels on distant C
model_c = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=3, output_attentions=False).to(DEVICE)
model_c.load_state_dict(torch.load(os.path.join(OUTPUT_DIR, 'best_model_subtask_c.pt')))

df_testC = load_distant(paths['distant_C'], 'Sheet1', {'msg':'tweet','class':'label'},
                        {'IND':0,'GRP':1,'OTH':2})
test_loaderC = DataLoader(HateSpeechDataset(df_testC), batch_size=BATCH_SIZE)
y_true, y_pred = [], []
model_c.eval()
with torch.no_grad():
    for batch in test_loaderC:
        ids = batch['input_ids'].to(DEVICE)
        mask = batch['attention_mask'].to(DEVICE)
        out = model_c(ids, attention_mask=mask)
        preds = out.logits.argmax(dim=1).cpu().numpy()
        y_pred.extend(preds)
        y_true.extend(batch['labels'].cpu().numpy().astype(int))
labels_c = ['IND','GRP','OTH']
print(classification_report(y_true, y_pred, target_names=labels_c))
sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d', xticklabels=labels_c, yticklabels=labels_c)
plt.title('Subtask C Confusion Matrix')
plt.show()

## 12. Interactive User Testing

In [None]:
# Cell 17: Interactive prediction across subtasks
def predict_text(text, model, num_labels):
    enc = TOKENIZER(text, padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors='pt')
    inputs = {k:v.to(DEVICE) for k,v in enc.items()}
    model.eval()
    with torch.no_grad():
        out = model(**inputs)
        logits = out.logits.detach().cpu().numpy()[0]
    if num_labels == 2:
        return 'OFF' if logits.argmax()==1 else 'NOT'
    else:
        return ['IND','GRP','OTH'][logits.argmax()]

In [None]:

model_c = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=3).to(DEVICE)
model_c.load_state_dict(torch.load(os.path.join(OUTPUT_DIR,'best_model_subtask_c.pt')))

print("Interactive Hate Speech Prediction:")
for task in ['a','b','c']:
    print(f"
--- Subtask {task.upper()} ---")
    user_text = input("Enter a tweet: ")
    if task in ['a','b']:
        pred = predict_text(user_text, model, num_labels=2)
    else:
        pred = predict_text(user_text, model_c, num_labels=3)
    print(f"Prediction for Subtask {task.upper()}: {pred}")