# Differentially Private Hate Speech Detection - TensorFlow Privacy

# Data Preprocessing

In [None]:
import pandas as pd

# Read the raw forum-post corpus from the CSV in the working directory.
csv_path = 'white-supremacist-forum.csv'
df = pd.read_csv(csv_path)
# Print the first rows as plain text to check that the file parsed.
print(df.head())

In [None]:
# Keep only the rows whose label is one of the two expected classes.
correctLabels = ['noHate', 'hate']
has_valid_label = df['label'].isin(correctLabels)
df = df.loc[has_valid_label]
# Class distribution after filtering.
df['label'].value_counts()

The dataset is imbalanced (which is normal for hate speech corpora). Random oversampling was tried below, but it did not work well.

## Experiment: Oversampling

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Experiment: random oversampling of the whole filtered frame with a fixed seed.
# The resampled objects are only inspected in the following cells.
ros = RandomOverSampler(random_state=0)
text_frame = df['text'].to_frame()
label_frame = df['label'].to_frame()
train_texts_resampled, train_labels_resampled = ros.fit_resample(text_frame, label_frame)

In [None]:
# Print a structural summary of the oversampled text frame.
train_texts_resampled.info()

In [None]:
# Class counts after oversampling.
train_labels_resampled.value_counts()

In [None]:
# Collapse the single-column resampling results with squeeze().
# NOTE(review): presumably each becomes a one-dimensional Series - confirm.
train_texts_resampled = train_texts_resampled.squeeze()
train_labels_resampled = train_labels_resampled.squeeze()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the filtered corpus as the test set (fixed seed for
# reproducibility). The split is stratified on the label so that the minority
# class keeps the same share in both parts; an unstratified 10% draw from an
# imbalanced corpus can under-represent it in the test set.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'], df['label'], test_size=0.1, random_state=42,
    stratify=df['label'])

# Data Preparation for ML Model

In [None]:
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split

# WordPiece tokenizer for the uncased BERT base checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def encode_texts(texts):
    """Tokenize a list of strings into TensorFlow tensors.

    Sequences longer than 38 tokens are truncated, and the tokenizer is asked
    to pad (``padding=True``) so the batch can be returned as tensors.

    Args:
        texts: list of raw text strings.

    Returns:
        The tokenizer's encoding object with ``return_tensors="tf"``.
    """
    encoded = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=38,
        return_tensors="tf",
    )
    return encoded

In [None]:
import numpy as np
# Encode the string labels as integer class ids: noHate -> 0, hate -> 1.
label_mapping = {'noHate': 0, 'hate': 1}
train_labels, test_labels = (
    labels.map(label_mapping) for labels in (train_labels, test_labels)
)

In [None]:
# Inspect the integer-encoded training labels.
train_labels

In [None]:
# Class counts in the held-out test set.
test_labels.value_counts()

## Experiment: Undersampling

In [None]:
# Undersample the majority class of the training split so that noHate:hate = 1:1.
from imblearn.under_sampling import RandomUnderSampler
# A fixed seed keeps the selected majority-class rows, and therefore the
# training set and every downstream metric, identical across runs.
undersample = RandomUnderSampler(sampling_strategy=1, random_state=42)
train_texts, train_labels = undersample.fit_resample(train_texts.to_frame(), train_labels.to_frame())

In [None]:
# Inspect the undersampled training texts as returned by fit_resample.
train_texts

In [None]:
# Inspect the undersampled training labels as returned by fit_resample.
train_labels

In [None]:
# Collapse the single-column undersampling results with squeeze(); the
# tokenization cell below calls .to_list() on train_texts.
train_texts = train_texts.squeeze()
train_labels = train_labels.squeeze()

In [None]:
# Training texts after squeeze().
train_texts

In [None]:
# Training labels after squeeze().
train_labels

In [None]:
# Class counts after undersampling (the sampler was asked for a 1:1 ratio).
train_labels.value_counts()

In [None]:
# Tokenize both splits and convert each encoding to a plain dict.
# Note: the names are reused, so after this cell train_texts/test_texts are
# dicts and the cell cannot be re-run (a dict has no .to_list()).
train_texts = dict(encode_texts(train_texts.to_list()))
test_texts = dict(encode_texts(test_texts.to_list()))
# Display the encoded training inputs.
train_texts

# Model

TensorFlow Privacy: pin TensorFlow to version 2.15.0 or lower to avoid incompatibilities with Keras 3.

In [None]:
import tensorflow as tf
import numpy as np

# Set the TensorFlow logger level to ERROR to silence lower-severity messages.
tf.get_logger().setLevel('ERROR')

In [None]:
from transformers import BertConfig, TFBertForSequenceClassification

# Load the pretrained BERT checkpoint with a two-label sequence-classification
# head (downloaded from huggingface.co and cached).
model = TFBertForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased",
    num_labels=2,
)

In [None]:
# Print the Keras summary of the loaded model.
model.summary()

In [None]:
# List the model's top-level layers; the freezing cell below iterates over them.
model.layers

In [None]:
# Train only the classification head: first mark every top-level layer of the
# model as frozen, then switch the head (model.classifier) back on.
trainable_layers = [model.classifier]

for layer in model.layers:
    layer.trainable = False
for layer in trainable_layers:
    layer.trainable = True

In [None]:
# Training schedule for the first DP run; later configuration cells reassign
# these names.
epochs, batch_size = 3, 2

# TensorFlow Privacy

## DP Model with loose privacy budget (ε: 10.8)

In [None]:
# DP-SGD hyperparameters for the first run; all four are passed to the
# DP optimizer constructed below.
l2_norm_clip, noise_multiplier = 1.5, 0.4
num_microbatches = 2
learning_rate = 1e-5

# Each batch must split evenly into microbatches.
if batch_size % num_microbatches:
    raise ValueError('Batch size should be an integer multiple of the number of microbatches')

In [None]:
from tensorflow_privacy import compute_dp_sgd_privacy
# Estimate the privacy guarantee for this configuration with delta fixed at
# 1e-5; the call's return value is shown as the cell output.
n_train_examples = train_labels.shape[0]
compute_dp_sgd_privacy(
    n=n_train_examples,
    batch_size=batch_size,
    noise_multiplier=noise_multiplier,
    epochs=epochs,
    delta=1e-5,
)

In [None]:
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Differentially private SGD optimizer configured from the hyperparameter cell.
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

# The model was loaded with num_labels=2, so it emits two raw logits per
# example, and the labels are integer class ids (0 = noHate, 1 = hate). That
# pairing calls for sparse categorical cross-entropy on logits. The previous
# BinaryCrossentropy (without from_logits) treated the two logits as
# probabilities and broadcast the single label against both of them.
# reduction=NONE keeps the loss per example instead of averaging it, as in the
# original cell, so the DP optimizer can work on microbatches.
loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction=tf.losses.Reduction.NONE)

# Compile and show the model.
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.summary()

In [None]:
# Train with the DP optimizer and evaluate on the held-out split after each
# epoch; the returned History object is kept in `history`.
# NOTE(review): if the number of training rows is not a multiple of batch_size
# the last batch is smaller - confirm the DP optimizer's microbatching accepts it.
history = model.fit(train_texts, train_labels,
  epochs=epochs,
  validation_data=(test_texts, test_labels),
  batch_size=batch_size)

## Classification report: biased towards the majority class

In [None]:
# Classification report on the held-out test set.
# The original cell used `test_pred`, `pred_labels` and `classification_report`
# without defining or importing them, so it failed on a fresh kernel.
from sklearn.metrics import classification_report

# Run the trained model on the tokenized test inputs.
test_pred = model.predict(test_texts)
outputs1 = test_pred.logits
# Predicted class id = index of the larger of the two logits.
classifications1 = np.argmax(outputs1, axis=1)
pred_labels = classifications1
print(classification_report(test_labels, np.array(pred_labels)))

## DP Model with tighter privacy budget (ε: 4.20)

In [None]:
from transformers import BertConfig, TFBertForSequenceClassification

# Reload the pretrained checkpoint so this run starts from fresh weights
# rather than the model trained in the previous section.
model = TFBertForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased",
    num_labels=2,
)

In [None]:
# Train only the classification head: first mark every top-level layer of the
# model as frozen, then switch the head (model.classifier) back on.
trainable_layers = [model.classifier]

for layer in model.layers:
    layer.trainable = False
for layer in trainable_layers:
    layer.trainable = True

In [None]:
# Hyperparameters for the second DP run: one example per microbatch
# (num_microbatches equals batch_size).
l2_norm_clip, noise_multiplier = 1, 0.6
num_microbatches = 32
learning_rate = 0.25
batch_size, epochs = 32, 3

# Each batch must split evenly into microbatches.
if batch_size % num_microbatches:
    raise ValueError('Batch size should be an integer multiple of the number of microbatches')

In [None]:
from tensorflow_privacy import compute_dp_sgd_privacy
# Estimate the privacy guarantee for this configuration with delta fixed at
# 1e-5; the call's return value is shown as the cell output.
n_train_examples = train_labels.shape[0]
compute_dp_sgd_privacy(
    n=n_train_examples,
    batch_size=batch_size,
    noise_multiplier=noise_multiplier,
    epochs=epochs,
    delta=1e-5,
)

In [None]:
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Differentially private SGD optimizer configured from the hyperparameter cell.
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

# The model was loaded with num_labels=2, so it emits two raw logits per
# example, and the labels are integer class ids (0 = noHate, 1 = hate). That
# pairing calls for sparse categorical cross-entropy on logits. The previous
# BinaryCrossentropy (without from_logits) treated the two logits as
# probabilities and broadcast the single label against both of them.
# reduction=NONE keeps the loss per example instead of averaging it, as in the
# original cell, so the DP optimizer can work on microbatches.
loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction=tf.losses.Reduction.NONE)

# Compile and show the model.
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.summary()

In [None]:
# Train with the DP optimizer and evaluate on the held-out split after each epoch.
# NOTE(review): if the number of training rows is not a multiple of batch_size
# the last batch is smaller - confirm the DP optimizer's microbatching accepts it.
model.fit(train_texts, train_labels,
  epochs=epochs,
  validation_data=(test_texts, test_labels),
  batch_size=batch_size)

## DP Model with tightest privacy budget (ε: 0.57)

In [None]:
from transformers import BertConfig, TFBertForSequenceClassification

# Reload the pretrained checkpoint so this run starts from fresh weights
# rather than the model trained in the previous section.
model = TFBertForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased",
    num_labels=2,
)

In [None]:
# Train only the classification head: first mark every top-level layer of the
# model as frozen, then switch the head (model.classifier) back on.
trainable_layers = [model.classifier]

for layer in model.layers:
    layer.trainable = False
for layer in trainable_layers:
    layer.trainable = True

In [None]:
# Hyperparameters for the third DP run.
l2_norm_clip = 1.5
noise_multiplier = 1.3
num_microbatches = 32
learning_rate = 0.25
# Set explicitly: this cell previously relied on the batch_size left over from
# the second run's configuration cell (32), which made the privacy accounting
# and training below depend on which cells had been executed before.
batch_size = 32
epochs = 5

# Each batch must split evenly into microbatches.
if batch_size % num_microbatches != 0:
  raise ValueError('Batch size should be an integer multiple of the number of microbatches')

In [None]:
from tensorflow_privacy import compute_dp_sgd_privacy
# Estimate the privacy guarantee for this configuration with delta fixed at
# 1e-5; the call's return value is shown as the cell output.
n_train_examples = train_labels.shape[0]
compute_dp_sgd_privacy(
    n=n_train_examples,
    batch_size=batch_size,
    noise_multiplier=noise_multiplier,
    epochs=epochs,
    delta=1e-5,
)

In [None]:
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Differentially private SGD optimizer configured from the hyperparameter cell.
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

# The model was loaded with num_labels=2, so it emits two raw logits per
# example, and the labels are integer class ids (0 = noHate, 1 = hate). That
# pairing calls for sparse categorical cross-entropy on logits. The previous
# BinaryCrossentropy (without from_logits) treated the two logits as
# probabilities and broadcast the single label against both of them.
# reduction=NONE keeps the loss per example instead of averaging it, as in the
# original cell, so the DP optimizer can work on microbatches.
loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction=tf.losses.Reduction.NONE)

# Compile and show the model.
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.summary()

In [None]:
# Train with the DP optimizer and evaluate on the held-out split after each epoch.
# NOTE(review): if the number of training rows is not a multiple of batch_size
# the last batch is smaller - confirm the DP optimizer's microbatching accepts it.
model.fit(train_texts, train_labels,
  epochs=epochs,
  validation_data=(test_texts, test_labels),
  batch_size=batch_size)