# Differentially Private Hate Speech Detection - TensorFlow Privacy

# Data Preprocessing

In [27]:
import pandas as pd

df = pd.read_csv('white-supremacist-forum.csv')
print(df.head())

   Unnamed: 0                                               text   label
0           0  As of March 13th , 2014 , the booklet had been...  noHate
1           1  In order to help increase the booklets downloa...  noHate
2           2  ( Simply copy and paste the following text int...  noHate
3           3  Click below for a FREE download of a colorfull...    hate
4           4  Click on the `` DOWNLOAD ( 7.42 MB ) '' green ...  noHate


In [28]:
# filter invalid label
correctLabels = ['noHate', 'hate']
df = df.loc[df['label'].isin(correctLabels)]
df['label'].value_counts()

label
noHate    9507
hate      1196
Name: count, dtype: int64

The dataset is imbalanced(normal for hate speech corpus). Tried oversampling below, not working terribly well

## Experiment: Oversampling

In [29]:
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=0)
train_texts_resampled, train_labels_resampled = ros.fit_resample(df['text'].to_frame(), df['label'].to_frame())

In [30]:
train_texts_resampled.info()

In [31]:
train_labels_resampled.value_counts()

In [32]:
train_texts_resampled = train_texts_resampled.squeeze()
train_labels_resampled = train_labels_resampled.squeeze()

In [33]:
from sklearn.model_selection import train_test_split

# Initial split to separate out the test set
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'], df['label'], test_size=0.1, random_state=42)

# Data Preparation for ML Model

In [34]:
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def encode_texts(texts):
    return tokenizer(texts, padding=True, truncation=True, max_length=38, return_tensors="tf")

In [35]:
import numpy as np
# Convert labels to numeric
label_mapping = {'noHate': 0, 'hate': 1}
train_labels = train_labels.map(label_mapping)
test_labels = test_labels.map(label_mapping)

In [36]:
train_labels

1458     0
10917    0
10530    1
583      0
6087     0
        ..
5846     0
5296     0
5499     0
867      0
7418     0
Name: label, Length: 9632, dtype: int64

In [37]:
test_labels.value_counts()

label
0    953
1    118
Name: count, dtype: int64

## Experiment: Undersampling

In [38]:
# undersampling the dataset, noHate:hate=1:1
from imblearn.under_sampling import RandomUnderSampler
undersample = RandomUnderSampler(sampling_strategy=1)
train_texts, train_labels = undersample.fit_resample(train_texts.to_frame(), train_labels.to_frame())

In [39]:
train_texts

1458     So people go out of the country for a few year...
10917    Ive never liked rap , or to be closely associa...
10530    It should come as no surprise that the Jews st...
583      Well yes that s the thing there is a differenc...
6087     I'm from France but it 's so hard to find any ...
                               ...                        
5846     For God 's sake nobody cares about this childi...
5296     Let me know if White girls making out with bla...
5499     im from wilbraham myself i 'm looking to organ...
867      I would definitely not recommend becoming a te...
7418                            He should sue for racism .
Name: text, Length: 9632, dtype: object

In [40]:
train_labels

1458     0
10917    0
10530    1
583      0
6087     0
        ..
5846     0
5296     0
5499     0
867      0
7418     0
Name: label, Length: 9632, dtype: int64

In [13]:
train_texts = train_texts.squeeze()
train_labels = train_labels.squeeze()

In [41]:
train_texts

1458     So people go out of the country for a few year...
10917    Ive never liked rap , or to be closely associa...
10530    It should come as no surprise that the Jews st...
583      Well yes that s the thing there is a differenc...
6087     I'm from France but it 's so hard to find any ...
                               ...                        
5846     For God 's sake nobody cares about this childi...
5296     Let me know if White girls making out with bla...
5499     im from wilbraham myself i 'm looking to organ...
867      I would definitely not recommend becoming a te...
7418                            He should sue for racism .
Name: text, Length: 9632, dtype: object

In [42]:
train_labels

1458     0
10917    0
10530    1
583      0
6087     0
        ..
5846     0
5296     0
5499     0
867      0
7418     0
Name: label, Length: 9632, dtype: int64

In [43]:
train_labels.value_counts()

label
0    8554
1    1078
Name: count, dtype: int64

In [44]:
train_texts = dict(encode_texts(train_texts.to_list()))
test_texts = dict(encode_texts(test_texts.to_list()))
train_texts

{'input_ids': <tf.Tensor: shape=(9632, 38), dtype=int32, numpy=
 array([[  101,  2061,  2111, ...,     0,     0,     0],
        [  101,  4921,  2063, ...,     0,     0,     0],
        [  101,  2009,  2323, ...,     0,     0,     0],
        ...,
        [  101, 10047,  2013, ...,  2620,  2683,   102],
        [  101,  1045,  2052, ...,     0,     0,     0],
        [  101,  2002,  2323, ...,     0,     0,     0]], dtype=int32)>,
 'token_type_ids': <tf.Tensor: shape=(9632, 38), dtype=int32, numpy=
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>,
 'attention_mask': <tf.Tensor: shape=(9632, 38), dtype=int32, numpy=
 array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ...,

# Model

Tensorflow Privacy: Set TensorFlow version to at most 2.15.0 to avoid problems with using Keras 3.

In [182]:
import tensorflow as tf
import numpy as np

tf.get_logger().setLevel('ERROR')

In [None]:
from transformers import BertConfig, TFBertForSequenceClassification

# Download model and configuration from huggingface.co and cache.
model = TFBertForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)

In [None]:
model.summary()

In [None]:
model.layers

In [None]:
# freeze all layers, except for the last encoder(Classifier)
trainable_layers = [model.classifier]

for layer in model.layers:
        layer.trainable = False

for layer in trainable_layers:
    layer.trainable = True

In [193]:
epochs = 3
batch_size = 2

# TensorFlow Privacy

## DP Model with low privacy budget (𝜀:10.8)

In [194]:
l2_norm_clip = 1.5
noise_multiplier = 0.4
num_microbatches = 2
learning_rate = 1e-5

if batch_size % num_microbatches != 0:
  raise ValueError('Batch size should be an integer multiple of the number of microbatches')

In [195]:
from tensorflow_privacy import compute_dp_sgd_privacy
# Compute privacy
compute_dp_sgd_privacy(n=train_labels.shape[0],
                      batch_size=batch_size,
                      noise_multiplier=noise_multiplier,
                      epochs=epochs,
                      delta=1e-5)



(10.821862030774117, 2.25)

In [196]:
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Select your differentially private optimizer
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

# Select your loss function
loss = tf.keras.losses.BinaryCrossentropy(reduction=tf.losses.Reduction.NONE)

# Compile your model
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.summary()

Model: "tf_bert_for_sequence_classification_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_227 (Dropout)       multiple                  0 (unused)
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 109483778 (417.65 MB)
Trainable params: 1538 (6.01 KB)
Non-trainable params: 109482240 (417.64 MB)
_________________________________________________________________


In [None]:
# Fit your model
history = model.fit(train_texts, train_labels,
  epochs=epochs,
  validation_data=(test_texts, test_labels),
  batch_size=batch_size)

Epoch 1/3
Epoch 2/3

## classfication report: biased to majority class

In [211]:
# classfication report
outputs1 = test_pred.logits
classifications1 = np.argmax(outputs1, axis=1)
print(classification_report(test_labels, np.array(pred_labels)))

              precision    recall  f1-score   support

           0       0.88      0.76      0.82       953
           1       0.09      0.18      0.12       118

    accuracy                           0.70      1071
   macro avg       0.48      0.47      0.47      1071
weighted avg       0.79      0.70      0.74      1071



## DP Model with Tighter privacy budget (𝜀:4.20)

In [45]:
from transformers import BertConfig, TFBertForSequenceClassification

# Download model and configuration from huggingface.co and cache.
model = TFBertForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [46]:
# freeze all layers, except for the last encoder(Classifier)
trainable_layers = [model.classifier]

for layer in model.layers:
        layer.trainable = False

for layer in trainable_layers:
    layer.trainable = True

In [47]:
l2_norm_clip = 1
noise_multiplier = 0.6
num_microbatches = 32
learning_rate = 0.25
batch_size = 32
epochs = 3

if batch_size % num_microbatches != 0:
  raise ValueError('Batch size should be an integer multiple of the number of microbatches')

In [48]:
from tensorflow_privacy import compute_dp_sgd_privacy
# Compute privacy
compute_dp_sgd_privacy(n=train_labels.shape[0],
                      batch_size=batch_size,
                      noise_multiplier=noise_multiplier,
                      epochs=epochs,
                      delta=1e-5)



(4.202145584797149, 4.0)

In [49]:
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Select your differentially private optimizer
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

# Select your loss function
loss = tf.keras.losses.BinaryCrossentropy(reduction=tf.losses.Reduction.NONE)

# Compile your model
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.summary()

Model: "tf_bert_for_sequence_classification_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_75 (Dropout)        multiple                  0 (unused)
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 109483778 (417.65 MB)
Trainable params: 1538 (6.01 KB)
Non-trainable params: 109482240 (417.64 MB)
_________________________________________________________________


In [50]:
# Fit your model
model.fit(train_texts, train_labels,
  epochs=epochs,
  validation_data=(test_texts, test_labels),
  batch_size=batch_size)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tf_keras.src.callbacks.History at 0x2f5be0210>

## DP Model with Strongest privacy budget (𝜀:0.57)

In [69]:
from transformers import BertConfig, TFBertForSequenceClassification

# Download model and configuration from huggingface.co and cache.
model = TFBertForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [70]:
# freeze all layers, except for the last encoder(Classifier)
trainable_layers = [model.classifier]

for layer in model.layers:
        layer.trainable = False

for layer in trainable_layers:
    layer.trainable = True

In [71]:
l2_norm_clip = 1.5
noise_multiplier = 1.3
num_microbatches = 32
learning_rate = 0.25
epochs = 5

if batch_size % num_microbatches != 0:
  raise ValueError('Batch size should be an integer multiple of the number of microbatches')

In [72]:
from tensorflow_privacy import compute_dp_sgd_privacy
# Compute privacy
compute_dp_sgd_privacy(n=train_labels.shape[0],
                      batch_size=batch_size,
                      noise_multiplier=noise_multiplier,
                      epochs=epochs,
                      delta=1e-5)



(0.5784845682832211, 18.0)

In [73]:
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Select your differentially private optimizer
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

# Select your loss function
loss = tf.keras.losses.BinaryCrossentropy(reduction=tf.losses.Reduction.NONE)

# Compile your model
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.summary()

Model: "tf_bert_for_sequence_classification_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_151 (Dropout)       multiple                  0 (unused)
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 109483778 (417.65 MB)
Trainable params: 1538 (6.01 KB)
Non-trainable params: 109482240 (417.64 MB)
_________________________________________________________________


In [75]:
# Fit your model
model.fit(train_texts, train_labels,
  epochs=epochs,
  validation_data=(test_texts, test_labels),
  batch_size=batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tf_keras.src.callbacks.History at 0x3511137d0>