# Data Preprocessing (DWMW17)

In [None]:
from google.colab import drive
drive.mount('/drive')
path = '/drive/My Drive/CSCI544 Project/'

In [2]:
import pandas as pd
import numpy as np
from collections import Counter, OrderedDict
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import StepLR, MultiStepLR, CyclicLR
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt

In [3]:
# if torch.cuda.is_available():  
#   dev = "cuda:0"
#   torch.cuda.set_device(0)
# else:  
#   dev = "cpu"
device = torch.device('cpu')

In [4]:
# Used DWMW17 from https://github.com/t-davidson/hate-speech-and-offensive-language/tree/master/data
data_path = 'https://raw.githubusercontent.com/t-davidson/hate-speech-and-offensive-language/master/data/labeled_data.csv'
data = pd.read_csv(data_path)
df = data[['class', 'tweet']].copy()

In [5]:
len(df)

24783

In [6]:
df.head()

Unnamed: 0,class,tweet
0,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...


In [7]:
# preprocessing
df['tweet'] = df['tweet'].apply(lambda x:x.lower())
punctuation_signs = list("?:!.,;")
df['tweet'] = df['tweet']

for punct_sign in punctuation_signs:   
    df['tweet'] = df['tweet'].str.replace(punct_sign, '')

df['tweet'] = df['tweet'].apply(lambda x: x.replace('\n', ' '))
df['tweet'] = df['tweet'].apply(lambda x: x.replace('\t', ' '))
df['tweet'] = df['tweet'].str.replace("    ", " ")
df['tweet'] = df['tweet'].str.replace('"', '')
df['tweet'] = df['tweet'].str.replace("'s", "")

  df['tweet'] = df['tweet'].str.replace(punct_sign, '')


In [8]:
# remove stop words
nltk.download('stopwords')
stop_words = list(stopwords.words('english'))
for stop_word in stop_words:
    regex_stopword = r"\b" + stop_word + r"\b"
    df['tweet'] = df['tweet'].str.replace(regex_stopword, '')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/luvlzf/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
  df['tweet'] = df['tweet'].str.replace(regex_stopword, '')


In [10]:
aae = np.genfromtxt('aae.txt', delimiter=',')
df['race'] = aae.argmax(axis=1)
df.head(10)

Unnamed: 0,class,tweet,race
0,2,rt @mayasolovely woman ' complain cleanin...,3
1,1,rt @mleew17 boy dats coldtyga dwn bad cuffin...,0
2,1,rt @urkindofbrand dawg rt @80sbaby4life ever...,0
3,1,rt @c_g_anderson @viva_based look like tranny,0
4,1,rt @shenikaroberts shit hear might true ...,0
5,1,@t_madison_x shit blows meclaim faithful ...,0
6,1,@__brighterdays sit hate another bitch ...,0
7,1,&#8220@selfiequeenbri cause ' tired big bitc...,0
8,1,&amp might get ya bitch back &amp thats,0
9,1,@rhythmixx_ hobbies include fighting mariam ...,0


In [11]:
df['tweet'] = df['tweet'].str.split()

In [12]:
def is_toxic(class_type):
    if class_type == 0 or class_type == 1:
        return 'toxic' # toxic
    else:
        return 'non-toxic' # non-toxic
    
df['toxic'] = df['class'].apply(is_toxic)

In [13]:
indices = np.arange(len(df))
X_train, X_test, y_train, y_test = train_test_split(df[['class', 'tweet', 'race']], df['toxic'], test_size = 0.2, stratify=df['toxic'], random_state=17)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = 0.5, stratify=y_test, random_state=17)

### Resample training data

In [14]:
from imblearn.under_sampling import RandomUnderSampler

In [15]:
# Separate the features and labels
X = X_train # 0 African American, 1, 2, 3
y = y_train # 0 toxic, 1 non-toxic

# Split the data into AAE and non-AAE
X_aae = X[X['race'] == 0]
X_non_aae = X[X['race'] != 0]
y_aae = y[X['race'] == 0]
y_non_aae = y[X['race'] != 0]


In [16]:
X_non_aae
X_aae

Unnamed: 0,class,tweet,race
15209,1,"[rt, @flyoutchase, saw, men, saying, arguing, ...",0
8505,1,"[come, lil, bitch, want, sloppy, toppy]",0
14232,1,"[rt, @assholeofdayear, beyonce, made, many, ho...",0
5653,1,"[@brendacoyt_3, got, bitches, snowboarding, &#...",0
21303,1,"[tell, hater, said, fuck, tell, bitch, said, f...",0
...,...,...,...
15478,1,"[rt, @hi_niamani, @clemheem, grandma, got, twe...",0
19479,1,"[rt, @kxngtae, niggas, cheat, girl, stalk, cal...",0
4043,1,"[@marshallsots, tryna, say, ', pussy, course, ...",0
3429,1,"[@huntermoore, hells, yeah, fuckin, sick, ass,...",0


In [17]:
y_non_aae

20025        toxic
21514    non-toxic
17727    non-toxic
13016        toxic
11453    non-toxic
           ...    
4133         toxic
1066         toxic
14323        toxic
22796    non-toxic
18558    non-toxic
Name: toxic, Length: 9292, dtype: object

In [18]:
# Calculate the non-toxic to toxic ratio in non-AAE
print(sum(y_non_aae == 'non-toxic'))
print(sum(y_non_aae == 'toxic'))
ratio_non_aae = sum(y_non_aae == 'non-toxic') / sum(y_non_aae == 'toxic')

2785
6507


In [19]:
ratio_non_aae

0.4280006147226064

In [20]:
# Calculate the non-toxic to toxic ratio in AAE
print(sum(y_aae == 'non-toxic'))
print(sum(y_aae == 'toxic'))
ratio_aae = sum(y_aae == 'non-toxic') / sum(y_aae == 'toxic')
ratio_aae

545
9989


0.05456001601761938

Augment the training data to satisfy P(toxic|AAE) / P(non-toxic|AAE) =P(toxic|non-AAE) / P(non-toxic|non-AAE). <br>
 For example, if initially in non-AAE data, the ratio of non-toxic to toxic data is 5:1, and in AAE data, the ratio is 2:1, <br>
 then you want to upsample the data that are both AAE and non-toxic, so that the ratio in AAE data can become 5:1. <br>

In [21]:
# Undersample the toxic tweets in AAE to match the ratio in non-AAE
rus = RandomUnderSampler(sampling_strategy={ 'toxic': int(sum(y_aae == 'non-toxic') / ratio_non_aae) })
X_aae_resampled, y_aae_resampled = rus.fit_resample(X_aae, y_aae)

In [22]:
# Combine the oversampled AAE data and the non-AAE data
X_resampled = pd.concat([X_non_aae, X_aae_resampled])
y_resampled = pd.concat([y_non_aae, y_aae_resampled])

# Combine the features and labels back into a data frame
df_resampled = pd.concat([X_resampled, y_resampled], axis=1)

In [23]:
df_resampled # X_train and y_train concatenated

Unnamed: 0,class,tweet,race,toxic
20025,1,"[rt, @skyereyes_, &#8220@feebito_23, lol, ', f...",1,toxic
21514,2,"[yankees, still, suck, @nikaaaa3]",3,non-toxic
17727,2,"[rt, @thestrangelog, nerf, hell, birds, longer...",3,non-toxic
13016,1,"[dad, told, @yourdudeferg, dad, text, one, fri...",3,toxic
11453,2,"[isis, supporters, america, jihadis, next, doo...",3,non-toxic
...,...,...,...,...
1813,1,"[soon, u, get, pt, ', wonder, go, hard, hoes, ...",0,toxic
1814,1,"[', bitch, caught, body, week, ago, fuck, us, ...",0,toxic
1815,1,"[rt, @chelseypaige42, bitches, begging, love, ...",0,toxic
1816,1,"[yeen, real, nicca, yeen, got, porn, sites, n,...",0,toxic


In [24]:
X_train = df_resampled[['tweet', 'race', 'toxic']]
y_train = df_resampled['class']

In [25]:
X_train

Unnamed: 0,tweet,race,toxic
20025,"[rt, @skyereyes_, &#8220@feebito_23, lol, ', f...",1,toxic
21514,"[yankees, still, suck, @nikaaaa3]",3,non-toxic
17727,"[rt, @thestrangelog, nerf, hell, birds, longer...",3,non-toxic
13016,"[dad, told, @yourdudeferg, dad, text, one, fri...",3,toxic
11453,"[isis, supporters, america, jihadis, next, doo...",3,non-toxic
...,...,...,...
1813,"[soon, u, get, pt, ', wonder, go, hard, hoes, ...",0,toxic
1814,"[', bitch, caught, body, week, ago, fuck, us, ...",0,toxic
1815,"[rt, @chelseypaige42, bitches, begging, love, ...",0,toxic
1816,"[yeen, real, nicca, yeen, got, porn, sites, n,...",0,toxic


In [26]:
y_train

20025    1
21514    2
17727    2
13016    1
11453    2
        ..
1813     1
1814     1
1815     1
1816     1
1817     1
Name: class, Length: 11110, dtype: int64

到这里以上完成training的resample

In [28]:
glove_model = {}
# with open(path + 'glove.6B.100d','r') as f:
with open('glove.6B.100d','r') as f:
    for line in f:
        line = line.split()
        word = line[0]
        embedding = np.array(line[1:], dtype=np.float64)
        glove_model[word] = embedding

In [29]:
# # make encoding in X match glove embedding
vecs = np.zeros((len(glove_model), 100), dtype=np.float64)
i = 0
for word, embedding in glove_model.items():
    vecs[i] = embedding
    i += 1

pad_vec = np.zeros((1,100))   # vector for padding

unk_vec = np.mean(vecs, axis=0) # <unk>

glove_embeddings = np.vstack((pad_vec, unk_vec, vecs))
glove_vocab = list(glove_model.keys())
glove_vocab.insert(0, '<unk>')
glove_vocab.insert(0, '<pad>')
word_index = {glove_vocab[i]:i for i in range(len(glove_vocab))}

In [30]:
def encode_X(X_df):
  X = [0] * len(X_df)
  count = 0
  for index, row in X_df.iterrows():
      X[count] = []
      for word in row['tweet']:
          if word in glove_vocab:
              X[count].append(word_index[word])
          else:
              X[count].append(word_index['<unk>'])
      count += 1
  return X

# runtime: 3 minutes

In [31]:
X_train = encode_X(X_train)
X_val = encode_X(X_val)

In [32]:
print(len(X))
print(len(y))

19826
19826


In [33]:
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

class TrainData(Dataset):
    def __init__(self, X, y, transform=None):
        self.X = torch.transpose(pad_sequence([torch.Tensor(i) for i in X], padding_value=0), 0, 1).to(device)
        self.y = y
        self.lengths = [len(i) for i in X]
        self.transform = transform
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        tweet = self.X[index]
        label = self.y.iloc[index]
        length = self.lengths[index]
            
        return tweet, length, label


class TestData(Dataset):
    def __init__(self, X, transform=None):
      self.X = torch.transpose(pad_sequence([torch.Tensor(i) for i in X], padding_value=0), 0, 1).to(device)
      self.lengths = [len(i) for i in X]

      self.transform = transform
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        tweet = self.X[index]
        length = self.lengths[index]
            
        return tweet, length

In [34]:
train_data = TrainData(X_train, y_train, transform=transforms.ToTensor())
val_data = TrainData(X_val, y_val, transform=transforms.ToTensor())
batch_size = 20
num_workers = 0
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
valid_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

# Model (DWMW17)

In [35]:
import torch.nn as nn
import torch.nn.functional as F

class LSTM(nn.Module):
    def __init__(self, input_size, embed_dim, hidden_size, num_layers, num_classes, dropout):
        super(LSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding.from_pretrained(torch.from_numpy(glove_embeddings).float())
        self.lstm = nn.LSTM(embed_dim, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(hidden_size, 128)
        self.elu = nn.ELU()
        self.fc = nn.Linear(128 , num_classes)
        
    def forward(self, x, lengths):
        # Initialize hidden states and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) # one forward, one backward, so *2
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        x = x.int().to(device)
        out = self.embedding(x)
        out = pack_padded_sequence(out, lengths, batch_first=True, enforce_sorted=False)
        out, _ = self.lstm(out, (h0, c0))
        out, _ = pad_packed_sequence(out, batch_first=True)
        out = self.linear1(out)
        out = self.dropout(out)
        out = self.elu(out)
        out = self.fc(out)
        out = out[:, -1, :]

        return out

In [36]:
n_hidden = 256
n_input = len(glove_embeddings)
n_embed_dim = 100
n_layers = 1
n_classes = 3 # output layer
dropout = 0.33
lstm = LSTM(n_input, n_embed_dim, n_hidden, n_layers, n_classes, dropout)
lstm.to(device)

LSTM(
  (embedding): Embedding(400002, 100)
  (lstm): LSTM(100, 256, batch_first=True)
  (dropout): Dropout(p=0.33, inplace=False)
  (linear1): Linear(in_features=256, out_features=128, bias=True)
  (elu): ELU(alpha=1.0)
  (fc): Linear(in_features=128, out_features=3, bias=True)
)

In [37]:
learning_rate = 0.5
n_epochs = 30
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(lstm.parameters(), lr=learning_rate)
scheduler = MultiStepLR(optimizer, milestones=[10, 20, 50], gamma=0.5)
# scheduler = CyclicLR(optimizer, base_lr=0.005, max_lr=0.5)
train_loss_min = np.Inf # initialize minimum validation loss

for epoch in range(n_epochs):
  # initialize train and validation loss
  train_loss = 0
  valid_loss = 0


  # train
  # mini-batch gradient descent
  lstm.train()
  for i, (data, lengths, target) in enumerate(train_loader):
    # forward
    output = lstm(data, lengths)
    loss = criterion(output, target)
    optimizer.zero_grad()
    # backward
    loss.backward()
    # update parameters
    optimizer.step()
    # track training loss
    train_loss += loss.item()
    # scheduler.step()


  # evaluation
  # lstm.eval()
  # for i, (data,lengths, target) in enumerate(valid_loader):
  #     # forward pass
  #     output = lstm(data, lengths)
  #     # calculate the loss
  #     print(target)
  #     loss = criterion(output, target)
  #     # track validation loss
  #     valid_loss += loss.item()


  train_loss = train_loss/len(train_loader.dataset)
  # valid_loss = valid_loss/len(valid_loader.dataset)
  
  print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}\tLearning Rate: {:.3f}'.format(
      epoch+1, 
      train_loss,
      valid_loss,
      scheduler.get_last_lr()[0]
      ))
  
  # if train_loss <= train_loss_min:
  #     print('Training loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
  #     train_loss_min,
  #     train_loss))
  #     torch.save(lstm.state_dict(), 'lstm_model.pt')
  #     train_loss_min = train_loss

  scheduler.step()

Epoch: 1 	Training Loss: 0.042348 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 2 	Training Loss: 0.041949 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 3 	Training Loss: 0.041673 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 4 	Training Loss: 0.041626 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 5 	Training Loss: 0.041545 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 6 	Training Loss: 0.041521 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 7 	Training Loss: 0.041434 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 8 	Training Loss: 0.041394 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 9 	Training Loss: 0.041373 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 10 	Training Loss: 0.041337 	Validation Loss: 0.000000	Learning Rate: 0.500
Epoch: 11 	Training Loss: 0.040950 	Validation Loss: 0.000000	Learning Rate: 0.250
Epoch: 12 	Training Loss: 0.040991 	Validation Loss: 0.000000	Learning Rate: 0.250
Epoch: 13 	Tr

In [38]:
torch.save(lstm.state_dict(), 'lstm_model(dwmw17).pt')

In [39]:
# run the split again
X_train, X_test, y_train, y_test = train_test_split(df[['class', 'tweet', 'race']], df['class'], test_size = 0.2, stratify=df['toxic'], random_state=17)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = 0.5, stratify=y_test, random_state=17)

In [40]:
# lstm.load_state_dict(torch.load(path+'lstm_model.pt'))
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

def test():
  test_data = TestData(encode_X(X_test), transform=transforms.ToTensor())
  test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, 
      num_workers=num_workers)
  y_pred = []
  lstm.eval()
  with torch.no_grad():
      for data, lengths in test_loader:
          outputs = lstm(data, lengths)
          _, predicted = torch.max(outputs.data, 1)
          y_pred.append(predicted.tolist()[0])

  print("Test Accuracy: " + str(accuracy_score(list(y_test), y_pred)))
  print("Test F1 Score: "+ str(f1_score(list(y_test), y_pred, average='micro')))
  return y_pred

In [41]:
y_pred = test()

Test Accuracy: 0.8378378378378378
Test F1 Score: 0.8378378378378378


In [90]:
# def predict():
#   test_data = TestData(encode_X(df[['class', 'tweet', 'race']]), transform=transforms.ToTensor())
#   test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, 
#       num_workers=num_workers)
#   y_pred = []
#   lstm.eval()
#   with torch.no_grad():
#       for data, lengths in test_loader:
#           outputs = lstm(data, lengths)
#           _, predicted = torch.max(outputs.data, 1)
#           y_pred.append(predicted.tolist()[0])
#   return y_pred

In [91]:
# y_pred = predict()

# LSTM Bias Evalutaion (DWMW 17)

In [42]:
len(df)

24783

In [43]:
df

Unnamed: 0,class,tweet,race,toxic
0,2,"[rt, @mayasolovely, woman, ', complain, cleani...",3,non-toxic
1,1,"[rt, @mleew17, boy, dats, coldtyga, dwn, bad, ...",0,toxic
2,1,"[rt, @urkindofbrand, dawg, rt, @80sbaby4life, ...",0,toxic
3,1,"[rt, @c_g_anderson, @viva_based, look, like, t...",0,toxic
4,1,"[rt, @shenikaroberts, shit, hear, might, true,...",0,toxic
...,...,...,...,...
24778,1,"[muthaf***, lie, &#8220@lifeasking, @20_pearls...",0,toxic
24779,2,"[', gone, broke, wrong, heart, baby, drove, re...",3,non-toxic
24780,1,"[young, buck, wanna, eat, dat, nigguh, like, a...",0,toxic
24781,1,"[youu, got, wild, bitches, tellin, lies]",0,toxic


In [47]:
lstm_result = X_test.copy()
# lstm_result = lstm_result.reindex(np.arange(len(X)))
# lstm_result.loc[indices_train, 'data_type'] = 'train'
# lstm_result.loc[indices_val, 'data_type'] = 'val'
# lstm_result.loc[indices_test, 'data_type'] = 'test'
lstm_result['pred'] = y_pred
# lstm_result = lstm_result.loc[lstm_result['data_type']=='test'][['class', 'pred', 'race']]
aae_group = lstm_result.loc[lstm_result['race'] == 0]
other_group = lstm_result.loc[lstm_result['race'] != 0]

In [48]:
def fpr(cm):
  FP = cm.sum(axis=0) - np.diag(cm)  
  FN = cm.sum(axis=1) - np.diag(cm)
  TP = np.diag(cm)
  TN = cm.sum() - (FP + FN + TP)
  FPR = FP/(FP+TN)
  return FPR

In [49]:
from sklearn.metrics import confusion_matrix
print("LSTM Bias Evaluation: ")
print('\tHate Speech' + ' Offensive' + '  Neither')
aae_cm = confusion_matrix(aae_group['class'], aae_group['pred'])
print("AAE" + '\t' + str(fpr(aae_cm)))
other_cm = confusion_matrix(other_group['class'], other_group['pred'])
print("Non-AAE" + '\t' + str(fpr(other_cm)))

LSTM Bias Evaluation: 
	Hate Speech Offensive  Neither
AAE	[0.00080841 0.67692308 0.043654  ]
Non-AAE	[0.00091408 0.3816092  0.11151515]


# Data Preprocessing (FDCL18)

In [50]:
fdcl18_data = pd.read_csv('FDCL18.csv', delimiter='\t', nrows=None, skiprows=2, header=None, names=['tweet', 'label', 'count'])

In [51]:
df1 = fdcl18_data[['tweet', 'label']].copy()
df1.loc[df1.label == 'hateful', 'label'] = 0
df1.loc[df1.label == 'abusive', 'label'] = 1
df1.loc[df1.label == 'spam', 'label'] = 2
df1.loc[df1.label == 'normal', 'label'] = 3

In [52]:
# preprocessing
df1['tweet'] = df1['tweet'].apply(lambda x:x.lower())
punctuation_signs = list("?:!.,;")
df1['tweet'] = df1['tweet']

for punct_sign in punctuation_signs:   
    df1['tweet'] = df1['tweet'].str.replace(punct_sign, '')

df1['tweet'] = df1['tweet'].apply(lambda x: x.replace('\n', ' '))
df1['tweet'] = df1['tweet'].apply(lambda x: x.replace('\t', ' '))
df1['tweet'] = df1['tweet'].str.replace("    ", " ")
df1['tweet'] = df1['tweet'].str.replace('"', '')
df1['tweet'] = df1['tweet'].str.replace("'s", "")

# remove stop words
nltk.download('stopwords')
stop_words = list(stopwords.words('english'))
for stop_word in stop_words:
    regex_stopword = r"\b" + stop_word + r"\b"
    df1['tweet'] = df1['tweet'].str.replace(regex_stopword, '')

  df1['tweet'] = df1['tweet'].str.replace(punct_sign, '')
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/luvlzf/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
  df1['tweet'] = df1['tweet'].str.replace(regex_stopword, '')


In [53]:
print(len(df1))
df1.head(10)

99995


Unnamed: 0,tweet,label
0,rt @papapishu man would fucking rule part...,1
1,time draw close &#128591&#127995 father ...,3
2,notice start act different distant bc p...,3
3,forget unfollowers believe growing 7 new fol...,3
4,rt @vitiligoprince hate sexually frustrated l...,1
5,topped group tgp disc jam season 2 onto sem...,3
6,daily baby aspirin #heart might preventin...,3
7,liked @youtube video @mattshea https//tco/n...,3
8,rt @lestuhgang_ fucking &amp homies dont t...,1
9,uber finds one allegedly stolen waymo file – ...,2


In [54]:
df1['tweet'] = df1['tweet'].str.split()

In [55]:
# African-American, Hispanic, Asian, and White topics,
aae = np.genfromtxt('fdcl18_aae.csv', delimiter=',')
df1['race'] = aae.argmax(axis=1)

In [56]:
print(len(df1))
df1.head(10)

99995


Unnamed: 0,tweet,label,race
0,"[rt, @papapishu, man, would, fucking, rule, pa...",1,3
1,"[time, draw, close, &#128591&#127995, father, ...",3,3
2,"[notice, start, act, different, distant, bc, p...",3,0
3,"[forget, unfollowers, believe, growing, 7, new...",3,3
4,"[rt, @vitiligoprince, hate, sexually, frustrat...",1,0
5,"[topped, group, tgp, disc, jam, season, 2, ont...",3,3
6,"[daily, baby, aspirin, #heart, might, preventi...",3,3
7,"[liked, @youtube, video, @mattshea, https//tco...",3,3
8,"[rt, @lestuhgang_, fucking, &amp, homies, dont...",1,1
9,"[uber, finds, one, allegedly, stolen, waymo, f...",2,3


In [58]:
def is_toxic(label):
    if label == 0 or label == 1:
        return 'toxic' # toxic
    else:
        return 'non-toxic' # non-toxic
    
df1['toxic'] = df1['label'].apply(is_toxic)

In [66]:
indices = np.arange(len(df))
X_train, X_test, y_train, y_test = train_test_split(df1[['tweet', 'label', 'race']], df1['toxic'], test_size = 0.2, stratify=df1['toxic'], random_state=17)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = 0.5, stratify=y_test, random_state=17)

### Resample training data

In [67]:
df1.head(10)

Unnamed: 0,tweet,label,race,toxic
0,"[rt, @papapishu, man, would, fucking, rule, pa...",1,3,toxic
1,"[time, draw, close, &#128591&#127995, father, ...",3,3,non-toxic
2,"[notice, start, act, different, distant, bc, p...",3,0,non-toxic
3,"[forget, unfollowers, believe, growing, 7, new...",3,3,non-toxic
4,"[rt, @vitiligoprince, hate, sexually, frustrat...",1,0,toxic
5,"[topped, group, tgp, disc, jam, season, 2, ont...",3,3,non-toxic
6,"[daily, baby, aspirin, #heart, might, preventi...",3,3,non-toxic
7,"[liked, @youtube, video, @mattshea, https//tco...",3,3,non-toxic
8,"[rt, @lestuhgang_, fucking, &amp, homies, dont...",1,1,toxic
9,"[uber, finds, one, allegedly, stolen, waymo, f...",2,3,non-toxic


In [68]:
# Separate the features and labels
X = X_train # race: 0 African American, 1, 2, 3
y = y_train

# Split the data into AAE and non-AAE
X_aae = X[X['race'] == 0]
X_non_aae = X[X['race'] != 0]

y_aae = y[X['race'] == 0]
y_non_aae = y[X['race'] != 0]

In [69]:
X_non_aae

Unnamed: 0,tweet,label,race
12871,"[good, suit, man’, flattering, fashion, item, ...",3,3
7557,"[epic, concept, album, proggy, metalheads, @ma...",3,3
67861,"[@blackpyramidofficial, outta, jersey, @embell...",3,3
13992,"[lmaoooo, fucking, hate, guy, &#128557&#128557...",1,1
99971,"[unfollow, im, tweeting, rest, fucking, life, ...",1,3
...,...,...,...
87559,"[immediate, response, lets, fucking, go, couti...",1,3
19556,"[found, transponder, snail, behind--scenes, lo...",3,3
14823,"[@krgpryal, @cjprender, wow, staggering, painf...",3,3
93624,"[apple, name, 10th, anniversary, smartphone, '...",3,3


In [70]:
y_non_aae

12871    non-toxic
7557     non-toxic
67861    non-toxic
13992        toxic
99971        toxic
           ...    
87559        toxic
19556    non-toxic
14823    non-toxic
93624    non-toxic
77663    non-toxic
Name: toxic, Length: 70272, dtype: object

In [71]:
# Calculate the non-toxic to toxic ratio in non-AAE
print(sum(y_non_aae == 'non-toxic'))
print(sum(y_non_aae == 'toxic'))
ratio_non_aae = sum(y_non_aae == 'non-toxic') / sum(y_non_aae == 'toxic')
ratio_non_aae

51086
19186


2.6626706973835086

In [72]:
# Calculate the non-toxic to toxic ratio in AAE
print(sum(y_aae == 'non-toxic'))
print(sum(y_aae == 'toxic'))
ratio_aae = sum(y_aae == 'non-toxic') / sum(y_aae == 'toxic')
ratio_aae

3218
6506


0.4946203504457424

In [73]:
# Undersample the toxic tweets in AAE to match the ratio in non-AAE
rus = RandomUnderSampler(sampling_strategy={ 'toxic': int(sum(y_aae == 'non-toxic') / ratio_non_aae) })
X_aae_resampled, y_aae_resampled = rus.fit_resample(X_aae, y_aae)

In [74]:
# Combine the oversampled AAE data and the non-AAE data
X_resampled = pd.concat([X_non_aae, X_aae_resampled])
y_resampled = pd.concat([y_non_aae, y_aae_resampled])

# Combine the features and labels back into a data frame
df1_resampled = pd.concat([X_resampled, y_resampled], axis=1)

In [75]:
df1_resampled # X_train and y_train concatenated

Unnamed: 0,tweet,label,race,toxic
12871,"[good, suit, man’, flattering, fashion, item, ...",3,3,non-toxic
7557,"[epic, concept, album, proggy, metalheads, @ma...",3,3,non-toxic
67861,"[@blackpyramidofficial, outta, jersey, @embell...",3,3,non-toxic
13992,"[lmaoooo, fucking, hate, guy, &#128557&#128557...",1,1,toxic
99971,"[unfollow, im, tweeting, rest, fucking, life, ...",1,3,toxic
...,...,...,...,...
4421,"[rt, @emaaataylorrr, &#128166&#128175&#128152&...",1,0,toxic
4422,"[bruh, fucking, lineup, @_richyrozay_, https//...",1,0,toxic
4423,"[rt, @c_liveee, niece, fucked, em, baby, @_lub...",1,0,toxic
4424,"[rt, @13reasonsfans, fucked, https//tco/58gm0y...",1,0,toxic


In [81]:
print(len(df1_resampled))
print(len(df1))

74698
99995


In [82]:
X_train = df1_resampled[['tweet', 'race', 'toxic']]
y_train = df1_resampled['label']

In [83]:
y = df1['label']
len(y)

99995

In [84]:
X_train = encode_X(X_train)

In [85]:
train_data = TrainData(X_train, y_train, transform=transforms.ToTensor())
# val_data = TrainData(X_val, y_val, transform=transforms.ToTensor())
batch_size = 60
num_workers = 0
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
# valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

# Model (FDCL18)

In [86]:
n_hidden = 256
n_input = len(glove_embeddings)
n_embed_dim = 100
n_layers = 1
n_classes = 4 # output layer
dropout = 0.33
lstm = LSTM(n_input, n_embed_dim, n_hidden, n_layers, n_classes, dropout)
lstm.to(device)

LSTM(
  (embedding): Embedding(400002, 100)
  (lstm): LSTM(100, 256, batch_first=True)
  (dropout): Dropout(p=0.33, inplace=False)
  (linear1): Linear(in_features=256, out_features=128, bias=True)
  (elu): ELU(alpha=1.0)
  (fc): Linear(in_features=128, out_features=4, bias=True)
)

In [87]:
learning_rate = 0.5
n_epochs = 30
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(lstm.parameters(), lr=learning_rate)
scheduler = MultiStepLR(optimizer, milestones=[10, 20, 50], gamma=0.5)
# scheduler = CyclicLR(optimizer, base_lr=0.005, max_lr=0.5)
train_loss_min = np.Inf # initialize minimum validation loss

for epoch in range(n_epochs):
  # initialize train and validation loss
  train_loss = 0
  valid_loss = 0


  # train
  # mini-batch gradient descent
  lstm.train()
  for i, (data, lengths, target) in enumerate(train_loader):
    # forward
    output = lstm(data, lengths)
    loss = criterion(output, target)
    optimizer.zero_grad()
    # backward
    loss.backward()
    # update parameters
    optimizer.step()
    # track training loss
    train_loss += loss.item()
    # scheduler.step()


  # evaluation
  # lstm.eval()
  # for i, (data,lengths, target) in enumerate(valid_loader):
  #     # forward pass
  #     output = lstm(data, lengths)
  #     # calculate the loss
  #     loss = criterion(output, target)
  #     # track validation loss
  #     valid_loss += loss.item()


  train_loss = train_loss/len(train_loader.dataset)
  valid_loss = valid_loss/len(valid_loader.dataset)
  
  print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}\tLearning Rate: {:.3f}'.format(
      epoch+1, 
      train_loss,
      valid_loss,
      scheduler.get_last_lr()[0]
      ))
  
  if train_loss <= train_loss_min:
      print('Training loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
      train_loss_min,
      train_loss))
      torch.save(lstm.state_dict(), 'lstm_model.pt')
      train_loss_min = train_loss

  scheduler.step()

Epoch: 1 	Training Loss: 0.017928 	Validation Loss: 0.000000	Learning Rate: 0.500
Training loss decreased (inf --> 0.017928).  Saving model ...
Epoch: 2 	Training Loss: 0.017867 	Validation Loss: 0.000000	Learning Rate: 0.500
Training loss decreased (0.017928 --> 0.017867).  Saving model ...
Epoch: 3 	Training Loss: 0.017818 	Validation Loss: 0.000000	Learning Rate: 0.500
Training loss decreased (0.017867 --> 0.017818).  Saving model ...
Epoch: 4 	Training Loss: 0.017792 	Validation Loss: 0.000000	Learning Rate: 0.500
Training loss decreased (0.017818 --> 0.017792).  Saving model ...
Epoch: 5 	Training Loss: 0.017777 	Validation Loss: 0.000000	Learning Rate: 0.500
Training loss decreased (0.017792 --> 0.017777).  Saving model ...
Epoch: 6 	Training Loss: 0.017776 	Validation Loss: 0.000000	Learning Rate: 0.500
Training loss decreased (0.017777 --> 0.017776).  Saving model ...
Epoch: 7 	Training Loss: 0.017763 	Validation Loss: 0.000000	Learning Rate: 0.500
Training loss decreased (0.01

In [88]:
torch.save(lstm.state_dict(), 'lstm_model(fdcl18).pt')

In [91]:
df1

Unnamed: 0,tweet,label,race,toxic
0,"[rt, @papapishu, man, would, fucking, rule, pa...",1,3,toxic
1,"[time, draw, close, &#128591&#127995, father, ...",3,3,non-toxic
2,"[notice, start, act, different, distant, bc, p...",3,0,non-toxic
3,"[forget, unfollowers, believe, growing, 7, new...",3,3,non-toxic
4,"[rt, @vitiligoprince, hate, sexually, frustrat...",1,0,toxic
...,...,...,...,...
99990,"[rt, @shangros, fucking, queen, https//tco/wax...",1,1,toxic
99991,"[#osteporosis, treated, #pemf, -, rebuild, bon...",3,3,non-toxic
99992,"[@lgusamobile, phone, screen, keeps, flickring...",3,3,non-toxic
99993,"[#bigdata, vs, #reality, equally, applies, #ec...",3,3,non-toxic


In [114]:
# run the split again
X_train, X_test, y_train, y_test = train_test_split(df1[['label', 'tweet', 'race']], df1['label'], test_size = 0.2, stratify=df1['label'], random_state=17)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = 0.5, stratify=y_test, random_state=17)

In [116]:
y_pred = test()

Test Accuracy: 0.7678
Test F1 Score: 0.7678


In [None]:
# df1['pred'] = y_pred
# with open('lstm_pred(fdcl18).csv', 'w') as f:
#   f.write('tweet,label,pred')
#   for index, row in df1.iterrows():
#     f.write(str(row['tweet'])+ ',' + str(row['label']) + ','+ str(row['pred']))

# LSTM Bias Evaluation (FDCL18)

In [117]:
lstm_result = X_test.copy()
lstm_result['pred'] = y_pred
aae_group = lstm_result.loc[lstm_result['race'] == 0]
other_group = lstm_result.loc[lstm_result['race'] != 0]

In [118]:
aae_group['label'].unique()

array([1, 0, 3, 2], dtype=object)

In [121]:
print(aae_group.dtypes)

label    object
tweet    object
race      int64
pred      int64
dtype: object


In [122]:
aae_group['label'] = aae_group['label'].astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aae_group['label'] = aae_group['label'].astype('int64')


In [123]:
print(aae_group.dtypes)

label     int64
tweet    object
race      int64
pred      int64
dtype: object


In [124]:
other_group['label'] = other_group['label'].astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_group['label'] = other_group['label'].astype('int64')


In [131]:
from sklearn.metrics import confusion_matrix
print("LSTM Bias Evaluation: ")
print('\tHate Speech' + ' Offensive' + '  Spam' + '       Neither')
aae_cm = confusion_matrix(aae_group['label'], aae_group['pred'])
print("AAE" + '\t' + str(fpr(aae_cm)))
other_cm = confusion_matrix(other_group['label'], other_group['pred'])
print("Non-AAE" + '\t' + str(fpr(other_cm)))

LSTM Bias Evaluation: 
	Hate Speech Offensive  Spam       Neither
AAE	[0.         0.3250478  0.05741627 0.10273224]
Non-AAE	[0.         0.07763975 0.06634004 0.2627027 ]


# References:
https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal