In [None]:
import csv
import datetime
import json
import pickle
import re
from collections import Counter
from collections import defaultdict

import nltk
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import transformers
from nltk import word_tokenize
from nltk import word_tokenize
from psutil import virtual_memory
from sklearn import metrics
from sklearn.metrics import classification_report,f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW,AutoModel, BertTokenizerFast,BertModel,RobertaTokenizer, RobertaModel,RobertaTokenizerFast

from dataset import InstagramDataset,VineDataset,Swearwords

nltk.download('punkt')

#1.Check the device

In [None]:
# Pick the compute device once; later cells call `.to(device)` on models and batches.
# `is_available` must be *called* - the bare function object is always truthy.
if torch.cuda.is_available() and torch.cuda.device_count()>0:
  device=torch.device("cuda")
  print("there are %d GPU(S) avaliable" % torch.cuda.device_count())
else:
  # Fall back to the CPU so that `device` is defined on every machine.
  device=torch.device("cpu")
  print("NO GPU avaliable")

NO GPU avaliable


In [None]:
# psutil's `total` field is in bytes; dividing by 1e9 expresses it in gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to call a runtime "high-RAM".
print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')


#2.Configuration

In [None]:
class Config:
    """Hyper-parameters and switches read by the cells below via the global `config`."""

    def __init__(self):
        # --- optimisation ---
        self.batch_size = 16
        self.num_epochs = 4
        self.lr = 2e-5
        # validation-loss threshold used for early exit in train_debias
        self.t = 15.0

        # --- input sizes ---
        self.max_sents=140               # session time vectors are padded/truncated to this length
        self.max_document_length=512     # tokenizer max_length
        # encoder family: "Roberta" or "Bert"
        self.modelname="Roberta"

        # --- training mode ---
        self.debias = True               # True -> train_debias, False -> train_base
        self.hidden_layer = False        # True -> classify on a mean-pooled hidden layer
        self.constraint=0.2              # weight of the (fpd+fnd) term in the debias loss
        self.test_size=0.2
        self.random_state=121

        # --- classifier head ---
        self.dropout = 0.3


# Single shared instance used throughout the notebook.
config = Config()

# 2.Load Datasets

##2.1 Load Vine

In [None]:
# Filled below: one dict per labelled Vine session, and a lower-cased
# username -> lower-cased profile description lookup.
dataDict_vine=[]
userinfo_vine = {}
# NOTE(review): VineDataset comes from the project's `dataset` module. The first
# handle is passed to open() and the second is read with .readlines(); confirm
# the class really supports both usages.
vine_data=VineDataset('./data/vine/vine_labeled_cyberbullying_data.csv')
vine_profile_data=VineDataset('./data/vine/vine_users_data.json')

with open(vine_data, 'r',encoding='unicode_escape') as f:
    # Drop NUL characters from every line before the csv parser sees it.
    nul_free_lines = (line.replace('\0','') for line in f)
    dataDict_vine.extend(csv.DictReader(nul_free_lines))

# Each line of the profile data is parsed as one JSON object.
for line in vine_profile_data.readlines():
    profile = json.loads(line)
    userinfo_vine[profile['username'].lower()]=profile['description'].lower()

In [None]:
# Convert time to hours
def timeconvert(timestr,start_time):
    """Return the number of hours elapsed from `start_time` to `timestr`.

    Both arguments are strings in "%Y-%m-%d %H:%M:%S" format. When
    `start_time` does not parse, a '2' is prepended and parsing is retried,
    which repairs a year that lost its leading digit ('014-...' -> '2014-...').

    Returns:
        Hours as a number; negative when `timestr` precedes `start_time`.

    Raises:
        ValueError: if `timestr`, or `start_time` even after the repair,
            does not match the format.
    """
    time_format="%Y-%m-%d %H:%M:%S"
    ntp=datetime.datetime.strptime(timestr, time_format)
    try:
        otp=datetime.datetime.strptime(start_time, time_format)
    except ValueError:
        # Only a parse failure triggers the repair; anything else propagates.
        otp=datetime.datetime.strptime('2'+start_time, time_format)
    delta=ntp-otp
    return delta.days*24+delta.seconds/3600

In [None]:
# Remove redundant session info
# Cleans every Vine row in place: drops bookkeeping columns, turns each comment
# column into [username, tokens, hours-since-post] and tokenizes the caption.
removeList=['_golden','_unit_state','_unit_id','_trusted_judgments','_last_judgment_at','img_url']
for row in dataDict_vine:
    # Reduce the post timestamp to the "YYYY-MM-DD HH:MM:SS" form timeconvert expects.
    row['creationtime']=row['creationtime'].split('posted at:')[-1].strip()
    row['creationtime']=row['creationtime'].replace('.000000','')
    row['creationtime']=row['creationtime'].replace('T',' ')
    for keys in list(row):
        if (keys in removeList) or (keys[:4]=='colu' and row[keys]=='empty'):
            del row[keys]
        elif keys[:4]=='colu':
            # Strip the HTML wrapper and split "<user>::<text> (created_at:<time>)".
            row[keys]=row[keys].replace('<font color="#0066CC">',"")
            row[keys]=row[keys].replace('</font>::',"&&&&&")
            row[keys]=row[keys].replace('(created at:','(created_at:')
            row[keys]=row[keys].split('(created_at:')
            if len(row[keys])>1:
                row[keys]=[row[keys][0].strip(),row[keys][1].strip(')')]
                row[keys][1]=row[keys][1].replace('.000000','')
                row[keys]=[row[keys][0].split("&&&&&")[0],row[keys][0].split("&&&&&")[1],row[keys][1].replace('T',' ')]
                row[keys][0]=row[keys][0].lower()
                row[keys][1]=row[keys][1].lower()
                # Remove characters in the \x80-\xFF range; when any were present,
                # underscores are stripped from the text as well.
                new_str=re.sub(r'[\x80-\xFF]+','',row[keys][1])
                if new_str!=row[keys][1]:
                    # raw string: '\_' in a normal literal is an invalid escape sequence
                    row[keys][1]=re.sub(r'_*','',new_str)
                row[keys][1]=word_tokenize(row[keys][1])
            else:
                # No "(created_at:" marker: the comment cannot be timed, so drop it.
                del row[keys]
    try:
        datetime.datetime.strptime(row['creationtime'], "%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Unparseable post time: borrow the timestamp of the first comment
        # column that still exists among column0..column9.
        for i in range(10):
            try:
                row['creationtime']=row['column'+str(i)][2]
                break
            except KeyError:
                pass
    # Replace each comment timestamp by its offset (in hours) from the post time.
    for keys in list(row):
        if keys[:4]=='colu':
            row[keys][2]=timeconvert(row[keys][2],row['creationtime'])
    row['likecount']=row['likecount'].split('\n\n ')[0]
    row['username']=row['username'].replace('<font color="#0066CC">',"")
    row['username']=row['username'].replace('</font>',"")
    new_cptn=re.sub(r'[\x80-\xFF]+','',row['mediacaption'])
    if new_cptn!=row['mediacaption']:
        row['mediacaption']=re.sub(r'_*','',new_cptn)
    row['mediacaption']=word_tokenize(row['mediacaption'].lower())

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Vine data ready
# Builds, per session: a binary label, the utterances ordered by time, the
# owner's profile text and a time vector padded/truncated to config.max_sents.
session_labels_vine=[]
session_tokens_vine=[]
session_histories=[]
session_times=[]
for row in dataDict_vine:
    # Label 0 when question2 is 'noneBll' and question1 is 'noneAgg' or
    # 'aggression'; every other combination is labelled 1.
    is_negative=row['question2']=='noneBll' and row['question1'] in ('noneAgg','aggression')
    session_labels_vine.append(0 if is_negative else 1)

    # The owner's caption is the first utterance, placed at time 0.
    owner_ut=[row['username']]+row['mediacaption']
    row_tokens=[owner_ut]
    row_times=[0]
    row_history=[userinfo_vine[row['username']] if row['username'] in userinfo_vine else []]
    for keys in list(row):
        if keys[:4]=='colu':
            row_tokens.append([row[keys][0]]+row[keys][1])
            row_times.append(row[keys][2])

    # Order utterances chronologically; if the earliest offset is negative,
    # shift all times so that it becomes 0.
    sorted_row_times=[]
    sorted_row_tokens=[]
    order=np.argsort(row_times)
    mintime=row_times[order[0]]
    for i in order:
        sorted_row_times.append(row_times[i]-mintime if mintime<0 else row_times[i])
        sorted_row_tokens.append(row_tokens[i])

    # Truncate to max_sents, then right-pad with zeros up to max_sents.
    padding=(config.max_sents-len(sorted_row_times))*[0]
    sorted_row_times=sorted_row_times[:config.max_sents]+padding
    session_times.append(sorted_row_times)
    session_tokens_vine.append(sorted_row_tokens)
    session_histories.append(row_history)

# One string per utterance, then one string per session.
comments_list_vine=[[" ".join(x)  for x in y] for y in session_tokens_vine]
data=[" ".join(x) for x in comments_list_vine]
labels=session_labels_vine

##2.2 Load Instagram

In [None]:
# NOTE(review): InstagramDataset comes from the project's `dataset` module and
# its result is passed straight to open(); confirm it is path-like.
In_data_1=InstagramDataset('./data/instagram/sessions_0plus_to_10_metadata.csv')
In_data_2=InstagramDataset('./data/instagram/sessions_10plus_to_40_metadata.csv')
In_data_3=InstagramDataset('./data/instagram/sessions_40plus_metadata.csv')

# Read the three session files into one list of row dicts. NUL characters are
# stripped from every file (previously only from the 2nd and 3rd) so all
# sources are parsed the same way.
dataDict=[]
for In_data in (In_data_1, In_data_2, In_data_3):
    with open(In_data, 'r',encoding='unicode_escape') as f:
        reader = csv.DictReader((line.replace('\0','') for line in f))
        dataDict.extend(reader)

In [None]:
# Convert time to hours
# NOTE(review): this repeats the timeconvert defined earlier in the notebook;
# consider keeping a single definition.
def timeconvert(timestr,start_time):
    """Return the number of hours elapsed from `start_time` to `timestr`.

    Both arguments are strings in "%Y-%m-%d %H:%M:%S" format. When
    `start_time` does not parse, a '2' is prepended and parsing is retried,
    which repairs a year that lost its leading digit ('014-...' -> '2014-...').

    Returns:
        Hours as a number; negative when `timestr` precedes `start_time`.

    Raises:
        ValueError: if `timestr`, or `start_time` even after the repair,
            does not match the format.
    """
    time_format="%Y-%m-%d %H:%M:%S"
    ntp=datetime.datetime.strptime(timestr, time_format)
    try:
        otp=datetime.datetime.strptime(start_time, time_format)
    except ValueError:
        # Only a parse failure triggers the repair; anything else propagates.
        otp=datetime.datetime.strptime('2'+start_time, time_format)
    delta=ntp-otp
    return delta.days*24+delta.seconds/3600

In [None]:
# Remove redundant session info
# Cleans every Instagram row in place: drops bookkeeping columns, turns each
# comment column into [username, tokens, hours-since-post] and tokenizes the
# owner's caption.
removeList=['_golden','_unit_state','_unit_id','_trusted_judgments','_last_judgment_at','img_url']
for row in dataDict:
    row['cptn_time']=row['cptn_time'].split('Media posted at ')[-1].strip()
    for keys in list(row):
        # 'empety' (sic) is the literal value this cell compares against.
        if (keys in removeList) or (keys[:4]=='clmn' and row[keys]=='empety'):
            del row[keys]
        elif keys[:4]=='clmn':
            # Strip the HTML wrapper and split "<user>   <text> (created_at:<time>)".
            row[keys]=row[keys].replace('<font color="#0066CC">',"")
            row[keys]=row[keys].replace('</font>',"")
            row[keys]=row[keys].replace('(created at:','(created_at:')
            row[keys]=row[keys].split('(created_at:')
            if len(row[keys])<2:
                # No "(created_at:" marker: the comment cannot be timed, so drop
                # it - the same handling the Vine cleaning loop applies.
                del row[keys]
                continue
            row[keys]=[row[keys][0].strip(),row[keys][1].strip(')')]
            row[keys]=[row[keys][0].split('   ')[0],row[keys][0].split('   ')[1],row[keys][1]]
            row[keys][0]=row[keys][0].lower()
            row[keys][1]=row[keys][1].lower()
            # Remove characters in the \x80-\xFF range; when any were present,
            # underscores are stripped from the text as well.
            new_str=re.sub(r'[\x80-\xFF]+','',row[keys][1])
            if new_str!=row[keys][1]:
                # raw string: '\_' in a normal literal is an invalid escape sequence
                row[keys][1]=re.sub(r'_*','',new_str)
            row[keys][1]=word_tokenize(row[keys][1])
            row[keys][2]=timeconvert(row[keys][2],row['cptn_time'])
    row['likes']=row['likes'].split('\n\n ')[0]
    row['owner_id']=row['owner_id'].replace('<font color="#0066CC">',"")
    row['owner_id']=row['owner_id'].replace('</font>  ',"")
    new_cptn=re.sub(r'[\x80-\xFF]+','',row['owner_cmnt'])
    if new_cptn!=row['owner_cmnt']:
        row['owner_cmnt']=re.sub(r'_*','',new_cptn)
    row['owner_cmnt']=word_tokenize(row['owner_cmnt'].lower())

In [None]:
# Construct input session data according to time
# Builds, per session: a binary label and the comments ordered by their time
# offset. `data` / `labels` are (re)bound to the Instagram sessions at the end.
session_labels_in=[]
session_tokens_in=[]
# Reset here as before; this cell does not fill these two lists.
session_histories=[]
session_times=[]
for row in dataDict:
    # Label 0 when question2 is 'noneBll' and question1 is 'noneAgg' or
    # 'aggression'; every other combination is labelled 1.
    if row['question2']=='noneBll' and row['question1'] in ('noneAgg','aggression'):
        session_labels_in.append(0)
    else:
        session_labels_in.append(1)

    row_tokens=[]
    row_times=[]
    for keys in list(row):
        if keys[:4]=='clmn':
            row_tokens.append([row[keys][0]]+row[keys][1])
            row_times.append(row[keys][2])

    # np.argsort returns the indices that sort the comments chronologically.
    # Only the ordering is used downstream, so the shifted time values that
    # were previously computed and discarded are gone; this also means a
    # session without comments yields an empty token list instead of raising
    # on `np.argsort(row_times)[0]`.
    sorted_row_tokens=[]
    for i in np.argsort(row_times):
        sorted_row_tokens.append(row_tokens[i])
    session_tokens_in.append(sorted_row_tokens)

# One string per comment, then one string per session.
comments_list_in=[[" ".join(x)  for x in y] for y in session_tokens_in]
#Data ready!
data=[" ".join(x) for x in comments_list_in]
labels=session_labels_in

##2.3 Swear words

In [None]:
# NOTE(review): Swearwords comes from the project's `dataset` module and its
# result is passed straight to open(); confirm it is path-like.
Swearwords_van=Swearwords('./data/swearwords/swear_van.txt')
Swearwords_google=Swearwords('./data/swearwords/swear_google.txt')

# Concatenate both word lists, one entry per line.
swearwords=[]
for swear_file in (Swearwords_van, Swearwords_google):
    with open(swear_file) as f:
        swearwords=swearwords+ f.read().splitlines()

#remove redundant and space
# Set difference instead of list.remove(''): it does not raise when no empty
# line exists. Sorting makes the order reproducible between runs.
swearwords=sorted(set(swearwords)-{''})

#3.Define Base models

In [None]:
def Loaddata(train_text, train_labels,test_text,test_labels):
  """Tokenize the texts and wrap them in train / validation / test DataLoaders.

  10% of the training texts are held out as the validation set. Texts are
  encoded with the tokenizer selected by `config.modelname` and padded or
  truncated to `config.max_document_length`.

  Args:
    train_text, train_labels: training texts and their integer labels.
    test_text, test_labels: test texts and their integer labels.

  Returns:
    (train_dataloader, val_dataloader, test_dataloader, X_train, Y_train).
    The training loader samples randomly; the other two iterate in order.
    X_train / Y_train are the training texts and labels left after the
    validation split, in the same order as the training TensorDataset.

  Raises:
    ValueError: if `config.modelname` is neither "Bert" nor "Roberta".
  """
  X_train, X_val, Y_train, Y_val = train_test_split(
    train_text, train_labels, random_state=config.random_state, test_size=0.10)

  if config.modelname=="Bert":
    tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
  elif config.modelname=="Roberta":
    tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
  else:
    # Previously an unknown name surfaced later as an unbound `tokenizer`.
    raise ValueError("Unsupported config.modelname: %r" % (config.modelname,))

  def _to_dataset(texts, text_labels):
    # Encode one split and bundle (input_ids, attention_mask, labels).
    tokens = tokenizer.batch_encode_plus(
      texts,
      max_length = config.max_document_length,
      pad_to_max_length=True,
      truncation=True,
      return_token_type_ids=False
    )
    return TensorDataset(torch.tensor(tokens['input_ids']),
                         torch.tensor(tokens['attention_mask']),
                         torch.tensor(text_labels))

  train_data = _to_dataset(X_train, Y_train)
  train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=config.batch_size)

  val_data = _to_dataset(X_val, Y_val)
  val_dataloader = DataLoader(val_data, sampler=SequentialSampler(val_data), batch_size=config.batch_size)

  test_data = _to_dataset(test_text, test_labels)
  # The sampler is built from the dataset itself (it used to be built from the id tensor).
  test_dataloader = DataLoader(test_data, sampler=SequentialSampler(test_data), batch_size=config.batch_size)

  return train_dataloader,val_dataloader,test_dataloader,X_train,Y_train

##3.1 Bert

In [None]:
class BertEncoder(nn.Module):
    """Pretrained 'bert-base-uncased' model loaded with output_hidden_states=True."""

    def __init__(self):
        super().__init__()
        self.encoder = BertModel.from_pretrained('bert-base-uncased',output_hidden_states = True)

    def forward(self, x, mask=None):
        """Run the encoder on token ids `x` with attention mask `mask`.

        Returns elements 1 and 2 of the model output as (feat, feat_hidden).
        Presumably these are the pooled output and the tuple of per-layer
        hidden states - confirm against the installed transformers version.
        """
        encoded = self.encoder(x, attention_mask=mask)
        return encoded[1], encoded[2]

class BertClassifier(nn.Module):
    """Classification head: Linear(768,512) -> ReLU -> Dropout -> Linear(512,2) -> LogSoftmax."""

    def __init__(self, dropout=config.dropout):
        super().__init__()

        self.dropout = nn.Dropout(p=dropout)
        self.relu =  nn.ReLU()

        # hidden projection
        self.fc1 = nn.Linear(768,512)
        # NOTE(review): bn1 is created but never applied in forward(), unlike
        # RobertaClassifier - confirm whether that is intentional.
        self.bn1 = nn.BatchNorm1d(512)
        # output layer (2 classes)
        self.fc2 = nn.Linear(512,2)

        # log-probabilities over the class dimension
        self.softmax = nn.LogSoftmax(dim=1)
        self.apply(self.init_bert_weights)

    def forward(self, x):
        """Map features `x` to log-probabilities over the two classes."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.softmax(self.fc2(hidden))

    def init_bert_weights(self, module):
        """Initialise Linear/Embedding weights from N(0, 0.02) and zero Linear biases."""
        if isinstance(module, (nn.Linear, nn.Embedding)):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)
        if isinstance(module, nn.Linear) and module.bias is not None:
            nn.init.zeros_(module.bias)



##3.2 Roberta

In [None]:
class RobertaEncoder(nn.Module):
    """Pretrained 'roberta-base' model loaded with output_hidden_states=True."""

    def __init__(self):
        super().__init__()
        self.encoder = RobertaModel.from_pretrained('roberta-base',output_hidden_states = True)

    def forward(self, x, mask=None):
        """Run the encoder on token ids `x` with attention mask `mask`.

        Returns elements 1 and 2 of the model output as (feat, feat_hidden).
        Presumably these are the pooled output and the tuple of per-layer
        hidden states - confirm against the installed transformers version.
        """
        encoded = self.encoder(x, attention_mask=mask)
        return encoded[1], encoded[2]


class RobertaClassifier(nn.Module): #768->512->2+BN
    """Classification head: Linear(768,512) -> BatchNorm -> ReLU -> Dropout -> Linear(512,2) -> LogSoftmax."""

    def __init__(self, dropout=config.dropout):
        super().__init__()

        self.dropout = nn.Dropout(p=dropout)
        self.relu =  nn.ReLU()

        # hidden projection followed by batch normalisation
        self.fc1 = nn.Linear(768,512)
        self.bn1 = nn.BatchNorm1d(512)
        # output layer (2 classes)
        self.fc2 = nn.Linear(512,2)

        # log-probabilities over the class dimension
        self.softmax = nn.LogSoftmax(dim=1)

        self.apply(self.init_bert_weights)

    def forward(self, x):
        """Map features `x` to log-probabilities over the two classes."""
        hidden = self.relu(self.bn1(self.fc1(x)))
        logits = self.fc2(self.dropout(hidden))
        return self.softmax(logits)

    def init_bert_weights(self, module):
        """Initialise Linear/Embedding weights from N(0, 0.02) and zero Linear biases."""
        if isinstance(module, (nn.Linear, nn.Embedding)):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)
        if isinstance(module, nn.Linear) and module.bias is not None:
            nn.init.zeros_(module.bias)

#4.Define Training

In [None]:
##Get the hidden states
def get_newfeat(feat_hiddent,layerindx,bachsize):
  """Mean-pool one hidden layer over the token axis.

  Args:
    feat_hiddent: indexable collection of per-layer tensors, each indexed as
      [sample][token][feature].
    layerindx: which layer of `feat_hiddent` to pool.
    bachsize: number of leading samples to pool (callers pass the batch length).

  Returns:
    Tensor of shape (bachsize, feature_size): the per-sample mean over tokens.
    The feature size is taken from the input instead of being fixed at 768.
  """
  # One vectorised reduction replaces the per-sample loop with repeated torch.cat.
  return torch.mean(feat_hiddent[layerindx][:bachsize], dim=1)

def make_cuda(tensor):
    """Return `tensor` moved to the GPU when CUDA is available, otherwise unchanged."""
    return tensor.cuda() if torch.cuda.is_available() else tensor


def countbias_train(w,fp,fn,tp,tn,data):
  """Measure how much the error rates change on samples containing `w`.

  Args:
    w: the word (substring) to look for.
    fp, fn, tp, tn: lists of sample indices that are false positives, false
      negatives, true positives and true negatives.
    data: indexable collection of texts; data[i] is the text of sample i.

  Returns:
    (FPD, FND): the absolute difference between the overall false-positive
    rate fp/(fp+tn) and the same rate restricted to texts containing `w`, and
    likewise for the false-negative rate fn/(fn+tp). Returns (0, 0) when `w`
    appears in no negative sample or in no positive sample.
  """
  def _count(indices):
    # number of listed samples whose text contains the word
    return sum(1 for i in indices if w in data[i])

  FPW=_count(fp)
  # TNW and FNW used to be counted from the wrong lists (fn and tn swapped).
  TNW=_count(tn)
  FNW=_count(fn)
  TPW=_count(tp)

  if FPW+TNW==0 or FNW+TPW==0:
    return 0,0
  else:
    return(abs(((len(fp)/(len(fp)+len(tn)))-(FPW/(FPW+TNW)))),abs((len(fn)/(len(fn)+len(tp)))-(FNW/(FNW+TPW)))) #FPD, FND

In [None]:
def get_Adversarialdata(data):
  """Build a DataLoader over copies of `data` with every swear word removed.

  Each text has every entry of the global `swearwords` list deleted by plain
  substring replacement, is tokenized with the tokenizer selected by
  `config.modelname`, and is paired with a constant target of 1 (the target
  later passed to the cosine embedding loss).

  Args:
    data: list of texts.

  Returns:
    A sequentially-sampled DataLoader yielding (input_ids, attention_mask,
    cosin_y) batches in the same order as `data`.

  Raises:
    ValueError: if `config.modelname` is neither "Bert" nor "Roberta".
  """
  train_x_text_clear=[]
  for x in data:
    # NOTE(review): substring replacement also alters longer words that merely
    # contain a listed word - confirm this is acceptable.
    for swear in swearwords:
      x=x.replace(swear,"")
    train_x_text_clear.append(x)

  # The attribute is `modelname`; `config.model_name` does not exist on Config.
  if config.modelname=="Bert":
    tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
  elif config.modelname=="Roberta":
    tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
  else:
    raise ValueError("Unsupported config.modelname: %r" % (config.modelname,))

  tokens_train_clear = tokenizer.batch_encode_plus(
    train_x_text_clear,
    max_length = config.max_document_length,
    pad_to_max_length=True,
    truncation=True,
    return_token_type_ids=False
  )

  train_seq_clear = torch.tensor(tokens_train_clear['input_ids'])
  train_mask_clear = torch.tensor(tokens_train_clear['attention_mask'])
  # One target per input text; sized from `data` rather than the global y_train.
  cosin_y=torch.tensor([1]*len(train_x_text_clear))
  train_data_clear = TensorDataset(train_seq_clear, train_mask_clear, cosin_y)
  train_sampler_clear = SequentialSampler(train_data_clear)
  train_dataloader_clear = DataLoader(train_data_clear,shuffle=False,sampler=train_sampler_clear, batch_size=config.batch_size)
  return train_dataloader_clear

## 4.1 Train_base

In [None]:
def train_base(encoder, classifier, train_dataloader,val_dataloader,epoch_num):
    """Fine-tune the encoder and classifier with plain cross-entropy.

    After every epoch the model is scored on `val_dataloader`; the weights
    with the lowest validation loss are written to 'source-encoder.pt' /
    'source-classifier.pt' and reloaded before returning.

    Args:
        encoder, classifier: modules to train (updated in place).
        train_dataloader: yields (input_ids, attention_mask, labels) batches.
        val_dataloader: loader passed to evaluate().
        epoch_num: number of epochs to run.

    Returns:
        (encoder, classifier) carrying the best validation-loss weights.
    """
    best_valid_loss = float('inf')
    optimizer = AdamW(list(encoder.parameters()) + list(classifier.parameters()),
                       lr=config.lr)
    CELoss = nn.CrossEntropyLoss()
    for epoch in range(epoch_num):
        # evaluate() leaves both modules in eval mode, so training mode has to
        # be re-enabled at the start of every epoch.
        encoder.train()
        classifier.train()
        loss=0.0
        for step, batch in enumerate(train_dataloader):
            # Move the batch to wherever make_cuda places tensors (GPU if available).
            sent_id, mask, labels = [make_cuda(t) for t in batch]
            # zero gradients for optimizer
            optimizer.zero_grad()
            feat,feat_hidden = encoder(sent_id, mask)

            preds = classifier(feat)
            cls_loss = CELoss(preds, labels)
            cls_loss.backward()
            # .item() keeps a float; summing tensors would retain every graph.
            loss+=cls_loss.item()

            # clip the the gradients to 1.0. It helps in preventing the exploding gradient problem
            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 1.0)
            torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0)

            optimizer.step()
            # print step info
            if (step + 1) % 50 == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f"
                      % (epoch + 1,
                         epoch_num,
                         step + 1,
                         len(train_dataloader),
                         cls_loss.item()))
        print("Average Loss from training: "+str(loss/max(len(train_dataloader),1)))
        # evaluate() takes a layer index; -1 selects the last hidden layer and
        # is only read when config.hidden_layer is set.
        acc, preds,valid_loss,real=evaluate(encoder, classifier, val_dataloader,-1)
        if valid_loss < best_valid_loss:
          best_valid_loss = valid_loss
          # save the best model so far
          torch.save(encoder.state_dict(), 'source-encoder.pt')
          torch.save(classifier.state_dict(), 'source-classifier.pt')

    encoder.load_state_dict(torch.load('source-encoder.pt'))
    classifier.load_state_dict(torch.load('source-classifier.pt'))
    return encoder, classifier

##4.2 Train_debias

In [None]:
def _paired_batches(train_dataloader, train_dataloader_clear):
    """Yield (original_batch, cleaned_batch) pairs that refer to the same samples.

    When both loaders wrap TensorDatasets of equal length (as Loaddata and
    get_Adversarialdata build them, in the same text order), the two datasets
    are joined and shuffled together. Otherwise the loaders are zipped as-is.
    """
    src = getattr(train_dataloader.dataset, 'tensors', None)
    tgt = getattr(train_dataloader_clear.dataset, 'tensors', None)
    if src is None or tgt is None or len(src[0]) != len(tgt[0]):
        for src_batch, tgt_batch in zip(train_dataloader, train_dataloader_clear):
            yield tuple(src_batch), tuple(tgt_batch)
        return
    joint = TensorDataset(*src, *tgt)
    loader = DataLoader(joint, shuffle=True, batch_size=train_dataloader.batch_size)
    for batch in loader:
        yield tuple(batch[:len(src)]), tuple(batch[len(src):])


def _loader_texts(data_loader):
    """Decode the input ids of a TensorDataset-backed loader back into strings."""
    if config.modelname=="Bert":
        tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
    else:
        tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
    ids = data_loader.dataset.tensors[0].tolist()
    return tokenizer.batch_decode(ids, skip_special_tokens=True)


def train_debias(encoder, classifier, train_dataloader, train_dataloader_clear,val_dataloader,layer,val_texts=None):
    """Fine-tune with cross-entropy plus a cosine term tying each text to its swear-free copy.

    Per batch the loss is
        CE(original) + mean cosine-embedding loss(cleaned, original)
        + config.constraint * (fpd + fnd)
    where fpd/fnd are the swear-word bias sums measured on the validation set
    after the previous epoch (plain numbers, so they shift the loss value but
    carry no gradient). The weights with the lowest validation loss are saved
    to 'source-encoder.pt' / 'source-classifier.pt' and reloaded at the end.
    Training stops early once the validation loss is below `config.t` after
    at least two epochs.

    Args:
        encoder, classifier: modules to train (updated in place).
        train_dataloader: loader of (input_ids, attention_mask, labels).
        train_dataloader_clear: loader of (input_ids, attention_mask, cosin_y)
            for the same samples with swear words removed.
        val_dataloader: validation loader, iterated in order by evaluate().
        layer: hidden-layer index used when config.hidden_layer is set.
        val_texts: optional list of validation texts aligned with
            `val_dataloader`; when omitted they are decoded from its input ids.

    Returns:
        (encoder, classifier, best_valid_loss).
    """
    best_valid_loss = float('inf')
    fpd=0
    fnd=0

    optimizer = AdamW(list(encoder.parameters()) + list(classifier.parameters()),
                       lr=config.lr)

    CELoss = nn.CrossEntropyLoss()
    cosloss = nn.CosineEmbeddingLoss(reduction='none')

    # The bias statistics need the validation *texts*; a DataLoader cannot be
    # indexed by sample, so recover them once up front.
    if val_texts is None:
        val_texts = _loader_texts(val_dataloader)

    for epoch in range(config.num_epochs):
        # evaluate() leaves both modules in eval mode; switch back every epoch.
        encoder.train()
        classifier.train()
        loss=0.0

        batches = _paired_batches(train_dataloader, train_dataloader_clear)
        for step, ((reviews_src, src_mask, labels), (reviews_tgt, tgt_mask, cosin_y)) in enumerate(batches):

            reviews_src = make_cuda(reviews_src)
            src_mask = make_cuda(src_mask)

            reviews_tgt = make_cuda(reviews_tgt)
            tgt_mask = make_cuda(tgt_mask)

            labels = make_cuda(labels)
            cosin_y=make_cuda(cosin_y)
            optimizer.zero_grad()

            feat_src_tgt, feat_src_tgt_hidden= encoder(reviews_src, src_mask)
            feat_tgt,feat_tgt_hidden= encoder(reviews_tgt, tgt_mask)
            if config.hidden_layer :
              feat_src_tgt=get_newfeat(feat_src_tgt_hidden,layer,len(feat_src_tgt))
              feat_tgt=get_newfeat(feat_tgt_hidden,layer,len(feat_tgt))

            preds = classifier(feat_src_tgt)
            cls_loss = CELoss(preds, labels)

            # Per-sample cosine loss averaged to a scalar: backward() and
            # .item() both require a single-element loss.
            cos_loss = cosloss(feat_tgt, feat_src_tgt,cosin_y).mean()

            cls_loss=cls_loss+cos_loss+config.constraint*(fpd+fnd)
            cls_loss.backward()
            # .item() keeps a float; summing tensors would retain every graph.
            loss+=cls_loss.item()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 1.0)
            torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0)

            optimizer.step()

            # print step info
            if (step + 1) % 50 == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f"
                      % (epoch + 1,
                         config.num_epochs,
                         step + 1,
                         len(train_dataloader),
                         cls_loss.item()))

        # Average over the number of *training* batches that produced `loss`.
        print("Average Loss from training: "+str(loss/max(len(train_dataloader),1)))
        acc, preds,valid_loss,real=evaluate(encoder, classifier, val_dataloader,layer)
        preds=[int(x) for x in preds]
        real=[int(x) for x in real]
        fp=[i for i in range(len(preds)) if preds[i]==1 and real[i]==0]
        fn=[i for i in range(len(preds)) if preds[i]==0 and real[i]==1]
        tp=[i for i in range(len(preds)) if preds[i]==1 and real[i]==1]
        tn=[i for i in range(len(preds)) if preds[i]==0 and real[i]==0]
        # Sum the per-word bias over all swear words for the next epoch's loss.
        fpd=0
        fnd=0
        for w in swearwords:
          x,y= countbias_train(w,fp,fn,tp,tn,val_texts)
          fpd+=x
          fnd+=y

        if valid_loss < best_valid_loss :
          best_valid_loss = valid_loss
          torch.save(encoder.state_dict(), 'source-encoder.pt')
          torch.save(classifier.state_dict(), 'source-classifier.pt')
        #Jump when the loss threshold is reached
        if valid_loss<config.t and epoch>0: break

    encoder.load_state_dict(torch.load('source-encoder.pt'))
    classifier.load_state_dict(torch.load('source-classifier.pt'))
    # Report the loss of the weights actually returned (the best checkpoint).
    return encoder, classifier,best_valid_loss

##4.3 Evaluate

In [None]:
def evaluate(encoder, classifier, data_loader,layer=-1):
    """Score the model on `data_loader`.

    Both modules are switched to eval mode and are left in eval mode.

    Args:
        encoder, classifier: the model parts.
        data_loader: yields (input_ids, attention_mask, labels) batches.
        layer: hidden-layer index fed to get_newfeat when config.hidden_layer
            is set; defaults to -1 (the last layer) so callers that do not
            care about the layer may omit it.

    Returns:
        (acc, pred, loss, real): accuracy over the dataset, list of predicted
        class tensors (one 0-dim tensor per sample), mean batch loss, and the
        list of true-label tensors in the same order.
    """
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0
    pred=[]
    real=[]

    criterion = nn.CrossEntropyLoss()

    # Nothing below needs gradients, so the whole loop runs under no_grad.
    with torch.no_grad():
        for batch in data_loader:
            sent_id, mask, labels = [make_cuda(t) for t in batch]

            real.extend(labels)

            feat,feat_hidden = encoder(sent_id, mask)

            if config.hidden_layer:
              newfeat=get_newfeat(feat_hidden,layer,len(feat))
            else:
              newfeat=feat

            preds = classifier(newfeat)
            loss += criterion(preds, labels).item()
            pred_cls = preds.max(1)[1]
            acc += pred_cls.eq(labels).cpu().sum().item()
            pred.extend(pred_cls)

    # max(..., 1) avoids a ZeroDivisionError on an empty loader.
    loss /= max(len(data_loader), 1)
    acc /= max(len(data_loader.dataset), 1)

    print("Avg Loss = %.4f, Avg Accuracy = %.4f" % (loss, acc))

    return acc,pred,loss,real

#5.Count bias

In [None]:
def count_fp_fn_tp_tn(predictions_list,target_list):
  """Split sample indices by confusion-matrix cell.

  Returns four index lists in the order (FP, TN, FN, TP). Samples whose
  prediction or target is neither 0 nor 1 are left out of all four lists.
  """
  FP,TN,FN,TP=[],[],[],[]
  for idx, predicted in enumerate(predictions_list):
    if predicted==1:
      actual=target_list[idx]
      if actual==0:
        FP.append(idx)
      elif actual==1:
        TP.append(idx)
    elif predicted==0:
      actual=target_list[idx]
      if actual==1:
        FN.append(idx)
      elif actual==0:
        TN.append(idx)
  return FP,TN,FN,TP

def countbias(word,FP,TN,FN,TP,data):
  """Return (FPD, FND) for `word`.

  FPD is the absolute gap between the overall false-positive rate
  FP/(FP+TN) and that rate restricted to texts containing `word`; FND is the
  same for the false-negative rate FN/(FN+TP). `data[i]` is the text of
  sample i. Returns (0, 0) when `word` occurs in no negative sample or in no
  positive sample.
  """
  def hits(indices):
    # how many of the listed samples contain the word
    return sum(1 for i in indices if word in data[i])

  FPW,TNW,FNW,TPW=hits(FP),hits(TN),hits(FN),hits(TP)
  if FPW+TNW==0 or FNW+TPW==0:
    return 0,0
  fpd=abs(((len(FP)/(len(FP)+len(TN)))-(FPW/(FPW+TNW))))
  fnd=abs((len(FN)/(len(FN)+len(TP)))-(FNW/(FNW+TPW)))
  return fpd,fnd


#6.Running

In [None]:
# Split the sessions, build the loaders and instantiate the configured model.
train_x_text,test_x_text,train_labels,test_labels= train_test_split(data,labels,random_state=config.random_state, test_size=config.test_size)
train_dataloader,val_dataloader,test_dataloader,x_train,y_train=Loaddata(train_x_text,train_labels,test_x_text,test_labels)
if config.modelname=="Bert":
  train_encoder = BertEncoder()
  train_classifier= BertClassifier()
elif config.modelname=="Roberta":
  # The check used to compare against "Robert", which never matched.
  train_encoder = RobertaEncoder()
  train_classifier= RobertaClassifier()
else:
  raise ValueError("Unsupported config.modelname: %r" % (config.modelname,))

train_encoder = train_encoder.to(device)
train_classifier = train_classifier.to(device)

# Hidden layer used at evaluation time; -1 (last layer) unless the layer
# search below picks another one. Defined up front so every branch has it.
bestlayer=-1
if config.debias:
  train_dataloader_clear=get_Adversarialdata(x_train)
  if config.hidden_layer:
    # NOTE(review): the same encoder/classifier keep training from one layer
    # to the next, so later layers start from already fine-tuned weights -
    # confirm this is intended.
    best_loss = float('inf')
    for i in range(1,13):
      train_encoder, train_classifier,loss = train_debias(train_encoder, train_classifier, train_dataloader,train_dataloader_clear,val_dataloader,i)
      if best_loss>loss:
        best_loss=loss
        torch.save(train_encoder.state_dict(), 'encoder.pt')
        torch.save(train_classifier.state_dict(), 'classifier.pt')
        bestlayer=i
    train_encoder.load_state_dict(torch.load('encoder.pt'))
    train_classifier.load_state_dict(torch.load('classifier.pt'))
  else:
    train_encoder, train_classifier,_ = train_debias(train_encoder, train_classifier, train_dataloader,train_dataloader_clear,val_dataloader,bestlayer)
else:
  train_encoder, train_classifier =train_base(train_encoder, train_classifier, train_dataloader,val_dataloader,config.num_epochs)

acc,preds,loss,real=evaluate(train_encoder, train_classifier, test_dataloader,bestlayer)
preds=[int(x.cpu().numpy()) for x in preds]
real=[int(x.cpu().numpy()) for x in real]
print(classification_report(real, preds))

#count bias
# Per-word (FPD, FND) on the test set, then their sums over all swear words.
FPD=[]
FND=[]
FP,TN,FN,TP=count_fp_fn_tp_tn(preds,real)
for w in list(swearwords):
  x,y= countbias(w,FP,TN,FN,TP,test_x_text)
  FPD.append((w,x))
  FND.append((w,y))

FPED=sum([x for _,x in FPD])
FNED=sum([x for _,x in FND])