## Libraies used in this IPYNB file 

In [1]:
import numpy as np
import pandas as pd

#torch lib
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch import optim

#basic lib
import sys
import random
import math
import time
from tqdm import tqdm

#sklearn Lib 
from sklearn.metrics import precision_recall_fscore_support, f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

#transformer lib Autotokenizer
from transformers import BertTokenizer, AutoTokenizer
from transformers import BertModel, AutoModel, AutoModelForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup
from torch.utils.tensorboard import SummaryWriter


#NLTK lib and pandas
import pandas as pd
from nltk.corpus import stopwords,wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer,WordNetLemmatizer
import language_tool_python 
import nltk
nltk.download('stopwords')
nltk.download('wordnet')


stemmer = WordNetLemmatizer()
grammer = language_tool_python.LanguageTool('en-US')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# pre-requisite Function to test model

In [2]:
#Cuda memory
use_cuda = True if torch.cuda.is_available() else False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.autograd.set_detect_anomaly(True)
torch.backends.cudnn.benchmark = True
np.random.seed(0)
torch.manual_seed(0)

base_model = 'twitter-xlm-roberta-base-sentiment'

#model selection 
model_list = ['bert-base-uncased', 'bert-base-multilingual-uncased', 'google/muril-base-cased', 'xlm-roberta-base',
              'ai4bharat/indic-bert','cardiffnlp/twitter-xlm-roberta-base','cardiffnlp/twitter-xlm-roberta-base-sentiment',
              'cardiffnlp/twitter-roberta-base', 'cardiffnlp/twitter-roberta-base-sentiment',
              'cardiffnlp/twitter-roberta-base-hate', 'roberta-base']

#model path 
model_path = 'mnt/saved_models/'

#result are saaved in this location 
results_path = 'mnt/saved_results/'

#Data augumentation 
class HateData(Dataset):
    def __init__(self, data_path, split='train', lang='bengali', aug_prob=0.2, flip_prob=0.5):
        self.split = split
        self.data = pd.read_csv(data_path + split + lang + ".tsv", sep='\t', lineterminator='\n') 
        if self.split == 'train':
            self.label2data = {0:[], 1:[], 2:[]}
            for i in tqdm(range(len(self.data))):
                row = self.data.iloc[i]
                self.label2data[row[label_idx]].append(row[text_idx])
            self.aug_prob = aug_prob
            self.flip_prob = flip_prob

    def __len__(self):
        return len(self.data)

    
    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()
        data = self.data.iloc[index]
        labels = data[label_idx]
        text = data[text_idx]
        inputs = tokenizer(text, padding='max_length', truncation=True, max_length=MAX_SEQ_LEN)
        input_ids = inputs['input_ids']
        token_type_ids = np.zeros(MAX_SEQ_LEN)
        attn_mask = inputs['attention_mask']
        aug_text = text  
        labels_aug = labels
        
        if self.split == 'train' and labels == 1:
            if np.random.uniform() < self.aug_prob:
                aug_text = np.random.choice(self.label2data[0])
         
                if np.random.uniform() < self.flip_prob:
                    aug_text = aug_text + " [SEP] " + text
                else:
                    aug_text = text + " [SEP] " + aug_text 
            labels_aug = 1
      
        inputs_aug = tokenizer(aug_text, padding='max_length', truncation=True, max_length=MAX_SEQ_LEN)
        input_ids_aug = inputs_aug['input_ids']
        token_type_ids_aug = np.zeros(MAX_SEQ_LEN)
        attn_mask_aug = inputs_aug['attention_mask']

        input_ids = torch.tensor(np.vstack([input_ids, input_ids_aug]), dtype=torch.long).view(2, MAX_SEQ_LEN)
        token_type_ids = torch.tensor(np.vstack([token_type_ids, token_type_ids_aug]), dtype=torch.long).view(2, MAX_SEQ_LEN)
        attn_mask = torch.tensor(np.vstack([attn_mask, attn_mask_aug]), dtype=torch.long).view(2, MAX_SEQ_LEN)
        labels = torch.tensor(np.vstack([labels, labels_aug]), dtype=torch.long).view(2)

        return input_ids, attn_mask, token_type_ids, labels


#data classifier 
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        H1, H2, num_class = 768, 128, 2
        self.bert = AutoModel.from_pretrained(model_list[model_choice])
        self.clf = nn.Sequential(
            nn.Linear(H1, H2),
            nn.ReLU(),
            nn.Linear(H2, H2),
            nn.ReLU(),
            nn.Linear(H2, num_class)
        )
        
    def forward(self, input_ids, attn_mask, token_type_ids):  
        outputs = self.bert(input_ids, attn_mask)
        cls_emb = outputs.pooler_output 
        logits = self.clf(cls_emb)
        return logits
    
#evaluate fuction 
def evaluate(input_ids, attn_mask, token_type_ids, label, model, mode='train'):
   
    batch_size = input_ids.shape[0]
    seq_len = input_ids.shape[1]
    with torch.no_grad():
        if use_cuda:
            input_ids = input_ids.to(device)
            attn_mask = attn_mask.to(device)
            token_type_ids = token_type_ids.to(device)
            label = label.to(device)
        
        logits = model(input_ids[:,0,:], attn_mask[:,0,:], token_type_ids[:,0,:])
        loss = loss_fn(logits, label[:,0])
        
        if mode == 'train':
            return float(loss.item())
        preds = torch.argmax(logits, dim=1).flatten()
        return float(loss.item()), preds.cpu().numpy()
loss_fn = nn.CrossEntropyLoss()


### Tested Model with 1000 samples for Hate Explain 

In [3]:
data=pd.read_csv("data/Latenthatred/latent_test.tsv", sep='\t')
data["post"]
sam1=[]
sam2=[]

test_sam=pd.DataFrame()
for i in range(1000):
    sam1.append(data["post"][i])
    sam2.append(data["class"][i])

test_sam['post']=sam1
test_sam['label']=sam2
test_sam.to_csv("hxsam_test.tsv",sep="\t",index=False)



model_choice = 8
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])
#Load pre trained Model
model = Classifier()
model.load_state_dict(torch.load("mnt/saved_models/Latent_tws.pth", map_location=device))
model = model.to(device)
label_idx = 1
MAX_SEQ_LEN = 128
text_idx = 0

test_data = HateData(data_path="", split='', lang="hxsam_test")
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)


model.eval()
test_loss = []
test_pred = []

#Record the prediction result  
wr = open(results_path + "test_prediction_" + base_model + "_" + "Hx" + ".txt", "w")    
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")
test_loss = np.mean(test_loss)#.item()
print("Test Loss: ", test_loss)
wr.close()

df_test = pd.read_csv("data/multilingual/test_"+"Hx"+".tsv", sep='\t', lineterminator='\n')
gt_labels = test_sam["label"]

print(classification_report(gt_labels, test_pred, digits=4))

Some weights of RobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1000/1000 [05:24<00:00,  3.08it/s]

Test Loss:  0.43809041582606734
              precision    recall  f1-score   support

           0     0.8369    0.8771    0.8565       667
           1     0.7276    0.6577    0.6909       333

    accuracy                         0.8040      1000
   macro avg     0.7822    0.7674    0.7737      1000
weighted avg     0.8005    0.8040    0.8013      1000






### Synonym Replacement
#### All the words in the sentences are replaced by their synonyms while they're not stopwords, when their length is more than 3 letters.

In [6]:

data=pd.read_csv("data/latenthatred/latent_test.tsv", sep='\t')
data["post"]
sam1=[]
sam2=[]
post=[]

test_sam=pd.DataFrame()
for i in range(1000):
    sam1.append(data["post"][i])
    sam2.append(data["class"][i])
stop_words = set(stopwords.words('english'))
                
for i in range(len(sam1)):
    
    l=sam1[i].split()
    sen=[]
    for i in l:
        if i in stop_words:
            sen.append(i)
            pass
        else:
            try:
                if len(i)<=3:
                    sen.append(i)
                else:
                    j=wordnet.synsets(i)
                    lt=j[0].lemmas()[0].name()
                    if i !=lt:
                        sen.append(lt)
                    else:
                        sen.append(i)
            except:
                sen.append(i)
    post.append(' '.join(sen))


test_sam['post']=post
test_sam['label']=sam2

test_sam.to_csv("hxsam_test2.tsv",sep="\t",index=False)

model_choice = 8
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])
#Load pre trained Model
model = Classifier()
model.load_state_dict(torch.load("mnt/saved_models/latent_tws.pth", map_location=device))
model = model.to(device)
label_idx = 1
MAX_SEQ_LEN = 128
text_idx = 0

test_data = HateData(data_path="", split='', lang="hxsam_test2")
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)


model.eval()
test_loss = []
test_pred = []

#Record the prediction result  
wr = open(results_path + "test_prediction_" + base_model + "_" + "Hx" + ".txt", "w")    
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")
test_loss = np.mean(test_loss)#.item()
print("Test Loss: ", test_loss)
wr.close()

df_test = pd.read_csv("data/multilingual/test_"+"Hx"+".tsv", sep='\t', lineterminator='\n')
gt_labels = test_sam["label"]

print(classification_report(gt_labels, test_pred, digits=4))



Some weights of RobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1000/1000 [05:36<00:00,  2.97it/s]


Test Loss:  0.5136432139184326
              precision    recall  f1-score   support

           0     0.8080    0.8771    0.8411       667
           1     0.7029    0.5826    0.6371       333

    accuracy                         0.7790      1000
   macro avg     0.7555    0.7298    0.7391      1000
weighted avg     0.7730    0.7790    0.7732      1000



### Charcter swaping
#### All the words in the sentences are swaped their charcters while they're not stopwords, when their length is more than 3 letters.

In [7]:

stemmer = WordNetLemmatizer()
grammer = language_tool_python.LanguageTool('en-US')

data=pd.read_csv("data/latenthatred/latent_test.tsv", sep='\t')
data["post"]
sam1=[]
sam2=[]
post=[]

test_sam=pd.DataFrame()
for i in range(1000):
    sam1.append(data["post"][i])
    sam2.append(data["class"][i])
stop_words = set(stopwords.words('english'))
                
for i in range(len(sam1)):
    
    l=sam1[i].split()
    sen=[]
    count=0
    for i in l:
        if i in stop_words:
            sen.append(i)
            pass
        else:
            try:
                if len(i)<=3:
                    sen.append(i)
                else:
                    if count==2:
                        sen.append(i)
                    else:
                        t=list(i)
                        t[2],t[3]=t[3],t[2]
                        r="".join(t)
                        count=count+1
                        sen.append(r)
            except:
                sen.append(i)
    post.append(' '.join(sen))


test_sam['post']=post
test_sam['label']=sam2

test_sam.to_csv("hxsam_test2.tsv",sep="\t",index=False)

model_choice = 8
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])
#Load pre trained Model
model = Classifier()
model.load_state_dict(torch.load("mnt/saved_models/Latent_tws.pth", map_location=device))
model = model.to(device)
label_idx = 1
MAX_SEQ_LEN = 128
text_idx = 0

test_data = HateData(data_path="", split='', lang="hxsam_test2")
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)


model.eval()
test_loss = []
test_pred = []

#Record the prediction result  
wr = open(results_path + "test_prediction_" + base_model + "_" + "Hx" + ".txt", "w")    
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")
test_loss = np.mean(test_loss)#.item()
print("Test Loss: ", test_loss)
wr.close()

df_test = pd.read_csv("data/multilingual/test_"+"Hx"+".tsv", sep='\t', lineterminator='\n')
gt_labels = test_sam["label"]

print(classification_report(gt_labels, test_pred, digits=4))


Some weights of RobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1000/1000 [05:17<00:00,  3.15it/s]

Test Loss:  0.5482584201395512
              precision    recall  f1-score   support

           0     0.7869    0.9025    0.8408       667
           1     0.7234    0.5105    0.5986       333

    accuracy                         0.7720      1000
   macro avg     0.7552    0.7065    0.7197      1000
weighted avg     0.7658    0.7720    0.7601      1000






### Concatenation last charcter of the word to next word in each sentence

In [8]:
stemmer = WordNetLemmatizer()
grammer = language_tool_python.LanguageTool('en-US')

data=pd.read_csv("data/latenthatred/latent_test.tsv", sep='\t')
data["post"]
sam1=[]
sam2=[]
post=[]

test_sam=pd.DataFrame()
for i in range(1000):
    sam1.append(data["post"][i])
    sam2.append(data["class"][i])
stop_words = set(stopwords.words('english'))
                
for i in range(len(sam1)):
    
    l=sam1[i].split()
    sen=[]
    count=0
    word=""
    for i in l:
        if word!="":
            i=word+i
            word=""
        else:
            pass
        if i in stop_words:
            sen.append(i)
            pass
        else:
            try:
                if len(i)<=3:
                    sen.append(i)
                else:
                    if count==1:
                        sen.append(i)
                    else:
                        t=list(i)
                        word=t.pop()
                        #print(word)
                        r="".join(t)
                        count=count+1
                        sen.append(r)
            except:
                sen.append(i)
    post.append(' '.join(sen))


test_sam['post']=post
test_sam['label']=sam2

test_sam.to_csv("hxsam_test2.tsv",sep="\t",index=False)

model_choice = 8
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])
#Load pre trained Model
model = Classifier()
model.load_state_dict(torch.load("mnt/saved_models/Latent_tws.pth", map_location=device))
model = model.to(device)
label_idx = 1
MAX_SEQ_LEN = 128
text_idx = 0

test_data = HateData(data_path="", split='', lang="hxsam_test2")
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)


model.eval()
test_loss = []
test_pred = []

#Record the prediction result  
wr = open(results_path + "test_prediction_" + base_model + "_" + "Hx" + ".txt", "w")    
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")
test_loss = np.mean(test_loss)#.item()
print("Test Loss: ", test_loss)
wr.close()

df_test = pd.read_csv("data/multilingual/test_"+"Hx"+".tsv", sep='\t', lineterminator='\n')
gt_labels = test_sam["label"]

print(classification_report(gt_labels, test_pred, digits=4))

Some weights of RobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1000/1000 [05:15<00:00,  3.17it/s]

Test Loss:  0.5144728650208562
              precision    recall  f1-score   support

           0     0.8164    0.8666    0.8407       667
           1     0.6952    0.6096    0.6496       333

    accuracy                         0.7810      1000
   macro avg     0.7558    0.7381    0.7452      1000
weighted avg     0.7760    0.7810    0.7771      1000






### dropping few words from sentences

In [18]:
import pandas as pd
from nltk.corpus import stopwords,wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer,WordNetLemmatizer
import language_tool_python 
import nltk
nltk.download('stopwords')
nltk.download('wordnet')


stemmer = WordNetLemmatizer()
grammer = language_tool_python.LanguageTool('en-US')

data=pd.read_csv("data/Latenthatred/latent_test.tsv", sep='\t')
data["post"]
sam1=[]
sam2=[]
post=[]

test_sam=pd.DataFrame()
for i in range(500,1500):
    sam1.append(data["post"][i])
    sam2.append(data["class"][i])
stop_words = set(stopwords.words('english'))
                
for i in range(len(sam1)):
    
    l=sam1[i].split()
    if len(l)>3:
        sen=[]
        count=0
        word=""
        for i in l:
            sen.append(i)
        random.shuffle(sen)
        sen.pop()
        random.shuffle(sen)
        sen.pop()
        post.append(' '.join(sen))
        post
    else:
        post.append(sam1[i])


test_sam['post']=post
print(test_sam)
test_sam['label']=sam2

test_sam.to_csv("hxsam_test2.tsv",sep="\t",index=False)

model_choice = 8
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])
#Load pre trained Model
model = Classifier()
model.load_state_dict(torch.load("mnt/saved_models/latent_tws.pth", map_location=device))
model = model.to(device)
label_idx = 1
MAX_SEQ_LEN = 128
text_idx = 0

test_data = HateData(data_path="", split='', lang="hxsam_test2")
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)


model.eval()
test_loss = []
test_pred = []

#Record the prediction result  
wr = open(results_path + "test_prediction_" + base_model + "_" + "Hx" + ".txt", "w")    
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")
test_loss = np.mean(test_loss)#.item()
print("Test Loss: ", test_loss)
wr.close()

df_test = pd.read_csv("data/multilingual/test_"+"Hx"+".tsv", sep='\t', lineterminator='\n')
gt_labels = test_sam["label"]

print(classification_report(gt_labels, test_pred, digits=4))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


                                                  post
0    dylan random kori white media and he by is pen...
1    read s iran the the of holocaust more question...
2    racist make entire sense doesn why they and wh...
3    s disrespecting t loser kaepernick rally kraep...
4    and that all enemies states must be united all...
..                                                 ...
995  spencer better spencer know white should richa...
996  opposing speech hate foe for of of blasphemy a...
997  supporters conservatives debate their trumps w...
998  for thing people the prison guards good jobs t...
999  are to person 6 person 32 blacks are white to ...

[1000 rows x 1 columns]


Some weights of RobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1000/1000 [05:26<00:00,  3.06it/s]

Test Loss:  0.6366159313134849
              precision    recall  f1-score   support

           0     0.7541    0.9061    0.8231       660
           1     0.7005    0.4265    0.5302       340

    accuracy                         0.7430      1000
   macro avg     0.7273    0.6663    0.6766      1000
weighted avg     0.7359    0.7430    0.7235      1000






In [19]:

stemmer = WordNetLemmatizer()
grammer = language_tool_python.LanguageTool('en-US')

data=pd.read_csv("data/latenthatred/latent_test.tsv", sep='\t')
data["post"]
sam1=[]
sam2=[]
post=[]

test_sam=pd.DataFrame()
for i in range(1000):
    sam1.append(data["post"][i])
    sam2.append(data["class"][i])
stop_words = set(stopwords.words('english'))
                
for i in range(len(sam1)):
    
    l=sam1[i].split()
    sen=[]
    count=0
    word=""
    for i in l:
        sen.append(i)
    random.shuffle(sen)
    random.shuffle(sen)
    post.append(' '.join(sen))
    post


test_sam['post']=post
test_sam['label']=sam2

test_sam.to_csv("hxsam_test2.tsv",sep="\t",index=False)

model_choice = 8
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])
#Load pre trained Model
model = Classifier()
model.load_state_dict(torch.load("mnt/saved_models/Latent_tws.pth", map_location=device))
model = model.to(device)
label_idx = 1
MAX_SEQ_LEN = 128
text_idx = 0

test_data = HateData(data_path="", split='', lang="hxsam_test2")
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)


model.eval()
test_loss = []
test_pred = []

#Record the prediction result  
wr = open(results_path + "test_prediction_" + base_model + "_" + "Hx" + ".txt", "w")    
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")
test_loss = np.mean(test_loss)#.item()
print("Test Loss: ", test_loss)
wr.close()

df_test = pd.read_csv("data/multilingual/test_"+"Hx"+".tsv", sep='\t', lineterminator='\n')
gt_labels = test_sam["label"]

print(classification_report(gt_labels, test_pred, digits=4))

Some weights of RobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1000/1000 [05:26<00:00,  3.06it/s]

Test Loss:  0.5633190030883998
              precision    recall  f1-score   support

           0     0.7799    0.8981    0.8348       667
           1     0.7069    0.4925    0.5805       333

    accuracy                         0.7630      1000
   macro avg     0.7434    0.6953    0.7077      1000
weighted avg     0.7556    0.7630    0.7502      1000






### Adding Special Charcters to sentence

In [21]:
stemmer = WordNetLemmatizer()
grammer = language_tool_python.LanguageTool('en-US')
from string import punctuation
pn=[punctuation]
char=random.choice(pn)

data=pd.read_csv("data/latenthatred/latent_test.tsv", sep='\t')
data["post"]
sam1=[]
sam2=[]
post=[]

test_sam=pd.DataFrame()
for i in range(1000):
    sam1.append(data["post"][i])
    sam2.append(data["class"][i])
stop_words = set(stopwords.words('english'))
                
for i in range(len(sam1)):
    
    l=sam1[i].split()
    sen=[]
    count=0
    word=""
    for i in l:
        sen.append(i+char)
    post.append(' '.join(sen))
    post


test_sam['post']=post
test_sam['label']=sam2

test_sam.to_csv("hxsam_test2.tsv",sep="\t",index=False)

model_choice = 8
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])
#Load pre trained Model
model = Classifier()
model.load_state_dict(torch.load("mnt/saved_models/Latent_tws.pth", map_location=device))
model = model.to(device)
label_idx = 1
MAX_SEQ_LEN = 128
text_idx = 0

test_data = HateData(data_path="", split='', lang="hxsam_test2")
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)


model.eval()
test_loss = []
test_pred = []

#Record the prediction result  
wr = open(results_path + "test_prediction_" + base_model + "_" + "Hx" + ".txt", "w")    
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")
test_loss = np.mean(test_loss)#.item()
print("Test Loss: ", test_loss)
wr.close()

df_test = pd.read_csv("data/multilingual/test_"+"Hx"+".tsv", sep='\t', lineterminator='\n')
gt_labels = test_sam["label"]

print(classification_report(gt_labels, test_pred, digits=4))

Some weights of RobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
