In [46]:
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import classification_report, f1_score, recall_score, accuracy_score
from tqdm import tqdm
import numpy as np
import csv
from transformers import BertTokenizer
import csv, time, random, numpy as np, pandas as pd
from types import SimpleNamespace
import torch
from torch import nn
from torch.optim import AdamW
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from itertools import cycle
from transformers import BertModel, BertTokenizer
import pandas as pd

In [47]:
def preprocess_string(s):
    """Lower-case ``s`` and split '.', '?', ',' and apostrophes off the text before them.

    A space is inserted in front of every occurrence of those four marks, then
    all runs of whitespace are collapsed to single spaces and the ends trimmed.
    """
    text = s.lower()
    for mark in ('.', '?', ',', '\''):
        text = text.replace(mark, ' ' + mark)
    tokens = text.split()
    return ' '.join(tokens)

class SentenceClassificationDataset(Dataset):
    """Single-sentence classification records with a BERT-tokenizing collate function.

    Every item of ``dataset`` is indexed as ``(sentence, label, sentence_id)``.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx]

    def pad_data(self, data):
        """Tokenize a list of records into one batch padded to its longest sentence.

        Returns the tuple ``(token_ids, attention_mask, labels, sents, sent_ids)``:
        the first three are ``LongTensor``s, the last two are plain Python lists.
        """
        sents, labels, sent_ids = [], [], []
        for record in data:
            sents.append(record[0])
            # Labels are shifted down by one so that classes are 0-indexed.
            labels.append(int(record[1]) - 1)
            sent_ids.append(record[2])

        encoding = self.tokenizer(sents, return_tensors='pt', padding=True, truncation=True)
        return (
            torch.LongTensor(encoding['input_ids']),
            torch.LongTensor(encoding['attention_mask']),
            torch.LongTensor(labels),
            sents,
            sent_ids,
        )

    def collate_fn(self, all_data):
        """Collate a list of records into the dict of batch fields used by the loaders."""
        field_names = ('token_ids', 'attention_mask', 'labels', 'sents', 'sent_ids')
        return dict(zip(field_names, self.pad_data(all_data)))

class SentencePairDataset(Dataset):
    """Sentence-pair records with a BERT-tokenizing collate function.

    Every item of ``dataset`` is indexed as
    ``(sentence1, sentence2, label, sentence_id)``. When ``isRegression`` is
    true the labels are batched as a ``FloatTensor``, otherwise as a
    ``LongTensor``.
    """

    def __init__(self, dataset, isRegression =False):
        self.dataset = dataset
        self.isRegression = isRegression
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx]

    def pad_data(self, data):
        """Tokenize both sides of each pair separately and build the label tensor.

        Returns ``(token_ids, token_type_ids, attention_mask, token_ids2,
        token_type_ids2, attention_mask2, labels, sent_ids)``.
        """
        first_sents = [record[0] for record in data]
        second_sents = [record[1] for record in data]
        raw_labels = [record[2] for record in data]
        sent_ids = [record[3] for record in data]

        # Each side is padded independently, so the two sides may differ in width.
        enc_first = self.tokenizer(first_sents, return_tensors='pt', padding=True, truncation=True)
        enc_second = self.tokenizer(second_sents, return_tensors='pt', padding=True, truncation=True)

        tensor_keys = ('input_ids', 'attention_mask', 'token_type_ids')
        ids_first, mask_first, types_first = (torch.LongTensor(enc_first[key]) for key in tensor_keys)
        ids_second, mask_second, types_second = (torch.LongTensor(enc_second[key]) for key in tensor_keys)

        label_tensor_type = torch.FloatTensor if self.isRegression else torch.LongTensor
        labels = label_tensor_type(raw_labels)

        return (ids_first, types_first, mask_first,
                ids_second, types_second, mask_second,
                labels, sent_ids)

    def collate_fn(self, all_data):
        """Collate a list of pair records into the dict of batch fields used by the loaders."""
        field_names = (
            'token_ids_1', 'token_type_ids_1', 'attention_mask_1',
            'token_ids_2', 'token_type_ids_2', 'attention_mask_2',
            'labels', 'sent_ids',
        )
        return dict(zip(field_names, self.pad_data(all_data)))

In [61]:

def _load_sentence_label_rows(filename):
    """Read a tab-separated file with 'sentence', 'id' and 'sentiment' columns.

    Returns a list of ``(sentence, label, sentence_id)`` tuples where the
    sentence and id are lower-cased and stripped and the label is an ``int``.
    """
    rows = []
    # newline='' is what the csv module asks for so it can handle line endings itself.
    with open(filename, 'r', newline='') as fp:
        for record in csv.DictReader(fp, delimiter='\t'):
            sent = record['sentence'].lower().strip()
            sent_id = record['id'].lower().strip()
            label = int(record['sentiment'].strip())
            rows.append((sent, label, sent_id))
    return rows


def load_multitask_data( sentiment_filename, paraphrase_filename, similarity_filename, emotion_filename, split='train'):
    """Load the four tab-separated task files of the multitask setup.

    Args:
        sentiment_filename: file with 'id', 'sentence' and 'sentiment' columns.
        paraphrase_filename: file with 'id', 'sentence1', 'sentence2' and
            'is_duplicate' columns.
        similarity_filename: file with 'id', 'sentence1', 'sentence2' and
            'similarity' columns.
        emotion_filename: file with 'id', 'sentence' and 'sentiment' columns
            (the emotion label is read from the 'sentiment' column).
        split: name of the split; only used in the progress messages.

    Returns:
        ``(sentiment_data, paraphrase_data, similarity_data, emotion_data)``.
        Sentiment and emotion entries are ``(sentence, int_label, id)``;
        paraphrase entries are ``(sentence1, sentence2, int_label, id)``;
        similarity entries are ``(sentence1, sentence2, float_score, id)``.

    Malformed paraphrase rows are skipped (best effort) and the number of
    skipped rows is reported; malformed rows in the other files raise.
    """
    sentiment_data = _load_sentence_label_rows(sentiment_filename)
    print(f"Loaded {len(sentiment_data)} {split} examples from {sentiment_filename}")

    emotion_data = _load_sentence_label_rows(emotion_filename)
    print(f"Loaded {len(emotion_data)} {split} examples from {emotion_filename}")

    paraphrase_data = []
    skipped_paraphrase_rows = 0
    with open(paraphrase_filename, 'r', newline='') as fp:
        for record in csv.DictReader(fp, delimiter='\t'):
            try:
                sent_id = record['id'].lower().strip()
                paraphrase_data.append((preprocess_string(record['sentence1']),
                                        preprocess_string(record['sentence2']),
                                        int(float(record['is_duplicate'])),
                                        sent_id))
            except (KeyError, ValueError, TypeError, AttributeError):
                # Missing column, short row (None field) or unparsable label:
                # keep the best-effort skip, but only for row-level data errors
                # so that e.g. KeyboardInterrupt is no longer swallowed.
                skipped_paraphrase_rows += 1

    print(f"Loaded {len(paraphrase_data)} {split} examples from {paraphrase_filename}")
    if skipped_paraphrase_rows:
        print(f"Skipped {skipped_paraphrase_rows} malformed rows in {paraphrase_filename}")

    similarity_data = []
    with open(similarity_filename, 'r', newline='') as fp:
        for record in csv.DictReader(fp, delimiter='\t'):
            sent_id = record['id'].lower().strip()
            similarity_data.append((preprocess_string(record['sentence1']),
                                    preprocess_string(record['sentence2']),
                                    float(record['similarity']),
                                    sent_id))

    print(f"Loaded {len(similarity_data)} {split} examples from {similarity_filename}")
    return sentiment_data, paraphrase_data, similarity_data, emotion_data

In [62]:
# Settings shared by the evaluation cells; 'batch_size' is the DataLoader batch size.
args = dict(batch_size=2)

In [63]:
# Tab-separated test files, one per task.
files= { 'sst_file': './test_data/test_sentiment.csv', 'para_file': './test_data/test_paraphase.csv', 'sts_file': './test_data/test_similarity.csv', 'emt_file': './test_data/test_emotion.csv' }

# split='test' only affects the progress messages, which otherwise label these files as "train".
sst_test_data, para_test_data, sts_test_data, emt_test_data = load_multitask_data(
    files['sst_file'], files['para_file'], files['sts_file'], files['emt_file'], split='test')

# Evaluation loaders are not shuffled so that predictions come out in file order
# and repeated runs visit the examples identically.
sst_test_data = SentenceClassificationDataset(sst_test_data)
sst_test_dataloader = DataLoader(sst_test_data, shuffle=False, batch_size=args['batch_size'], collate_fn=sst_test_data.collate_fn)

emt_test_data = SentenceClassificationDataset(emt_test_data)
emt_test_dataloader = DataLoader(emt_test_data, shuffle=False, batch_size=args['batch_size'], collate_fn=emt_test_data.collate_fn)

para_test_data = SentencePairDataset(para_test_data)
para_test_dataloader = DataLoader(para_test_data, shuffle=False, batch_size=args['batch_size'], collate_fn=para_test_data.collate_fn)

# Similarity scores are floats: isRegression=True keeps them in a FloatTensor
# instead of pushing them through the integer LongTensor label path.
sts_test_data = SentencePairDataset(sts_test_data, isRegression=True)
sts_test_dataloader = DataLoader(sts_test_data, shuffle=False, batch_size=args['batch_size'], collate_fn=sts_test_data.collate_fn)

Loaded 30 train examples from ./test_data/test_sentiment.csv
Loaded 30 train examples from ./test_data/test_emotion.csv
Loaded 30 train examples from ./test_data/test_paraphase.csv
Loaded 30 train examples from ./test_data/test_similarity.csv


In [64]:
class Bert_MultiTask(nn.Module):
    """BERT encoder shared by four task heads: sentiment, emotion, paraphrase and similarity.

    Every head is a stack of ``config.n_hidden_layers`` hidden
    ``Linear(768, 768)`` + ReLU layers followed by one output ``Linear``, with a
    ``Dropout`` in front of each linear layer. The encoder and all heads are
    created in ``torch.float16``.
    """

    def __init__(self,config):
        """Build the encoder, tokenizer and the four heads.

        Args:
            config: object providing ``hidden_dropout_prob`` and ``n_hidden_layers``.
        """
        super(Bert_MultiTask, self).__init__()
        self.model = BertModel.from_pretrained("bert-base-uncased", torch_dtype=torch.float16)
        self.model.to("cpu")
        self.tokenizer= BertTokenizer.from_pretrained("bert-base-uncased")
        BERT_HIDDEN_SIZE = 768

        N_SENTIMENT_CLASSES = 5
        N_EMOTION_CLASSES= 14

        def build_head(n_outputs):
            # Dropouts first, then linears, one dropout per linear layer.
            dropouts = nn.ModuleList(
                [nn.Dropout(config.hidden_dropout_prob) for _ in range(config.n_hidden_layers + 1)])
            linears = nn.ModuleList(
                [nn.Linear(BERT_HIDDEN_SIZE, BERT_HIDDEN_SIZE, dtype=torch.float16)
                 for _ in range(config.n_hidden_layers)]
                + [nn.Linear(BERT_HIDDEN_SIZE, n_outputs, dtype=torch.float16)])
            return dropouts, linears

        # The heads are registered in a fixed order (sentiment, emotion,
        # paraphrase, similarity), dropouts before linears within each head.

        # Sentiment classification head
        self.dropout_sentiment, self.linear_sentiment = build_head(N_SENTIMENT_CLASSES)
        self.last_linear_sentiment = None

        # Emotion detection head
        self.dropout_emotion, self.linear_emotion = build_head(N_EMOTION_CLASSES)
        self.last_linear_emotion = None

        # Paraphrase detection head (single logit)
        self.dropout_paraphrase, self.linear_paraphrase = build_head(1)

        # Semantic textual similarity head (single score)
        self.dropout_similarity, self.linear_similarity = build_head(1)

    def forward(self, input_ids, attention_mask, task_id):
        """Encode a batch and return the hidden state at sequence position 0.

        The encoder runs under CPU autocast with ``torch.float16``. ``task_id``
        is accepted for the callers' benefit but is not used here.
        """
        with torch.autocast(device_type='cpu', dtype=torch.float16):
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)

        # Position 0 holds the [CLS] token embedding.
        cls_embedding = outputs.last_hidden_state[:, 0, :]
        return cls_embedding

    @staticmethod
    def _run_head(x, dropouts, linears):
        """Apply one task head: (dropout -> linear -> ReLU) per hidden layer, then dropout -> linear."""
        *hidden_dropouts, final_dropout = dropouts
        *hidden_linears, final_linear = linears
        # Tensor.to() is not in-place, so the converted tensor has to be rebound.
        # Casting to the head's own parameter dtype (float16 as built in __init__)
        # is a no-op when the encoder output already has that dtype.
        x = x.to(final_linear.weight.dtype)
        for dropout, linear in zip(hidden_dropouts, hidden_linears):
            x = F.relu(linear(dropout(x)))
        return final_linear(final_dropout(x))

    def last_layers_sentiment(self, x):
        """Map sentence embeddings to sentiment logits (one column per class)."""
        return self._run_head(x, self.dropout_sentiment, self.linear_sentiment)

    def predict_sentiment(self, input_ids, attention_mask):
        """Return sentiment logits for a batch of tokenized sentences."""
        x = self.forward(input_ids, attention_mask, task_id=0 )
        return self.last_layers_sentiment(x)

    def last_layers_emotion(self, x):
        """Map sentence embeddings to emotion logits (one column per class)."""
        return self._run_head(x, self.dropout_emotion, self.linear_emotion)

    def predict_emotion(self, input_ids, attention_mask):
        """Return emotion logits for a batch of tokenized sentences."""
        x = self.forward(input_ids, attention_mask, task_id=3 )
        return self.last_layers_emotion(x)

    def get_similarity_paraphrase_embeddings(self, input_ids_1, attention_mask_1, input_ids_2, attention_mask_2, task_id):
        """Encode a sentence pair as one sequence: ``ids_1 [SEP] ids_2 [SEP]``.

        The two id tensors are concatenated as given along the sequence
        dimension, with one extra [SEP] id (attention 1) after each.
        NOTE(review): when the inputs were tokenized separately with padding,
        their own special tokens and padding end up inside the joined
        sequence -- confirm this matches how the model was trained.
        """
        # One [SEP] id per row of the batch
        sep_token_id = torch.tensor([self.tokenizer.sep_token_id], dtype=torch.long, device=input_ids_1.device)
        batch_sep_token_id = sep_token_id.repeat(input_ids_1.shape[0], 1)
        sep_attention = torch.ones_like(batch_sep_token_id)

        input_id = torch.cat((input_ids_1, batch_sep_token_id, input_ids_2, batch_sep_token_id), dim=1)
        attention_mask = torch.cat((attention_mask_1, sep_attention, attention_mask_2, sep_attention), dim=1)
        return self.forward(input_id, attention_mask, task_id=task_id)

    def last_layers_paraphrase(self, x):
        """Map pair embeddings to a single paraphrase logit per example."""
        return self._run_head(x, self.dropout_paraphrase, self.linear_paraphrase)

    def predict_paraphrase(self, input_ids_1, attention_mask_1, input_ids_2, attention_mask_2):
        """Return the (un-normalized) paraphrase logit for each sentence pair."""
        x = self.get_similarity_paraphrase_embeddings(input_ids_1, attention_mask_1, input_ids_2, attention_mask_2, task_id=1)
        return self.last_layers_paraphrase(x)

    def last_layers_similarity(self, x):
        """Map pair embeddings to a similarity score squashed into the range 1..5."""
        preds = self._run_head(x, self.dropout_similarity, self.linear_similarity)
        # sigmoid gives (0, 1); scaling by 4 and shifting by 1 gives (1, 5).
        return torch.sigmoid(preds) * 4 + 1

    def predict_similarity(self,input_ids_1, attention_mask_1,input_ids_2, attention_mask_2):
        """Return the similarity score for each sentence pair."""
        x = self.get_similarity_paraphrase_embeddings(input_ids_1, attention_mask_1, input_ids_2, attention_mask_2, task_id=2)
        return self.last_layers_similarity(x)
    
device= torch.device("cpu")
# SECURITY NOTE: weights_only=False unpickles the whole model object, which can
# execute arbitrary code -- only load checkpoints from a trusted source.
# map_location remaps the stored tensors onto `device` while loading, so a
# checkpoint that was saved from a GPU can still be opened on a CPU-only machine.
model= torch.load('multimodel.pt', map_location=device, weights_only=False).to(device)
# Switch to inference mode (disables dropout).
model.eval()

Bert_MultiTask(
  (model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

## Sentiment Test Set Evaluation

In [65]:
# Run the sentiment head over the test loader and score it with macro-F1 and accuracy.
y_true = []; y_pred = []; sents = []
sent_ids = []
n_nonfinite_batches = 0
with torch.no_grad():
    for batch in tqdm(sst_test_dataloader, desc='eval'):
        b_ids = batch['token_ids'].to(device)
        b_mask = batch['attention_mask'].to(device)

        logits = model.predict_sentiment(b_ids, b_mask)
        logits = logits.detach().cpu().numpy()
        # np.argmax returns the position of the first NaN in a row, so NaN
        # logits would silently turn into class-0 predictions; count them.
        if not np.isfinite(logits).all():
            n_nonfinite_batches += 1
        preds = np.argmax(logits, axis=1).flatten()
        # Collect labels as NumPy integers, like the other evaluation cells do.
        b_labels = batch['labels'].flatten().cpu().numpy()

        y_true.extend(b_labels)
        y_pred.extend(preds)
        sents.extend(batch['sents'])
        sent_ids.extend(batch['sent_ids'])

if n_nonfinite_batches:
    print(f"WARNING: {n_nonfinite_batches} batch(es) produced non-finite sentiment logits; the metrics below are not meaningful.")

f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)

print(f"F1 Score has come out to be: {f1}\nAccuracy for the test set: {acc}")

eval: 100%|██████████| 15/15 [04:29<00:00, 17.94s/it]

F1 Score has come out to be: 0.06666666666666667
Accuracy for the test set: 0.2





## Paraphrase Test Set Evaluation

In [66]:
# Run the paraphrase head over the test loader and report plain accuracy.
para_y_pred = []; para_sent_ids = []; para_y_true= []
n_nonfinite_batches = 0
with torch.no_grad():
    for batch in tqdm(para_test_dataloader, desc='eval'):
        b_ids1 = batch['token_ids_1'].to(device)
        b_mask1 = batch['attention_mask_1'].to(device)
        b_ids2 = batch['token_ids_2'].to(device)
        b_mask2 = batch['attention_mask_2'].to(device)

        logits = model.predict_paraphrase(b_ids1, b_mask1, b_ids2, b_mask2)
        # sigmoid -> probability, round -> hard 0/1 decision
        y_hat = logits.sigmoid().round().flatten().cpu().numpy()
        # A NaN prediction never equals a label, so it would silently be scored
        # as an error; count such batches so the accuracy can be interpreted.
        if not np.isfinite(y_hat).all():
            n_nonfinite_batches += 1

        b_labels = batch['labels'].flatten().cpu().numpy()

        para_y_pred.extend(y_hat)
        para_y_true.extend(b_labels)
        para_sent_ids.extend(batch['sent_ids'])

if n_nonfinite_batches:
    print(f"WARNING: {n_nonfinite_batches} batch(es) produced non-finite paraphrase predictions; the accuracy below is not meaningful.")

paraphrase_accuracy = np.mean(np.array(para_y_pred) == np.array(para_y_true))
print(f"Paraphase Accuracy has come  to be: {paraphrase_accuracy}")

eval: 100%|██████████| 15/15 [02:39<00:00, 10.66s/it]

Paraphase Accuracy has come  to be: 0.0





## Similarity Test Set Evaluation

In [67]:
# Run the similarity head over the test loader and report the Pearson correlation
# between predicted and gold scores.
sts_y_true = [];    sts_y_pred = [];    sts_sent_ids = []
with torch.no_grad():
    for batch in tqdm(sts_test_dataloader, desc='eval'):
        b_ids1 = batch['token_ids_1'].to(device)
        b_mask1 = batch['attention_mask_1'].to(device)
        b_ids2 = batch['token_ids_2'].to(device)
        b_mask2 = batch['attention_mask_2'].to(device)

        logits = model.predict_similarity(b_ids1, b_mask1, b_ids2, b_mask2)
        y_hat = logits.flatten().cpu().numpy()
        b_labels = batch['labels'].flatten().cpu().numpy()

        sts_y_pred.extend(y_hat);   sts_y_true.extend(b_labels);    sts_sent_ids.extend(batch['sent_ids'])

# Work in float64 and explain a NaN correlation instead of just printing it:
# it appears when predictions contain NaN/inf or when they are all identical.
sts_pred_arr = np.asarray(sts_y_pred, dtype=np.float64)
sts_true_arr = np.asarray(sts_y_true, dtype=np.float64)
if not np.isfinite(sts_pred_arr).all():
    print("WARNING: similarity predictions contain non-finite values; the correlation below is not meaningful.")
elif sts_pred_arr.size and np.ptp(sts_pred_arr) == 0:
    print("WARNING: similarity predictions are constant; the correlation below is undefined.")

# Calculating pearson matrix
pearson_mat = np.corrcoef(sts_pred_arr, sts_true_arr)
sts_corr = pearson_mat[1][0]
print(f"The Correlation Coefficient from the pearson matrix: {sts_corr}")

eval: 100%|██████████| 15/15 [01:20<00:00,  5.38s/it]

The Correlation Coefficient from the pearson matrix: nan





## Emotion Test Set Evaluation

In [68]:
# Run the emotion head over the test loader and score it with macro-F1 and accuracy.
emt_y_true = []
emt_y_pred = []
emt_sent_ids = []
n_nonfinite_batches = 0

with torch.no_grad():
    for batch in tqdm(emt_test_dataloader, desc='eval'):
        b_ids = batch['token_ids'].to(device)
        b_mask = batch['attention_mask'].to(device)

        logits = model.predict_emotion(b_ids, b_mask)
        # argmax over non-finite logits still returns an index, which would
        # hide a broken model behind ordinary-looking predictions; count them.
        if not bool(torch.isfinite(logits).all()):
            n_nonfinite_batches += 1
        y_hat = logits.argmax(dim=-1).flatten().cpu().numpy()
        b_labels = batch['labels'].flatten().cpu().numpy()

        emt_y_pred.extend(y_hat);   emt_y_true.extend(b_labels);    emt_sent_ids.extend(batch['sent_ids'])

if n_nonfinite_batches:
    print(f"WARNING: {n_nonfinite_batches} batch(es) produced non-finite emotion logits; the metrics below are not meaningful.")

f1 = f1_score(emt_y_true, emt_y_pred, average='macro')
acc = accuracy_score(emt_y_true, emt_y_pred)
print(f"F1 Score has come out to be: {f1}\nAccuracy for the test set: {acc}")

eval: 100%|██████████| 15/15 [01:34<00:00,  6.27s/it]

F1 Score has come out to be: 0.01075268817204301
Accuracy for the test set: 0.03333333333333333



