# Multi-Task Learning Expansion

In [2]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


In [4]:
# SentenceTransformer_V2 subclasses torch.nn.Module. It encodes the input sentences with a BERT model into fixed-size shared embeddings,
# then passes those shared embeddings to a classification head and a sentiment head so both tasks are served by one backbone (multi-task learning).

class SentenceTransformer_V2(nn.Module):
    """Multi-task sentence model: one shared BERT encoder feeding two task heads.

    Sentences are tokenized, encoded by BERT, and mean-pooled into one
    fixed-size embedding per sentence. The same embedding is then passed to
    a sentence-classification head and a sentiment head.

    Args:
        model_name: Hugging Face checkpoint name used for both the tokenizer
            and the encoder.
        num_classes: Number of output classes of the sentence-classification
            head (default 5, e.g. happy, sad, angry, fear, disgust).
        num_sentiments: Number of output classes of the sentiment head
            (default 3: positive, negative, neutral).
        head_hidden_size: Width of the hidden layer inside each task head.
    """

    def __init__(self, model_name='bert-base-uncased', num_classes=5, num_sentiments=3, head_hidden_size=128):
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained(model_name)  # Tokenizer matching the chosen checkpoint
        self.bert = BertModel.from_pretrained(model_name)  # Shared encoder producing contextualized token embeddings

        # Read the embedding width from the checkpoint instead of hard-coding 768,
        # so checkpoints with a different hidden size also work.
        hidden_size = self.bert.config.hidden_size

        # Sentence classification head
        self.text_classification = nn.Sequential(
            nn.Linear(hidden_size, head_hidden_size),  # First layer of the classification head
            nn.ReLU(),  # Non-linearity between the two layers
            nn.Linear(head_hidden_size, num_classes),  # One logit per class
        )

        # Sentiment classification head
        self.sentiment = nn.Sequential(
            nn.Linear(hidden_size, head_hidden_size),  # First layer of the sentiment head
            nn.ReLU(),  # Non-linearity between the two layers
            nn.Linear(head_hidden_size, num_sentiments),  # One logit per sentiment
        )

    @staticmethod
    def _masked_mean_pool(hidden_states, attention_mask):
        """Average token embeddings over the sequence axis, ignoring masked-out positions."""
        mask = attention_mask.unsqueeze(-1).to(hidden_states.dtype)
        summed = (hidden_states * mask).sum(dim=1)
        # Clamp guards against division by zero for a row whose mask is all zeros.
        counts = mask.sum(dim=1).clamp(min=1e-9)
        return summed / counts

    def forward(self, sentences):
        """Encode `sentences` and return the logits of both task heads.

        Args:
            sentences: A sentence or list of sentences accepted by the tokenizer.

        Returns:
            Tuple `(classification_logits, sentiment_logits)` of raw,
            unnormalized scores with one row per sentence.
        """
        tokens = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

        # The tokenizer builds its tensors on CPU; move them next to the model weights.
        device = next(self.parameters()).device
        input_ids = tokens['input_ids'].to(device)
        token_type_ids = tokens['token_type_ids'].to(device)
        attention_mask = tokens['attention_mask'].to(device)

        outputs = self.bert(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)

        # Pool with the attention mask so padding tokens do not dilute the
        # embedding of the shorter sentences in a batch.
        embeddings = self._masked_mean_pool(outputs.last_hidden_state, attention_mask)

        classification_logits = self.text_classification(embeddings)  # Shared embedding -> classification logits
        sentiment_logits = self.sentiment(embeddings)  # Shared embedding -> sentiment logits

        return classification_logits, sentiment_logits

In [5]:
# Example sentences of different lengths: used to confirm that every sentence
# maps to a fixed-size embedding and to inspect sample outputs of both task heads.
sentences = [
    'I love Pizza',
    'I went to the zoo and saw a tiger',
    'After all, you are my wonderwall',
]

In [6]:
# Seed PyTorch first: the task heads are randomly initialised, so without a seed
# the logits change on every Restart & Run All.
torch.manual_seed(42)

# Instantiate the model and put it in evaluation mode, since this notebook only runs inference.
sentence_transformer = SentenceTransformer_V2().eval()



In [7]:
# Feed the list of sentences to the model. This is inference only, so gradient
# tracking is disabled to avoid building an autograd graph.
with torch.no_grad():
    class_logits, sentiment_logits = sentence_transformer(sentences)

In [11]:
# Checking the output shapes of both task heads.

# Classification head: one row per sentence, 5 possible outcomes (e.g. happy, sad, angry, fear, disgust)
print('The output shape of classification head:', class_logits.size())
# Sentiment head: one row per sentence, 3 possible outcomes (e.g. positive, negative, neutral)
print('The output shape of sentiment head:', sentiment_logits.size())

The output shape of classification head: torch.Size([3, 5])
The output shape of sentiment head: torch.Size([3, 3])
