# Task1: Model 1


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import json
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import torch.nn.functional as F
import torch.nn as nn
from tqdm import tqdm

class EmotionDataset(Dataset):
    def __init__(self, data_file, max_length=512):
        with open(data_file, 'r') as f:
            data = json.load(f)
        self.data = data
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        self.max_length = max_length
        self.label_mapping = {
            'disgust': 0,
            'anger': 1,
            'sadness': 2,
            'surprise': 3,
            'neutral': 4,
            'joy': 5,
            'fear': 6,
            '<PAD>': 7
        }

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        utterance = self.data[idx]['utterances']
        speaker = self.data[idx]['speakers']
        final_utterance = [u + ' ' + s for u, s in zip(utterance, speaker)]
        final_utterance = final_utterance + [self.tokenizer.pad_token] * (24 - len(final_utterance))

        inputs = self.tokenizer(final_utterance, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='pt')
        input_ids = inputs['input_ids']

        return input_ids

class BiLSTMWithEmotions(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=2, num_classes=768, dropout=0.1):
        super(BiLSTMWithEmotions, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_size * 2, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.fc3 = nn.Linear(768, 192)
        self.relu = nn.ReLU()
        model_name = 'bert-base-uncased'
        self.model = BertModel.from_pretrained(model_name)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, input_ids):
        input_ids = input_ids.to(torch.float32)  # Cast input tensor to float32
        lstm_output, _ = self.lstm(input_ids)
        lstm_output = self.dropout(lstm_output)
        pooled_output, _ = torch.max(lstm_output, dim=1)
        fc1_output = self.relu(self.fc1(pooled_output))
        output = self.fc3(self.fc2(fc1_output))
        output = self.softmax(output.view(output.size(0), 24, -1))
        return output



def inference(model, test_loader, device, label_mapping):
    model.eval()
    predictions = []

    with torch.no_grad():
        for inputs in tqdm(test_loader):
            inputs = inputs.to(device)
            outputs = model(inputs)
            predicted = torch.argmax(outputs, 2)
            predicted_labels = [[label_mapping[idx.item()] for idx in row] for row in predicted]  # Map integers to emotion labels
            predictions.extend(predicted_labels)

    return predictions



# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the saved model
model_path = "/Model1.pkl"
model = BiLSTMWithEmotions(input_size=512)
model.load_state_dict(torch.load(model_path, map_location=device))  # Load model parameters to the correct device
model.eval()

# Move model to the correct device
model.to(device)

# Load the test data
test_data_file = '/Data/test_file.json'
test_dataset = EmotionDataset(test_data_file)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the label mapping
label_mapping = {
    0: 'disgust',
    1: 'anger',
    2: 'sadness',
    3: 'surprise',
    4: 'neutral',
    5: 'joy',
    6: 'fear',
    7: '<PAD>'
}

predictions = inference(model, test_loader, device, label_mapping)
print(predictions)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

100%|██████████| 1/1 [00:00<00:00,  5.90it/s]

[['neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']]





# Task1: Model 2


In [2]:
import json
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, GPT2Tokenizer

class EmotionTriggerDataset(Dataset):
    def __init__(self, data_file, max_length=512):
        with open(data_file, 'r') as f:
            data = json.load(f)

        self.data = data
        self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.bert_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        self.gpt_tokenizer.add_special_tokens({'pad_token': '[PAD]'})

        self.max_length = max_length
        self.label_mapping = {'disgust': 0,
                               'anger': 1,
                               'sadness': 2,
                               'surprise': 3,
                               'neutral': 4,
                               'joy': 5,
                               'fear': 6,
                              '<pad>': 7}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # if idx in [513,655]:
        #     idx=512

        speaker = self.data[idx]['speakers']
        utterances = self.data[idx]['utterances']
        emotions = self.data[idx]['emotions']
        last = len(emotions)
        final_utterance = [s + '  ' + u + '  '  for s, u,in zip(speaker, utterances)] # X
        final_utterance = final_utterance + [self.bert_tokenizer.pad_token] * (24 - len(final_utterance))


        bert_inputs = self.bert_tokenizer(final_utterance, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='np')
        bert_input_ids = torch.tensor(bert_inputs['input_ids'],dtype=torch.float32)
        emotions = [self.label_mapping[i] for i in emotions]
        emotions += [7] * (24 - len(emotions))
        emotional_label = torch.tensor(emotions, dtype=torch.float32)

        return bert_input_ids, emotional_label,last

In [3]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, GPT2Model, BertTokenizer, GPT2Tokenizer
from tqdm import tqdm
import math
import os
from tempfile import TemporaryDirectory
from typing import Tuple

import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer,TransformerDecoderLayer,TransformerDecoder
from torch.utils.data import dataset
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

class Encoder(nn.Module):
    def __init__(self, d_model: int =768, nhead: int=8, d_hid: int=200,nlayers: int=6, dropout: float = 0.2):
        super(Encoder, self).__init__()
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.pos_encoder = PositionalEncoding(d_model, dropout)


    def forward(self, x):

        x= x.permute(1,0,2)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        return x

class Decoder(nn.Module):
    def __init__(self, d_model: int =768, nhead: int=64, d_hid: int=200,nlayers: int=8, dropout: float = 0.2):
        super(Decoder, self).__init__()
        decoder_layers = TransformerDecoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
        self.pos_encoder = PositionalEncoding(d_model, dropout)


    def forward(self,last,memory):
        # print(last.shape)
        x= last.permute(1,0,2)
        x = self.pos_encoder(x)
        x = self.transformer_decoder(x,memory)
        return x




# class EmotionClassifier(nn.Module):
#     def __init__(self, input_size=512, hidden_size=64, output_size=1, num_layers=1):
#         super(EmotionClassifier, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False)
#         self.fc = nn.Linear(hidden_size, output_size)
#         self.activation = nn.Softmax(dim=2)

#     def forward(self, x):
#         # Initialize hidden state with zeros

#         # Forward propagate LSTM
#         out, _ = self.rnn(x)

#         print("hidden - ",out.shape)
#         out = self.fc(out)  # out: tensor of shape (batch_size, seq_length, output_size)

#         # Apply softmax activation function to get probabilities
#         out = self.activation(out)

#         return out

import torch
import torch.nn as nn

# class EmotionClassifier(nn.Module):
#     def __init__(self, input_size=512, hidden_size=64, output_size=24, num_layers=1):
#         super(EmotionClassifier, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
#         self.fc = nn.Linear(hidden_size, output_size)
#         self.activation = nn.Sigmoid()

#     def forward(self, x):
#         # Initialize hidden state with zeros
#         h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
#         c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

#         # Forward propagate LSTM
#         out, _ = self.rnn(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

#         # Decode the hidden state of the last time step
#         out = self.fc(out)  # out: tensor of shape (batch_size, seq_length, output_size)

#         # Apply sigmoid activation function to get values between 0 and 1
#         out = self.activation(out)

#         return out.unsqueeze(0)


# Access the output embeddings or other model outputs as needed

class EmotionClassifier(nn.Module):
    def __init__(self, input_size=512, hidden_size=768, output_size=24, num_layers=1):
        super(EmotionClassifier, self).__init__()
        self.hidden_size = hidden_size

        self.num_layers = num_layers

        self.dim_change = nn.Linear(input_size, 768)
        model_name = 'bert-base-uncased'  # Example model name

        self.model = BertModel.from_pretrained(model_name)

        self.encoder = self.model.encoder
        self.pooler = self.model.pooler

        self.fc = nn.Linear(hidden_size, output_size*8)

        self.activation = nn.Sigmoid()

    def forward(self, x):
        out = self.dim_change(x)

        # Initialize hidden state with zeros
        out = self.encoder(out)[0].permute(1,0,2)
        out = self.pooler(out)
        out = self.fc(out)

        out = out.view(out.size(0),24,8)
        # Apply sigmoid activation function to get values between 0 and 1
        out = self.activation(out)

        return out



In [4]:
import torch
import torch.nn as nn

class EmotionTriggerModel(nn.Module):
    def __init__(self, d_model=512, nhead=128, d_hid=512, encoder_layers=32, decoder_layers=32, hidden_size=768, output_size=24):
        super(EmotionTriggerModel, self).__init__()

        self.encoder = Encoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=encoder_layers)
        self.decoder = Decoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=decoder_layers)
        self.emotion_classifier = EmotionClassifier(input_size=d_model, hidden_size=hidden_size, output_size=output_size)

    def forward(self, input_ids, last):
        # print(input_ids.shape,'aaa')
        encoder_output = self.encoder(input_ids)
        # last_output = encoder_output[-1].unsqueeze(0)  # Taking the last output from encoder
        decoder_output = self.decoder(last, encoder_output)
        emotion_output = self.emotion_classifier(decoder_output)
        # print("emotion_output_class", emotion_output.shape)
        x = emotion_output.squeeze(0)
        return x.squeeze(0)


In [5]:
label_mapping = {
    'disgust': 0,
    'anger': 1,
    'sadness': 2,
    'surprise': 3,
    'neutral': 4,
    'joy': 5,
    'fear': 6,
    '<pad>': 7
}

# Create reverse dictionary
reverse_label_mapping = {v: k for k, v in label_mapping.items()}

print(reverse_label_mapping)

{0: 'disgust', 1: 'anger', 2: 'sadness', 3: 'surprise', 4: 'neutral', 5: 'joy', 6: 'fear', 7: '<pad>'}


In [6]:
# test data dataloader
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
test_file = "/Data/val_file.json"
dataset = EmotionTriggerDataset(test_file)
test_loader = DataLoader(dataset, batch_size=5, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_path = '/Model1.pth'
model = EmotionTriggerModel()
model_state_dict = torch.load(model_path)
model.load_state_dict(model_state_dict)

model.to(device)
model.eval()
f1_scores = []
acc = []
predicted = []
labels_see = []
for batch in tqdm(test_loader):
        inputs,  labels,last_idx = batch
        inputs = inputs.to(device)
        last = torch.ones((inputs.shape[0],1,512)).to(device)
        labels = labels.to(device)
        labels_one_hot = F.one_hot(labels.long(), num_classes=8).float()

        outputs = model(inputs, last)

        predicted_labels = (outputs >= 0.5).float()
        predicted_labels_arg = torch.argmax(predicted_labels, dim=2)
        try:
            predicted.append((predicted_labels_arg[0].clone(),last_idx[0].clone()))
            predicted.append((predicted_labels_arg[1].clone(),last_idx[1].clone()))
            predicted.append((predicted_labels_arg[2].clone(),last_idx[2].clone()))
            predicted.append((predicted_labels_arg[3].clone(),last_idx[3].clone()))
            predicted.append((predicted_labels_arg[4].clone(),last_idx[4].clone()))
        except Exception as e:
            print(e)
        predicted_labels_flatten = predicted_labels_arg.flatten()
        labels_flatten = labels.flatten()

        # print(labels_flatten)
        # print("-"*50)
        # print(predicted_labels_flatten)
        # print("*"*100)

        f1 = f1_score(labels_flatten.to('cpu'), predicted_labels_flatten.to('cpu'), average='macro')
        f1_scores.append(f1)

        val_acc =  accuracy_score(labels_flatten.to('cpu'), predicted_labels_flatten.to('cpu'))
        acc.append(val_acc)
        # correct = (predicted_labels == labels_one_hot).all(dim=-1).sum().item()
        # correct_predictions += correct
        # total_predictions += labels.numel()
test_accuracy = np.mean(np.array(acc))
f1_score_test = np.mean(np.array(f1_scores))
print("Test accuracy: ", test_accuracy)
print(f"Test F1 Score: {f1_score_test:.4f}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

100%|██████████| 169/169 [00:19<00:00,  8.82it/s]

index 3 is out of bounds for dimension 0 with size 3
Test accuracy:  0.47777777777777775
Test F1 Score: 0.1198





In [7]:
pred = []
for i,j in predicted:
  temp = []
  for z in i[:j.item()+1].cpu().numpy():

    temp.append(z)
  pred.append(temp)

In [8]:
mapped_emotions = []
for sublist in pred:
    mapped_sublist = [reverse_label_mapping[label] for label in sublist]
    mapped_emotions.append(mapped_sublist)

# Print mapped emotions
print(mapped_emotions)

[['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'anger', 'disgust', 'disgust', 'disgust', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'], ['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'anger', 'disgust', 'disgust', 'disgust', '<pad>', '<pad>', '<pad>'], ['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'anger', 'disgust'], ['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust'], ['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'anger', 'disgust', 'disgust', 'disgust', '<pad>'], ['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'anger', 'disgust', 'disgust', 'disgust', '<pad>', '<pad>'], ['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust'], ['disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'disgust', 'anger

# Task2: Model 1

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# libraries
import json
import torch

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, GPT2Model, BertTokenizer, GPT2Tokenizer
from tqdm import tqdm
import math
import os
from tempfile import TemporaryDirectory
from typing import Tuple

from torch.nn import TransformerEncoder, TransformerEncoderLayer,TransformerDecoderLayer,TransformerDecoder
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

In [4]:
# dataclass
class EmotionTriggerDataset(Dataset):
    def __init__(self, data_file, max_length=512):
        with open(data_file, 'r') as f:
            data = json.load(f)

        self.data = data
        self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.bert_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        self.gpt_tokenizer.add_special_tokens({'pad_token': '[PAD]'})

        self.max_length = max_length
        self.label_mapping = {'disgust': 0,
                               'anger': 1,
                               'sadness': 2,
                               'surprise': 3,
                               'neutral': 4,
                               'joy': 5,
                               'fear': 6,
                              '<pad>': 8}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        last_idx = len(self.data[idx]['speakers'])-1
        speaker = self.data[idx]['speakers']
        utterances = self.data[idx]['utterances']
        emotions = self.data[idx]['emotions']
        final_utterance = [s + '  ' + u + '  ' + e for s, u, e in zip(speaker, utterances, emotions)] # X
        final_utterance = final_utterance + [self.bert_tokenizer.pad_token] * (24 - len(final_utterance))

        triggers = self.data[idx]['triggers'] # y
        temp = []
        for i in triggers:
            if i == None:
                temp.append(0)
            else:
                temp.append(i)
        temp += [2] * (24 - len(temp))   # 2 is pad token

        bert_inputs = self.bert_tokenizer(final_utterance, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='np')
        bert_input_ids = torch.tensor(bert_inputs['input_ids'],dtype=torch.float32)

        trigger_labels = torch.tensor(temp, dtype=torch.float32)

        return bert_input_ids, bert_input_ids[last_idx].unsqueeze(0), trigger_labels

In [3]:
# model class
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: nn) -> nn:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

class Encoder(nn.Module):
    def __init__(self, d_model: int =512, nhead: int=64, d_hid: int=200,nlayers: int=8, dropout: float = 0.2):
        super(Encoder, self).__init__()
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.pos_encoder = PositionalEncoding(d_model, dropout)


    def forward(self, x):
        x= x.permute(1,0,2)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        return x

class Decoder(nn.Module):
    def __init__(self, d_model: int =512, nhead: int=64, d_hid: int=200,nlayers: int=8, dropout: float = 0.2):
        super(Decoder, self).__init__()
        decoder_layers = TransformerDecoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
        self.pos_encoder = PositionalEncoding(d_model, dropout)

    def forward(self,last,memory):
        x= last.permute(1,0,2)
        x = self.pos_encoder(x)
        x = self.transformer_decoder(x,memory)
        return x

import torch
import torch.nn as nn

class EmotionClassifier(nn.Module):
    def __init__(self, input_size=512, hidden_size=64, output_size=24, num_layers=1, num_classes=3):
        super(EmotionClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size * num_classes)  # Change output size
        self.activation = nn.Sigmoid()

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate LSTM
        out, _ = self.rnn(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out)  # out: tensor of shape (batch_size, seq_length, output_size * num_classes)

        # Reshape the output to (batch_size, seq_length, output_size, num_classes)
        out = out.view(out.size(0), out.size(1), 24, -1)

        # Apply sigmoid activation function to get values between 0 and 1
        out = self.activation(out)

        return out


In [5]:
class EmotionTriggerModel(nn.Module):
    def __init__(self, d_model=512, nhead=128, d_hid=400, encoder_layers=32, decoder_layers=32, hidden_size=64, output_size=24):
        super(EmotionTriggerModel, self).__init__()

        self.encoder = Encoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=encoder_layers)
        self.decoder = Decoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=decoder_layers)
        self.emotion_classifier = EmotionClassifier(input_size=d_model, hidden_size=hidden_size, output_size=output_size)

    def forward(self, input_ids, last):
        encoder_output = self.encoder(input_ids)
        decoder_output = self.decoder(last, encoder_output)
        emotion_output = self.emotion_classifier(decoder_output)
        x = emotion_output.squeeze(0)
        return x.squeeze(0)

In [6]:
# test data dataloader
test_file = "/Data/val_file.json"
dataset = EmotionTriggerDataset(test_file)
test_loader = DataLoader(dataset, batch_size=32, shuffle=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_path = '/Model3.pth'
model = EmotionTriggerModel()
model_state_dict = torch.load(model_path)
model.load_state_dict(model_state_dict)

model.to(device)
model.eval()
f1_scores = []
acc = []
predicted = []
labels_see = []

for batch in tqdm(test_loader, desc="Evaluating on Test Data"):
    inputs, last, labels = batch
    inputs = inputs.to(device)
    last = last.to(device)
    labels = labels.to(device)
    labels_one_hot = F.one_hot(labels.long(), num_classes=3).float()

    with torch.no_grad():
        outputs = model(inputs, last)

    predicted_labels = (outputs >= 0.5).float()
    predicted_labels_arg = torch.argmax(predicted_labels, dim=2)
    predicted_labels_flatten = predicted_labels_arg.flatten()
    predicted.append(predicted_labels_flatten)
    labels_flatten = labels.flatten()
    labels_see.append(labels_flatten)

    test_ac = accuracy_score(labels_flatten.to('cpu'), predicted_labels_flatten.to('cpu'))
    f1 = f1_score(labels_flatten.to('cpu'), predicted_labels_flatten.to('cpu'), average = 'macro')
    acc.append(test_ac)
    f1_scores.append(f1)

test_accuracy = np.mean(np.array(acc))
f1_score_test = np.mean(np.array(f1_scores))
print("Test accuracy: ", test_accuracy)
print(f"Test F1 Score: {f1_score_test:.4f}")


Evaluating on Test Data: 100%|██████████| 27/27 [00:08<00:00,  3.16it/s]

Test accuracy:  0.7238136574074074
Test F1 Score: 0.5476





In [None]:
pred = []
for i,j in predicted:
  temp = []
  for z in i[:j.item()+1].cpu().numpy():

    temp.append(z)
  pred.append(temp)

# Task 2 Model 2

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [8]:
import json
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, GPT2Tokenizer

class EmotionTriggerDataset(Dataset):
    def __init__(self, data_file, max_length=512):
        with open(data_file, 'r') as f:
            data = json.load(f)

        self.data = data
        self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.bert_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        self.gpt_tokenizer.add_special_tokens({'pad_token': '[PAD]'})

        self.max_length = max_length
        self.label_mapping = {'disgust': 0,
                               'anger': 1,
                               'sadness': 2,
                               'surprise': 3,
                               'neutral': 4,
                               'joy': 5,
                               'fear': 6,
                              '<pad>': 8}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # if idx in [513,655]:
        #     idx=512
        last_idx = len(self.data[idx]['speakers'])-1
        speaker = self.data[idx]['speakers']
        utterances = self.data[idx]['utterances']
        emotions = self.data[idx]['emotions']
        final_utterance = [s + '  ' + u + '  ' + e for s, u, e in zip(speaker, utterances, emotions)] # X
        final_utterance = final_utterance + [self.bert_tokenizer.pad_token] * (24 - len(final_utterance))

        triggers = self.data[idx]['triggers'] # y
        # print("TRIGGERS             ",triggers)
        temp = []
        for i in triggers:
            if i == None:
                temp.append(0)
            else:
                temp.append(i)
        # triggers = [0 if t is None else t for t in triggers]
        # triggers += [0] * (24 - len(triggers)) # max length is 24
        temp += [2] * (24 - len(temp))   # 2 is pad token

        bert_inputs = self.bert_tokenizer(final_utterance, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='np')
        bert_input_ids = torch.tensor(bert_inputs['input_ids'],dtype=torch.float32)

        trigger_labels = torch.tensor(temp, dtype=torch.float32)

        return bert_input_ids, bert_input_ids[last_idx].unsqueeze(0), trigger_labels,last_idx

In [9]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, GPT2Model, BertTokenizer, GPT2Tokenizer
from tqdm import tqdm
import math
import os
from tempfile import TemporaryDirectory
from typing import Tuple

import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer,TransformerDecoderLayer,TransformerDecoder
from torch.utils.data import dataset
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

class Encoder(nn.Module):
    def __init__(self, d_model: int =512, nhead: int=8, d_hid: int=200,nlayers: int=6, dropout: float = 0.2):
        super(Encoder, self).__init__()
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.pos_encoder = PositionalEncoding(d_model, dropout)


    def forward(self, x):

        x= x.permute(1,0,2)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        return x

class Decoder(nn.Module):
    def __init__(self, d_model: int =512, nhead: int=64, d_hid: int=200,nlayers: int=8, dropout: float = 0.2):
        super(Decoder, self).__init__()
        decoder_layers = TransformerDecoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
        self.pos_encoder = PositionalEncoding(d_model, dropout)


    def forward(self,last,memory):
        # print(last.shape)
        x= last.permute(1,0,2)
        x = self.pos_encoder(x)
        x = self.transformer_decoder(x,memory)
        return x


import torch
import torch.nn as nn

class EmotionClassifier(nn.Module):
    def __init__(self, input_size=512, hidden_size=768, output_size=24, num_layers=1):
        super(EmotionClassifier, self).__init__()
        self.hidden_size = hidden_size

        self.num_layers = num_layers

        self.dim_change = nn.Linear(input_size, 768)
        model_name = 'bert-base-uncased'  # Example model name

        self.model = BertModel.from_pretrained(model_name)

        self.encoder = self.model.encoder
        self.pooler = self.model.pooler

        self.fc = nn.Linear(hidden_size, output_size*3)

        self.activation = nn.Sigmoid()

    def forward(self, x):
        out = self.dim_change(x)

        # Initialize hidden state with zeros
        out = self.encoder(out)[0].permute(1,0,2)
        out = self.pooler(out)
        out = self.fc(out)

        out = out.view(out.size(0),24,3)
        # Apply sigmoid activation function to get values between 0 and 1
        out = self.activation(out)

        return out



In [10]:
import torch
import torch.nn as nn

class EmotionTriggerModel(nn.Module):
    def __init__(self, d_model=512, nhead=128, d_hid=512, encoder_layers=32, decoder_layers=32, hidden_size=768, output_size=24):
        super(EmotionTriggerModel, self).__init__()

        self.encoder = Encoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=encoder_layers)
        self.decoder = Decoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=decoder_layers)
        self.emotion_classifier = EmotionClassifier(input_size=d_model, hidden_size=hidden_size, output_size=output_size)

    def forward(self, input_ids, last):
        # print(input_ids.shape,'aaa')
        encoder_output = self.encoder(input_ids)
        # last_output = encoder_output[-1].unsqueeze(0)  # Taking the last output from encoder
        decoder_output = self.decoder(last, encoder_output)
        emotion_output = self.emotion_classifier(decoder_output)
        # print("emotion_output_class", emotion_output.shape)
        x = emotion_output.squeeze(0)
        return x.squeeze(0)


In [11]:
import torch
import torch.nn as nn

class EmotionTriggerModel(nn.Module):
    def __init__(self, d_model=512, nhead=128, d_hid=512, encoder_layers=32, decoder_layers=32, hidden_size=768, output_size=24):
        super(EmotionTriggerModel, self).__init__()

        self.encoder = Encoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=encoder_layers)
        self.decoder = Decoder(d_model=d_model, nhead=nhead, d_hid=d_hid, nlayers=decoder_layers)
        self.emotion_classifier = EmotionClassifier(input_size=d_model, hidden_size=hidden_size, output_size=output_size)

    def forward(self, input_ids, last):
        # print(input_ids.shape,'aaa')
        encoder_output = self.encoder(input_ids)
        # last_output = encoder_output[-1].unsqueeze(0)  # Taking the last output from encoder
        decoder_output = self.decoder(last, encoder_output)
        emotion_output = self.emotion_classifier(decoder_output)
        # print("emotion_output_class", emotion_output.shape)
        x = emotion_output.squeeze(0)
        return x.squeeze(0)


In [12]:
# test data dataloader
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
test_file = "/Data/val_file.json"
dataset = EmotionTriggerDataset(test_file)
test_loader = DataLoader(dataset, batch_size=5, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_path = '/Model4.pth'
model = EmotionTriggerModel()
model_state_dict = torch.load(model_path)
model.load_state_dict(model_state_dict)

model.to(device)
model.eval()
f1_scores = []
acc = []
predicted = []
labels_see = []

for batch in tqdm(test_loader, desc="Evaluating on Test Data"):
    inputs, last, labels,last_idx = batch
    inputs = inputs.to(device)
    last = last.to(device)
    labels = labels.to(device)
    labels_one_hot = F.one_hot(labels.long(), num_classes=3).float()

    with torch.no_grad():
        outputs = model(inputs, last)

    predicted_labels = (outputs >= 0.5).float()
    predicted_labels_arg = torch.argmax(predicted_labels, dim=2)
    predicted_labels_flatten = predicted_labels_arg.flatten()
    try:
      predicted.append((predicted_labels_arg[0].clone(),last_idx[0].clone()))
      predicted.append((predicted_labels_arg[1].clone(),last_idx[1].clone()))
      predicted.append((predicted_labels_arg[2].clone(),last_idx[2].clone()))
      predicted.append((predicted_labels_arg[3].clone(),last_idx[3].clone()))
      predicted.append((predicted_labels_arg[4].clone(),last_idx[4].clone()))
    except Exception as e:
      print(e)
    labels_flatten = labels.flatten()
    labels_see.append(labels_flatten)

    test_ac = accuracy_score(labels_flatten.to('cpu'), predicted_labels_flatten.to('cpu'))
    f1 = f1_score(labels_flatten.to('cpu'), predicted_labels_flatten.to('cpu'), average = 'macro')
    acc.append(test_ac)
    f1_scores.append(f1)

test_accuracy = np.mean(np.array(acc))
f1_score_test = np.mean(np.array(f1_scores))
print("Test accuracy: ", test_accuracy)
print(f"Test F1 Score: {f1_score_test:.4f}")




model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Evaluating on Test Data: 100%|██████████| 169/169 [00:15<00:00, 10.71it/s]

index 3 is out of bounds for dimension 0 with size 3
Test accuracy:  0.7241288625904011
Test F1 Score: 0.5454





In [13]:
pred = []
for i,j in predicted:
  temp = []
  for z in i[:j.item()+1].cpu().numpy():

    temp.append(z)
  pred.append(temp)