In [1]:
import transformers
from transformers import RobertaModel, RobertaTokenizer, RobertaConfig
from sklearn.model_selection import KFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from copy import deepcopy
from sklearn.metrics import precision_score, recall_score, f1_score
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm

# load pre-trained model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
base_model = RobertaModel.from_pretrained('roberta-base') 
encodings = pd.read_csv(r'./data/encodings.csv')
random_seed = 42
encodings = encodings.sample(n=20000, random_state=random_seed)

# read train data
def read_train_data(encodings):
 
    weights = [eval(weights) for weights in encodings['weights'].tolist()]
    weight1 = [w[0] for w in weights]
    weight2 = [w[1] for w in weights]
    labels = encodings['labels'].tolist()
    train = [eval(subdata) for subdata in encodings['encodings'].tolist()]
    inputs_ids,inputs_ids2, inputs_ids3, inputs_ids4, target_mask, target_mask2, target_mask3, target_mask4, target_mask5 = [],[],[],[],[],[],[],[],[]
    for sm, wm, wsi, se, di, dm, twe, wsim, sem, ce in tqdm(train):
        inputs_ids.append(ce)
        inputs_ids2.append(se)
        inputs_ids3.append(wsi)
        inputs_ids4.append(di)
        target_mask.append(sm)
        target_mask2.append(wm)
        target_mask3.append(dm)
        target_mask4.append(wsim)
        target_mask5.append(sem)
       
    return list(zip(inputs_ids,inputs_ids2, inputs_ids3, inputs_ids4, target_mask, target_mask2, target_mask3, target_mask4, target_mask5, weight1, weight2, labels)) 

class CustomDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
        
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        input_ids = self.dataset[idx][0]
        input_ids2 = self.dataset[idx][1]
        input_ids3 = self.dataset[idx][2]
        input_ids4 = self.dataset[idx][3]
        target_mask = self.dataset[idx][4]
        target_mask2 = self.dataset[idx][5]
        target_mask3 = self.dataset[idx][6]
        target_mask4 = self.dataset[idx][7]
        target_mask5 = self.dataset[idx][8]
        weight1 = self.dataset[idx][9]
        weight2 = self.dataset[idx][10]
        label = self.dataset[idx][11]
                
        return input_ids, input_ids2, input_ids3, input_ids4,target_mask, target_mask2, target_mask3, target_mask4, target_mask5, weight1, weight2, label
data = read_train_data(encodings)
print('finished data loading')
dataset = CustomDataset(data)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 20000/20000 [00:00<00:00, 1205346.36it/s]

finished data loading





In [2]:
def customAvgPool(tensor):
    # iterate batched
    pooled_batch = []
    for idx in range(len(tensor)):
        count1 = 0
        count2 = 0
        v1_sum = 0
        v2_sum = 0
        # iterate each element
        for unit in tensor[idx]:
            print(f'unit:{unit}, type:{type(unit)}')
            v1 = unit[0]
            v2 = unit[1]
            if v1 != 0:
                count1 += 1
            if v2 != 0:
                count2 += 1
            v1_sum += v1
            v2_sum += v2
        # when every element is zero, assign zero
        if count1 == 0:
            pooled_v1 =0
        else:
            pooled_v1 = v1_sum / count1
        if count2 == 0:
            pooled_v2 = 0
        else:
            pooled_v2 = v2_sum / count2
        # add one more dimension
        pooled_batch.append([pooled_v1, pooled_v2])

    batch_output = torch.tensor(pooled_batch, dtype=torch.float32, requires_grad = True)
    
    return batch_output

def expand_label(gold_labels):
    expansion = 1 - gold_labels
    expanded_gold_labels = []
    for i in range(len(gold_labels[0])):
        v1 = gold_labels[0][i]
        v2 = expansion[0][i]
        expanded_gold_labels.append([v1,v2])
        
    return torch.tensor(expanded_gold_labels, dtype=torch.float32)

class Model(nn.Module):
    def __init__(self, num_labels=2):
        super(Model, self).__init__()
        self.encoder =  RobertaModel.from_pretrained('roberta-base')
        self.num_labels = num_labels
        self.config = RobertaConfig.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(dropout_ratio)
        self.classifier = nn.Linear(self.config.hidden_size, num_labels)
        self.classifier2 = nn.Linear(self.config.hidden_size * 2, num_labels)
        self.classifier3 = nn.Linear(2 * 3, num_labels)
        self._init_weights(self.classifier)
        self._init_weights(self.classifier2)
        self._init_weights(self.classifier3)
        self.sigmoid = nn.Sigmoid()
        
    def _init_weights(self, module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()
    
    # Define the model architecture

    def forward(
        self,
        input_ids,
        input_ids2,
        input_ids3,
        input_ids4,
        target_mask,
        target_mask2,
        target_mask3,
        target_mask4,
        target_mask5,
        weight1,
        weight2
    ):  
        
        # encode the sentences
        target_sent_output = self.encoder(input_ids, attention_mask=target_mask).last_hidden_state 
    
        
        # get sentence embedding in context
      
        target_sent_output = self.dropout(target_sent_output)
        
        # get word embeddings in context
       
        target_word_output = self.dropout(self.encoder(input_ids,attention_mask=target_mask2).last_hidden_state)
     
        # get individual sentence embeddings
        outputs2 = self.encoder(input_ids2,attention_mask=target_mask5).last_hidden_state
      
        target_sent_output2 = self.dropout(outputs2)
      
        # get individual word embeddings
        outputs3 = self.encoder(input_ids3,attention_mask=target_mask4).last_hidden_state
       
        target_word_output2 = self.dropout(outputs3)
     
        # MIP layers    
        MIP_hidden = torch.cat([target_word_output, target_word_output2], dim=2)
        MIP2_hidden = torch.cat([target_sent_output, target_sent_output2], dim=2)
        
        MIP_hidden = self.classifier2(MIP_hidden)
        MIP2_hidden = self.classifier2(MIP2_hidden)
        
       
        
        # Domain layer
        outputs4 = self.encoder(input_ids4,attention_mask=target_mask3).last_hidden_state

        domain_outputs = self.dropout(outputs4)
      
        Domain_hidden = self.classifier(domain_outputs) 
  
        weight1 = weight1.view(-1, 1, 1).float()
        weight2 = weight2.view(-1, 1, 1).float()

        # apply weights
        MIP_hidden = weight1 * MIP_hidden
        MIP2_hidden = weight2 * MIP2_hidden

        # Feed-forward layers
        concatenated_layer = torch.cat([MIP_hidden, MIP2_hidden, Domain_hidden], dim=2)
        
        logits = self.classifier3(self.dropout(concatenated_layer))
        
        pooled_logits = customAvgPool(logits)
        
        pooled_logits = self.sigmoid(pooled_logits)

        return pooled_logits
     
# set hyperparameters
batch_size = 16
n_fold = 3
num_epoch = 3
learning_rate = 2e-5
dropout_ratio = 0.1

# set up gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# prepare data and instantiate model

model = Model()
print('start training')

loss_fn = nn.BCELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
kf = KFold(n_splits=n_fold, shuffle=True)

# set default f1score
best_fscore = 0

for train_ids, val_ids in kf.split(data):
    train_data = torch.utils.data.Subset(dataset, train_ids)
    val_data = torch.utils.data.Subset(dataset, val_ids)
    
    trainset = CustomDataset(train_ids)
    
    train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=True)
    
    #scaler = GradScaler()
    model.to(device)
    
    

    for epoch in range(num_epoch):
        
        model.train()
        
        for input_ids, input_ids2, input_ids3, input_ids4,target_mask, target_mask2, target_mask3, target_mask4, target_mask5, weight1, weight2, labels in tqdm(train_dataloader):
            input_ids = [tensor.to(device) for tensor in input_ids]
            input_ids2 = [tensor.to(device) for tensor in input_ids2]
            input_ids3 = [tensor.to(device) for tensor in input_ids3]
            input_ids4 = [tensor.to(device) for tensor in input_ids4]
            
            input_ids = torch.stack(input_ids, dim=1)
            input_ids2 = torch.stack(input_ids2, dim=1)
            input_ids3 = torch.stack(input_ids3, dim=1)
            input_ids4 = torch.stack(input_ids4, dim=1)
            
            target_mask = [tensor.to(device) for tensor in target_mask]
            target_mask2 = [tensor.to(device) for tensor in target_mask2]
            target_mask3 = [tensor.to(device) for tensor in target_mask3]
            target_mask4 = [tensor.to(device) for tensor in target_mask4]
            target_mask5 = [tensor.to(device) for tensor in target_mask5]
            
            target_mask = torch.stack(target_mask, dim=1)
            target_mask2 = torch.stack(target_mask2, dim=1)
            target_mask3 = torch.stack(target_mask3, dim=1)
            target_mask4 = torch.stack(target_mask4, dim=1)
            target_mask5 = torch.stack(target_mask5, dim=1)
        
            weight1 = weight1.unsqueeze(dim=0).to(device)
            weight2 = weight2.unsqueeze(dim=0).to(device)
            labels = labels.unsqueeze(dim=0).to(device)
            
        
            
            optimizer.zero_grad()
            outputs = model(input_ids, input_ids2, input_ids3, input_ids4,target_mask, target_mask2, target_mask3, target_mask4, target_mask5, weight1, weight2)
            pooled_outputs = customAvgPool(outputs)
    
            expanded_labels = expand_label(labels)
            
            loss = loss_fn(pooled_outputs, expanded_labels)
            
            loss.backward()
     
            optimizer.step()
            
        print(f'finished epoch:{epoch}')
        
        model.eval()
        with torch.no_grad():
            for val_inputs, val_labels in val_dataloader:
                val_outputs = model(val_inputs)
                # indices of tensor storing maximum values
                _, predicted = torch.argmax(val_outputs, 1)
                # apply evaluation
                recall = recall_score(val_labels, predicted)
                precision = preision_score(val_labels, predicted)
                f1score = f1_score(val_labels, predicted)
                # print info
                print(f'Epoch: {epoch + 1}, Recall: {recall}, Precision: {precision}, F1_score: {f1score}')
                
        if f1score > best_fscore:
            best_fscore = f1score
            optimal_weights = deepcopy(model.state_dict())

# save weights
torch.save(best_model_weights, './optimal_weights.pth')

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


start training


  0%|          | 0/834 [00:00<?, ?it/s]

tensor:tensor([[[-0.0101, -0.0081],
         [-0.0046,  0.0080],
         [-0.0061,  0.0056],
         ...,
         [-0.0082,  0.0049],
         [-0.0034,  0.0059],
         [ 0.0018, -0.0095]],

        [[-0.0007, -0.0050],
         [-0.0106,  0.0173],
         [-0.0166,  0.0329],
         ...,
         [-0.0034,  0.0072],
         [-0.0025,  0.0025],
         [-0.0034,  0.0041]],

        [[-0.0016,  0.0012],
         [-0.0061,  0.0062],
         [-0.0046,  0.0032],
         ...,
         [-0.0100,  0.0030],
         [ 0.0005, -0.0028],
         [-0.0009,  0.0039]],

        ...,

        [[-0.0028,  0.0016],
         [-0.0111,  0.0090],
         [-0.0059,  0.0058],
         ...,
         [-0.0046,  0.0016],
         [-0.0070,  0.0099],
         [-0.0014,  0.0004]],

        [[ 0.0005, -0.0009],
         [-0.0083,  0.0115],
         [-0.0071,  0.0056],
         ...,
         [-0.0032, -0.0023],
         [ 0.0023, -0.0112],
         [ 0.0090, -0.0193]],

        [[-0.0011, -0.0004],


unit:tensor([ 0.0057, -0.0101], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([0.0010, 0.0059], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([0.0013, 0.0030], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0092, -0.0150], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0066, -0.0076], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([0.0006, 0.0019], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0008,  0.0014], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0031,  0.0047], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0019, -0.0054], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0001,  0.0011], device='cuda:0', grad_fn=<Unb

unit:tensor([ 0.0077, -0.0132], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0003,  0.0008], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0187, -0.0304], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0069, -0.0156], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0251, -0.0535], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0051, -0.0147], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0099, -0.0149], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0075, -0.0104], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0116, -0.0313], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0013,  0.0026], device='cuda:0', grad_f

unit:tensor([ 0.0070, -0.0008], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([0.0055, 0.0022], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([0.0014, 0.0017], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0112, -0.0152], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0081, -0.0030], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0131, -0.0152], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0078, -0.0111], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0066, -0.0086], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0029, -0.0043], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0061, -0.0068], device='cuda:0', grad_fn=<U

  0%|          | 0/834 [00:02<?, ?it/s]

unit:tensor([-0.0030,  0.0007], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0085,  0.0113], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0013, -0.0141], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0161,  0.0191], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0042,  0.0111], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([ 0.0006, -0.0068], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0023,  0.0015], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0035,  0.0004], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0026, -0.0126], device='cuda:0', grad_fn=<UnbindBackward0>), type:<class 'torch.Tensor'>
unit:tensor([-0.0039,  0.0142], device='cuda:0', grad_f




IndexError: invalid index of a 0-dim tensor. Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number

In [3]:
weights = torch.randn(16)

# Reshape the weight tensor to match the batch input size
weights = weights.view(16, 1, 1)

In [3]:
labels

tensor([[1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]], device='cuda:0')

In [5]:
batch_input = torch.randn(16, 150, 768)
result = batch_input * weights

print(result.size()) 

torch.Size([16, 150, 768])


In [10]:
gold_labels = torch.tensor([[1], [0], [1]])
expanded_labels = 1 - gold_labels

# Concatenate the original and expanded labels along the last dimension
expanded_gold_labels = torch.cat((gold_labels, expanded_labels), dim=-1)

In [12]:
1-gold_labels

tensor([[0],
        [1],
        [0]])

In [4]:
expanded_gold_labels

tensor([[1, 0],
        [0, 1],
        [1, 0]])

In [7]:
import torch
import torch.nn as nn

# Example output tensor of shape (batch_size, max_seq_length, num_classes)
output = torch.randn(16, 150, 2)
output = torch.squeeze(output, dim=1)
# Example label tensor of shape (batch_size, 1, num_classes)
label = torch.randn(16, 1, 2)

# Reshape the label tensor to match the shape of the output tensor
label = torch.squeeze(label, dim=1)  # Shape: (16, 2)

# Apply the binary cross-entropy loss
loss_fn = nn.BCEWithLogitsLoss()
loss = loss_fn(output, label)

print(loss.item())  # Output: the loss value as a scalar


ValueError: Target size (torch.Size([16, 2])) must be the same as input size (torch.Size([16, 150, 2]))

In [8]:
output = torch.squeeze(output, dim=1)

tensor([[[-1.8042, -1.5464],
         [ 0.0775, -0.2856],
         [ 0.0517,  0.3943],
         ...,
         [ 1.5280,  0.9240],
         [-1.7156,  0.5514],
         [ 0.5865,  0.3923]],

        [[ 1.2700, -1.1823],
         [ 0.4500,  0.1019],
         [-1.5407,  0.1174],
         ...,
         [ 0.7142, -1.1516],
         [ 0.4866,  1.1697],
         [-0.5269, -0.7407]],

        [[-0.8502,  1.9818],
         [ 0.4104,  0.2557],
         [ 0.1200, -1.6964],
         ...,
         [-0.4595, -0.4315],
         [ 0.6965, -0.7116],
         [-0.4857,  1.6270]],

        ...,

        [[-0.3259,  1.4065],
         [ 0.8124, -1.0277],
         [ 1.0877, -0.4718],
         ...,
         [ 0.3873,  1.3699],
         [-0.5146, -1.3189],
         [-1.2557,  1.5081]],

        [[ 0.0367,  1.3099],
         [-1.0436, -0.4787],
         [-1.4458, -1.6952],
         ...,
         [-0.4669,  0.4569],
         [-0.7117,  1.8009],
         [ 0.2602, -0.6997]],

        [[-0.6684,  0.1575],
       

In [9]:
output.shape

torch.Size([16, 150, 2])

In [20]:
import torch
import torch.nn as nn

# Example output tensor of shape (batch_size, max_seq_length, num_classes)
output = torch.randn(16, 150, 2)

# Apply max pooling along the second dimension
pooling = nn.MaxPool1d(kernel_size=output.size(1))
pooled_output = pooling(output.transpose(2,1))

print(pooled_output.size())  # Output: torch.Size([16, 2])


torch.Size([16, 2, 1])


In [22]:
t = torch.tensor([[[2,2],[1,1]]])

In [23]:
t.shape

torch.Size([1, 2, 2])

In [27]:
p = pooling(t.transpose(2,1))

RuntimeError: "max_pool1d_impl" not implemented for 'Long'

In [41]:
import torch
import torch.nn as nn

# Example tensor with masked padding values of shape (batch_size, max_seq_length)
tensor = torch.tensor([[[2, 0], [1, 0]], 
                       [[1, 2], [3, 0]], 
                       [[3, 5], [6, 0]]])

# Count the number of non-zero elements along the second dimension
count = (tensor != 0).sum(dim=2)

# Apply average pooling while ignoring zero values
pooled_output = tensor.sum(dim=2) / count

print(pooled_output)


tensor([[2.0000, 1.0000],
        [1.5000, 3.0000],
        [4.0000, 6.0000]])


In [42]:
customAvgPool(tensor)

tensor([[[1.5000, 0.0000]],

        [[2.0000, 2.0000]],

        [[4.5000, 5.0000]]])

In [34]:
len(tensor)

3

In [6]:
x = torch.randn(1,16)    

In [7]:
x

tensor([[-0.7955, -0.2466, -1.4711,  0.4696, -0.8638, -0.4367, -0.9309,  1.2311,
         -0.4941,  0.0586,  0.3497, -1.3287,  1.1064,  0.7120,  0.6686, -2.5801]])

In [8]:
def expand_label(gold_labels):
    expansion = 1 - gold_labels
    expanded_gold_labels = []
    for i in range(len(gold_labels[0])):
        v1 = gold_labels[0][i]
        v2 = expansion[0][i]
        expanded_gold_labels.append([v1,v2])
        
    return torch.tensor(expanded_gold_labels)
expand_label(x)

tensor([[-0.7955,  1.7955],
        [-0.2466,  1.2466],
        [-1.4711,  2.4711],
        [ 0.4696,  0.5304],
        [-0.8638,  1.8638],
        [-0.4367,  1.4367],
        [-0.9309,  1.9309],
        [ 1.2311, -0.2311],
        [-0.4941,  1.4941],
        [ 0.0586,  0.9414],
        [ 0.3497,  0.6503],
        [-1.3287,  2.3287],
        [ 1.1064, -0.1064],
        [ 0.7120,  0.2880],
        [ 0.6686,  0.3314],
        [-2.5801,  3.5801]])

In [5]:
z = torch.tensor([-0.0055, -0.0173])

In [6]:
z[0]

tensor(-0.0055)

In [None]:
pooled_logits = customAvgPool(logits)
pooled_logits = self.sigmoid(pooled_logits)
return pooled_logits