In [1]:
from xml.etree import ElementTree as ET
from torch.utils.data import Dataset, DataLoader
from transformers import XLNetTokenizer, XLNetModel, XLNetForSequenceClassification, RobertaTokenizer, AutoTokenizer, AutoModel, BertTokenizer, BertModel
import torch
from torch import nn, optim
import pandas as pd
import numpy as np
import re
import os
import zipfile
import string
from tqdm import tqdm
from sklearn.metrics import f1_score,accuracy_score,confusion_matrix,mean_absolute_error
from IPython.display import FileLink,FileLinks
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler,OneHotEncoder,LabelEncoder
import nltk
from nltk import word_tokenize
from nltk import StanfordTagger
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")

# **Task 1 : Hedge Cue Detection**

**Step-1 : HedgePeer Data Extraction and Dataframe Prep**

In [2]:
# Base directory for the input data; the relative paths below resolve against it.
root = '../input'
os.chdir(root)
data_path = 'hedgepeer/HedgePeer.jsonl'

# The file is read as JSON-lines: one record per line.
dataObj = pd.read_json(path_or_buf=data_path, lines=True)

# Flatten review -> sentence -> hedge into one record per (sentence, hedge) pair.
# A sentence without hedges contributes a single 'NO HEDGE' record.
data_list = []
for _, review in dataObj.iterrows():
    for s in review['Sentences']:
        hedges = s['Hedges']
        common = {
            'Review_id': review['Review_id'],
            'Sentence_id': s['Sentence_id'],
            'Raw Sentence': s['Sentence'],
        }
        if(len(hedges)==0):
            data_list.append({**common, 'Hedged Sentence': s['Sentence'], 'Hedge': 'NO HEDGE', 'Span': None})
            continue
        data_list.extend(
            {**common, 'Hedged Sentence': h['Hedged Sentence'], 'Hedge': h['Hedge'], 'Span': h['Span']}
            for h in hedges
        )

In [3]:
# One row per (sentence, hedge) record collected in data_list; the bare name displays the frame.
df = pd.DataFrame(data_list)
df

In [4]:
## Run -- To generate sentiment values for the data

# The encoder module is imported from this directory, so the working directory is switched
# first and restored to `root` at the end of the cell.
os.chdir('sentiment-intensity-prediction/generating-reviews-discovering-sentiment-master')

## Load the OpenAI sentiment Model
from encoder import Model
# NOTE(review): `model` is rebound later in the notebook to the cue-detection network.
model = Model()

sent = list(df['Raw Sentence'])
text_features = model.transform(sent)

# Column 2388 of the transformed features is used as the per-sentence sentiment value.
# NOTE(review): presumably the "sentiment unit" of this encoder -- confirm against its documentation.
sentiment_unit = text_features[:, 2388]

df['Sentiment Intensity'] = list(sentiment_unit)
sentiment = df['Sentiment Intensity'].to_list()

# NOTE(review): `root` is relative ('../input'), so this only returns to the original
# directory if the relative path still resolves from here -- verify.
os.chdir(root)

In [5]:
## Run -- Standardizing sentiment values......

# StandardScaler expects a 2-D array, hence the reshape to a single column.
x = df['Sentiment Intensity'].values.reshape(-1, 1) #returns a numpy array
std_scaler = StandardScaler()
# The scaler is fitted on every row of df, i.e. before any train/val/test split.
x_scaled = std_scaler.fit_transform(x)
# Overwrites the raw sentiment column in place with the standardized values.
df['Sentiment Intensity'] = list(x_scaled.reshape(-1))
df

In [4]:
## Run -- Creating unique_id column......

# A sentence is identified by '<Review_id>_<Sentence_id>'.
rev_id, sen_id = df['Review_id'], df['Sentence_id']
unq_id = []
for review, sentence in zip(rev_id, sen_id):
    unq_id.append(review+'_'+str(sentence))
df['Unique_id'] = unq_id
df

In [None]:
# Persist the frame to disk; the row index is written too, since index=False is not passed.
df.to_csv('/kaggle/working/HedgePeer_sentiment.csv')

In [5]:
## Run

# Collapse the per-hedge rows of `df` into one entry per sentence (Unique_id):
# parallel lists of ids, raw sentences, hedged sentences, cues, spans and sentiment values.
unq_list = []
sent_list = []
hedged_sent_list = []
hed_list = []
span_list = []
sentiment_list=[]

# Group by the column name itself (not a one-element list) so that `name` is the plain
# Unique_id string on every pandas version; a list key yields 1-tuples on pandas >= 2.0.
gp = df.groupby(by='Unique_id')
for name,grp in tqdm(gp):
    # The iterator already hands over the group's rows; no second get_group lookup needed.
    sent_df = grp
    # De-duplicated: all rows of a group are expected to share one raw sentence / sentiment.
    raw_sent = list(set(sent_df['Raw Sentence']))
    sentiments = list(set(sent_df['Sentiment Intensity']))
    hed_sent = list(sent_df['Hedged Sentence'])

    sent_hedges = list(sent_df['Hedge'])
    sent_spans = list(sent_df['Span'])
    # Drop the placeholder rows ('NO HEDGE') and 'IDENT_PRECED' entries.
    sent_hed_span = [(i,j,k) for i,j,k in zip(hed_sent,sent_hedges,sent_spans) if j not in ['NO HEDGE','IDENT_PRECED']]

    hedged_sents = [i[0] for i in sent_hed_span]
    hedges = [i[1] for i in sent_hed_span]
    spans = [i[2] for i in sent_hed_span]

    unq_list.append(name)
    sent_list.append(raw_sent)
    hedged_sent_list.append(hedged_sents)
    hed_list.append(hedges)
    span_list.append(spans)
    sentiment_list.append(sentiments)

In [34]:
# Debug peek at `spans`.
# NOTE(review): which value this shows depends on cell execution order (the grouping loop
# leaves the last group's spans in this name) -- not reproducible output.
spans

In [8]:
## Run
# One row per sentence; every column except sentence_id holds a list at this stage.
data_dict = {'sentence_id':unq_list, 'sentence':sent_list, 'hedged_sentence':hedged_sent_list, 'speculative_cues':hed_list, 'scope_string':span_list, 'sentiment':sentiment_list}
data = pd.DataFrame(data_dict)
data

In [9]:
## Run -- Convert hedged sentences to hashed sentences.....

# Substrings removed outright, and hedge tag pairs that are turned into '#' markers.
strip_tokens = (".", ",", "-", "?", "<span>", "</span>")
hedge_tags = (("<h>", "</h>"), ("<mh>", "</mh>"))

hash_sents = []
for _, row in data.iterrows():
    if(len(row['speculative_cues'])>0):
        temp_list = []
        for hs in row['hedged_sentence']:
            for tok in strip_tokens:
                hs = hs.replace(tok, "")
            for open_tag, close_tag in hedge_tags:
                # A closing tag is only rewritten when its opening tag is present.
                if(hs.find(open_tag)>-1):
                    hs = hs.replace(open_tag, "#").replace(close_tag, "#")
            temp_list.append(hs)
    else:
        # No cue: keep the raw sentence (first element of the de-duplicated list) untouched.
        temp_list = [row['sentence'][0]]

    hash_sents.append(temp_list)

data['hashed_sentence'] = hash_sents
data

In [10]:
# Merge the per-cue hashed variants of each sentence into ONE sentence in which every
# cue is wrapped in '#' markers.
final_hashed_sents = []
for index, row in data.iterrows():
    cues = row['speculative_cues']
    hash_sents = row['hashed_sentence']
    if(len(cues)<=1):
        # Zero or one cue: the single hashed sentence is already final.
        final_hashed_sents.append(hash_sents[0])
    else:
        # Marker-free token list, rebuilt from the first variant.
        raw_sent_from_hash = hash_sents[0].replace('#','').split()
        hash_sents = [i.split() for i in hash_sents]
        cue_idxs = []
        for hs,c in zip(hash_sents,cues):
            # Positions of stand-alone '#' tokens in this variant.
            hash_idx = [i for i,x in enumerate(hs) if x=='#']
            if(len(hash_idx)<2):
                print('LESS THAN 2 HASHES FOUND IN MULTIPLE CUE INSTANCE....')
                continue
            # Once both markers are removed, the cue occupies tokens
            # hash_idx[0] .. hash_idx[1]-2 (inclusive) of the marker-free list.
            cue_idx = (hash_idx[0], hash_idx[1]-2)
                
            st_idx = cue_idx[0]
            end_idx = cue_idx[1]+1
            if(c.split()!=raw_sent_from_hash[st_idx:end_idx]):
                # Tokens at the computed position differ from the cue: probe shifts of
                # up to 3 tokens to the right, then to the left, for an exact match.
                i=0
                while(i<3):
                    i+=1
                    if(end_idx+i<len(raw_sent_from_hash) and c.split()==raw_sent_from_hash[st_idx+i:end_idx+i]):
                        cue_idx = (st_idx+i, end_idx+i-1)
                        break
                    if(st_idx-i>=0 and c.split()==raw_sent_from_hash[st_idx-i:end_idx-i]):
                        cue_idx = (st_idx-i, end_idx-i-1)
                        break
                
            # If no shift matched, the originally computed position is kept.
            cue_idxs.append(cue_idx)
            
        # Insert the markers inside the token strings so that list indices stay valid
        # while several cues are marked.
        for cue_idx in cue_idxs:
            raw_sent_from_hash[cue_idx[0]] = '# '+ raw_sent_from_hash[cue_idx[0]]
            raw_sent_from_hash[cue_idx[1]] = raw_sent_from_hash[cue_idx[1]] + ' #'
        
        final_hashed_sents.append(' '.join(raw_sent_from_hash))

In [11]:
## Run

# NOTE(review): this cell is not idempotent -- on a second run the i[0] below would take the
# first character of each (already unwrapped) sentence string.
data['hashed_sentence'] = final_hashed_sents
# Unwrap the single-element lists built during grouping.
data['sentence'] = [i[0] for i in data['sentence'].to_list()]
data['sentiment'] = [i[0] for i in data['sentiment'].to_list()]

## To remove instance with raw sentence == Nan.....
non_nan_list = [type(i)==str for i in data['sentence'].to_list()]
data = data.loc[non_nan_list]

# Inputs for task 1: hashed sentences, their cue lists and sentiment values (kept index-aligned).
sent1 = data['hashed_sentence']
sen_list = None
cues = data['speculative_cues'].to_list()
sentiment = data['sentiment'].to_list()
data

**Step-2 : Creating dataloaders**

In [13]:
## Run -- One-Hot Rep for POS tags

# Tag inventory for which integer and one-hot encodings are built.
tag_set = np.array(['CC','CD','DT','EX','FW','IN','JJ','JJR','JJS','LS','MD','NN','NNS','NNP','NNPS','PDT','POS','PRP','PRP$','RB','RBR','RBS','RP','SYM','TO','UH','VB','VBD','VBG','VBN','VBP','VBZ','WDT','WP','WP$','WRB','$',"''"])
num_tags = tag_set.shape[0]

# tag -> integer id (and back).
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(tag_set)
char_to_int = dict((c, i) for c,i in zip(tag_set,integer_encoded))
int_to_char = dict((i, c) for c,i in zip(tag_set,integer_encoded))
print(integer_encoded)

# scikit-learn renamed `sparse` to `sparse_output` (1.2) and removed the old name (1.4);
# try the new keyword first and fall back so the cell runs on either version.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
# The encoder expects a 2-D column of integer ids.
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
# tag -> one-hot vector (as a plain list).
one_hot_labels = dict((c,list(i)) for c,i in zip(tag_set,onehot_encoded))
print(onehot_encoded)

In [14]:
# Display the number of POS tags in tag_set.
num_tags

In [15]:
## Run
## 0=not a cue, 1=normal cue, 2=multiword cue, 3=<pad> token 

class Bio_dataset(Dataset):
    """Turns '#'-marked sentences into padded token-id, attention-mask and label sequences.

    Labels per token: 0 = not a cue, 1 = single-word cue, 2 = multi-word cue, 3 = padding.
    """
    def __init__(self,sentences,cues,trans_model,sentiment,tokenizer,max_len,pos_tagging,num_tags,pos_scaff):
        """Store the inputs; nothing is tokenized until tokenids_gen() is called.

        sentences   -- sentences in which every cue is wrapped in '#' markers
        cues        -- per sentence, the list of cue strings (in marker order)
        trans_model -- key into the pad-token-id table ('xlnet', 'bert' or 'scibert')
        sentiment   -- per-sentence sentiment values, returned unchanged
        tokenizer   -- object providing encode_plus() and decode()
        max_len     -- length every sequence is padded to
        pos_tagging -- whether POS features are generated
        num_tags    -- number of POS tags (pad vector size / pad label)
        pos_scaff   -- True: integer POS labels; False: one-hot POS vectors
        """
        self.sent = sentences
        self.trans_model = trans_model
        self.token = tokenizer
        self.max = max_len
        self.cues = cues
        self.sentiment = sentiment
        self.pos_tagging = pos_tagging
        self.num_tags = num_tags
        self.pos_scaff = pos_scaff
    def __len__(self):
        return len(self.sent)
    def tokenids_gen(self):
        """Tokenize every sentence and build the aligned, padded sequences.

        Returns (senids, attention_masks, targets, sentiment, pos_tags); pos_tags is an
        empty list when pos_tagging is off. Sequences longer than max_len are NOT truncated.
        """
        targets = []
        senids=[]
        attention_masks=[]
        pos_tags=[]
        pad_token_ids = {'xlnet':5,'bert':0,'scibert':0}
        for s,c in zip(self.sent,self.cues):
            # Use the tokenizer given to the constructor rather than a notebook-level
            # global, so the dataset does not depend on which cell ran last.
            encodings = self.token.encode_plus(s,
                                  return_tensors='pt',
                                  truncation=False,
                                  return_token_type_ids=True,
                                  return_attention_mask=True,
                                  )

            att = [i.item() for i in encodings['attention_mask'][0]]
            senid = [i.item() for i in encodings['input_ids'][0]]
            # Decode each id on its own so that '#' markers can be located by value.
            k = [self.token.decode(i) for i in senid]

            tar = [0 for i in range(len(k))]
            idxend=-1
            for cue in c:
                # Each cue sits between the next pair of '#' tokens.
                idxstart = k.index('#',idxend+1)
                idxend = k.index('#',idxstart+1)
                tar[idxstart] = -1
                tar[idxend] = -1
                if(len(cue.split())>1):
                    tar[idxstart+1:idxend] = [2 for i in range(idxend-idxstart-1)]
                else:
                    tar[idxstart+1:idxend] = [1 for i in range(idxend-idxstart-1)]

            ## loop to delete '' or '#' element from the sent list (here sent list = list of sentences with each one having multiple cues marked with #)
            for i in range(2*len(c)):
                idx = k.index('#')
                if(k[idx-1]==''):
                    # An empty-string token directly before the marker is removed with it.
                    del k[idx-1:idx+1]
                    del senid[idx-1:idx+1]
                    del tar[idx-1:idx+1]
                    del att[idx-1:idx+1]
                else:
                    del k[idx]
                    del senid[idx]
                    del tar[idx]
                    del att[idx]

            # Keep only tokens containing at least one alphanumeric character; the mask is
            # computed once and applied to all four aligned lists.
            keep = [re.search('[A-Za-z0-9]+', j)!=None for j in k]
            senid = [i for i,f in zip(senid,keep) if f]
            tar = [i for i,f in zip(tar,keep) if f]
            att = [i for i,f in zip(att,keep) if f]
            k = [i for i,f in zip(k,keep) if f]

            ## adding pad token at the end....
            n_pad = self.max - len(k)   # a negative value simply adds no padding
            tar = tar+[3 for i in range(n_pad)]
            senid = senid+[pad_token_ids[self.trans_model] for i in range(n_pad)]
            att = att+[0 for i in range(n_pad)]

            if(self.pos_tagging == True and self.pos_scaff == False):
                # One-hot POS vectors; padding positions get an all-zero vector.
                # Relies on the notebook-level `one_hot_labels` table.
                tagged = nltk.pos_tag(k)
                pos_tag_labels = [one_hot_labels[i[1]] for i in tagged]
                pad_encod = [0 for i in range(self.num_tags)]
                pos_tag_labels = pos_tag_labels+[pad_encod for i in range(n_pad)]
                pos_tags.append(pos_tag_labels)

            if(self.pos_tagging == True and self.pos_scaff == True):
                # Integer POS labels; padding positions get the extra label `num_tags`.
                # Relies on the notebook-level `char_to_int` table.
                tagged = nltk.pos_tag(k)
                pos_tag_labels = [char_to_int[i[1]] for i in tagged]
                pad_encod = self.num_tags
                pos_tag_labels = pos_tag_labels+[pad_encod for i in range(n_pad)]
                pos_tags.append(pos_tag_labels)

            targets.append(tar)
            senids.append(senid)
            attention_masks.append(att)

        return (senids,attention_masks,targets,self.sentiment,pos_tags)

In [16]:
## Run -- Creates dictionary with input_tokens, att_mask, targets tensors
class Dataset_gen(Dataset):
    """Indexable dataset that serves the pre-built sequences as tensors, one sample at a time."""

    def __init__(self,sentences,targets,att_masks,sentiment,pos_tags):
        self.sent = sentences
        self.tar = targets
        self.att = att_masks
        self.sentiment = sentiment
        self.pos_tags = pos_tags

    def __len__(self):
        return len(self.sent)

    def __getitem__(self, item):
        """Return the sample at `item` as a dict of tensors.

        The 'pos_tags' entry is only present when POS tags were supplied (non-empty list).
        """
        ret_dict = {
            'input': torch.tensor(self.sent[item]),
            'attention_mask': torch.tensor(self.att[item]),
            'targets': torch.tensor(self.tar[item]),
            'sentiment': torch.tensor(self.sentiment[item]),
        }
        if(len(self.pos_tags)!=0):
            ret_dict['pos_tags'] = torch.tensor(self.pos_tags[item])
        return ret_dict

In [17]:
## Run

# for task 1 => data2mark = (cues,sentiment)
# for task 2 => data2mark = (scope,sentiment)

def dataloader_gen(sent,data2mark,trans_model,tokenizer,max_len,batch_size,task,pos_tagging,num_tags,pos_scaff):
    """Build a DataLoader for task 1 (cue detection) or, for any other `task`, task 2.

    data2mark is a sequence of (marks, sentiment) pairs: marks are the cues for task 1
    and the spans otherwise. When pos_tagging is off, the POS-related settings passed to
    the dataset builder are reset to (False, None, False).
    """
    marks = [pair[0] for pair in data2mark]
    sentiment = [pair[1] for pair in data2mark]

    if(pos_tagging==True):
        pos_kwargs = dict(pos_tagging=pos_tagging,num_tags=num_tags,pos_scaff=pos_scaff)
    else:
        pos_kwargs = dict(pos_tagging=False,num_tags=None,pos_scaff=False)

    # Only the class that is actually needed is looked up.
    builder_cls = Bio_dataset if task==1 else Biot2_dataset
    builder = builder_cls(sent,marks,trans_model,sentiment,tokenizer,max_len,**pos_kwargs)

    x,att,y,ys,pos_tags = builder.tokenids_gen()
    return DataLoader(Dataset_gen(x,y,att,ys,pos_tags),batch_size=batch_size)

In [18]:
## leng_more = list of indices with sent tokens length > max_len

def remove_big_instances(data,sen_list,sent,data2mark,sentiment,tokenizer,max_len):
    """Drop every instance whose raw sentence tokenizes to more than max_len ids.

    The length check runs on data['sentence']; the same positions are then removed from
    sen_list (when given), sent, data2mark and sentiment, which must be index-aligned with it.

    Returns (leng_more, sen_list, sent, data2mark, sentiment), where leng_more is the list
    of removed positions.
    """
    l = list(data['sentence'])
    leng_more = [
        idx for idx,text in enumerate(l)
        if len(tokenizer.encode_plus(text,truncation=False,return_token_type_ids=True,return_attention_mask=True)['input_ids'])>max_len
    ]
    # Set membership keeps the filtering linear instead of quadratic in the number of rows.
    too_long = set(leng_more)

    def _keep(values):
        return [v for idx,v in enumerate(values) if idx not in too_long]

    # `is not None` (rather than `!= None`) stays a plain boolean even for array-like input.
    if(sen_list is not None):
        sen_list = _keep(sen_list)
    sent = _keep(sent)
    data2mark = _keep(data2mark)
    sentiment = _keep(sentiment)
    return (leng_more,sen_list,sent,data2mark,sentiment)

In [19]:
# Number of task-1 sentences before the length filter.
len(sent1)

In [20]:
## Run
# remove instances larger than max_len

# Selects which tokenizer (and pad id inside Bio_dataset) is used downstream.
trans_model = 'xlnet'

# All three tokenizers are loaded although only tokenizer_dict[trans_model] is used below.
tokenizer1 = XLNetTokenizer.from_pretrained('xlnet-base-cased')

tokenizer2 = BertTokenizer.from_pretrained('bert-base-cased')

tokenizer3 = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_cased')

tokenizer_dict = {'xlnet':tokenizer1,'bert':tokenizer2,'scibert':tokenizer3}

tokenizer = tokenizer_dict[trans_model]

# NOTE(review): the literal 100 must match `max_len`, which is only defined in a later cell.
# NOTE(review): `data` must still be the DataFrame here; a later cell rebinds it to a string.
len_more,sen_list,sent1,cues,sentiment = remove_big_instances(data,sen_list,sent1,cues,sentiment,tokenizer,100)

In [21]:
# Number of instances removed by the length filter.
len(len_more)

In [22]:
## Run -- Length after removing max_len instances
# Number of task-1 sentences remaining after the length filter.
len(sent1)

In [23]:
## Run : Loading data into DataLoaders

'''
Value of Tags for different architectures --
1. Simple Baseline = 
pos_tagging = False, pos_scaff = False, sent_scaff = False
2. MTL with sentiment scaff = 
pos_tagging = False, pos_scaff = False, sent_scaff = True
1. MTL with sentiment + POS scaffs = 
pos_tagging = True, pos_scaff = True, sent_scaff = True
1. MTL with sentiment scaff + POS Late Fusion (LF) = 
pos_tagging = True, pos_scaff = False, sent_scaff = True
'''

# NOTE(review): this rebinds `data` from the sentence DataFrame to a dataset-name string;
# cells that need the DataFrame (e.g. the length filter) cannot be re-run after this one.
data = 'hedgepeer'
pos_tagging=False
pos_scaff=False
sent_scaff=False
num_tags=num_tags   # no-op; kept so the four settings read as one group

max_len = 100
batch_size = 4

if(data=='hedgepeer'):
    # split (train,val,test) = (70%,20%,10%)
    # Absolute instance counts; train_size is only used to derive test_size.
    train_size = 36924
    val_size = 10552
    test_size = len(sent1)-train_size-val_size

else:
    # NOTE(review): only prints -- the split below then fails on the undefined sizes.
    print('Wrong Dataset Entry!')

# Pair every sentence's cues with its sentiment so both travel through the split together.
y12 = [(c,s) for c,s in zip(cues,sentiment)]
# First carve off the test set, then split the remainder into train and validation.
sen_train, sen_test, y12_train, y12_test = train_test_split(sent1,y12,test_size=test_size, random_state=0)
sen_train, sen_val, y12_train, y12_val = train_test_split(sen_train,y12_train,test_size=val_size, random_state=0)
    
train_data_loader = dataloader_gen(sen_train,y12_train,trans_model,tokenizer,max_len,batch_size,task=1,pos_tagging=pos_tagging,num_tags=num_tags,pos_scaff=pos_scaff)
val_data_loader = dataloader_gen(sen_val,y12_val,trans_model,tokenizer,max_len,batch_size,task=1,pos_tagging=pos_tagging,num_tags=num_tags,pos_scaff=pos_scaff)
test_data_loader = dataloader_gen(sen_test,y12_test,trans_model,tokenizer,max_len,batch_size,task=1,pos_tagging=pos_tagging,num_tags=num_tags,pos_scaff=pos_scaff)

In [24]:
# Size of the test split.
len(sen_test)

In [None]:
# Size of the training split.
len(sen_train)

# **Task 1 : Hedge cues detection Model**

**Simple Baseline**

In [25]:
## XLnet model
class cue_model(nn.Module):
    """Baseline token classifier: XLNet encoder followed by one linear layer with 4 outputs.

    NOTE(review): two later cells define a class with this same name (SciBERT, BERT);
    whichever definition ran last is the one `cue_model()` instantiates.
    """
    def __init__(self):
        super().__init__()
        self.xlmodel = XLNetModel.from_pretrained('xlnet-base-cased')
        self.lin = nn.Linear(768,4)

    def forward(self,x,att):
        """Return logits with the batch and sequence axes flattened into one."""
        hidden = self.xlmodel(x,attention_mask=att)[0]
        flat = hidden.view(-1,hidden.shape[2])
        return self.lin(flat)

In [None]:
## Scibert model
class cue_model(nn.Module):
    """Baseline token classifier: SciBERT encoder followed by one linear layer with 4 outputs.

    NOTE(review): redefines `cue_model`; running this cell shadows the XLNet definition
    from the earlier cell.
    """
    def __init__(self):
        super().__init__()
        self.xlmodel = AutoModel.from_pretrained('allenai/scibert_scivocab_cased')
        self.lin = nn.Linear(768,4)

    def forward(self,x,att):
        """Return logits with the batch and sequence axes flattened into one."""
        hidden = self.xlmodel(x,attention_mask=att)[0]
        flat = hidden.view(-1,hidden.shape[2])
        return self.lin(flat)

In [None]:
## Bert model
class cue_model(nn.Module):
    """Baseline token classifier: BERT encoder followed by one linear layer with 4 outputs.

    NOTE(review): redefines `cue_model`; on a top-to-bottom run this is the definition that
    wins, so it must agree with the selected `trans_model` / tokenizer.
    """
    def __init__(self):
        super().__init__()
        self.xlmodel = BertModel.from_pretrained('bert-base-cased')
        self.lin = nn.Linear(768,4)

    def forward(self,x,att):
        """Return logits with the batch and sequence axes flattened into one."""
        hidden = self.xlmodel(x,attention_mask=att)[0]
        flat = hidden.view(-1,hidden.shape[2])
        return self.lin(flat)

**Multi Task Learning Model**

In [None]:
## Attention class
class attention(nn.Module):
    """Attention pooling over the step axis: one scalar score per step, a weighted sum out.

    feature_dim -- size of each step's feature vector
    step_dim    -- number of steps; fixed, because the optional bias has one entry per step
    bias        -- whether a learnable per-step bias is added to the scores
    """
    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super().__init__(**kwargs)

        self.supports_masking = True

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0
        # Intermediate values of the most recent forward pass are kept on the module.
        self.a = 0
        self.th = 0
        self.eij = 0

        # Scoring vector of shape (feature_dim, 1).
        self.weight = nn.Parameter(nn.init.kaiming_uniform_(torch.zeros(feature_dim, 1)))

        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))

    def forward(self, x, mask=None):
        """Return the attention-weighted sum of x over axis 1.

        mask, when given, is multiplied into the unnormalised weights before normalisation.
        """
        flattened = x.contiguous().view(-1, self.feature_dim)
        scores = torch.mm(flattened, self.weight).view(-1, self.step_dim)
        if self.bias:
            scores = scores + self.b
        self.eij = scores

        self.th = torch.tanh(self.eij)
        unnormalised = torch.exp(self.th)
        if mask is not None:
            unnormalised = unnormalised * mask

        # The small epsilon guards against division by zero when every weight is masked out.
        self.a = unnormalised / (torch.sum(unnormalised, 1, keepdim=True) + 1e-10)

        return torch.sum(x * torch.unsqueeze(self.a, -1), 1)

In [None]:
## MAIN TASK WITHOUT POS_TAGGING........
# feed forward of main task (hedge cue/span prediction)

class main_head_1(nn.Module):
    """Main-task head without POS features: dropout -> linear(16) -> ReLU -> linear(4)."""
    def __init__(self,lstm_hidden_size):
        super().__init__()
        # The input width is 2*lstm_hidden_size (output of a bidirectional LSTM).
        self.lin = nn.Linear(2*lstm_hidden_size,16)
        self.drop = nn.Dropout(p=0.4)
        self.out = nn.Linear(16,4)
        self.relu = torch.nn.ReLU()

    def forward(self,lstm):
        """Return per-token logits; the first two axes of `lstm` are flattened into one."""
        tokens = lstm.reshape(-1, lstm.shape[2])
        hidden = self.relu(self.lin(self.drop(tokens)))
        return self.out(hidden)

In [None]:
## MAIN TASK WITH POS_TAGGING........
# feed forward of main task (hedge cue/span prediction)

class main_head_2(nn.Module):
    """Main-task head with POS features: linear(16), concatenated with the POS vector, -> linear(4)."""
    def __init__(self,lstm_hidden_size,pos_tagging,num_tags):
        super().__init__()
        self.lin = nn.Linear(2*lstm_hidden_size,16)
        # The output layer sees the 16 projected features plus num_tags POS features.
        self.out = nn.Linear(16+num_tags,4)
        self.pos_tagging = pos_tagging

    def forward(self,pos_tags,lstm):
        """Return per-token logits; both inputs are flattened to one row per token."""
        tokens = lstm.reshape(-1, lstm.shape[2])
        pos_flat = pos_tags.view(-1,pos_tags.shape[2])
        projected = self.lin(tokens)
        fused = torch.cat((projected,pos_flat),1).float()
        return self.out(fused)

In [None]:
# feed forward of sentiment task 
class sentiment_head(nn.Module):
    """Sentence-level sentiment regressor: attention pooling over steps, then one linear output."""
    def __init__(self,hidden_size,max_len):
        super().__init__()
        self.hidden_size = hidden_size
        # The attention layer pools over exactly max_len steps (its second argument).
        self.att = attention(2*hidden_size,max_len)
        self.out = nn.Linear(2*hidden_size,1)

    def forward(self,lstm):
        """Return one value per sentence, shaped (-1, 1)."""
        pooled = self.att(lstm).view(-1,2*self.hidden_size)
        return self.out(pooled)

In [None]:
## POS TASK........
# feed forward of pos task
class pos_head(nn.Module):
    """POS-task head: a single linear layer producing num_tags scores per token."""
    def __init__(self,lstm_hidden_size,num_tags):
        super().__init__()
        self.lin = nn.Linear(2*lstm_hidden_size,num_tags)

    def forward(self,lstm):
        """Return per-token POS logits; the first two axes of `lstm` are flattened into one."""
        tokens = lstm.reshape(-1, lstm.shape[2])
        return self.lin(tokens)

In [None]:
## MTL model
class cue_MTL_model(nn.Module):
    """Multi-task model: transformer encoder -> BiLSTM -> main head + sentiment head (+ POS head).

    trans_model -- 'xlnet', 'scibert' or 'bert'; selects the pretrained encoder
    hidden_size -- LSTM hidden size per direction
    max_len     -- sequence length expected by the sentiment head's attention layer
    num_tags    -- number of POS classes/features (only used when pos_tagging is on)
    pos_tagging -- POS information is used at all
    pos_scaff   -- with pos_tagging: True = POS is an auxiliary prediction task,
                   False = POS vectors are fed into the main head
    """
    def __init__(self,trans_model,hidden_size,max_len,num_tags,pos_tagging=False,pos_scaff=False):
        super().__init__()
        if(trans_model=='xlnet'):
            self.trans = XLNetModel.from_pretrained('xlnet-base-cased')
        if(trans_model=='scibert'):
            self.trans = AutoModel.from_pretrained('allenai/scibert_scivocab_cased')
        if(trans_model=='bert'):
            self.trans = BertModel.from_pretrained('bert-base-cased')

        self.lstm = nn.LSTM(768,hidden_size,num_layers=1,bidirectional=True,batch_first=True)

        if(pos_tagging==True and pos_scaff==False):
            self.main = main_head_2(lstm_hidden_size=hidden_size,pos_tagging=pos_tagging,num_tags=num_tags)
        else:
            self.main = main_head_1(lstm_hidden_size=hidden_size)

        if(pos_tagging==True and pos_scaff==True):
            self.pos = pos_head(lstm_hidden_size=hidden_size,num_tags=num_tags)

        self.sentiment = sentiment_head(hidden_size,max_len)
        self.pos_tagging = pos_tagging
        # Stored so that forward() follows the configuration this model was built with
        # instead of whatever notebook-level `pos_scaff` happens to be set at call time.
        self.pos_scaff = pos_scaff

    def forward(self,x,att,pos_tags=None):
        """Run all heads on a batch.

        Returns (main_out, sentiment_out), or (main_out, sentiment_out, pos_out) when the
        model was built with pos_tagging and pos_scaff both True. pos_tags is only consumed
        when POS vectors are fed into the main head (pos_tagging True, pos_scaff False).
        """
        xl=self.trans(x,attention_mask=att)[0]
        lstm,_=self.lstm(xl)
        if(self.pos_tagging==True and self.pos_scaff==False):
            main_out = self.main(pos_tags,lstm)
        else:
            main_out = self.main(lstm)

        sentiment_out = self.sentiment(lstm)

        if(self.pos_tagging==True and self.pos_scaff==True):
            pos_out = self.pos(lstm)
            return (main_out,sentiment_out,pos_out)

        else:
            return (main_out,sentiment_out)

**MTL Sentiment scaff + POS Late Fusion Model**

In [None]:
## MTL model
class cue_LF_model(nn.Module):
    """Late-fusion cue model: transformer features are projected to 16 dims and concatenated
    with the per-token POS vector and the sentence sentiment value before classification.

    trans_model  -- 'xlnet', 'scibert' or 'bert'; selects the pretrained encoder
    max_len      -- accepted for signature compatibility; not used by the layers below
    num_tags     -- width of the per-token POS vector
    pos_tagging  -- stored flag
    sentiment_lf -- stored flag

    NOTE(review): the previous forward() was a copy of the MTL model's forward and used
    layers this class never creates (self.lstm, self.main, self.sentiment), so it could not
    run. This forward() follows the layers that __init__ actually builds (lin -> concat ->
    out). The return value is a single logits tensor -- confirm this contract with the
    intended training/evaluation code.
    """
    def __init__(self,trans_model,max_len,num_tags,pos_tagging=False,sentiment_lf=False):
        super().__init__()
        if(trans_model=='xlnet'):
            self.trans = XLNetModel.from_pretrained('xlnet-base-cased')
        if(trans_model=='scibert'):
            self.trans = AutoModel.from_pretrained('allenai/scibert_scivocab_cased')
        if(trans_model=='bert'):
            self.trans = BertModel.from_pretrained('bert-base-cased')

        self.lin = nn.Linear(768,16)
        # 16 projected features + num_tags POS features + 1 sentiment value per token.
        self.out = nn.Linear(16+num_tags+1,4)
        self.pos_tagging = pos_tagging
        self.sentiment_lf = sentiment_lf

    def forward(self,x,att,pos_tags=None,sentiment=None):
        """Return per-token logits of shape (batch*seq_len, 4).

        pos_tags  -- (batch, seq_len, num_tags) POS vectors
        sentiment -- one value per sentence; repeated for every token of that sentence

        Raises ValueError when pos_tags or sentiment is missing, because the output layer
        has a fixed input width that includes both.
        """
        if(pos_tags is None or sentiment is None):
            raise ValueError('cue_LF_model.forward needs both pos_tags and sentiment')

        xl=self.trans(x,attention_mask=att)[0]
        seq_len = xl.shape[1]
        xl = xl.view(-1,xl.shape[2])
        lin = self.lin(xl)

        # Flatten the side inputs to one row per token and match the dtype of `lin`.
        pos_flat = pos_tags.reshape(-1,pos_tags.shape[-1]).to(lin.dtype)
        sent_flat = sentiment.reshape(-1,1).expand(-1,seq_len).reshape(-1,1).to(lin.dtype)

        fused = torch.cat((lin,pos_flat,sent_flat),1)
        return self.out(fused)

# **Training**

**Simple Baseline**

In [26]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [27]:
# Instantiate the baseline and move it to the selected device.
# NOTE(review): `cue_model` is defined in three cells (XLNet, SciBERT, BERT); this picks
# whichever definition was executed last, which must match the selected tokenizer.
model = cue_model()
model.to(device)

In [None]:
# Sanity check: print tensor shapes for the first validation batch only (note the break).
# NOTE(review): this rebinds the notebook-level names `targets` and `sentiment` to tensors.
for i,d in enumerate(val_data_loader):
    inp = d['input'].to(device)
    att = d['attention_mask'].to(device)
    targets = d['targets'].to(device)
    sentiment = d['sentiment'].to(device)
    print(targets.shape)
    # Flattened the same way the training loop flattens targets before the loss.
    targets = targets.view(-1)
    print(targets.shape)
    print(sentiment.shape)
    break

In [28]:
## Run

def evaluate(model,val_data,task,pos_tagging=False,pos_scaff=False,sent_scaff=False):
    """Run `model` over `val_data` without gradients and collect main-task results.

    model       -- network to evaluate (put into eval mode and moved to `device`)
    val_data    -- iterable of batches (dicts with 'input', 'attention_mask', 'targets',
                   and 'pos_tags' when POS vectors are fed to the model)
    task        -- 1 removes padding positions from the returned labels; any other value
                   returns all positions
    pos_tagging, pos_scaff, sent_scaff
                -- architecture flags that decide how the model is called; they default to
                   the simple-baseline configuration (all False)

    Returns (mean_loss, true_labels, predicted_labels). The mean loss is 0 when val_data
    yields no batches.

    Relies on the notebook-level `device`, `cse_loss` and `remove_pad_pred_t1`.
    """
    model.eval()
    model.to(device)
    main_loss = 0
    num_batches = 0
    true=[]
    pred=[]
    with torch.no_grad():
        for d in val_data:
            num_batches += 1
            inp = d['input'].to(device)
            att = d['attention_mask'].to(device)
            targets = d['targets'].view(-1).to(device)

            if(pos_tagging==True and pos_scaff==False):
                pos_tags = d['pos_tags'].to(device)
                logits = model(inp,att,pos_tags)[0]
            elif(sent_scaff==False and pos_tagging==False and pos_scaff==False):
                logits = model(inp,att)   ## For simple baseline
            else:
                logits = model(inp,att)[0]   ## For sentiment MTL

            loss = cse_loss(logits,targets)
            main_loss += loss.item()

            _,predictions = torch.max(logits,dim=1)

            targets = targets.cpu().detach().numpy()
            predictions = predictions.cpu().detach().numpy()

            if(task==1):
                tr,pr = remove_pad_pred_t1(targets,predictions)
                true += tr
                pred += pr
            else:
                true += list(targets)
                pred += list(predictions)

        # Average over the batches actually seen; an empty loader leaves the loss at 0
        # instead of failing on an undefined loop index.
        if(num_batches>0):
            main_loss = main_loss/num_batches
    return (main_loss,true,pred)

In [29]:
def remove_pad_pred_t1(true,pred,pad_label=3):
    """Drop every position whose TRUE label is the padding label from both sequences.

    true      -- sequence of gold labels
    pred      -- sequence of predicted labels, index-aligned with `true`
    pad_label -- label value that marks padding (default 3, the main-task padding label)

    Returns (true_without_pad, pred_without_pad) as lists.
    """
    # A set keeps the membership test O(1), so the filtering is linear in the sequence length.
    pad_idx = {i for i,d in enumerate(true) if d==pad_label}
    true = [d for i,d in enumerate(true) if i not in pad_idx]
    pred = [d for i,d in enumerate(pred) if i not in pad_idx]
    return (true,pred)

In [31]:
## Run --- Training Cue Detection Model -- SIMPLE BASELINE........
epochs = 1

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
# Class 3 (the padding label) gets weight 0, so padding positions do not contribute to the loss.
weights = torch.tensor([1., 1., 1., 0.]).to(device)
cse_loss = torch.nn.CrossEntropyLoss(weight=weights)

loss_list=[]
for ep in range(epochs):
    total_loss=0
    true=[]
    pred=[]
    model.train()
    
    for i,d in enumerate(train_data_loader):
        # Progress message every 300 batches.
        if(i%300 == 299):
            print('batch - ',i+1)

        inp = d['input'].to(device)
        att = d['attention_mask'].to(device)
        # Flattened to one label per token, matching the flattened logits from the model.
        targets = d['targets'].view(-1).to(device)

        logits = model(inp,att)

        loss = cse_loss(logits,targets)
            
        _,predictions = torch.max(logits,dim=1)

        targets = targets.cpu().detach().numpy()
        predictions = predictions.cpu().detach().numpy()
        
        # Metrics are computed on non-padding positions only.
        tr,pr = remove_pad_pred_t1(targets,predictions)
        
        true += tr
        pred += pr

        total_loss += loss.item()
            
        loss.backward()
        optimizer.step()
        # Gradients are cleared after the step, i.e. before the next batch's backward pass.
        optimizer.zero_grad()
        
            
    # Mean training loss over the batches of this epoch.
    total_loss = total_loss/(i+1)

    f1 = f1_score(true,pred,average='macro')
    acc = accuracy_score(true, pred)
    print('epoch : ',ep+1,' --','\n','loss : ',total_loss,'\t','f1 : ',f1,'\t','acc : ',acc)
    print('train confusion matrix :')
    print(confusion_matrix(true,pred))
    
    # validation 
    val_loss,val_true,val_pred = evaluate(model=model,val_data=val_data_loader,task=1,pos_tagging=False,pos_scaff=False,sent_scaff=False)
        
    val_f1 = f1_score(val_true,val_pred,average='macro')
    val_acc = accuracy_score(val_true, val_pred)
    print('val loss : ',val_loss,'\t','val_f1 : ',val_f1,'\t','val_acc : ',val_acc)
    print('val confusion matrix :')
    print(confusion_matrix(val_true,val_pred))
    
    # One checkpoint (the whole pickled model) per epoch.
    # NOTE(review): the file name always starts with 'xlnet_' regardless of `trans_model`;
    # `data` is the dataset-name string set in the dataloader cell.
    torch.save(model, f'/kaggle/working/xlnet_{data}_cue_only_model_ep{ep+1}.pt')
    
    loss_list.append({'train_loss':total_loss,'val_loss':val_loss})

In [None]:
# Manual re-save of the current model under the last epoch's file name.
# NOTE(review): duplicates the save at the end of each training epoch and relies on the
# leftover loop variable `ep`.
torch.save(model, f'/kaggle/working/xlnet_{data}_cue_only_model_ep{ep+1}.pt')

In [None]:
## Run -- Results on HedgePeer Test Data
print('BERT MODEL RESULTS ON HEDGEPEER TEST DATA')
root = '/kaggle/working'
for model_name in os.listdir(root):
    # Only checkpoints whose file name starts with 'bert_hedgepeer' are evaluated.
    # NOTE(review): the baseline training cell saves files named 'xlnet_...'; confirm
    # that this prefix matches the checkpoints that should be scored.
    if(model_name[:14]!='bert_hedgepeer'):
        continue
    # os.path.join inserts the separator that plain string concatenation dropped
    # (root has no trailing slash).
    model_path = os.path.join(root,model_name)
    # NOTE(review): loads a fully pickled model object -- only load trusted files; recent
    # PyTorch releases may need weights_only=False for this kind of checkpoint.
    model = torch.load(model_path)
    model.to(device)
    # The baseline uses no POS / sentiment scaffolds, so all architecture flags are False.
    test_loss,test_true,test_pred = evaluate(model,test_data_loader,task=1,pos_tagging=False,pos_scaff=False,sent_scaff=False)

    test_f1 = f1_score(test_true,test_pred,average='macro')
    test_acc = accuracy_score(test_true, test_pred)
    print(f'model : {model_name}')
    print('test loss : ',test_loss,'\t','test_f1 : ',test_f1,'\t','test_acc : ',test_acc)
    print('test confusion matrix :')
    print(confusion_matrix(test_true,test_pred))
    print('\n')

**MTL model**

In [32]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
pos_tagging = False
hidden_size=100
pos_scaff = False
sent_scaff = True

# Re-derive the tag count from tag_set on every run so that this cell is idempotent:
# incrementing (or clearing) the previous value of num_tags would otherwise compound,
# or fail, when the cell is executed more than once.
num_tags = tag_set.shape[0]

if(pos_tagging == False and pos_scaff == False):
    num_tags = None

if(pos_tagging == True and pos_scaff == True):
    num_tags = num_tags+1           # To take into account the label for the padding token.......

model = cue_MTL_model(trans_model=trans_model,hidden_size=hidden_size,max_len=max_len,num_tags=num_tags,pos_tagging=pos_tagging,pos_scaff=pos_scaff)
model.to(device)

In [None]:
## Run --- Training Cue Detection Model -- MTL MODEL........
epochs = 7

# lambd1 = Lambda for main task, lambd2 = Lambda for sentiment task, lambd3 = Lambda for POS task
lambd1 = 1
lambd2 = 0.1
lambd3 = 0.05
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
# Class 3 (the padding label) gets weight 0 in the main-task loss.
weights = torch.tensor([1., 1., 1., 0.]).to(device)
if(pos_tagging==True and pos_scaff==True):
    # Here num_tags already includes the extra padding label, which is the last class.
    pos_weights = torch.tensor([float(1) for i in range(num_tags-1)]+[float(0)]).to(device)     # weight = 0 for padding token
    pos_cse_loss = torch.nn.CrossEntropyLoss(weight=pos_weights)
cse_loss = torch.nn.CrossEntropyLoss(weight=weights)
mse_loss = torch.nn.MSELoss()

loss_list=[]
for ep in range(epochs):
    running_loss=0
    ep_main_loss=0
    ep_sentiment_loss=0
    ep_pos_loss=0
    true=[]
    pred=[]
    sent_true=[]
    sent_pred=[]
    pos_true=[]
    pos_pred=[]
    model.train()
    
    for i,d in enumerate(train_data_loader):
        # Progress message every 300 batches.
        if(i%300 == 299):
            print('batch - ',i+1)

        inp = d['input'].to(device)
        att = d['attention_mask'].to(device)
        targets = d['targets'].view(-1).to(device)           # targets shape : (batch_size,max_len)
        sentiment = d['sentiment'].to(device)
        
        if(pos_tagging==True):
            pos_tags = d['pos_tags'].to(device)              # pos_tags shape for pos_scaff==False : (batch_size,max_len,num_tags)
            if(pos_scaff==False):
                # POS vectors are an INPUT of the main head.
                main_logits,sentiment_logits = model(inp,att,pos_tags)
            else:
                # POS labels are a TARGET of the auxiliary POS head.
                pos_tags = pos_tags.view(-1)                 # pos_tags shape before view : (batch_size,max_len)
                main_logits,sentiment_logits,pos_logits = model(inp,att)

        else:
            main_logits,sentiment_logits = model(inp,att)

        main_loss = cse_loss(main_logits,targets)
        # NOTE(review): assumes `sentiment` has the same dtype as the logits -- confirm.
        sentiment_loss = mse_loss(sentiment_logits.view(-1),sentiment)
        
        if(pos_tagging==True and pos_scaff==True):
            pos_loss = pos_cse_loss(pos_logits,pos_tags)
            pos_tags = pos_tags.cpu().detach().numpy()
            _,pos_predictions = torch.max(pos_logits,dim=1)
            # NOTE(review): remove_pad_pred_t1 filters on label 3, but the POS padding label
            # is the last class (num_tags-1), and pos_predictions is still a torch tensor
            # here. pos_true/pos_pred are only consumed by the commented-out metrics below.
            pos_tr,pos_pr = remove_pad_pred_t1(pos_tags,pos_predictions)
            pos_true += pos_tr
            pos_pred += pos_pr
            ep_pos_loss += pos_loss.item()
            
        _,main_predictions = torch.max(main_logits,dim=1)

        targets = targets.cpu().detach().numpy()
        main_predictions = main_predictions.cpu().detach().numpy()
        sentiment = list(sentiment.cpu().detach().numpy())
        sentiment_logits = list(sentiment_logits.view(-1).cpu().detach().numpy())
        
        sent_true += sentiment
        sent_pred += sentiment_logits
        
        # Main-task metrics are computed on non-padding positions only.
        tr,pr = remove_pad_pred_t1(targets,main_predictions)
        
        true += tr
        pred += pr

        # Weighted sum of the task losses; the POS term is only added when pos_scaff is on.
        # NOTE(review): pos_loss only exists when pos_tagging is True as well.
        if(pos_scaff==False):
            total_loss = lambd1*main_loss + lambd2*sentiment_loss
        else:
            total_loss = lambd1*main_loss + lambd2*sentiment_loss + lambd3*pos_loss
            
        total_loss.backward()
        running_loss += total_loss.item()
        ep_main_loss += main_loss.item()
        ep_sentiment_loss += sentiment_loss.item()
        
        optimizer.step()
        optimizer.zero_grad()
        
            
    # Per-epoch means over the number of batches.
    running_loss = running_loss/(i+1)
    ep_main_loss = ep_main_loss/(i+1)
    ep_sentiment_loss = ep_sentiment_loss/(i+1)

    f1 = f1_score(true,pred,average='macro')
    acc = accuracy_score(true, pred)
    mae = mean_absolute_error(sent_true,sent_pred)
    
    if(pos_tagging==True and pos_scaff==True):
        ep_pos_loss = ep_pos_loss/(i+1) 
#         pos_f1 = f1_score(pos_true,pos_pred,average='macro')
#         pos_acc = accuracy_score(pos_true, pos_pred)
        pos_f1 = None
        pos_acc = None
    
    if(pos_scaff==False):
        # Placeholder strings for the printout and loss_list when no POS task is trained.
        pos_f1 = 'DOES NOT EXIST'
        pos_acc = 'DOES NOT EXIST'
        ep_pos_loss = 'DOES NOT EXIST'
        
    print('\nepoch : ',ep+1,' --','\n','Combined loss : ',running_loss,'\t','Main Task Loss : ',ep_main_loss,'\t','Sentiment Task Loss : ',ep_sentiment_loss,'\t','POS Task Loss : ',ep_pos_loss)
    print('\nMain Task : -- ','F1 : ',f1,'\t','Acc : ',acc)
    print('train confusion matrix :')
    print(confusion_matrix(true,pred))
    print('\nSentiment Task : -- ','MAE : ',mae)
    print('\nPOS Task : -- ','F1 : ',pos_f1,'\t','Acc : ',pos_acc)
    
    # One checkpoint (the whole pickled model) per epoch, saved before validation.
    torch.save(model, f'/kaggle/working/{trans_model}_{data}_cue_only_MTL_sentiment_model_ep{ep+1}.pt')

    # validation 
    main_val_loss,main_val_true,main_val_pred = evaluate(model,val_data_loader,task=1,pos_tagging=pos_tagging,pos_scaff=pos_scaff,sent_scaff=sent_scaff)
        
    main_val_f1 = f1_score(main_val_true,main_val_pred,average='macro')
    main_val_acc = accuracy_score(main_val_true, main_val_pred)
    print('\nmain task val loss : ',main_val_loss,'\t','main task val_f1 : ',main_val_f1,'\t','main task val_acc : ',main_val_acc)
    print('main task val confusion matrix :')
    print(confusion_matrix(main_val_true,main_val_pred))
    print('\n')
    
    loss_list.append({'total_train_loss':running_loss,'main_train_loss':ep_main_loss,'main_val_loss':main_val_loss,'sentiment_train_loss':ep_sentiment_loss,'pos_train_loss':ep_pos_loss})

In [None]:
import matplotlib.pyplot as plt
# Per-epoch loss curves, read from the dicts appended to `loss_list` by the training loop.
total_train_loss_list = [entry['total_train_loss'] for entry in loss_list]
main_train_loss_list = [entry['main_train_loss'] for entry in loss_list]
sentiment_train_loss_list = [entry['sentiment_train_loss'] for entry in loss_list]
main_val_loss_list = [entry['main_val_loss'] for entry in loss_list]
# Build the x-axis from the epochs actually recorded rather than from `epochs`:
# if training was interrupted, or `epochs` was reassigned by another cell, the
# two lengths differ and plt.plot raises a shape-mismatch error.
ep_list = list(range(1, len(loss_list) + 1))
plt.rcParams['figure.figsize'] = [10, 10]
# plt.plot(ep_list,total_train_loss_list, label='total train loss')
plt.plot(ep_list,main_train_loss_list, label='main task train loss')
# plt.plot(ep_list,sentiment_train_loss_list, label='sentiment task train loss')
plt.plot(ep_list,main_val_loss_list, label='main task val loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
## Run -- Results on HedgePeer Test Data
# The header must be an f-string; without the prefix the placeholders were printed literally.
print(f'{trans_model} MODEL RESULTS ON {data} TEST DATA')
# root = '/kaggle/working/'
# sorted() makes the evaluation order deterministic (os.listdir order is arbitrary).
for model_name in sorted(os.listdir(root)):
    # Only files ending in 'pt' are checkpoints; names starting with 'bert' are skipped.
    if not model_name.endswith('pt') or model_name.startswith('bert'):
        continue
    # os.path.join works whether or not `root` ends with a path separator
    # (plain concatenation produced e.g. '../inputmodel.pt').
    model_path = os.path.join(root, model_name)
    # NOTE: torch.load unpickles arbitrary objects -- only load checkpoints you created yourself.
    model = torch.load(model_path)
    model.to(device)
    test_loss,test_true,test_pred = evaluate(model,test_data_loader,task=1,pos_tagging=pos_tagging,pos_scaff=pos_scaff,sent_scaff=sent_scaff)

    test_f1 = f1_score(test_true,test_pred,average='macro')
    test_acc = accuracy_score(test_true, test_pred)
    print(f'model : {model_name}')
    print('test loss : ',test_loss,'\t','test_f1 : ',test_f1,'\t','test_acc : ',test_acc)
    print('test confusion matrix :')
    print(confusion_matrix(test_true,test_pred))
    print('\n')

# **Task 2: Span Detection**

**Step-1 : HedgePeer Task 2 data Prep**

In [6]:
## Run
# Build the Task 2 frame from `df`: reset the index (dropping the generated
# 'index' column) and rename 'Raw Sentence' to 'sentence'.
datat2 = df.reset_index().drop(columns=['index']).rename(columns = {'Raw Sentence': 'sentence'})
# NOTE(review): `spans` is not defined in this cell; it must already exist in the
# kernel and have one entry per row of `df`. A later cell rebinds `spans`, so
# re-running this cell after that one overwrites 'Span' with the processed values.
datat2['Span'] = spans
datat2

In [None]:
## Run
# Turn the HedgePeer markup into plain text for Task 2:
#   * <span> / </span> delimiters become '#'
#   * the hedge tag (<h> or <mh>) is replaced by a placeholder and its closing tag removed
sent2_hedpeer = datat2['Hedged Sentence'].to_list()
span2_hedpeer = datat2['Span'].to_list()
sentiment = datat2['Sentiment Intensity'].to_list()
sen_t2list = None
sent2 = []
spans = []

# (opening tag, closing tag, placeholder), tried in this order; only the first match is applied.
_HEDGE_TAGS = (
    ('<h>', '</h>', 'token[0]'),
    ('<mh>', '</mh>', 'token[1]'),
)

for hs, s in zip(sent2_hedpeer, span2_hedpeer):
    for open_tag, close_tag, placeholder in _HEDGE_TAGS:
        if hs.find(open_tag) > -1:
            hs = hs.replace('<span>', '#').replace('</span>', '#')
            hs = hs.replace(open_tag, placeholder).replace(close_tag, '')
            s = s.replace(open_tag, placeholder).replace(close_tag, '')
            break
    # Rows without a span carry a non-string value; normalise it to the empty string.
    if type(s) != str:
        s = ''
    sent2.append(hs)
    spans.append(s)

In [None]:
## Run
## 0=out of scope, 1=in scope 

class Biot2_dataset(Dataset):
    """Builds padded token ids, attention masks and per-token span labels for Task 2.

    Sentences are expected to mark the span with two '#' tokens; tokens between
    the markers are labelled 1 (in scope), everything else 0 (out of scope).

    Args:
        sentences: iterable of sentences (span delimited by '#' when a span exists).
        spans: iterable aligned with `sentences`; '' means the sentence has no span.
        trans_model: one of 'xlnet', 'bert', 'scibert' (selects the pad token id).
        sentiment: sentiment values, returned unchanged by `tokenids_gen`.
        tokenizer: object providing `encode_plus` and `decode`.
        max_len: length every sequence is padded to (longer sequences are NOT truncated).
        pos_tagging: whether POS tags are generated.
        num_tags: number of POS tags (width of the one-hot pad vector / pad label id).
        pos_scaff: True -> integer POS labels, False -> one-hot POS features.
    """

    def __init__(self,sentences,spans,trans_model,sentiment,tokenizer,max_len,pos_tagging,num_tags,pos_scaff):
        self.sent = sentences
        self.trans_model = trans_model
        self.token = tokenizer
        self.max = max_len
        self.spans = spans
        self.sentiment = sentiment
        self.pos_tagging = pos_tagging
        self.num_tags = num_tags
        self.pos_scaff = pos_scaff

    def __len__(self):
        """Number of sentences held by the dataset."""
        return len(self.sent)

    def tokenids_gen(self):
        """Tokenise every sentence and return the padded model inputs.

        Returns:
            (senids, attention_masks, targets, sentiment, pos_tags) where the first
            three are lists of per-sentence lists, `sentiment` is passed through
            unchanged, and `pos_tags` stays empty unless `pos_tagging` is True.

        Raises:
            ValueError: if a sentence with a non-empty span lacks two '#' tokens.
        """
        targets = []
        senids=[]
        attention_masks=[]
        pos_tags=[]
        pad_token_ids = {'xlnet':5,'bert':0,'scibert':0}
        # Compiled once: a token is kept only if it contains at least one alphanumeric char.
        has_alnum = re.compile('[A-Za-z0-9]+')
        for s,sc in zip(self.sent,self.spans):
            # Use the tokenizer handed to the constructor rather than a notebook global,
            # so the dataset always agrees with the tokenizer it was built for.
            encodings = self.token.encode_plus(s,
                                  return_tensors='pt',
                                  truncation=False,
                                  return_token_type_ids=True,
                                  return_attention_mask=True,
                                  )

            att = [i.item() for i in encodings['attention_mask'][0]]
            senid = [i.item() for i in encodings['input_ids'][0]]
            k = [self.token.decode(i) for i in senid]

            tar = [0 for _ in k]
            # Value comparison: `is not ''` tested object identity, which only works by
            # interning accident and raises a SyntaxWarning on Python >= 3.8.
            if(sc != ''):
                idxstart = k.index('#')
                idxend = k.index('#',idxstart+1)
                tar[idxstart] = -1
                tar[idxend] = -1
                tar[idxstart+1:idxend] = [1 for _ in range(idxend-idxstart-1)]

                # Remove both '#' markers (and the empty piece directly before a
                # marker, when there is one) from every parallel list.
                for _ in range(2):
                    idx = k.index('#')
                    start = idx-1 if k[idx-1]=='' else idx
                    for seq in (k, senid, tar, att):
                        del seq[start:idx+1]

            # Drop tokens without any alphanumeric character, keeping the lists aligned.
            keep = [has_alnum.search(tok) is not None for tok in k]
            senid = [v for v,flag in zip(senid,keep) if flag]
            tar = [v for v,flag in zip(tar,keep) if flag]
            att = [v for v,flag in zip(att,keep) if flag]
            k = [tok for tok,flag in zip(k,keep) if flag]

            if(len(k)!=len(tar)):
                print(k)
                print('#'*40)

            ## adding pad token at the end....
            n_pad = self.max - len(k)      # non-positive -> no padding is added
            tar = tar+[0 for _ in range(n_pad)]
            senid = senid+[pad_token_ids[self.trans_model] for _ in range(n_pad)]
            att = att+[0 for _ in range(n_pad)]

            if(self.pos_tagging == True and self.pos_scaff == False):
                # One-hot POS features; `one_hot_labels` is a notebook-level lookup table.
                tagged = nltk.pos_tag(k)
                pos_tag_labels = [one_hot_labels[i[1]] for i in tagged]
                pad_encod = [0 for _ in range(self.num_tags)]
                pos_tag_labels = pos_tag_labels+[pad_encod for _ in range(n_pad)]
                pos_tags.append(pos_tag_labels)

            if(self.pos_tagging == True and self.pos_scaff == True):
                # Integer POS labels; `char_to_int` is a notebook-level lookup table and
                # padding positions receive the extra label id `num_tags`.
                tagged = nltk.pos_tag(k)
                pos_tag_labels = [char_to_int[i[1]] for i in tagged]
                pad_encod = self.num_tags
                pos_tag_labels = pos_tag_labels+[pad_encod for _ in range(n_pad)]
                pos_tags.append(pos_tag_labels)

            targets.append(tar)
            senids.append(senid)
            attention_masks.append(att)
        return (senids,attention_masks,targets,self.sentiment,pos_tags)

In [None]:
## Run : Choose tokenizer type first in inp_tokenizer
# Task 2 configuration: transformer backbone, dataset name and POS options.
trans_model = 'xlnet'
data = 'hedgepeer'
pos_tagging=False
pos_scaff=False
# NOTE(review): self-assignment -- `num_tags` must already exist in the kernel
# (it is not defined in this cell), otherwise this line raises NameError.
num_tags=num_tags

# All three tokenizers are instantiated even though only the one selected by
# `trans_model` is used below.
tokenizer1 = XLNetTokenizer.from_pretrained('xlnet-base-cased')

tokenizer2 = BertTokenizer.from_pretrained('bert-base-cased')

tokenizer3 = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_cased')

tokenizer_dict = {'xlnet':tokenizer1,'bert':tokenizer2,'scibert':tokenizer3}

tokenizer = tokenizer_dict[trans_model]

## Run
# remove instances with length more than 100
# NOTE(review): `remove_big_instances` is defined elsewhere in the notebook; this cell
# rebinds sen_t2list, sent2, spans and sentiment to its outputs, so re-running it
# operates on the already-filtered lists.
len_more,sen_t2list,sent2,spans,sentiment = remove_big_instances(datat2,sen_t2list,sent2,spans,sentiment,tokenizer,100)

In [None]:
# Size of `len_more`, the first value returned by remove_big_instances in the cell above.
len(len_more)

In [None]:
## Run
# Split the Task 2 data into train/val/test and build the three data loaders.

max_len = 100
batch_size = 4

if(data=='hedgepeer'):
    # split (train,val,test) = (70%,20%,10%)
    train_size = 40088
    val_size = 11448
    test_size = len(sent2)-val_size-train_size

else:
    # Fail here with a clear message: merely printing left `test_size` undefined and
    # the split below crashed with an unrelated NameError.
    raise ValueError(f'Wrong Dataset Entry! Unsupported dataset: {data!r}')

# Keep span and sentiment together so both follow the same shuffle.
y12 = list(zip(spans,sentiment))
# First carve off the test set, then take the validation set out of the remainder;
# random_state is fixed so the split is reproducible.
sen_train, sen_test, y12_train, y12_test = train_test_split(sent2,y12,test_size=test_size, random_state=0)
sen_train, sen_val, y12_train, y12_val = train_test_split(sen_train,y12_train,test_size=val_size, random_state=0)

train_data_loader = dataloader_gen(sen_train,y12_train,trans_model,tokenizer,max_len,batch_size,task=2,pos_tagging=pos_tagging,num_tags=num_tags,pos_scaff=pos_scaff)
val_data_loader = dataloader_gen(sen_val,y12_val,trans_model,tokenizer,max_len,batch_size,task=2,pos_tagging=pos_tagging,num_tags=num_tags,pos_scaff=pos_scaff)
test_data_loader = dataloader_gen(sen_test,y12_test,trans_model,tokenizer,max_len,batch_size,task=2,pos_tagging=pos_tagging,num_tags=num_tags,pos_scaff=pos_scaff)

In [None]:
# Total number of Task 2 sentences currently held in `sent2`.
len(sent2)

In [None]:
# Number of sentences in the training split.
len(sen_train)

# **Task 2 : Span detection Model**

**Simple Baselines**

In [None]:
## Run : XLNet model
class scoperes_model(nn.Module):
    """Baseline span tagger: XLNet encoder followed by a 2-way linear classifier.

    NOTE: the BERT and SciBERT cells define a class with the same name; whichever
    of these cells ran last is the one `scoperes_model()` instantiates.
    """

    def __init__(self):
        super().__init__()
        self.xlmodel = XLNetModel.from_pretrained('xlnet-base-cased')
        self.lin = nn.Linear(768,2)

    def forward(self,x,att):
        """Return logits with one row per token position, i.e. shape (N, 2).

        `x` are input ids and `att` the attention mask; the first encoder output
        is assumed to be 3-D with a trailing feature size of 768.
        """
        hidden = self.xlmodel(x,attention_mask=att)[0]
        # Merge all leading dimensions so every token becomes one row.
        token_rows = hidden.view(-1,hidden.size(2))
        return self.lin(token_rows)

In [None]:
## Run : Bert model 
class scoperes_model(nn.Module):
    """Baseline span tagger: BERT (bert-base-cased) encoder plus a 2-way linear classifier.

    NOTE: the XLNet and SciBERT cells define a class with the same name; whichever
    of these cells ran last is the one `scoperes_model()` instantiates.
    """

    def __init__(self):
        super().__init__()
        self.model = BertModel.from_pretrained('bert-base-cased')
        self.lin = nn.Linear(768,2)

    def forward(self,x,att):
        """Return logits with one row per token position, i.e. shape (N, 2).

        `x` are input ids and `att` the attention mask; the first encoder output
        is assumed to be 3-D with a trailing feature size of 768.
        """
        hidden = self.model(x,attention_mask=att)[0]
        # Merge all leading dimensions so every token becomes one row.
        token_rows = hidden.view(-1,hidden.size(2))
        return self.lin(token_rows)

In [None]:
## Run : Scibert model 
class scoperes_model(nn.Module):
    """Baseline span tagger: SciBERT (scivocab, cased) encoder plus a 2-way linear classifier.

    NOTE: the XLNet and BERT cells define a class with the same name; whichever
    of these cells ran last is the one `scoperes_model()` instantiates.
    """

    def __init__(self):
        super().__init__()
        self.model = AutoModel.from_pretrained('allenai/scibert_scivocab_cased')
        self.lin = nn.Linear(768,2)

    def forward(self,x,att):
        """Return logits with one row per token position, i.e. shape (N, 2).

        `x` are input ids and `att` the attention mask; the first encoder output
        is assumed to be 3-D with a trailing feature size of 768.
        """
        hidden = self.model(x,attention_mask=att)[0]
        # Merge all leading dimensions so every token becomes one row.
        token_rows = hidden.view(-1,hidden.size(2))
        return self.lin(token_rows)

**MTL Model**

In [None]:
## Attention class
class attention(nn.Module):
    """Attention pooling over the step axis.

    Each step gets the score exp(tanh(x . w + b)); the scores are (optionally)
    masked, normalised per row and used to form a weighted sum of the steps.

    Args:
        feature_dim: size of the last axis of the input.
        step_dim: number of steps (second axis of the input).
        bias: whether a learnable per-step bias is added to the raw scores.
    """

    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super().__init__(**kwargs)

        self.supports_masking = True

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0
        # Intermediate values of the last forward pass, kept for inspection.
        self.a = 0
        self.th = 0
        self.eij = 0

        # Projection vector, initialised with Kaiming-uniform.
        self.weight = nn.Parameter(nn.init.kaiming_uniform_(torch.zeros(feature_dim, 1)))
        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))

    def forward(self, x, mask=None):
        """Pool `x` (viewed as (-1, step_dim, feature_dim)) into one vector per row.

        `mask`, when given, is multiplied element-wise into the un-normalised weights.
        """
        flat = x.contiguous().view(-1, self.feature_dim)
        scores = torch.mm(flat, self.weight).view(-1, self.step_dim)
        if self.bias:
            scores = scores + self.b
        self.eij = scores

        self.th = torch.tanh(self.eij)
        unnormalised = torch.exp(self.th)
        if mask is not None:
            unnormalised = unnormalised * mask

        # The small epsilon keeps the division finite when every weight is masked out.
        self.a = unnormalised / (unnormalised.sum(dim=1, keepdim=True) + 1e-10)
        return (x * self.a.unsqueeze(-1)).sum(dim=1)

In [None]:
## MAIN TASK WITHOUT POS_TAGGING........
# feed forward of main task (hedge cue/span prediction)

class main_head_1(nn.Module):
    """Main-task head without POS features: dropout -> linear(16) -> ReLU -> linear(2)."""

    def __init__(self,lstm_hidden_size):
        super().__init__()
        # Input width is 2*lstm_hidden_size.
        self.lin = nn.Linear(2*lstm_hidden_size,16)
        self.drop = nn.Dropout(p=0.4)
        self.out = nn.Linear(16,2)
        self.relu = torch.nn.ReLU()

    def forward(self,lstm):
        """Map a 3-D feature tensor to logits of shape (N, 2), one row per position."""
        # Collapse the leading dimensions: one row per position.
        rows = lstm.reshape(-1, lstm.shape[2])
        hidden = self.relu(self.lin(self.drop(rows)))
        return self.out(hidden)

In [None]:
## MAIN TASK WITH POS_TAGGING........
# feed forward of main task (hedge cue/span prediction)

class main_head_2(nn.Module):
    """Main-task head that concatenates POS-tag features to the projected features."""

    def __init__(self,lstm_hidden_size,pos_tagging,num_tags):
        super().__init__()
        self.lin = nn.Linear(2*lstm_hidden_size,16)
        # The output layer sees the 16 projected features plus `num_tags` POS features.
        self.out = nn.Linear(16+num_tags,2)
        self.pos_tagging = pos_tagging

    def forward(self,pos_tags,lstm):
        """Return logits of shape (N, 2) from 3-D `pos_tags` and 3-D `lstm` features."""
        rows = lstm.reshape(-1, lstm.shape[2])
        tag_rows = pos_tags.view(-1,pos_tags.shape[2])
        projected = self.lin(rows)
        # Cast after concatenation so both feature groups share the float dtype.
        combined = torch.cat((projected,tag_rows),dim=1).float()
        return self.out(combined)

In [None]:
# feed forward of sentiment task 
class sentiment_head(nn.Module):
    """Sentiment head: attention pooling over the sequence, then a linear layer to one value."""

    def __init__(self,hidden_size,max_len):
        super().__init__()
        self.hidden_size = hidden_size
        # The attention layer needs the sequence length up front (here max_len = 100).
        self.att = attention(2*hidden_size,max_len)
        self.out = nn.Linear(2*hidden_size,1)

    def forward(self,lstm):
        """Return one prediction per sequence, shape (N, 1)."""
        pooled = self.att(lstm).view(-1, 2*self.hidden_size)
        return self.out(pooled)

In [None]:
## POS TASK........
# feed forward of pos task
class pos_head(nn.Module):
    """POS head: a single linear layer producing `num_tags` logits per position."""

    def __init__(self,lstm_hidden_size,num_tags):
        super().__init__()
        self.lin = nn.Linear(2*lstm_hidden_size,num_tags)

    def forward(self,lstm):
        """Return logits of shape (N, num_tags) from a 3-D feature tensor."""
        rows = lstm.reshape(-1, lstm.shape[2])
        return self.lin(rows)

In [None]:
## MTL model
class scope_MTL_model(nn.Module):
    """Multi-task model: transformer encoder -> BiLSTM -> main / sentiment / optional POS heads.

    Args:
        trans_model: 'xlnet', 'scibert' or 'bert'; selects the pretrained encoder.
        hidden_size: hidden size of the BiLSTM (the heads receive 2*hidden_size features).
        max_len: sequence length expected by the sentiment head's attention layer.
        num_tags: number of POS tags (only used by the POS-aware heads).
        pos_tagging: whether POS information is used at all.
        pos_scaff: True -> POS is an auxiliary prediction task (extra head);
                   False with pos_tagging=True -> POS tags are fed to the main head as features.

    Raises:
        ValueError: if `trans_model` is not one of the supported names.
    """

    def __init__(self,trans_model,hidden_size,max_len,num_tags,pos_tagging=False,pos_scaff=False):
        super().__init__()
        if(trans_model=='xlnet'):
            self.trans = XLNetModel.from_pretrained('xlnet-base-cased')
        elif(trans_model=='scibert'):
            self.trans = AutoModel.from_pretrained('allenai/scibert_scivocab_cased')
        elif(trans_model=='bert'):
            self.trans = BertModel.from_pretrained('bert-base-cased')
        else:
            # Previously an unknown name left `self.trans` undefined and only failed in forward().
            raise ValueError(f"Unsupported trans_model {trans_model!r}; expected 'xlnet', 'scibert' or 'bert'")

        self.lstm = nn.LSTM(768,hidden_size,num_layers=1,bidirectional=True,batch_first=True)

        if(pos_tagging==True and pos_scaff==False):
            self.main = main_head_2(lstm_hidden_size=hidden_size,pos_tagging=pos_tagging,num_tags=num_tags)
        else:
            self.main = main_head_1(lstm_hidden_size=hidden_size)

        if(pos_tagging==True and pos_scaff==True):
            self.pos = pos_head(lstm_hidden_size=hidden_size,num_tags=num_tags)

        self.sentiment = sentiment_head(hidden_size,max_len)
        self.pos_tagging = pos_tagging
        # Stored so forward() no longer depends on notebook-level globals.
        self.pos_scaff = pos_scaff

    def forward(self,x,att,pos_tags=None):
        """Run the encoder, the BiLSTM and every configured head.

        Returns:
            (main_out, sentiment_out), or (main_out, sentiment_out, pos_out) when the
            model was built with pos_tagging=True and pos_scaff=True.
        """
        # Route on the instance's own configuration. The original read the globals
        # `pos_scaff` / `pos_tagging`, so a model behaved differently (or crashed)
        # when those notebook variables changed after construction or were missing.
        # Instances pickled before `pos_scaff` was stored fall back to checking
        # whether a POS head exists.
        pos_scaff = getattr(self, 'pos_scaff', hasattr(self, 'pos'))

        xl=self.trans(x,attention_mask=att)[0]
        lstm,_=self.lstm(xl)
        if(self.pos_tagging==True and pos_scaff==False):
            main_out = self.main(pos_tags,lstm)
        else:
            main_out = self.main(lstm)

        sentiment_out = self.sentiment(lstm)

        if(self.pos_tagging==True and pos_scaff==True):
            pos_out = self.pos(lstm)
            return (main_out,sentiment_out,pos_out)

        return (main_out,sentiment_out)

# **Training**

**Simple Baseline**

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Instantiate the baseline tagger. Three cells define `scoperes_model`
# (XLNet / BERT / SciBERT); the one executed last is the class used here.
model = scoperes_model()
model.to(device)

In [None]:
## Run

def evaluate(model,val_data,task,pos_tagging,pos_scaff,sent_scaff):
    """Evaluate the main task of `model` on `val_data` without gradient tracking.

    Relies on the notebook-level `device`, `cse_loss` and (for task 1)
    `remove_pad_pred_t1`.

    Args:
        model: baseline or MTL model; MTL models return a tuple whose first item is the main logits.
        val_data: iterable of batches (dicts with 'input', 'attention_mask', 'targets'
            and, when POS features are used, 'pos_tags').
        task: 1 -> pad labels are stripped via remove_pad_pred_t1; anything else -> labels kept as-is.
        pos_tagging, pos_scaff, sent_scaff: flags describing how the model was built;
            they decide how the model is called and whether its output is a tuple.

    Returns:
        (mean main-task loss over the batches, list of true labels, list of predicted labels).
        For an empty `val_data` the loss is 0 and both lists are empty.
    """
    model.eval()
    model.to(device)
    main_loss = 0
    num_batches = 0
    true=[]
    pred=[]
    with torch.no_grad():
        for d in val_data:
            num_batches += 1
            inp = d['input'].to(device)
            att = d['attention_mask'].to(device)
            targets = d['targets'].view(-1).to(device)

            if(pos_tagging==True and pos_scaff==False):
                pos_tags = d['pos_tags'].to(device)
                logits = model(inp,att,pos_tags)[0]
            elif(sent_scaff==False and pos_tagging==False and pos_scaff==False):
                logits = model(inp,att)   ## For simple baseline
            else:
                logits = model(inp,att)[0]   ## For sentiment MTL

            loss = cse_loss(logits,targets)
            main_loss += loss.item()

            _,predictions = torch.max(logits,dim=1)

            targets = targets.cpu().detach().numpy()
            predictions = predictions.cpu().detach().numpy()

            if(task==1):
                tr,pr = remove_pad_pred_t1(targets,predictions)
                true += tr
                pred += pr
            else:
                true += list(targets)
                pred += list(predictions)

    # Counting batches explicitly avoids the unbound loop index the original hit
    # (`main_loss/(i+1)`) when the loader yielded nothing.
    if num_batches:
        main_loss = main_loss/num_batches
    return (main_loss,true,pred)

In [None]:
def remove_pad_pred_t1(true,pred,pad_label=3):
    """Drop padding positions from aligned label / prediction sequences.

    Args:
        true: sequence of gold labels.
        pred: sequence of predictions aligned with `true`.
        pad_label: label value marking padding in `true` (default 3, the value
            that used to be hard-coded).

    Returns:
        (true_kept, pred_kept): two lists with every position whose gold label
        equals `pad_label` removed.
    """
    # A set gives O(1) membership tests; the original list made the filter O(n^2).
    pad_positions = {i for i,label in enumerate(true) if label==pad_label}
    true_kept = [label for label in true if label!=pad_label]
    pred_kept = [p for i,p in enumerate(pred) if i not in pad_positions]
    return (true_kept,pred_kept)

In [None]:
## Run = Training -- Simple Baseline
# Fine-tunes the baseline `model` on `train_data_loader` for `epochs` epochs, reports
# train/validation metrics after every epoch and saves a checkpoint per epoch.

epochs = 7

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
# Module-level loss; evaluate() reads this global too.
cse_loss = torch.nn.CrossEntropyLoss()

# One dict per epoch: {'train_loss', 'val_loss'}.
loss_list=[]
for ep in range(epochs):
    total_loss=0
    true=[]
    pred=[]
    model.train()
    
    for i,d in enumerate(train_data_loader):
        # Progress message every 300 batches.
        if(i%300 == 299):
            print('batch - ',i+1)
        
        inp = d['input'].to(device)
        att = d['attention_mask'].to(device)
        # Flatten the targets so they line up with the flattened logits returned by the model.
        targets = d['targets'].view(-1).to(device)

        logits = model(inp,att)

        loss = cse_loss(logits,targets)
            
        _,predictions = torch.max(logits,dim=1)

        targets = targets.cpu().detach().numpy()
        predictions = predictions.cpu().detach().numpy()
        
        # Every position of the batch is accumulated; nothing is filtered out here.
        true += list(targets)
        pred += list(predictions)
        
        total_loss += loss.item()
            
        loss.backward()
        optimizer.step()
        # Gradients are cleared after the step, ready for the next batch.
        optimizer.zero_grad()
        
            
    # Mean loss per batch (`i` is the index of the last batch).
    total_loss = total_loss/(i+1)

    f1 = f1_score(true,pred,average='macro')
    acc = accuracy_score(true, pred)
    print('epoch : ',ep+1,' --','\n','loss : ',total_loss,'\t','f1 : ',f1,'\t','acc : ',acc)
    print('train confusion matrix :')
    print(confusion_matrix(true,pred))
    
    # validation 
    val_loss,val_true,val_pred = evaluate(model=model,val_data=val_data_loader,task=2,pos_tagging=False,pos_scaff=False,sent_scaff=False)
        
    val_f1 = f1_score(val_true,val_pred,average='macro')
    val_acc = accuracy_score(val_true, val_pred)
    print('val loss : ',val_loss,'\t','val_f1 : ',val_f1,'\t','val_acc : ',val_acc)
    print('val confusion matrix :')
    print(confusion_matrix(val_true,val_pred))
    
    # The whole module object (not just its state_dict) is pickled once per epoch.
    torch.save(model, f'/kaggle/working/{trans_model}_{data}_span_only_model_ep{ep+1}.pt')
    
    loss_list.append({'train_loss':total_loss,'val_loss':val_loss})

In [None]:
## Run -- Results on HedgePeer Test Data
# Header now reflects the configured backbone/dataset instead of a hard-coded
# 'SCIBERT ... HEDGEPEER' label that was wrong for every other configuration.
print(f'{trans_model} MODEL RESULTS ON {data} TEST DATA')
root = './'
# sorted() makes the evaluation order deterministic (os.listdir order is arbitrary).
for model_name in sorted(os.listdir(root)):
    # Only files ending in 'pt' are checkpoints; names starting with 'bert' are skipped.
    if not model_name.endswith('pt') or model_name.startswith('bert'):
        continue
    model_path = os.path.join(root, model_name)
    # NOTE: torch.load unpickles arbitrary objects -- only load checkpoints you created yourself.
    # NOTE(review): every remaining *.pt file in `root` is evaluated as a simple baseline
    # (all scaffold flags False); confirm the directory holds only baseline checkpoints.
    model = torch.load(model_path)
    model.to(device)
    test_loss,test_true,test_pred = evaluate(model=model,val_data=test_data_loader,task=2,pos_tagging=False,pos_scaff=False,sent_scaff=False)

    test_f1 = f1_score(test_true,test_pred,average='macro')
    test_acc = accuracy_score(test_true, test_pred)
    print(f'model : {model_name}')
    print('test loss : ',test_loss,'\t','test_f1 : ',test_f1,'\t','test_acc : ',test_acc)
    print('test confusion matrix :')
    print(confusion_matrix(test_true,test_pred))
    print('\n')

In [None]:
# Ad-hoc check: confusion matrix of the current `model` on the test loader.
cse_loss = torch.nn.CrossEntropyLoss()
# NOTE(review): `task` is not defined in this cell or in the surrounding Task 2 cells;
# it must already exist in the kernel. The neighbouring cells pass task=2 -- confirm.
loss,truei,predi = evaluate(model,test_data_loader,task=task,pos_tagging=False,pos_scaff=False,sent_scaff=False)
        
# val_f1 = f1_score(val_true,val_pred,average='macro')
# val_acc = accuracy_score(val_true, val_pred)
print(confusion_matrix(truei,predi))

**MTL Model**

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# MTL configuration: which scaffold tasks are enabled and the BiLSTM hidden size.
# NOTE(review): the data loaders are built in an earlier cell using the values
# pos_tagging / pos_scaff had at that time; if they differ from the values set
# here, the batches may lack the 'pos_tags' entry the training loop reads -- confirm.
pos_tagging = True
hidden_size=100
pos_scaff = True
sent_scaff = True

if(pos_tagging == False and pos_scaff == False):
    num_tags = None

# NOTE(review): this increments the notebook-level `num_tags` in place, so running
# the cell twice adds 2 -- the cell is not idempotent.
if(pos_tagging == True and pos_scaff == True):
    num_tags = num_tags+1           # To take into account the label for the padding token.......

model = scope_MTL_model(trans_model=trans_model,hidden_size=hidden_size,max_len=max_len,num_tags=num_tags,pos_tagging=pos_tagging,pos_scaff=pos_scaff)
model.to(device)

In [None]:
## Run --- Training Span Detection (Task 2) Model -- MTL MODEL........
# Trains the MTL model on the main task plus the sentiment (and optionally POS)
# scaffold tasks, validates the main task after every epoch and saves one
# checkpoint per epoch.
epochs = 7

# lambd1 = Lambda for main task, lambd2 = Lambda for sentiment task, lambd3 = Lambda for POS task
lambd1 = 1
lambd2 = 0.1
lambd3 = 0.05
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
if(pos_tagging==True and pos_scaff==True):
    # Class weights for the POS loss: 1 for every tag, 0 for the last class index.
    pos_weights = torch.tensor([float(1) for i in range(num_tags-1)]+[float(0)]).to(device)     # weight = 0 for padding token
    pos_cse_loss = torch.nn.CrossEntropyLoss(weight=pos_weights)
# Module-level loss; evaluate() reads this global too.
cse_loss = torch.nn.CrossEntropyLoss()
mse_loss = torch.nn.MSELoss()

# One dict per epoch with the averaged train losses and the main-task validation loss.
loss_list=[]
for ep in range(epochs):
    running_loss=0
    ep_main_loss=0
    ep_sentiment_loss=0
    ep_pos_loss=0
    true=[]
    pred=[]
    sent_true=[]
    sent_pred=[]
    pos_true=[]
    pos_pred=[]
    model.train()
    
    for i,d in enumerate(train_data_loader):
        # Progress message every 300 batches.
        if(i%300 == 299):
            print('batch - ',i+1)

        inp = d['input'].to(device)
        att = d['attention_mask'].to(device)
        targets = d['targets'].view(-1).to(device)           # targets shape : (batch_size,max_len)
        sentiment = d['sentiment'].to(device)
        
        if(pos_tagging==True):
            pos_tags = d['pos_tags'].to(device)              # pos_tags shape for pos_scaff==False : (batch_size,max_len,num_tags)
            if(pos_scaff==False):
                # POS tags are input features of the main head.
                main_logits,sentiment_logits = model(inp,att,pos_tags)
            else:
                # POS tags are prediction targets of the auxiliary POS head.
                pos_tags = pos_tags.view(-1)                 # pos_tags shape before view : (batch_size,max_len)
                main_logits,sentiment_logits,pos_logits = model(inp,att)

        else:
            main_logits,sentiment_logits = model(inp,att)

        main_loss = cse_loss(main_logits,targets)
        sentiment_loss = mse_loss(sentiment_logits.view(-1),sentiment)
        
        if(pos_tagging==True and pos_scaff==True):
            pos_loss = pos_cse_loss(pos_logits,pos_tags)
            pos_tags = pos_tags.cpu().detach().numpy()
            _,pos_predictions = torch.max(pos_logits,dim=1)
            # NOTE(review): remove_pad_pred_t1 filters on the label value 3, while the zero
            # weight above marks the last class index as padding -- these look inconsistent.
            # `pos_predictions` is also passed as a tensor (not moved to CPU / numpy).
            # The collected pos_true / pos_pred are currently unused (metrics are disabled below).
            pos_tr,pos_pr = remove_pad_pred_t1(pos_tags,pos_predictions)
            pos_true += pos_tr
            pos_pred += pos_pr
            ep_pos_loss += pos_loss.item()
            
        _,main_predictions = torch.max(main_logits,dim=1)

        targets = targets.cpu().detach().numpy()
        main_predictions = main_predictions.cpu().detach().numpy()
        sentiment = list(sentiment.cpu().detach().numpy())
        sentiment_logits = list(sentiment_logits.view(-1).cpu().detach().numpy())
        
        sent_true += sentiment
        sent_pred += sentiment_logits
        
        true += list(targets)
        pred += list(main_predictions)

        # Weighted sum of the task losses.
        # NOTE(review): the else branch needs `pos_loss`, which is only computed when
        # pos_tagging and pos_scaff are both True.
        if(pos_scaff==False):
            total_loss = lambd1*main_loss + lambd2*sentiment_loss
        else:
            total_loss = lambd1*main_loss + lambd2*sentiment_loss + lambd3*pos_loss
            
        total_loss.backward()
        running_loss += total_loss.item()
        ep_main_loss += main_loss.item()
        ep_sentiment_loss += sentiment_loss.item()
        
        optimizer.step()
        # Gradients are cleared after the step, ready for the next batch.
        optimizer.zero_grad()
        
            
    # Mean losses per batch (`i` is the index of the last batch).
    running_loss = running_loss/(i+1)
    ep_main_loss = ep_main_loss/(i+1)
    ep_sentiment_loss = ep_sentiment_loss/(i+1)

    f1 = f1_score(true,pred,average='macro')
    acc = accuracy_score(true, pred)
    mae = mean_absolute_error(sent_true,sent_pred)
    
    if(pos_tagging==True and pos_scaff==True):
        ep_pos_loss = ep_pos_loss/(i+1) 
        # POS metrics are disabled; placeholders are printed instead.
#         pos_f1 = f1_score(pos_true,pos_pred,average='macro')
#         pos_acc = accuracy_score(pos_true, pos_pred)
        pos_f1 = None
        pos_acc = None
    
    if(pos_scaff==False):
        pos_f1 = 'DOES NOT EXIST'
        pos_acc = 'DOES NOT EXIST'
        ep_pos_loss = 'DOES NOT EXIST'
        
    print('\nepoch : ',ep+1,' --','\n','Combined loss : ',running_loss,'\t','Main Task Loss : ',ep_main_loss,'\t','Sentiment Task Loss : ',ep_sentiment_loss,'\t','POS Task Loss : ',ep_pos_loss)
    print('\nMain Task : -- ','F1 : ',f1,'\t','Acc : ',acc)
    print('train confusion matrix :')
    print(confusion_matrix(true,pred))
    print('\nSentiment Task : -- ','MAE : ',mae)
    print('\nPOS Task : -- ','F1 : ',pos_f1,'\t','Acc : ',pos_acc)
    
    # The whole module object (not just its state_dict) is pickled once per epoch.
    torch.save(model, f'/kaggle/working/{trans_model}_{data}_span_only_MTL_sentiment_POS_scaff_model_ep{ep+1}.pt')

    # validation 
    main_val_loss,main_val_true,main_val_pred = evaluate(model,val_data_loader,task=2,pos_tagging=pos_tagging,pos_scaff=pos_scaff,sent_scaff=sent_scaff)
        
    main_val_f1 = f1_score(main_val_true,main_val_pred,average='macro')
    main_val_acc = accuracy_score(main_val_true, main_val_pred)
    print('\nmain task val loss : ',main_val_loss,'\t','main task val_f1 : ',main_val_f1,'\t','main task val_acc : ',main_val_acc)
    print('main task val confusion matrix :')
    print(confusion_matrix(main_val_true,main_val_pred))
    print('\n')
    
    loss_list.append({'total_train_loss':running_loss,'main_train_loss':ep_main_loss,'main_val_loss':main_val_loss,'sentiment_train_loss':ep_sentiment_loss,'pos_train_loss':ep_pos_loss})

In [None]:
import matplotlib.pyplot as plt
# Per-epoch loss curves, read from the dicts appended to `loss_list` by the training loop.
total_train_loss_list = [entry['total_train_loss'] for entry in loss_list]
main_train_loss_list = [entry['main_train_loss'] for entry in loss_list]
sentiment_train_loss_list = [entry['sentiment_train_loss'] for entry in loss_list]
main_val_loss_list = [entry['main_val_loss'] for entry in loss_list]
# Build the x-axis from the epochs actually recorded rather than from `epochs`:
# if training was interrupted, or `epochs` was reassigned by another cell, the
# two lengths differ and plt.plot raises a shape-mismatch error.
ep_list = list(range(1, len(loss_list) + 1))
plt.rcParams['figure.figsize'] = [10, 10]
# plt.plot(ep_list,total_train_loss_list, label='total train loss')
plt.plot(ep_list,main_train_loss_list, label='main task train loss')
# plt.plot(ep_list,sentiment_train_loss_list, label='sentiment task train loss')
plt.plot(ep_list,main_val_loss_list, label='main task val loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
## Run -- Results on HedgePeer Test Data
print(f'{trans_model} MODEL RESULTS ON {data} TEST DATA')
# root = '../input/cue-only-models/'
root = '/kaggle/working/'
# Checkpoints are saved as '<trans_model>_<data>_...'; select the ones that belong to
# the configured backbone. The original filter hard-coded the 'bert' prefix, so nothing
# was evaluated when trans_model was 'xlnet' or 'scibert' although the header said so.
checkpoint_prefix = f'{trans_model}_'
# sorted() makes the evaluation order deterministic (os.listdir order is arbitrary).
for model_name in sorted(os.listdir(root)):
    if not model_name.endswith('pt') or not model_name.startswith(checkpoint_prefix):
        continue
    model_path = os.path.join(root, model_name)
    # NOTE: torch.load unpickles arbitrary objects -- only load checkpoints you created yourself.
    model = torch.load(model_path)
    model.to(device)
    # task=2 matches the validation call of the Task 2 training loop (was task=1).
    test_loss,test_true,test_pred = evaluate(model,test_data_loader,task=2,pos_tagging=pos_tagging,pos_scaff=pos_scaff,sent_scaff=sent_scaff)

    test_f1 = f1_score(test_true,test_pred,average='macro')
    test_acc = accuracy_score(test_true, test_pred)
    print(f'model : {model_name}')
    print('test loss : ',test_loss,'\t','test_f1 : ',test_f1,'\t','test_acc : ',test_acc)
    print('test confusion matrix :')
    print(confusion_matrix(test_true,test_pred))
    print('\n')