In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.nn import Parameter
from torch.utils import data
from torch.cuda.amp import GradScaler
from torchcrf import CRF
from torch.optim.lr_scheduler import StepLR

import numpy
import numpy as np
import json
import os
import math
import copy
from seqeval.metrics import accuracy_score,f1_score, precision_score, recall_score

In [None]:
import codecs
import transformers
from transformers import BertModel,BertTokenizer

In [None]:
# Fix the NumPy and PyTorch global random seeds (both set to 3) so that
# randomly initialised weights are repeatable across runs.
np.random.seed(3)
torch.manual_seed(3)

In [None]:
# Restrict CUDA to the physical GPUs with ids 1 and 3.
# NOTE(review): the ids are hard-coded for one machine, and the variable is set
# after `import torch`; presumably it still takes effect because no CUDA call
# has been made yet -- confirm on the target environment.
os.environ["CUDA_VISIBLE_DEVICES"]="1,3"

# Focal loss and Dice loss

In [None]:
#multi-classification Focal Loss
def focal_loss(input,target,alpha=0.5,gamma=2,weight=None,ignore_index=-100,reduction='mean'):
    '''
    Multi-class focal loss computed from log-probabilities.

    The focal modulation ``(1 - pt) ** gamma`` is applied to every element
    individually and the reduction is performed afterwards. (Reducing first and
    modulating the reduced value would make ``pt`` the exponential of an averaged
    or summed log-probability instead of a per-sample probability.)

    :param input: FloatTensor of log-probabilities (e.g. output of ``log_softmax``),
                  in any layout accepted by ``F.nll_loss``
    :param target: LongTensor of class indices matching ``input``
    :param alpha: float, global scaling factor of the loss
    :param gamma: float, focusing parameter; 0 gives ``alpha`` times the plain NLL
    :param weight: optional 1-D tensor with one rescaling weight per class
    :param ignore_index: target value whose positions contribute nothing to the loss
    :param reduction: ``'none'`` | ``'mean'`` | ``'sum'``.
            ``'none'``: the per-element loss is returned,
            ``'mean'``: the summed loss is divided by the number of non-ignored
            elements (by the sum of their class weights when ``weight`` is given),
            ``'sum'``: the per-element losses are summed
    :return: loss tensor (scalar unless ``reduction='none'``)
    :raises ValueError: if ``reduction`` is not one of the three supported values
    '''
    if reduction not in ('none','mean','sum'):
        raise ValueError("{} is not a valid value for reduction".format(reduction))

    # Un-weighted, per-element log-probability of the true class. Ignored
    # positions come back as 0, i.e. pt == 1, hence a focal loss of exactly 0.
    logpt = -F.nll_loss(input=input,target=target,ignore_index=ignore_index,reduction='none')
    pt = torch.exp(logpt)
    loss = -((1-pt)**gamma)*alpha*logpt

    valid = target != ignore_index
    if weight is None:
        element_weight = valid.to(loss.dtype)
    else:
        # Look the class weight up per element; ignored targets may be negative,
        # so they are redirected to class 0 and then zeroed out by `valid`.
        safe_target = target.masked_fill(~valid,0)
        element_weight = weight.to(loss.dtype)[safe_target]*valid.to(loss.dtype)
        loss = loss*element_weight

    if reduction == 'none':
        return loss
    if reduction == 'sum':
        return loss.sum()
    # 'mean': same normaliser as F.nll_loss; the clamp avoids 0/0 when every
    # element is ignored (the result is then 0 instead of NaN).
    normaliser = element_weight.sum().clamp_min(torch.finfo(loss.dtype).eps)
    return loss.sum()/normaliser

In [None]:
#multi-classification Dice Loss
def dice_loss(input,target,esp=1e-5,alpha=0.2,loss_type='DSC'):
    '''
    Multi-class Dice-style loss on class probabilities.

    :param input: FloatTensor, probabilities produced by a softmax layer, shape (batch_size, class_num)
    :param target: LongTensor, gold class indices, shape (batch_size)
    :param esp: float, smoothing constant added to numerator and denominator, default 1e-5
    :param alpha: float in (0,1), only used when loss_type is 'TL'
    :param loss_type: str, one of 'DSC', 'DL', 'TL'; any other value (e.g. 'sadDSC')
                      selects the self-adjusting variant
    :return: scalar tensor, 1 - (sum of all per-class ratios) / batch_size
    '''
    batch_size, class_num = input.shape[0], input.shape[1]

    # One-hot encoding of the gold labels, created detached from autograd.
    one_hot = input.data.new_zeros(batch_size,class_num)
    one_hot.scatter_(1,target.unsqueeze(dim=1),1.)

    overlap = input*one_hot
    if loss_type=='DSC':
        top = 2*overlap
        bottom = input + one_hot
    elif loss_type=='DL':
        top = 2*overlap
        bottom = input**2 + one_hot**2
    elif loss_type=='TL':
        top = overlap
        bottom = overlap+alpha*input*(1-one_hot)+(1-alpha)*(1-input)*one_hot
    else:
        # self-adjusting variant: the overlap is down-weighted by (1 - p)
        top = 2*((1-input)*overlap)
        bottom = (1-input)*input+one_hot

    ratio = (top+esp)/(bottom+esp)
    return 1-ratio.sum()/batch_size

# modules

In [None]:
class Bottle(nn.Module):
    '''
    Mixin that lets a module written for 2-D input accept 3-D input.

    Inputs with at most two dimensions are forwarded unchanged to the next
    ``forward`` in the MRO. Higher-rank inputs have their first two dimensions
    merged before the call and restored on the result.
    '''

    def forward(self, input):
        wrapped_forward = super().forward
        if input.dim() <= 2:
            return wrapped_forward(input)
        first, second = input.size()[:2]
        flattened = input.reshape(first * second, -1)
        result = wrapped_forward(flattened)
        return result.view(first, second, -1)

In [None]:
class XavierLinear(nn.Module):
    '''
    Fully connected layer whose weight matrix is initialised with
    Xavier (Glorot) normal initialisation; the bias keeps ``nn.Linear``'s default.
    '''

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        layer = nn.Linear(in_features=in_features, out_features=out_features, bias=bias)
        nn.init.xavier_normal_(layer.weight)
        self.linear = layer

    def forward(self, x):
        projected = self.linear(x)
        return projected

In [None]:
class OrthogonalLinear(nn.Module):
    '''
    Fully connected layer whose weight matrix is initialised with
    orthogonal initialisation; the bias keeps ``nn.Linear``'s default.
    '''

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        layer = nn.Linear(in_features=in_features, out_features=out_features, bias=bias)
        nn.init.orthogonal_(layer.weight)
        self.linear = layer

    def forward(self, x):
        projected = self.linear(x)
        return projected

In [None]:
class BottledXavierLinear(Bottle, XavierLinear):
    '''XavierLinear wrapped by Bottle, so it can also be applied to 3-D input.'''

class BottledOrthogonalLinear(Bottle, OrthogonalLinear):
    '''OrthogonalLinear wrapped by Bottle, so it can also be applied to 3-D input.'''

# GCN

In [None]:
class GraphConvolution(nn.Module):
    def __init__(self, inputs, outputs, edge_types, dropout=0.5, bias=True, use_bn=False, device=torch.device("cpu")):
        """
        Single-layer gated graph convolution with one gate and one linear map per edge type.

        :param inputs: The number of incoming features
        :param outputs: The number of output features
        :param edge_types: The number of edge types in the whole graph
        :param dropout: Dropout probability (passed to ``F.dropout`` as ``p``). Dropout is
                        disabled when the value is not a float in [0, 1], e.g. ``None``. Default 0.5
        :param bias: If False, the per-edge-type linear layers have no bias. Default: True
        :param use_bn: If True, apply ``BatchNorm1d`` over the feature dimension of the output
        :param device: device the layer is moved to at construction time
        """
        super(GraphConvolution, self).__init__()
        self.inputs = inputs
        self.outputs = outputs
        self.edge_types = edge_types
        self.dropout = dropout if isinstance(dropout, float) and -1e-7 < dropout < 1 + 1e-7 else None

        # Shared input projection. It must be initialised explicitly: a tensor
        # created by torch.Tensor(...)/torch.empty(...) holds arbitrary memory.
        self.Weight = Parameter(torch.empty(self.inputs, self.outputs))
        nn.init.xavier_uniform_(self.Weight)

        #parameters for gates
        self.Gates = nn.ModuleList()

        #parameters for graph convolutions
        self.GraphConv = nn.ModuleList()

        #batch norm
        self.use_bn = use_bn
        if self.use_bn:
            self.bn = nn.BatchNorm1d(self.outputs)

        # The gates and convolutions consume the *projected* features (width
        # `outputs`), so that is their input width. This equals the previous
        # `inputs` whenever inputs == outputs.
        for _ in range(edge_types):
            self.Gates.append(BottledOrthogonalLinear(in_features=outputs,out_features=1,bias=bias))
            self.GraphConv.append(BottledOrthogonalLinear(in_features=outputs,out_features=outputs,bias=bias))

        self.device = device
        self.to(device)

    def forward(self, inputs, adj):
        """
        Apply the gated graph convolution.

        :param inputs: FloatTensor, input feature tensor, (batch_size, seq_len, inputs)
        :param adj: dense FloatTensor, adjacency matrices of the padded sequences,
                    (batch_size, edge_types, seq_len, seq_len)
        :return: output
            - **output**: FloatTensor, (batch_size, seq_len, outputs)
        """

        adj_ = adj.transpose(0, 1)  # (edge_types, batch_size, seq_len, seq_len)
        ts = []

        projected = torch.matmul(inputs,self.Weight)  # (batch_size, seq_len, outputs)
        for i in range(self.edge_types):
            gate_status = F.relu(self.Gates[i](projected))  # (batch_size, seq_len, 1)
            # Broadcasting scales every row of the adjacency matrix by that node's gate.
            adj_hat_i = adj_[i] * gate_status  # (batch_size, seq_len, seq_len)
            ts.append(torch.bmm(adj_hat_i, self.GraphConv[i](projected)))
        # The result already lives on the device of the inputs/parameters; it is
        # not moved to `self.device`, which can be stale after `.to()`/`.cuda()`.
        ts = torch.stack(ts).sum(dim=0, keepdim=False)
        if self.use_bn:
            # BatchNorm1d normalises dim 1, so features are moved there and back.
            ts = ts.transpose(1, 2).contiguous()
            ts = self.bn(ts)
            ts = ts.transpose(1, 2).contiguous()
        ts = F.relu(ts)
        if self.dropout is not None:
            ts = F.dropout(ts, p=self.dropout, training=self.training)
        return ts

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.inputs) + ' -> ' + str(self.outputs) + ')'

# DynamicRNN model, including LSTM, GRU, and RNN

In [None]:
class DynamicRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='relu', bias=True, batch_first=True,
                dropout=0, bidirectional=True, rnn_mode='lstm', device=torch.device('cpu')):
        """
        Thin wrapper that builds an ``nn.RNN``, ``nn.LSTM`` or ``nn.GRU`` depending on ``rnn_mode``.

        :param input_size: The number of expected features in the input x
        :param hidden_size: The number of features in the hidden state h
        :param num_layers: Number of recurrent layers. Default: 1
        :param nonlinearity: The non-linearity used when rnn_mode is 'rnn', either 'tanh' or 'relu'. Default: 'relu'
        :param bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
        :param batch_first: If True, then the input and output tensors are provided as (batch, seq, feature). Default: True
        :param dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
        :param bidirectional: If True, becomes a bidirectional RNN. Default: True
        :param rnn_mode: 'rnn' or 'lstm'; any other value selects a GRU. Default: 'lstm'
        :param device: device the module is moved to at construction time
        """
        super(DynamicRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.nonlinearity = nonlinearity
        self.batch_first = batch_first
        self.dropout = dropout
        self.bidirectional = bidirectional

        # Arguments shared by all three recurrent layers.
        common = dict(input_size = input_size,
                      hidden_size = hidden_size,
                      num_layers = num_layers,
                      bias = bias,
                      batch_first = batch_first,
                      dropout = dropout,
                      bidirectional = bidirectional)
        # Compare by value: `is` tests object identity and silently fails for
        # strings that are not interned (e.g. values built or loaded at runtime).
        if rnn_mode == 'rnn':
            self.rnn = nn.RNN(nonlinearity = nonlinearity, **common)
        elif rnn_mode == 'lstm':
            self.rnn = nn.LSTM(**common)
        else:
            self.rnn = nn.GRU(**common)
        self.device = device
        self.to(device)

    def forward(self, input_x, x_len):
        """
        Run the recurrent layer over the padded batch.

        NOTE: ``x_len`` is accepted for interface compatibility but is not used;
        the sequences are not sorted or packed, so padded positions are processed
        like real ones.

        :param input_x: FloatTensor, pre-padded input sequence (batch_size, seq_len, feature_dim)
        :param x_len: actual sequence lengths (currently unused)
        :return: output, h_n
        - **output**: FloatTensor, (batch_size, seq_len, hidden_size * num_directions)
            containing the output features `(h_t)` from the last layer of the RNN, LSTM or GRU, for each t.
        - **h_n**: FloatTensor, (num_layers * num_directions, batch, hidden_size),
            the final hidden state (for an LSTM the cell state is dropped)
        """
        output, h_n = self.rnn(input_x)
        if isinstance(h_n,tuple):
            # nn.LSTM returns (h_n, c_n); keep only the hidden state.
            h_n = h_n[0]

        return output,h_n

# Embedding Layer

In [None]:
class EmbeddingLayer(nn.Module):
    def __init__(self, embedding_size=None, embedding_matrix=None,
                 fine_tune=True, dropout=0.5,
                 padding_idx=None,
                 max_norm=None, norm_type=2, scale_grad_by_freq=False,
                 sparse=False,
                 device=torch.device("cpu")):
        '''
        Embedding Layer need at least one of `embedding_size` and `embedding_matrix`
        :param embedding_size: tuple, contains 2 integers indicating the shape of embedding matrix, eg: (20000, 300).
                               Ignored when `embedding_matrix` is given
        :param embedding_matrix: torch.Tensor, the pre-trained value of embedding matrix. When omitted,
                                 the matrix is drawn uniformly from [-0.15, 0.15]
        :param fine_tune: boolean, whether fine tune embedding matrix
        :param dropout: float, dropout probability; dropout is disabled for any value that is not a float in [0, 1]
        :param padding_idx: int, if given, pads the output with zeros whenever it encounters the index
                            (for a randomly initialised matrix; a supplied matrix is used as given)
        :param max_norm: float, if given, will renormalize the embeddings to always have a norm lesser than this
        :param norm_type: float, the p of the p-norm to compute for the max_norm option
        :param scale_grad_by_freq: boolean, if given, this will scale gradients by the frequency of the words in the mini-batch
        :param sparse: boolean, forwarded unchanged to ``nn.Embedding``
        :param device: device the layer is moved to at construction time
        :raises ValueError: if neither `embedding_size` nor `embedding_matrix` is given
        '''
        super(EmbeddingLayer, self).__init__()

        # Validate before anything is dereferenced.
        if embedding_matrix is None and embedding_size is None:
            raise ValueError("EmbeddingLayer needs at least one of `embedding_size` and `embedding_matrix`")

        randomly_initialised = embedding_matrix is None
        if randomly_initialised:
            embedding_matrix = torch.nn.init.uniform_(
                torch.empty(embedding_size[0], embedding_size[1], dtype=torch.float32),
                a=-0.15,
                b=0.15)
        else:
            embedding_size = embedding_matrix.size()

        self.matrix = nn.Embedding(num_embeddings=embedding_size[0],
                                   embedding_dim=embedding_size[1],
                                   padding_idx=padding_idx,
                                   max_norm=max_norm,
                                   norm_type=norm_type,
                                   scale_grad_by_freq=scale_grad_by_freq,
                                   sparse=sparse)
        with torch.no_grad():
            self.matrix.weight.copy_(embedding_matrix)
            # The copy above overwrites the zero row nn.Embedding created for the
            # padding index; restore it for randomly initialised weights.
            if randomly_initialised and self.matrix.padding_idx is not None:
                self.matrix.weight[self.matrix.padding_idx].fill_(0)
        self.matrix.weight.requires_grad = fine_tune
        self.dropout = dropout if isinstance(dropout, float) and -1e-7 < dropout < 1 + 1e-7 else None

        self.device = device
        self.to(device)

    def forward(self, x):
        '''
        Forward this module
        :param x: torch.LongTensor, token sequence or sentence, shape is [batch, sentence_len]
        :return: torch.FloatTensor, output data, shape is [batch, sentence_len, embedding_size]
        '''
        embedded = self.matrix(x)
        if self.dropout is not None:
            embedded = F.dropout(embedded, p=self.dropout, training=self.training)
        return embedded

# Trigger Embedding Layer

In [None]:
class TriggerEmbeddingLayer(nn.Module):
    '''
    Trainable embedding table for trigger labels, initialised uniformly in [-0.15, 0.15].
    '''
    def __init__(self,num_embedding,embedding_dim,padding_idx=None,device=torch.device("cpu")):
        '''
        :param num_embedding: int, number of rows (distinct trigger labels)
        :param embedding_dim: int, size of each embedding vector
        :param padding_idx: int, if given, the row at this index is kept at zero after initialisation
        :param device: device the layer is moved to at construction time
        '''
        super(TriggerEmbeddingLayer,self).__init__()
        embedding_matrix = torch.nn.init.uniform_(torch.empty(num_embedding, embedding_dim, dtype=torch.float32),
                                                  a=-0.15,
                                                  b=0.15)
        self.trigger = nn.Embedding(num_embeddings=num_embedding,embedding_dim=embedding_dim,padding_idx=padding_idx)
        with torch.no_grad():
            self.trigger.weight.copy_(embedding_matrix)
            # The copy overwrites the zero row nn.Embedding created for the
            # padding index, so it is restored here.
            if self.trigger.padding_idx is not None:
                self.trigger.weight[self.trigger.padding_idx].fill_(0)
        self.trigger.weight.requires_grad = True
        self.to(device)

    def forward(self, trigger):
        '''
        :param trigger: LongTensor of trigger label indices
        :return: FloatTensor with one trailing dimension of size embedding_dim added
        '''
        return self.trigger(trigger)

# Entity Label Embedding Layer

In [None]:
class MultiLabelEmbeddingLayer(nn.Module):
    def __init__(self, embedding_size=None, embedding_matrix=None,
                 fine_tune=True, dropout=0.5,
                 padding_idx=None,
                 max_norm=None, norm_type=2, scale_grad_by_freq=False,
                 sparse=False,
                 device=torch.device("cpu")):
        '''
        MultiLabelEmbeddingLayer Layer need at least one of `embedding_size` and `embedding_matrix`
        :param embedding_size: tuple, contains 2 integers indicating the shape of embedding matrix, eg: (20000, 300).
                               Ignored when `embedding_matrix` is given
        :param embedding_matrix: torch.Tensor, the pre-trained value of embedding matrix. When omitted,
                                 the matrix is drawn from a standard normal distribution
        :param fine_tune: boolean, whether fine tune embedding matrix
        :param dropout: float, dropout probability; dropout is disabled for any value that is not a float in [0, 1]
        :param padding_idx: int, if given, pads the output with zeros whenever it encounters the index
                            (for a randomly initialised matrix; a supplied matrix is used as given)
        :param max_norm: float, if given, will renormalize the embeddings to always have a norm lesser than this
        :param norm_type: float, the p of the p-norm to compute for the max_norm option
        :param scale_grad_by_freq: boolean, if given, this will scale gradients by the frequency of the words in the mini-batch
        :param sparse: boolean, forwarded unchanged to ``nn.Embedding``
        :param device: device the layer is moved to at construction time
        :raises ValueError: if neither `embedding_size` nor `embedding_matrix` is given
        '''
        super(MultiLabelEmbeddingLayer, self).__init__()

        # Validate before anything is dereferenced.
        if embedding_matrix is None and embedding_size is None:
            raise ValueError("MultiLabelEmbeddingLayer needs at least one of `embedding_size` and `embedding_matrix`")

        randomly_initialised = embedding_matrix is None
        if randomly_initialised:
            embedding_matrix = torch.randn(embedding_size[0], embedding_size[1])
        else:
            embedding_size = embedding_matrix.size()

        self.matrix = nn.Embedding(num_embeddings=embedding_size[0],
                                   embedding_dim=embedding_size[1],
                                   padding_idx=padding_idx,
                                   max_norm=max_norm,
                                   norm_type=norm_type,
                                   scale_grad_by_freq=scale_grad_by_freq,
                                   sparse=sparse)
        with torch.no_grad():
            self.matrix.weight.copy_(embedding_matrix)
            # The copy above overwrites the zero row nn.Embedding created for the
            # padding index; restore it for randomly initialised weights.
            if randomly_initialised and self.matrix.padding_idx is not None:
                self.matrix.weight[self.matrix.padding_idx].fill_(0)
        self.matrix.weight.requires_grad = fine_tune
        self.dropout = dropout if isinstance(dropout, float) and -1e-7 < dropout < 1 + 1e-7 else None

        self.device = device
        self.to(device)

    def forward(self, x):
        '''
        Embed every label of every token and sum the label embeddings per token.

        :param x: list, token sequence or sentence, shape is [batch, sentence_len, variable_size(>=1)]
        :return: torch.FloatTensor, output data, shape is [batch, sentence_len, embedding_size]
        '''
        BATCH = len(x)
        SEQ_LEN = len(x[0])
        # Use the device the weights actually live on; `self.device` can be
        # stale if the module was moved after construction.
        weight_device = self.matrix.weight.device
        summed = [self.matrix(torch.LongTensor(x[i][j]).to(weight_device)).sum(0)
                  for i in range(BATCH)
                  for j in range(SEQ_LEN)]
        out = torch.stack(summed).view(BATCH, SEQ_LEN, -1)
        if self.dropout is not None:
            out = F.dropout(out, p=self.dropout, training=self.training)
        return out

# Multi-head Attention

In [None]:
class MultiHeadAttentionLayer(nn.Module):
    def __init__(self,embed_dim,num_heads,batch_first=False,dropout=0.0,bias=True,device=torch.device("cpu")):
        '''
        Multi-head self-attention preceded by a shared dense + ReLU projection.

        :param embed_dim: total dimension of the model.
        :param num_heads: parallel attention heads.
        :param batch_first: If True, then the input and output tensors are provided as (batch, seq, embedding_dim)
        :param dropout: a Dropout layer on attn_output_weights. Default: 0.0
        :param bias: add bias as module parameter. Default: True
        :param device: device the layer is moved to at construction time
        '''
        super(MultiHeadAttentionLayer,self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.batch_first = batch_first
        self.dropout = dropout
        self.bias = bias
        self.attention = nn.MultiheadAttention(embed_dim=embed_dim,
                                                 num_heads=num_heads,
                                                 dropout=dropout,
                                                 bias=bias)
        self.dense = nn.Linear(in_features=embed_dim,out_features=embed_dim)
        self.relu = nn.ReLU()
        self.device = device
        self.to(device)

    def forward(self,input_x,need_weights=True):
        '''
        :param input_x: input matrix, shape is (target_seq_len, batch, embed_dim),
                        or (batch, target_seq_len, embed_dim) if batch_first=True
        :param need_weights: if True, the weight matrix will be output

        :return: attn_output: the shape is (target_seq_len, batch, embed_dim),if batch_first=True, the shape is (batch, target_seq_len, embed_dim)
                 attn_output_weights: the shape is (batch,target_seq_len, source_seq_len)
        '''
        if self.batch_first:
            # nn.MultiheadAttention is built here in its default sequence-first layout.
            input_x = torch.transpose(input_x,0,1)

        # Query, key and value all come from the same dense + ReLU projection of
        # the same input, so it is computed once and shared.
        projected = self.relu(self.dense(input_x))

        attn_output, attn_output_weights = self.attention(query=projected,key=projected,value=projected,
                                                          need_weights=need_weights)

        if self.batch_first:
            attn_output = torch.transpose(attn_output,0,1)

        return attn_output,attn_output_weights

# Building BGModel

In [None]:
# Module-level BERT encoder ('bert-large-cased' checkpoint). BGModel reads this
# global in its constructor, so this cell must run before a BGModel is created.
# NOTE(review): presumably this downloads the weights on first use -- confirm
# they are cached in the target environment.
bert = BertModel.from_pretrained('bert-large-cased')

In [None]:
class BGModel(nn.Module):
    '''
    Joint event-trigger / argument extraction model.

    ``forward`` regroups BERT token embeddings (computed under ``torch.no_grad``)
    into word embeddings and feeds them to two branches: (a) word + POS + entity
    embeddings through a bidirectional RNN, and (b) word embeddings through the
    stacked GCN layers followed by multi-head self-attention. The two branches are
    concatenated, passed through a unidirectional LSTM tagger and decoded into
    trigger labels. Argument roles are scored separately by ``predict_arguments``.

    The class reads these notebook-level globals at run time: ``bert``,
    ``triggers_size``, ``arguments_size``, ``idx2trigger``, ``argument2idx``,
    ``NONE`` and ``dice_loss``.
    '''
    def __init__(self,hyps,device=torch.device("cpu"),embeddingMatrix=None):
        '''
        :param hyps: dict of hyper-parameters; the keys read here are pos_size, posemd_dim,
                     pos_dp, en_size, enemb_dim, en_dp, rnn_dim, rnn_layers, rnn_dp, rnn_mode,
                     gcn_layers, gcn_et, gcn_dp, gcn_use_bn, tag_hidden, tri_size, tri_dim
                     (bert_mode is read in ``forward``)
        :param device: device every sub-module is moved to
        :param embeddingMatrix: unused
        '''
        super(BGModel,self).__init__()
        self.hyperparams = copy.deepcopy(hyps)
        self.device = device
        self.bert=bert
        #get bert token embedding dim
        bert_embeddig_dim = bert.config.to_dict()['hidden_size']

        # POS-Tagging Layer
        self.posembedding = EmbeddingLayer(embedding_size=(hyps['pos_size'],hyps['posemd_dim']),
                                              dropout=hyps['pos_dp'],
                                              device=device)

        # Entity-Label Layer
        self.enembedding = MultiLabelEmbeddingLayer(embedding_size=(hyps['en_size'],hyps['enemb_dim']),
                                                    dropout=hyps['en_dp'],
                                                    device=device)

        # A single BatchNorm1d shared by every stage of forward(). It is applied to
        # (batch, seq_len, feature) tensors, and BatchNorm1d normalises dim 1, so the
        # padded sequence length has to be 64.
        self.BN = nn.BatchNorm1d(num_features=64).to(self.device)
        #bidirectional dynamicRNN Layer
        self.birnn = DynamicRNN(input_size=bert_embeddig_dim+hyps['posemd_dim']+hyps['enemb_dim'],
                                hidden_size=hyps['rnn_dim'],
                                num_layers=hyps['rnn_layers'],
                                dropout=hyps['rnn_dp'],
                                rnn_mode=hyps['rnn_mode'],
                                device=device)

        #GCN Layer (no dropout after the last layer)
        self.gcns = nn.ModuleList()
        for i in range(hyps['gcn_layers']):
            gcn = GraphConvolution(inputs=bert_embeddig_dim,
                                   outputs=bert_embeddig_dim,
                                   edge_types=hyps['gcn_et'],
                                   dropout=hyps['gcn_dp'] if i != hyps["gcn_layers"] - 1 else None,
                                   use_bn=hyps['gcn_use_bn'],
                                   device=device)
            self.gcns.append(gcn)

        self.atten = MultiHeadAttentionLayer(embed_dim=bert_embeddig_dim,
                                             num_heads=8,
                                             batch_first=True,
                                             dropout=0.5,
                                             device=device)

        #Tagger layer: consumes the BiRNN output (2*rnn_dim) concatenated with the GCN/attention output
        self.tagger = nn.LSTM(input_size=2*hyps['rnn_dim']+bert_embeddig_dim,
                              hidden_size=hyps['tag_hidden'],
                              num_layers=1,
                              bidirectional=False,
                              batch_first=True).to(self.device)

        # The trigger output layer and the argument output layer form the decoder.
        #Trigger embedding layer
        self.triembedding = TriggerEmbeddingLayer(num_embedding=hyps['tri_size'],embedding_dim=hyps['tri_dim'],device=device)
        #Trigger output layer
        self.trigger_outlayer = BottledXavierLinear(in_features=hyps['tag_hidden'],out_features=triggers_size).to(self.device)
        #Argument output layer: input is the concatenation of a trigger span vector and a candidate span vector
        self.argument_outlayer = BottledXavierLinear(in_features=2*(hyps['tag_hidden']+hyps['tri_dim']+hyps['tri_size']),
                                                     out_features=arguments_size).to(self.device)


    def forward(self,token_sequence,pos_taggig_sequence,entity_type_sequence,trigger_type_sequence,
                adj,head_index,x_len,b_len,gold_triggers=None,gold_arguments=None,is_train=True):
        '''
        Jointly extract event triggers and build the argument candidates.

        :param token_sequence: LongTensor, padded word indices, (batch_size, bert_seq_len)
        :param pos_taggig_sequence: LongTensor, padded pos-tagging label indices, (batch_size, seq_len)
        :param entity_type_sequence: list, padded entity label indices keep all possible labels, (batch_size, seq_len, variable_length>=1)
        :param trigger_type_sequence: LongTensor, padded trigger label indices, (batch_size, seq_len)
        :param adj: dense FloatTensor, adjacent matrix for provided graph of padded sequences, (batch_size, edge_types, seq_len, seq_len)
        :param head_index: LongTensor, including [CLS] and [SEP] representation, credit is given only to the first piece. (batch_size, seq_len)
        :param x_len: numpy int64 array, indicating corresponding actual sequence length, (batch_size,)
        :param b_len: numpy int64 array, indicating corresponding actual bert sequence length, (batch_size,)
        :param gold_triggers: list, standard trigger string label (unused here)
        :param gold_arguments: list of dicts; the 'candidates' entry of every element is read by
                               ``predict_triggers`` in both training and evaluation
        :param is_train: if True, gold trigger labels are embedded; otherwise the predicted ones
        :return: trigger_loss, pred_trigger, argument_hidden, argument_keys (see ``predict_triggers``)
        '''

        BATCH_SIZE = head_index.shape[0]
        SEQ_LEN = head_index.shape[1]

        # 1 for real tokens, 0 for padding
        mask = np.zeros(shape=(BATCH_SIZE,SEQ_LEN),dtype=np.uint8)
        for i in range(BATCH_SIZE):
            s_len = x_len[i]
            mask[i,0:s_len] = np.ones(shape=(s_len),dtype=np.uint8)
        mask = torch.ByteTensor(mask).to(self.device)

        # BERT is used as a fixed feature extractor: no gradients flow into it.
        with torch.no_grad():
            token_emb = self.bert(token_sequence)[0]


        word_emb = self.reconstruct_embedding(token_emb,head_index,BATCH_SIZE,SEQ_LEN,x_len,b_len,mode=self.hyperparams['bert_mode'])
        pos_emd = self.posembedding(pos_taggig_sequence)
        enti_emb = self.enembedding(entity_type_sequence)

        # branch (a): word + POS + entity embeddings -> BiRNN
        rnn_in = torch.cat([word_emb,pos_emd,enti_emb],2)
        rnn_in = self.BN(rnn_in)
        x,_ = self.birnn(rnn_in,x_len)
        x = self.BN(x)

        # branch (b): word embeddings -> GCN stack -> self-attention
        x1 = word_emb
        for i in range(self.hyperparams['gcn_layers']):
            x1 = self.gcns[i](x1,adj)

        x1 = self.BN(x1)

        x1,weight = self.atten(x1)
        x1 = self.BN(x1)

        tagger_in = torch.cat([x,x1],2)
        #tagger
        x_tag,_ = self.tagger(tagger_in)
        x_tag = self.BN(x_tag)
        # decoder layer

        trigger_loss,pred_trigger,argument_hidden,argument_keys = self.predict_triggers(x_tag,trigger_type_sequence,gold_arguments,mask,is_train=is_train)
        return trigger_loss,pred_trigger,argument_hidden,argument_keys


    def reconstruct_embedding(self,bert_emb,head_index,batch_size,seq_len,x_len,b_len,mode='first'):
        '''
        Turn word-piece embeddings into word embeddings.

        :param bert_emb: FloatTensor of BERT token embeddings, one row per word piece
        :param head_index: LongTensor, index of the first word piece of every word
        :param batch_size: number of sentences
        :param seq_len: padded word-level sequence length
        :param x_len: actual word-level lengths
        :param b_len: actual word-piece-level lengths
        :param mode: str, 'first' or anything else (treated as 'average').
                     'first' represents a word by the vector of its first (head) word piece;
                     'average' represents a word by the mean of all its word-piece vectors.
        :return: FloatTensor with the same shape as ``bert_emb``
                 (NOTE(review): only the first ``seq_len`` positions are written in
                 'average' mode, and 'first' mode assigns a ``head_index``-length result
                 per sentence -- presumably seq_len equals the BERT sequence length; confirm)
        '''

        word_emb = torch.empty(bert_emb.shape).to(self.device)

        if mode == 'first':
            for i in range(batch_size):
                word_emb[i] = torch.index_select(bert_emb[i],0,head_index[i])
        else:
            for i in range(batch_size):
                real_len = x_len[i]
                bert_len = b_len[i]
                temp_token_emb = bert_emb[i]
                temp_head_index = head_index[i]
                for j in range(real_len):
                    if j == real_len-1:
                        # last word: its pieces run up to (not including) position bert_len-1
                        t = temp_token_emb[temp_head_index[j]:bert_len-1].mean(dim=0)
                        word_emb[i][j] = t
                    else:
                        # pieces of word j span from its head to the head of word j+1
                        t = temp_token_emb[temp_head_index[j]:temp_head_index[j+1]].mean(dim=0)
                        word_emb[i][j] = t

                # padded word positions reuse the embedding of the last BERT position
                for j in range(real_len,seq_len):
                    word_emb[i][j] = temp_token_emb[-1]

        return word_emb

    def predict_triggers(self,inputs,trigger_type_sequence,gold_arguments,mask,is_train=True):
        '''
        Predict trigger labels, compute the trigger loss and build argument candidates.

        :param inputs: FloatTensor, tagger output, (batch_size, seq_len, tag_hidden)
        :param trigger_type_sequence: LongTensor, gold trigger label indices, (batch_size, seq_len)
        :param gold_arguments: list of dicts, one per sentence; ``['candidates']`` holds
                               (start, end, type) tuples. Required in training and evaluation.
        :param mask: tensor, (batch_size, seq_len), 1 for real tokens and 0 for padding
        :param is_train: if True, gold trigger labels are embedded (teacher forcing);
                         otherwise the predicted labels are embedded
        :return: trigger_loss, pred_trigger (batch_size, seq_len),
                 argument_hidden (list of 1-D tensors, one per trigger/candidate pair),
                 argument_keys (list of (sentence index, trigger start, trigger end,
                 trigger type, candidate start, candidate end, candidate type))
        '''
        trigger_logits = self.trigger_outlayer(inputs)
        #trigger_soft = F.log_softmax(trigger_logits+1e-10,dim=2)
        trigger_soft = F.softmax(trigger_logits+1e-10,dim=2)
        pred_trigger = torch.argmax(trigger_soft,dim=2)

        BATCH = trigger_soft.shape[0]
        SEQ = trigger_soft.shape[1]
        output_ = trigger_soft.view(BATCH*SEQ,-1)
        label_ = trigger_type_sequence.view(BATCH*SEQ,-1)
        mask = mask.view(BATCH*SEQ,)
        # Indices of the real (non-padded) tokens, in ascending order. Computed with
        # one tensor op instead of a Python loop that reads every element one by one.
        mask_index = torch.nonzero(mask == 1).squeeze(1)

        output_l = output_.index_select(0,mask_index)
        label_l = label_.index_select(0,mask_index).squeeze(1)

        trigger_loss = dice_loss(output_l,label_l,alpha=0.3,loss_type='DSC')

        if is_train:
            assert gold_arguments is not None
            trigger_emb = self.triembedding(trigger_type_sequence)
        else:
            trigger_emb = self.triembedding(pred_trigger)

        features = torch.cat([inputs,trigger_emb,trigger_soft],2)


        argument_hidden = []
        argument_keys = []

        for i in range(inputs.shape[0]):
            candidates = gold_arguments[i]['candidates']
            golden_candidate_tensor = {}

            # mean feature vector of every candidate span
            for j in range(len(candidates)):
                can_st,can_ed,can_type = candidates[j]
                golden_candidate_tensor[candidates[j]] = features[i,can_st:can_ed, ].mean(dim=0)

            predicted_triggers = self.find_triggers([idx2trigger[t] for t in pred_trigger[i].tolist()])
            for predicted_trigger in predicted_triggers:
                t_start, t_end, t_type_str = predicted_trigger
                event_tensor = features[i,t_start:t_end, ].mean(dim=0)
                for j in range(len(candidates)):
                    # skip a candidate that is identical to the trigger tuple itself
                    if predicted_trigger != candidates[j]:
                        e_start, e_end, e_type_str = candidates[j]
                        candidates_tensor = golden_candidate_tensor[candidates[j]]

                        argument_hidden.append(torch.cat([event_tensor,candidates_tensor]))
                        argument_keys.append((i,t_start, t_end, t_type_str,e_start, e_end, e_type_str))

        return trigger_loss,pred_trigger,argument_hidden,argument_keys

    def predict_arguments(self,argument_input,argument_keys,gold_arguments):
        '''
        Classify the role of every trigger/candidate pair.

        :param argument_input: list of 1-D tensors produced by ``predict_triggers``
        :param argument_keys: list of key tuples aligned with ``argument_input``
        :param gold_arguments: list of dicts, one per sentence; ``['events']`` maps
                               (trigger start, trigger end, trigger type) to a list of
                               (argument start, argument end, argument label id)
        :return: loss, pred_event -- ``pred_event`` is a list with one
                 ``{'events': {...}}`` dict per sentence holding the non-NONE predictions.
                 With no candidate pair the loss is a zero scalar and every dict is empty.
        '''
        batch_size = len(gold_arguments)
        if len(argument_input) == 0:
            # torch.stack cannot handle an empty list; nothing to classify.
            return torch.zeros((),device=self.device), [{'events':{}} for _ in range(batch_size)]

        argument_input = torch.stack(argument_input)
        # mean() over an empty span yields NaN; replace it by 0
        argument_input = argument_input.masked_fill(torch.isnan(argument_input),0)
        argument_logits = self.argument_outlayer(argument_input)
        argument_soft = F.softmax(argument_logits,dim=1)

        pred_argument = argument_soft.argmax(-1)
        gold_argument_ids = []
        for i,trigger_st,trigger_en,trigger_type_str,entity_st,entity_en,entity_type_str in argument_keys:
            a_label = argument2idx[NONE]
            if (trigger_st,trigger_en,trigger_type_str) in gold_arguments[i]['events']: #if event match
                for (a_st,a_en,a_type_id) in gold_arguments[i]['events'][(trigger_st,trigger_en,trigger_type_str)]:
                    if a_st == entity_st and a_en == entity_en:
                        a_label = a_type_id
                        break
            gold_argument_ids.append(a_label)
        gold_argument_ids = torch.LongTensor(gold_argument_ids).to(self.device)

        loss = dice_loss(argument_soft,gold_argument_ids,loss_type='DSC')

        pred_argument = pred_argument.view(gold_argument_ids.size()).tolist()
        pred_event = [{'events':{}} for _ in range(batch_size)]
        for (i,t_st,t_ed,event_type_str,e_st,e_ed,entity_type),pred_label in zip(argument_keys,pred_argument):
            if pred_label == argument2idx[NONE]:
                continue

            if (t_st,t_ed,event_type_str) not in pred_event[i]['events']:
                pred_event[i]['events'][t_st,t_ed,event_type_str] = []
            pred_event[i]['events'][t_st,t_ed,event_type_str].append((e_st,e_ed,pred_label))

        return loss, pred_event

    def find_triggers(self,labels):
        '''
        Convert a BIO label sequence into trigger spans.

        :param labels: ['B-Conflict:Attack', 'I-Conflict:Attack', 'O', 'B-Life:Marry']
        :return: [(0, 2, 'Conflict:Attack'), (3, 4, 'Life:Marry')]
        '''
        result = []
        labels = [label.split('-',1) for label in labels]

        # every 'B' tag opens a span of length 1
        for i in range(len(labels)):
            if labels[i][0] == 'B':
                result.append([i, i + 1, labels[i][1]])

        # extend every span over the 'I' tags that directly follow it
        for item in result:
            j = item[1]
            while j < len(labels):
                if labels[j][0] == 'I':
                    j = j + 1
                    item[1] = j
                else:
                    break

        return [tuple(item) for item in result]

    def save_model(self,path):
        '''Save the state dict to ``path`` with every tensor moved to the CPU.'''
        state_dict = self.state_dict()
        for key,value in state_dict.items():
            state_dict[key] = value.cpu()
        torch.save(state_dict,path)

    def load_model(self,path):
        '''Load a state dict saved by ``save_model`` from ``path``.'''
        # Deserialise onto the CPU so checkpoints saved from another device still
        # load; load_state_dict then copies the values into the existing parameters.
        state_dict = torch.load(path,map_location='cpu')
        self.load_state_dict(state_dict)

    def __getnewargs__(self):
        # for pickle: the protocol requires a tuple of positional arguments for __new__
        return (self.hyperparams,)

# constant

In [None]:
# Special labels that build_vocab places at the front of every vocabulary.
NONE = 'O'
PAD = "[PAD]"
UNK = "[UNK]"

# for BERT
CLS = '[CLS]'
SEP = '[SEP]'

# Event trigger types (without BIO prefixes).
TRIGGERS = ['Dephosphorylation','Binding','Blood_vessel_development','Breakdown',
            'Catabolism','Cell_proliferation','Death','Development','Gene_expression',
            'Growth','Localization','Negative_regulation','Positive_regulation',
            'Phosphorylation','Planned_process','Regulation','Remodeling','Synthesis',
            'Transcription']

# Argument roles.
# NOTE(review): build_vocab also prepends NONE ('O'), so the argument vocabulary
# ends up with both 'O' and 'NONE' entries -- confirm that this is intended.
ARGUMENTS = ['Cause','Theme','Theme2','NONE']

# Entity types (without BIO prefixes).
ENTITIES = ['Anatomical_system', 'Cell', 'Cellular_component',
            'DNA_domain_or_region', 'Developing_anatomical_structure',
            'Drug_or_compound', 'Gene_or_gene_product', 'Immaterial_anatomical_entity',
            'Multi-tissue_structure','Organ','Organism','Organism_subdivision',
            'Organism_substance','Pathological_formation','Protein_domain_or_region','Tissue']

# POS tags
POSTAGS = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ',
           'DET', 'INTJ', 'NOUN', 'NUM', 'PART',
           'PRON', 'PROPN', 'PUNCT', 'SCONJ',
           'SYM', 'VERB', 'X']

In [None]:
def build_vocab(labels, BIO_tag=False):
    '''
    Build a label vocabulary whose first two entries are always PAD and NONE.

    labels: list of the gold label types to include
    BIO_tag: default False; when True every label is expanded into a 'B-<label>'
             and an 'I-<label>' entry (all B- entries first, then all I- entries)
    returns: (vocab_size, all_labels, label2idx, idx2label)
    '''
    if BIO_tag:
        tail = ['{}-{}'.format(prefix,label) for prefix in ('B','I') for label in labels]
    else:
        tail = labels
    all_labels = [PAD,NONE]+tail

    label2idx = {}
    idx2label = {}
    for idx,tag in enumerate(all_labels):
        label2idx[tag] = idx
        idx2label[idx] = tag

    return len(all_labels),all_labels,label2idx,idx2label

# building data

In [None]:
# Initialise the label vocabularies. Each build_vocab call returns
# (size, label list, label->index dict, index->label dict).
# Triggers and entities are expanded into B-/I- tags; argument roles and
# POS tags are used as they are.
triggers_size,all_triggers,trigger2idx,idx2trigger = build_vocab(labels=TRIGGERS,BIO_tag=True)
arguments_size,all_arguments,argument2idx,idx2argument = build_vocab(labels=ARGUMENTS)
entities_size,all_entities,entity2idx,idx2entity = build_vocab(labels=ENTITIES,BIO_tag=True)
POS_size,all_POS,POS2idx,idx2POS = build_vocab(labels=POSTAGS)

In [None]:
# Shared word-piece tokenizer; MLEEDataset and run_over_data read this module-level name.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased',do_lower_case=False)

In [None]:
class MLEEDataset(data.Dataset):
    '''
    Dataset built from a JSON file holding a list of sentence records.

    Each record is read through the keys 'tokens', 'pos-tags',
    'gold_entity_mentions', 'gold_event_mentions' and 'parse'. Everything is
    converted once in ``__init__`` and kept in parallel per-sentence lists;
    ``__getitem__`` only maps the stored labels to vocabulary indices.
    '''
    def __init__(self,data_path):
        '''
        :param data_path: path of the UTF-8 encoded JSON file to load
        '''
        self.textlist = []
        self.is_headlist = []
        self.entitylist = []
        self.poslist = []
        self.triggerlist = []
        self.argumentlist = []
        self.graph = []
        self.acture_len = [] #number of original words kept per sentence
        self.acture_bert_len = [] #number of BERT tokens per sentence, [CLS] and [SEP] included
        self.maxlen = 64

        # local name chosen so that the torch.utils `data` module is not shadowed
        with codecs.open(data_path,mode='r',encoding='utf-8') as f:
            records = json.load(f)

        for item in records:
            text,is_head,seqlen,bseqlen = self.getTextlist(item['tokens'])
            entities = self.getEntitylist(item['gold_entity_mentions'],seqlen)
            pos = self.getPoslist(item['pos-tags'],seqlen)
            triggers,arguments = self.getTriandArglist(item['gold_event_mentions'],item['gold_entity_mentions'],seqlen)
            adjpos,adjv = self.getAdjMatrix(item['parse'],seqlen)

            self.textlist.append([CLS]+text+[SEP])
            self.is_headlist.append([0]+is_head+[0])
            self.entitylist.append(entities)
            self.poslist.append(pos)
            self.triggerlist.append(triggers)
            self.argumentlist.append(arguments)
            self.graph.append((adjpos,adjv))
            self.acture_len.append(seqlen)
            self.acture_bert_len.append(bseqlen)

    def getTextlist(self,texts):
        '''
        Split every word into word pieces and record which pieces start a word.

        :param texts: list of words of one sentence
        :return: (tokens, is_head, real_seqlen, real_bert_sqlen) where ``is_head``
                 has one 0/1 flag per token, ``real_seqlen`` is the number of
                 words that survive truncation and ``real_bert_sqlen`` is the
                 token count including [CLS] and [SEP]
        '''
        # is_head serves two purposes: summing it gives the number of original
        # words left after truncation, and it marks the first piece of every
        # word (e.g. "apples" -> "apple", "##s"; the head piece stands for the word).
        is_head = []
        tokens = []
        for word in texts:
            token_li = tokenizer.tokenize(word)
            if not token_li:
                # A word that tokenizes to nothing would still receive a head
                # flag below and shift every later flag by one token; give it
                # a placeholder piece so tokens and is_head stay aligned.
                token_li = [UNK]
            head = [1] + (len(token_li)-1)*[0]

            tokens.extend(token_li)
            is_head.extend(head)

        # two positions are reserved for the [CLS] and [SEP] markers
        if len(tokens)>=self.maxlen-1:
            tokens = tokens[0:self.maxlen-2]
            is_head = is_head[0:self.maxlen-2]

        # every head flag stands for exactly one original word
        real_seqlen = sum(is_head)
        real_bert_sqlen = len(tokens)+2

        return tokens,is_head,real_seqlen,real_bert_sqlen

    def getEntitylist(self,entityjson,length):
        '''
        Build the per-word entity tags; a word may carry several tags.

        :param entityjson: list of dicts with 'start', 'end' and 'entity_type'
        :param length: number of words kept for the sentence
        :return: list of ``length`` lists of 'B-'/'I-' tags ([NONE] when a word has no entity)
        '''
        en_list = [[NONE] for _ in range(length)]
        for entity_mention in entityjson:
            start = entity_mention['start']
            end = entity_mention['end']
            for i in range(start,end):
                if i >=length:
                    # the rest of the mention lies in the truncated part
                    break
                prefix = 'B' if i==start else 'I'
                entity_type = '{}-{}'.format(prefix,entity_mention['entity_type'])

                if len(en_list[i])==1 and en_list[i][0]==NONE:
                    en_list[i][0]=entity_type
                else:
                    en_list[i].append(entity_type)
        return en_list

    def getPoslist(self,posjson,length):
        '''Return the POS tags of the first ``length`` words.'''
        return posjson[0:length]

    def getTriandArglist(self,eventjson,entityjson,length):
        '''
        Derive the trigger tags and the argument annotation from the event and
        entity mentions of one sentence.

        The trigger annotation has the form ['B-event_type','I-event_type',...].
        The argument annotation has the form {
                            'candidates':[(start,end,entity_type),...],
                            'events':{(start,end,event_type):[(start,end,argument_role),...]}
                            }
        where argument_role is stored as its index in ``argument2idx``. Event
        keys keep the offsets found in the JSON even when they lie beyond ``length``.
        '''

        tri_list = [NONE]*length
        arg_dic = {
            'candidates':[],
            'events':{}
        }

        temp_candidates_for_tri = []
        for event_mention in eventjson:
            tri_start = event_mention['trigger']['start']
            tri_end = event_mention['trigger']['end']
            tri_type = event_mention['event_type']

            for i in range(tri_start,tri_end):
                if i >=length:
                    break
                if i == tri_start:
                    tri_list[i] = 'B-{}'.format(tri_type)
                else:
                    tri_list[i] = 'I-{}'.format(tri_type)

            event_key = (tri_start,tri_end,tri_type)
            temp_candidates_for_tri.append(event_key)
            event_value = []
            if len(event_mention['arguments']) == 0:
                # events without arguments get the (-1,-1,'NONE') placeholder
                event_value.append((-1,-1,argument2idx['NONE']))
            else:
                for argument_mention in event_mention['arguments']:
                    arg_start = argument_mention['start']
                    arg_end = argument_mention['end']
                    arg_role = argument_mention['role']
                    event_value.append((arg_start,arg_end,argument2idx[arg_role]))

            if event_key in arg_dic['events']:
                # same trigger seen before: merge both argument lists without duplicates
                event_value.extend(arg_dic['events'][event_key])
                arg_dic['events'][event_key] = sorted(set(event_value))
            else:
                arg_dic['events'][event_key] = sorted(event_value)

        arg_dic['candidates'].extend(temp_candidates_for_tri)
        for entity_mention in entityjson:
            entity_start = entity_mention['start']
            entity_end = entity_mention['end']
            entity_type = entity_mention['entity_type']
            arg_dic['candidates'].append((entity_start,entity_end,entity_type))
        # The MLEE corpus contains events without any argument; they are
        # uniformly represented by the (-1,-1,'NONE') candidate.
        arg_dic['candidates'].append((-1,-1,'NONE'))

        return tri_list,arg_dic

    def getAdjMatrix(self,parsejson,length):
        '''
        Turn the dependency edges into a sparse (edge type, from, to) adjacency description.

        Every edge string is split on '/': field 0 is the relation name and the
        integers after the last '=' of fields 1 and 2 are the two word indices.
        Edge type 0 holds the edge as given, type 1 the reversed edge and type 2
        one self-loop per word. 'root' edges and edges touching index -1 or an
        index >= ``length`` are dropped.

        :return: ([types, froms, tos], values) - three parallel index lists and the edge weights
        '''
        sparseAdjMatrixPos = [[], [], []]
        sparseAdjMatrixValues = []

        def addedge(type_, from_, to_, value_):
            sparseAdjMatrixPos[0].append(type_)
            sparseAdjMatrixPos[1].append(from_)
            sparseAdjMatrixPos[2].append(to_)
            sparseAdjMatrixValues.append(value_)

        for edge in parsejson:
            temp = edge.strip().split('/')
            from_ = int(temp[2].split('=')[-1])
            to_ = int(temp[1].split('=')[-1])
            etype = temp[0]
            if etype == 'root' or from_ == -1 or to_ == -1 or from_ >= length or to_ >= length:
                continue
            addedge(0,from_,to_,1.0)
            addedge(1,to_,from_,1.0)

        for i in range(length):
            addedge(2,i,i,1.0)

        return sparseAdjMatrixPos,sparseAdjMatrixValues

    def __len__(self):
        return len(self.textlist)

    def __getitem__(self,idx):
        '''
        :return: (token ids, entity ids per word, POS ids, trigger ids,
                  positions of the head tokens, word count, BERT token count,
                  trigger tags, argument annotation, adjacency description)
        '''
        texts = self.textlist[idx]
        is_heads = self.is_headlist[idx]
        entities = self.entitylist[idx]
        poses = self.poslist[idx]
        triggers = self.triggerlist[idx]
        arguments = self.argumentlist[idx]
        graphes = self.graph[idx]
        x_len = self.acture_len[idx]
        b_len = self.acture_bert_len[idx]

        input_texts_ids = tokenizer.convert_tokens_to_ids(texts)
        input_entities_ids = [[entity2idx[e] for e in entitys] for entitys in entities]
        input_poses_ids = [POS2idx[p] for p in poses]
        trigger_y_ids = [trigger2idx[t] for t in triggers]

        # positions, inside the [CLS] ... [SEP] sequence, of the first piece of every word
        head_index = [index for index,tag in enumerate(is_heads) if tag]

        return input_texts_ids,input_entities_ids,input_poses_ids,trigger_y_ids,\
                head_index,x_len,b_len,triggers,arguments,graphes

    def get_samples_weight(self):
        '''
        Sampling weight per sentence: 8.0 when it holds at least one trigger, 0.5 otherwise.

        :return: numpy array with one weight per sentence
        '''
        samples_weight = []
        for triggers in self.triggerlist:
            has_trigger = any(trigger != NONE for trigger in triggers)
            samples_weight.append(8.0 if has_trigger else 0.5)
        return np.array(samples_weight)

In [None]:
def pad(batch):
    '''
    Collate function: right-pad the sequence fields of every sample to 64 entries.

    ``batch`` is a list of 10-field samples as produced by
    ``MLEEDataset.__getitem__``. Token ids are padded with 0, entity, POS and
    trigger ids with the PAD index of their vocabulary, and head positions with
    63. Sequences already longer than 64 are left as they are. The two length
    fields are returned as int64 numpy arrays; the remaining fields are only
    regrouped into per-field lists.
    '''
    input_ids_2d,entity_ids_3d,pos_ids_2d,trigger_ids_2d,head_index_2d,\
    x_len_1d,b_len_1d,triggers_2d,arguments_2d,graph_3d = [list(column) for column in zip(*batch)]
    maxlen = 64

    def fill(seq,filler):
        # a negative repeat count yields an empty list, so long sequences stay untouched
        return seq + [filler]*(maxlen-len(seq))

    input_ids_2d = [fill(ids,0) for ids in input_ids_2d]
    # every padded word gets its own single-element entity list
    entity_ids_3d = [ents + [[entity2idx[PAD]] for _ in range(maxlen-len(ents))]
                     for ents in entity_ids_3d]
    pos_ids_2d = [fill(tags,POS2idx[PAD]) for tags in pos_ids_2d]
    trigger_ids_2d = [fill(tags,trigger2idx[PAD]) for tags in trigger_ids_2d]
    head_index_2d = [fill(heads,63) for heads in head_index_2d]

    return input_ids_2d,entity_ids_3d,pos_ids_2d,\
            trigger_ids_2d,head_index_2d,np.array(x_len_1d,dtype=np.int64),\
            np.array(b_len_1d,dtype=np.int64),triggers_2d,arguments_2d,graph_3d

# training and testing 

In [None]:
# Per-epoch histories filled by train(): mean loss and trigger F1 for each
# split, plus the learning rate in effect during the epoch. Re-running this
# cell resets them.
train_loss_li, dev_loss_li, test_loss_li = [], [], []
train_f, dev_f, test_f = [], [], []
lrs = []

In [None]:
def train(model,train_dataset,dev_dataset,test_dataset,optimizer,run_perparam):
    '''
    Train ``model`` with early stopping on the dev set, then report a final test score.

    After every epoch the model is evaluated on the dev and test sets. The
    checkpoint with the best ``dev trigger f1 + dev argument f1`` is kept in
    ``<out>/model.pt`` and is the one used for the final test, whether training
    stopped early or ran through all epochs.

    :param model: model exposing ``hyperparams``, ``device``, ``save_model`` and ``load_model``
    :param train_dataset: dataset exposing ``get_samples_weight`` (drives the weighted sampler)
    :param dev_dataset: validation dataset
    :param test_dataset: test dataset
    :param optimizer: optimizer wrapping the model parameters
    :param run_perparam: run configuration; the keys read are 'train_batch',
                         'dev_batch', 'test_batch', 'epoch', 'out' and 'early_stop'

    Side effects: appends to the module-level lists train_loss_li, dev_loss_li,
    test_loss_li, train_f, dev_f, test_f and lrs, and writes files under
    ``run_perparam['out']``.
    '''
    out_dir = run_perparam['out']
    # the per-epoch dumps and the checkpoint are written here, so make sure it exists
    os.makedirs(out_dir,exist_ok=True)
    best_model_path = os.path.join(out_dir,'model.pt')

    samples_weight = train_dataset.get_samples_weight()
    sampler = data.WeightedRandomSampler(samples_weight,len(samples_weight))

    train_iter = data.DataLoader(dataset=train_dataset,
                                 batch_size=run_perparam['train_batch'],
                                 shuffle=False,
                                 sampler=sampler,
                                 num_workers=10,
                                 collate_fn=pad)
    dev_iter = data.DataLoader(dataset=dev_dataset,
                               batch_size=run_perparam['dev_batch'],
                               shuffle=True,
                               collate_fn=pad)
    test_iter = data.DataLoader(dataset=test_dataset,
                                batch_size=run_perparam['test_batch'],
                                shuffle=True,
                                collate_fn=pad)

    def run_split(data_iter,dataset,batch_key,need_backward,out_name):
        # one pass over a split; returns the 8-tuple produced by run_over_data
        return run_over_data(data_iter = data_iter,
                             optimizer = optimizer,
                             total = math.ceil(len(dataset)/run_perparam[batch_key]),
                             model = model,
                             need_backward = need_backward,
                             hyps = model.hyperparams,
                             device = model.device,
                             save_output = os.path.join(out_dir,out_name))

    def report(heading,name,metrics):
        # print the loss and the trigger/argument scores of one pass
        loss,acc,tri_p,tri_r,tri_f,arg_p,arg_r,arg_f = metrics
        print(*heading, loss,
              "\n%s trigger Acc: " % name,acc,
              "\n%s trigger p: " % name, tri_p,
              " %s trigger r: " % name, tri_r,
              " %s trigger f1: " % name, tri_f,
              "\n%s argument p: " % name, arg_p,
              " %s argument r: " % name, arg_r,
              " %s argument f1: " % name, arg_f)

    best_scores = 0.0
    keep_best = 0            # epochs since the last improvement on the dev set
    checkpoint_saved = False
    best_is_loaded = True    # False while the live weights are newer than the checkpoint

    epochs = run_perparam['epoch']
    print('training beginning.......')
    scheduler = StepLR(optimizer=optimizer,step_size=15,gamma=0.1,last_epoch=-1)
    for i in range(epochs):

        #training
        print("Epoch",i+1)
        train_metrics = run_split(train_iter,train_dataset,'train_batch',True,
                                  'training_epoch_%d.txt'%(i+1))
        report(("\nEpoch", i + 1, " training loss: "),'training',train_metrics)
        train_loss_li.append(train_metrics[0])
        train_f.append(train_metrics[4])

        #validation
        dev_metrics = run_split(dev_iter,dev_dataset,'dev_batch',False,
                                'dev_epoch_%d.txt'%(i+1))
        report(("\nEpoch", i + 1, " dev loss: "),'dev',dev_metrics)
        dev_loss_li.append(dev_metrics[0])
        dev_f.append(dev_metrics[4])

        #test
        test_metrics = run_split(test_iter,test_dataset,'test_batch',False,
                                 'test_epoch_%d.txt'%(i+1))
        report(("\nEpoch", i + 1, " test loss: "),'test',test_metrics)
        test_loss_li.append(test_metrics[0])
        test_f.append(test_metrics[4])

        lrs.append(scheduler.get_last_lr()[0])
        scheduler.step()

        #early stop
        dev_score = dev_metrics[4]+dev_metrics[7]   # trigger f1 + argument f1
        if best_scores <= dev_score:
            best_scores = dev_score
            model.save_model(best_model_path)
            print('save the model on CPU at epoch ',i+1)
            keep_best = 0
            checkpoint_saved = True
            best_is_loaded = True
        else:
            keep_best = keep_best+1
            best_is_loaded = False
            if keep_best >= run_perparam['early_stop']:
                print('stopping training, best model is loaded')
                model.load_model(best_model_path)
                best_is_loaded = True
                break

    # When the loop ran through all epochs the live weights may be worse than
    # the checkpoint; score the best model in the final test in that case too.
    if checkpoint_saved and not best_is_loaded:
        print('training finished, best model is loaded')
        model.load_model(best_model_path)

    #final test
    final_metrics = run_split(test_iter,test_dataset,'test_batch',False,'test_final.txt')
    report(("\nFinally test loss: ",),'test',final_metrics)

    print('training endding')

In [None]:
def run_over_data(data_iter,optimizer,total,model,need_backward,hyps,device,save_output):
    '''
    Run the model once over ``data_iter`` and score its trigger and argument predictions.

    :param data_iter: iterable of batches laid out like the output of ``pad``
    :param optimizer: optimizer stepped after every batch when ``need_backward`` is true
    :param total: expected number of batches, only used by the progress bar
    :param model: model exposing ``forward`` and ``predict_arguments``
    :param need_backward: True for a training pass (train mode, backward pass,
                          optimizer step); False for an evaluation pass (eval
                          mode, gradient tracking switched off)
    :param hyps: hyper-parameter dict; ``hyps['gcn_et']`` sizes the adjacency tensor
    :param device: device the batch tensors are moved to
    :param save_output: path of a text dump with one "word gold pred" line per
                        word and a blank line between sentences; falsy to skip it
    :return: (mean batch loss, trigger accuracy, trigger precision, trigger recall,
              trigger f1, argument precision, argument recall, argument f1)
    '''
    if need_backward:
        model.train()
    else:
        model.eval()

    def clear_tag(pieces):
        # glue word pieces back into one word, dropping the '##' continuation marks
        return ''.join(piece[2:] if piece.startswith('##') else piece for piece in pieces)

    def addtokens(input_tokens_ids,true_label_ids,pred_label_ids,x_len,b_len,head_index):
        # per sentence: one (word, gold trigger tag, predicted trigger tag) triple per word
        sentences = []
        for sent in range(len(input_tokens_ids)):
            input_tokens = tokenizer.convert_ids_to_tokens(input_tokens_ids[sent])
            real_len = x_len[sent]
            SEP_index = b_len[sent]-1
            true_label = true_label_ids[sent][:real_len]
            pred_label = pred_label_ids[sent][:real_len]
            head_i = head_index[sent][:real_len]
            words = []
            for w in range(real_len):
                # a word runs up to the next head token; the last one runs up to [SEP]
                word_end = SEP_index if w == real_len-1 else head_i[w+1]
                words.append(clear_tag(input_tokens[head_i[w]:word_end]))

            sentences.append([(word,idx2trigger[tl],idx2trigger[pl])
                              for word,tl,pl in zip(words,true_label,pred_label)])
        return sentences

    all_tri = [] #golden trigger label
    all_tri_ = [] #predicted trigger label
    all_event = [] #golden event
    all_event_ = [] #predicted event
    all_tokens = []

    running_loss = 0.0
    cnt = 0

    # the scaler is only needed when gradients are stepped
    scaler = GradScaler() if need_backward else None
    is_train = bool(need_backward)

    for batch in data_iter:
        if need_backward:
            optimizer.zero_grad()

        input_ids,entity_ids,pos_ids,\
        trigger_label_ids,head_index,x_len,\
        b_len,gold_triggers,gold_arguments,graphs = batch

        input_tensor = torch.LongTensor(input_ids)
        pos_tensor = torch.LongTensor(pos_ids)
        trigger_label = torch.LongTensor(trigger_label_ids)
        head_index_tensor = torch.LongTensor(head_index)
        SEQ_LEN = head_index_tensor.size()[1]
        # one dense (edge type, SEQ_LEN, SEQ_LEN) adjacency tensor per sentence
        adjm = torch.stack([torch.sparse.FloatTensor(torch.LongTensor(graph[0]),
                                                     torch.FloatTensor(graph[1]),
                                                     torch.Size((hyps['gcn_et'],SEQ_LEN,SEQ_LEN))).to_dense() for graph in graphs])

        input_tensor = input_tensor.to(device)
        pos_tensor = pos_tensor.to(device)
        trigger_label = trigger_label.to(device)
        head_index_tensor = head_index_tensor.to(device)
        adjm = adjm.to(device)

        # evaluation passes never call backward(), so do not build the autograd graph for them
        with torch.set_grad_enabled(is_train):
            trigger_loss,pred_triggers,argument_input,argument_keys = model.forward(token_sequence = input_tensor,
                                                                                      pos_taggig_sequence = pos_tensor,
                                                                                      entity_type_sequence = entity_ids,
                                                                                      trigger_type_sequence = trigger_label,
                                                                                      adj = adjm,
                                                                                      head_index = head_index_tensor,
                                                                                      x_len = x_len,
                                                                                      b_len = b_len,
                                                                                      gold_triggers = gold_triggers,
                                                                                      gold_arguments = gold_arguments,
                                                                                      is_train = is_train)

            if len(argument_keys) > 0:
                argument_loss, pred_events = model.predict_arguments(argument_input = argument_input,
                                                                     argument_keys = argument_keys,
                                                                     gold_arguments = gold_arguments)
                loss = trigger_loss+2*argument_loss
            else:
                # no trigger/candidate pair to classify in this batch
                loss = trigger_loss
                pred_events = [{'events':{}} for _ in range(head_index_tensor.size()[0])]

        all_event_.extend(pred_events)
        all_event.extend({'events':gold_argument['events']} for gold_argument in gold_arguments)

        pred_triggers = pred_triggers.view(trigger_label.size()).tolist()
        true_triggers = trigger_label.tolist()

        all_tokens.extend(addtokens(input_ids,true_triggers,pred_triggers,x_len,b_len,head_index))

        # drop the padded positions before scoring
        for index,l in enumerate(x_len):
            pred_triggers[index] = pred_triggers[index][:l]
            true_triggers[index] = true_triggers[index][:l]

        true_triggers_strs = [idx2trigger[t] for trigger in true_triggers for t in trigger]
        pred_triggers_strs = [idx2trigger[t] for trigger in pred_triggers for t in trigger]

        all_tri.append(true_triggers_strs)
        all_tri_.append(pred_triggers_strs)

        cnt += 1
        other_info = ''

        if need_backward:
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(model.parameters(),5)
            scaler.step(optimizer)
            scaler.update()
            # per-batch scores are only shown in the training progress line
            p,r,f,acc = getEval(true_triggers_strs,pred_triggers_strs,average='macro')
            other_info = 'Iter[{}] loss: {:.4f} TAcc: {:.2f}% TP: {:.2f}% TR: {:.2f}% TF1: {:.2f}%'.format(cnt, loss.item(),
                                                                                                               acc * 100.0,
                                                                                                               p * 100.0,
                                                                                                               r * 100.0,
                                                                                                               f * 100.0)
        progressbar(cnt,total,other_info)
        running_loss += loss.item()

    if save_output:
        with codecs.open(save_output, "w", encoding="utf-8") as f:
            for tokens in all_tokens:
                for token in tokens:
                    f.write("%s %s %s\n" % (token[0],token[1],token[2]))
                f.write('\n')

    running_loss = running_loss/cnt
    tp,tr,rf,acc = getEval(all_tri,all_tri_,average='macro')
    ep,er,ef = getEventEval(all_event,all_event_)

    return running_loss,acc,tp,tr,rf,ep,er,ef

In [None]:
def getEval(true_label,pred_label,average=None):
    '''
    Score predicted label sequences against the gold ones.

    true_label: list of gold labels
    pred_label: list of predicted labels
    average: str, one of [None,'micro','macro']; forwarded to the precision,
             recall and f1 scorers (accuracy takes no averaging mode)
    returns: (precision, recall, f1, accuracy)
    '''
    p,r,f1 = [scorer(true_label,pred_label,average=average)
              for scorer in (precision_score,recall_score,f1_score)]
    return p,r,f1,accuracy_score(true_label,pred_label)

In [None]:
def getEventEval(true_label,pred_label):
    '''
    Precision / recall / F1 of the predicted event arguments over all sentences.

    true_label: list of {'events': {event_key: [(start, end, role), ...]}} with the gold events
    pred_label: list of the same structure holding the predictions, aligned with true_label
    returns: (precision, recall, f1); all three are 0.0 when nothing matched or
             when either side holds no argument (including empty input)

    For every gold event that was also predicted, the gold and predicted
    argument lists are compared position by position and a pair counts as
    correct when the roles (third field) agree.
    '''
    ct,p1,p2 = 0,0,0   # correct, gold total, predicted total
    for event,event_ in zip(true_label,pred_label):
        true_event = event['events']
        pred_event = event_['events']
        for key,value in true_event.items():
            p1 += len(value)
            if key not in pred_event:
                continue
            for temp,temp_ in zip(value,pred_event[key]):
                if temp[2] == temp_[2]:
                    ct += 1

        for value in pred_event.values():
            p2 += len(value)

    # The scores are computed once, after every sentence has been counted;
    # returning from inside the loop would score the first sentence only.
    if ct == 0 or p1 == 0 or p2 == 0:
        return 0.0,0.0,0.0

    p = 1.0 * ct / p2
    r = 1.0 * ct / p1
    f1 = 2.0 * p * r / (p + r)
    return p,r,f1

In [None]:
def progressbar(current,total,other_info):
    '''
    Print one progress line: a 50-character bar, the percentage done and,
    when ``other_info`` is a str, that text after the percentage.

    current: number of steps done so far
    total: total number of steps
    other_info: extra text to append; ignored unless it is a str
    '''
    percent = '{:.2%}'.format(current / total)
    bar = '=' * int(math.floor(current * 50 / total))
    line = "\r[%-50s] %s" % (bar, percent)
    if type(other_info) is str:
        line = line + " " + other_info
    print(line)

# main

In [None]:
#Run configuration: batch sizes, optimisation settings and file locations
run_param = {
    'train_batch' : 64,              #train batch size
    'dev_batch' : 16,               #dev batch size
    'test_batch' : 16,              #test batch size
    'epoch' : 200,                 #maximum number of epochs, int
    'lr' : 1e-1,                  #learning rate, float
    'decay': 0.25,               #weight decay (L2) passed to the optimizer, float
    'optimizer' : "sgd",         #"adam", "sgd" or "adadelta"; any other value selects Adagrad
    'out' : "./out",               #directory for the checkpoint and the prediction dumps
    'train_set' : "train.json",     #train set path
    'dev_set' : "dev.json",           #dev set path
    'test_set': "test.json",           #test set path
    'early_stop' : 50,       #stop after this many consecutive epochs without a new best dev score
    'device' : "cuda"          #"cpu" or "cuda"
}

In [None]:
#Model hyper-parameters handed to BGModel
hyps = {
    'pos_size' : POS_size,         #number of POS tag labels
    'posemd_dim' : 64,             #presumably the POS embedding dimension - TODO confirm against the model
    'pos_dp' : 0.5,                #presumably the POS embedding dropout - TODO confirm
    'en_size' : entities_size,     #number of entity labels
    'enemb_dim' : 64,              #presumably the entity embedding dimension - TODO confirm
    'en_dp' : 0.5,                 #multiEntity layer dropout
    'rnn_dim' : 128,               #hidden size of the bidirectional rnn layer
    'rnn_layers' : 2,              #number of bidirectional rnn layers
    'rnn_dp' : 0.0,                #bidirectional rnn layer dropout
    'rnn_mode' : "lstm",           #type of rnn, default is "lstm", others are "gru" and "rnn"
    'gcn_layers' : 2,              #number of gcn layers
    'gcn_et' : 3,                  #number of edge types (first dimension of the adjacency tensor)
    'gcn_dp' : 0.5,                #gcn layer dropout
    'gcn_use_bn' : True,          #NOTE(review): the name suggests batch norm, the old comment said bias - confirm
    'tag_hidden' : 128,           #hidden size of the tag layer
    'tri_size' : triggers_size,   #number of trigger labels
    'tri_dim' : 64,               #trigger dim
    'decode_dp' : 0.5,            #decode layer dropout
    'bert_mode': "average"        #bert vector representation, default is "first", other is "average"
}

In [None]:
# Build the model from the hyper-parameters; the configured device is passed in as a torch.device.
model = BGModel(hyps=hyps,device=torch.device(run_param['device']))

In [None]:
# Move the parameters to the GPU only when the run is configured with the exact device string "cuda".
model = model.cuda() if run_param['device'] == 'cuda' else model

In [None]:
# Load and pre-process the three splits; each file is parsed and tokenized once, here.
train_dataset = MLEEDataset(run_param['train_set'])
dev_dataset = MLEEDataset(run_param['dev_set'])
test_dataset = MLEEDataset(run_param['test_set'])

In [None]:
# Pick the optimizer class by name; any name not listed here falls back to Adagrad.
optimizer = {
    'adam': torch.optim.Adam,
    'sgd': torch.optim.SGD,
    'adadelta': torch.optim.Adadelta,
}.get(run_param['optimizer'],torch.optim.Adagrad)(params=model.parameters(),
                                                    lr=run_param['lr'],
                                                    weight_decay=run_param['decay'])

In [None]:
# Run the whole training loop; the histories used by the plots below are filled as a side effect.
train(model=model,
      train_dataset=train_dataset,
      dev_dataset=dev_dataset,
      test_dataset=test_dataset,
      optimizer=optimizer,
      run_perparam=run_param)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Loss curves, one panel per split. The x axis is derived from the number of
# recorded epochs because early stopping can end training before the
# configured maximum, and a fixed range would then not match the data.
y1 = train_loss_li
y2 = dev_loss_li
y3 = test_loss_li
x = range(len(y1))

for row,(label,values) in enumerate([('train loss',y1),('dev loss',y2),('test loss',y3)],start=1):
    plt.subplot(3,1,row)
    plt.plot(range(len(values)),values)
    plt.title('%s vs epoch' % label)
    plt.xlabel('epoch')
    plt.ylabel(label)

plt.tight_layout()
plt.show()

In [None]:
# Trigger F1 curves, one panel per split. The x axis is derived from the
# number of recorded epochs because early stopping can end training before
# the configured maximum, and a fixed range would then not match the data.
y1 = train_f
y2 = dev_f
y3 = test_f
x = range(len(y1))

for row,(label,values) in enumerate([('train f',y1),('dev f',y2),('test f',y3)],start=1):
    plt.subplot(3,1,row)
    plt.plot(range(len(values)),values)
    plt.title('%s vs epoch' % label)
    plt.xlabel('epoch')
    plt.ylabel(label)

plt.tight_layout()
plt.show()

In [None]:
# Learning-rate schedule. One value is recorded per completed epoch, so the
# x axis follows the length of the history instead of a fixed range.
z = lrs
x = range(len(z))

plt.plot(x,z)
plt.title('LR vs epoch')
plt.xlabel('epoch')
plt.ylabel('LR')

plt.show()