In [7]:
import torch,json,os
import torch.nn as nn
from transformers import AutoConfig,AutoModel
import sklearn.preprocessing

In [3]:
class NERNetwork(nn.Module):
    """A Generic Network for NERDA models.
    The network has an analogous architecture to the models in
    [Hvingelby et al. 2020](http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.565.pdf).
    Can be replaced with a custom user-defined network with 
    the restriction, that it must take the same arguments.
    """

    def __init__(self, model_name_or_path: str, n_tags: int, dropout: float = 0.1) -> None:
        """Initialize a NERDA Network
        Args:
            bert_model (nn.Module): huggingface `torch` transformers.
            device (str): Computational device.
            n_tags (int): Number of unique entity tags (incl. outside tag)
            dropout (float, optional): Dropout probability. Defaults to 0.1.
        """
        super(NERNetwork, self).__init__()
        
        # extract AutoConfig, from which relevant parameters can be extracted.
        bert_model_config = AutoConfig.from_pretrained(model_name_or_path)
        self.bert_model = AutoModel.from_pretrained(model_name_or_path)
        self.dropout = nn.Dropout(dropout)
        self.tags = nn.Linear(bert_model_config.hidden_size, n_tags)#BERT+Linear

    def forward(self, 
                input_ids: torch.Tensor, 
                attention_mask: torch.Tensor, 
                token_type_ids: torch.Tensor,
                ) -> torch.Tensor:
        """Model Forward Iteration
        Args:
            input_ids (torch.Tensor): Input IDs.
            attention_mask (torch.Tensor): Attention attention_mask.
            token_type_ids (torch.Tensor): Token Type IDs.
        Returns:
            torch.Tensor: predicted values.
        """

        # TODO: can be improved with ** and move everything to device in a
        # single step.
        bert_model_inputs = {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'token_type_ids': token_type_ids
            }
        
        # match args with bert_model
        # bert_model_inputs = match_kwargs(self.bert_model.forward, **bert_model_inputs)
           
        outputs = self.bert_model(**bert_model_inputs)
        # apply drop-out
        last_hidden_state=outputs.last_hidden_state
        last_hidden_state = self.dropout(last_hidden_state)

        # last_hidden_state for all labels/tags
        last_hidden_state = self.tags(last_hidden_state)

        return last_hidden_state

In [9]:
save_dir='/home/xhsun/Desktop/NER_Parsing/train_models/baseline_models'
tag_complete=json.load(open(os.path.join(save_dir,'label.json'))).split(' ')
tag_encoder=sklearn.preprocessing.LabelEncoder()
tag_encoder.fit(tag_complete)

LabelEncoder()

In [11]:
tag_encoder.classes_

array(['B-LOC', 'B-MISC', 'B-ORG', 'B-PER', 'I-LOC', 'I-MISC', 'I-ORG',
       'I-PER', 'O'], dtype='<U6')

In [2]:
from NERDA.datasets import get_conll_data
model = NERDA(dataset_training = get_conll_data('train'),
              dataset_validation = get_conll_data('valid'),
              transformer = 'bert-base-multilingual-uncased')

Device automatically set to: cuda


Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/641M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.64M [00:00<?, ?B/s]

In [3]:
model.train()


 Epoch 1 / 4


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [01:08<00:00, 15.71it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 407/407 [00:04<00:00, 84.69it/s]


Train Loss = 0.2562131281109975 Valid Loss = 0.10333481829908633

 Epoch 2 / 4


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [01:08<00:00, 15.72it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 407/407 [00:04<00:00, 83.81it/s]


Train Loss = 0.07882672009883983 Valid Loss = 0.07510552785006931

 Epoch 3 / 4


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [01:09<00:00, 15.54it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 407/407 [00:04<00:00, 84.35it/s]


Train Loss = 0.03754968284343636 Valid Loss = 0.0650229681199857

 Epoch 4 / 4


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [01:09<00:00, 15.48it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 407/407 [00:04<00:00, 84.86it/s]

Train Loss = 0.017491656300518552 Valid Loss = 0.06647421490569146





'Model trained successfully'

In [6]:
test = get_conll_data('test')
model.evaluate_performance(test,return_accuracy=True)



ValueError: Found input variables with inconsistent numbers of samples: [46014, 46003]

In [5]:
(0.96+0.98+0.87+0.86+0.92+0.82+0.82+0.66)/8

0.8612500000000001