# AIM

- If you set the `--save_model_core` flag during initial training, you do not need this tutorial: the core transformer weights (the transformer layers only, without the NER layer) were already saved as `pytorch_model.bin`.

- This tutorial is a quick guide on how to extract the transformer layers from a trained NER model using the package and save them as a standalone model that can be used for other training tasks.

In [1]:
from transformers import BertConfig, BertTokenizer

In [2]:
import sys
sys.path.append("../src/transformer_ner/")

# Let us use BERT as an example
from model import BertNerModel

In [3]:
# Maps a model-type key to its (config class, NER model class, tokenizer class).
# Later cells index the tuple by position: [0] is the config class and
# [1] is the NER model class.
MODEL_CLASSES = {
    'bert': (BertConfig, BertNerModel, BertTokenizer),
}

In [4]:
from pathlib import Path
import torch

def load_model(model_path, model_type, config):
    """Rebuild a trained NER model from a checkpoint file.

    Args:
        model_path: path of the checkpoint file, read with ``torch.load``.
        model_type: key into ``MODEL_CLASSES`` (e.g. ``'bert'``).
        config: config object passed to the model class constructor.

    Returns:
        The instantiated model with the checkpoint loaded as its state dict.
    """
    # read the checkpoint first, always mapping tensors onto the CPU
    cpu = torch.device('cpu')
    state = torch.load(model_path, map_location=cpu)

    # position 1 of the MODEL_CLASSES tuple holds the NER model class
    ner_model = MODEL_CLASSES[model_type][1](config)
    ner_model.load_state_dict(state_dict=state)
    return ner_model

In [21]:
# create BERT config from trained config
# model_type is defined here so that this cell also works when the notebook
# is run top to bottom (it is otherwise first assigned in a later cell)
model_type = "bert"
config_path = "./models/ner_bert/"
# position 0 of the MODEL_CLASSES tuple holds the config class
conf = MODEL_CLASSES[model_type][0].from_pretrained(config_path)

# print every config attribute except the label-mapping entries
skipped_keys = {"label2id", "id2label", "label2idx"}
for k, v in vars(conf).items():
    if k not in skipped_keys:
        print(f"{k}:...........{v}")

return_dict:...........True
output_hidden_states:...........False
output_attentions:...........False
torchscript:...........False
torch_dtype:...........None
use_bfloat16:...........False
pruned_heads:...........{}
tie_word_embeddings:...........True
is_encoder_decoder:...........False
is_decoder:...........False
add_cross_attention:...........False
tie_encoder_decoder:...........False
max_length:...........20
min_length:...........0
do_sample:...........False
early_stopping:...........False
num_beams:...........1
num_beam_groups:...........1
diversity_penalty:...........0.0
temperature:...........1.0
top_k:...........50
top_p:...........1.0
repetition_penalty:...........1.0
length_penalty:...........1.0
no_repeat_ngram_size:...........0
encoder_no_repeat_ngram_size:...........0
bad_words_ids:...........None
num_return_sequences:...........1
chunk_size_feed_forward:...........0
output_scores:...........False
return_dict_in_generate:...........False
forced_bos_token_id:...........None
f

In [12]:
# key into MODEL_CLASSES selecting the BERT classes
model_type = "bert"
# checkpoint file handed to load_model, which reads it with torch.load
model_path = "./models/ner_bert/checkpoint_34999.bin"

# rebuild the NER model from the checkpoint, using the config object (conf)
model = load_model(model_path, model_type, conf)

In [13]:
# show the model structure: evaluating the model object prints its module
# tree (this is not the state dictionary)

model

BertNerModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30523, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)

In [22]:
# we only need the BERT layers, which are stored under the model's "bert" attribute
# we can get the BERT layers as
bert_core = model.bert
bert_core

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30523, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [23]:
# we then save this core using the transformers API (save_pretrained)
# NOTE: the target is the same directory that holds the NER checkpoint and
# the trained config; the directory listing in the next cell shows the
# resulting pytorch_model.bin
# NOTE(review): newer transformers releases may write model.safetensors
# instead of pytorch_model.bin by default — confirm for your installed version
bert_core.save_pretrained("./models/ner_bert/")

In [26]:
# check the files under the original model directory
# we can see we have a new file named pytorch_model.bin which is the core BERT model
# this can be loaded for other training tasks
! ls ./models/ner_bert/

added_tokens.json       config.json             special_tokens_map.json
base_model_name.txt     label2idx.json          tokenizer_config.json
checkpoint_34999.bin    pytorch_model.bin       vocab.txt
