# 数据集介绍

来源https://storage.googleapis.com/cluebenchmark/tasks/cluener_public.zip

# 导入包

In [1]:
import torch,json
import sys,os
import pandas as pd
import random
import numpy as np
from nlp_basictasks.tasks import Ner
from nlp_basictasks.evaluation import nerEvaluator
from nlp_basictasks.readers.ner import InputExample

# 获取数据

In [2]:
def _create_examples(input_path,mode):
    examples = []
    with open(input_path, 'r') as f:
        idx = 0
        for line in f:
            json_d = {}
            line = json.loads(line.strip())
            text = line['text']
            label_entities = line.get('label', None)
            words = list(text)
            labels = ['O'] * len(words)
            if label_entities is not None:
                for key, value in label_entities.items():
                    for sub_name, sub_index in value.items():
                        for start_index, end_index in sub_index:
                            assert ''.join(words[start_index:end_index + 1]) == sub_name
                            if start_index == end_index:
                                labels[start_index] = 'S-' + key
                            else:
                                labels[start_index] = 'B-' + key
                                labels[start_index + 1:end_index + 1] = ['I-' + key] * (len(sub_name) - 1)
            json_d['id'] = f"{mode}_{idx}"
            json_d['context'] = " ".join(words)
            json_d['tag'] = " ".join(labels)
            json_d['raw_context'] = "".join(words)
            idx += 1
            examples.append(json_d)
    return examples

In [3]:
data=_create_examples('/data/nfs14/nfs/aisearch/asr/xhsun/datasets/cluener/train.json',mode='train')

In [4]:
len(data)

10748

In [6]:
data[0]

{'id': 'train_0',
 'context': '浙 商 银 行 企 业 信 贷 部 叶 老 桂 博 士 则 从 另 一 个 角 度 对 五 道 门 槛 进 行 了 解 读 。 叶 老 桂 认 为 ， 对 目 前 国 内 商 业 银 行 而 言 ，',
 'tag': 'B-company I-company I-company I-company O O O O O B-name I-name I-name O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
 'raw_context': '浙商银行企业信贷部叶老桂博士则从另一个角度对五道门槛进行了解读。叶老桂认为，对目前国内商业银行而言，'}

In [9]:
train_examples=[]
for example in data:
    seq_in=example['context'].strip().split(' ')
    seq_out=example['tag'].strip().split(' ')
    assert len(seq_in)==len(seq_out)
    train_examples.append(InputExample(seq_in=seq_in,seq_out=seq_out))

dev_examples=train_examples[-2000:]
dev_seq_in=[]
dev_seq_out=[]
for example in dev_examples:
    dev_seq_in.append(example.seq_in)
    dev_seq_out.append(example.seq_out)
train_examples=train_examples[:-2000]

In [10]:
# output_path="/data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner"
# if not os.path.exists(os.path.join(output_path,'label2id.json')):
label_set=set()
for examples in data:
    label_list=examples['tag'].strip().split(' ')
    for label in label_list:
        label_set.add(label)
label2id={'[PAD]':0}
for label in label_set:
    label2id[label]=len(label2id)

In [11]:
label2id

{'[PAD]': 0,
 'B-address': 1,
 'S-company': 2,
 'I-book': 3,
 'I-government': 4,
 'B-book': 5,
 'O': 6,
 'I-position': 7,
 'B-scene': 8,
 'S-position': 9,
 'I-movie': 10,
 'S-address': 11,
 'B-game': 12,
 'I-organization': 13,
 'B-government': 14,
 'I-address': 15,
 'B-company': 16,
 'I-scene': 17,
 'B-name': 18,
 'I-company': 19,
 'I-game': 20,
 'I-name': 21,
 'B-movie': 22,
 'B-organization': 23,
 'B-position': 24,
 'S-name': 25}

In [12]:
model_path='/data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/'
ner_model=Ner(model_path,label2id=None,use_crf=True,device='cpu',is_finetune=True)

2021-08-17 09:41:28 - INFO - __init__ - 40 : Load label2id from /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/label2id.json
2021-08-17 09:41:28 - INFO - __init__ - 56 : The label2id is
 {"[PAD]": 0, "I-government": 1, "I-name": 2, "S-name": 3, "B-scene": 4, "S-company": 5, "B-game": 6, "I-company": 7, "O": 8, "I-position": 9, "B-address": 10, "B-name": 11, "B-company": 12, "I-game": 13, "I-movie": 14, "I-book": 15, "B-book": 16, "I-organization": 17, "B-position": 18, "I-scene": 19, "B-organization": 20, "S-address": 21, "B-government": 22, "B-movie": 23, "S-position": 24, "I-address": 25}
2021-08-17 09:41:28 - INFO - get_config_dict - 177 : loading configuration file /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-17 09:41:28 - INFO - from_pretrained - 404 : loading bert model file /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT
2021-08-17 09:41:28 - INFO - from_pretrained - 423 : BertConfig has been loaded from /data

In [12]:
from torch.utils.data import DataLoader
batch_size=32
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=batch_size)
evaluator=nerEvaluator(label2id=label2id,seq_in=dev_seq_in,seq_out=dev_seq_out)

2021-08-16 15:39:05 - INFO - __init__ - 39 : Total evaluate nums : 2000
2021-08-16 15:39:05 - INFO - __init__ - 40 : input is string : False, input is list : True
2021-08-16 15:39:05 - INFO - __init__ - 41 : seq in and out like : 
['并', '且', '更', '有', '动', '力', '练', '习', 'W', 'A', 'R', '3', '，', '为', '一', '战', '成', '名', '而', '准', '备', '着', '，']	['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-game', 'I-game', 'I-game', 'I-game', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
2021-08-16 15:39:05 - INFO - __init__ - 42 : In this evaluator, slot contains ([PAD] I-name I-game S-position B-book S-company I-movie B-scene O B-movie B-game I-position I-organization I-address I-scene I-government B-government B-organization B-company I-company B-name S-name B-address I-book S-address B-position)


In [13]:
ner_model.model.tokenizer.convert_tokens_to_ids(ner_model.model.tokenizer.tokenize("为什么会出现UNJK的问题“?"))

[711,
 784,
 720,
 833,
 1139,
 4385,
 163,
 8171,
 8334,
 8197,
 4638,
 7309,
 7579,
 100,
 136]

In [14]:
ner_model.fit(train_dataloader=train_dataloader,evaluator=evaluator,epochs=5,
              output_path='/data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner')

2021-08-16 15:39:07 - INFO - fit - 163 : 一个epoch 下，每隔54个step会输出一次loss，每隔137个step会评估一次模型


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

0it [00:00, ?it/s]

2021-08-16 15:39:09 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoch 0 after 1 steps:
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        PAD]     0.0000    0.0000    0.0000         0
     address     0.0000    0.0000    0.0000       528
        book     0.0000    0.0000    0.0000       193
     company     0.0000    0.0000    0.0000       521
        game     0.0012    0.0244    0.0023       409
  government     0.0000    0.0000    0.0000       339
       movie     0.0000    0.0000    0.0000       212
        name     0.0000    0.0000    0.0000       700
organization     0.0000    0.0000    0.0000       648
    position     0.0000    0.0000    0.0000       584
       scene     0.0000    0.0000    0.0000       270

   micro avg     0.0002    0.0023    0.0003      4404
   macro avg     0.0001    0.0022    0.0002      4404
weighted avg     0.0001    0.0023    0.0002      4404



2021-08-16 15:39:32 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 15:39:32 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 15:39:32 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 15:39:32 - INFO - fit - 238 : In epoch 0, training_step 0, the eval score is 0.00030071570337403015, previous eval score is -9999999, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 15:40:55 - INFO - fit - 216 : Epoch : 0, train_step : 54/1370, loss_value : 2.4964273925180787 
2021-08-16 15:42:17 - INFO - fit - 216 : Epoch : 0, train_step : 108/1370, loss_value : 0.9397658170373352 
2021-08-16 15:43:00 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoch 0 afte

              precision    recall  f1-score   support

     address     0.1579    0.2917    0.2049       528
        book     0.1623    0.1917    0.1758       193
     company     0.4469    0.6468    0.5286       521
        game     0.3253    0.3985    0.3582       409
  government     0.2881    0.4012    0.3354       339
       movie     0.3458    0.6085    0.4410       212
        name     0.6798    0.8886    0.7703       700
organization     0.6617    0.6883    0.6747       648
    position     0.5210    0.5103    0.5156       584
       scene     0.0754    0.1111    0.0898       270

   micro avg     0.4012    0.5341    0.4582      4404
   macro avg     0.3664    0.4737    0.4094      4404
weighted avg     0.4271    0.5341    0.4707      4404



2021-08-16 15:43:22 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 15:43:22 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 15:43:22 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 15:43:22 - INFO - fit - 238 : In epoch 0, training_step 137, the eval score is 0.4582115721800117, previous eval score is 0.00030071570337403015, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 15:43:59 - INFO - fit - 216 : Epoch : 0, train_step : 162/1370, loss_value : 0.44044450808454444 
2021-08-16 15:45:18 - INFO - fit - 216 : Epoch : 0, train_step : 216/1370, loss_value : 0.29042251380505385 
2021-08-16 15:46:36 - INFO - fit - 216 : Epoch : 0, train_step : 270/1370, loss_value : 0.25

0it [00:00, ?it/s]

2021-08-16 15:46:42 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoch 1 after 1 steps:


              precision    recall  f1-score   support

     address     0.4269    0.6193    0.5054       528
        book     0.7233    0.7720    0.7469       193
     company     0.6431    0.7505    0.6926       521
        game     0.6848    0.7702    0.7250       409
  government     0.5945    0.8348    0.6945       339
       movie     0.7761    0.7358    0.7554       212
        name     0.7811    0.8971    0.8351       700
organization     0.7216    0.7562    0.7385       648
    position     0.5907    0.7860    0.6745       584
       scene     0.5598    0.5370    0.5482       270

   micro avg     0.6385    0.7591    0.6936      4404
   macro avg     0.6502    0.7459    0.6916      4404
weighted avg     0.6487    0.7591    0.6969      4404



2021-08-16 15:47:05 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 15:47:05 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 15:47:05 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 15:47:05 - INFO - fit - 238 : In epoch 1, training_step 0, the eval score is 0.6935684647302904, previous eval score is 0.4582115721800117, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 15:48:21 - INFO - fit - 216 : Epoch : 1, train_step : 108/1370, loss_value : 0.21624127264927934 
2021-08-16 15:49:38 - INFO - fit - 216 : Epoch : 1, train_step : 216/1370, loss_value : 0.20587620553043154 
2021-08-16 15:50:22 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epo

              precision    recall  f1-score   support

     address     0.4619    0.5398    0.4978       528
        book     0.6548    0.8549    0.7416       193
     company     0.6533    0.7812    0.7115       521
        game     0.7296    0.7653    0.7470       409
  government     0.6866    0.8466    0.7583       339
       movie     0.7246    0.7075    0.7160       212
        name     0.8077    0.9000    0.8514       700
organization     0.8013    0.7593    0.7797       648
    position     0.6671    0.7894    0.7231       584
       scene     0.5199    0.7259    0.6059       270

   micro avg     0.6761    0.7688    0.7195      4404
   macro avg     0.6707    0.7670    0.7132      4404
weighted avg     0.6835    0.7688    0.7217      4404



2021-08-16 15:50:44 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 15:50:44 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 15:50:44 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 15:50:44 - INFO - fit - 238 : In epoch 1, training_step 274, the eval score is 0.7195070123246919, previous eval score is 0.6935684647302904, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 15:51:18 - INFO - fit - 216 : Epoch : 1, train_step : 324/1370, loss_value : 0.19467210148771605 
2021-08-16 15:52:36 - INFO - fit - 216 : Epoch : 1, train_step : 432/1370, loss_value : 0.19225745096250815 
2021-08-16 15:53:55 - INFO - fit - 216 : Epoch : 1, train_step : 540/1370, loss_value : 0.180478

0it [00:00, ?it/s]

2021-08-16 15:54:01 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoch 2 after 1 steps:


              precision    recall  f1-score   support

     address     0.4918    0.5682    0.5272       528
        book     0.6454    0.8394    0.7297       193
     company     0.6391    0.7850    0.7046       521
        game     0.7305    0.8484    0.7851       409
  government     0.6644    0.8466    0.7445       339
       movie     0.7343    0.7170    0.7255       212
        name     0.8595    0.8829    0.8710       700
organization     0.7052    0.8009    0.7500       648
    position     0.6964    0.8014    0.7452       584
       scene     0.6170    0.6444    0.6304       270

   micro avg     0.6839    0.7802    0.7289      4404
   macro avg     0.6784    0.7734    0.7213      4404
weighted avg     0.6877    0.7802    0.7300      4404



2021-08-16 15:54:22 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 15:54:22 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 15:54:22 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 15:54:22 - INFO - fit - 238 : In epoch 2, training_step 0, the eval score is 0.7288926601612218, previous eval score is 0.7195070123246919, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 15:55:39 - INFO - fit - 216 : Epoch : 2, train_step : 162/1370, loss_value : 0.14935374025393416 
2021-08-16 15:56:56 - INFO - fit - 216 : Epoch : 2, train_step : 324/1370, loss_value : 0.1417187535100513 
2021-08-16 15:57:39 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoc

              precision    recall  f1-score   support

     address     0.4815    0.5682    0.5213       528
        book     0.6549    0.8653    0.7455       193
     company     0.6501    0.8023    0.7182       521
        game     0.7556    0.8313    0.7916       409
  government     0.6752    0.8584    0.7558       339
       movie     0.7514    0.6557    0.7003       212
        name     0.8195    0.8886    0.8526       700
organization     0.7776    0.7716    0.7746       648
    position     0.6695    0.8082    0.7324       584
       scene     0.5727    0.7000    0.6300       270

   micro avg     0.6843    0.7807    0.7293      4404
   macro avg     0.6808    0.7750    0.7222      4404
weighted avg     0.6902    0.7807    0.7308      4404



2021-08-16 15:58:03 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 15:58:03 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 15:58:03 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 15:58:03 - INFO - fit - 238 : In epoch 2, training_step 411, the eval score is 0.7293169282986849, previous eval score is 0.7288926601612218, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 15:58:37 - INFO - fit - 216 : Epoch : 2, train_step : 486/1370, loss_value : 0.13864906904874025 
2021-08-16 15:59:55 - INFO - fit - 216 : Epoch : 2, train_step : 648/1370, loss_value : 0.1580821915909096 
2021-08-16 16:01:12 - INFO - fit - 216 : Epoch : 2, train_step : 810/1370, loss_value : 0.1431288

0it [00:00, ?it/s]

2021-08-16 16:01:18 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoch 3 after 1 steps:


              precision    recall  f1-score   support

     address     0.4968    0.5852    0.5374       528
        book     0.7371    0.8135    0.7734       193
     company     0.7127    0.7620    0.7365       521
        game     0.7122    0.8411    0.7713       409
  government     0.7167    0.8584    0.7812       339
       movie     0.7183    0.7217    0.7200       212
        name     0.8388    0.9071    0.8717       700
organization     0.7571    0.8179    0.7864       648
    position     0.7024    0.7962    0.7464       584
       scene     0.6032    0.6926    0.6448       270

   micro avg     0.7044    0.7875    0.7436      4404
   macro avg     0.6996    0.7796    0.7369      4404
weighted avg     0.7069    0.7875    0.7446      4404



2021-08-16 16:01:40 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 16:01:40 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 16:01:40 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 16:01:40 - INFO - fit - 238 : In epoch 3, training_step 0, the eval score is 0.7436474750723706, previous eval score is 0.7293169282986849, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 16:02:57 - INFO - fit - 216 : Epoch : 3, train_step : 216/1370, loss_value : 0.11311661569332634 
2021-08-16 16:04:16 - INFO - fit - 216 : Epoch : 3, train_step : 432/1370, loss_value : 0.11942574802648138 
2021-08-16 16:04:59 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epo

              precision    recall  f1-score   support

     address     0.5160    0.5814    0.5467       528
        book     0.6510    0.8601    0.7411       193
     company     0.6953    0.7927    0.7408       521
        game     0.7582    0.8435    0.7986       409
  government     0.6848    0.8525    0.7595       339
       movie     0.7725    0.6887    0.7282       212
        name     0.8519    0.8957    0.8733       700
organization     0.7408    0.8117    0.7747       648
    position     0.6908    0.7997    0.7413       584
       scene     0.5935    0.6815    0.6345       270

   micro avg     0.7021    0.7879    0.7426      4404
   macro avg     0.6955    0.7808    0.7339      4404
weighted avg     0.7054    0.7879    0.7433      4404



2021-08-16 16:05:55 - INFO - fit - 216 : Epoch : 3, train_step : 648/1370, loss_value : 0.11390565273662408 
2021-08-16 16:07:16 - INFO - fit - 216 : Epoch : 3, train_step : 864/1370, loss_value : 0.1127172170413865 
2021-08-16 16:08:37 - INFO - fit - 216 : Epoch : 3, train_step : 1080/1370, loss_value : 0.12240925610617355 


0it [00:00, ?it/s]

2021-08-16 16:08:44 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoch 4 after 1 steps:


              precision    recall  f1-score   support

     address     0.5263    0.6250    0.5714       528
        book     0.7285    0.8342    0.7778       193
     company     0.6972    0.7735    0.7334       521
        game     0.7560    0.8411    0.7963       409
  government     0.7179    0.8407    0.7745       339
       movie     0.7895    0.7075    0.7463       212
        name     0.8491    0.9000    0.8738       700
organization     0.7657    0.8071    0.7859       648
    position     0.6979    0.7911    0.7416       584
       scene     0.6026    0.6852    0.6412       270

   micro avg     0.7143    0.7886    0.7496      4404
   macro avg     0.7131    0.7805    0.7442      4404
weighted avg     0.7181    0.7886    0.7510      4404



2021-08-16 16:09:06 - INFO - save_pretrained - 508 : Model weights saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/pytorch_model.bin
2021-08-16 16:09:06 - INFO - save_pretrained - 150 : Configuration saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/config.json
2021-08-16 16:09:06 - INFO - save_vocab - 51 : Vocab saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner/BERT/vocab.txt
2021-08-16 16:09:06 - INFO - fit - 238 : In epoch 4, training_step 0, the eval score is 0.7496222749838117, previous eval score is 0.7436474750723706, model has been saved in /data/nfs14/nfs/aisearch/asr/xhsun/CompetitionModel/tmp_ner
2021-08-16 16:10:27 - INFO - fit - 216 : Epoch : 4, train_step : 270/1370, loss_value : 0.0989733136224526 
2021-08-16 16:11:49 - INFO - fit - 216 : Epoch : 4, train_step : 540/1370, loss_value : 0.09727128112205753 
2021-08-16 16:12:35 - INFO - __call__ - 59 : nerEvaluator: Evaluating the model on  dataset in epoc

              precision    recall  f1-score   support

     address     0.5189    0.5985    0.5558       528
        book     0.7500    0.8238    0.7852       193
     company     0.7090    0.7716    0.7390       521
        game     0.7345    0.8386    0.7831       409
  government     0.7150    0.8437    0.7740       339
       movie     0.7550    0.7123    0.7330       212
        name     0.8581    0.9071    0.8819       700
organization     0.7419    0.8117    0.7752       648
    position     0.7029    0.8185    0.7563       584
       scene     0.5663    0.6963    0.6246       270

   micro avg     0.7087    0.7911    0.7476      4404
   macro avg     0.7052    0.7822    0.7408      4404
weighted avg     0.7120    0.7911    0.7489      4404



2021-08-16 16:13:32 - INFO - fit - 216 : Epoch : 4, train_step : 810/1370, loss_value : 0.10046491771936417 
2021-08-16 16:14:54 - INFO - fit - 216 : Epoch : 4, train_step : 1080/1370, loss_value : 0.09524496306699735 
2021-08-16 16:16:17 - INFO - fit - 216 : Epoch : 4, train_step : 1350/1370, loss_value : 0.09873104205837956 


In [None]:
dev_seq_out