In [1]:
import os
import sys
import random
import pickle
import numpy as np
from tqdm import tqdm
import tensorflow as tf 
from bert4keras.backend import K,keras,search_layer
from bert4keras.snippets import ViterbiDecoder,to_array

from data_load import *
from build_model import bert_bilstm_crf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# 固定随机种子
seed = 233
tf.set_random_seed(seed)
np.random.seed(seed)
os.environ['PYTHONHSHSEED'] = str(seed)

# 权重参数
epochs = 4
batch_size = 16
lstm_units = 128
drop_rate = 0.1 #有改动0.1-》0.01
learning_rate = 5e-5
max_len =168



# 权重路径
config_path = './bert_weight_file/uncased_L-4_H-768_A-12/bert_config.json'
checkpoint_path = './bert_weight_file/uncased_L-4_H-768_A-12/bert_model.ckpt'

# 模型保存路径
model_save_path = './save_model/bert_bilstm_crf.weight'
CRF_save_path = './save_model/CRF.npy'

class NamedEntityRecognizer(ViterbiDecoder):
    """命名实体识别器
    """
    def recognize(self, text):
        tokens = tokenizer.tokenize(text)
        while len(tokens) > max_len:
            tokens.pop(-2)
        mapping = tokenizer.rematch(text, tokens)
        token_ids = tokenizer.tokens_to_ids(tokens)
        segment_ids = [0] * len(token_ids)
        token_ids, segment_ids = to_array([token_ids], [segment_ids]) # ndarray
        nodes = model.predict([token_ids, segment_ids])[0] # [sqe_len,23]
        labels = self.decode(nodes) # id [sqe_len,], [0 0 0 0 0 7 8 8 0 0 0 0 0 0 0]
        entities, starting = [], False
        for i, label in enumerate(labels):
            if label > 0:
                if label % 2 == 1:
                    starting = True
                    entities.append([[i], id2label[(label - 1) // 2]])
                elif starting:
                    entities[-1][0].append(i)
                else:
                    starting = False
            else:
                starting = False
        return [(text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1], l) for w, l in entities]
    
#相等应加set（）中源文本的数量    
def ner_metrics(data):
    X,Y,Z = 1e-6,1e-6,1e-6
    count = 0
    for d in tqdm(data):
        text = ''.join([i[0] for i in d])
        pred= NER.recognize(text)
        R = set(pred)
        T = set([tuple(i) for i in d if i[1] != 'O'])
        
        # 便于T和R做交集
        m = []
        for i in T:
            for j in i[0]:
                m.append((j,i[1]))
        T = set(m)

        X += len(R&T)
        Y += len(R)
        Z += len(T)
        count += 1

    f1,precision,recall = 2 * X / (Y + Z),X / Y,X / Z
    return f1,precision,recall

class Evaluator(keras.callbacks.Callback):
    def __init__(self):
        super(Evaluator, self).__init__()
        self.best_val_f1 = 0
    def on_epoch_end(self, epoch,logs=None):
        NER.trans = K.eval(CRF.trans) # 可能有错
        f1, precision, recall = ner_metrics(valid_data)
        if f1 > self.best_val_f1:
            model.save_weights(model_save_path)
            self.best_val_f1 = f1
            print('save model to {}'.format(checkpoint_path))
        else:
            global learning_rate
            learning_rate = learning_rate / 5
        print(
              'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n' %
              (f1,precision,recall,self.best_val_f1)
        )
        
# def adversarial_training(model, embedding_name, epsilon=1):
#     """
#     给模型添加对抗训练
#     其中model是需要添加对抗训练的keras模型
#     """
#     if model.train_function is None:  # 如果还没有训练函数
#         model._make_train_function()  # 手动make
#     old_train_function = model.train_function  # 备份旧的训练函数

#     # 查找Embedding层
#     for output in model.outputs:
#         embedding_layer = search_layer(output, embedding_name)
#         if embedding_layer is not None:
#             break
#     if embedding_layer is None:
#         raise Exception('Embedding layer not found')

#     # 求Embedding梯度
#     embeddings = embedding_layer.embeddings  # Embedding矩阵
#     gradients = K.gradients(model.total_loss, [embeddings])  # Embedding梯度
#     gradients = K.zeros_like(embeddings) + gradients[0]  # 转为dense tensor

#     # 封装为函数
#     inputs = (
#         model._feed_inputs + model._feed_targets + model._feed_sample_weights
#     )  # 所有输入层
#     embedding_gradients = K.function(
#         inputs=inputs,
#         outputs=[gradients],
#         name='embedding_gradients',
#     )  # 封装为函数

#     def train_function(inputs):
#         # 重新定义训练函数
#         grads = embedding_gradients(inputs)[0]  # Embedding梯度
#         delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8)  # 计算扰动
#         K.set_value(embeddings, K.eval(embeddings) + delta)  # 注入扰动
#         outputs = old_train_function(inputs)  # 梯度下降
#         K.set_value(embeddings, K.eval(embeddings) - delta)  # 删除扰动
#         return outputs
#     model.train_function = train_function  # 覆盖原训练函数        



model,CRF = bert_bilstm_crf(config_path,checkpoint_path,num_labels,lstm_units,drop_rate,learning_rate)
# adversarial_training(model,'Embedding-Token',0.5)
NER = NamedEntityRecognizer(trans=K.eval(CRF.trans), starts=[0], ends=[0])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [4]:
if __name__ == "__main__":
    train_data,_ = load_data('./ner_data/train/train.txt',128)
    valid_data,_ = load_data('./ner_data/dev/test.txt',128)
    
    flag = False
    count = 0
    i = 0
    while(i<len(train_data)):
        if flag==True:
            i = i-1
        if train_data[i][0][1] == 'O'and len(train_data[i])==1:
            del train_data[i]
            flag = True
            count+=1
        else:
            for j in range(count):
                train_data.append(train_data[i])
            flag = False
            count = 0
        i += 1
    
    train_generator = data_generator(train_data, batch_size)
    valid_generator = data_generator(valid_data, batch_size*5)
    
    evaluator = Evaluator()
    
    def scheduler(epoch):
        return learning_rate/(max(2*(epoch-1),1))

    lr_scheduler = keras.callbacks.LearningRateScheduler(scheduler)

    
    model.fit(
        train_generator.forfit(),
        steps_per_epoch = len(train_generator),
        validation_data = valid_generator.forfit(),
        validation_steps = len(valid_generator),
        epochs = epochs,
        callbacks = [evaluator,lr_scheduler]
    )
    
    print(K.eval(CRF.trans))
    print(K.eval(CRF.trans).shape)
    model.save_weights(model_save_path)
    np.save(CRF_save_path, K.eval(CRF.trans))

    # torch.save(model, model_save_path)
    # pickle.dump(K.eval(CRF.trans),open('./save_model/crf_trans.pkl','rb'))
    
else:
    # model = torch.load(model_save_path)
    model.load_weights(model_save_path)
    # NER.trans = pickle.load(open('./save_model/crf_trans.pkl','rb'))


Epoch 1/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [03:28<00:00, 39.69it/s]


save model to ./bert_weight_file/uncased_L-4_H-768_A-12/bert_model.ckpt
valid: f1: 0.89317, precision: 0.84999, recall: 0.94098, best f1: 0.89317

Epoch 2/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [03:26<00:00, 40.07it/s]


save model to ./bert_weight_file/uncased_L-4_H-768_A-12/bert_model.ckpt
valid: f1: 0.89405, precision: 0.84702, recall: 0.94662, best f1: 0.89405

Epoch 3/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [03:29<00:00, 39.45it/s]


save model to ./bert_weight_file/uncased_L-4_H-768_A-12/bert_model.ckpt
valid: f1: 0.90605, precision: 0.86587, recall: 0.95014, best f1: 0.90605

Epoch 4/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [03:28<00:00, 39.69it/s]


valid: f1: 0.90277, precision: 0.85609, recall: 0.95482, best f1: 0.90605

[[ 0.5732396  -0.6188147  -0.12123274  0.10713068 -0.4679119  -0.59594375
  -0.5330648  -0.3448834  -0.1026888  -0.68678284 -0.38008773]
 [-0.3268835  -0.22179076 -0.41482008  0.20347288  0.15495309 -0.54423153
  -0.35120732 -0.22765681  0.2145274  -0.02894322 -0.12005794]
 [-0.6355784  -0.2859531  -0.1884572  -0.39584544 -0.36548477 -0.37046388
   0.44329137 -0.0971401  -0.4965401  -0.18640006 -0.17739114]
 [-0.6253033   0.03999732  0.18687652 -0.00191786 -0.07030854 -0.00507968
   0.14093065 -0.01452608 -0.30369285 -0.6493261  -0.17835732]
 [-0.6931408  -0.26227516  0.36901703 -0.23902875  0.06996739 -0.5383688
  -0.5162194   0.07795113  0.47598404 -0.44287404  0.50213104]
 [ 0.29303068  0.33775932  0.0663694   0.1427771  -0.19603327 -0.288441
  -0.38806555 -0.50234056 -0.6440322  -0.02040486 -0.3626405 ]
 [-0.81960636  0.03804528  0.25959396 -0.3833121   0.34006646 -0.26488525
  -0.02490926 -0.11169314  0.222

In [3]:
model.load_weights(model_save_path)

In [4]:
if __name__ == "__main__":
    epochs = 4
    
    train_data,_ = load_data('./ner_data/train/train.txt',128)
    valid_data,_ = load_data('./ner_data/dev/test.txt',128)
    

    train_generator = data_generator(train_data, batch_size)
    valid_generator = data_generator(valid_data, batch_size*5)
    
    checkpoint = keras.callbacks.ModelCheckpoint(
        model_save_path,
        monitor = 'val_sparse_accuracy',
        verbose = 1,
        save_best_only = True,
        mode = 'max'
    )
    evaluator = Evaluator()
    
#     def scheduler(epoch):
#         return learning_rate/(max(2*(epoch-1),1))

#     lr_scheduler = keras.callbacks.LearningRateScheduler(scheduler)

    
    model.fit(
        train_generator.forfit(),
        steps_per_epoch = len(train_generator),
        validation_data = valid_generator.forfit(),
        validation_steps = len(valid_generator),
        epochs = epochs,
        callbacks = [evaluator]
    )
    
    print(K.eval(CRF.trans))
    print(K.eval(CRF.trans).shape)
    model.save_weights(model_save_path)
    np.save(CRF_save_path, K.eval(CRF.trans))

    # torch.save(model, model_save_path)
    # pickle.dump(K.eval(CRF.trans),open('./save_model/crf_trans.pkl','rb'))
    
else:
    # model = torch.load(model_save_path)
    model.load_weights(model_save_path)
    # NER.trans = pickle.load(open('./save_model/crf_trans.pkl','rb'))


Epoch 1/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [03:37<00:00, 37.98it/s]


save model to ./bert_weight_file/uncased_L-4_H-768_A-12/bert_model.ckpt
valid: f1: 0.92760, precision: 0.92177, recall: 0.93351, best f1: 0.92760

Epoch 2/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [03:57<00:00, 34.85it/s]


valid: f1: 0.92492, precision: 0.91990, recall: 0.93000, best f1: 0.92760

Epoch 3/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [04:10<00:00, 33.02it/s]


valid: f1: 0.92755, precision: 0.91609, recall: 0.93930, best f1: 0.92760

Epoch 4/4


100%|██████████████████████████████████████████████████████████████████████████████| 8275/8275 [04:17<00:00, 32.09it/s]


save model to ./bert_weight_file/uncased_L-4_H-768_A-12/bert_model.ckpt
valid: f1: 0.93006, precision: 0.92065, recall: 0.93966, best f1: 0.93006

[[ 6.61567867e-01 -7.68517792e-01 -7.32621372e-01 -2.85716578e-02
  -1.07572722e+00 -7.15510130e-01 -1.13239563e+00 -4.00296718e-01
  -7.17471063e-01 -8.86213601e-01 -9.88571763e-01]
 [-4.85753953e-01 -1.33429423e-01 -7.60351479e-01  9.17750224e-02
  -1.94741100e-01 -6.42317533e-01 -6.96268857e-01 -2.71289468e-01
  -1.38125718e-01 -1.16618574e-01 -4.69828427e-01]
 [-1.24130547e+00 -6.43295884e-01 -1.94629222e-01 -6.59347475e-01
  -3.70389432e-01 -7.25305676e-01  4.33107167e-01 -2.23937005e-01
  -5.03868937e-01 -6.83418512e-01 -1.86347455e-01]
 [-7.49212861e-01 -8.28977600e-02 -1.15238063e-01  7.61296898e-02
  -3.69333923e-01 -1.07453898e-01 -1.55804187e-01 -5.58670275e-02
  -6.08527839e-01 -7.31610000e-01 -4.79127169e-01]
 [-1.29739940e+00 -6.18033767e-01  3.60542953e-01 -4.98364419e-01
   6.39630184e-02 -8.86259615e-01 -5.19447684e-01 -4.95