In [623]:
import numpy as np
import torch
import tqdm
from transformers import BertTokenizer

In [624]:
def readfile(filename):
    """Read an ATEPC corpus file into a list of sentences.

    Every non-blank line is expected to hold three space-separated fields,
    ``token tag polarity``. Blank lines and lines starting with ``-DOCSTART``
    end the current sentence.

    Args:
        filename: Path of the UTF-8 encoded data file.

    Returns:
        A list of ``(tokens, tags, polarities)`` tuples, one per sentence.
        ``tokens`` and ``tags`` are lists of str, ``polarities`` a list of int.

    Raises:
        ValueError: If the last field of a line is not an integer.
    """
    data = []
    sentence, tag, polarity = [], [], []
    # The context manager closes the file even if parsing raises.
    with open(filename, encoding='utf8') as f:
        for line in f:
            if not line.strip() or line.startswith('-DOCSTART'):
                # Sentence boundary: flush whatever has been collected so far.
                if sentence:
                    data.append((sentence, tag, polarity))
                    sentence, tag, polarity = [], [], []
                continue
            # Strip only the line terminator so a final line without a
            # trailing newline keeps its last character.
            splits = line.rstrip('\r\n').split(' ')
            if len(splits) != 3:
                print('warning! detected error line(s) in input file:{}'.format(line))
                if len(splits) < 3:
                    # Not enough fields to recover token, tag and polarity.
                    continue
            sentence.append(splits[0])
            tag.append(splits[-2])
            polarity.append(int(splits[-1]))

    # Flush the last sentence when the file does not end with a blank line.
    if sentence:
        data.append((sentence, tag, polarity))
    return data


# Parse both corpus splits into lists of (tokens, tags, polarities) tuples.
test_data = None
train_data = readfile("./datasets/notebook/notebook.atepc.train.dat")
test_data = readfile("./datasets/notebook/notebook.atepc.test.dat")

# Report the split sizes and show the first training sentence as a sample.
split_sizes = (len(train_data), len(test_data))
print("训练集数量：%d 测试集数量：%d" % split_sizes)
print("实例：", train_data[0], sep="\n")

训练集数量：496 测试集数量：123
实例：
(['外', '观', '上', '人', '性', '化', '设', '计', '也', '有', '值', '得', '一', '提', '的', '细', '微', '之', '处'], ['O', 'O', 'O', 'O', 'O', 'O', 'B-ASP', 'I-ASP', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], [-1, -1, -1, -1, -1, -1, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])


In [625]:
class InputExample(object):
    """Plain container for one raw example.

    Attributes are stored exactly as passed to the constructor:
        guid: identifier of the example.
        text_a: the input sentence.
        text_b: the aspect term(s) found in the sentence.
        sentence_label: tags for the sentence.
        aspect_label: tags for the aspect term(s).
        polarity: sentiment polarity values.
    """

    def __init__(self, guid, text_a, text_b=None, sentence_label=None, aspect_label=None,
                 polarity=None):
        self.guid, self.text_a, self.text_b = guid, text_a, text_b
        self.sentence_label, self.aspect_label = sentence_label, aspect_label
        self.polarity = polarity


def create_example(lines, set_type):
    """Turn parsed sentences into ``InputExample`` objects.

    For every sentence the tokens tagged ``B-ASP`` / ``I-ASP`` are collected
    as the aspect (``text_b``). The stored polarity list is the sentence
    polarities followed by ``-1`` and then the aspect polarities; the extra
    ``-1`` lines up with the ``[SEP]`` token that is later inserted between
    sentence and aspect.

    The input ``lines`` are not modified: every example receives its own
    copies of the lists, so calling this function again yields the same result.

    Args:
        lines: Iterable of ``(tokens, tags, polarities)`` tuples.
        set_type: Prefix used to build each example's guid (e.g. ``"train"``).

    Returns:
        A list of ``InputExample``, one per input sentence.
    """
    examples = []
    for i, (sentence, tag, polarity) in enumerate(lines):
        aspect = []
        aspect_tag = []
        aspect_polarity = [-1]  # placeholder for the separator before the aspect
        for w, t, p in zip(sentence, tag, polarity):
            if t in ("B-ASP", "I-ASP"):
                aspect.append(w)
                aspect_tag.append(t)
                aspect_polarity.append(p)
        examples.append(
            InputExample(guid="%s-%s" % (set_type, i),
                         text_a=list(sentence),
                         text_b=aspect,
                         sentence_label=list(tag),
                         aspect_label=aspect_tag,
                         # Concatenate into a new list rather than extending the caller's list.
                         polarity=list(polarity) + aspect_polarity))
    return examples


# Wrap both splits in InputExample objects.
train_examples = create_example(train_data, "train")
test_examples = create_example(test_data, "test")

# Show every field of the first training example, one per line.
first_example = train_examples[0]
for field in ("guid", "text_a", "text_b", "sentence_label", "aspect_label", "polarity"):
    print(getattr(first_example, field))

train-0
['外', '观', '上', '人', '性', '化', '设', '计', '也', '有', '值', '得', '一', '提', '的', '细', '微', '之', '处']
['设', '计']
['O', 'O', 'O', 'O', 'O', 'O', 'B-ASP', 'I-ASP', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['B-ASP', 'I-ASP']
[-1, -1, -1, -1, -1, -1, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, 2]


In [626]:
MAX_SEQUENCE_LENGTH = 80  # every feature list is padded / truncated to this length
LABEL_LIST = ["O", "B-ASP", "I-ASP", "[CLS]", "[SEP]"]
PRETRAINED_BERT_MODEL = "bert-base-chinese"
# Label ids are assigned starting at 1; id 0 is used for padding, hence the +1.
NUM_LABELS = len(LABEL_LIST) + 1
LEARNING_RATE = 3e-5
BATCH_SIZE = 32
# Pick the best available backend instead of hard-coding one, so the notebook
# also runs on machines without Apple MPS. Override manually if needed:
# DEVICE = "cpu"
_mps_backend = getattr(torch.backends, "mps", None)  # absent on older torch builds
if _mps_backend is not None and _mps_backend.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [627]:
# Load the tokenizer that matches PRETRAINED_BERT_MODEL; lower-casing is enabled.
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_BERT_MODEL, do_lower_case=True)

1.3.3 将原来数据中的情感标签对齐（把数据集里的 2 变为 1）
标签含义：-1 为非 aspect 字符的占位标记（见上面的实例，不参与情感分类）；0 普通；1（原数据中的 2）positive

In [628]:
def convert_polarity(examples):
    """Remap polarity value 2 to 1 on every example.

    Each example's ``polarity`` attribute is replaced by a new list in which
    every 2 has become 1; all other values are kept as they are.

    Args:
        examples: Sequence of objects exposing a ``polarity`` list.

    Returns:
        The same ``examples`` object that was passed in.
    """
    for example in examples:
        example.polarity = [1 if value == 2 else value for value in example.polarity]
    return examples


# Align the polarity labels of both splits (2 -> 1), then show one sample.
train_examples, test_examples = convert_polarity(train_examples), convert_polarity(test_examples)
print(train_examples[0].polarity)

[-1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1]


In [629]:
class InputFeatures(object):
    """Plain container for the model-ready encoding of one example.

    The constructor stores each argument on the attribute of the same name:
    ``input_ids_spc``, ``input_mask``, ``segment_ids``, ``label_id``,
    ``polarities``, ``valid_ids`` and ``label_mask``.
    """

    def __init__(self, input_ids_spc, input_mask, segment_ids, label_id,
                 polarities=None, valid_ids=None, label_mask=None):
        self.input_ids_spc, self.input_mask = input_ids_spc, input_mask
        self.segment_ids, self.label_id = segment_ids, label_id
        self.valid_ids, self.label_mask = valid_ids, label_mask
        self.polarities = polarities

In [630]:
def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """Encode examples as fixed-length feature lists.

    For every example the word sequence ``text_a + ['[SEP]'] + text_b`` is
    tokenized and wrapped as ``[CLS] ... [SEP]``. Labels and polarities are
    recorded once per word (on its first sub-token); ``valid`` marks first
    sub-tokens with 1 and continuation sub-tokens with 0.

    The examples themselves are left untouched, so the function can be called
    repeatedly on the same examples.

    Args:
        examples: Iterable of objects with ``text_a``, ``text_b``,
            ``sentence_label``, ``aspect_label`` and ``polarity`` lists.
            ``polarity`` must have one entry per word of
            ``text_a + ['[SEP]'] + text_b``.
        label_list: Label names; label ids are assigned from 1 in this order
            (0 is the padding id). Must contain ``"[CLS]"`` and ``"[SEP]"``.
        max_seq_length: Length every returned list is padded / truncated to.
        tokenizer: Object providing ``tokenize(word)`` and
            ``convert_tokens_to_ids(tokens)``.

    Returns:
        A list of ``InputFeatures``, one per example.
    """
    label_map = {label: i for i, label in enumerate(label_list, 1)}

    features = []
    for example in tqdm.tqdm(examples):
        # Build new lists: extending example.text_a / sentence_label in place
        # would append "[SEP] + aspect" again on every call.
        words = list(example.text_a) + ['[SEP]'] + list(example.text_b)
        word_labels = list(example.sentence_label) + ['[SEP]'] + list(example.aspect_label)
        word_polarities = example.polarity

        tokens = []
        labels = []
        polarities = []
        valid = []
        for i, word in enumerate(words):
            pieces = tokenizer.tokenize(word)
            tokens.extend(pieces)
            word_label = word_labels[i]
            word_polarity = word_polarities[i]
            for m in range(len(pieces)):
                if m == 0:
                    # Only the first sub-token of a word carries its label.
                    labels.append(word_label)
                    polarities.append(word_polarity)
                    valid.append(1)
                else:
                    valid.append(0)

        # Truncate so that [CLS] and the closing [SEP] still fit.
        if len(tokens) >= max_seq_length - 1:
            limit = max_seq_length - 2
            tokens = tokens[:limit]
            valid = valid[:limit]
            # labels / polarities hold one entry per word, not per token, so
            # keep exactly as many as there are surviving first sub-tokens.
            kept_words = sum(valid)
            labels = labels[:kept_words]
            polarities = polarities[:kept_words]

        final_tokens = ["[CLS]"] + tokens + ["[SEP]"]
        valid = [1] + valid + [1]
        label_ids = ([label_map["[CLS]"]]
                     + [label_map[label] for label in labels[:len(tokens)]]
                     + [label_map["[SEP]"]])

        input_ids_spc = list(tokenizer.convert_tokens_to_ids(final_tokens))
        input_mask = [1] * len(input_ids_spc)
        segment_ids = [0] * len(input_ids_spc)  # every position uses segment id 0
        label_mask = [1] * len(label_ids)

        # Pad everything up to max_seq_length.
        token_pad = max_seq_length - len(input_ids_spc)
        input_ids_spc += [0] * token_pad
        input_mask += [0] * token_pad
        segment_ids += [0] * token_pad
        valid += [1] * token_pad  # padded positions are marked valid, as before
        label_pad = max_seq_length - len(label_ids)
        label_ids += [0] * label_pad
        label_mask += [0] * label_pad
        polarities += [-1] * (max_seq_length - len(polarities))

        assert len(input_ids_spc) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length
        assert len(label_ids) == max_seq_length
        assert len(valid) == max_seq_length
        assert len(label_mask) == max_seq_length

        features.append(
            InputFeatures(input_ids_spc=input_ids_spc,
                          input_mask=input_mask,
                          segment_ids=segment_ids,
                          label_id=label_ids,
                          polarities=polarities,
                          valid_ids=valid,
                          label_mask=label_mask))
    return features

In [631]:
# Encode both splits (train first, then test) into fixed-length features.
train_features, test_features = (
    convert_examples_to_features(split_examples, LABEL_LIST, MAX_SEQUENCE_LENGTH, tokenizer)
    for split_examples in (train_examples, test_examples)
)

# Print every field of the first training feature for a visual sanity check.
first_feature = train_features[0]
for field in ("input_ids_spc", "input_mask", "segment_ids", "label_id",
              "valid_ids", "label_mask", "polarities"):
    print(getattr(first_feature, field))

100%|██████████| 496/496 [00:00<00:00, 2639.12it/s]
100%|██████████| 123/123 [00:00<00:00, 4027.63it/s]

[101, 1912, 6225, 677, 782, 2595, 1265, 6392, 6369, 738, 3300, 966, 2533, 671, 2990, 4638, 5301, 2544, 722, 1905, 102, 6392, 6369, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[4, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 2, 3, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 




In [632]:
from transformers import BertModel

# Load the pretrained BERT encoder named by PRETRAINED_BERT_MODEL.
bert_base_model = BertModel.from_pretrained(PRETRAINED_BERT_MODEL)
# Record the number of token-label classes on its config; ModelBert builds
# itself from this config object.
bert_base_model.config.num_labels = NUM_LABELS

In [633]:
from transformers.models.bert.modeling_bert import BertSelfAttention


class SelfAttention(torch.nn.Module):
    """``BertSelfAttention`` followed by a tanh activation.

    Args:
        config: Configuration object handed to ``BertSelfAttention``.
    """

    def __init__(self, config):
        super(SelfAttention, self).__init__()
        self.config = config
        self.SA = BertSelfAttention(config)
        self.tanh = torch.nn.Tanh()

    def forward(self, inputs):
        """Apply self-attention with an all-zero mask, then tanh.

        Args:
            inputs: Tensor whose first two dimensions are (batch, sequence).

        Returns:
            ``tanh`` of the first element of the attention module's output.
        """
        # All-zero mask passed as the second argument of the attention module.
        # Its shape and device follow the input instead of the global
        # MAX_SEQUENCE_LENGTH / DEVICE, so any sequence length works.
        zero_mask = torch.zeros((inputs.size(0), 1, 1, inputs.size(1)),
                                dtype=torch.float32, device=inputs.device)
        SA_out = self.SA(inputs, zero_mask)
        return self.tanh(SA_out[0])

In [634]:
from transformers.models.bert.modeling_bert import BertPooler
from transformers import BertForTokenClassification

class ModelBert(BertForTokenClassification):
    """Joint model for aspect-term tagging (ATE) and polarity classification (APC).

    A BERT encoder produces per-token hidden states. After the hidden states
    of valid positions are packed to the front of each sequence, the inherited
    ``classifier`` produces per-token tag logits and a pooled representation
    goes through ``dense`` to produce 3-way polarity logits.

    Args:
        bert_base_model: A loaded ``BertModel``; it is used as the encoder and
            its ``config`` configures the remaining layers.
    """

    # Vocabulary id of "[SEP]" in the input ids and the label id assigned to
    # "[SEP]" by the feature conversion (labels are numbered from 1).
    SEP_TOKEN_ID = 102
    SEP_LABEL_ID = 5

    def __init__(self, bert_base_model):
        config = bert_base_model.config
        super(ModelBert, self).__init__(config=config)
        # Use the encoder that was passed in instead of loading a second
        # pretrained copy from disk.
        self.bert_for_global_context = bert_base_model
        # Pooling layer applied to the encoder output.
        self.pooler = BertPooler(config)
        # Maps a hidden vector to the 3 polarity logits.
        self.dense = torch.nn.Linear(config.hidden_size, 3)
        self.dropout = torch.nn.Dropout(0.1)
        # NOTE(review): SA1/SA2 and the two linear layers below are not used
        # by forward(); kept so the module's parameters stay the same.
        self.SA1 = SelfAttention(config)
        self.SA2 = SelfAttention(config)
        self.linear_double = torch.nn.Linear(config.hidden_size * 2, config.hidden_size)
        self.linear_triple = torch.nn.Linear(config.hidden_size * 3, config.hidden_size)

    def get_ids_for_local_context_extractor(self, text_indices):
        """Return a copy of ``text_indices`` with everything after the first [SEP] zeroed.

        Args:
            text_indices: Integer tensor of token ids, one row per sequence.

        Returns:
            A new tensor on the same device as ``text_indices``.
        """
        # .copy(): for CPU tensors .numpy() shares memory with the tensor, so
        # editing the array directly would also modify the caller's batch.
        text_ids = text_indices.detach().cpu().numpy().copy()
        for row in text_ids:
            # argmax over a boolean row gives the first True (0 if there is none).
            sep_index = np.argmax(row == self.SEP_TOKEN_ID)
            row[sep_index + 1:] = 0
        return torch.from_numpy(text_ids).to(text_indices.device)

    def get_batch_token_labels_bert_base_indices(self, labels):
        """Return a copy of ``labels`` with everything after the first [SEP] label zeroed.

        Args:
            labels: Integer tensor of label ids, one row per sequence, or None.

        Returns:
            A new tensor on the same device as ``labels``; None if ``labels`` is None.
        """
        if labels is None:
            return
        label_ids = labels.detach().cpu().numpy().copy()
        for row in label_ids:
            sep_index = np.argmax(row == self.SEP_LABEL_ID)
            row[sep_index + 1:] = 0
        return torch.from_numpy(label_ids).to(labels.device)

    def get_batch_polarities(self, b_polarities):
        """Reduce per-token polarities to one polarity label per sequence.

        For each row the first value different from -1 is taken.

        Args:
            b_polarities: Integer tensor of shape (batch, sequence), or None.

        Returns:
            A long tensor with one entry per row, on the same device as
            ``b_polarities``; None if ``b_polarities`` is None.

        Raises:
            IndexError: If a row contains only -1.
        """
        if b_polarities is None:
            return None
        device = b_polarities.device
        polarity_rows = b_polarities.detach().cpu().numpy()
        polarities = np.zeros(polarity_rows.shape[0])
        for i, polarity in enumerate(polarity_rows):
            # polarity + 1 is non-zero exactly where polarity != -1.
            polarity_idx = np.flatnonzero(polarity + 1)
            if polarity_idx.size == 0:
                raise IndexError("sequence %d has no polarity other than -1" % i)
            polarities[i] = polarity[polarity_idx[0]]
        return torch.from_numpy(polarities).long().to(device)

    def forward(self, input_ids_spc, token_type_ids=None, attention_mask=None, labels=None, polarities=None,
                valid_ids=None, attention_mask_label=None):
        """Compute the two losses (training) or the two logit tensors (inference).

        Args:
            input_ids_spc: Token ids, shape (batch, sequence).
            token_type_ids: Accepted for call compatibility; not used.
            attention_mask: Attention mask forwarded to the encoder.
            labels: Token label ids; when given, losses are returned.
            polarities: Per-token polarity values (-1 where there is none).
                Required when ``labels`` is given.
            valid_ids: Tensor with 1 at the positions whose hidden state is kept.
            attention_mask_label: Accepted for call compatibility; not used.

        Returns:
            ``(loss_ate, loss_apc)`` if ``labels`` is not None, otherwise
            ``(ate_logits, apc_logits)``.
        """
        input_ids_spc = self.get_ids_for_local_context_extractor(input_ids_spc)
        labels = self.get_batch_token_labels_bert_base_indices(labels)
        polarity_labels = self.get_batch_polarities(polarities)

        # Element 0 of the encoder output holds the per-token hidden states.
        global_context_out = self.bert_for_global_context(input_ids_spc, attention_mask=attention_mask)[0]
        batch_size = global_context_out.shape[0]
        global_valid_output = torch.zeros_like(global_context_out)
        # Pack the hidden states of valid positions to the front of each
        # sequence; the remaining positions stay zero. One boolean-mask gather
        # per row avoids a host round-trip for every single position.
        for i in range(batch_size):
            kept = global_context_out[i][valid_ids[i] == 1]
            global_valid_output[i, :kept.size(0)] = kept

        global_context_out = self.dropout(global_valid_output)
        pooled_out = self.pooler(global_context_out)
        pooled_out = self.dropout(pooled_out)
        ate_logits = self.classifier(global_context_out)  # per-token tag logits
        apc_logits = self.dense(pooled_out)  # per-sequence polarity logits

        if labels is not None:
            # Label id 0 is padding and is excluded from the tagging loss.
            loss_fct = torch.nn.CrossEntropyLoss(ignore_index=0)
            loss_sen = torch.nn.CrossEntropyLoss()
            loss_ate = loss_fct(ate_logits.view(-1, self.num_labels), labels.view(-1))
            loss_apc = loss_sen(apc_logits, polarity_labels)
            return loss_ate, loss_apc
        else:
            return ate_logits, apc_logits


# Build the joint ATE/APC model from the BERT encoder loaded above.
model = ModelBert(bert_base_model)

In [635]:
# Move the model to DEVICE; the result is bound to `_` so the cell does not
# end in a bare expression that the notebook would echo.
_ = model.to(DEVICE)

In [636]:
from torch.optim import AdamW

param_optimizer = list(model.named_parameters())  # every (name, parameter) pair of the model
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']  # name fragments exempt from weight decay

# Split the parameters into two groups:
#   1. parameters whose name contains none of the `no_decay` fragments
#      -> weight decay 0.00001
#   2. parameters whose name contains one of the fragments
#      -> weight decay 0.0 (no decay)
decay_params, no_decay_params = [], []
for param_name, param in param_optimizer:
    if any(fragment in param_name for fragment in no_decay):
        no_decay_params.append(param)
    else:
        decay_params.append(param)

optimizer_grouped_parameters = [
    {'params': decay_params, 'weight_decay': 0.00001},
    {'params': no_decay_params, 'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=LEARNING_RATE, weight_decay=0.00001)

2.4.1 设置训练的输入

In [637]:
from torch.utils.data import DataLoader, RandomSampler, TensorDataset, SequentialSampler

# Stack each feature field of the training set into one long tensor.
all_spc_input_ids = torch.tensor([f.input_ids_spc for f in train_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
all_valid_ids = torch.tensor([f.valid_ids for f in train_features], dtype=torch.long)
all_lmask_ids = torch.tensor([f.label_mask for f in train_features], dtype=torch.long)
all_polarities = torch.tensor([f.polarities for f in train_features], dtype=torch.long)
# Bundle the tensors into a dataset. It gets its own name so that `train_data`
# keeps holding the raw sentences and the earlier cells can be re-run.
train_dataset = TensorDataset(all_spc_input_ids, all_input_mask, all_segment_ids, all_label_ids, all_polarities,
                              all_valid_ids, all_lmask_ids)
train_sampler = SequentialSampler(train_dataset)  # iterate in dataset order
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=BATCH_SIZE)  # batched loading

2.4.2 设置测试的输入

In [638]:
# Feature attributes in the order the dataset (and the model call) expects them.
eval_field_order = ("input_ids_spc", "input_mask", "segment_ids", "label_id",
                    "polarities", "valid_ids", "label_mask")
# One long tensor per field, stacked over all test features.
(all_spc_input_ids, all_input_mask, all_segment_ids, all_label_ids,
 all_polarities, all_valid_ids, all_lmask_ids) = (
    torch.tensor([getattr(feature, field) for feature in test_features], dtype=torch.long)
    for field in eval_field_order
)
eval_data = TensorDataset(all_spc_input_ids, all_input_mask, all_segment_ids, all_label_ids, all_polarities,
                          all_valid_ids, all_lmask_ids)
# Batches are drawn in random order.
eval_sampler = RandomSampler(eval_data)
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=BATCH_SIZE)

3.1 设置训练过程中的超参数，并利用logging模块输出训练过程的日志

In [639]:
import sys
import logging
import torch.nn.functional as F
from sklearn.metrics import f1_score, classification_report

EPOCH = 5  # number of training epochs
EVAL_STEP = 10  # run an evaluation every 10 optimizer steps

logger = logging.getLogger()
logger.setLevel(logging.INFO)
# The root logger outlives the cell: adding a handler unconditionally would
# attach one more stdout handler on every re-run and print each message that
# many times. Tag the handler and only add it when it is not there yet.
if not any(handler.get_name() == "notebook-stdout" for handler in logger.handlers):
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.set_name("notebook-stdout")
    logger.addHandler(stdout_handler)

In [640]:
def evaluate(dataloader, label_list):
    """Evaluate the global ``model`` on ``dataloader``.

    Args:
        dataloader: Yields batches of ``(input_ids_spc, input_mask,
            segment_ids, label_ids, polarities, valid_ids, l_mask)`` tensors.
        label_list: Label names; ids are assigned from 1 in this order. The
            id of the last label (``len(label_list)``) marks the end of the
            sentence part of a label row.

    Returns:
        ``(apc_result, ate_result)`` where ``apc_result`` is a dict with the
        keys ``'max_apc_test_acc'`` and ``'max_apc_test_f1'`` (percentages
        rounded to 2 digits) and ``ate_result`` is the tagging F1 percentage
        read from the classification report.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    y_true = []  # gold tag names
    y_pred = []  # predicted tag names
    n_test_correct, n_test_total = 0, 0
    test_apc_logits_all, test_polarities_all = None, None
    model.eval()
    label_map = {i: label for i, label in enumerate(label_list, 1)}
    end_label_id = len(label_list)
    for input_ids_spc, input_mask, segment_ids, label_ids, polarities, valid_ids, l_mask in dataloader:
        input_ids_spc = input_ids_spc.to(DEVICE)
        input_mask = input_mask.to(DEVICE)
        segment_ids = segment_ids.to(DEVICE)
        valid_ids = valid_ids.to(DEVICE)
        label_ids = label_ids.to(DEVICE)
        polarities = polarities.to(DEVICE)
        l_mask = l_mask.to(DEVICE)
        with torch.no_grad():
            # No labels are passed, so the model returns logits, not losses.
            ate_logits, apc_logits = model(
                input_ids_spc, segment_ids, input_mask,
                valid_ids=valid_ids, polarities=polarities, attention_mask_label=l_mask)
        polarities = model.get_batch_polarities(polarities)
        n_test_correct += (torch.argmax(apc_logits, -1) == polarities).sum().item()
        n_test_total += len(polarities)

        if test_polarities_all is None:
            test_polarities_all = polarities
            test_apc_logits_all = apc_logits
        else:
            test_polarities_all = torch.cat((test_polarities_all, polarities), dim=0)
            test_apc_logits_all = torch.cat((test_apc_logits_all, apc_logits), dim=0)

        # Applied to every batch, the first one included.
        label_ids = model.get_batch_token_labels_bert_base_indices(label_ids)
        # argmax over raw logits picks the same class as argmax over log-softmax.
        ate_preds = torch.argmax(ate_logits, dim=2).detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        for i, label_row in enumerate(label_ids):
            row_true = []
            row_pred = []
            # Position 0 is [CLS]; collect tags until the end-marker label.
            # Rows without an end marker contribute nothing.
            for j in range(1, len(label_row)):
                if label_row[j] == end_label_id:
                    y_true += row_true
                    y_pred += row_pred
                    break
                row_true.append(label_map.get(label_row[j], 'O'))
                row_pred.append(label_map.get(ate_preds[i][j], 'O'))

    if n_test_total == 0:
        raise ValueError("evaluate() received a dataloader without samples")

    test_acc = n_test_correct / n_test_total
    # sklearn expects (y_true, y_pred).
    test_f1 = f1_score(test_polarities_all.cpu(), torch.argmax(test_apc_logits_all, -1).cpu(), labels=[0, 1],
                       average='macro')
    test_acc = round(test_acc * 100, 2)
    test_f1 = round(test_f1 * 100, 2)
    apc_result = {'max_apc_test_acc': test_acc, 'max_apc_test_f1': test_f1}
    report = classification_report(y_true, y_pred, digits=4)
    tmps = report.split()
    # NOTE(review): assumes the report's usual text layout, where tokens 0-3
    # are the column headers and 4-7 are the first row's label, precision,
    # recall and f1 - i.e. this reads the f1 of the first listed label only.
    ate_result = round(float(tmps[7]) * 100, 2)
    return apc_result, ate_result

In [641]:
max_apc_test_acc = 0
max_apc_test_f1 = 0
max_ate_test_f1 = 0
global_step = 0
# Results of the most recent evaluation; None until the first one has run,
# so the per-epoch log below never touches an unbound name.
current_apc_test_acc = None
current_apc_test_f1 = None
current_ate_test_f1 = None

for epoch in range(EPOCH):
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(train_dataloader):
        # evaluate() switches the model to eval mode, so re-enable training
        # mode at the start of every step.
        model.train()
        batch = tuple(t.to(DEVICE) for t in batch)
        input_ids_spc, input_mask, segment_ids, label_ids, polarities, valid_ids, l_mask = batch

        # Forward pass; with labels the model returns the two losses.
        loss_ate, loss_apc = model(
            input_ids_spc, segment_ids, input_mask, label_ids, polarities, valid_ids, l_mask)
        loss = loss_ate + loss_apc
        loss.backward()
        nb_tr_examples += input_ids_spc.size(0)
        nb_tr_steps += 1
        optimizer.step()
        optimizer.zero_grad()
        global_step += 1
        if global_step % EVAL_STEP == 0:
            apc_result, ate_result = evaluate(eval_dataloader, LABEL_LIST)
            # Track the best value seen so far for each metric.
            max_apc_test_acc = max(max_apc_test_acc, apc_result['max_apc_test_acc'])
            max_apc_test_f1 = max(max_apc_test_f1, apc_result['max_apc_test_f1'])
            max_ate_test_f1 = max(max_ate_test_f1, ate_result)
            current_apc_test_acc = apc_result['max_apc_test_acc']
            current_apc_test_f1 = apc_result['max_apc_test_f1']
            current_ate_test_f1 = round(ate_result, 2)
    logger.info('Epoch %s' % epoch)
    if current_apc_test_acc is None:
        logger.info('No evaluation has run yet (global_step=%s, EVAL_STEP=%s)' % (global_step, EVAL_STEP))
    else:
        logger.info(f'APC_test_acc: {current_apc_test_acc}(max: {max_apc_test_acc})  '
                    f'APC_test_f1: {current_apc_test_f1}(max: {max_apc_test_f1})')
        logger.info(f'ATE_test_f1: {current_ate_test_f1}(max:{max_ate_test_f1})')

Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
Epoch 0
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93)  APC_test_f1: 81.69(max: 81.69)
APC_test_acc: 82.93(max: 82.93) 

KeyboardInterrupt: 

3.4 保存模型

In [None]:
import json
import os

SAVE_PATH = "./temp"
os.makedirs(SAVE_PATH, exist_ok=True)
model.save_pretrained(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)
# Label id -> label name (ids start at 1). JSON turns the integer keys into strings.
label_map = {i: label for i, label in enumerate(LABEL_LIST, 1)}
model_config = {
    "bert_model": PRETRAINED_BERT_MODEL,
    "do_lower": True,
    "max_seq_length": MAX_SEQUENCE_LENGTH,
    "num_labels": len(LABEL_LIST) + 1,
    "label_map": label_map
}
# NOTE(review): save_pretrained() above presumably also writes a config.json
# into SAVE_PATH, which this file would replace - confirm this is intended.
# The context manager closes the file so the JSON is fully written to disk.
with open(os.path.join(SAVE_PATH, "config.json"), "w") as config_file:
    json.dump(model_config, config_file)