In [1]:
import numpy as np
import pandas as pd
import sys
sys.path.append("D:/Experiment")
from MyKu import MyBERT
from MyKu import processing
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel
from tqdm import tqdm
import os
import time
from transformers import BertTokenizer
from transformers import logging
from sklearn import metrics


In [6]:

# --- Training setup: hyperparameters and shared globals ---------------------
file_name = 'readme.md'   # handed to MyBERT.test after every epoch; presumably the results log
batch_size = 16
num_epoch = 5             # number of training epochs
check_step = 1            # intended: evaluate and save the model every `check_step` epochs
learning_rate = 1e-5      # learning rate for the optimizer
categories = 2            # number of target classes

# Load the train/test data. HASOC 2020 is the active dataset; the OLID loaders
# (processing.get_OLID_train_data / processing.get_OLID_testA_data, optionally
# with processing.OLID_DATASET + '/train.tsv' and '/testA.tsv') are the alternative.
train_data, test_data = processing.load_hasoc2020()
train_iter, test_iter = MyBERT.load_bert_data(train_data, test_data, batch_size)

# Run on the GPU ("cuda") whenever torch reports one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The dataset is English, so use the English checkpoint bert-base-uncased.
# "uncased" means its vocabulary does not distinguish upper and lower case.
# Details: https://huggingface.co/bert-base-uncased
pretrained_model_name = 'bert-base-uncased'

# Build the classifier and move it onto `device`
# (when running on a GPU, its memory usage grows at this point).
model = MyBERT.MyBertModel(categories, pretrained_model_name)
model.to(device)

# Optimisation: Adam (background: https://www.jianshu.com/p/aebcaf8af76e)
# with cross-entropy as the loss for the two-class task.
optimizer = Adam(model.parameters(), lr=learning_rate)
loss = nn.CrossEntropyLoss()

# Local start time of this run (month_day_hour_minute), for logging and storage.
timestamp = time.strftime("%m_%d_%H_%M")


In [7]:
# Train for `num_epoch` epochs and run MyBERT.test on `test_iter` after each one.
# Opening the log file in append mode creates it if it does not exist yet; the
# `with` block guarantees the handle is released when the loop finishes or
# raises (previously the file was opened and never closed).
with open(file_name, 'a+') as fp:
    for epoch in range(1, num_epoch + 1):
        MyBERT.train(model, train_iter, device, optimizer, loss, epoch)
        MyBERT.test(model, test_iter, device, epoch, file_name)

Training Epoch 1: 100%|[31m██████████[0m| 232/232 [00:36<00:00,  6.33it/s]
Testing: 100%|[32m██████████[0m| 1592/1592 [00:17<00:00, 88.53it/s]


[[743  42]
 [150 657]]
              precision    recall  f1-score   support

           0       0.83      0.95      0.89       785
           1       0.94      0.81      0.87       807

    accuracy                           0.88      1592
   macro avg       0.89      0.88      0.88      1592
weighted avg       0.89      0.88      0.88      1592

Acc : 0.8793969849246231	 F1: 0.8725099601593626


Training Epoch 2: 100%|[31m██████████[0m| 232/232 [00:36<00:00,  6.44it/s]
Testing: 100%|[32m██████████[0m| 1592/1592 [00:17<00:00, 89.15it/s]


[[722  63]
 [ 81 726]]
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       785
           1       0.92      0.90      0.91       807

    accuracy                           0.91      1592
   macro avg       0.91      0.91      0.91      1592
weighted avg       0.91      0.91      0.91      1592

Acc : 0.9095477386934674	 F1: 0.9097744360902256


Training Epoch 3: 100%|[31m██████████[0m| 232/232 [00:36<00:00,  6.43it/s]
Testing: 100%|[32m██████████[0m| 1592/1592 [00:17<00:00, 89.87it/s]


[[714  71]
 [ 87 720]]
              precision    recall  f1-score   support

           0       0.89      0.91      0.90       785
           1       0.91      0.89      0.90       807

    accuracy                           0.90      1592
   macro avg       0.90      0.90      0.90      1592
weighted avg       0.90      0.90      0.90      1592

Acc : 0.9007537688442211	 F1: 0.9011264080100125


Training Epoch 4: 100%|[31m██████████[0m| 232/232 [00:35<00:00,  6.45it/s]
Testing: 100%|[32m██████████[0m| 1592/1592 [00:18<00:00, 87.93it/s]


[[724  61]
 [ 95 712]]
              precision    recall  f1-score   support

           0       0.88      0.92      0.90       785
           1       0.92      0.88      0.90       807

    accuracy                           0.90      1592
   macro avg       0.90      0.90      0.90      1592
weighted avg       0.90      0.90      0.90      1592

Acc : 0.9020100502512562	 F1: 0.9012658227848102


Training Epoch 5: 100%|[31m██████████[0m| 232/232 [00:36<00:00,  6.43it/s]
Testing: 100%|[32m██████████[0m| 1592/1592 [00:18<00:00, 88.30it/s] 

[[716  69]
 [ 89 718]]
              precision    recall  f1-score   support

           0       0.89      0.91      0.90       785
           1       0.91      0.89      0.90       807

    accuracy                           0.90      1592
   macro avg       0.90      0.90      0.90      1592
weighted avg       0.90      0.90      0.90      1592

Acc : 0.9007537688442211	 F1: 0.9008782936010038





In [4]:
# Imports for the attention-inspection cells that follow, grouped at the top
# of the cell so the dependencies are visible before any statement runs.
from bertviz import head_view, model_view
from transformers import AutoTokenizer

# Tokenizer for the checkpoint named by `pretrained_model_name`.
# (The bare `tokenizer` expression that used to sit mid-cell was a no-op:
# only the last expression of a cell is displayed.)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)

In [8]:
# One-sentence batch used as the running example for attention inspection.
sentence = ['you are a bitch']

# Encode to PyTorch tensors, then move every tensor of the encoding to `device`.
inputs = tokenizer(
    sentence,
    padding=True,
    truncation=True,
    return_tensors='pt',
)
inputs.to(device)
print(inputs)

inputs_ids, token_type_ids = inputs['input_ids'], inputs['token_type_ids']

# Turn the ids of the first (and only) sequence back into token strings.
tokens = tokenizer.convert_ids_to_tokens(inputs_ids[0])
tokens

{'input_ids': tensor([[ 101, 2017, 2024, 1037, 7743,  102]], device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]], device='cuda:0')}


['[CLS]', 'you', 'are', 'a', 'bitch', '[SEP]']

In [45]:

# Forward pass on the example sentence.
# assumes output[2] is the sequence of per-layer attention maps, each shaped
# (batch, heads, seq, seq) — TODO confirm against MyBERT.MyBertModel.forward
output = model(inputs)
attentions = output[2]

# Size the head-merging projection from the attention tensors themselves
# instead of hard-coding 72 -> 6, so the cell keeps working for sentences of
# other lengths (for the 6-token example this is still Linear(72, 6)).
num_heads, seq_len = attentions[0].shape[1], attentions[0].shape[-1]
out = nn.Linear(num_heads * seq_len, seq_len)
out.to(device)
# NOTE(review): `out` is freshly initialised and not trained in this notebook,
# so the merged maps depend on its random initial weights.

res = []
for layer_attention in attentions:  # one entry per layer (was range(12))
    # (batch, heads, seq, seq) -> (batch, seq, heads, seq)
    merged = layer_attention.permute(0, 2, 1, 3)
    # flatten the last two axes: (batch, seq, heads * seq)
    merged = merged.reshape(merged.shape[0], merged.shape[1], -1)
    # insert a singleton axis at position 1: (batch, 1, seq, heads * seq)
    merged = merged.unsqueeze(1)
    # project back to seq columns: (batch, 1, seq, seq)
    res.append(out(merged))

# model_view(attentions, tokens)  # optional: bertviz view of the unmerged attentions

In [46]:
# Freeze the list of per-layer tensors into a tuple before it is visualised.
res = (*res,)

In [48]:
# Sanity check: print the shape of every per-layer tensor, then render them
# with bertviz's head_view.
# NOTE(review): `res` holds outputs of the linear layer `out`, not the model's
# own attention weights — confirm the rendering is meaningful.
for layer_attention in res:
    print(layer_attention.shape)
head_view(res, tokens)


torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])
torch.Size([1, 1, 6, 6])


<IPython.core.display.Javascript object>

In [61]:
# Sum the per-layer tensors in order, OUT of place.
# The previous `out_put = res[0]; out_put += res[i]` aliased res[0] and added
# into it in place, which silently overwrote the first layer's tensor inside
# `res` and produced a different result every time the cell was re-run.
out_put = res[0]
for layer_tensor in res[1:]:  # every remaining layer (was range(1, 12))
    out_put = out_put + layer_tensor

# Drop all size-1 axes; for the example this is (1, 1, 6, 6) -> (6, 6).
out_put = out_put.squeeze()

# Project each row to 2 values. The input width is read from the data rather
# than hard-coded (it is 6 for the example sentence).
# NOTE(review): `out2` is freshly initialised and not trained in this notebook.
out2 = nn.Linear(out_put.shape[-1], 2)
out2.to(device)
rres = out2(out_put)
rres

tensor([[14.9932,  4.8412],
        [11.7724,  5.0840],
        [11.8480,  8.3845],
        [11.4185,  8.8563],
        [18.1975,  9.8138],
        [19.3964,  9.6495]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [64]:
# Three largest entries of each column of `rres` (search along dim 0),
# returned in descending order.
# Despite its name, `indices` holds the full (values, indices) result.
indices = rres.topk(k=3, dim=0, largest=True, sorted=True)
indices

torch.return_types.topk(
values=tensor([[19.3964,  9.8138],
        [18.1975,  9.6495],
        [14.9932,  8.8563]], device='cuda:0', grad_fn=<TopkBackward0>),
indices=tensor([[5, 4],
        [4, 5],
        [0, 3]], device='cuda:0'))