In [1]:
import os
import onnx
import onnxruntime as rt
from transformers import BertTokenizer, BertModel

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import transformers
print(transformers.__version__)
print(onnx.__version__)
print(rt.__version__)

3.1.0
1.9.0
1.7.0


In [3]:
from transformers.optimization import AdamW

In [4]:
?AdamW

In [5]:
from transformers.optimization import get_linear_schedule_with_warmup

In [6]:
?get_linear_schedule_with_warmup

In [7]:
basedir = "data/bert-base-chinese"

In [8]:
os.listdir(basedir)

['config.json', 'pytorch_model.bin', 'vocab.txt']

In [9]:
tokernizer = BertTokenizer.from_pretrained(basedir)

In [10]:
model = BertModel.from_pretrained(basedir)

In [11]:
inputs = tokernizer("我们来试试牛逼的bert模型吧", return_tensors="pt")
# inputs = tokernizer.tokenize("我们来试试牛逼的bert模型吧")

In [12]:
inputs

{'input_ids': tensor([[ 101, 2769,  812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,
         1798, 1416,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [13]:
tokernizer.decode(inputs["input_ids"].data.cpu().numpy().reshape(-1))

'[CLS] 我 们 来 试 试 牛 逼 的 bert 模 型 吧 [SEP]'

In [14]:
outputs = model(**inputs)
outputs

(tensor([[[-0.0022,  0.3962, -0.4054,  ...,  0.3902, -0.1599,  0.0457],
          [ 0.2589, -0.5536, -0.2424,  ..., -0.5843, -0.9316, -0.0326],
          [ 0.5397, -1.2938, -0.9617,  ...,  0.8388,  0.2714,  0.0597],
          ...,
          [-0.5858,  0.4624,  0.8773,  ...,  0.5462, -0.6654,  0.1387],
          [ 0.4450,  0.0043, -0.5730,  ...,  0.8095, -0.1073, -0.3027],
          [-0.7771,  0.0293, -0.1939,  ..., -0.1485, -0.3096, -0.4418]]],
        grad_fn=<NativeLayerNormBackward>),
 tensor([[ 0.9999,  1.0000,  0.9988,  0.8066,  0.6104,  0.9376, -0.9984,  0.5931,
           0.9948, -0.9984,  1.0000,  0.9997,  0.7948, -0.8251,  1.0000, -0.9999,
          -0.8589,  0.9303,  0.9632, -0.0167,  1.0000, -1.0000, -0.9846, -0.5761,
           0.7221,  0.9991,  0.9855,  0.5958, -1.0000,  0.9981,  0.9679,  0.9998,
           0.8287, -0.9999, -0.9998,  0.9071, -0.0574,  0.9975,  0.3821, -0.8907,
          -0.9937, -0.3870,  0.5671, -0.9984, -0.8570,  0.7311, -1.0000, -0.9999,
           0.75

In [15]:
outputs[0].shape

torch.Size([1, 15, 768])

In [16]:
outputs[-1].shape

torch.Size([1, 768])

In [17]:
model

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(21128, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

##  输入两个句子

In [16]:
inputs2 = tokernizer("我们来试试牛逼的BERT模型吧，哈哈哈", "听说BERT模型吊炸天！", return_tensors="pt")

In [17]:
?tokernizer

In [18]:
inputs2

{'input_ids': tensor([[ 101, 2769,  812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,
         1798, 1416, 8024, 1506, 1506, 1506,  102, 1420, 6432, 8815, 8716, 3563,
         1798, 1396, 4156, 1921, 8013,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1]])}

In [21]:
tokernizer.decode(inputs2["input_ids"].data.cpu().numpy().reshape(-1))

'[CLS] 我 们 来 试 试 牛 逼 的 bert 模 型 吧 ， 哈 哈 哈 [SEP] 听 说 bert 模 型 吊 炸 天 ！ [SEP]'

In [22]:
seq_outputs, pooled_outputs = model(**inputs2)

In [23]:
seq_outputs.shape

torch.Size([1, 30, 768])

In [24]:
pooled_outputs.shape

torch.Size([1, 768])

In [25]:
seq_outputs

tensor([[[ 0.5632, -0.2772, -0.7578,  ..., -0.4281, -0.6566,  0.0447],
         [ 0.1452, -0.3351, -0.1041,  ..., -0.7047, -0.7250,  0.1023],
         [ 0.4466, -1.6673, -0.9454,  ...,  0.6884,  0.2825,  0.1363],
         ...,
         [ 0.6694, -0.2453, -0.5614,  ..., -0.1644,  0.7202, -0.3962],
         [-0.5863, -0.1840,  0.5368,  ..., -0.1647,  0.2451, -0.8209],
         [ 0.1379, -0.5186, -0.7563,  ..., -0.2276, -0.3963, -0.5681]]],
       grad_fn=<NativeLayerNormBackward>)

In [26]:
seq_outputs.mean(1)  # 词向量平均q

tensor([[ 1.3726e-01, -3.5510e-01, -6.3008e-01,  5.3175e-01,  6.0181e-01,
         -6.8932e-01,  3.0354e-01, -2.3032e-01, -4.1510e-01,  4.3610e-01,
         -1.1629e-01, -3.5269e-01, -5.8402e-02,  3.7394e-01,  5.1749e-02,
         -1.6117e-01, -3.3302e-02,  2.0406e-01,  1.1197e-01,  2.0059e-01,
         -2.0750e-01,  4.1024e-01,  9.0569e-02,  6.2213e-02, -1.1207e-01,
         -3.7839e-01, -1.0782e-01, -2.9131e-01,  4.0371e-01, -6.3899e-01,
         -4.9931e-01, -3.3856e-01, -2.7771e-01,  7.9626e-02,  2.8813e-01,
         -4.8025e-01, -5.1427e-01, -1.7292e-01, -4.3150e-01, -3.8770e-01,
         -4.3781e-01, -3.5913e-01, -4.9081e-01,  1.9053e-01,  6.9059e-02,
          3.9864e-02,  3.1614e-01,  2.7560e-01,  7.8873e-02,  3.4491e-01,
          1.2843e-01,  8.5680e+00, -1.5622e-01, -4.5469e-01, -7.6156e-01,
          6.5412e-01,  1.0692e+00, -1.8387e-01,  4.1132e-02, -4.8158e-01,
         -4.7975e-02,  1.5753e-01, -1.5874e-01, -2.4830e-01,  2.0331e-02,
         -9.0839e-02, -1.3300e-01,  4.

In [27]:
seq_outputs[0, :26, 0]

tensor([ 0.5632,  0.1452,  0.4466, -0.1069, -0.5541, -0.7722,  0.1167,  0.7956,
        -0.6453,  0.4843,  1.5340, -0.0571, -0.4845,  0.5012,  0.9630,  1.1255,
         0.6296,  0.1824,  0.1379,  0.0819, -0.8469,  0.2036,  1.2898, -0.3107,
        -0.8340,  0.0492], grad_fn=<SelectBackward>)

In [28]:
seq_outputs[0, :26, 0].mean()

tensor(0.1784, grad_fn=<MeanBackward0>)

In [29]:
seq_outputs[0, :26, 1].mean()

tensor(-0.3420, grad_fn=<MeanBackward0>)

In [30]:
for idx, x in enumerate(model.parameters(),1):
    print(idx, x.size())

1 torch.Size([21128, 768])
2 torch.Size([512, 768])
3 torch.Size([2, 768])
4 torch.Size([768])
5 torch.Size([768])
6 torch.Size([768, 768])
7 torch.Size([768])
8 torch.Size([768, 768])
9 torch.Size([768])
10 torch.Size([768, 768])
11 torch.Size([768])
12 torch.Size([768, 768])
13 torch.Size([768])
14 torch.Size([768])
15 torch.Size([768])
16 torch.Size([3072, 768])
17 torch.Size([3072])
18 torch.Size([768, 3072])
19 torch.Size([768])
20 torch.Size([768])
21 torch.Size([768])
22 torch.Size([768, 768])
23 torch.Size([768])
24 torch.Size([768, 768])
25 torch.Size([768])
26 torch.Size([768, 768])
27 torch.Size([768])
28 torch.Size([768, 768])
29 torch.Size([768])
30 torch.Size([768])
31 torch.Size([768])
32 torch.Size([3072, 768])
33 torch.Size([3072])
34 torch.Size([768, 3072])
35 torch.Size([768])
36 torch.Size([768])
37 torch.Size([768])
38 torch.Size([768, 768])
39 torch.Size([768])
40 torch.Size([768, 768])
41 torch.Size([768])
42 torch.Size([768, 768])
43 torch.Size([768])
44 torch.S

In [31]:
for x in model.state_dict():
    print(x)

embeddings.position_ids
embeddings.word_embeddings.weight
embeddings.position_embeddings.weight
embeddings.token_type_embeddings.weight
embeddings.LayerNorm.weight
embeddings.LayerNorm.bias
encoder.layer.0.attention.self.query.weight
encoder.layer.0.attention.self.query.bias
encoder.layer.0.attention.self.key.weight
encoder.layer.0.attention.self.key.bias
encoder.layer.0.attention.self.value.weight
encoder.layer.0.attention.self.value.bias
encoder.layer.0.attention.output.dense.weight
encoder.layer.0.attention.output.dense.bias
encoder.layer.0.attention.output.LayerNorm.weight
encoder.layer.0.attention.output.LayerNorm.bias
encoder.layer.0.intermediate.dense.weight
encoder.layer.0.intermediate.dense.bias
encoder.layer.0.output.dense.weight
encoder.layer.0.output.dense.bias
encoder.layer.0.output.LayerNorm.weight
encoder.layer.0.output.LayerNorm.bias
encoder.layer.1.attention.self.query.weight
encoder.layer.1.attention.self.query.bias
encoder.layer.1.attention.self.key.weight
encoder.la

In [32]:
#!pip install transformers==3.1.0 -i https://mirrors.aliyun.com/pypi/simple

In [33]:
# !python -V

## pytorch 转onnx

In [4]:
import torch

In [5]:
basedir = "data/bert-base-chinese"

In [6]:
model = BertModel.from_pretrained(basedir)

In [7]:
tokernizer = BertTokenizer.from_pretrained(basedir)

In [8]:
inputs = tokernizer("我们来试试牛逼的bert模型吧, 听说它非常厉害啊",
                    max_length = 64,  # maximum length of a sentence
                    pad_to_max_length=True,  # Add [PAD]s
                    return_attention_mask = True,
                    return_tensors="pt")
inputs

Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


{'input_ids': tensor([[ 101, 2769,  812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,
         1798, 1416,  117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557,  102,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}

In [21]:
inp2 = tokernizer.encode_plus("我们来试试牛逼的bert模型吧, 听说它非常厉害啊",
                    padding='max_length',
                    truncation=True,
                    max_length = 64,  # maximum length of a sentence
                    pad_to_max_length=True,  # Add [PAD]s
                    return_attention_mask = True,
                    return_tensors="pt")
inp2

{'input_ids': tensor([[ 101, 2769,  812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,
         1798, 1416,  117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557,  102,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}

In [27]:
ids = inp2["input_ids"].data.cpu().numpy().reshape(-1)
ids[ids!=0]

array([ 101, 2769,  812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716,
       3563, 1798, 1416,  117, 1420, 6432, 2124, 7478, 2382, 1326, 2154,
       1557,  102])

In [None]:
tokernizer.decode(inp2["input_ids"].data.cpu().numpy().reshape(-1))

In [69]:
?tokernizer.encode_plus

In [57]:
input_ids = inputs['input_ids']
input_ids

tensor([[ 101, 2769,  812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,
         1798, 1416,  117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557,  102]])

In [58]:
token_type_ids = inputs['token_type_ids']
token_type_ids

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [59]:
attention_mask = inputs['attention_mask']
attention_mask

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [42]:
x = (input_ids, token_type_ids, attention_mask)

In [43]:
torch.onnx.export(model, x, "roberta.onnx", opset_version=10, verbose=True)

  position_ids = self.position_ids[:, :seq_length]
  input_tensor.shape == tensor_shape for input_tensor in input_tensors


graph(%input.1 : Long(1, 15),
      %attention_mask : Long(1, 15),
      %input.3 : Long(1, 15),
      %embeddings.position_ids : Long(1, 512),
      %embeddings.word_embeddings.weight : Float(21128, 768),
      %embeddings.position_embeddings.weight : Float(512, 768),
      %embeddings.token_type_embeddings.weight : Float(2, 768),
      %embeddings.LayerNorm.weight : Float(768),
      %embeddings.LayerNorm.bias : Float(768),
      %encoder.layer.0.attention.self.query.weight : Float(768, 768),
      %encoder.layer.0.attention.self.query.bias : Float(768),
      %encoder.layer.0.attention.self.key.weight : Float(768, 768),
      %encoder.layer.0.attention.self.key.bias : Float(768),
      %encoder.layer.0.attention.self.value.weight : Float(768, 768),
      %encoder.layer.0.attention.self.value.bias : Float(768),
      %encoder.layer.0.attention.output.dense.weight : Float(768, 768),
      %encoder.layer.0.attention.output.dense.bias : Float(768),
      %encoder.layer.0.attention.outpu

In [44]:
model2 = onnx.load("roberta.onnx")
# 检查模型格式是否完整及正确
onnx.checker.check_model(model2)

print(onnx.helper.printable_graph(model2.graph))

graph torch-jit-export (
  %input.1[INT64, 1x15]
  %attention_mask[INT64, 1x15]
  %input.3[INT64, 1x15]
) initializers (
  %embeddings.LayerNorm.bias[FLOAT, 768]
  %embeddings.LayerNorm.weight[FLOAT, 768]
  %embeddings.position_embeddings.weight[FLOAT, 512x768]
  %embeddings.position_ids[INT64, 1x512]
  %embeddings.token_type_embeddings.weight[FLOAT, 2x768]
  %embeddings.word_embeddings.weight[FLOAT, 21128x768]
  %encoder.layer.0.attention.output.LayerNorm.bias[FLOAT, 768]
  %encoder.layer.0.attention.output.LayerNorm.weight[FLOAT, 768]
  %encoder.layer.0.attention.output.dense.bias[FLOAT, 768]
  %encoder.layer.0.attention.output.dense.weight[FLOAT, 768x768]
  %encoder.layer.0.attention.self.key.bias[FLOAT, 768]
  %encoder.layer.0.attention.self.key.weight[FLOAT, 768x768]
  %encoder.layer.0.attention.self.query.bias[FLOAT, 768]
  %encoder.layer.0.attention.self.query.weight[FLOAT, 768x768]
  %encoder.layer.0.attention.self.value.bias[FLOAT, 768]
  %encoder.layer.0.attention.self.value.

In [45]:
#获取输出层，包含层名称、维度信息
model2.graph.output

[name: "1598"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 1
      }
      dim {
        dim_value: 15
      }
      dim {
        dim_value: 768
      }
    }
  }
}
, name: "1602"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 1
      }
      dim {
        dim_value: 768
      }
    }
  }
}
]

In [46]:
session = rt.InferenceSession("roberta.onnx")

In [47]:
session.get_inputs()

[<onnxruntime.capi.onnxruntime_pybind11_state.NodeArg at 0x7efcb3949538>,
 <onnxruntime.capi.onnxruntime_pybind11_state.NodeArg at 0x7efcb39495e0>,
 <onnxruntime.capi.onnxruntime_pybind11_state.NodeArg at 0x7efcb3949490>]

In [48]:
(session.get_inputs()[0].name, session.get_inputs()[1].name, session.get_inputs()[2].name, session.get_inputs()[-1].name)

('input.1', 'attention_mask', 'input.3', 'input.3')

In [49]:
?session.run

In [50]:
name1 = session.get_inputs()[0].name

In [51]:
name2 = session.get_inputs()[1].name
name2

'attention_mask'

In [60]:
out = session.run(None, {name1: input_ids.cpu().numpy(), session.get_inputs()[2].name: token_type_ids.cpu().numpy(), name2:  attention_mask.cpu().numpy() })

InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input.3 for the following indices
 index: 1 Got: 24 Expected: 15
 Please fix either the inputs or the model.

In [53]:
out[0]

array([[[-0.00217071,  0.39623275, -0.40544528, ...,  0.39019328,
         -0.1599319 ,  0.04569437],
        [ 0.25887132, -0.5536411 , -0.24241525, ..., -0.58426225,
         -0.9316153 , -0.03258407],
        [ 0.5397451 , -1.2938117 , -0.96167237, ...,  0.8387685 ,
          0.27140135,  0.05966763],
        ...,
        [-0.58584267,  0.4624487 ,  0.87729377, ...,  0.54620314,
         -0.6653838 ,  0.13872091],
        [ 0.44499832,  0.00427569, -0.5730417 , ...,  0.80946803,
         -0.10727846, -0.30272254],
        [-0.7771462 ,  0.02932469, -0.1939237 , ..., -0.14846408,
         -0.30965   , -0.4417867 ]]], dtype=float32)

In [54]:
#[-0.0022,  0.3962, -0.4054,  ...,  0.3902, -0.1599,  0.0457]

In [55]:
?torch.onnx.export

In [70]:
torch.__version__

'1.3.0'