# 将模型转换成 ONNX

In [2]:
import os

import onnx
import onnxruntime as ort
import paddle2onnx
from onnxconverter_common import float16

In [3]:
# Where the converted model is stored, and the full path of the FP32 ONNX file.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
onnx_dir = r"G:\code\github\PaddleNLP\outputs\tnews\export_model\onnx"
float_onnx_file = os.path.join(onnx_dir, "model.onnx")

# exist_ok=True keeps this cell re-runnable once the directory already exists.
os.makedirs(onnx_dir, exist_ok=True)

In [4]:
# Convert the exported Paddle static-graph model to ONNX and save it.
# Inputs are the model program (.pdmodel) and its parameters (.pdiparams).
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
_static_model_file = r"G:\code\github\PaddleNLP\outputs\tnews\export_model\model.pdmodel"
_static_params_file = r"G:\code\github\PaddleNLP\outputs\tnews\export_model\model.pdiparams"

# Conversion targets ONNX opset 13 with the ONNX checker enabled.
onnx_model = paddle2onnx.command.c_paddle_to_onnx(
    model_file=_static_model_file,
    params_file=_static_params_file,
    opset_version=13,
    enable_onnx_checker=True,
)
# The converter's return value is written unchanged, in binary mode, to the
# FP32 model path defined in the configuration cell.
with open(float_onnx_file, "wb") as f:
    f.write(onnx_model)

In [None]:
# FP16 conversion is intentionally left disabled: FP16 precision is insufficient here and values may overflow.
# fp16_model_file = os.path.join(onnx_dir, "fp16_model.onnx")
# onnx_model = onnx.load_model(float_onnx_file)
# trans_model = float16.convert_float_to_float16(onnx_model, keep_io_types=True)
# onnx.save_model(trans_model, fp16_model_file)

In [6]:
# Build the ONNX Runtime session used for inference, requesting CUDA device 0.
providers = [("CUDAExecutionProvider", {"device_id": 0})]
sess_options = ort.SessionOptions()
# Use half of the logical cores for both thread pools. os.cpu_count() returns
# None when the count cannot be determined, and `None // 2` would raise
# TypeError; falling back to 1 yields 0, which ONNX Runtime documents as
# "let the runtime choose".
num_threads = (os.cpu_count() or 1) // 2
sess_options.intra_op_num_threads = num_threads
sess_options.inter_op_num_threads = num_threads
# The predictor was originally initialised here from the FP16 model; it now
# loads the FP32 export instead.
predictor = ort.InferenceSession(float_onnx_file, sess_options=sess_options, providers=providers)

In [7]:
# Show which execution providers the session reports.
print(predictor.get_providers())

# Gather the name of every model input, in the order the session lists them.
input_handler = [model_input.name for model_input in predictor.get_inputs()]
print(input_handler)

['CUDAExecutionProvider', 'CPUExecutionProvider']
['input_ids', 'token_type_ids', 'position_ids', 'attention_mask', 'omask_positions', 'cls_positions']


# 好了, 转换成 onnx 模型已经可以了, 现在是要准备输入数据了

In [37]:
# Candidate labels offered to the classifier.
schema = [
    "故事", "文化", "娱乐", "体育", "财经",
    "房产", "汽车", "教育", "科技", "军事",
    "旅游", "国际", "股票", "农业", "电竞",
]
# A single sample: primary text, an empty secondary text, and the candidate labels.
data = dict(text_a="农村依然很重视土葬", text_b="", choices=schema)

In [9]:
from paddlenlp.transformers import UTC, AutoTokenizer
from paddlenlp.prompt import PromptDataCollatorWithPadding, UTCTemplate

# Directory the tokenizer is loaded from.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
task_path = r"G:\code\github\PaddleNLP\outputs\tnews"
# Passed to UTCTemplate as its second argument -- presumably the maximum
# sequence length in tokens; TODO confirm against the paddlenlp documentation.
max_seq_len = 512

tokenizer = AutoTokenizer.from_pretrained(task_path)
# return_tensors="np" asks the collator for NumPy outputs.
collator = PromptDataCollatorWithPadding(tokenizer, return_tensors="np")
# Callable applied to each raw sample later on to build the model inputs.
template = UTCTemplate(tokenizer, max_seq_len)

[32m[2023-07-26 20:11:26,139] [    INFO][0m - We are using <class 'paddlenlp.transformers.ernie.tokenizer.ErnieTokenizer'> to load 'G:\code\github\PaddleNLP\outputs\tnews'.[0m


In [28]:
# Wrap the single sample in a list and run every sample through the prompt template.
inputs = [data]
tokenized_inputs = list(map(template, inputs))

In [13]:
# Inspect which fields the template produced for the first sample.
tokenized_inputs[0].keys()

dict_keys(['soft_token_ids', 'input_ids', 'position_ids', 'token_type_ids', 'attention_mask', 'choices', 'omask_positions', 'cls_positions'])

In [46]:
# Keep the raw samples next to their collated batch so post-processing can pair them.
outputs = {"text": inputs}
# The collator is invoked here; it was configured with return_tensors="np".
outputs["batches"] = collator(tokenized_inputs)

In [50]:
# dtype each model input is cast to before being fed to the ONNX session.
dtype_dict = dict(
    input_ids="int64",
    token_type_ids="int64",
    position_ids="int64",
    attention_mask="float32",
    omask_positions="int64",
    cls_positions="int64",
)

outputs["batch_logits"] = []
# ONNX inference: cast every listed input, run the session, keep its first output.
batch = outputs["batches"]
input_dict = {name: batch[name].astype(dtype) for name, dtype in dtype_dict.items()}
logits = predictor.run(None, input_dict)[0].tolist()
# Finally, store the logits with the rest of the results.
outputs["batch_logits"] = logits

In [51]:
# Display the raw logits returned by the ONNX session (one row per sample).
outputs["batch_logits"]

[[-1.218139410018921,
  -2.090022325515747,
  -6.312975883483887,
  -8.242919921875,
  -6.24294900894165,
  -5.859382629394531,
  -6.72791051864624,
  -6.126931190490723,
  -6.983493328094482,
  -5.1142659187316895,
  -3.1050539016723633,
  -7.003054141998291,
  -8.959622383117676,
  0.9350083470344543,
  -6.280238151550293]]

In [54]:
# Display the logits again; the value is the same as shown by the earlier display cell.
outputs["batch_logits"]

[[-1.218139410018921,
  -2.090022325515747,
  -6.312975883483887,
  -8.242919921875,
  -6.24294900894165,
  -5.859382629394531,
  -6.72791051864624,
  -6.126931190490723,
  -6.983493328094482,
  -5.1142659187316895,
  -3.1050539016723633,
  -7.003054141998291,
  -8.959622383117676,
  0.9350083470344543,
  -6.280238151550293]]

In [55]:
from scipy.special import expit as np_sigmoid
from scipy.special import softmax as np_softmax
import numpy as np
# Decision settings: the sigmoid score a choice must exceed in multi-label
# mode, and the switch between single-label and multi-label decoding.
pred_threshold = 0.5
single_label = True

result = []
for sample, sample_logits in zip(outputs["text"], outputs["batch_logits"]):
    # Rebuild the output record for this sample; empty text fields are omitted.
    output = {field: sample[field] for field in ("text_a", "text_b") if len(sample[field]) > 0}

    if not single_label:
        # Multi-label: sigmoid per choice, keep every choice above the threshold.
        scores = np_sigmoid(sample_logits)
        if scores.ndim == 2:
            scores = scores[0]
        output["predictions"] = [
            {"label": sample["choices"][idx], "score": class_score}
            for idx, class_score in enumerate(scores)
            if class_score > pred_threshold
        ]
    else:
        # Single label: softmax over the choices, report the highest-logit choice.
        probabilities = np_softmax(sample_logits, axis=-1)
        best = np.argmax(sample_logits, axis=-1)
        output["predictions"] = [{"label": sample["choices"][best], "score": probabilities[best]}]
    result.append(output)

In [56]:
# Display the final predictions built by the post-processing cell.
result

[{'text_a': '农村依然很重视土葬',
  'predictions': [{'label': '农业', 'score': 0.84022164792424}]}]