### 自定义算子

In [3]:
import torch
from torch import nn


def custom_relu(x):
    """Rectify ``x`` element-wise by clamping it from below at zero.

    Args:
        x: Input tensor.

    Returns:
        A new tensor in which every element smaller than 0 is replaced by 0;
        all other elements are passed through unchanged.
    """
    return x.clamp(min=0.0)


def custom_shift(x, bias):
    """Add a scalar offset to every element of ``x``.

    The shift is done with a native tensor operation rather than a NumPy
    round trip.  Going through NumPy would require CUDA to move the result
    back, would detach the result from the autograd graph, and would modify
    a CPU input in place, because ``Tensor.numpy()`` shares memory with the
    tensor it came from.

    Args:
        x: Input tensor (any device).
        bias: Scalar offset added to each element of ``x``.

    Returns:
        A new tensor holding ``x + bias`` on the same device as ``x``.
        ``x`` itself is left unmodified.
    """
    # Out-of-place add: keeps device, keeps gradients, never touches `x`.
    return x + bias


class CustomOperator(nn.Module):
    """Module wrapper that applies ``custom_shift`` with a fixed bias."""

    def __init__(self, bias):
        """Store the offset used by ``forward``.

        Args:
            bias: Value passed to ``custom_shift`` as its ``bias`` argument.
        """
        super().__init__()
        self.bias = bias

    def forward(self, x):
        """Return ``custom_shift(x, self.bias)``."""
        shifted = custom_shift(x, self.bias)
        return shifted

### 使用自定义算子

In [58]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier with 10 outputs.

    Each stage is ``Conv2d(5x5, stride 1, padding 2) -> ReLU -> MaxPool2d(2)``.
    The linear head takes ``32 * 7 * 7`` features, which is what the two
    stages produce for a single-channel 28x28 input (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels; padding=2 keeps the spatial size for the
        # 5x5 kernel, the pooling layer then halves it.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Stage 2: 16 -> 32 channels, same layout as stage 1.
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Custom operator is currently disabled:
        # self.custom_op = CustomOperator(bias=0.0004)
        self.out = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Run both conv stages, flatten per sample and apply the linear head.

        Args:
            x: Input batch; the first conv layer expects 1 channel.

        Returns:
            Tensor with one row of 10 scores per sample.
        """
        features = self.conv2(self.conv1(x))
        # features = self.custom_op(features)
        flat = features.view(features.size(0), -1)
        return self.out(flat)


### 模拟模型训练

In [60]:
from tqdm import tqdm

# Synthetic training set: 200 samples, each already shaped as a batch of one
# (1, 1, 28, 28), so iterating over dim 0 yields tensors the model accepts
# directly without an extra unsqueeze.
training_data_x = torch.randn((200, 1, 1, 28, 28)).cuda()
# `high` is exclusive in torch.randint, so 10 (not 9) is required for the
# labels to cover all ten classes 0..9 of the model's output layer.
training_data_y = torch.randint(0, 10, [200, 1]).cuda()

model = CNN().cuda()
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_func = nn.CrossEntropyLoss()

# zip() has no length, so pass `total` explicitly to get a real progress bar.
for data in tqdm(zip(training_data_x, training_data_y),
                 total=len(training_data_x)):
    x, y = data
    output = model(x)
    loss = loss_func(output, y)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()  # compute gradients
    optimizer.step()  # apply the gradients

# Save the weights from the CPU copy of the model.
torch.save(model.cpu().state_dict(), './tensorrt_custom_operator.pt')

200it [00:00, 318.75it/s]


### 模型转ONNX

In [61]:
# Rebuild the network in inference mode and restore the trained weights.
weights = './tensorrt_custom_operator.pt'
model = CNN().eval()
weights_dict = torch.load(weights)
model.load_state_dict(weights_dict)

# Per-module tweaks applied before export.
for m in model.modules():
    module_type = type(m)
    if module_type is nn.Conv2d:
        m._non_persistent_buffers_set = set()
    elif module_type in (nn.ReLU, nn.ReLU6, nn.SiLU, nn.Hardswish, nn.LeakyReLU):
        m.inplace = True

model.cuda()

# Export with a single dummy sample; dim 0 of input and output is marked
# dynamic under the name 'batch'.
onnx_file = './tensorrt_custom_operator.onnx'
dummy_input = torch.randn([1, 1, 28, 28]).cuda()
torch.onnx.export(
    model,
    dummy_input,
    onnx_file,
    verbose=False,
    opset_version=12,
    training=torch.onnx.TrainingMode.EVAL,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}},
)

### 验证onnx转换结果

In [62]:
import onnx

# Read the exported graph back from `onnx_file` and run ONNX's model checker
# on it to validate the export before any further processing.
model_onnx = onnx.load(onnx_file)
onnx.checker.check_model(model_onnx)

### Simplify

In [63]:
import onnxsim

# Simplify the graph; `check` is the validation flag returned by onnxsim.
model_onnx, check = onnxsim.simplify(model_onnx)
# Explicit raise instead of `assert`, which is stripped under `python -O`
# and would let an unvalidated model be saved silently.
if not check:
    raise AssertionError('assert check failed')
# From here on `onnx_file` points at the simplified model.
onnx_file = './tensorrt_custom_operator_simplified.onnx'
onnx.save(model_onnx, onnx_file)

### 转TensorRT

In [None]:
# !git clone https://github.com/NVIDIA-AI-IOT/torch2trt
# %cd torch2trt  # use %cd: "!cd" runs in a subshell and does not change the notebook's working directory
# !python setup.py install

import tensorrt as trt
from torch2trt import torch2trt



