# Deploying yolort on ONNXRuntime

In [1]:
import cv2

import torch
import onnx
import onnxruntime

from yolort.models import yolov5s

from yolort.utils import get_image_from_url, read_image_to_tensor

In [2]:
import os

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

device = torch.device('cpu')

## Model Definition and Initialization

In [3]:
model = yolov5s(export_friendly=True, pretrained=True, score_thresh=0.45)

model = model.eval()
model = model.to(device)

### Load images to infer

In [4]:
img_one = get_image_from_url("https://gitee.com/zhiqwang/yolov5-rt-stack/raw/master/test/assets/bus.jpg")
# img_one = cv2.imread('../test/assets/bus.jpg')
img_one = read_image_to_tensor(img_one, is_half=False)
img_one = img_one.to(device)

img_two = get_image_from_url("https://gitee.com/zhiqwang/yolov5-rt-stack/raw/master/test/assets/zidane.jpg")
# img_two = cv2.imread('../test/assets/zidane.jpg')
img_two = read_image_to_tensor(img_two, is_half=False)
img_two = img_two.to(device)

# images = [img_one, img_two]
# Uncomment the above line and comment the next line if you want to
# use the multi-batch inferencing on onnxruntime
images = [img_one]

### Inference on PyTorch backend

In [5]:
with torch.no_grad():
    model_out = model(images)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [6]:
%%time
with torch.no_grad():
    model_out = model(images)

CPU times: user 3.44 s, sys: 20 ms, total: 3.46 s
Wall time: 96.9 ms


In [7]:
model_out[0]['boxes']

tensor([[669.26556, 391.30249, 809.86627, 885.23444],
        [ 54.06350, 397.83176, 235.95316, 901.37323],
        [222.88336, 406.81192, 341.55716, 854.77924],
        [ 18.63205, 232.97676, 810.97394, 760.11700]])

In [8]:
model_out[0]['scores']

tensor([0.89005, 0.87333, 0.85366, 0.72340])

In [9]:
model_out[0]['labels']

tensor([0, 0, 0, 5])

## Export the model to ONNX

In [10]:
from torchvision.ops._register_onnx_ops import _onnx_opset_version

In [11]:
export_onnx_name = 'yolov5s.onnx'  # path of the exported ONNX models

print(f'We are using opset version: {_onnx_opset_version}')

We are using opset version: 11


In [12]:
# Export to ONNX model
torch.onnx.export(
    model,
    (images,),
    export_onnx_name,
    do_constant_folding=True,
    opset_version=_onnx_opset_version, 
    input_names=["images_tensors"],
    output_names=["scores", "labels", "boxes"],
    dynamic_axes={
        "images_tensors": [0, 1, 2],
        "boxes": [0, 1],
        "labels": [0],
        "scores": [0],
    },
)

  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))
  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))
  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))
  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))
  stride = torch.as_tensor([stride], dtype=dtype, device=device)
  anchor_grid = torch.as_tensor(anchor_grid, dtype=dtype, device=device)
  shifts = shifts - torch.tensor(0.5, dtype=shifts.dtype, device=device)
  for s, s_orig in zip(new_size, original_size)
  for s, s_orig in zip(new_size, original_size)
  "If indices include negative values, the exported graph will produce incorrect results.")
  "version 11 or higher.")


## Simplify the exported ONNX model (Optional)

*ONNX* is great, but sometimes too complicated. And thanks to @daquexian for providing a powerful tool named [`onnxsim`](https://github.com/daquexian/onnx-simplifier/) to eliminate some redundant operators.

First of all, let's install `onnx-simplifier` with following script.

```shell
pip install -U onnx-simplifier
```

In [13]:
import onnxsim

# onnx-simplifier version
print(f'Starting simplifing with onnxsim {onnxsim.__version__}')

Starting simplifing with onnxsim 0.3.6


In [14]:
onnx_simp_name = 'yolov5s.simp.onnx'  # path of the simplified ONNX models

In [15]:
# load your predefined ONNX model
onnx_model = onnx.load(export_onnx_name)

# convert model
model_simp, check = onnxsim.simplify(
    onnx_model,
    input_shapes={"images_tensors": [3, 640, 640]},
    dynamic_input_shape=True,
)

assert check, "Simplified ONNX model could not be validated"

# use model_simp as a standard ONNX model object
onnx.save(model_simp, onnx_simp_name)

## Inference on ONNXRuntime Backend

Now, We begin to verify whether the inference results are consistent with PyTorch's, similarly, install `onnxruntime` first.

```shell
pip install -U onnxruntime
```

In [16]:
print(f'Starting with onnx {onnx.__version__}, onnxruntime {onnxruntime.__version__}...')

Starting with onnx 1.9.0, onnxruntime 1.8.1...


In [17]:
images, _ = torch.jit._flatten(images)
outputs, _ = torch.jit._flatten(model_out)

In [18]:
def to_numpy(tensor):
    if tensor.requires_grad:
        return tensor.detach().cpu().numpy()
    else:
        return tensor.cpu().numpy()

In [19]:
inputs = list(map(to_numpy, images))
outputs = list(map(to_numpy, outputs))

In [20]:
# ort_session = onnxruntime.InferenceSession(export_onnx_name)
ort_session = onnxruntime.InferenceSession(onnx_simp_name)

In [21]:
# compute onnxruntime output prediction
ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
ort_outs = ort_session.run(None, ort_inputs)

In [22]:
%%time
# compute onnxruntime output prediction
ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
ort_outs = ort_session.run(None, ort_inputs)

CPU times: user 2.38 s, sys: 20 ms, total: 2.4 s
Wall time: 65.1 ms


In [23]:
for i in range(0, len(outputs)):
    torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-04, atol=1e-07)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

Exported model has been tested with ONNXRuntime, and the result looks good!
