In [1]:
import torch
import torchvision

torch.hub._validate_not_a_forked_repo=lambda a,b,c: True

resnet50_model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
resnet50_model.eval()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
model = resnet50_model.to(device)

In [4]:
example = torch.randn((128,3,224,224), dtype=torch.float32, device=device)

In [5]:
script = torch.jit.trace(model,example)

In [6]:
script.save("./triton_model_repository/torch_resnet/1/model.pt")

In [7]:
x = torch.randn((1,3,224,224), dtype=torch.float32, device=device)

In [8]:
torch.onnx.export(model,
                  x,
                  "./triton_model_repository/onnx_resnet/1/model.onnx",
                  export_params=True,
                  opset_version=13,
                  input_names=['input'],
                  output_names=['output'],
                  dynamic_axes={'input': {0: 'batch_size'},
                                'output': {0: 'batch_size'}}
                )

In [9]:
!trtexec \
  --onnx="./triton_model_repository/onnx_resnet/1/model.onnx" \
  --explicitBatch \
  --optShapes=input:128x3x224x224 \
  --maxShapes=input:1024x3x224x224 \
  --minShapes=input:1x3x224x224 \
  --workspace=16382 \
  --saveEngine="./triton_model_repository/trt_fp_32_resnet/1/model.plan"

&&&& RUNNING TensorRT.trtexec [TensorRT v8402] # trtexec --onnx=./triton_model_repository/onnx_resnet/1/model.onnx --explicitBatch --optShapes=input:128x3x224x224 --maxShapes=input:1024x3x224x224 --minShapes=input:1x3x224x224 --workspace=16382 --saveEngine=./triton_model_repository/trt_fp_32_resnet/1/model.plan
[09/26/2022-06:22:37] [W] --explicitBatch flag has been deprecated and has no effect!
[09/26/2022-06:22:37] [W] Explicit batch dim is automatically enabled if input model is ONNX or if dynamic shapes are provided when the engine is built.
[09/26/2022-06:22:37] [W] --workspace flag has been deprecated by --memPoolSize flag.
[09/26/2022-06:22:37] [I] === Model Options ===
[09/26/2022-06:22:37] [I] Format: ONNX
[09/26/2022-06:22:37] [I] Model: ./triton_model_repository/onnx_resnet/1/model.onnx
[09/26/2022-06:22:37] [I] Output:
[09/26/2022-06:22:37] [I] === Build Options ===
[09/26/2022-06:22:37] [I] Max batch: explicit batch
[09/26/2022-06:22:37] [I] Memory Pools: workspace: 16382 

In [10]:
!trtexec \
  --onnx="./triton_model_repository/onnx_resnet/1/model.onnx" \
  --explicitBatch \
  --optShapes=input:128x3x224x224 \
  --maxShapes=input:1024x3x224x224 \
  --minShapes=input:1x3x224x224 \
  --workspace=16382 \
  --saveEngine="./triton_model_repository/trt_fp_16_resnet/1/model.plan" --fp16

&&&& RUNNING TensorRT.trtexec [TensorRT v8402] # trtexec --onnx=./triton_model_repository/onnx_resnet/1/model.onnx --explicitBatch --optShapes=input:128x3x224x224 --maxShapes=input:1024x3x224x224 --minShapes=input:1x3x224x224 --workspace=16382 --saveEngine=./triton_model_repository/trt_fp_16_resnet/1/model.plan --fp16
[09/26/2022-06:24:35] [W] --explicitBatch flag has been deprecated and has no effect!
[09/26/2022-06:24:35] [W] Explicit batch dim is automatically enabled if input model is ONNX or if dynamic shapes are provided when the engine is built.
[09/26/2022-06:24:35] [W] --workspace flag has been deprecated by --memPoolSize flag.
[09/26/2022-06:24:35] [I] === Model Options ===
[09/26/2022-06:24:35] [I] Format: ONNX
[09/26/2022-06:24:35] [I] Model: ./triton_model_repository/onnx_resnet/1/model.onnx
[09/26/2022-06:24:35] [I] Output:
[09/26/2022-06:24:35] [I] === Build Options ===
[09/26/2022-06:24:35] [I] Max batch: explicit batch
[09/26/2022-06:24:35] [I] Memory Pools: workspace: