In [1]:
!nvidia-smi
!pip install ipywidgets --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org

Mon Sep 26 06:21:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100 80G...  Off  | 00000000:18:00.0 Off |                    0 |
| N/A   84C    P0   121W / 300W |  12090MiB / 81920MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os

from numpy import vstack
from numpy import argmax
from pandas import read_csv
from sklearn.metrics import accuracy_score
from torchvision.datasets import MNIST
from torchvision.transforms import Compose
from torchvision.transforms import ToTensor
from torchvision.transforms import Normalize
from torch.utils.data import DataLoader
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
import torch
import torchvision

In [3]:
class CNN(Module):
    """Two-block convolutional classifier with a 10-way softmax output.

    Layout: (Conv 3x3 -> ReLU -> MaxPool 2x2) twice, then Linear(800, 100)
    -> ReLU -> Linear(100, 10) -> Softmax over dim 1.

    The first fully connected layer expects 5*5*32 = 800 features. With the
    unpadded 3x3 convolutions and 2x2/stride-2 pooling used here, that
    corresponds to 28x28 inputs (28 -> 26 -> 13 -> 11 -> 5), which is the
    spatial size the export cells in this notebook feed to the model.

    NOTE(review): ``forward`` returns probabilities, not logits. If training
    used ``CrossEntropyLoss`` (imported in this notebook), that loss expects
    raw logits -- confirm against the training code.
    """

    def __init__(self, n_channels):
        """Build the layers.

        Args:
            n_channels: number of channels in the input images.
        """
        super().__init__()
        # first convolutional block
        self.hidden1 = Conv2d(n_channels, 32, (3, 3))
        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = ReLU()
        self.pool1 = MaxPool2d((2, 2), stride=(2, 2))
        # second convolutional block
        self.hidden2 = Conv2d(32, 32, (3, 3))
        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = ReLU()
        self.pool2 = MaxPool2d((2, 2), stride=(2, 2))
        # fully connected layer: 32 channels of 5x5 feature maps -> 100 units
        self.hidden3 = Linear(5 * 5 * 32, 100)
        kaiming_uniform_(self.hidden3.weight, nonlinearity='relu')
        self.act3 = ReLU()
        # output layer: 100 units -> 10 classes
        self.hidden4 = Linear(100, 10)
        xavier_uniform_(self.hidden4.weight)
        self.act4 = Softmax(dim=1)

    def forward(self, X):
        """Run a forward pass.

        Args:
            X: image batch with ``n_channels`` channels whose spatial size
               reduces to 5x5 after both conv/pool blocks.

        Returns:
            Tensor with 10 columns; each row is a softmax distribution.
        """
        # first convolutional block
        X = self.pool1(self.act1(self.hidden1(X)))
        # second convolutional block
        X = self.pool2(self.act2(self.hidden2(X)))
        # Flatten to the width hidden3 was built with. The previous literal
        # (4*4*50) also equals 800 but did not describe this network, so the
        # value is now read from the layer itself to keep the two in sync.
        X = X.view(-1, self.hidden3.in_features)
        # fully connected layer
        X = self.act3(self.hidden3(X))
        # output layer
        X = self.act4(self.hidden4(X))
        return X

In [4]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
# (torch is already imported in the imports cell at the top of the notebook.)
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [5]:
# Instantiate the network for single-channel input images.
model_mnist = CNN(n_channels=1)

In [6]:
# Load the saved weights. model_mnist still lives on the CPU at this point,
# so map the checkpoint tensors to CPU: this keeps the cell working on hosts
# without CUDA even if the checkpoint was written from a GPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model_mnist.load_state_dict(torch.load('MNIST_Data/model.pth', map_location='cpu'))

<All keys matched successfully>

In [7]:
# Switch the module out of training mode before tracing/exporting
# (train(False) is what eval() does); the returned module is displayed.
model_mnist.train(False)

CNN(
  (hidden1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (act1): ReLU()
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (hidden2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (act2): ReLU()
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (hidden3): Linear(in_features=800, out_features=100, bias=True)
  (act3): ReLU()
  (hidden4): Linear(in_features=100, out_features=10, bias=True)
  (act4): Softmax(dim=1)
)

In [8]:
# Move the parameters to the selected device; `model` is the handle used below.
model = model_mnist.to(device=device)

In [9]:
# Random batch of 64 single-channel 28x28 inputs used as the tracing example.
example = torch.randn(64, 1, 28, 28, dtype=torch.float32, device=device)

In [10]:
# Record the model's operations on the example batch as a TorchScript module.
script = torch.jit.trace(model, example_inputs=example)

In [11]:
# Save the traced module. The version directory is created first because
# empty directories are not kept by version control, so it may be missing
# on a fresh checkout.
torchscript_path = "./triton_model_repository/torch_mnist/1/model.pt"
os.makedirs(os.path.dirname(torchscript_path), exist_ok=True)
script.save(torchscript_path)

In [12]:
# Single random 1x28x28 sample used as the example input for the ONNX export.
x = torch.randn(1, 1, 28, 28, dtype=torch.float32, device=device)

In [13]:
# Export the model to ONNX with a dynamic batch dimension on both the
# 'input' and 'output' tensors. The target directory is created first so
# the export also works on a fresh checkout.
onnx_path = "./triton_model_repository/onnx_mnist/1/model.onnx"
os.makedirs(os.path.dirname(onnx_path), exist_ok=True)
torch.onnx.export(
    model,
    x,
    onnx_path,
    export_params=True,   # store the trained weights inside the ONNX file
    opset_version=13,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
)

In [14]:
!trtexec \
  --onnx="./triton_model_repository/onnx_mnist/1/model.onnx" \
  --explicitBatch \
  --optShapes=input:64x1x28x28 \
  --maxShapes=input:1024x1x28x28 \
  --minShapes=input:1x1x28x28 \
  --workspace=16382 \
  --saveEngine="./triton_model_repository/trt_fp_32_mnist/1/model.plan"

&&&& RUNNING TensorRT.trtexec [TensorRT v8402] # trtexec --onnx=./triton_model_repository/onnx_mnist/1/model.onnx --explicitBatch --optShapes=input:64x1x28x28 --maxShapes=input:1024x1x28x28 --minShapes=input:1x1x28x28 --workspace=16382 --saveEngine=./triton_model_repository/trt_fp_32_mnist/1/model.plan
[09/26/2022-06:21:39] [W] --explicitBatch flag has been deprecated and has no effect!
[09/26/2022-06:21:39] [W] Explicit batch dim is automatically enabled if input model is ONNX or if dynamic shapes are provided when the engine is built.
[09/26/2022-06:21:39] [W] --workspace flag has been deprecated by --memPoolSize flag.
[09/26/2022-06:21:39] [I] === Model Options ===
[09/26/2022-06:21:39] [I] Format: ONNX
[09/26/2022-06:21:39] [I] Model: ./triton_model_repository/onnx_mnist/1/model.onnx
[09/26/2022-06:21:39] [I] Output:
[09/26/2022-06:21:39] [I] === Build Options ===
[09/26/2022-06:21:39] [I] Max batch: explicit batch
[09/26/2022-06:21:39] [I] Memory Pools: workspace: 16382 MiB, dlaSR

In [15]:
!trtexec \
  --onnx="./triton_model_repository/onnx_mnist/1/model.onnx" \
  --explicitBatch \
  --optShapes=input:64x1x28x28 \
  --maxShapes=input:1024x1x28x28 \
  --minShapes=input:1x1x28x28 \
  --workspace=16382 \
  --saveEngine="./triton_model_repository/trt_fp_16_mnist/1/model.plan" --fp16

&&&& RUNNING TensorRT.trtexec [TensorRT v8402] # trtexec --onnx=./triton_model_repository/onnx_mnist/1/model.onnx --explicitBatch --optShapes=input:64x1x28x28 --maxShapes=input:1024x1x28x28 --minShapes=input:1x1x28x28 --workspace=16382 --saveEngine=./triton_model_repository/trt_fp_16_mnist/1/model.plan --fp16
[09/26/2022-06:21:53] [W] --explicitBatch flag has been deprecated and has no effect!
[09/26/2022-06:21:53] [W] Explicit batch dim is automatically enabled if input model is ONNX or if dynamic shapes are provided when the engine is built.
[09/26/2022-06:21:53] [W] --workspace flag has been deprecated by --memPoolSize flag.
[09/26/2022-06:21:53] [I] === Model Options ===
[09/26/2022-06:21:53] [I] Format: ONNX
[09/26/2022-06:21:53] [I] Model: ./triton_model_repository/onnx_mnist/1/model.onnx
[09/26/2022-06:21:53] [I] Output:
[09/26/2022-06:21:53] [I] === Build Options ===
[09/26/2022-06:21:53] [I] Max batch: explicit batch
[09/26/2022-06:21:53] [I] Memory Pools: workspace: 16382 MiB