In [None]:
!pip install -qq onnx onnxscript onnxruntime

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader

import onnx
import onnxruntime


In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# the single channel with mean 0.5 and std 0.5.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5,), (0.5,))
transform = transforms.Compose([to_tensor, normalize])

# Train and test splits of MNIST share the same root directory and preprocessing.
mnist_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Only the training data is shuffled; the test order stays fixed.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
class CNN_Model(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Architecture (spatial sizes assume a 28x28 input):
        conv1 (1 -> 3 channels, 3x3, padding 1) -> ReLU -> 2x2 max-pool  => 3 x 14 x 14
        conv2 (3 -> 6 channels, 3x3, padding 1) -> ReLU -> 2x2 max-pool  => 6 x 7 x 7
        flatten -> fc1 (294 -> 32) -> ReLU -> fc2 (32 -> 10)

    The output is a ``(batch, 10)`` tensor of unnormalized class scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(3, 6, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(6 * 7 * 7, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                6 * 7 * 7 features that ``fc1`` expects.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 6 * 7 * 7), this never folds spatial positions into the
        # batch dimension when the input size is wrong; fc1 raises instead.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device:", device)

# Move the model to the chosen device before building the optimizer over its parameters.
model = CNN_Model()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

device: cuda


# Train

In [None]:
num_epochs = 5
model.train()
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Batches come from the loader on the CPU; move them next to the model.
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step, then run forward/backward.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # The reported value is the loss of the last mini-batch of the epoch,
    # not an average over the whole epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/5], Loss: 0.1359
Epoch [2/5], Loss: 0.0930
Epoch [3/5], Loss: 0.0996
Epoch [4/5], Loss: 0.2569
Epoch [5/5], Loss: 0.1498


# Test

In [None]:
model.eval()
correct = 0
total = 0
# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = torch.max(outputs, 1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f'Test Accuracy: {100 * correct / total:.2f}%')

Test Accuracy: 97.40%


# Export ONNX

In [None]:
# Export from the CPU with a CPU example input (batch of 2 random 1x28x28 images).
model = model.cpu()
export_options = torch.onnx.ExportOptions(dynamic_shapes=True)
torch_input = torch.randn(2, 1, 28, 28)
# NOTE(review): newer PyTorch releases appear to deprecate torch.onnx.dynamo_export
# in favor of torch.onnx.export(..., dynamo=True) — confirm against the installed version.
onnx_program = torch.onnx.dynamo_export(
    model,
    torch_input,
    export_options=export_options,
)



In [None]:
# Write the exported ONNX program to disk in the current working directory.
onnx_program.save("cnn_model.onnx")

In [None]:
# Reload the saved file and run ONNX's model checker on it as a sanity check
# before handing the file to ONNX Runtime.
onnx_model = onnx.load("cnn_model.onnx")
onnx.checker.check_model(onnx_model)

In [None]:
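# Alternative (disabled, kept for reference): export through torch.onnx.export
# with an explicitly named dynamic batch axis.
# torch.onnx.export(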
#     model,
#     torch_input,
#     "cnn_model_2.onnx",
#     input_names=["input"],
#     output_names=["output"],
#     dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}
# )

# Execute the ONNX model with ONNX Runtime

In [None]:
# Sample batches with different batch sizes (2, 5 and 3) of random 1x28x28 inputs.
onnx_input = [torch.randn(2, 1, 28, 28),
              torch.randn(5, 1, 28, 28),
              torch.randn(3, 1, 28, 28)]
print(f"Input length: {len(onnx_input)}")
# Show only the shapes: printing the raw tensors floods the cell output
# with thousands of values and hides everything that follows.
print(f"Sample input shapes: {[tuple(t.shape) for t in onnx_input]}")

# ONNX Runtime session over the exported model file, restricted to the CPU provider.
ort_session = onnxruntime.InferenceSession(
    "./cnn_model.onnx", providers=["CPUExecutionProvider"]
)

def to_numpy(tensor):
    """Return the contents of a torch tensor as a NumPy array on the CPU.

    Tensors that track gradients are detached first, because ``.numpy()``
    cannot be called on a tensor that requires grad.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

# `onnx_input` holds several independent sample batches, not several model
# arguments. Zipping it with the session's inputs would pair the single graph
# input with the first batch only and silently drop the rest, so each batch
# is fed through the session on its own.
ort_input_name = ort_session.get_inputs()[0].name
onnxruntime_inputs = [{ort_input_name: to_numpy(batch)} for batch in onnx_input]

# onnxruntime returns a list of outputs; keep the first one for every batch
onnxruntime_outputs_per_batch = [
    ort_session.run(None, feed)[0] for feed in onnxruntime_inputs
]

# Each result must have one row per sample of its input batch.
for batch, batch_output in zip(onnx_input, onnxruntime_outputs_per_batch):
    assert batch_output.shape[0] == batch.shape[0]

# Names kept with their original meaning: feed and output of the first sample batch.
onnxruntime_input = onnxruntime_inputs[0]
onnxruntime_outputs = onnxruntime_outputs_per_batch[0]

Input length: 3
Sample input: [tensor([[[[ 0.6181,  0.8157,  2.5101,  ..., -0.7072, -1.1273, -1.3691],
          [-1.5253, -1.5387,  0.7521,  ..., -0.4119, -0.1180, -0.8194],
          [ 1.1372, -0.3255,  1.8639,  ...,  1.0781,  1.6716,  0.7560],
          ...,
          [-0.3478, -1.5809, -1.0452,  ..., -0.0446, -1.7842,  0.4616],
          [ 1.9251, -0.2804, -2.8128,  ..., -0.5941,  0.6544,  0.8159],
          [ 0.6301, -0.2596, -0.1456,  ..., -0.6066, -0.1811,  0.3224]]],


        [[[ 0.9361,  0.3533, -1.0349,  ...,  2.7408,  0.5204, -0.0874],
          [ 1.8348,  0.6356,  1.6431,  ..., -0.4123, -0.9477,  0.6689],
          [ 0.2274, -1.8075, -0.4096,  ...,  0.3364, -0.3241, -1.6592],
          ...,
          [-0.3595, -0.1504,  0.1568,  ..., -2.6497,  0.7735,  0.5589],
          [ 0.6576,  0.2229,  0.4319,  ..., -0.1802,  0.4499, -0.8557],
          [ 1.0354, -1.2581, -1.1112,  ...,  0.0964,  0.0570, -1.5447]]]]), tensor([[[[ 0.1661, -0.5909, -0.6635,  ...,  1.8527,  0.9053,  0.37

In [None]:
correct = 0
total = 0
# Only the first graph input is fed; look its name up once instead of per batch.
ort_input_name = ort_session.get_inputs()[0].name
with torch.no_grad():
    for images, labels in test_loader:
        images_np = {ort_input_name: to_numpy(images)}

        # The session returns a list of NumPy outputs; wrap the first one in a
        # tensor so accuracy is computed the same way as for the PyTorch model.
        outputs = torch.tensor(ort_session.run(None, images_np)[0])
        predicted = torch.max(outputs, 1).indices
        total += labels.size(0)
        correct += (predicted == labels.cpu()).sum().item()

print(f'Test Accuracy with ONNX Runtime: {100 * correct / total:.2f}%')

Test Accuracy with ONNX Runtime: 97.40%
