# Converting PyTorch to TensorFlow Lite for xCORE Using ONNX

ONNX is an open format built to represent machine learning models. We can convert from PyTorch to ONNX, then from ONNX to TensorFlow, then from TensorFlow to TensorFlow Lite, and finally, run it through xformer to optimise it for xCORE.
Ensure that you have installed Python 3.8 and have the installed requirements.txt

## Import PyTorch Model

For this example, we use mobilenet_v2.

In [1]:
import torch

pytorch_model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)

# Switch the model to eval mode
pytorch_model.eval()

Using cache found in /Users/salmankhan/.cache/torch/hub/pytorch_vision_v0.10.0


MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

### Run inference on model (to test)

In [2]:
# Download an image to test against
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

In [3]:
# Download Image Labels
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

--2023-04-03 16:33:47--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt.6’


2023-04-03 16:33:47 (52.6 MB/s) - ‘imagenet_classes.txt.6’ saved [10472/10472]



In [4]:
# Read the categories
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]

### Perform an infrence on the pytorch model directly

In [5]:
# We will test and train with these params
batch_size = 1
channels = 3
height = 224
width = 224

In [6]:
from PIL import Image
from torchvision import transforms

# Open testing image
input_image = Image.open(filename)

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(height),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Note Pytorch is BCHW
input_tensor = preprocess(input_image)

In [7]:
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

with torch.no_grad():
    output = pytorch_model(input_batch)

probabilities = torch.nn.functional.softmax(output[0], dim=0)

# Show top categories per image
vals, idxs = torch.topk(probabilities, 5)
pytorch_results = [(categories[idx], prob) for (idx, prob) in zip(idxs.tolist(), vals.tolist())]
for cat, prob in  pytorch_results:
    print(cat, ':', prob)

Samoyed : 0.8303043246269226
Pomeranian : 0.06988773494958878
keeshond : 0.012964080087840557
collie : 0.010797776281833649
Great Pyrenees : 0.009886783547699451


## Convert to ONNX


In [8]:
# This is only for shape info for tracnig the model during conversion
sample_input = torch.rand((batch_size, channels, height, width))

onnx_model_path = "mobilenet_v2.onnx"

torch.onnx.export(
    pytorch_model,
    sample_input,
    onnx_model_path,
    input_names=['image'],
    output_names=['probabilities']
)

### Check the exported model

In [9]:
import onnx
onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)

### Check ONNX Output

In [10]:
import onnxruntime 
import numpy as np
ort_session = onnxruntime.InferenceSession(onnx_model_path)

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

def softmax(xs):
    return np.exp(xs)/sum(np.exp(xs))

# compute ONNX Runtime output prediction
input_batch_np = to_numpy(input_batch)
ort_inputs = {ort_session.get_inputs()[0].name: input_batch_np}
ort_outs = ort_session.run(None, ort_inputs)

# The input is still BCHW
data = zip(range(len(ort_outs[0][0])), softmax(ort_outs[0][0]))

onnx_results = [(categories[idx], prob) for (idx, prob) in sorted(data, key=lambda x: x[1], reverse=True)[:5]]
for cat, prob in  onnx_results:
    print(cat, ':', prob)

Samoyed : 0.830305
Pomeranian : 0.06988746
keeshond : 0.012964004
collie : 0.010797733
Great Pyrenees : 0.009886744


## Using onnx2tf

Using package: https://github.com/PINTO0309/onnx2tf

### Convert ONNX to Keras

In [11]:
import onnx2tf

keras_model_path = 'mobilenet_v2.tf'

keras_model = onnx2tf.convert(
    input_onnx_file_path=onnx_model_path,
    output_folder_path=keras_model_path,
    non_verbose=True,
)



### Check the conversion to keras

In [12]:
#transpose the input_batch into BHWC order for tensorflow
tf_input_data = np.transpose( input_batch.numpy(), [0, 2, 3, 1])

keras_output_data = keras_model(tf_input_data)

probs = softmax(keras_output_data[0])
data = zip(range(len(probs)), probs)
keras_results = [(categories[idx], prob) for (idx, prob) in sorted(data, key=lambda x: x[1], reverse=True)[:5]]
for cat, prob in  keras_results:
    print(cat, ':', prob)

Samoyed : 0.83030456
Pomeranian : 0.069887355
keeshond : 0.01296406
collie : 0.01079778
Great Pyrenees : 0.009886805


## Convert Keras to TFLite (float32)

In [13]:
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.inference_input_type = tf.float32 
converter.inference_output_type = tf.float32

tflite_model = converter.convert()

# Save the model.
tflite_float_model_path = 'mobilenet_v2_float.tflite'
with open(tflite_float_model_path, 'wb') as f:
  f.write(tflite_model)

### Check it Worked

In [14]:
tfl_interpreter = tf.lite.Interpreter(model_path=tflite_float_model_path)
tfl_interpreter.allocate_tensors()

tfl_input_details = tfl_interpreter.get_input_details()
tfl_output_details = tfl_interpreter.get_output_details()

# Convert PyTorch Input Tensor into Numpy Matrix and Reshape for TensorFlow
# (Pytorch model expects C x H x W but TF expects H x W x C)
tfl_interpreter.set_tensor(tfl_input_details[0]['index'], tf_input_data)
tfl_interpreter.invoke()

tfl_output_data = tfl_interpreter.get_tensor(tfl_output_details[0]['index'])

probs = softmax(tfl_output_data[0])
data = zip(range(len(probs)), probs)
tfl_float32_results = [(categories[idx], prob) for (idx, prob) in sorted(data, key=lambda x: x[1], reverse=True)[:5]]
for cat, prob in  tfl_float32_results:
    print(cat, ':', prob)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Samoyed : 0.84704113
Pomeranian : 0.060772426
keeshond : 0.012674405
collie : 0.00844481
Great Pyrenees : 0.008021476


## Convert Keras to TFLite (int8)
We will still feed the data into the model in float32 format for convinence but the internals of the model will be int8. This will require representitive data but as we interface in float32 we can use the pytorch preprocessing. 

## Representative Dataset

To convert a model into to a TFLite flatbuffer, a representative dataset is required to help in quantisation. Refer to [Converting a keras model into an xcore optimised tflite model](https://colab.research.google.com/github/xmos/ai_tools/blob/develop/docs/notebooks/keras_to_xcore.ipynb) for more details on this.

#### Download & Extract Images from Data Set

In [15]:
import tensorflow as tf
import tensorflow_datasets as tfds

ds = tfds.load('imagenette', split='train', as_supervised=True, shuffle_files=True).shuffle(1000).batch(1).prefetch(10).take(1000)

# Iterate over the sampled images and preprocess them
def representative_dataset():
    for image, _ in ds:
        pil_img = tf.keras.utils.array_to_img(image[0])
        pytorch_batch = preprocess(pil_img).unsqueeze(0)
        tf_batch = np.transpose(pytorch_batch.numpy(), [0, 2, 3, 1])
        yield [tf_batch]

In [16]:
# Now do the conversion to int8
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.float32 
converter.inference_output_type = tf.float32

tflite_int8_model = converter.convert()

# Save the model.
tflite_int8_model_path = 'mobilenet_v2.tflite'
with open(tflite_int8_model_path, 'wb') as f:
  f.write(tflite_int8_model)

fully_quantize: 0, inference_type: 6, input_inference_type: FLOAT32, output_inference_type: FLOAT32


In [17]:
tfl_interpreter = tf.lite.Interpreter(model_path=tflite_int8_model_path)
tfl_interpreter.allocate_tensors()

tfl_input_details = tfl_interpreter.get_input_details()
tfl_output_details = tfl_interpreter.get_output_details()

# Convert PyTorch Input Tensor into Numpy Matrix and Reshape for TensorFlow
tfl_interpreter.set_tensor(tfl_input_details[0]['index'], tf_input_data)
tfl_interpreter.invoke()

tfl_output_data = tfl_interpreter.get_tensor(tfl_output_details[0]['index'])

probs = softmax(tfl_output_data[0])
data = zip(range(len(probs)), probs)
tfl_int8_results = [(categories[idx], prob) for (idx, prob) in sorted(data, key=lambda x: x[1], reverse=True)[:5]]
for cat, prob in  tfl_int8_results:
    print(cat, ':', prob)

Samoyed : 0.87135535
Pomeranian : 0.049141813
West Highland white terrier : 0.015557991
Great Pyrenees : 0.008753966
Arctic fox : 0.008753966


# Analysing Models

Defined below is a function to print out the operator counts of a model.

In [18]:
import io
from contextlib import redirect_stdout

def get_operator_counts(model_content):
    with io.StringIO() as buf, redirect_stdout(buf):
        tf.lite.experimental.Analyzer.analyze(model_content=model_content)
        model_structure = buf.getvalue()

    operators = [op.strip().split(" ")[1].split("(")[0] for op in model_structure.split("\n") if "Op#" in op]
    op_counts = {}
    for operator in operators:
        if operator in op_counts:
            op_counts[operator] = op_counts[operator]+1
        else:
            op_counts[operator] = 1
        
    return (len(operators), op_counts)

def print_operator_counts(model_content):
    total_op_count, op_counts = get_operator_counts(model_content)
    print(f"{'Operator'.upper():<20} {'Count'.upper():>6}")
    print("-"*20 + " " + "-"*6)
    
    for operator, count in op_counts.items():
        print(f"{operator.lower():<20} {count:>6}")
        
    print("-"*20 + " " + "-"*6)
    print(f"{'Total'.upper():<20} {total_op_count:>6}")
    print("-"*20 + " " + "-"*6)

In [19]:
# Let's inspect the int8 model
print_operator_counts(tflite_int8_model)

OPERATOR              COUNT
-------------------- ------
quantize                  1
pad                       5
conv_2d                  35
depthwise_conv_2d        17
add                      10
mean                      1
fully_connected           1
dequantize                1
-------------------- ------
TOTAL                    71
-------------------- ------


## Compare Accuracy

In [20]:
import tensorflow_datasets as tfds
import tensorflow as tf
import requests
from typing import List

# download labels
resp = requests.get('https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_metadata.txt')
# make a dictionary of classes, e.g: {"n00440382": ["broad", "jump", "long jump"}
labels_dict = {y[0]: [z.strip().lower() for z in y[1].split(",")] for y in [x.split("\t") for x in resp.text.splitlines()]}

# make pytorch labels lowercase to standardise with tfds labels
torch_labels = [x.lower() for x in categories]

# load dataset
ds, info = tfds.load('imagenette', split='validation', with_info=True, as_supervised=True, shuffle_files=False)
ds = ds.shuffle(100, reshuffle_each_iteration=True)

# check if the labels for an image are present in the pytorch labels list 
def is_label_in_both_datasets(tfds_labels: List[str]):
    for label in tfds_labels:
        if label.lower() in torch_labels:
            return True
    return False

# returns True if any of the top_n_results are in expected, False otherwise
def check_correct(top_n_results: List[str], expected: List[str]):
    for term in top_n_results:
        if term in expected:
            return True
    return False


    
# see what % of samples have the correct answer in the top_n results
def accuracy_tflite(top_n: int = 1, samples=1000):
    if top_n < 1 or n > 1000:
        raise ValueError
    
    # take subset of dataset
    selection = ds.prefetch(10).take(samples)

    iterations = 0
    correct = 0

    for image, label in selection:
        # get actual words for the image classification
        label_terms = labels_dict[info.features['label'].int2str(label.numpy())]

        # only compare images where the label exists in both the dataset that the model was originally trained on, and in the tfds dataset
        if not is_label_in_both_datasets(label_terms):
            print(f"Skipping image: None of {label_terms} are in the PyTorch class List")
            break
            
        iterations = iterations + 1

        img = tf.keras.utils.array_to_img(image)
        pytorch_batch = preprocess(img).unsqueeze(0)
        tf_batch = np.transpose( pytorch_batch.numpy(), [0, 2, 3, 1])

        # use same interpreter as before
        tfl_interpreter.set_tensor(tfl_input_details[0]['index'], tf_batch)
        tfl_interpreter.invoke()

        output = tfl_interpreter.get_tensor(tfl_output_details[0]['index'])

        # Sort into List[Tuple[index, confidence]] ordered by confidence (descending)
        data = sorted(
            zip(range(len(output[0])), output[0]),
            key=lambda x: x[1], reverse=True
        )
        top_n_results: List[str] = [torch_labels[idx] for (idx, _) in data[:top_n]]
        
        if check_correct(top_n_results, label_terms):
            correct = correct + 1
        
    if iterations > 0:
        accuracy = correct / iterations
        print(f"Top-{top_n} accuracy (TFLite Model): {accuracy * 100}% ({correct}/{iterations})")
        return accuracy
    print("Error: no iterations ran")
    

# see what % of samples have the correct answer in the top_n results using PyTorch
def accuracy_torch(top_n: int = 1, samples=1000):
    if top_n < 1 or n > 1000:
        raise ValueError
    
    # take subset of dataset
    selection = ds.prefetch(10).take(samples)

    iterations = 0
    correct = 0

    for image, label in selection:
        # get actual words for the image classification
        label_terms = labels_dict[info.features['label'].int2str(label.numpy())]

        # only compare images where the label exists in both the dataset that the model was originally trained on, and in the tfds dataset
        if not is_label_in_both_datasets(label_terms):
            print(f"Skipping image: None of {label_terms} are in the PyTorch class List")
            break
            
        iterations = iterations + 1

        img = tf.keras.utils.array_to_img(image)
        input_batch = preprocess(img).unsqueeze(0)
        
        with torch.no_grad():
            output = pytorch_model(input_batch)

        # Show top categories per image
        vals, idxs = torch.topk(output[0], top_n)
        top_n_results: List[str] = [torch_labels[idx] for idx in idxs]
        
        if check_correct(top_n_results, label_terms):
            correct = correct + 1
        
    if iterations > 0:
        accuracy = correct / iterations
        print(f"Top-{top_n} accuracy (PyTorch Model): {accuracy * 100}% ({correct}/{iterations})")
        return accuracy
    print("Error: no iterations ran")

In [21]:
for n in range(5):
    accuracy_torch(n+1)
    accuracy_tflite(n+1)

Top-1 accuracy (PyTorch Model): 80.7% (807/1000)
Top-1 accuracy (TFLite Model): 82.19999999999999% (822/1000)
Top-2 accuracy (PyTorch Model): 89.5% (895/1000)
Top-2 accuracy (TFLite Model): 89.4% (894/1000)
Top-3 accuracy (PyTorch Model): 93.5% (935/1000)
Top-3 accuracy (TFLite Model): 93.5% (935/1000)
Top-4 accuracy (PyTorch Model): 95.3% (953/1000)
Top-4 accuracy (TFLite Model): 94.19999999999999% (942/1000)
Top-5 accuracy (PyTorch Model): 96.3% (963/1000)
Top-5 accuracy (TFLite Model): 95.89999999999999% (959/1000)
