# Converting PyTorch to TensorFlow Lite for xCORE Using ONNX

ONNX is an open format built to represent machine learning models. We can convert from PyTorch to ONNX, then from ONNX to TensorFlow, then from TensorFlow to TensorFlow Lite, and finally, run it through xformer to optimise it for xCORE.

In [None]:
!pip install torch
!pip install tensorflow
!pip install onnx
!pip install nvidia-pyindex
!pip install onnx-graphsurgeon
!pip install polygraphy
!pip install onnxruntime
!pip install onnxsim
!pip install simple_onnx_processing_tools
!pip install protobuf==3.20.3
!pip install h5py==3.7
!pip install onnx2tf
!pip install onnx-tf
!pip install tensorflow-probability

In [None]:
!pip install matplotlib

## Import PyTorch Model

For this example, we use mobilenet_v2.

In [None]:
import torch
import numpy as np

# Fetch the 'mobilenet_v2' entry point from the pytorch/vision hub repo
# (tag v0.10.0), requesting pretrained weights.
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
# Put the model into evaluation mode before running inference.
model.eval()

### Run inference on model

In [None]:
# Download an image to test against
# `urllib.request` is imported explicitly: a bare `import urllib` does not
# guarantee that the `request` submodule is loaded.
import urllib.request

url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
# Python 3 only needs urlretrieve; the legacy `urllib.URLopener` fallback and
# its bare `except:` (which hid every error, including typos) are gone.
# Any network failure now surfaces as the original exception.
urllib.request.urlretrieve(url, filename)

In [None]:
# Download Image Labels
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

In [None]:
# Read the categories: one label per line, surrounding whitespace removed
with open("imagenet_classes.txt", "r") as f:
    categories = [line.strip() for line in f]

In [None]:
from PIL import Image
from torchvision import transforms

# Open testing image
input_image = Image.open(filename)

# Preprocessing pipeline: resize, centre-crop to 224, convert to a tensor,
# then normalise each of the three channels with the given mean and std.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    output = model(input_batch)

# Turn the scores of the first (only) batch item into probabilities.
probabilities = torch.nn.functional.softmax(output[0], dim=0)

# Show top categories per image
vals, idxs = torch.topk(probabilities, 5)
# Pair each of the five best class indices with its label and probability.
pytorch_results = [(categories[idx], prob) for (idx, prob) in zip(idxs.tolist(), vals.tolist())]
print(pytorch_results)


## Convert to ONNX


In [None]:
# Shape of the example input used for the export: (batch, channels, height, width).
batch_size = 1
channels = 3
height = 224
width = 224

# Random tensor handed to the exporter as the example input.
sample_input = torch.rand((batch_size, channels, height, width))

onnx_model_path = "mobilenet_v2.onnx"

# Export the model to ONNX, naming the graph input 'image' and the output
# 'probabilities'.
# NOTE(review): the other cells apply softmax to the model output themselves,
# so this output appears to hold pre-softmax scores despite its name - confirm.
torch.onnx.export(
    model,
    sample_input,
    onnx_model_path,
    input_names=['image'],
    output_names=['probabilities']
)

### Check ONNX Output

In [None]:
import onnxruntime 

# Load the exported ONNX file into an ONNX Runtime inference session.
ort_session = onnxruntime.InferenceSession(onnx_model_path)

def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array located on the CPU.

    A tensor that tracks gradients is detached first, since such a tensor
    cannot be converted to NumPy directly.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

def softmax(xs):
    """Return the softmax of ``xs`` computed along its first axis.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but stops ``np.exp`` overflowing to ``inf``
    (and the quotient becoming ``nan``) for large scores.

    Args:
        xs: Array-like of scores. For a 1-D input this is the usual softmax;
            for higher ranks the normalisation runs over axis 0, matching the
            behaviour of the builtin ``sum`` used previously.

    Returns:
        A NumPy array of the same shape as ``xs`` whose entries along axis 0
        sum to 1. An empty input yields an empty array.
    """
    xs = np.asarray(xs)
    if xs.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(xs)
    exps = np.exp(xs - np.max(xs, axis=0))
    return exps / np.sum(exps, axis=0)

# compute ONNX Runtime output prediction
# Feed the same preprocessed batch used for the PyTorch run to the session's first input.
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(input_batch)}
ort_outs = ort_session.run(None, ort_inputs)

# Pair every class index with its softmax score for the first batch item of the first output.
data = zip(range(len(ort_outs[0][0])), softmax(ort_outs[0][0]))

# Keep the five highest-scoring classes, in descending order of score.
onnx_results = [(categories[idx], prob) for (idx, prob) in sorted(data, key=lambda x: x[1], reverse=True)[:5]]
print(onnx_results)

## Representative Dataset

To convert a model into a TFLite flatbuffer, a representative dataset is required to calibrate the quantisation. Refer to [Converting a keras model into an xcore optimised tflite model](https://colab.research.google.com/github/xmos/ai_tools/blob/develop/docs/notebooks/keras_to_xcore.ipynb) for more details on this.

In [None]:
import numpy as np
def representative_dataset():
    """Yield 100 calibration samples for the TFLite converter.

    Each sample is a one-element list holding a float32 array of shape
    (8, height, width, channels), filled with uniform random values drawn
    from [-0.1, 0.001). ``height``, ``width`` and ``channels`` are read from
    the notebook globals.

    NOTE(review): the data is random rather than real images, and the batch
    of 8 differs from the batch of 1 used for the ONNX export - confirm that
    this is intended.
    """
    batch_size = 8
    sample_shape = (batch_size, height, width, channels)
    for _ in range(100):
        sample = np.random.uniform(-0.1, 0.001, sample_shape)
        yield [sample.astype(np.float32)]

## Using onnx-tensorflow (no longer maintained)

This is the official ONNX package, but it is no longer maintained: https://github.com/onnx/onnx-tensorflow

In [None]:
import onnx
from onnx_tf.backend import prepare

# Directory that receives the exported graph.
saved_model_path = "saved_model"
# Load the ONNX file and export it through the onnx-tf backend.
onnx_model = onnx.load(onnx_model_path)
prepare(onnx_model).export_graph(saved_model_path)

In [None]:
import tensorflow as tf

# Build a converter from the saved model directory written above.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
# Enable the default optimisations and supply the calibration data generator.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
# Restrict the model to int8 builtin ops, with int8 input and output tensors.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8 
converter.inference_output_type = tf.int8

tflite_model = converter.convert()

# Save the model.
# NOTE(review): the onnx2tf section writes to this same file name, so running
# both sections leaves only the later model on disk.
tflite_model_path = 'mobilenet_v2.tflite'
with open(tflite_model_path, 'wb') as f:
  f.write(tflite_model)

## Using onnx2tf

Using unofficial package: https://github.com/PINTO0309/onnx2tf

### Convert ONNX to Keras

In [None]:
import onnx2tf

# Convert the exported ONNX file with onnx2tf. The returned object is passed
# to TFLiteConverter.from_keras_model in the next cell.
keras_model = onnx2tf.convert(
    input_onnx_file_path=onnx_model_path,
    output_signaturedefs=True,
    non_verbose=True,
)

### Convert Keras to TFLite

In [None]:
import tensorflow as tf

# Build a converter from the Keras model produced by onnx2tf.
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
# Enable the default optimisations and supply the calibration data generator.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
# Restrict the model to int8 builtin ops. Here the input tensor stays float32
# while the output tensor is int8.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.float32 
converter.inference_output_type = tf.int8

tflite_model = converter.convert()

# Save the model.
# NOTE(review): this is the same file name used by the onnx-tensorflow
# section, so any model written there is overwritten.
tflite_model_path = 'mobilenet_v2.tflite'
with open(tflite_model_path, 'wb') as f:
  f.write(tflite_model)

### Check it Worked

In [None]:
# Load the converted model into the TFLite interpreter.
tf_interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
tf_interpreter.allocate_tensors()

tf_input_details = tf_interpreter.get_input_details()
tf_output_details = tf_interpreter.get_output_details()

# Convert the PyTorch input tensor into a NumPy array laid out for the model.
# PyTorch uses N x C x H x W, while a converted model may expect N x H x W x C.
# The axes have to be transposed: a plain reshape keeps the element count but
# reinterprets the memory, which scrambles the image.
tf_input_shape = tf_input_details[0]['shape']
torch_input_data = input_batch.detach().numpy()
if tuple(tf_input_shape) == torch_input_data.shape:
    # The converted model kept the channels-first layout; feed the batch as is.
    tf_input_data = torch_input_data
else:
    # Move the channel axis to the end (NCHW -> NHWC) and make the result
    # contiguous so it can be copied straight into the input tensor.
    tf_input_data = np.ascontiguousarray(np.transpose(torch_input_data, (0, 2, 3, 1)))

tf_interpreter.set_tensor(tf_input_details[0]['index'], tf_input_data)
tf_interpreter.invoke()

tf_output_data = tf_interpreter.get_tensor(tf_output_details[0]['index'])
# Scores of the first (only) batch item. They are ranked as raw model outputs;
# no softmax is applied here.
probs = tf_output_data[0]
data = zip(range(len(probs)), probs)
# Keep the five best classes, consistent with the PyTorch and ONNX checks.
tf_results = [(categories[idx], prob) for (idx, prob) in sorted(data, key=lambda x: x[1], reverse=True)[:5]]
print(tf_results)

In [None]:
import matplotlib.pyplot as plt
# Drop the leading batch axis and display the array that was fed to the interpreter.
arr_ = np.squeeze(tf_input_data, 0)
plt.imshow(arr_)
plt.show()

# Analysing Models

Defined below is a function to print out the operator counts of each model.

In [None]:
import io
from contextlib import redirect_stdout

def get_operator_counts(model_content):
    """Count the operators reported by the TFLite analyzer for a model.

    The analyzer prints its report to stdout, so the output is captured and
    every line containing "Op#" is parsed for the operator name (the second
    space-separated token, up to the first opening parenthesis).

    Args:
        model_content: TFLite model content passed to the analyzer.

    Returns:
        A tuple ``(total, counts)`` where ``total`` is the number of operator
        lines found and ``counts`` maps each operator name to its number of
        occurrences, in order of first appearance.
    """
    with io.StringIO() as captured, redirect_stdout(captured):
        tf.lite.experimental.Analyzer.analyze(model_content=model_content)
        report = captured.getvalue()

    names = []
    for line in report.split("\n"):
        if "Op#" in line:
            names.append(line.strip().split(" ")[1].split("(")[0])

    tally = {}
    for name in names:
        tally[name] = tally.get(name, 0) + 1

    return (len(names), tally)

def print_operator_counts(model_content):
    """Print a table of operator names and counts for a TFLite model.

    The table has a header, one lower-cased row per operator, and a final
    row with the total number of operators.

    Args:
        model_content: TFLite model content, forwarded to
            ``get_operator_counts``.
    """
    total_op_count, op_counts = get_operator_counts(model_content)
    # Horizontal rule matching the 20- and 6-character column widths.
    rule = "-"*20 + " " + "-"*6

    print(f"{'Operator'.upper():<20} {'Count'.upper():>6}")
    print(rule)

    for operator, count in op_counts.items():
        print(f"{operator.lower():<20} {count:>6}")

    print(rule)
    print(f"{'Total'.upper():<20} {total_op_count:>6}")
    print(rule)

In [None]:
# Print the operator table for the model currently held in `tflite_model`.
print_operator_counts(tflite_model)