# Notebook to convert a Pytroch model to a HEF file

In [None]:
# import the ClientRunner class from the hailo_sdk_client package
from hailo_sdk_client import ClientRunner
import onnx
import sys
import os
import numpy as np
import torch
import clip
import open_clip
from pathlib import Path
from onnxsim import simplify
from PIL import Image

# Own modules
print(Path.cwd())
sys.path.append(str(Path.cwd().parent))
os.chdir(str(Path.cwd().parent))
import folderManagment.pathsToFolders as ptf #Controlls all paths #

/home/lukasschoepf/Documents/ProjWork1_DFC/python


Paths and names for models

In [2]:
chosen_hw_arch = "hailo8l"

har_path = ptf.HarPath
hef_path = ptf.Hefpath


## Get ONNX file

Diffrent for CLIP / TinyCLIP


For CLIP

In [3]:
model_name = "RN50x4"

device = "cuda" if torch.cuda.is_available() else "cpu" 
model, preprocess = clip.load(model_name, device=device)

vit = model.visual
input_pixels = model.visual.input_resolution
input_image = torch.rand(1, 3, input_pixels, input_pixels)

#input_image = preprocess(Image.open("hailoDFC/pics/car-967387_1280.png")).unsqueeze(0).to(device)
onnx_path = str(ptf.onnxFolder/ f"{model_name}.onnx")
print(f"Input: {input_pixels}")
vit.eval()
torch.onnx.export(vit,
            input_image,
            onnx_path,
            opset_version=14,
            do_constant_folding=True,)

# Simplify the ONNX model to resolve mismatches
simplified_model, check = simplify(onnx_path)
model_path_simple = f"hailoDFC/models/baseAndSimple/{model_name}_simple.onnx"

# Save the simplified model
onnx.save(simplified_model, model_path_simple)

print(f"Simple Model saved at {model_path_simple}")

Input: 288
Simple Model saved at hailoDFC/models/baseAndSimple/RN50x4_simple.onnx


For TinyCLIP


In [45]:
model_name = "TinyCLIP-ResNet-19M-Text-19M" # TinyCLIP-ResNet-19M-Text-19M or TinyCLIP-ResNet-30M-Text-29M

device = "cuda" if torch.cuda.is_available() else "cpu" 
model, *_ = open_clip.create_model_and_transforms(model_name,pretrained=str(ptf.tinyClipModels / f"{model_name}-LAION400M.pt"))

vit = model.visual 
input_pixels = model.visual.image_size

print(f"Input: {input_pixels}")
input_image = torch.rand(1, 3, input_pixels, input_pixels)
# input_image = preprocess(Image.open("hailoDFC/pics/car-967387_1280.png")).unsqueeze(0).to(device)

onnx_path = str(ptf.onnxFolder/ f"{model_name}.onnx")
vit.eval()
torch.onnx.export(vit,
         input_image,
         onnx_path,
         opset_version=14,
         do_constant_folding=True,)

# Simplify the ONNX model to resolve mismatches
simplified_model, check = simplify(onnx_path)
model_name = "TinyRN"
model_path_simple = f"hailoDFC/models/{model_name}_simple.onnx"

# Save the simplified model
onnx.save(simplified_model, model_path_simple)

print(f"Simple Model saved at {model_path_simple}")

  checkpoint = torch.load(checkpoint_path, map_location=map_location)


Input: 224
Simple Model saved at hailoDFC/models/TinyRN_simple.onnx


Use model from Hailo

In [3]:
model_path_simple = "hailoDFC/models/modified/modified_RN50x4_simple.onnx"
model_name = "Modified_RN50x4"

### Onnx to Har

In [7]:
runner = ClientRunner(hw_arch=chosen_hw_arch)


print(f"Take model from {model_path_simple}")
hn, npz = runner.translate_onnx_model(
    model_path_simple,
    model_name,
    start_node_names=["onnx::Cast_0"],
    disable_shape_inference= True,
    net_input_shapes=[1,3,288,288]
    # end_node_names=["/attnpool/MatMul_4"]
)

hailo_model_har_name = str(har_path / f"{model_name}_hailo_model.har")
runner.save_har(hailo_model_har_name)

Take model from hailoDFC/models/modified/modified_RN50x4_simple.onnx
[info] Translation started on ONNX model Modified_RN50x4
[info] Restored ONNX model Modified_RN50x4 (completion time: 00:00:00.53)
[info] Extracted ONNXRuntime meta-data for Hailo model (completion time: 00:00:02.18)
[info] Start nodes mapped from original model: 'onnx::Cast_0': 'Modified_RN50x4/input_layer1'.
[info] End nodes mapped from original model: '/attnpool/MatMul_4'.
[info] Unable to export parsing report: output
[info] Translation completed on ONNX model Modified_RN50x4 (completion time: 00:00:02.41)
[info] Saved HAR to: /home/lukasschoepf/Documents/ProjWork1_DFC/hailoDFC/models/Harfiles/Modified_RN50x4_hailo_model.har


## Optimize Har file

The next step is to optimze the har file. To do this we need a calibration dataset.

In [12]:
# preprocessing
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize, PILToTensor
import os

try:
    from torchvision.transforms import InterpolationMode
    BICUBIC = InterpolationMode.BICUBIC
except ImportError:
    BICUBIC = Image.BICUBIC


datafolder = ptf.dataBaseFolder
input_folder = ptf.Dataset5Patch
calibFolder = datafolder / "calibData"

Functions to preprocess the input images to the right resolution

In [13]:
def _convert_image_to_rgb(image):
    return image.convert("RGB")

def transform(n_px):
    """
    n_px: input resolution of the network
    """
    return Compose([
        Resize(n_px, interpolation=BICUBIC),
        CenterCrop(n_px),
        _convert_image_to_rgb,
        ToTensor(),
        Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
    ])

`x_y_pixel` should be the input resolution.

In [17]:
x_y_pixel = 288
preprocess = transform(x_y_pixel)
images_list = [img_name for img_name in os.listdir(input_folder) if os.path.splitext(img_name)[1] == ".jpg"]
images_list = images_list[0:1024]
calib_dataset = np.zeros((len(images_list), 3, x_y_pixel, x_y_pixel))

for idx, img_name in enumerate(sorted(images_list)):
    img = Image.open(os.path.join(input_folder, img_name))
    # img = PILToTensor(img)
    img_preproc = preprocess(img)
    img_transposed = np.transpose(img_preproc.numpy(),(1,2,0))
    #input_data = (img_transposed * 255).astype(np.uint8)  # Assuming image is already normalized
    calib_dataset[idx, :, :, :] = img_preproc

np.save(calibFolder / f"calib_set_{model_name}.npy", calib_dataset)

We will load our parsed HAR from the Parsing Tutorial

In [22]:
# Second, we will load our parsed HAR from the Parsing Tutorial

hailo_model_har_name = f"{model_name}_hailo_model.har"
hailo_model_har_path = har_path / hailo_model_har_name
assert os.path.isfile(hailo_model_har_path), "Please provide valid path for HAR file"
print(f"Model from {hailo_model_har_path}")
runner = ClientRunner(har=str(hailo_model_har_path),hw_arch=chosen_hw_arch)

Model from hailoDFC/models/Harfiles/Modified_RN50x4_hailo_model.har


In [21]:
# Batch size is 8 by default
# alls = "normalization1 = normalization([123.675, 116.28, 103.53], [58.395, 57.12, 57.375])\n" # From tutorial

# Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), # From Lia
#alls = "normalization1 = normalization([0.48145466, 0.4578275, 0.40821073], [0.26862954, 0.26130258, 0.27577711])\n"

# Load the model script to ClientRunner so it will be considered on optimization
# runner.load_model_script(alls)

# Call Optimize to perform the optimization process
runner.optimize(calib_dataset)

# Save the result state to a Quantized HAR file
quantized_model_har_path = str(har_path / f"{model_name}_quantized_model.har")
runner.save_har(quantized_model_har_path)
print(f"saved model at {quantized_model_har_path}")

[info] Starting Model Optimization
[info] Model received quantization params from the hn
[info] Starting Mixed Precision
[info] Mixed Precision is done (completion time is 00:00:00.39)
[info] LayerNorm Decomposition skipped
[info] Starting Statistics Collector


BadInputsShape: Data shape (3, 288, 288) for layer Modified_RN50x4/input_layer1 doesn't match network's input shape (288, 288, 3)

## Compile the quantized model

Now we can compile the quantized model

In [None]:
quantized_model_har_path = str(har_path / f"{model_name}_quantized_model.har")
print(f"Model used:{model_name}_quantized_model.har")
runner = ClientRunner(har=quantized_model_har_path,hw_arch=chosen_hw_arch)
# By default it uses the hw_arch that is saved on the HAR. It is not recommended to change the hw_arch after Optimization.

hef = runner.compile()

file_name = str(hef_path / f"{model_name}.hef")
with open(file_name, "wb") as f:
    f.write(hef)

har_path =ptf.HarPath/ f"{model_name}_compiled_model.har"
runner.save_har(har_path)

Model used:Modified_RN50x4_quantized_model.har
[info] To achieve optimal performance, set the compiler_optimization_level to "max" by adding performance_param(compiler_optimization_level=max) to the model script. Note that this may increase compilation time.
[info] Loading network parameters
[info] Starting Hailo allocation and compilation flow
[info] Adding collapsed format conversion after const_input1
[info] Adding collapsed format conversion after const_input1
[info] Adding collapsed format conversion after conv87
[info] Finding the best partition to contexts...
[?25l[info] Iteration #1 - Contexts: 5
[info] Iteration #2 - Contexts: 5
[info] Iteration #3 - Contexts: 5
[info] Iteration #4 - Contexts: 5
[info] Iteration #5 - Contexts: 5
[info] Iteration #6 - Contexts: 5
[info] Iteration #7 - Contexts: 5
[info] Iteration #8 - Contexts: 5
[info] Iteration #9 - Contexts: 5
[info] Iteration #10 - Contexts: 5
[info] Iteration #11 - Contexts: 5
[info] Iteration #12 - Contexts: 5
[info] Ite