In [80]:
import torch
from torchvision import models, datasets, transforms as T
import albumentations as A
from albumentations.pytorch import ToTensorV2
from statistics import mean
import cv2
import numpy as np
import os
import time
from matplotlib import pyplot as plt
import onnxruntime
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType

In [31]:
# Sanity check: moving the last axis of a (960, 540, 3) array to the front
# yields a (3, 960, 540) array -- the same axis permutation that
# preprocess_image applies to go from channels-last to channels-first.
x = np.zeros(shape=(960, 540, 3))
t = np.transpose(x, (2, 0, 1))
print(t.shape)

(3, 960, 540)


In [32]:
def preprocess_image(image_path, channels=3):
    """Load one image from disk and turn it into a single-image model input.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    512x512, normalised with the mean/std given below, moved to
    channels-first layout and given a leading batch axis of length 1.

    Args:
        image_path: Path of the image file to load.
        channels: Currently unused; kept so existing callers keep working.

    Returns:
        A float32 numpy array of shape (1, C, 512, 512).

    Raises:
        ValueError: If OpenCV cannot read ``image_path`` (missing file,
            directory, or unsupported/corrupt image format).
    """
    transform = A.Compose(
        [
            A.Resize(512, 512),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        ]
    )
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = transform(image=image)["image"]
    image_data = np.asarray(image).astype(np.float32)
    image_data = image_data.transpose([2, 0, 1])  # HWC -> CHW

    # Add the batch axis: CHW -> 1 x C x H x W.
    image_data = np.expand_dims(image_data, 0)
    return image_data


In [33]:
def preprocess_func(images_folder):
    """Preprocess every entry of ``images_folder``.

    Each name returned by ``os.listdir`` is joined with the folder path and
    handed to ``preprocess_image``; the results are collected in a list, in
    the order ``os.listdir`` produced them.

    Args:
        images_folder: Directory whose entries are to be preprocessed.

    Returns:
        A list with one preprocessed array per directory entry.
    """
    return [
        preprocess_image(os.path.join(images_folder, entry_name))
        for entry_name in os.listdir(images_folder)
    ]


class PackagesDataReader(CalibrationDataReader):
    """Calibration data reader that serves preprocessed images one at a time.

    The images are loaded lazily: nothing is read from disk until the first
    call to ``get_next``. After that, each call returns the next feed dict
    until the data is exhausted, at which point ``None`` is returned.

    Args:
        calibration_image_folder: Directory containing the calibration images.
        input_name: Name of the model input the arrays are fed to. Defaults to
            ``'input'``, the name that was previously hard-coded.
    """

    def __init__(self, calibration_image_folder, input_name='input'):
        self.image_folder = calibration_image_folder
        self.input_name = input_name
        self.preprocess_flag = True   # True until the images have been loaded
        self.enum_data_dicts = []     # replaced by an iterator on first get_next()
        self.datasize = 0             # number of calibration samples, set on load

    def get_next(self):
        """Return the next ``{input_name: array}`` dict, or ``None`` when done."""
        if self.preprocess_flag:
            self.preprocess_flag = False
            # preprocess_image produces channels-first arrays with a leading
            # batch axis (1 x C x H x W), i.e. NCHW rather than NHWC.
            nchw_data_list = preprocess_func(self.image_folder)
            self.datasize = len(nchw_data_list)
            self.enum_data_dicts = iter(
                [{self.input_name: nchw_data} for nchw_data in nchw_data_list]
            )
        return next(self.enum_data_dicts, None)

In [99]:
# Statically quantize the ONNX model, calibrating on the images found in
# ./calibration_images, then report the on-disk size of both models.
curr_dir = os.getcwd()
calibration_data_folder = os.path.join(curr_dir, "calibration_images")
dr = PackagesDataReader(calibration_data_folder)

# Define each model path once and reuse it, instead of repeating the bare
# filenames (model_path used to be computed but never used).
model_path = os.path.join(curr_dir, "supersmall400e_noinit.onnx")
quant_model_path = os.path.join(curr_dir, "supersmall400e_quant.onnx")

quantize_static(model_path,
                quant_model_path,
                dr,
                activation_type=QuantType.QInt8,
                weight_type=QuantType.QInt8,)

print('ONNX full precision model size (MB):', os.path.getsize(model_path)/(1024*1024))
print('ONNX quantized model size (MB):', os.path.getsize(quant_model_path)/(1024*1024))


ONNX full precision model size (MB): 0.03706550598144531
ONNX quantized model size (MB): 0.042102813720703125


Now we compare the inference latency of the quantized model with that of the standard model.

In [181]:
from torch.utils.data import Dataset

# The notebook runs from the "quantization" directory, which sits directly
# inside the "master_thesis" project directory.
c_dir = os.getcwd()                           # .../quantization
mt_dir = os.path.split(c_dir)[0]              # .../master_thesis (parent of c_dir)
models_path = os.path.join(mt_dir, "models")  # .../master_thesis/models

# Disabled: importing the PyTorch model definitions from models_path.
#sys.path.insert(1, models_path)
#from fast_scnn import FastSCNN
#from super_small_scnn import SuperSmallSCNN
#from bisenetv2 import BiSeNetV2


class PackagesInferenceDataset(Dataset):
    """Dataset of (image, mask, original_size) triples for inference.

    Images and masks are stored in two directories under the same filenames.

    Args:
        images_filenames: Filenames shared by each image and its mask.
        images_directory: Directory containing the images.
        masks_directory: Directory containing the masks.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """

    def __init__(self, images_filenames, images_directory, masks_directory, transform=None,):
        self.images_filenames = images_filenames
        self.images_directory = images_directory
        self.masks_directory = masks_directory
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per filename)."""
        return len(self.images_filenames)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, mask, original_size)`` where ``image`` is the RGB image,
            ``mask`` is a float32 mask in which the value 255 has been mapped
            to 1, and ``original_size`` is ``(height, width)`` of the image
            before any transform.

        Raises:
            IndexError: If ``idx`` is out of range for the filename list.
            ValueError: If the image or its mask cannot be read from disk.
        """
        # Index first: an out-of-range idx must raise IndexError so that plain
        # `for sample in dataset` iteration terminates.
        image_filename = self.images_filenames[idx]

        image_path = os.path.join(self.images_directory, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising on failure.
            raise ValueError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask_path = os.path.join(self.masks_directory, image_filename)
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            raise ValueError(f"Could not read mask: {mask_path}")
        mask = mask.astype(np.float32)
        # Map 255 to 1; every other value (including 0) is left unchanged.
        mask[mask == 255.0] = 1.0

        original_size = tuple(image.shape[:2])
        if self.transform is not None:
            transformed = self.transform(image=image, mask=mask)
            image = transformed["image"]
            mask = transformed["mask"]
        return image, mask, original_size

# Same resize and normalisation as in calibration preprocessing, followed by
# conversion to a torch tensor.
test_transform = A.Compose(
    [
        A.Resize(512, 512),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

test_images_path = os.path.join(mt_dir, "full_dataset/test/images")
test_label_path = os.path.join(mt_dir, "full_dataset/test/labels")
# os.listdir returns entries in arbitrary order; sort them so that
# test_dataset[0] and the order of per-image results are reproducible.
test_images_filenames = sorted(
    item for item in os.listdir(test_images_path) if item.endswith(".png")
)
test_dataset = PackagesInferenceDataset(
    images_filenames=test_images_filenames,
    images_directory=test_images_path,
    masks_directory=test_label_path,
    transform=test_transform,
)

# Fetch sample 0 once: every index into the dataset re-reads and
# re-transforms the image and mask files.
first_sample = test_dataset[0]
test_im = first_sample[0].unsqueeze(0)
test_mask = first_sample[1]

onnx_quant_path = os.path.join(c_dir, "supersmall400e_quant.onnx")
onnx_std_path = os.path.join(c_dir, "supersmall400e_noinit-opt.onnx")

# One inference session per model: quantized and standard.
ort_session = onnxruntime.InferenceSession(onnx_quant_path)
ort_session_std = onnxruntime.InferenceSession(onnx_std_path)

def to_numpy(tensor):
    """Convert a torch tensor to a numpy array on the CPU.

    A tensor that requires grad is detached from the autograd graph first.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


def _timed_inference(session, dataset):
    """Run ``session`` over ``dataset`` one image at a time, timing each run.

    Args:
        session: ONNX Runtime inference session to benchmark.
        dataset: Iterable yielding ``(image, mask, original_size)`` triples,
            where ``image`` is a torch tensor without a batch axis.

    Returns:
        ``(latencies, outputs)``: ``latencies`` holds the wall-clock seconds
        spent in each ``session.run`` call; ``outputs`` holds one
        ``[ort_outs, mask]`` pair per sample.
    """
    # The input name does not change between samples, so look it up once.
    input_name = session.get_inputs()[0].name
    latencies = []
    outputs = []
    for image, mask, _original_size in dataset:
        ort_inputs = {input_name: to_numpy(image.unsqueeze(0))}
        # perf_counter is the clock intended for measuring short durations;
        # only the session.run call itself is timed.
        start = time.perf_counter()
        ort_outs = session.run(None, ort_inputs)
        latencies.append(time.perf_counter() - start)
        outputs.append([ort_outs, mask])
    return latencies, outputs


# Inference over the entire test dataset, one image at a time, for each model.
# NOTE(review): no warm-up run is done, so any first-call overhead is included
# in the reported means.
latency_quant, outputs_quant = _timed_inference(ort_session, test_dataset)
latency_std, outputs_std = _timed_inference(ort_session_std, test_dataset)

print("latency of standard model: ", mean(latency_std))
print("latency of quantized model: ", mean(latency_quant))

latency of standard model:  0.0017830411593119304
latency of quantized model:  0.0027505636215209963


In [117]:
def jaccard(input, target, empty_score=1.0):
    """Compute the Jaccard index (intersection over union) of two masks.

    Both inputs are converted to boolean arrays, so every non-zero element
    counts as foreground.

    Args:
        input: Predicted mask (array-like).
        target: Ground-truth mask (array-like), broadcastable against ``input``.
        empty_score: Value returned when the union is empty, i.e. neither mask
            has any foreground. Without this the result would be 0/0 (NaN).

    Returns:
        The ratio ``|input AND target| / |input OR target|``, or
        ``empty_score`` if the union is empty.
    """
    l_input = np.asarray(input).astype(bool)
    l_target = np.asarray(target).astype(bool)
    intersection = np.logical_and(l_input, l_target)
    union = np.logical_or(l_input, l_target)
    union_count = np.sum(union)
    if union_count == 0:
        # Both masks are empty: avoid the division by zero.
        return float(empty_score)
    iou = np.sum(intersection)/union_count
    return iou

In [201]:
def _evaluate_outputs(outputs, height=540, width=960, threshold=0.5, collect_diffs=False):
    """Compute the IoU of every stored prediction against its ground truth.

    Args:
        outputs: List of ``[ort_outs, mask]`` pairs, as collected by the
            inference cell; ``mask`` must provide ``.numpy()``.
        height: Height both prediction and ground truth are resized to.
        width: Width both prediction and ground truth are resized to.
        threshold: Predictions ``>= threshold`` are treated as foreground.
        collect_diffs: If True, also keep ``prediction - ground_truth`` for
            every sample.

    Returns:
        ``(ious, diffs)``: the per-sample IoU values and the per-sample
        difference maps (an empty list unless ``collect_diffs`` is True).
    """
    ious = []
    diffs = []
    for ort_outs, g_truth in outputs:
        g_truth = g_truth.numpy()
        pred = np.array(ort_outs)
        # Strip the three leading axes, which must all have length 1
        # (squeeze(0) raises otherwise). Presumably these are the output-list
        # axis added by np.array, then batch and channel -- TODO confirm.
        for _axis in range(3):
            pred = pred.squeeze(0)
        pred = (pred >= threshold) * 1
        # Nearest-neighbour interpolation keeps both masks binary.
        pred = A.resize(
            pred, height=height, width=width, interpolation=cv2.INTER_NEAREST
        )
        g_truth = A.resize(
            g_truth, height=height, width=width, interpolation=cv2.INTER_NEAREST
        )
        if collect_diffs:
            diffs.append(pred - g_truth)
        ious.append(jaccard(pred, g_truth))
    return ious, diffs


# `tmp` keeps the per-image (prediction - ground truth) maps of the quantized
# model, as before, for ad-hoc inspection.
ious_quant, tmp = _evaluate_outputs(outputs_quant, collect_diffs=True)
ious_std, _unused_diffs = _evaluate_outputs(outputs_std)

print("mean iou of standard model: ", mean(ious_std))
print("mean iou of quantized model: ", mean(ious_quant))

mean iou of standard model:  0.9726124982048536
mean iou of quantized model:  0.7642729827990309
