# <b>Quantization</b>

<b>Required libraries:</b>

In [1]:
!pip install ultralytics
!pip install onnx
!pip install onnxruntime

Collecting ultralytics
  Downloading ultralytics-8.3.19-py3-none-any.whl.metadata (34 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.9-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.19-py3-none-any.whl (876 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m876.6/876.6 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.9-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.19 ultralytics-thop-2.0.9
Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m85.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.17.0
Collect

<b>Necessary imports:</b>

In [2]:
import os
import numpy as np
import cv2
import torch
from google.colab import drive
from onnxruntime.quantization import CalibrationDataReader, CalibrationMethod, QuantType, QuantFormat, quantize_static
from ultralytics import YOLO
from pycocotools.coco import COCO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# make the Google Drive contents available under /content/drive
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load and preprocess the model

In [4]:
# instantiate YOLO from the 'yolov8x.pt' checkpoint
model = YOLO(model='yolov8x.pt')

# write the ONNX version of the model; the value returned by export() is the cell output
model.export(format='onnx')

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:00<00:00, 213MB/s]


Ultralytics 8.3.19 🚀 Python-3.10.12 torch-2.4.1+cu121 CPU (Intel Xeon 2.20GHz)
YOLOv8x summary (fused): 268 layers, 68,200,608 parameters, 0 gradients, 257.8 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8x.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (130.5 MB)
[31m[1mrequirements:[0m Ultralytics requirement ['onnxslim'] not found, attempting AutoUpdate...
Collecting onnxslim
  Downloading onnxslim-0.1.35-py3-none-any.whl.metadata (3.0 kB)
Downloading onnxslim-0.1.35-py3-none-any.whl (140 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 140.4/140.4 kB 4.2 MB/s eta 0:00:00
Installing collected packages: onnxslim
Successfully installed onnxslim-0.1.35

[31m[1mrequirements:[0m AutoUpdate success ✅ 4.6s, installed 1 package: ['onnxslim']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m


[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.35..

'yolov8x.onnx'

In [5]:
# preprocess the model: run ONNX Runtime's quantization preprocess module on the
# exported 'yolov8x.onnx' and write the result to 'yolov8x_preprocessed.onnx',
# which is the file passed to quantize_static later in this notebook
!python -m onnxruntime.quantization.preprocess --input yolov8x.onnx --output yolov8x_preprocessed.onnx

### Define class and functions

In [6]:
class ImageCalibrationDataReader(CalibrationDataReader):
    """
    Calibration data reader that serves preprocessed images one at a time.

    Every call to `get_next` loads the next image from `image_paths`, preprocesses
    it and returns it keyed by the model's input tensor name. Once all images have
    been served, `get_next` returns None; call `rewind` to serve them again.
    """

    def __init__(self, image_paths, target_size=(640, 640), input_name="images"):
        """
        Initialize the calibration data reader.

        :param image_paths: List of paths to the calibration images.
        :param target_size: The target size to resize the images (width, height).
        :param input_name: Name of the model's input tensor used as the feed key.
        """
        self.image_paths = image_paths
        self.target_size = target_size
        self.index = 0
        self.input_name = input_name

    def preprocess(self, image_path):
        """
        Preprocess the image: load, resize, convert to RGB, normalize, and reformat for the model.

        :param image_path: The path to the image to preprocess.
        :return: Contiguous float32 array of shape (1, C, H, W) with values in [0, 1].
        :raises ValueError: If the image cannot be loaded from `image_path`.
        """
        image = cv2.imread(image_path)

        if image is None:
            raise ValueError(f"Image at path '{image_path}' could not be loaded.")

        # resize the image to the target size
        image = cv2.resize(image, self.target_size)

        # cv2.imread decodes to BGR channel order; the Ultralytics pipeline feeds the
        # network RGB, so calibrate on the same channel order the model sees at inference
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # normalise image to [0, 1] range
        image_data = image.astype(np.float32) / 255.0

        # convert image from (H, W, C) to (C, H, W)
        image_data = np.transpose(image_data, (2, 0, 1))

        # add a batch dimension (1, C, H, W)
        image_data = np.expand_dims(image_data, axis=0)

        # the transpose above only produces a strided view; hand over a contiguous buffer
        return np.ascontiguousarray(image_data)

    def get_next(self):
        """
        Get the next batch of data for calibration.

        :return: A dictionary with the model's input tensor name as key and input data as value,
                 or None when every image has been served.
        """
        if self.index >= len(self.image_paths):
            return None

        # preprocess the next image
        image_path = self.image_paths[self.index]
        input_data = self.preprocess(image_path)

        # move to the next image
        self.index += 1

        return {self.input_name: input_data}

    def rewind(self):
        """
        Reset the reader so that the next `get_next` call starts again from the first image.
        """
        self.index = 0


In [7]:
def get_calibration_images(annotation_file, image_dir, num_images=100):
    """
    Pick calibration images from a COCO annotation file.

    The image ids of the dataset are sorted in ascending order and the first
    `num_images` of them are kept. Each kept id is turned into a path by joining
    `image_dir` with the `file_name` entry of that image's record.

    :param annotation_file: Path to the COCO annotations JSON file.
    :param image_dir: Directory where the COCO images are stored.
    :param num_images: Number of images to select for calibration.
    :return: List of image file paths.
    """
    dataset = COCO(annotation_file)
    chosen_ids = sorted(dataset.getImgIds())[:num_images]

    # loadImgs returns a list of records; a single id yields a one-element list
    return [
        os.path.join(image_dir, dataset.loadImgs(image_id)[0]['file_name'])
        for image_id in chosen_ids
    ]


### Create the ImageCalibrationDataReader

In [8]:
# specify the directories; anchored to the absolute Drive mount point ('/content/drive')
# so the cell does not depend on the kernel's current working directory
annotation_file = '/content/drive/MyDrive/PA2/datasets/coco/annotations/instances_val2017.json'
image_dir = '/content/drive/MyDrive/PA2/datasets/coco/val2017/'

# get the calibration images (the first 100 image ids, in ascending id order)
calibration_image_paths = get_calibration_images(annotation_file, image_dir, num_images=100)

# fail here with a clear message rather than later during calibration
assert calibration_image_paths, f"No calibration images found via '{annotation_file}'"

# create an instance of the ImageCalibrationDataReader
calibration_data_reader_1 = ImageCalibrationDataReader(calibration_image_paths)


loading annotations into memory...
Done (t=2.66s)
creating index...
index created!


### Quantize the model from fp32 to int8

In [9]:
# static quantization of the preprocessed fp32 model, calibrated with the reader created above
quantize_static(
    # files: fp32 input model and int8 output model
    model_input='yolov8x_preprocessed.onnx',
    model_output='yolov8x_quantized.onnx',
    # calibration: data source and MinMax calibration method
    calibration_data_reader=calibration_data_reader_1,
    calibrate_method=CalibrationMethod.MinMax,
    # representation: QDQ format, uint8 activations, int8 weights
    quant_format=QuantFormat.QDQ,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QInt8,
    # channelwise quantization with reduced quantization range
    per_channel=True,
    reduce_range=True,
    # nodes left unquantized; all of them live under '/model.22'
    # NOTE(review): presumably the output decoding ops of the detection head - confirm
    nodes_to_exclude=[
        '/model.22/Concat_3',
        '/model.22/Split',
        '/model.22/Sigmoid',
        '/model.22/dfl/Reshape',
        '/model.22/dfl/Transpose',
        '/model.22/dfl/Softmax',
        '/model.22/dfl/conv/Conv',
        '/model.22/dfl/Reshape_1',
        '/model.22/Slice_1',
        '/model.22/Slice',
        '/model.22/Add_1',
        '/model.22/Sub',
        '/model.22/Div_1',
        '/model.22/Concat_4',
        '/model.22/Mul_2',
        '/model.22/Concat_5',
    ],
)
