In [1]:
import numpy as np
import onnxruntime
import time
from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
import cv2
import os
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# Run ONNX Runtime's quantization pre-processing module on each fp32 sub-model
# (decomposer, combiner, morpher, rotator, editor) and write the results to .\preprocessed\.
# NOTE(review): the paths use Windows separators and are relative to the notebook's
# working directory; presumably .\preprocessed\ has to exist beforehand -- confirm.
!python -m onnxruntime.quantization.preprocess --input ..\data\models\tha3\standard\fp32\decomposer.onnx --output .\preprocessed\decomposer.onnx
!python -m onnxruntime.quantization.preprocess --input ..\data\models\tha3\standard\fp32\combiner.onnx --output .\preprocessed\combiner.onnx
!python -m onnxruntime.quantization.preprocess --input ..\data\models\tha3\standard\fp32\morpher.onnx --output .\preprocessed\morpher.onnx
!python -m onnxruntime.quantization.preprocess --input ..\data\models\tha3\standard\fp32\rotator.onnx --output .\preprocessed\rotator.onnx
!python -m onnxruntime.quantization.preprocess --input ..\data\models\tha3\standard\fp32\editor.onnx --output .\preprocessed\editor.onnx

In [3]:
from onnxruntime.quantization import CalibrationDataReader

In [4]:
def _preprocess_images(images_folder: str) -> list:
    """
    Load every 8-bit, 4-channel image in a folder and lay it out as a 512x512 array.

    Each image is stretched to 450x900 (width x height), pasted onto a zero-filled
    1024x1024x4 canvas with its top-left corner at column 287 / row 40, and the
    canvas is then downscaled to 512x512.

    parameter images_folder: path to folder storing images
    return: list of uint8 arrays of shape (512, 512, 4), in sorted filename order.
        Entries that OpenCV cannot decode, and images that are not 8-bit with
        exactly four channels, are skipped.
    """
    # Layout constants.
    # NOTE(review): presumably chosen to match the framing the models expect -- confirm.
    canvas_size = 1024
    figure_width, figure_height = 450, 900
    left, top = 287, 40
    output_size = 512

    batch_data = []
    # os.listdir() gives no ordering guarantee; sort so the sample order is reproducible.
    for image_name in tqdm(sorted(os.listdir(images_folder))):
        image_filepath = os.path.join(images_folder, image_name)
        img = cv2.imread(image_filepath, cv2.IMREAD_UNCHANGED)
        if img is None:
            # Not something OpenCV could decode.
            continue
        if img.ndim != 3 or img.shape[2] != 4 or img.dtype != np.uint8:
            # IMREAD_UNCHANGED keeps the file's own layout, so grayscale, 3-channel
            # or 16-bit files can show up here. They cannot be pasted into the
            # 4-channel uint8 canvas without a broadcast error or value wrap-around.
            continue
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (figure_width, figure_height), interpolation=cv2.INTER_LANCZOS4)
        padding_img = np.zeros((canvas_size, canvas_size, 4), np.uint8)
        padding_img[top:top + figure_height, left:left + figure_width, :] = img
        img = cv2.resize(padding_img, (output_size, output_size), interpolation=cv2.INTER_LANCZOS4)
        batch_data.append(img)

    return batch_data

In [5]:
class DecomposerDataReader(CalibrationDataReader):
    """Calibration reader that serves preprocessed images under the model's first input name."""

    def __init__(self, calibration_image_folder: str, model_path: str):
        """
        parameter calibration_image_folder: folder passed on to _preprocess_images
        parameter model_path: ONNX model whose first input name is used as the feed key
        """
        # Iterator over the feed dicts; built lazily on the first get_next() call.
        self.enum_data = None

        # The session is opened only to look up the name of the model's first input.
        lookup_session = onnxruntime.InferenceSession(model_path, None)

        # Load and preprocess all calibration images up front.
        self.data_list = _preprocess_images(calibration_image_folder)
        self.input_name = lookup_session.get_inputs()[0].name
        self.datasize = len(self.data_list)

    def get_next(self):
        """Return the next {input_name: image} feed, or None when all images were served."""
        if self.enum_data is None:
            feeds = [{self.input_name: image} for image in self.data_list]
            self.enum_data = iter(feeds)
        try:
            return next(self.enum_data)
        except StopIteration:
            return None

    def rewind(self):
        """Drop the current iterator so the next get_next() call starts from the first image."""
        self.enum_data = None

In [6]:
# Build the calibration reader: images are read from the first path, and the feed
# name is taken from the fp32 decomposer model given as the second path.
# NOTE(review): both arguments are absolute, machine-specific paths, while the other
# cells use paths relative to the notebook -- adjust them before running elsewhere.
dr = DecomposerDataReader('Z:/ComfyUI-aki-v1.5/output/', 'C:/EasyVtuber/data/models/tha3/standard/fp32/decomposer.onnx')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 298/298 [00:20<00:00, 14.67it/s]


In [7]:
# Make sure the output folder exists before the quantized model is written, so the
# cell also works on a fresh checkout.
os.makedirs('./quantized', exist_ok=True)

# Statically quantize the pre-processed decomposer, calibrating with the images in `dr`.
quantize_static(
    './preprocessed/decomposer.onnx',   # input: pre-processed fp32 model
    './quantized/decomposer.onnx',      # output: quantized model
    dr,                                 # calibration data reader
    quant_format=QuantFormat.QDQ,
    per_channel=True,
    weight_type=QuantType.QInt8,
    # These two nodes are left unquantized.
    # NOTE(review): the reason for excluding them is not recorded here -- confirm.
    nodes_to_exclude=['/Mul_3', '/Add'],
    extra_options={
        'ActivationSymmetric': True,
        'QuantizeBias': False,
    },
)



['ArgMax', 'Softmax', 'Unsqueeze', 'Clip', 'Add', 'Transpose', 'EmbedLayerNormalization', 'LayerNormalization', 'Split', 'Pad', 'Slice', 'GatherElements', 'MatMul', 'Reshape', 'BatchNormalization', 'Conv', 'Relu', 'InstanceNormalization', 'Gather', 'Sigmoid', 'LeakyRelu', 'MaxPool', 'Gemm', 'Where', 'Resize', 'AveragePool', 'Squeeze', 'Mul', 'Concat', 'ConvTranspose', 'GlobalAveragePool']

com.microsoft.nchwc
ai.onnx.ml
ai.onnx.training
ai.onnx.preview.training
com.microsoft
com.microsoft.experimental
org.pytorch.aten
com.microsoft.dml
[domain: ""
version: 16
]


In [8]:
# Open one inference session per model so their outputs can be compared side by side:
# the pre-processed (not yet quantized) decomposer and its quantized counterpart.
non_quantized_session = onnxruntime.InferenceSession(
    './preprocessed/decomposer.onnx',
    sess_options=None,
)
quantized_session = onnxruntime.InferenceSession(
    './quantized/decomposer.onnx',
    sess_options=None,
)

In [9]:
# Restart the calibration reader and take its first sample as the test input.
dr.rewind()
inp = dr.get_next()

# Run the same input through both models; passing None requests every output.
non_res = non_quantized_session.run(None, inp)
qt_res = quantized_session.run(None, inp)

# Mean squared error between the two models on the output at index 1.
np.square(qt_res[1] - non_res[1]).mean()

np.float32(0.00043852476)