In [None]:
%load_ext autoreload
%autoreload 2

from typing import List
import os
import note_seq
import tensorflow as tf
from codetiming import Timer
import tqdm
import gin
import numpy as np
from rich.table import Table
from rich.console import Console
from rich.jupyter import print as pprint

import tf2onnx.convert
import onnxruntime as ort
import ddsp
import ddsp.training
from ddsp.losses import SpectralLoss
from ddsp.training.ddsp_export import get_representative_dataset
import openvino.runtime
from onnxruntime.quantization import quantize_dynamic, quantize_static, QuantType, QuantFormat, CalibrationDataReader

from thesis.notebook_util import play_audio

gin.enter_interactive_mode()

In [None]:
class InferenceDecoder:
    """Common callable interface of the decoder backends compared in this notebook.

    Subclasses override ``__call__``; the base implementation only raises.
    """

    def __call__(self, f0_scaled, pw_scaled):
        """Run the decoder on the two inputs.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()


class TFLiteDecoder(InferenceDecoder):
    """Decoder backend that runs a TFLite model through its signature runner."""

    def __init__(self, model_path):
        """Load the TFLite model and obtain its signature runner.

        Args:
            model_path: Path of the TFLite model file; its base name becomes
                part of ``self.name``.
        """
        model_file = os.path.basename(model_path)
        self.name = f"TFLite: {model_file}"

        tflite_interpreter = tf.lite.Interpreter(model_path)
        self.my_signature = tflite_interpreter.get_signature_runner()

    def __call__(self, f0_scaled, pw_scaled):
        """Call the signature runner with both inputs and return its result unchanged."""
        return self.my_signature(f0_scaled=f0_scaled, pw_scaled=pw_scaled)


class TensorFlowDecoder(InferenceDecoder):
    """Decoder backend that calls the ``decoder`` of an in-memory model directly."""

    def __init__(self, model):
        """Keep a reference to ``model``; ``__call__`` uses only ``model.decoder``."""
        self.model = model
        self.name = "TensorFlow"

    def __call__(self, f0_scaled, pw_scaled):
        """Pass both inputs to ``self.model.decoder`` as one dict and return its output."""
        decoder_inputs = {"f0_scaled": f0_scaled, "pw_scaled": pw_scaled}
        return self.model.decoder(decoder_inputs)


class ONNXRuntimeDecoder(InferenceDecoder):
    """Decoder backend that runs an ONNX model with ONNX Runtime."""

    def __init__(self, model_path, params_as_list=False):
        """Open an inference session for the model.

        Args:
            model_path: Path of the ONNX model file, used exactly as given;
                its base name becomes part of ``self.name``.
            params_as_list: When true, inputs are fed under the names
                ``args_0`` / ``args_1`` instead of ``f0_scaled`` / ``pw_scaled``.
        """
        self.name = f"ONNX Runtime: {os.path.basename(model_path)}"
        # Open model_path as given, like the other decoders do. Joining it with
        # a notebook-global `model_dir` made construction fail with NameError
        # unless that global had already been assigned by some other cell.
        self.session = ort.InferenceSession(model_path)
        self.params_as_list = params_as_list

    def __call__(self, f0_scaled, pw_scaled):
        """Run the session and return the requested outputs keyed by name.

        Args:
            f0_scaled: Object with a ``.numpy()`` method; indexed as
                ``[0, :, 0]``, so a rank-3 array is expected.
            pw_scaled: Same requirements as ``f0_scaled``.

        Returns:
            Dict mapping ``amps``, ``harmonic_distribution`` and
            ``noise_magnitudes`` to the corresponding session outputs.
        """
        keys = ["amps", "harmonic_distribution", "noise_magnitudes"]

        if self.params_as_list:
            f0_name, pw_name = "args_0", "args_1"
        else:
            f0_name, pw_name = "f0_scaled", "pw_scaled"

        params = {
            f0_name: f0_scaled.numpy()[0, :, 0],
            pw_name: pw_scaled.numpy()[0, :, 0],
        }

        features = self.session.run(keys, params)

        return dict(zip(keys, features))


class OpenVINODecoder(InferenceDecoder):
    """Decoder backend that runs an OpenVINO model compiled for the ``CPU`` device."""

    def __init__(self, model_path):
        """Read and compile the model, then create one reusable infer request.

        Args:
            model_path: Path handed to ``Core.read_model``, e.g.
                "/Volumes/euler/export/openvino/model_unquantized.xml".
        """
        self.name = f"OpenVINO: {os.path.basename(model_path)}"
        self.ie = openvino.runtime.Core()
        loaded_model = self.ie.read_model(model=model_path)
        self.compiled_model = self.ie.compile_model(model=loaded_model, device_name="CPU")
        # Input names in the order reported by the compiled model; __call__
        # feeds f0_scaled to the first and pw_scaled to the second.
        self.input_names = [port.any_name for port in self.compiled_model.inputs]
        self.request = self.compiled_model.create_infer_request()

    def __call__(self, f0_scaled, pw_scaled):
        """Run one inference and return the three output tensors' data by name."""
        f0_name = self.input_names[0]
        pw_name = self.input_names[1]

        self.request.infer({f0_name: f0_scaled[0, :, 0], pw_name: pw_scaled[0, :, 0]})

        output_names = ("amps", "harmonic_distribution", "noise_magnitudes")
        return {key: self.request.get_tensor(key).data for key in output_names}

In [None]:
def main(
    decoders: List[InferenceDecoder],
    model_dir: str = "/Volumes/euler/",
    dataset_pattern: str = "/Users/vaclav/prog/thesis/data/violin2/violin2.tfrecord*",
    max_batches: int = 51,
):
    """Compare decoder backends on spectral loss and inference time.

    Restores the latest checkpoint found in ``model_dir``, adds a
    ``TensorFlowDecoder`` wrapping the restored model to the decoders under
    test, runs every decoder on batches of the representative dataset and
    prints a summary table.

    Args:
        decoders: Decoders to benchmark. Each needs a unique ``name`` and must
            be callable as ``decoder(f0_scaled=..., pw_scaled=...)``, returning
            a mutable mapping of features. The passed list is not modified.
        model_dir: Directory holding the operative gin config and checkpoints.
        dataset_pattern: File pattern passed to ``get_representative_dataset``.
        max_batches: Number of batches to evaluate; at least one batch is
            always processed.

    Raises:
        AssertionError: if no checkpoint is found or decoder names collide.
    """
    # Work on a copy: appending the TensorFlow reference decoder to the
    # caller's list would leak it to the caller and make a second call with
    # the same list fail the unique-name check below.
    decoders = list(decoders)

    with gin.unlock_config():
        operative_config = ddsp.training.train_util.get_latest_operative_config(model_dir)
        gin.parse_config_file(operative_config, skip_unknown=True)

    representative_dataset = get_representative_dataset(
        dataset_pattern,
        include_f0_hz=True,
    )

    model = ddsp.training.models.get_model()
    checkpoint_path = tf.train.latest_checkpoint(model_dir, latest_filename=None)

    assert checkpoint_path is not None, f"No checkpoint found in {model_dir}"

    model.restore(checkpoint_path, verbose=False)

    decoders.append(TensorFlowDecoder(model))

    loss_fn = SpectralLoss(logmag_weight=1.0)
    assert loss_fn.logmag_weight == 1.0

    # Names key both the loss lists and the timers, so they must be unique.
    decoder_names = set()
    for decoder in decoders:
        assert decoder.name not in decoder_names, f"Non-unique decoder name {decoder.name}"
        decoder_names.add(decoder.name)

    losses = {name: [] for name in decoder_names}

    for i, batch in enumerate(tqdm.tqdm(representative_dataset())):
        outputs = {}

        for decoder in decoders:
            # Only the decoder call itself is timed.
            with Timer(decoder.name, logger=None):
                features = decoder(f0_scaled=batch[0], pw_scaled=batch[1])

            features["f0_hz"] = batch[2]

            # Finish the computation in normal TensorFlow
            outputs[decoder.name] = model.processor_group(features, return_outputs_dict=True)

            # batch[3] is the target the synthesized signal is compared against.
            losses[decoder.name].append(loss_fn(batch[3], outputs[decoder.name]["signal"]))

        if i + 1 >= max_batches:
            break

    table = Table(title="Summary")
    table.add_column("Name")
    table.add_column("Loss")
    table.add_column("Inference time")

    for decoder in decoders:
        # The first recorded timing is dropped before averaging.
        # NOTE(review): Timer.timers looks like state shared across all Timer
        # instances, so re-running main() in the same kernel would mix timings
        # from earlier runs into this mean -- confirm against codetiming.
        recorded_timings = np.array(Timer.timers._timings[decoder.name])[1:]
        table.add_row(
            decoder.name,
            f"{np.mean(losses[decoder.name]):.3f}",
            f"{recorded_timings.mean():.4f}",
        )

    pprint(table)

In [None]:
# Model root directory and the directory holding its exported variants.
model_dir = "/Volumes/euler"
export_dir = "/Volumes/euler/export"

# Benchmark one export per backend. Other exports that can be swapped in:
#   TFLiteDecoder       "tflite/model_dynamic_range_quantized.tflite"
#   ONNXRuntimeDecoder  "onnx/model_onnx_q_dynamic.onnx"
#   ONNXRuntimeDecoder  "onnx/model_from_keras.onnx" (with params_as_list=True)
#   ONNXRuntimeDecoder  "onnx/model_onnx_q_static.onnx"
main([
    backend(os.path.join(export_dir, exported_file))
    for backend, exported_file in (
        (TFLiteDecoder, "tflite/model_unquantized.tflite"),
        (ONNXRuntimeDecoder, "onnx/model_unquantized.onnx"),
        (OpenVINODecoder, "openvino/model_unquantized.xml"),
    )
])

In [None]:
# Source float32 model and destination of the statically quantized model.
model_fp32 = os.path.join(model_dir, "export/onnx/model_unquantized.onnx")
model_q_static = os.path.join(model_dir, "export/onnx/model_onnx_q_static.onnx")

# NOTE: ONNXDataReader is defined in a later cell of this notebook; that cell
# has to be executed before this one.
quantize_static(
    model_fp32,
    model_q_static,
    calibration_data_reader=ONNXDataReader(),
    quant_format=QuantFormat.QOperator,
    per_channel=False,
    # The split node of the dilated-conv decoder is kept out of quantization.
    nodes_to_exclude=["StatefulPartitionedCall/dilated_conv_decoder_1/split"],
    # activation_type=QuantType.QUInt8,
    # weight_type=QuantType.QUInt8,
)

In [None]:
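# Notes on quantization attempts, grouped by quant_format.
# Notation: <activation type><weight type>, S8 = QuantType.QInt8 (signed), U8 = QuantType.QUInt8 (unsigned).
# QDQ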
# S8S8 - Could not find an implementation for QuantizeLinear(13) node
# U8U8 - same
# U8S8 (activation unsigned, weights signed) - same
# S8U8 - ONNXRuntime quantization doesn't support data format:activation_type=QuantType.QInt8, weight_type = QuantType.QUInt8

# QOperator
# S8S8 - works, huge loss, 2x slower than unquantized
# U8U8 - works, huge loss, 2x slower than unquantized
# U8S8 - works, huge loss, 2x slower than unquantized
# S8U8 - ONNXRuntime quantization doesn't support data format:activation_type=QuantType.QInt8, weight_type = QuantType.QUInt8

# Please use QuantFormat.QDQ for activation type QInt8 and weight type QInt8. Or it will lead to bad performance on x64

In [None]:
# Breaks!
# TFLiteDecoder(os.path.join(model_dir, "export/tflite/model_quantized.tflite"))

In [None]:
from onnxruntime.quantization import quantize_dynamic, quantize_static, QuantType, QuantFormat, CalibrationDataReader

# Source float32 model and destination of the dynamically quantized model.
model_fp32 = os.path.join(model_dir, "export/onnx/model_unquantized.onnx")
model_q_dynamic = os.path.join(model_dir, "export/onnx/model_onnx_q_dynamic.onnx")

# Quantize with QUInt8 weights; whatever the call returns stays available as
# `quantized_model`.
quantized_model = quantize_dynamic(model_fp32, model_q_dynamic, weight_type=QuantType.QUInt8)

In [None]:
class ONNXDataReader(CalibrationDataReader):
    """Calibration data reader that serves batches of the representative dataset."""

    def __init__(self):
        """Create the representative dataset iterator used by ``get_next``."""
        make_dataset = get_representative_dataset("/Users/vaclav/prog/thesis/data/violin2/violin2.tfrecord*")
        self.dataset = make_dataset()

    def get_next(self):
        """Return the next calibration input dict, or ``None`` once the dataset is exhausted."""
        sample = next(self.dataset, None)
        if sample is None:
            return None

        # The first two batch entries are fed as f0_scaled and pw_scaled, each
        # indexed as [0, :, 0].
        f0_scaled, pw_scaled = sample[0], sample[1]
        return {
            "f0_scaled": f0_scaled.numpy()[0, :, 0],
            "pw_scaled": pw_scaled.numpy()[0, :, 0],
        }

In [None]:
# Statically quantize the model_from_keras.onnx export in QDQ format, with
# QuantType.QUInt8 for both activations and weights.
quantize_static(
    os.path.join(model_dir, "export/onnx/model_from_keras.onnx"),
    os.path.join(model_dir, "export/onnx/model_from_keras_q_static.onnx"),
    calibration_data_reader=ONNXDataReader(),
    quant_format=QuantFormat.QDQ,
    per_channel=False,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QUInt8,
)

In [None]:
# Open an ONNX Runtime session on the model_from_keras.onnx export; the
# following cells inspect and run it through the global `session`.
session = ort.InferenceSession(os.path.join(export_dir, "onnx/model_from_keras.onnx"))

In [None]:
# Display the name of the session's second input.
session.get_inputs()[1].name

In [None]:
# Smoke-run the session: feed an all-zero float32 vector of length 201 to each
# of the two inputs and display the three requested outputs.
session.run(
    ["amps", "harmonic_distribution", "noise_magnitudes"],
    {input_name: np.zeros([201], dtype=np.float32) for input_name in ("args_0", "args_1")},
)

In [None]:
import tf2onnx
import onnx
import onnxruntime
from onnxruntime.quantization import quantize_dynamic, quantize_static, QuantType, QuantFormat, CalibrationDataReader

class DummyDataReader(CalibrationDataReader):
    """Calibration data reader producing ``[1, 10, 10, 1]`` inputs for the dummy model."""

    def __init__(self):
        """Create the representative dataset iterator used by ``get_next``."""
        make_dataset = get_representative_dataset("/Users/vaclav/prog/thesis/data/violin2/violin2.tfrecord*")
        self.dataset = make_dataset()

    def get_next(self):
        """Return the next calibration input dict, or ``None`` once the dataset is exhausted."""
        sample = next(self.dataset, None)
        if sample is None:
            return None

        # Take the first 10*10 values along the middle axis of the first batch
        # entry and lay them out in the [1, 10, 10, 1] shape fed to "input".
        first_values = sample[0].numpy()[0, :10*10, 0]
        return {"input": first_values.reshape([1, 10, 10, 1])}

def test_dummy():
    """Quantize a one-layer Conv2D model statically and run it once.

    Builds the Keras model, converts it to ONNX (opset 13), saves it under
    /tmp, quantizes it with ``QuantFormat.QOperator`` and per-channel
    quantization using ``DummyDataReader`` for calibration, then prints the
    first output's name and the result of running the quantized model on an
    all-ones input.
    """
    input_shape = [1, 10, 10, 1]
    float_path = "/tmp/dummy_model.onnx"
    quantized_path = "/tmp/dummy_model_q.onnx"

    conv_layer = tf.keras.layers.Conv2D(filters=16, kernel_size=3, activation=tf.nn.relu)
    model = tf.keras.Sequential([conv_layer])
    model.build(input_shape)

    input_spec = tf.TensorSpec((1, 10, 10, 1), name="input")
    onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=[input_spec], opset=13)
    onnx.save(onnx_model, float_path)

    # Static quantization only; quantize_dynamic and QuantFormat.QDQ were the
    # alternatives tried here, with activation/weight types left at defaults.
    quantize_static(
        float_path,
        quantized_path,
        calibration_data_reader=DummyDataReader(),
        quant_format=QuantFormat.QOperator,
        per_channel=True,
    )

    session = onnxruntime.InferenceSession(quantized_path)
    output_name = session.get_outputs()[0].name
    print(output_name)

    all_ones = np.ones(input_shape, dtype=np.float32)
    [features] = session.run([output_name], {"input": all_ones})
    print(features)


# Run the dummy-model quantization check defined by test_dummy().
test_dummy()