In [None]:
%load_ext autoreload
%autoreload 2

# White background for matplotlib plots
%config InlineBackend.print_figure_kwargs={'facecolor': "w"}

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import torch
import tqdm.notebook as tqdm

import thesis.quantization_benchmark as qb

In [None]:
# Dense benchmark: the same two-layer MLP (256 hidden units with ReLU, 128
# outputs) is defined once in Keras and once in PyTorch, then handed to every
# runtime listed below.
input_shape = (32, 32, 3)

dense_keras = tf.keras.Sequential(
    [
        tf.keras.layers.Input(input_shape),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation=tf.nn.relu),
        tf.keras.layers.Dense(128),
    ]
)

dense_torch = torch.nn.Sequential(
    torch.nn.Flatten(),
    # np.prod replaces np.product (deprecated in NumPy 1.25, removed in 2.0);
    # int() hands torch a plain Python integer instead of a NumPy scalar.
    torch.nn.Linear(in_features=int(np.prod(input_shape)), out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=128),
)

runtimes = [
    qb.TensorFlow(),
    qb.PyTorch(quantization_mode="off", use_torchscript=True),
    qb.PyTorch(quantization_mode="dynamic", use_torchscript=True),
    qb.PyTorch(quantization_mode="static", use_torchscript=True),
    qb.TFLite(quantization_mode="off"),
    qb.TFLite(quantization_mode="dynamic"),
    qb.TFLite(quantization_mode="static"),
    qb.ONNXRuntime(quantization_mode="off"),
    qb.ONNXRuntime(quantization_mode="dynamic"),
    qb.ONNXRuntime(quantization_mode="static_qoperator"),
    qb.ONNXRuntime(quantization_mode="static_qdq"),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="off"),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="dynamic"),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="static_qoperator"),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="static_qdq"),
    qb.OpenVINO(quantization_mode="off"),
    qb.OpenVINO(quantization_mode="dynamic"),
    # qb.OpenVINO(quantization_mode="static_qoperator"), # Doesn't work
    qb.OpenVINO(quantization_mode="static_qdq"),
]

# The result is kept in `times`; the following cells plot and aggregate it.
times = qb.benchmark(dense_keras, dense_torch, runtimes, n_iterations=1000);

In [None]:
# Plot the benchmark results held in `times` using the project's plotting helper.
qb.plot_runs(times)

In [None]:
# Per-runtime summary of the benchmark: mean and standard deviation of loss and
# inference time, plus the number of recorded iterations. Named aggregation
# yields the flat "<column>_<statistic>" column names directly.
df = times.groupby("name").agg(
    loss_mean=("loss", "mean"),
    loss_std=("loss", "std"),
    inference_time_mean=("inference_time", "mean"),
    inference_time_std=("inference_time", "std"),
    iteration_count=("iteration", "count"),
)
df

## CNN model

In [None]:
# CNN benchmark: two 3x3 convolutions with 64 filters each, followed by a
# max-pool with window 32 - 4 = 28, defined once in Keras and once in PyTorch.
conv_kwargs = dict(filters=64, kernel_size=3, activation=tf.nn.relu)

cnn_keras = tf.keras.Sequential(
    [
        tf.keras.layers.Input((32, 32, 3)),
        tf.keras.layers.Conv2D(**conv_kwargs),
        tf.keras.layers.Conv2D(**conv_kwargs),
        # tf.keras.layers.Dense(128),
        tf.keras.layers.MaxPooling2D(32 - 4),
    ]
)

cnn_torch = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3),
    torch.nn.ReLU(),
    torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(32 - 4),
)

# Same runtime line-up as the dense benchmark, except that the dynamically
# quantized ONNX entries are created with unsigned_weights=True.
runtimes = [
    qb.TensorFlow(),
    *(qb.PyTorch(quantization_mode=mode, use_torchscript=True) for mode in ("off", "dynamic", "static")),
    *(qb.TFLite(quantization_mode=mode) for mode in ("off", "dynamic", "static")),
    qb.ONNXRuntime(quantization_mode="off"),
    qb.ONNXRuntime(quantization_mode="dynamic", unsigned_weights=True),
    qb.ONNXRuntime(quantization_mode="static_qoperator"),
    qb.ONNXRuntime(quantization_mode="static_qdq"),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="off"),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="dynamic", unsigned_weights=True),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="static_qoperator"),
    qb.ONNXRuntimeFromPyTorch(quantization_mode="static_qdq"),
    qb.OpenVINO(quantization_mode="off"),
    qb.OpenVINO(quantization_mode="dynamic"),
    # qb.OpenVINO(quantization_mode="static_qoperator"), # Doesn't work
    qb.OpenVINO(quantization_mode="static_qdq"),
]

qb.benchmark(cnn_keras, cnn_torch, runtimes, n_iterations=300);

In [None]:
def get_runtimes(unsigned_weights=False):
    """Create the reduced set of runtimes used by the size-sweep benchmarks.

    Args:
        unsigned_weights: Passed through to the dynamically quantized
            ``qb.ONNXRuntime`` entry.

    Returns:
        A newly built list of runtime objects; entries that were found to
        perform badly or not to work are left out (see the inline notes).
    """
    return [
        qb.TensorFlow(),
        *(qb.PyTorch(quantization_mode=mode, use_torchscript=True) for mode in ("off", "dynamic", "static")),
        qb.TFLite(quantization_mode="off"),
        # qb.TFLite(quantization_mode="dynamic"), # bad for CNN
        # qb.TFLite(quantization_mode="static"), # bad for CNN
        qb.ONNXRuntime(quantization_mode="off"),
        qb.ONNXRuntime(quantization_mode="dynamic", unsigned_weights=unsigned_weights),
        # qb.ONNXRuntime(quantization_mode="static_qoperator"), # bad
        qb.ONNXRuntime(quantization_mode="static_qdq"),
        # These generally seem to be worse
        # qb.ONNXRuntimeFromPyTorch(quantization_mode="off"),
        # qb.ONNXRuntimeFromPyTorch(quantization_mode="dynamic", unsigned_weights=unsigned_weights),
        # qb.ONNXRuntimeFromPyTorch(quantization_mode="static_qoperator"), # bad
        # qb.ONNXRuntimeFromPyTorch(quantization_mode="static_qdq"),
        qb.OpenVINO(quantization_mode="off"),
        # qb.OpenVINO(quantization_mode="dynamic", unsigned_weights=unsigned_weights), # bad
        # qb.OpenVINO(quantization_mode="static_qoperator"), # Doesn't work
        qb.OpenVINO(quantization_mode="static_qdq"),
    ]

In [None]:
# Dense size sweep: benchmark an MLP whose hidden layer doubles in width at
# every step (2, 4, ..., 2 ** n_sizes units) and collect one result set per width.
runs_per_size = []
n_sizes = 10

input_shape = (32, 32, 3)
# np.prod replaces np.product (deprecated in NumPy 1.25, removed in 2.0);
# the flattened input size does not change between iterations, so compute it once.
n_inputs = int(np.prod(input_shape))

for i in tqdm.trange(n_sizes):
    out_features = 2 ** (i + 1)

    dense_keras = tf.keras.Sequential(
        [
            tf.keras.layers.Input(input_shape),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(out_features, activation=tf.nn.relu),
            tf.keras.layers.Dense(1),
        ]
    )

    dense_torch = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(in_features=n_inputs, out_features=out_features),
        torch.nn.ReLU(),
        torch.nn.Linear(in_features=out_features, out_features=1),
    )

    # Fresh runtime objects for every model size.
    runtimes = get_runtimes()

    runs = qb.benchmark(dense_keras, dense_torch, runtimes, n_iterations=500)
    # Tag the results with the sweep index (not the layer width) for grouping later.
    runs["network_size"] = i
    runs_per_size.append(runs)

In [None]:
# Combine the per-size benchmark runs into one object and persist it as CSV
# (the path is relative to the notebook's working directory).
df = pd.concat(runs_per_size)
df.to_csv("../data/benchmarks/0518-dense-timing-2.csv")

In [None]:
# Mean inference time per (runtime name, network size), plus that time relative
# to the "ONNXRuntime" entry measured at the same network size.
times = df.groupby(["name", "network_size"]).agg({"inference_time": "mean"})

# Baseline times indexed by network_size; raises KeyError when no "ONNXRuntime"
# rows exist, just like the label lookup it replaces.
dense_baseline = times.xs("ONNXRuntime", level="name")["inference_time"]

times = times.reset_index()
# Vectorised division instead of assigning into rows yielded by iterrows():
# those rows can be copies, so such writes are not guaranteed to reach the frame
# and relative_time could silently stay at its 0.0 placeholder.
times["relative_time"] = times["inference_time"] / times["network_size"].map(dense_baseline)
times

In [None]:
# Relative inference time per network size for every runtime except TensorFlow.
# seaborn is imported in the notebook's top import cell so this cell also runs
# on a fresh kernel.
without_tensorflow = times.loc[times["name"] != "TensorFlow"]
g = sns.relplot(data=without_tensorflow, hue="name", x="network_size", y="relative_time", kind="line")
g.set(title="Dense");  # trailing ';' hides the FacetGrid repr

In [None]:
# One line plot per runtime family, showing relative inference time against
# network size for the dense sweep.
groups = ["ONNXRuntime", "OpenVINO", "TFLite", "TorchScript"]

for group in groups:
    names = times["name"]
    # Keep runtimes of this family, excluding the "_openvino" and
    # "static_qoperator" variants.
    keep = (
        names.str.startswith(group)
        & ~names.str.contains("_openvino")
        & ~names.str.contains("static_qoperator")
    )

    g = sns.relplot(data=times.loc[keep], hue="name", x="network_size", y="relative_time", kind="line")
    # Title is set through the returned FacetGrid; `plt` is never imported in
    # this notebook, so plt.title() raised a NameError.
    g.set(title=group)

In [None]:
# Display the concatenated dense benchmark runs for a quick visual check.
df

In [None]:
# CNN size sweep: benchmark the two-convolution network on square inputs whose
# side length doubles at every step (8, 16, 32, 64).
runs_per_size = []
n_sizes = 4
n_channels = 64

for size_index in tqdm.trange(n_sizes):
    side = 8 * 2 ** size_index
    pool_window = side - 4

    cnn_keras = tf.keras.Sequential(
        [
            tf.keras.layers.Input((side, side, 3)),
            tf.keras.layers.Conv2D(filters=n_channels, kernel_size=3, activation=tf.nn.relu),
            tf.keras.layers.Conv2D(filters=n_channels, kernel_size=3, activation=tf.nn.relu),
            tf.keras.layers.MaxPooling2D(pool_window),
        ]
    )

    cnn_torch = torch.nn.Sequential(
        torch.nn.Conv2d(in_channels=3, out_channels=n_channels, kernel_size=3),
        torch.nn.ReLU(),
        torch.nn.Conv2d(in_channels=n_channels, out_channels=n_channels, kernel_size=3),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(pool_window),
    )

    # NOTE(review): this uses the project-level qb.get_runtimes rather than the
    # get_runtimes helper defined in this notebook - confirm that is intended.
    runtimes = qb.get_runtimes(good_only=True, unsigned_weights=True)

    runs = qb.benchmark(cnn_keras, cnn_torch, runtimes, n_iterations=500)
    # Tag the results with the sweep index (not the side length) for grouping later.
    runs["network_size"] = size_index
    runs_per_size.append(runs)

In [None]:
# Combine the benchmark runs collected for each CNN input size into one object.
df_cnn = pd.concat(runs_per_size)

In [None]:
# Persist the combined CNN runs as CSV (the path is relative to the notebook's working directory).
df_cnn.to_csv("../data/benchmarks/0518-cnn-timing-2.csv")

In [None]:
# Mean inference time per (runtime name, network size) for the CNN sweep, plus
# that time relative to the "ONNXRuntime" entry at the same network size.
times_cnn = df_cnn.groupby(["name", "network_size"]).agg({"inference_time": "mean"})

# Baseline times indexed by network_size; raises KeyError when no "ONNXRuntime"
# rows exist, just like the label lookup it replaces.
cnn_baseline = times_cnn.xs("ONNXRuntime", level="name")["inference_time"]

times_cnn = times_cnn.reset_index()
# Vectorised division instead of assigning into rows yielded by iterrows():
# those rows can be copies, so such writes are not guaranteed to reach the frame
# and relative_time could silently stay at its 0.0 placeholder.
times_cnn["relative_time"] = times_cnn["inference_time"] / times_cnn["network_size"].map(cnn_baseline)
times_cnn

In [None]:
# Relative inference time per network size for all runtimes of the CNN sweep.
cnn_grid = sns.relplot(
    data=times_cnn,
    x="network_size",
    y="relative_time",
    hue="name",
    kind="line",
)
# cnn_grid.set(yscale="log")
cnn_grid.set(title="CNN")

In [None]:
# One line plot per runtime family, showing relative inference time against
# network size for the CNN sweep.
groups = ["ONNXRuntime", "OpenVINO", "TFLite", "TorchScript"]

for group in groups:
    names = times_cnn["name"]
    # Keep runtimes of this family, excluding the "_openvino" and
    # "static_qoperator" variants.
    keep = (
        names.str.startswith(group)
        & ~names.str.contains("_openvino")
        & ~names.str.contains("static_qoperator")
    )

    g = sns.relplot(data=times_cnn.loc[keep], hue="name", x="network_size", y="relative_time", kind="line")
    # Title is set through the returned FacetGrid; `plt` is never imported in
    # this notebook, so plt.title() raised a NameError.
    g.set(title=group)

In [None]:
# Sanity check: number of per-size result sets currently held in runs_per_size.
len(runs_per_size)