In [None]:
%load_ext autoreload
%autoreload 2

import thesis.runtimes
import thesis.benchmarking_models

In [None]:
# Build the dilated-convolution benchmark models (one size, three layers) and
# materialise them into a list so they can be indexed in later cells.
models = list(thesis.benchmarking_models.dilated_conv_models(n_sizes=1, n_layers=3))

In [None]:
# Pick the first element of the first entry in `models` as the model to tune.
# NOTE(review): presumably each entry is a tuple/list whose first item is the
# model itself - confirm against thesis.benchmarking_models.
model = models[0][0]

In [None]:
# Create the project's TVM runtime wrapper, passing "off" as its only argument.
# NOTE(review): "off" presumably selects the untuned configuration - confirm
# in thesis.runtimes.
runtime = thesis.runtimes.TVM("off")

# Convert the selected model. Later cells read runtime.mod, runtime.params and
# runtime.module, which are presumably populated by this call - TODO confirm.
runtime.convert(model)

In [None]:
import tvm.auto_scheduler as auto_scheduler
from tvm.autotvm.tuner import XGBTuner
from tvm import autotvm, relay
import tvm
from tvm.contrib import graph_executor

In [None]:
# Measurement settings handed to the AutoTVM local runner.
number, repeat = 10, 1
min_repeat_ms = 0  # tuning happens on a CPU, so this can stay at 0
timeout = 10  # seconds

# Runner that measures candidate configurations on the local machine.
runner = autotvm.LocalRunner(
    enable_cpu_cache_flush=True,
    min_repeat_ms=min_repeat_ms,
    timeout=timeout,
    repeat=repeat,
    number=number,
)

# Options consumed by the tuning and build cells further down.
# NOTE(review): "early_stopping" (100) is larger than "trials" (20), and the
# records file name mentions resnet-50 although the model tuned here comes from
# dilated_conv_models - confirm both are intended.
tuning_option = dict(
    tuner="xgb",
    trials=20,
    early_stopping=100,
    measure_option=autotvm.measure_option(
        runner=runner,
        builder=autotvm.LocalBuilder(build_func="default"),
    ),
    tuning_records="resnet-50-v2-autotuning.json",
)

In [None]:
# Extract the tunable AutoTVM tasks from the converted program's "main" function.
tasks = autotvm.task.extract_from_program(
    runtime.mod["main"],
    params=runtime.params,
    target="llvm -mcpu=haswell",
)

In [None]:
# Tune the extracted tasks sequentially, appending every measurement to the
# tuning-records file so the best configurations can be applied at build time.
for i, task in enumerate(tasks):
    prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

    # A task cannot be tried more often than it has configurations. Compute the
    # effective trial count once so the tuner and the progress bar agree;
    # otherwise the bar never reaches 100% for small configuration spaces.
    n_trial = min(tuning_option["trials"], len(task.config_space))

    tuner_obj = XGBTuner(task, loss_type="rank")
    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[
            autotvm.callback.progress_bar(n_trial, prefix=prefix),
            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
        ],
    )

In [None]:
# Rebuild the model with the best configurations found during tuning.
# The build target must match the target the tasks were extracted and tuned
# for ("llvm -mcpu=haswell"); building for plain "llvm" would apply schedules
# that were measured with a different CPU feature set.
with autotvm.apply_history_best(tuning_option["tuning_records"]):
    with tvm.transform.PassContext(opt_level=3, config={}):
        lib = relay.build(
            runtime.mod["main"],
            target="llvm -mcpu=haswell",
            params=runtime.params,
        )

# Instantiate the compiled graph on CPU device 0 ("llvm" maps to the CPU device).
dev = tvm.device("llvm", 0)
module = graph_executor.GraphModule(lib["default"](dev))

In [None]:
import timeit
import numpy as np

# Benchmark the untuned module: `timing_repeat` samples, each timing
# `timing_number` calls to run().
timing_number = 1
timing_repeat = 10

# Raw samples are total seconds per batch of `timing_number` runs.
unoptimized = timeit.Timer(lambda: runtime.module.run()).repeat(
    repeat=timing_repeat, number=timing_number
)
# Convert to milliseconds per single run.
unoptimized = np.array(unoptimized) * 1000 / timing_number
# Summarise the samples.
unoptimized = {
    label: stat(unoptimized)
    for label, stat in (("mean", np.mean), ("median", np.median), ("std", np.std))
}

print("unoptimized: %s" % (unoptimized,))

In [None]:
import timeit
import numpy as np

# Benchmark the tuned module. The timing parameters deliberately match the
# unoptimized measurement (number=1, repeat=10) so the two results are taken
# under identical conditions and can be compared directly.
timing_number = 1
timing_repeat = 10
optimized = (
    np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
    * 1000  # seconds -> milliseconds
    / timing_number  # per single run
)
optimized = {"mean": np.mean(optimized), "median": np.median(optimized), "std": np.std(optimized)}


print("optimized: %s" % (optimized))