In [None]:
%load_ext autoreload
%autoreload 2

from typing import Dict
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import glob
import wandb

# Apply seaborn's default plot theme.
# NOTE(review): this presumably resets matplotlib rcParams as well, which would be why
# the font/LaTeX overrides below come after it — confirm before reordering.
sns.set()

# matplotlib.use("pgf")
# Typeset all figure text with LaTeX in a serif (Palatino) font. The "pgf.*" keys
# configure the pgf backend, which the savefig calls in this notebook request explicitly.
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'font.serif': 'Palatino',
    'text.usetex': True,
    'pgf.rcfonts': False,
})

In [None]:
# Root directory of the benchmark runs; read_results() looks for
# <base_dir>/<subdir>/results.csv. The THESIS_BENCHMARK_DIR environment variable
# overrides the author's local default so the notebook can run on another machine
# without editing this cell.
base_dir = os.environ.get("THESIS_BENCHMARK_DIR", "/Users/vaclav/prog/thesis/data/benchmark_data/")


def read_results(subdir):
    """Load one benchmark run and derive the columns used for plotting.

    Reads ``<base_dir>/<subdir>/results.csv`` (first CSV column is the index) and adds:

    - ``runtime``: the part of ``name`` before the first underscore; DeepSparse rows
      are relabelled with their sparsity percentage (LaTeX-escaped percent sign).
    - ``sparsity``: 0.90 for rows whose name contains "sparse", otherwise 0.0.
    - ``quantization``: "static", "dynamic" or "off", taken from the name.
    - ``inference_time_ms``: ``inference_time_s`` converted to milliseconds.

    Only rows of the largest ``model_i`` are kept; the result is sorted by ``name``
    with a stable sort.

    Raises:
        AssertionError: if a sparse row is not a 0.90-sparsity DeepSparse model.
    """
    df = pd.read_csv(os.path.join(base_dir, subdir, "results.csv"), index_col=0)

    df["runtime"] = df["name"].str.split("_", n=1, expand=True)[0]

    # Literal (non-regex) matching: the "." in "0.90_sparse" must not act as a wildcard,
    # otherwise an unsupported sparsity tag could slip through the check below.
    sparse_rows_mask = df["name"].str.contains("sparse", regex=False)
    assert df.loc[sparse_rows_mask, "name"].str.contains("0.90_sparse", regex=False).all(), \
        "Unimplemented sparsity level"
    assert (df.loc[sparse_rows_mask, "runtime"] == "DeepSparse").all(), \
        "All sparse models are expected to be DeepSparse"
    df["sparsity"] = 0.0
    df.loc[sparse_rows_mask, "sparsity"] = 0.90

    # Raw strings keep the backslash that escapes "%" for LaTeX text rendering without
    # relying on Python's deprecated handling of unknown escape sequences.
    # All DeepSparse rows get the 0% label first; sparse rows are then overwritten.
    df.loc[df["runtime"] == "DeepSparse", "runtime"] = r"DeepSparse (0\%)"
    df.loc[sparse_rows_mask, "runtime"] = r"DeepSparse (90\%)"

    df["quantization"] = "off"
    df.loc[df["name"].str.contains("quant_static", regex=False), "quantization"] = "static"
    df.loc[df["name"].str.contains("quant_dynamic", regex=False), "quantization"] = "dynamic"

    # Only the largest model. Copy explicitly so the column assignment below writes to
    # an independent frame instead of a slice (avoids SettingWithCopyWarning).
    df = df.loc[df["model_i"] == df["model_i"].max()].copy()

    df["inference_time_ms"] = 1000 * df["inference_time_s"]
    df = df.sort_values("name", kind="stable")

    return df


In [None]:
# Directory that receives the generated figures. The THESIS_FIGURES_DIR environment
# variable overrides the author's local default so the notebook can run elsewhere.
plot_dir = os.environ.get("THESIS_FIGURES_DIR", "/Users/vaclav/prog/thesis/figures/")


# def make_barplot(df, filename):
#     fig = plt.figure()
#     sns.barplot(data=df, y="name", x='inference_time_s', ax=fig.gca(), ci=95)
#     plt.xlabel("Inference time (s)")
#     plt.ylabel("Runtime library")
#     fig.savefig(os.path.join(plot_dir, filename), bbox_inches="tight")

def make_table(df):
    df_latex_table = (df
                      .groupby(["runtime", "quantization"])
                      .agg({"inference_time_ms": "mean"})  #lambda x: x.quantile(0.99)
                      .reset_index()
                      .pivot(index="runtime",
                             columns="quantization",
                             values="inference_time_ms")
                      )[["off", "static", "dynamic"]]

    # df_latex_table.columns.name = "Quantization"
    # df_latex_table.index.name = "Runtime"
    df_latex_table.columns.name = None
    df_latex_table.index.name = None

    styler = (
        df_latex_table.style
            .format_index(escape="latex")
            .format(precision=2, na_rep="---")
    )

    return styler.to_latex(hrules=True)


def make_plots(df, name, display_name, pdf_metadata: Dict[str, str] = None):
    """Draw grouped horizontal bars of inference time per runtime and save them as PDF.

    Args:
        df: frame with ``runtime``, ``inference_time_ms``, ``quantization`` and
            ``iteration`` columns.
        name: file stem; the figure is written to ``<plot_dir>/<name>.pdf``.
        display_name: title shown above the plot.
        pdf_metadata: optional extra ``key=value`` pairs for the PDF "Title"
            metadata field. The dict passed in is left untouched.
    """
    # Work on a private copy: the "ci" and "iterations" entries added below must not
    # leak into the dict owned by the caller.
    pdf_metadata = dict(pdf_metadata) if pdf_metadata is not None else {}

    ci = 95
    g = sns.catplot(
        data=df, kind="bar",
        y="runtime", x="inference_time_ms", hue="quantization",
        ci=ci, palette="dark", alpha=.6, height=4, aspect=4 / 3,
    )
    plt.xlabel("Inference time (ms)")
    plt.ylabel("Runtime name")

    plt.title(display_name)

    # Record how the figure was produced so it can be traced back from the PDF alone.
    pdf_metadata["ci"] = ci
    pdf_metadata["iterations"] = df["iteration"].max()

    g.fig.savefig(
        os.path.join(plot_dir, f"{name}.pdf"),
        bbox_inches="tight",
        # Metadata doc: https://matplotlib.org/stable/api/backend_pdf_api.html#matplotlib.backends.backend_pdf.PdfFile
        metadata={
            "Title": " ".join([f"{k}={v}" for k, v in pdf_metadata.items()])
        },
        backend="pgf",
    )

    # print(make_table(df))

In [None]:
# Dense model: load the benchmark run and plot it; the run name is stored in the
# PDF metadata.
dirname = "0711-dense-2"
df_dense = read_results(subdir=dirname)
# Optional filter: df_dense = df_dense.loc[df_dense["quantization"] == "off"]
make_plots(
    df=df_dense,
    name="dense",
    display_name="Dense model",
    pdf_metadata={"dirname": dirname},
)

In [None]:
# Inverted bottleneck model: load the benchmark run and plot it; the run name is
# stored in the PDF metadata.
dirname = "0630-inverted_bottleneck"
df_ib = read_results(subdir=dirname)
# Optional filter: df_ib = df_ib.loc[df_ib["quantization"] == "off"]
make_plots(
    df=df_ib,
    name="ib",
    display_name="Inverted bottleneck model",
    pdf_metadata={"dirname": dirname},
)

In [None]:
# Dilated CNN model. Earlier runs kept for reference:
# dirname = "0704-dilated_cnn"
# dirname = "0715-dilated_cnn-2" # input size 32
dirname = "0715-dilated_cnn-3" # input size 128
df_dilated_cnn = read_results(subdir=dirname)
# Optional filter: df_dilated_cnn = df_dilated_cnn.loc[df_dilated_cnn["quantization"] == "off"]
make_plots(
    df=df_dilated_cnn,
    name="dilated_cnn",
    display_name="Dilated CNN model",
    pdf_metadata={"dirname": dirname},
)

## Architectural benchmarks

- 0725-ddspae-2
- 0715-ddspae-tiny
- 0721-ddspae-cnn-8
- TODO: fullrave
- TODO: fullrave noiseless

In [None]:
def prepare_columns(df, model_name):
    """Map the raw ``Autoencoder.*`` columns to display-ready component columns.

    Models whose name contains "ddsp" or "newt" get the columns
    "Encoder (pitch detector)", "Decoder (CNN)" or "Decoder (RNN)" (CNN when the
    name contains "cnn"), and "Synthesizer". All other models get "Encoder (CNN)",
    "Decoder (CNN)" and "Multi-band decomposition" (preprocessor plus
    processor_group). The returned frame shares the index of ``df``; ``df`` itself
    is not modified.

    Raises:
        AssertionError: for a DDSP/NEWT-like model whose mean encoder value is not
            below 1% of the mean ``Autoencoder`` value.
    """
    pitch_driven = any(tag in model_name for tag in ("ddsp", "newt"))

    if pitch_driven:
        # There should be no encoder, because the encoding is done in the preprocessor
        assert df["Autoencoder.encoder"].mean() < 0.01 * df["Autoencoder"].mean()

        decoder_kind = "(CNN)" if "cnn" in model_name else "(RNN)"
        components = {
            # The encoder is a pitch detector
            "Encoder (pitch detector)": df["Autoencoder.preprocessor"],
            "Decoder " + decoder_kind: df["Autoencoder.decoder"],
            "Synthesizer": df["Autoencoder.processor_group"],
        }
    else:
        components = {
            "Encoder (CNN)": df["Autoencoder.encoder"],
            "Decoder (CNN)": df["Autoencoder.decoder"],
            "Multi-band decomposition": df["Autoencoder.preprocessor"] + df["Autoencoder.processor_group"],
        }

    # Start from a column-less copy so only the index of the input is carried over.
    renamed = df[[]].copy()
    for label, values in components.items():
        renamed[label] = values

    return renamed

In [None]:
# Evaluated models. Each entry has:
#   "dataset":      dataset label, used to group results and to pick the dataset artifact
#   "name":         run name; the evaluation artifact and CSV are named "eval-<name>"
#   "display_name": label used in plots and tables
# Later cells add "loss" and "csv_path" to these dicts in place.
models = [
    {"dataset": "Violin", "name": "0725-ddspae-2", "display_name": "DDSP-full"},
    {"dataset": "Violin", "name": "0715-ddspae-tiny", "display_name": "DDSP-tiny"},
    {"dataset": "Violin", "name": "0725-ddspae-cnn-1", "display_name": "DDSP-CNN"},
    {"dataset": "Violin", "name": "0726-fullrave-noiseless", "display_name": "RAVE-like"},
    {"dataset": "Violin", "name": "0726-ddspae-cnn", "display_name": "DDSP-CNN-IB"},  # IB, ch=32
    {"dataset": "Violin", "name": "0725-newt", "display_name": "NEWT-like"},
    # TRUMPET
    {"dataset": "Trumpet", "name": "0805-ddspae", "display_name": "DDSP-full"},
    {"dataset": "Trumpet", "name": "0805-ddspae-tiny", "display_name": "DDSP-tiny"},
    {"dataset": "Trumpet", "name": "0804-ddspae-cnn-3", "display_name": "DDSP-CNN"},
    {"dataset": "Trumpet", "name": "0809-fullrave-noiseless-6", "display_name": "RAVE-like"}, # still training
    {"dataset": "Trumpet", "name": "0809-ddspae-cnn", "display_name": "DDSP-CNN-IB"},  # IB, ch=32
    {"dataset": "Trumpet", "name": "0805-newt", "display_name": "NEWT-like"},
]

# NOTE(review): eval_dir is only referenced by the commented-out shell command below.
eval_dir = '/Users/vaclav/prog/thesis/data/eval_data/'

api = wandb.Api()

# For every model: fetch the latest "eval-<name>" artifact, store the total loss from
# the summary of the run that logged it, and remember where the evaluation CSV lives.
# Mutates the dicts in `models` in place (adds "loss" and "csv_path").
for model in models:
    model_name = model["name"]
    # !wandb artifact get 'neural-audio-synthesis-thesis/nas-evaluation/eval-'$model_name':latest' --root $eval_dir

    artifact = api.artifact(f"neural-audio-synthesis-thesis/nas-evaluation/eval-{model_name}:latest")
    # The return value is used below as the directory that contains the CSV.
    csv_dir = artifact.checkout()

    run = artifact.logged_by()
    model["loss"] = run.summary["losses/total_loss"]

    # glob.glob(os.path.join(eval_dir, f"eval-{model_name}.csv"))
    model["csv_path"] = os.path.join(csv_dir, f"eval-{model_name}.csv")
    
    # Progress log: artifact creation time next to the model name.
    print(artifact.created_at, model["name"])

In [None]:
# print(run.config["operative_config"])

In [None]:
# Print one "$script <dataset artifact>:latest <model name>" line per model.
# An unknown dataset label raises KeyError.
artifact_by_dataset = {"Violin": "violin4", "Trumpet": "urmp_tpt2"}

for model in models:
    dataset_artifact = artifact_by_dataset[model["dataset"]]
    print(f"$script {dataset_artifact}:latest {model['name']}")

In [None]:
def prepare_benchmark_results(must_contain, aliases=(), dataset=None):
    """Collect component timings of all matching models into one long-format frame.

    Args:
        must_contain: substring that a model's ``name`` must contain to be included.
        aliases: iterable of ``(column, display_name)`` pairs; each CSV column is
            emitted under the given component name. The mean of every aliased
            column is also printed per model.
        dataset: optional dataset label; when given, only models whose ``dataset``
            equals it are used. ``None`` (default) pools the models of all datasets.

    Returns:
        DataFrame with the columns "Component" and "Inference time (s)".
        When ``must_contain == "ddsp"``, the ``Autoencoder.decoder`` column of each
        model is added as "Decoder (<variant>)" so the DDSP variants stay apart.

    Raises:
        ValueError: if no model matched or no column was selected.
    """
    frames = []

    for model in models:
        if must_contain not in model["name"]:
            continue

        if dataset is not None and model["dataset"] != dataset:
            continue

        df = pd.read_csv(model["csv_path"], index_col=0)

        for col, display_name in aliases:
            timings = df[[col]].rename(columns={col: "time"})
            timings.insert(0, "name", display_name)
            frames.append(timings)
            print(f"{col}: {df[col].mean():.2f}")

        if must_contain == "ddsp":
            # Disambiguate DDSP variants. The display names in `models` look like
            # "DDSP-<variant>"; [5:] strips that "DDSP-" prefix.
            decoder = df[["Autoencoder.decoder"]].rename(columns={"Autoencoder.decoder": "time"})
            decoder.insert(0, "name", f"Decoder ({model['display_name'][5:]})")
            frames.append(decoder)

    if not frames:
        # pd.concat([]) would raise a ValueError as well, but without saying why.
        raise ValueError(
            f"No benchmark data selected for must_contain={must_contain!r}, dataset={dataset!r}"
        )

    df_all = pd.concat(frames)
    df_all = df_all.rename(columns={"time": "Inference time (s)", "name": "Component"})

    return df_all

In [None]:
def plot_benchmark_results(df_all, filename):
    """Draw a horizontal bar chart of component timings and save it to ``plot_dir``.

    Args:
        df_all: long-format frame with "Component" and "Inference time (s)" columns.
        filename: name of the output file inside ``plot_dir``.

    The bars of the first container are annotated with their value (three decimals).
    The "Title" field of the file metadata lists the names of every entry in the
    global ``models`` list, whether or not it contributed to ``df_all``.
    """
    grid = sns.catplot(data=df_all, y="Component", x="Inference time (s)", kind="bar", orient="h", ci=95, aspect=2, height=2)
    axes = grid.ax
    axes.bar_label(axes.containers[0], fmt="%.3f", padding=10)

    model_names = [entry["name"] for entry in models]
    title = " ".join(f"{key}={value}" for key, value in {"models": model_names}.items())

    grid.fig.savefig(
        os.path.join(plot_dir, filename),
        bbox_inches="tight",
        # Metadata doc: https://matplotlib.org/stable/api/backend_pdf_api.html#matplotlib.backends.backend_pdf.PdfFile
        metadata={"Title": title},
        backend="pgf",
    )

In [None]:
# DDSP-like models: pitch detector and synthesizer timings (the per-variant decoder
# timings are added by prepare_benchmark_results itself for "ddsp").
df_all = prepare_benchmark_results("ddsp", aliases=[
    ("Autoencoder.preprocessor", "Pitch detector"),
    ("Autoencoder.processor_group", "Synthesizer"),
])
plot_benchmark_results(df_all, "ddsp-like-initial-benchmark.pdf")

In [None]:
# NEWT-like models: pitch detector, decoder and NEWT synthesizer timings.
df_all = prepare_benchmark_results("newt", aliases=[
    ("Autoencoder.preprocessor", "Pitch detector"),
    ("Autoencoder.decoder", "Decoder"),
    ("Autoencoder.processor_group", "NEWT Synthesizer"),
])
plot_benchmark_results(df_all, "newt-initial-benchmark.pdf")

In [None]:
# RAVE-like models: multi-band analysis/synthesis plus encoder and decoder timings.
df_all = prepare_benchmark_results("rave", aliases=[
    ("Autoencoder.preprocessor", "Multi-band analysis"),
    ("Autoencoder.encoder", "Encoder"),
    ("Autoencoder.decoder", "Decoder"),
    ("Autoencoder.processor_group", "Multi-band synthesis"),
])
plot_benchmark_results(df_all, "rave-initial-benchmark.pdf")

In [None]:
# Loss table: one row per model display name, one column per dataset.
df = pd.DataFrame(
    [(m["display_name"], m["dataset"], m["loss"]) for m in models],
    columns=["Model", "Dataset", "Loss"],
).pivot(index="Model", columns="Dataset", values="Loss")
# Drop the "Dataset" axis label so it does not appear as a header, then turn the
# model names back into a regular column.
df.columns.name = None
df = df.reset_index()

In [None]:
# Print the table as LaTeX source: no index column, two decimals, "---" for gaps.
# (Index escaping via .format_index(escape="latex") is intentionally left disabled.)
styler = df.style.hide(axis="index").format(precision=2, na_rep="---")

print(styler.to_latex(hrules=True))

In [None]:
# One horizontal bar chart per model showing the prepared component columns.
for model in models:
    df = prepare_columns(pd.read_csv(model["csv_path"], index_col=0), model["name"])
    g = sns.catplot(data=df, kind="bar", orient="h", ci=95, aspect=2, height=2)
    # g.set(xlim=(0, 3))
    bars = g.ax.containers[0]
    g.ax.bar_label(bars, fmt="%.2f", padding=10)
    plt.title(model.get("display_name", model["name"]))

In [None]:
# Inspect the first bar container of `g`, i.e. of the last figure created by the
# loop in the previous cell (`g` is that loop's leftover variable).
g.ax.containers[0]

In [None]:
# Loss per DDSP variant: keep only the models whose run name contains "ddsp".
df = pd.DataFrame(
    [
        (m["display_name"], m["loss"])
        for m in models
        if "ddsp" in m["name"]
    ],
    columns=["Model", "Loss"],
)

In [None]:
# Print the table as LaTeX source: no index column, two decimals, "---" for gaps.
# (Index escaping via .format_index(escape="latex") is intentionally left disabled.)
styler = df.style.hide(axis="index").format(precision=2, na_rep="---")

print(styler.to_latex(hrules=True))

In [None]:
# Bar chart of the loss per DDSP variant. `df` was built with the named columns
# "Model" and "Loss", so the axes must reference those names; the positional labels
# 0 and 1 do not exist in this frame.
sns.catplot(data=df, x="Model", y="Loss", kind="bar")

In [None]:
# One horizontal bar chart per model showing the prepared component columns.
for model in models:
    df = prepare_columns(pd.read_csv(model["csv_path"], index_col=0), model["name"])
    g = sns.catplot(data=df, kind="bar", orient="h", ci=95, aspect=2, height=2)
    # g.set(xlim=(0, 3))
    bars = g.ax.containers[0]
    g.ax.bar_label(bars, fmt="%.2f", padding=10)
    plt.title(model.get("display_name", model["name"]))

In [None]:
# Inspect the first bar container of `g`, i.e. of the last figure created by the
# loop in the previous cell (`g` is that loop's leftover variable).
g.ax.containers[0]