In [None]:
# Select the interactive "widget" (ipympl) Matplotlib backend for this notebook.
%matplotlib widget

In [None]:
%%javascript
// Disable scrolling on plots.
// Replaces the `_should_scroll` method on OutputArea's prototype with a
// function that always returns false, whatever `lines` value it is given.
// NOTE(review): presumably this keeps tall plot output from being collapsed
// into a scroll box; it relies on the frontend defining the `IPython` JS
// global — confirm for the notebook UI in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

from ipywidgets import interact, interact_manual, interactive, fixed, interact_manual
import ipywidgets as widgets

from IPython.display import display
from IPython.core.display import display, HTML

from matplotlib.ticker import MultipleLocator, FormatStrFormatter, AutoMinorLocator
import matplotlib
import pickle

In [None]:
# Display configuration for every DataFrame rendered below:
#   - no cap on the number of columns shown,
#   - at most 1000 rows shown,
#   - floats formatted with thousands separators.
pd.options.display.max_columns = None
pd.options.display.max_rows = 1000
pd.set_option("display.float_format", "{:,}".format)

In [None]:
# Make cells wider: inject a CSS rule that sets `.container` to 90% width.
_WIDE_CELLS_STYLE = "<style>.container { width:90% !important; }</style>"
display(HTML(_WIDE_CELLS_STYLE))

In [None]:
# Directory that `load` scans for result sub-directories.
RESULTS_DIR = Path("./results")

# Method preselected in the comparison widgets; it is also the first entry of
# METHODS, the full list of method names offered there.
DEFAULT_METHOD = "qsort_c"
METHODS = (DEFAULT_METHOD, "vanilla_quicksort", "insertion_sort", "std")

# Values of the "Description" column the plotting functions iterate over.
INPUT_TYPES = (
    "ascending",
    "descending",
    "random",
    "single_num",
    "N/A",
)


def load(in_dir=None):
    """Load one results directory into the notebook globals ``df`` and ``avg_df``.

    Reads the first ``*.csv`` (in sorted order) found in ``in_dir``, converts
    the "Elapsed Time (microseconds)" column into "Runtime (milliseconds)" and
    "Runtime (seconds)" columns, and stores:

    * ``df``     -- one row per run, with the columns in a fixed order;
    * ``avg_df`` -- mean runtimes per (Input, Method, Description, Size,
      Threshold) group.

    If ``in_dir`` contains ``system_details.pickle`` it is unpickled and
    displayed. A ``describe()`` summary of the millisecond runtimes is
    displayed and the loaded file name is printed.

    Parameters
    ----------
    in_dir : path-like, optional
        Directory holding the results CSV. When ``None``, the last
        sub-directory of ``RESULTS_DIR`` in sorted order is used.

    Raises
    ------
    IndexError
        ``in_dir`` is ``None`` and ``RESULTS_DIR`` has no sub-directories.
    KeyError
        ``in_dir`` contains no ``*.csv`` file.
    """
    global df, avg_df
    if in_dir is None:
        # Find the latest results directory. Only directories qualify, so a
        # stray file that happens to sort last is never selected.
        candidates = sorted(p for p in RESULTS_DIR.iterdir() if p.is_dir())
        if not candidates:
            raise IndexError("No results found")
        in_dir = candidates[-1]
    in_dir = Path(in_dir)

    # Sort so the chosen file is deterministic when several CSVs are present
    # (glob order is not guaranteed).
    csvs = sorted(in_dir.glob("*.csv"))
    if not csvs:
        raise KeyError("CSV missing")
    in_file = csvs[0]

    # If possible, load the system details
    info_path = Path(in_dir, "system_details.pickle")
    if info_path.exists() and info_path.is_file():
        # SECURITY: unpickling can execute arbitrary code; only point this at
        # results directories produced by trusted benchmark runs.
        with open(info_path, "rb") as pickle_file:
            info = pickle.load(pickle_file)
        display(info)

    raw = pd.read_csv(in_file)

    # read_csv's default NA handling parses the literal text "N/A" as NaN.
    # "N/A" is a legitimate label here (it is listed in INPUT_TYPES), and NaN
    # group keys are dropped by groupby, so put the label back. Any other
    # missing label is likewise reported as "N/A".
    label_cols = ["Input", "Description"]
    raw[label_cols] = raw[label_cols].fillna("N/A")

    # TODO: Cleanup the input column, remove extraneous path elements

    # Convert microseconds to milliseconds, derive seconds from that, and keep
    # the columns in a fixed order.
    runtime_ms = raw["Elapsed Time (microseconds)"] / 1000
    df = raw.assign(
        **{
            "Runtime (milliseconds)": runtime_ms,
            "Runtime (seconds)": runtime_ms / 1000,
        }
    )[
        [
            "Input",
            "Description",
            "Method",
            "Size",
            "Threshold",
            "Runtime (milliseconds)",
            "Runtime (seconds)",
        ]
    ]

    display(df[["Runtime (milliseconds)"]].describe())

    # Average runtime for repeated runs.
    avg_df = (
        df.groupby(["Input", "Method", "Description", "Size", "Threshold"])
        .mean()
        .reset_index()
    )

    print(f"Loaded: {in_file}")

In [None]:
def plot_vs_methods(
    method=DEFAULT_METHOD,
    method_2=DEFAULT_METHOD,
    thresh=4,
    unit="milliseconds",
    table=False,
):
    """Plot averaged runtime against input size for two methods, per input type.

    Reads the global ``avg_df``. For every description in ``INPUT_TYPES`` for
    which both methods have rows, one figure is drawn with both runtime curves
    over "Size".

    Parameters
    ----------
    method, method_2 : str
        Values of the "Method" column to compare.
    thresh : int
        Threshold used to select rows when a method is "qsort_c"; any other
        method is selected with threshold 0.
    unit : str
        Suffix of the runtime column to plot, i.e. "Runtime (<unit>)".
    table : bool
        When true, also display the two plotted frames.
    """
    runtime_col = f"Runtime ({unit})"
    sort_keys = ["Description", "Method", "Threshold", "Size"]

    def _select(name):
        """Return (threshold, averaged rows) for the method called ``name``."""
        limit = thresh if name == "qsort_c" else 0
        picked = avg_df[(avg_df["Method"] == name) & (avg_df["Threshold"] == limit)]
        return limit, picked

    def _label(name, limit):
        """Title fragment for one method; only qsort_c mentions its threshold."""
        if name == "qsort_c":
            return f"{name:} (threshold: {limit:,})"
        return name

    # These selections do not depend on the input type, so do them once.
    threshold, rows = _select(method)
    threshold_2, rows_2 = _select(method_2)
    heading = _label(method, threshold) + " vs. " + _label(method_2, threshold_2)

    for description in INPUT_TYPES:
        frame = rows[rows["Description"] == description]
        frame2 = rows_2[rows_2["Description"] == description]

        # Nothing to compare unless both methods have data for this input type.
        if frame.empty or frame2.empty:
            continue

        # Sort for pretty graphs
        frame = frame.sort_values(sort_keys)
        frame2 = frame2.sort_values(sort_keys)

        # First curve creates the axes; the second is drawn onto them and
        # carries the title and grid settings.
        ax = frame.plot(
            x="Size",
            y=runtime_col,
            marker=".",
            markersize=10,
        )
        frame2.plot(
            ax=ax,
            x="Size",
            y=runtime_col,
            marker=".",
            markersize=10,
            title=f"{heading}\n{description}",
            grid=True,
        )

        # Legend entries are the method names; label both axes.
        ax.legend([method, method_2])
        ax.set_xlabel("Size")
        ax.set_ylabel(runtime_col)

        # Table of data
        if table:
            display(frame)
            display(frame2)

In [None]:
def plot_threshold_impact(size=50_000, unit="milliseconds", table=False):
    """Plot qsort_c mean runtime against threshold for one input size.

    Reads the global ``df`` (individual runs). For every description in
    ``INPUT_TYPES`` that has "qsort_c" rows of the requested size, one figure
    is drawn: mean runtime per threshold, with error bars of two standard
    deviations of the repeated runs.

    Parameters
    ----------
    size : int
        Value of the "Size" column to select.
    unit : str
        Suffix of the runtime column to plot, i.e. "Runtime (<unit>)".
    table : bool
        When true, also display the selected runs.
    """
    runtime_col = f"Runtime ({unit})"
    group_keys = ["Input", "Method", "Description", "Size", "Threshold"]

    # The method/size selection is the same for every input type.
    qsort_runs = df[(df["Method"] == "qsort_c") & (df["Size"] == size)]

    for description in INPUT_TYPES:
        frame = qsort_runs[qsort_runs["Description"] == description]

        # Skip if empty frame
        if frame.empty:
            continue

        group = frame.groupby(group_keys)
        means = group.mean().reset_index()

        # Error bars: two standard deviations of the plotted runtime column
        # only. They are passed as a plain array so each bar is matched to its
        # mean by position (mean() and std() share the same group order)
        # instead of being re-aligned by index label against the Threshold
        # values, and so label/Size/Threshold columns are never scaled.
        errors = 2 * group[runtime_col].std().to_numpy()

        # Sort for pretty graphs
        frame = frame.sort_values(["Description", "Method", "Threshold", "Size"])

        # Assemble the title
        title = f"Qsort_c Threshold Value vs. Runtime at {size:,} Inputs\n{description.capitalize()}"

        # Plot
        ax = means.plot(
            x="Threshold",
            y=runtime_col,
            yerr=errors,
            capsize=4,
            ecolor="black",
            marker=".",
            markersize=10,
            title=title,
            grid=True,
            figsize=(16, 8),
        )

        # Set axis titles
        ax.set_xlabel("Threshold")
        ax.set_ylabel(runtime_col)

        # Force int x axis
        ax.get_xaxis().set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))

        # Table of data
        if table:
            display(frame)

In [None]:
# Every entry under ./results, in sorted order, offered as a choice.
results_dir = Path("./results")
dirs = sorted(results_dir.iterdir())

# Give the option to load any data set
interact(load, in_dir=dirs)

In [None]:
# Threshold range covered by the default method's runs; it bounds the slider.
default_df = df.loc[df["Method"] == DEFAULT_METHOD]
_threshold_values = default_df["Threshold"]
MIN_THRESHOLD, MAX_THRESHOLD = _threshold_values.min(), _threshold_values.max()

# Slider over the available thresholds, starting at the smallest one.
_threshold_slider = widgets.IntSlider(
    min=MIN_THRESHOLD, max=MAX_THRESHOLD, step=1, value=MIN_THRESHOLD
)

# Manual-run controls for plot_vs_methods.
interact_manual(
    plot_vs_methods,
    name="Comparing Methods",
    method=list(METHODS),
    method_2=list(METHODS),
    unit=["milliseconds", "seconds"],
    thresh=_threshold_slider,
    continuous_update=False,
)

In [None]:
# Get min and max size of the qsort_c runs; they bound the size slider.
default_df = df[df["Method"] == "qsort_c"]
MIN_SIZE = default_df["Size"].min()
MAX_SIZE = default_df["Size"].max()
# The slider advances in increments of the smallest size.
# NOTE(review): assumes the benchmarked sizes are multiples of MIN_SIZE — confirm.
STEP = MIN_SIZE

# Manual-run controls for plot_threshold_impact.
interact_manual(
    plot_threshold_impact,
    name="Threshold Impact",
    unit=["milliseconds", "seconds"],
    # The slider selects a size, so it starts at MIN_SIZE. (It previously
    # started at MIN_THRESHOLD, a threshold value defined in another cell.)
    size=widgets.IntSlider(min=MIN_SIZE, max=MAX_SIZE, step=STEP, value=MIN_SIZE),
    continuous_update=False,
)