In [None]:
# Select the interactive "widget" (ipympl) matplotlib backend for the figures in this notebook.
%matplotlib widget

In [None]:
%%javascript
// Disable scrolling on plots
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
import json
import re
from pathlib import Path

import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.core.display import HTML, display
from IPython.display import JSON, display
from ipywidgets import fixed, interact, interact_manual, interactive
from matplotlib.ticker import (AutoMinorLocator, FormatStrFormatter,
                               MultipleLocator)

In [None]:
# Notebook-wide pandas display settings.
pd.options.display.max_columns = None
pd.options.display.max_rows = 1000
pd.options.display.precision = 2
# Floats are rendered with thousands separators and six decimal places.
pd.set_option("display.float_format", "{0:,.6f}".format)

In [None]:
# Make cells wider: inject a CSS rule that forces `.container` elements to 90% width.
wide_cells_css = "<style>.container { width:90% !important; }</style>"
display(HTML(wide_cells_css))

In [None]:
# Root directory whose sub-directories each hold the CSV of one set of results.
RESULTS_DIR = Path("./results")
# Values of the "Method" column offered in the method drop-downs.
METHODS = ("qsort_c", "vanilla_quicksort", "insertion_sort", "std")
# Method pre-selected by the plotting helpers.
DEFAULT_METHOD = "qsort_c"
# Values of the "Description" column; also the path components clean_path() looks for.
INPUT_TYPES = ("ascending", "descending", "random", "single_num", "N/A")
# (width, height) handed to every plot as `figsize`.
FIG_SIZE = (10, 4)


def clean_path(path: str):
    """Shorten an input path so that it starts at its input-type component.

    Everything up to and including the slash that precedes the first
    ``/<input type>`` occurrence is dropped, e.g.
    ``"/home/u/inputs/ascending/100.txt"`` becomes ``"ascending/100.txt"``.
    Input types are tried in ``INPUT_TYPES`` order and the first one that is
    actually present as ``/<input type>`` wins.

    Args:
        path: The path text to clean. A ``Path`` is accepted and converted to
            its string form first.

    Returns:
        The shortened string, or ``path`` unchanged when no input type is found
        or when ``path`` is not textual (for example a NaN from a missing cell).
    """
    if isinstance(path, Path):
        path = str(path)
    if not isinstance(path, str):
        # Missing values (NaN / None) have nothing to clean and must not crash.
        return path

    for input_type in INPUT_TYPES:
        # Require the leading slash so that e.g. "/nonrandom/" is not mistaken
        # for the "random" input type; escape the name so it is matched literally.
        found = re.search(f"/{re.escape(input_type)}", path)
        if found:
            return path[found.start() + 1 :]
    return path


def load(in_dir=None):
    """Load one results directory into the global ``df`` and ``avg_df`` frames.

    Steps: pick the directory, print ``job_details.json`` if it can be read,
    read the first CSV (by sorted name), convert the elapsed time to
    milliseconds and seconds, shorten the ``Input`` paths with ``clean_path``,
    display summary statistics and finally average repeated runs.

    Args:
        in_dir: Directory (``Path`` or string) containing the CSV. When
            ``None``, the last sub-directory of ``RESULTS_DIR`` in sorted
            order is used.

    Raises:
        IndexError: ``in_dir`` is ``None`` and ``RESULTS_DIR`` has no
            sub-directories.
        KeyError: the chosen directory contains no ``*.csv`` file.

    Side effects:
        Rebinds the module-level ``df`` (one row per run) and ``avg_df`` (mean
        runtime per Input/Method/Description/Size/Threshold). Both are only
        assigned once the whole load has succeeded.
    """
    global df, avg_df

    if in_dir is None:
        # Find the latest results directory; stray files are not candidates.
        candidates = sorted(p for p in RESULTS_DIR.iterdir() if p.is_dir())
        if not candidates:
            raise IndexError("No results found")
        in_dir = candidates[-1]
    else:
        # Accept plain strings as well as Path objects.
        in_dir = Path(in_dir)

    # glob() order depends on the filesystem; sort so the same CSV is always picked.
    csvs = sorted(in_dir.glob("*.csv"))
    if not csvs:
        raise KeyError("CSV missing")
    in_file = csvs[0]

    # If possible, load the system details (best effort, never fatal)
    try:
        info_path = Path(in_dir, "job_details.json")
        if info_path.is_file():
            with open(info_path, "r") as json_file:
                info = json.load(json_file)
            for key, value in info.items():
                print(f"{key}: {str(value).rstrip()}")
    except Exception:
        print("Couldn't load system details")

    # By default read_csv turns the literal text "N/A" into NaN, which would
    # make the "N/A" entry of INPUT_TYPES unmatchable and drop those rows from
    # the groupby below. Only treat empty cells as missing.
    raw = pd.read_csv(in_file, keep_default_na=False, na_values=[""])

    # Convert microseconds to milliseconds, and derive seconds from that
    runtime_ms = raw["Elapsed Time (microseconds)"] / 1000

    # Build a fresh frame with the columns in display order. Building it anew
    # (instead of assigning into a column slice) avoids SettingWithCopyWarning.
    loaded = pd.DataFrame(
        {
            # Cleanup the input column; missing cells are passed through untouched
            "Input": raw["Input"].map(clean_path, na_action="ignore"),
            "Description": raw["Description"],
            "Method": raw["Method"],
            "Size": raw["Size"],
            "Threshold": raw["Threshold"],
            "Runtime (milliseconds)": runtime_ms,
            "Runtime (seconds)": runtime_ms / 1000,
        }
    )

    display(loaded[["Runtime (milliseconds)", "Runtime (seconds)"]].describe())

    # Average runtime for repeated runs.
    averaged = (
        loaded.groupby(["Input", "Method", "Description", "Size", "Threshold"])
        .mean()
        .reset_index()
    )

    df, avg_df = loaded, averaged

    print(f"Loaded: {in_file}")

In [None]:
def plot_vs_methods(
    method=DEFAULT_METHOD,
    method_2=DEFAULT_METHOD,
    thresh=4,
    unit="milliseconds",
    table=False,
):
    """Plot averaged runtime against input size for two methods.

    One figure is produced per entry of ``INPUT_TYPES`` for which both methods
    have rows in the global ``avg_df``; input types where either side is empty
    are skipped.

    Args:
        method: First method to plot.
        method_2: Second method to plot.
        thresh: Threshold used when a method is ``"qsort_c"``; any other
            method is looked up at threshold 0.
        unit: ``"milliseconds"`` or ``"seconds"`` (case-insensitive); selects
            the ``Runtime (<unit>)`` column.
        table: When true, also display the two frames behind each figure.
    """
    unit = unit.lower()
    runtime_col = f"Runtime ({unit})"
    sort_keys = ["Description", "Method", "Threshold", "Size"]

    threshold = thresh if method == "qsort_c" else 0
    threshold_2 = thresh if method_2 == "qsort_c" else 0

    def rows_for(name, value, input_type):
        """Rows of avg_df for one method / threshold / input type."""
        selected = avg_df[avg_df["Method"] == name]
        selected = selected[selected["Description"] == input_type]
        return selected[selected["Threshold"] == value]

    def title_part(name, value):
        """Method label for the title; qsort_c also reports its threshold."""
        if name == "qsort_c":
            return f"{name:} (threshold: {value:,})"
        return name

    # Collect the frame pair for every input type first, then plot them.
    frame_pairs = [
        (
            rows_for(method, threshold, input_type),
            rows_for(method_2, threshold_2, input_type),
        )
        for input_type in INPUT_TYPES
    ]

    for frame, frame2 in frame_pairs:
        # Nothing to compare unless both methods have data
        if len(frame) == 0 or len(frame2) == 0:
            continue

        # Sort for pretty graphs
        frame = frame.sort_values(sort_keys)
        frame2 = frame2.sort_values(sort_keys)

        # Title: "<method> vs. <method_2>" with the input type on a second line
        heading = " vs. ".join(
            [title_part(method, threshold), title_part(method_2, threshold_2)]
        )
        title = f"{heading}\n{frame.iloc[0]['Description']}"

        # Both lines share the same axes and marker styling
        line_style = {
            "x": "Size",
            "y": runtime_col,
            "marker": ".",
            "markersize": 10,
            "figsize": FIG_SIZE,
        }
        ax = frame.plot(**line_style)
        frame2.plot(ax=ax, title=title, grid=True, **line_style)

        # Fix legends to method names.
        ax.legend([method, method_2])

        # Set axis titles
        ax.set_xlabel("Size")
        ax.set_ylabel(runtime_col)

        # Table of data
        if table:
            display(frame)
            display(frame2)

In [None]:
def plot_threshold_impact(
    size=50_000,
    unit="milliseconds",
    error_bars=False,
    table=False,
    descrip=False,
):
    """Plot mean qsort_c runtime against threshold for one input size.

    One figure is produced per entry of ``INPUT_TYPES`` that has ``qsort_c``
    rows of the requested size in the global ``df``.

    Args:
        size: Value of the ``Size`` column to select.
        unit: ``"milliseconds"`` or ``"seconds"`` (case-insensitive); selects
            the ``Runtime (<unit>)`` column.
        error_bars: When true, draw error bars of two standard deviations of
            the repeated runs around each mean.
        table: When true, also display the rows behind each figure.
        descrip: When true, also display ``describe()`` of the runtime column.
    """
    unit = unit.lower()
    runtime_col = f"Runtime ({unit})"
    group_keys = ["Input", "Method", "Description", "Size", "Threshold"]

    # Method and size do not depend on the input type, so filter them once.
    qsort_rows = df[(df["Method"] == "qsort_c") & (df["Size"] == size)]

    for input_type in INPUT_TYPES:
        frame = qsort_rows[qsort_rows["Description"] == input_type]

        # Skip if empty frame
        if frame.empty:
            continue

        # Sort for pretty graphs
        frame = frame.sort_values(["Description", "Method", "Threshold", "Size"])

        # Mean (and, on request, spread) of the repeated runs per group
        runtime_groups = frame.groupby(group_keys)[runtime_col]
        means = runtime_groups.mean().reset_index()

        # Assemble the title
        title = f"Qsort_c Threshold Value vs. Runtime at {size:,} Inputs\n{frame.iloc[0]['Description'].capitalize()}"

        plot_kwargs = {
            "x": "Threshold",
            "y": runtime_col,
            "marker": ".",
            "markersize": 10,
            "title": title,
            "grid": True,
            "figsize": FIG_SIZE,
        }
        if error_bars:
            # Pass the errors as a plain array so they are matched to the
            # means by position (mean() and std() share the same group order).
            # A reset_index()-ed frame is numbered 0..n-1, which does not line
            # up with the threshold values the plotted data is indexed by.
            # Only the runtime column is scaled; the key columns are untouched.
            plot_kwargs.update(
                yerr=2 * runtime_groups.std().to_numpy(),
                capsize=4,
                ecolor="black",
            )
        ax = means.plot(**plot_kwargs)

        # Set axis titles
        ax.set_xlabel("Threshold")
        ax.set_ylabel(runtime_col)

        # Force int x axis
        ax.get_xaxis().set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))

        # Table of data
        if table:
            display(frame)

        if descrip:
            display(frame[[runtime_col]].describe())

In [None]:
units = ["milliseconds", "seconds"]

# Use the configured results location so this picker and load() look in the
# same place; only directories are offered, stray files are not loadable.
results_dir = RESULTS_DIR
dirs = sorted(entry for entry in results_dir.iterdir() if entry.is_dir())
if not dirs:
    raise IndexError("No results found")

# Give the option to load any data set
interact(
    load,
    in_dir=widgets.Dropdown(
        options=dirs,
        value=dirs[-1],
        description="Results Dir:",
    ),
)

# Rows of the default method; they supply the sizes for the size slider below.
# `df` is the global that load() populates.
default_df = df[df["Method"] == DEFAULT_METHOD]

# Every threshold value present in the data, ascending. 0 is left out: it is
# the value plot_vs_methods looks up for methods other than qsort_c.
thresholds = sorted(set(df["Threshold"]) - {0})

interact_manual(
    plot_vs_methods,
    name="Comparing Methods",
    method=widgets.Dropdown(options=METHODS, value=METHODS[0], description="Method 1:"),
    method_2=widgets.Dropdown(
        options=METHODS, value=METHODS[-1], description="Method 2:"
    ),
    unit=widgets.Dropdown(options=units, description="Unit"),
    thresh=widgets.SelectionSlider(options=thresholds, description="Threshold"),
    table=widgets.Checkbox(description="Table"),
    continuous_update=False,
)

sizes = sorted(set(default_df["Size"]))

interact_manual(
    plot_threshold_impact,
    name="Threshold Impact",
    unit=widgets.Dropdown(options=units, description="Unit"),
    size=widgets.SelectionSlider(options=sizes),
    error_bars=widgets.Checkbox(description="Error Bars"),
    table=widgets.Checkbox(description="Table"),
    descrip=widgets.Checkbox(description="Data Description"),
    continuous_update=False,
)