In [None]:
# Select the interactive "widget" Matplotlib backend for the figures created below.
%matplotlib widget

In [None]:
%%javascript
// Disable scrolling on plots.
// The `IPython` global is not defined by every notebook front end (for example
// JupyterLab), so check for it first; an unguarded access would make this cell
// fail with a ReferenceError there.
if (typeof IPython !== "undefined" && IPython.OutputArea) {
    // Always answer "no" when asked whether an output area should scroll.
    IPython.OutputArea.prototype._should_scroll = function (lines) {
        return false;
    };
}

In [None]:
import json
import re
from pathlib import Path

import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.core.display import HTML, display
from IPython.display import JSON, display
from ipywidgets import fixed, interact, interact_manual, interactive
from matplotlib.ticker import AutoMinorLocator, FormatStrFormatter, MultipleLocator

In [None]:
# Notebook-wide pandas display settings, all applied through pd.set_option.
pd.set_option("display.max_rows", 100)  # show at most 100 rows of a frame
pd.set_option("display.max_columns", None)  # None = no limit on displayed columns
pd.set_option("display.precision", 2)
# Render floats with thousands separators and six decimal places.
pd.set_option("display.float_format", "{0:,.6f}".format)

In [None]:
# Make cells wider: inject a style rule that sets elements with the
# "container" class to 90% width.
_wide_cells_css = "<style>.container { width:90% !important; }</style>"
display(HTML(_wide_cells_css))

In [None]:
# --- Data location -----------------------------------------------------------
# Folder that load() scans for result directories.
RESULTS_DIR = Path("./results")

# --- Plot settings -----------------------------------------------------------
FIG_SIZE = (10, 4)  # passed as `figsize` to plt.subplots
UNITS = ("seconds", "milliseconds")  # choices offered by the "Unit" dropdowns

# --- Sorting methods ---------------------------------------------------------
METHODS = (
    "qsort_asm", "qsort_c", "qsort_cpp", "qsort_sanity",
    "insertion_sort", "insertion_sort_asm",
    "std",
)
DEFAULT_METHOD = "qsort_c"
# Methods whose samples are filtered by the selected threshold value; every
# other method is looked up with threshold 0 (see plot_vs_methods).
THRESHOLD_METHODS = ("qsort_asm", "qsort_c", "qsort_cpp", "qsort_cpp_no_comp")

# --- Input kinds -------------------------------------------------------------
# Names searched for inside the "Input" paths by clean_path().
INPUT_TYPES = ("ascending", "descending", "random", "single_num", "N/A")


def clean_path(path: "str | Path"):
    """Remove prefix paths.

    Drops everything in front of the first ``/<input type>`` component of
    ``path`` for the first entry of ``INPUT_TYPES`` that occurs that way, e.g.
    ``"/home/u/data/random/10.dat"`` becomes ``"random/10.dat"``.

    Args:
        path: Input path as a string. ``Path`` objects are accepted and are
            converted to their POSIX string form first.

    Returns:
        The shortened path string, or ``path`` unchanged when no input type
        is found after a ``/``. Values that are neither ``str`` nor ``Path``
        (such as NaN from a missing CSV cell) are returned untouched.
    """
    if isinstance(path, Path):
        path = path.as_posix()
    if not isinstance(path, str):
        return path

    # TODO: still one regex pass per candidate type; vectorise if this shows up
    # as a bottleneck on large result sets.
    for input_type in INPUT_TYPES:
        # Cheap substring test before paying for the regex.
        if input_type not in path:
            continue
        cleaned, hits = re.subn(
            rf"^.*?/{re.escape(input_type)}", input_type, path, count=1
        )
        # Only stop when a prefix was really stripped; a bare substring hit
        # (e.g. "my_ascending.dat") must not hide a later, real match.
        if hits:
            return cleaned
    return path


def load(in_dir=None):
    """Load one results directory and build the interactive plot controls.

    Reads the first ``*.csv`` (in sorted name order) found in ``in_dir``,
    prints the optional ``job_details.json`` and ``partition`` metadata,
    derives the runtime columns, shows brief statistics and creates the
    ``interact_manual`` controls for ``plot_vs_methods`` and
    ``plot_threshold_impact``.

    Side effects: rebinds the module-level ``df`` (cleaned samples),
    ``avg_df`` (mean runtime per input/method/description/size/threshold) and
    ``thresh_df`` (rows of ``df`` whose method is in ``THRESHOLD_METHODS``).

    Args:
        in_dir: Directory holding the results of one run. When ``None`` the
            last sub-directory of ``RESULTS_DIR`` in sorted name order is used.

    Raises:
        IndexError: ``in_dir`` is ``None`` and ``RESULTS_DIR`` has no
            sub-directory.
        FileNotFoundError: ``in_dir`` contains no CSV file.
    """
    global df, avg_df, thresh_df

    if in_dir is None:
        # Find the latest results directory; plain files are ignored so a
        # stray file cannot be mistaken for a run.
        run_dirs = sorted(p for p in RESULTS_DIR.iterdir() if p.is_dir())
        if not run_dirs:
            raise IndexError("No results found")
        in_dir = run_dirs[-1]
    in_dir = Path(in_dir)

    # glob() yields entries in arbitrary order; sort so the same file is
    # chosen on every run when a directory holds several CSVs.
    csvs = sorted(in_dir.glob("*.csv"))
    if not csvs:
        raise FileNotFoundError("CSV missing")
    in_file = csvs[0]

    # Read the actual data.
    # NOTE(review): read_csv parses the literal string "N/A" as NaN by default;
    # if the CSV uses it (cf. INPUT_TYPES) those rows are dropped by the
    # groupby calls here and in the plot functions - confirm.
    raw = pd.read_csv(in_file)

    # If possible, load the system details
    info_path = in_dir / "job_details.json"
    if info_path.is_file():
        with open(info_path, "r") as json_file:
            info = json.load(json_file)
        for key, value in info.items():
            print(f"{key}: {str(value).rstrip()}")

        actual_num_sorts = len(raw)
        # The details file is optional metadata, so a missing count only
        # disables the comparison instead of aborting the load.
        expected_num_sorts = info.get("Total number of sorts")
        if expected_num_sorts is not None and actual_num_sorts != int(
            expected_num_sorts
        ):
            # ANSI red background highlights the mismatch.
            print(f"\033[41mActual number of sorts: {actual_num_sorts}\033[0m")
        else:
            print(f"Actual number of sorts: {actual_num_sorts}")

    # If possible, load the partition
    partition_path = in_dir / "partition"
    if partition_path.is_file():
        print(f"Partition: {partition_path.read_text()}")
    else:
        print("Partition: none")

    # Convert microseconds to milliseconds and seconds, clean up the input
    # column and keep the columns in display order. Building a fresh frame
    # (instead of assigning into a slice) avoids SettingWithCopyWarning.
    runtime_ms = raw["Elapsed Time (microseconds)"] / 1000
    df = raw.assign(
        **{
            "Input": raw["Input"].apply(clean_path),
            "Runtime (milliseconds)": runtime_ms,
            "Runtime (seconds)": runtime_ms / 1000,
        }
    )[
        [
            "Input",
            "Description",
            "Method",
            "Size",
            "Threshold",
            "Runtime (milliseconds)",
            "Runtime (seconds)",
        ]
    ]

    # Brief stats
    display(df[["Runtime (milliseconds)", "Runtime (seconds)"]].describe())

    # Average runtime for repeated runs.
    avg_df = (
        df.groupby(["Input", "Method", "Description", "Size", "Threshold"])
        .mean()
        .reset_index()
    )

    print(f"Loaded: {in_file}")

    # Parameters for user interaction.
    avail_methods = sorted(set(df["Method"]))
    thresholds = sorted(set(df["Threshold"]))

    # If the only threshold listed is 0, the user has only selected
    # non-threshold methods, so don't remove it otherwise widgets get angry.
    if len(thresholds) > 1 and 0 in thresholds:
        thresholds.remove(0)

    if avail_methods:
        # NOTE(review): `continuous_update` is not a parameter of
        # plot_vs_methods; kept from the original call - confirm whether
        # interact_manual does anything with it.
        interact_manual(
            plot_vs_methods,
            methods=widgets.SelectMultiple(
                options=list(avail_methods),
                description="Methods:",
                layout={"width": "max-content"},
            ),
            unit=widgets.Dropdown(options=UNITS, description="Unit"),
            thresh=widgets.SelectionSlider(options=thresholds, description="Threshold"),
            error_bars=widgets.Checkbox(description="Error Bars"),
            table=widgets.Checkbox(description="Table"),
            continuous_update=False,
        )

    # Samples of the threshold-sensitive methods only. DataFrame.append was
    # removed in pandas 2.0, so select the rows with a single boolean mask.
    thresh_df = df[df["Method"].isin(THRESHOLD_METHODS)].copy()

    sizes = sorted(set(thresh_df["Size"]))
    # If no values remain, then none of the samples in this collection are
    # influenced by threshold value, so don't show the option to graph them.
    if sizes:
        # NOTE(review): `name` and `continuous_update` are not parameters of
        # plot_threshold_impact; kept from the original call - confirm.
        interact_manual(
            plot_threshold_impact,
            methods=widgets.SelectMultiple(
                options=sorted(set(thresh_df["Method"])),
                description="Methods:",
                layout={"width": "max-content"},
            ),
            name="Threshold Impact",
            unit=widgets.Dropdown(options=UNITS, description="Unit"),
            size=widgets.SelectionSlider(options=sizes, description="Input Size"),
            error_bars=widgets.Checkbox(description="Error Bars"),
            table=widgets.Checkbox(description="Table"),
            continuous_update=False,
        )

In [None]:
def plot_vs_methods(
    methods=None,
    thresh=4,
    unit="milliseconds",
    error_bars=False,
    table=False,
):
    """Plot mean runtime against input size, one figure per input description.

    Reads the module-level ``df`` populated by ``load()``. For every distinct
    ``Description`` a new figure is created and one line is drawn per entry of
    ``methods``.

    Args:
        methods: Iterable of method names to plot; ``None`` means none.
        thresh: Threshold used to select the samples of methods listed in
            ``THRESHOLD_METHODS``; all other methods are selected with
            threshold 0.
        unit: ``"milliseconds"`` or ``"seconds"`` (case-insensitive); picks
            the ``Runtime (<unit>)`` column.
        error_bars: Draw error bars of twice the standard deviation of the
            repeated samples.
        table: Also display the raw samples behind every plotted line.
    """
    if methods is None:
        methods = []

    unit = unit.lower()
    y_col = f"Runtime ({unit})"
    group_keys = ["Input", "Method", "Description", "Size", "Threshold"]

    # One (method, threshold filter, legend label) triple per requested method.
    series_specs = []
    for m in methods:
        if m in THRESHOLD_METHODS:
            series_specs.append((m, thresh, f"{m} (threshold: {thresh:})"))
        else:
            series_specs.append((m, 0, m))

    # Keyword arguments shared by every line; error-bar styling only on demand.
    plot_kwargs = {"y": y_col, "marker": ".", "markersize": 10, "grid": True}
    if error_bars:
        plot_kwargs.update(ecolor="black", capsize=4)

    for input_t in sorted(set(df["Description"])):
        fig, ax = plt.subplots(figsize=FIG_SIZE)
        ax.set_title(f"Runtime vs. Input Size\n{input_t.capitalize()}")

        # Labels of the lines actually drawn, in drawing order, so the legend
        # stays aligned when a method has no samples for this figure.
        plotted_labels = []
        for m, m_thresh, label in series_specs:
            my_df = df[
                (df["Method"] == m)
                & (df["Description"] == input_t)
                & (df["Threshold"] == m_thresh)
            ]
            # Nothing measured for this combination: plotting an empty frame
            # would raise inside pandas, so skip the method for this figure.
            if my_df.empty:
                continue

            my_df = my_df.sort_values(["Size", "Description", "Method", "Threshold"])
            group = my_df.groupby(group_keys)
            means = group.mean().reset_index().sort_values(["Size"]).set_index(["Size"])

            if error_bars:
                errors = (
                    (2 * group.std())
                    .reset_index()
                    .sort_values(["Size"])
                    .set_index(["Size"])
                )
                means.plot(ax=ax, yerr=errors, **plot_kwargs)
            else:
                means.plot(ax=ax, **plot_kwargs)
            plotted_labels.append(label)

            if table:
                display(my_df)

        # Fix legend
        if plotted_labels:
            ax.legend(plotted_labels)

        # Set axis titles
        ax.set_xlabel("Size")
        ax.set_ylabel(y_col)

In [None]:
def plot_threshold_impact(
    methods=None,
    size=50_000,
    unit="milliseconds",
    error_bars=False,
    table=False,
):
    """Plot mean runtime against threshold for one input size.

    Reads the module-level ``thresh_df`` populated by ``load()``. For every
    distinct ``Description`` a new figure is created and one line is drawn per
    entry of ``methods``. Returns without plotting when ``methods`` is ``None``
    or ``thresh_df`` is empty.

    Args:
        methods: Iterable of method names to plot.
        size: Input size whose samples are plotted.
        unit: ``"milliseconds"`` or ``"seconds"`` (case-insensitive); picks
            the ``Runtime (<unit>)`` column.
        error_bars: Draw error bars of twice the standard deviation of the
            repeated samples.
        table: Also display the raw samples behind every plotted line.
    """
    if methods is None or not len(thresh_df):
        return

    unit = unit.lower()
    y_col = f"Runtime ({unit})"
    group_keys = ["Input", "Method", "Description", "Size", "Threshold"]

    # Keyword arguments shared by every line; error-bar styling only on demand.
    plot_kwargs = {"y": y_col, "marker": ".", "markersize": 10, "grid": True}
    if error_bars:
        plot_kwargs.update(capsize=4, ecolor="black")

    for input_t in sorted(set(thresh_df["Description"])):
        fig, ax = plt.subplots(figsize=FIG_SIZE)
        ax.set_title(
            f"Runtime vs. Threshold at {size:,} inputs\n{input_t.capitalize()}"
        )

        # Methods actually drawn, in drawing order, so the legend stays
        # aligned when a method has no samples for this figure.
        plotted_methods = []
        for m in methods:
            my_df = thresh_df[
                (thresh_df["Method"] == m)
                & (thresh_df["Description"] == input_t)
                & (thresh_df["Size"] == size)
            ]
            # Nothing measured for this combination: plotting an empty frame
            # would raise inside pandas, so skip the method for this figure.
            if my_df.empty:
                continue

            my_df = my_df.sort_values(["Description", "Method", "Threshold", "Size"])
            group = my_df.groupby(group_keys)
            # Order by threshold so the line runs left to right even when the
            # samples come from several inputs (mirrors the sort by size in
            # plot_vs_methods).
            means = (
                group.mean()
                .reset_index()
                .sort_values(["Threshold"])
                .set_index(["Threshold"])
            )

            if error_bars:
                errors = (
                    (2 * group.std())
                    .reset_index()
                    .sort_values(["Threshold"])
                    .set_index(["Threshold"])
                )
                means.plot(ax=ax, yerr=errors, **plot_kwargs)
            else:
                means.plot(ax=ax, **plot_kwargs)
            plotted_methods.append(m)

            if table:
                display(my_df)

        if plotted_methods:
            ax.legend(plotted_methods)
        ax.set_xlabel("Threshold")
        ax.set_ylabel(y_col)

In [None]:
# Reuse RESULTS_DIR so this cell and load() always look at the same folder.
results_dir = RESULTS_DIR
# Candidate runs: sub-directories only, in sorted name order. A missing
# results folder is treated like an empty one instead of raising.
dirs = (
    sorted(p for p in results_dir.iterdir() if p.is_dir())
    if results_dir.is_dir()
    else []
)

# Give the option to load any data set
if dirs:
    interact(
        load,
        in_dir=widgets.Dropdown(
            options=dirs,
            value=dirs[-1],  # preselect the last entry in sorted order
            description="Results Dir:",
        ),
    )
else:
    # Without any run there is nothing to select; say so rather than failing
    # with an IndexError on dirs[-1].
    print(f"No results found in {results_dir}")