In [None]:
# Select the interactive "widget" matplotlib backend (provided by ipympl) so that
# figures render as live widgets inside the notebook.
%matplotlib widget

In [None]:
%%javascript
// Disable scrolling on plots.
// `IPython` is a global supplied by the notebook front end; guard the lookup so
// this cell is a harmless no-op (rather than a ReferenceError) in front ends that
// do not expose it.
if (typeof IPython !== "undefined" && IPython.OutputArea) {
    // Always answering "false" stops output areas from collapsing into a scroll box.
    IPython.OutputArea.prototype._should_scroll = function(lines) {
        return false;
    };
}

In [None]:
import json
import re
from pathlib import Path

import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.core.display import HTML, display
from IPython.display import JSON, display
from ipywidgets import fixed, interact, interact_manual, interactive
from matplotlib.ticker import (AutoMinorLocator, FormatStrFormatter,
                               MultipleLocator)
import matplotlib.pyplot as plt

from dataclasses import dataclass

In [None]:
# Table display settings, all routed through the same pandas options API.
# Show every column instead of eliding the middle ones.
pd.set_option("display.max_columns", None)
# Allow long tables (up to 1000 rows) to be printed in full.
pd.set_option("display.max_rows", 1000)
pd.set_option("display.precision", 2)
# Render floats with thousands separators and six decimal places.
pd.set_option("display.float_format", "{0:,.6f}".format)

In [None]:
# Make cells wider: inject a CSS rule that forces elements with the `.container`
# class to 90% of the page width, so wide tables and figures get more room.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
# Directory that holds one sub-directory per benchmark run.
RESULTS_DIR = Path("./results")
# Sorting implementations that can be selected for plotting.
METHODS = ("qsort_c", "vanilla_quicksort", "insertion_sort", "std")
# Methods whose rows are filtered by the selected threshold value.
# NOTE: the trailing comma matters. Without it this is a plain string and
# `method in THRESHOLD_METHODS` silently becomes a substring test.
THRESHOLD_METHODS = ("qsort_c",)
DEFAULT_METHOD = "qsort_c"
# Directory names recognised inside the "Input" paths of the result CSVs.
INPUT_TYPES = ("ascending", "descending", "random", "single_num", "N/A")
# Runtime units offered in the plots; each matches a "Runtime (<unit>)" column.
UNITS = ("milliseconds", "seconds")
FIG_SIZE = (10, 4)

# Runs loaded by the most recent call to load_multiple().
results = []

@dataclass
class Result:
    """One loaded benchmark run, as assembled by ``load_multiple``.

    Fields are positional: ``load_multiple`` constructs instances as
    ``Result(name, dir_path, csv_path, job_details, df, avg_df)``.
    """

    # Label shown in plot legends; load_multiple derives it from the run
    # directory's "partition" file.
    name: str
    # Directory the run was loaded from.
    dir_path: Path
    # CSV file inside the directory that was read.
    csv_path: Path
    # Parsed contents of "job_details.json" ({} when it is missing or unreadable).
    job_details: dict
    # Cleaned per-run rows (first value returned by clean_df).
    df: pd.DataFrame
    # Rows averaged over repeated runs (second value returned by clean_df).
    avg_df: pd.DataFrame


def clean_path(path: "str | Path"):
    """Shorten an input path so it starts at its input-type directory.

    Everything up to and including the slash before the first matching entry of
    ``INPUT_TYPES`` is dropped, e.g. ``"/data/in/random/1000.txt"`` becomes
    ``"random/1000.txt"``.

    Args:
        path: Path as a string or ``Path``. Any other value (for example the NaN
            pandas uses for a missing cell) is returned untouched instead of
            raising ``TypeError``.

    Returns:
        The shortened path as a string, or the input unchanged when it names no
        known input type or is not path-like.
    """
    if isinstance(path, Path):
        # Substring and regex matching below need text, not a Path object.
        path = path.as_posix()
    elif not isinstance(path, str):
        return path

    for input_type in INPUT_TYPES:
        if input_type in path:
            # Escape the name so it is matched literally; the callable
            # replacement keeps it literal on the output side as well.
            pattern = f"^.*?/{re.escape(input_type)}"
            return re.sub(pattern, lambda _match: input_type, path)
    return path


def clean_df(df):
    """Normalise a raw results table and average repeated runs.

    The input frame is left untouched; a new frame is built from it, so the
    function can be called repeatedly on the same data.

    Args:
        df: Raw results with the columns "Input", "Description", "Method",
            "Size", "Threshold" and "Elapsed Time (microseconds)".

    Returns:
        A ``(df, avg_df)`` tuple. ``df`` holds one row per run with the columns
        "Input", "Description", "Method", "Size", "Threshold",
        "Runtime (milliseconds)" and "Runtime (seconds)". ``avg_df`` holds the
        mean runtimes per (Input, Method, Description, Size, Threshold) group.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Convert microseconds to milliseconds; seconds are derived from that.
    runtime_ms = df["Elapsed Time (microseconds)"] / 1000

    # Build a brand-new frame in the final column order. This avoids renaming
    # columns on the caller's object and assigning into a column slice.
    cleaned = pd.DataFrame(
        {
            # Cleanup the input column
            "Input": df["Input"].apply(clean_path),
            "Description": df["Description"],
            "Method": df["Method"],
            "Size": df["Size"],
            "Threshold": df["Threshold"],
            "Runtime (milliseconds)": runtime_ms,
            "Runtime (seconds)": runtime_ms / 1000,
        }
    )

    # Average runtime for repeated runs.
    avg_df = (
        cleaned.groupby(["Input", "Method", "Description", "Size", "Threshold"])
        .mean()
        .reset_index()
    )

    return cleaned, avg_df


def load_multiple(in_dirs=None):
    """Load the selected run directories and build the plotting controls.

    For every directory the first ``*.csv`` file (in sorted order) is read and
    passed through ``clean_df``. The optional files ``job_details.json`` and
    ``partition`` supply metadata. The loaded runs replace the module-level
    ``results`` list, after which two ``interact_manual`` panels are created:
    one for ``plot_runtime`` and one for ``plot_threshold_impact``.

    The threshold and size sliders are both populated from the first loaded run.

    Args:
        in_dirs: Iterable of ``Path`` objects, one per run directory. With
            ``None`` or an empty selection nothing is loaded and no panel is
            created.

    Raises:
        FileNotFoundError: If a selected directory contains no CSV file.
    """
    global results
    results = []

    if not in_dirs:
        # Nothing selected yet; indexing results[0] below would fail.
        print("Select at least one results directory.")
        return

    for d in in_dirs:
        # Sort so the chosen file does not depend on directory listing order.
        csvs = sorted(d.glob("*.csv"))
        if not csvs:
            raise FileNotFoundError("CSV Missing")
        in_file = csvs[0]

        # Both metadata files are optional: report problems reading them and
        # carry on, but let unrelated errors surface.
        info = {}
        info_path = Path(d, "job_details.json")
        try:
            if info_path.is_file():
                with open(info_path, "r") as json_file:
                    info = json.load(json_file)
        except (OSError, ValueError) as err:  # ValueError covers malformed JSON
            print(f"Ignoring unreadable {info_path}: {err}")

        partition = ""
        partition_path = Path(d, "partition")
        try:
            if partition_path.is_file():
                # Strip the trailing newline so legend labels stay on one line.
                partition = partition_path.read_text().strip()
        except (OSError, ValueError) as err:
            print(f"Ignoring unreadable {partition_path}: {err}")

        df = pd.read_csv(in_file)
        df, avg_df = clean_df(df)

        # Fall back to the directory name so every run has a usable legend label.
        name = partition or d.name
        results.append(Result(name, d, in_file, info, df, avg_df))

    reference = results[0].avg_df

    # 0 is not offered as a selectable threshold.
    thresholds = sorted(set(reference["Threshold"]) - {0})

    interact_manual(
        plot_runtime,
        method=widgets.Dropdown(options=METHODS, value=METHODS[0], description="Method:"),
        threshold=widgets.SelectionSlider(options=thresholds, description="Threshold"),
        unit=widgets.Dropdown(options=UNITS, description="Unit"),
        results=fixed(results)
    )

    # Sizes for the method that plot_threshold_impact plots ("qsort_c").
    qsort_rows = reference[reference["Method"] == "qsort_c"]
    sizes = sorted(set(qsort_rows["Size"]))

    # plot_threshold_impact takes (size, unit, results) only, so no threshold
    # widget is passed here.
    interact_manual(
        plot_threshold_impact,
        unit=widgets.Dropdown(options=UNITS, description="Unit"),
        size=widgets.SelectionSlider(options=sizes),
        results=fixed(results)
    )

def plot_runtime(method: str, threshold: int, unit: str, results):
    """Plot average runtime against input size, one figure per input ordering.

    Four figures are created ("ascending", "descending", "random" and
    "single_num"). Each loaded run contributes one line per figure, labelled
    with the run's ``name``. A run with no matching rows for a figure is skipped
    there, and the legend lists only the runs actually drawn.

    Args:
        method: Value of the "Method" column to plot.
        threshold: Value of the "Threshold" column to keep; only applied when
            ``method`` is in ``THRESHOLD_METHODS``.
        unit: "milliseconds" or "seconds" (case-insensitive); selects the
            "Runtime (<unit>)" column.
        results: Iterable of ``Result`` objects to compare.
    """
    unit = unit.lower()
    y_column = f"Runtime ({unit})"
    uses_threshold = method in THRESHOLD_METHODS

    title = f"Runtime vs. Size: {method} "
    if uses_threshold:
        title += f"(threshold: {threshold})"

    descriptions = ("ascending", "descending", "random", "single_num")
    # One (figure, axes) pair per input ordering.
    figures = {descrip: plt.subplots(figsize=FIG_SIZE) for descrip in descriptions}
    # Names of the runs drawn on each axes, in drawing order, so the legend
    # stays aligned with the lines even when a run is skipped.
    plotted = {descrip: [] for descrip in descriptions}

    for r in results:
        avg_df = r.avg_df
        selected = avg_df[avg_df["Method"] == method]
        if uses_threshold:
            selected = selected[selected["Threshold"] == threshold]

        for descrip, (_, axes) in figures.items():
            df = selected[selected["Description"] == descrip]
            if df.empty:
                # DataFrame.plot raises on a frame without data.
                continue

            df = df.sort_values(["Description", "Method", "Threshold", "Size"])
            df.plot(
                ax=axes,
                x="Size",
                y=y_column,
                marker=".",
                markersize=10,
                grid=True,
            )
            plotted[descrip].append(r.name)

    for descrip, (fig, axes) in figures.items():
        axes.set_title(f"{title}\n{descrip}")

        # Set axis titles
        axes.set_xlabel("Size")
        axes.set_ylabel(y_column)

        if plotted[descrip]:
            axes.legend(plotted[descrip])

        fig.show()

def plot_threshold_impact(size: int, unit: str, results):
    """Plot qsort_c runtime against threshold value for one input size.

    Four figures are created ("ascending", "descending", "random" and
    "single_num"). Each loaded run contributes one line per figure, labelled
    with the run's ``name``. A run with no matching rows for a figure is skipped
    there, and the legend lists only the runs actually drawn.

    Args:
        size: Value of the "Size" column to keep.
        unit: "milliseconds" or "seconds" (case-insensitive); selects the
            "Runtime (<unit>)" column.
        results: Iterable of ``Result`` objects to compare.
    """
    unit = unit.lower()
    y_column = f"Runtime ({unit})"

    descriptions = ("ascending", "descending", "random", "single_num")
    # One (figure, axes) pair per input ordering.
    figures = {descrip: plt.subplots(figsize=FIG_SIZE) for descrip in descriptions}
    # Names of the runs drawn on each axes, in drawing order, so the legend
    # stays aligned with the lines even when a run is skipped.
    plotted = {descrip: [] for descrip in descriptions}

    for r in results:
        avg_df = r.avg_df
        method_df = avg_df[avg_df["Method"] == "qsort_c"]
        size_df = method_df[method_df["Size"] == size]

        for descrip, (_, axes) in figures.items():
            df = size_df[size_df["Description"] == descrip]
            if df.empty:
                # DataFrame.plot raises on a frame without data.
                continue

            df = df.sort_values(["Description", "Method", "Threshold", "Size"])
            df.plot(
                ax=axes,
                x="Threshold",
                y=y_column,
                marker=".",
                markersize=10,
                grid=True,
            )
            plotted[descrip].append(r.name)

    for descrip, (fig, axes) in figures.items():
        axes.set_title(
            f"Qsort_c Threshold Value vs. Runtime at {size:,} Inputs\n{descrip.capitalize()}"
        )

        # Set axis titles
        axes.set_xlabel("Threshold")
        axes.set_ylabel(y_column)

        # Force int x axis
        axes.get_xaxis().set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))

        if plotted[descrip]:
            axes.legend(plotted[descrip])

        fig.show()




In [None]:
# Reuse the configured results location instead of repeating the literal path.
results_dir = RESULTS_DIR
# Every sub-directory is one selectable run; sorted for a stable widget order.
dirs = sorted(x for x in results_dir.iterdir() if x.is_dir())

# interact_manual returns the wrapped function; the trailing semicolon keeps
# its repr out of the cell output.
interact_manual(
    load_multiple,
    in_dirs=widgets.SelectMultiple(
        options=dirs,
        description="Input Dirs",
    )
);