In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
from glob import glob
import pickle
from copy import deepcopy
import subprocess

In [10]:
def plot_performance_vs_correctness(df, error_type, error_threshold):
    """Scatter-plot speedup over the baseline against an error metric.

    Rows of ``df`` containing any NaN are dropped first. Speedup is the
    baseline cost (the row with ``Configuration Number == 0``) divided by
    each configuration's cost, computed for both ``Subset Cost``
    (``Improvement``, the x axis) and ``Total Cost`` (``Total_Improvement``).

    Args:
        df: Frame with at least the columns ``Configuration Number``,
            ``Subset Cost``, ``Total Cost``, ``32-bit %`` and ``error_type``.
        error_type: Name of the column plotted on the (log-scaled) y axis.
        error_threshold: y value at which a dashed horizontal line is drawn.

    Returns:
        A ``(fig, df_plot)`` tuple: the plotly figure and the filtered frame
        with the derived ``Improvement``, ``Total_Improvement``,
        ``error_x_minus`` and ``error_x`` columns.

    Raises:
        IndexError: If no ``Configuration Number == 0`` row survives the
            NaN filter.
    """
    df_plot = df.dropna()

    # Build the mask from df_plot itself. A mask taken from the unfiltered
    # ``df`` has a different index, which makes pandas reindex it and emit
    # "Boolean Series key will be reindexed to match DataFrame index".
    is_baseline = df_plot['Configuration Number'] == 0
    baseline_total_cost = df_plot.loc[is_baseline, 'Total Cost'].iloc[0]
    baseline_subset_cost = df_plot.loc[is_baseline, 'Subset Cost'].iloc[0]

    df_plot = df_plot.assign(Improvement=baseline_subset_cost / df_plot['Subset Cost'])
    df_plot = df_plot.assign(Total_Improvement=baseline_total_cost / df_plot['Total Cost'])
    # Signed gaps between the two speedups; after the clamp below at most
    # one of the two is non-zero per row.
    df_plot = df_plot.assign(error_x_minus=df_plot['Improvement'] - df_plot['Total_Improvement'])
    df_plot = df_plot.assign(error_x=df_plot['Total_Improvement'] - df_plot['Improvement'])

    # Clamp negative numeric values to zero explicitly rather than mutating
    # whatever the private ``_get_numeric_data()`` happens to return.
    numeric_columns = df_plot.select_dtypes(include="number").columns
    df_plot[numeric_columns] = df_plot[numeric_columns].clip(lower=0)

    # Error bars are currently disabled; the error_x / error_x_minus columns
    # are still part of the returned frame.
    fig = px.scatter(
        df_plot,
        x="Improvement",
        y=error_type,
        color='32-bit %',
        hover_data=['Configuration Number'],
        log_y=True,
    )
    fig.update_traces(
        marker={
            'size': 14,
            'opacity': 0.5,
            'line_width': 1,
            'line_color': "black",
        }
    )

    # Configuration 1 is highlighted with an open circle and a text label.
    uniform_32 = df_plot[df_plot["Configuration Number"] == 1]
    if not uniform_32.empty:
        fig.add_trace(
            go.Scatter(
                x=uniform_32["Improvement"],
                y=uniform_32[error_type],
                mode="markers+text",
                marker_symbol='circle-open',
                marker_size=14,
                marker_color="black",
                marker_line_width=3,
                marker_line_color="black",
                name="Uniform 32-bit",
                showlegend=False,
                text=["Uniform 32-bit"],
                textposition=["top right"],
                textfont_size=16,
            )
        )

    fig.update_layout(
        title=dict(
            text="ADCIRC",
            y=0.98,
            x=0.5,
            xanchor='center',
            yanchor='top',
            font_size=30,
        ),
        width=600,
        height=300,
        coloraxis_showscale=False,
        yaxis_tickformat=".0e",
        xaxis_tickformat=".1f",
        xaxis_ticksuffix="x",
        xaxis_title="",
        yaxis_title="Relative Error",
        font_size=20,
        font_family="Times New Roman",
        legend=dict(
            bgcolor="#E5ECF6",
            entrywidth=130,
            orientation="h",
            yanchor="bottom",
            xanchor="right",
            x=1.0,
            y=1.02,
            title_text="",
        ),
        margin=dict(
            r=0,
            t=40,
            b=0,
            l=0,
        ),
        coloraxis={
            "cmin": 0,
            "cmax": 100,
            "colorbar": {
                'thickness': 18,
                'title': "% 32-bit<br>(Hotspot)",
                'orientation': 'h',
                'title_font_size': 24,
                'y': -0.6,
                'len': 0.8,
            },
        },
    )
    # Dashed guides: x = 1.0 marks "no speedup", y marks the error threshold.
    fig.add_vline(x=1.0, line_width=2, line_dash="dash", line_color="grey")
    fig.add_hline(y=error_threshold, line_width=2, line_dash="dash", line_color="grey")

    return fig, df_plot

# Column names of the four error metrics read from error_metrics.txt
# (whitespace-separated fields 3..6 of its first line).
_ERROR_COLUMNS = (
    "Water Elevation Error (L2 Norm)",
    "Water Elevation Error (Max Norm)",
    "Water Velocity Error (L2 Norm)",
    "Water Velocity Error (Max Norm)",
)

# (marker substring, label) pairs, checked in this order; first match wins.
_LOG_LINE_LABELS = (
    ("[PASSED]", "Passed"),
    ("error threshold was exceeded", "Exceeded Error Threshold"),
    ("(timeout)", "Timeout"),
    ("(runtime failure)", "Runtime Error"),
    ("(compilation error)", "Compilation Error"),
    ("(plugin error)", "Prose Plugin Error"),
)


def _label_for_log_line(log_line):
    """Return the outcome label for a search-log line, or None if unrecognised."""
    for marker, label in _LOG_LINE_LABELS:
        if marker in log_line:
            return label
    return None


def _precision_signature(config_lines, prefix=None):
    """Return a string of '4'/'8' characters, one per matching config line.

    A line contributes '4' if it contains ",4", else '8' if it contains ",8".
    When ``prefix`` is given, only lines whose stripped, lower-cased text
    starts with it are considered.
    """
    kinds = []
    for config_line in config_lines:
        if prefix is not None and not config_line.strip().lower().startswith(prefix):
            continue
        if ",4" in config_line:
            kinds.append("4")
        elif ",8" in config_line:
            kinds.append("8")
    return "".join(kinds)


def _percent_32_bit(signature):
    """Return the percentage of '4' entries in a signature, NaN if it is empty."""
    if not signature:
        return np.nan
    return 100 * (signature.count("4") / len(signature))


def _read_execution_counts(config_dir_path):
    """Extract the timing archive, parse ``timing.000000`` and clean up.

    Returns a dict mapping lower-cased procedure name to the number in the
    second whitespace-separated field of each line whose text, after the
    first two characters, starts with "::".

    Raises:
        FileNotFoundError: If ``timing.000000`` is not present after the
            extraction attempt.
    """
    try:
        subprocess.run(
            ["tar", "-xvf", "gptl_timing.tar.gz"],
            cwd=config_dir_path,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        # No tar executable: carry on, as the shell-based call did, and let
        # the open() below decide whether a timing file is available.
        pass

    execution_counts = {}
    with open(os.path.join(config_dir_path, "timing.000000"), "r") as f:
        for timing_line in f:
            entry = timing_line[2:]
            if entry.lstrip().startswith("::"):
                fields = entry.split()
                execution_counts[fields[0].lower()] = float(fields[1])

    # Equivalent of ``rm timing.*``; failures are ignored just as the
    # original discarded rm's exit status and stderr.
    for name in os.listdir(config_dir_path):
        if name.startswith("timing."):
            try:
                os.remove(os.path.join(config_dir_path, name))
            except OSError:
                pass

    return execution_counts


def get_ADCIRC_data(search_log_path, verbose=True):
    """Parse a search log and its per-configuration directories.

    Each log line is expected to start with ``<number>:``; lines that do not,
    or that carry no recognised outcome marker, are skipped. For line number
    ``N`` the directory ``<log dir>/NNNN`` (zero-padded to four digits) must
    contain exactly one ``config*`` file.

    Args:
        search_log_path: Path to the search log file.
        verbose: When true (the default), echo every log line to stdout.

    Returns:
        ``(df_entire, df_subset)``. ``df_entire`` has one row per recognised
        configuration: number, config path, overall ``32-bit %``, label, the
        four error metrics, and subset/total cost. ``df_subset`` has one row
        per (configuration, procedure) for configurations that provide
        ``gptl_subset_info.pckl`` and a readable ``timing.000000``; it repeats
        the configuration columns (with ``32-bit %`` recomputed for that
        procedure's config lines) and adds ``Procedure Name``, ``CPU Time``,
        ``Execution Count`` and ``config_hash``.

    Raises:
        AssertionError: If a configuration directory does not contain exactly
            one ``config*`` file, or ``error_metrics.txt`` does not have seven
            fields on its first line.
    """
    log_dir = os.path.dirname(search_log_path)
    with open(search_log_path, "r") as f:
        search_log_lines = f.readlines()

    entire_rows = []
    subset_rows = []
    for log_line in search_log_lines:
        if verbose:
            print(log_line)

        try:
            config_number = int(log_line.split(":")[0])
        except ValueError:
            continue
        row = {'Configuration Number': config_number}
        config_dir_path = os.path.join(log_dir, f"{config_number:0>4}")

        config_paths = glob(f"{config_dir_path}/config*")
        assert len(config_paths) == 1, (
            f"expected exactly one config* file in {config_dir_path}, found {len(config_paths)}"
        )
        row['Configuration Path'] = config_paths[0]

        # Read the configuration once; it is reused for every procedure below.
        with open(row['Configuration Path'], "r") as f:
            config_lines = f.readlines()

        # NaN (rather than ZeroDivisionError) when no precision entries exist,
        # matching the per-procedure handling further down.
        row['32-bit %'] = _percent_32_bit(_precision_signature(config_lines))

        label = _label_for_log_line(log_line)
        if label is None:
            continue
        row['Label'] = label

        if config_number == 0:
            # Configuration 0 is the reference, so its error is zero.
            error_values = [0, 0, 0, 0]
        else:
            try:
                with open(os.path.join(config_dir_path, "error_metrics.txt"), "r") as f:
                    fields = f.readlines()[0].split()
                assert len(fields) == 7, (
                    f"expected 7 fields in error_metrics.txt, found {len(fields)}"
                )
                error_values = [float(value) for value in fields[3:7]]
            except FileNotFoundError:
                error_values = [np.nan] * len(_ERROR_COLUMNS)
        row.update(zip(_ERROR_COLUMNS, error_values))

        # Costs follow the first and second "=" tokens; the total cost token
        # carries a trailing ")" that is stripped.
        tokens = log_line.split()
        try:
            first_eq = tokens.index("=")
            second_eq = tokens.index("=", first_eq + 1)
            row['Subset Cost'] = float(tokens[first_eq + 1])
            row['Total Cost'] = float(tokens[second_eq + 1][:-1])
        except ValueError:
            row['Subset Cost'] = np.nan
            row['Total Cost'] = np.nan

        entire_rows.append(dict(row))

        try:
            # NOTE: pickle.load can execute arbitrary code; only use this on
            # search directories you trust.
            with open(os.path.join(config_dir_path, "gptl_subset_info.pckl"), "rb") as f:
                gptl_subset_info = pickle.load(f)
            execution_counts = _read_execution_counts(config_dir_path)
        except FileNotFoundError:
            continue

        for procedure_name, cpu_time in gptl_subset_info.items():
            subset_row = dict(row)
            subset_row['Procedure Name'] = procedure_name
            subset_row['CPU Time'] = cpu_time
            subset_row["Execution Count"] = execution_counts.get(procedure_name.lower(), np.nan)

            signature = _precision_signature(config_lines, prefix=procedure_name.lower() + "::")
            # hash() of a str is only stable within one interpreter session;
            # it is used purely as an in-memory grouping key.
            subset_row["config_hash"] = hash(signature)
            subset_row['32-bit %'] = _percent_32_bit(signature)

            subset_rows.append(subset_row)

    return pd.DataFrame(entire_rows), pd.DataFrame(subset_rows)

In [6]:
# Parse the search log into a per-configuration frame (df_entire) and a
# per-(configuration, procedure) frame (df_subset).
df_entire, df_subset = get_ADCIRC_data("./prose_logs/search_log.txt")

0000: [PASSED] subset cost = 20.903 (total cost = 955.398)

0001: [FAILED] subset cost = 17.804 (total cost = 855.620) but error threshold was exceeded

0002: [FAILED] subset cost = 18.420 (total cost = 768.981) but error threshold was exceeded

0003: [FAILED] subset cost = 21.680 (total cost = 789.413) but error threshold was exceeded

0004: [FAILED] (runtime failure) unable to execute transformed source code (see outlog.txt for details)

0005: [FAILED] (runtime failure) unable to execute transformed source code (see outlog.txt for details)

0006: [FAILED] (runtime failure) unable to execute transformed source code (see outlog.txt for details)

0007: [FAILED] (runtime failure) unable to execute transformed source code (see outlog.txt for details)

0008: [FAILED] subset cost = 18.589 (total cost = 771.974) but error threshold was exceeded

0009: [FAILED] subset cost = 16.866 (total cost = 756.882) but error threshold was exceeded

0010: [FAILED] (runtime failure) unable to execute tran

In [11]:
# Plot speedup against the L2-norm water-elevation error, with the dashed
# threshold line at 1e-1, and export a static image. The returned frame is
# not needed here.
fig, _ = plot_performance_vs_correctness(df_entire, error_type="Water Elevation Error (L2 Norm)", error_threshold=1e-1)
fig.write_image("fig1.png")


Boolean Series key will be reindexed to match DataFrame index.



In [12]:
from plotly.subplots import make_subplots

# CPU time per call for every (configuration, procedure) row.
df_subset["Average CPU Time Per Call"] = df_subset["CPU Time"] / df_subset["Execution Count"]

# For procedures whose name contains "::", replace each row's value with the
# mean over all rows sharing the same procedure and the same config_hash.
# A single grouped transform replaces the nested loops that rebuilt the same
# boolean mask twice per group.
is_scoped = df_subset["Procedure Name"].str.contains("::")
df_subset.loc[is_scoped, "Average CPU Time Per Call"] = (
    df_subset[is_scoped]
    .groupby(["Procedure Name", "config_hash"])["Average CPU Time Per Call"]
    .transform("mean")
)

# Hard-coded baseline cost per procedure. The keys decide which procedures
# get a subplot row below; each value is divided by the baseline
# configuration's execution count and then averaged with the measured
# per-call time. Commented-out entries are excluded from the figure.
# NOTE(review): values are presumably total CPU time for the baseline run --
# confirm units and provenance.
baseline_costs = {
 '::itpackv::peror': 7.4881640625,
 '::itpackv::pjac': 6.6400781250000005,
 '::itpackv::scal': 2.229921875,
#  '::itpackv::unscal': 1.8463125,
#  '::itpackv::pmult': 1.0671562499999998,
#  '::itpackv::zbrent': 0.5800234375,
#  '::itpackv::determ': 0.42157031249999993,
#  '::itpackv::scopy': 0.17984374999999997,
#  '::itpackv::chgcon': 0.10775781250000002,
#  '::itpackv::pstop_nrms': 0.078828125,
#  '::itpackv::eigvss': 0.05520312500000002,
#  '::itpackv::parcon': 0.048570312500000004,
#  '::itpackv::echall': 0.015125000000000003,
#  '::itpackv::itjcg': 0.00148140625,
 '::itpackv::jcg': 4.827265625e-06,
#  '::itpackv::dfault': 3.13828125e-06,
}

# Display strings used in the subplot titles, rendered as "<name> (<value>%)".
# The trailing comment on each entry appears to be the unrounded value the
# label was derived from (presumably the procedure's share of hotspot time --
# confirm).
# NOTE(review): the '::itpackv::pjac' label "32.9" does not match the value
# in its trailing comment (31.98); confirm which one is intended.
procedure_percentages = {
 '::itpackv::peror': "36.3", #36.29100455739751,
 '::itpackv::pjac': "32.9", #31.977778859505513,
 '::itpackv::scal': "10.7", #10.732272246833244,
 '::itpackv::unscal': "8.9",#8.860188884896429,
 '::itpackv::pmult': "5.1",#5.139347218124911,
 '::itpackv::zbrent': "2.8",#2.784887123927813,
 '::itpackv::determ': "2.0",#2.0349276954776263,
 '::itpackv::scopy': "<1.0",# 0.8683757455311305,
 '::itpackv::chgcon': "<1.0",#0.5216739993869903,
 '::itpackv::pstop_nrms': "<1.0",#0.38129119693919067,
 '::itpackv::eigvss': "<1.0",#0.2654922149852498,
 '::itpackv::parcon': "<1.0",#0.23624467520250958,
 '::itpackv::echall': "<0.1",#0.0730656138543772,
 '::itpackv::itjcg': "<0.01",#0.0070421492289083586,
 '::itpackv::jcg': "<0.01",#2.286905914467208e-05,
 '::itpackv::dfault': "<0.01",#1.4990663907496354e-05,
}

# One subplot row per procedure in baseline_costs; each title is the last
# "::"-separated component of the name followed by its percentage label.
subplot_titles = tuple(
    f'{name[name.rfind(":") + 1:]} ({procedure_percentages[name]}%)'
    for name in baseline_costs
)
fig = make_subplots(
    len(baseline_costs),
    1,
    subplot_titles=subplot_titles,
    shared_xaxes=True,
    vertical_spacing=0.10,
)
for i, procedure_name in enumerate(baseline_costs):
    df_plot = df_subset[df_subset["Procedure Name"] == procedure_name]

    # Baseline per-call cost: mean of the hard-coded cost (scaled by the
    # baseline execution count) and the measured baseline per-call time.
    baseline_row = df_plot[df_plot["Configuration Number"] == 0]
    baseline_cost = baseline_costs[procedure_name] / baseline_row["Execution Count"].values[0]
    baseline_cost = (baseline_cost + baseline_row["Average CPU Time Per Call"].values[0]) / 2

    df_plot = df_plot.assign(Improvement=baseline_cost / df_plot['Average CPU Time Per Call'])
    # Keep one point per distinct precision assignment of this procedure.
    df_plot = df_plot.drop_duplicates(subset=["config_hash"])
    fig.add_trace(
        go.Scatter(
            x=df_plot["Improvement"],
            # Random vertical jitter, one value per plotted point. Sized from
            # df_plot (not df_subset) so x and y have the same length.
            y=np.random.rand(len(df_plot)),
            mode='markers',
            customdata=df_plot["Configuration Number"],
            hovertemplate="%{customdata}",
            marker=dict(
                size=10,
                color=df_plot["32-bit %"],
                line_width=1,
                line_color="black",
                opacity=0.6,
                coloraxis="coloraxis1",
                symbol="diamond",
            ),
            showlegend=False,
        ),
        i + 1,
        1,
    )
    if i == 0:
        fig.update_traces(
            marker_colorbar_title="% 32-bit",
            marker_colorscale="Plasma",
        )


    # df_plot = df_plot[df_plot["Configuration Number"] == df_entire[df_entire["Cost"] == df_entire[df_entire["Label"] == "Passed"]["Cost"].min()]["Configuration Number"].values[0]]
    # fig.add_trace(
    #     go.Scatter(
    #         x = df_plot["Improvement"],
    #         y = [0],
    #         mode = 'markers',
    #         customdata=[df_plot["Configuration Number"]],
    #         hovertemplate="%{customdata}",
    #         marker = dict(
    #             size = 15,
    #             color=df_plot["32-bit %"],
    #             line_width= 1,
    #             line_color="black",
    #             coloraxis="coloraxis1",
    #             symbol="star",
    #         ),
    #         name = "Optimal Variant",
    #         showlegend=[True if i == 0 else False for x in range(1)][0],
    #     ),
    #     row = i + 1,
    #     col = 1,
    # )

# Figure-wide styling for the per-procedure speedup plot. The option groups
# are named first so the update_layout call reads as a summary.
figure_margin = dict(r=0, t=60, b=0, l=0)
figure_title = dict(
    text="ADCIRC",
    y=0.98,
    x=0.5,
    xanchor='center',
    yanchor='top',
    font_size=30,
)
# Shared colour axis spanning 0-100 with a horizontal colour bar.
shared_coloraxis = {
    "cmin": 0,
    "cmax": 100,
    "colorbar": {
        'thickness': 18,
        'title': "% 32-bit<br>(Procedure)",
        'orientation': 'h',
        'y': -0.6,
    },
}
legend_style = dict(
    bgcolor="#E5ECF6",
    entrywidth=130,
    orientation="h",
    yanchor="bottom",
    xanchor="right",
    x=1.0,
    y=1.02,
    title_text="",
)

fig.update_layout(
    width=600,
    height=280,
    coloraxis_showscale=False,
    showlegend=False,
    margin=figure_margin,
    title=figure_title,
    font_size=24,
    font_family="Times New Roman",
    coloraxis=shared_coloraxis,
    legend=legend_style,
)

# Log-scaled speedup axes with an "x" suffix; the y axes only carry jitter,
# so they are hidden.
fig.update_xaxes(type="log", ticksuffix="x")
# fig.update_xaxes(title="Speedup", row=len(baseline_costs), col=1)
fig.update_yaxes(visible=False)
# Nudge the subplot titles down and shrink them.
fig.update_annotations(yshift=-5, font_size=18)
# Dashed guide at x = 1.0.
fig.add_vline(x=1.0, line_width=2, line_dash="dash", line_color="grey")
fig.write_image("fig2.png")