In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
from glob import glob
import xarray as xr

In [None]:
def plot_performance_vs_correctness_L2(df, error_type, title="MPAS"):
    """Scatter-plot speedup against L2-norm relative error for every configuration.

    Speedup ("Improvement") is the cost of configuration 0 divided by each
    configuration's cost. Points are coloured by the '32-bit %' column and
    show 'Configuration Number' on hover. If configuration 1 survives the
    NaN filtering it is drawn again as a larger red star with the legend
    entry "Uniform 32-bit".

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Configuration Number', 'Cost', '32-bit %'
        and f"{error_type}, Relative Error (L2 Norm)".
    error_type : str
        Metric name used to build the error column name.
    title : str, optional
        Figure title; defaults to "MPAS".

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    KeyError
        If 'Cost' or the error column is missing from ``df``.
    IndexError
        If ``df`` has no row with 'Configuration Number' == 0.
    """
    error_column = f"{error_type}, Relative Error (L2 Norm)"

    # Only rows with both a cost and an error value can be placed on the plot.
    df_plot = df.dropna(subset=["Cost", error_column])

    # The baseline is looked up in the unfiltered frame: configuration 0 may
    # have been dropped above if it has no error value.
    baseline_costs = df.loc[df['Configuration Number'] == 0, 'Cost']
    if baseline_costs.empty:
        raise IndexError(
            "no row with 'Configuration Number' == 0; cannot compute the baseline cost"
        )
    baseline_cost = baseline_costs.iloc[0]
    df_plot = df_plot.assign(Improvement = baseline_cost/df_plot['Cost'])

    fig = px.scatter(
        df_plot,
        x = "Improvement",
        y = error_column,
        color = '32-bit %',
        hover_data = ['Configuration Number'],
    )

    # Style the scatter markers BEFORE the star trace is added. update_traces
    # without a selector touches every trace on the figure, so running it
    # afterwards would reset the star's size from 20 back to 14.
    fig.update_traces(
        marker={
            'size' : 14,
            'opacity' : 0.5,
            'line_width' : 1,
            'line_color' : "black",
        }
    )

    # Highlight configuration 1 (filtered once and reused for x and y).
    uniform_32bit = df_plot[df_plot["Configuration Number"] == 1]
    if not uniform_32bit.empty:
        fig.add_trace(
            go.Scattergl(
                x = uniform_32bit["Improvement"],
                y = uniform_32bit[error_column],
                mode = "markers",
                marker_symbol = 'star',
                marker_size = 20,
                marker_color = "red",
                marker_line_width = 1,
                marker_line_color = "black",
                name = "Uniform 32-bit"
            )
        )

    fig.update_layout(
        width = 600,
        height = 400,
        title=dict(
            text = title,
            y = 0.97,
            x = 0.25,
            xanchor = 'left',
            yanchor = 'top',
        ),
        yaxis_tickformat = ".0e",
        xaxis_tickformat = ".1f",
        xaxis_ticksuffix = "x",
        xaxis_title = "Speedup",
        yaxis_title = "Relative Error",
        font_size = 20,
        legend = dict(
            bgcolor = "#E5ECF6",
            entrywidth = 130,
            orientation = "h",
            yanchor = "bottom",
            xanchor = "right",
            x = 1.0,
            y = 1.02,
            title_text = ""
        ),
        margin = dict(
            r = 0,
            t = 0,
            b = 0,
            l = 0,
        )
    )

    # Pin the colour scale to the full 0-100 % range so figures are comparable.
    fig.update_coloraxes(
        colorbar = {
            'thickness' : 20,
            'title' : "% 32-bit",
        },
        cmin = 0,
        cmax = 100,
    )

    # Speedup of 1.0x marks parity with the baseline configuration.
    fig.add_vline(x=1.0, line_width=2, line_dash="dash", line_color="grey")

    return fig


# Ordered (marker substring, label) pairs; the first marker found in a log line wins.
_SEARCH_LOG_LABELS = (
    ("[PASSED]", "Passed"),
    ("error threshold was exceeded", "Exceeded Error Threshold"),
    ("(timeout)", "Timeout"),
    ("(runtime failure)", "Runtime Error"),
    ("(compilation error)", "Compilation Error"),
    ("(plugin error)", "Prose Plugin Error"),
)


def _label_for_log_line(line):
    """Return the outcome label for a search-log line, or None if no known marker is present."""
    for marker, label in _SEARCH_LOG_LABELS:
        if marker in line:
            return label
    return None


def _cost_from_log_line(line):
    """Return the cost parsed from whitespace-separated token 4, else token 5, else NaN."""
    tokens = line.split()
    for position in (4, 5):
        try:
            return float(tokens[position])
        except (ValueError, IndexError):
            # Non-numeric token, or the line is too short to have this token.
            continue
    return np.nan


def _percent_32bit(config_path):
    """Return the percentage of ',4' lines among the ',4' / ',8' lines of a config file.

    Returns NaN when the file contains neither kind of line, instead of
    dividing by zero.
    """
    float_count = 0
    double_count = 0
    with open(config_path, "r") as config_file:
        for config_line in config_file:
            if ",4" in config_line:
                float_count += 1
            elif ",8" in config_line:
                double_count += 1

    total = float_count + double_count
    if total == 0:
        return np.nan
    return 100*(float_count / total)


def get_MPAS_data(search_log_path):
    """Parse a search log and its per-configuration directories into a DataFrame.

    Each log line that starts with an integer configuration number (the text
    before the first ':') and contains a known outcome marker yields one row.
    The configuration's directory is the zero-padded four-digit number next
    to the log file; it must contain exactly one file matching ``config*``.

    Columns produced per row:
      * 'Configuration Number', 'Configuration Path'
      * '32-bit %'  - share of ',4' lines among ',4'/',8' lines of the config
      * 'Cost'      - float from log token 4 or 5, NaN if neither parses
      * 'Label'     - outcome derived from the marker found in the line
      * '<long_name>, Relative Error (L2 Norm)' for every non-"time" column of
        the configuration's ``errors.pckl``. When that file is missing the
        keys are omitted, and ``pd.DataFrame`` fills them with NaN.

    Relies on the module-level ``LONG_NAME_MAP`` (indexable by error-column
    name, each entry exposing ``.long_name``), which must be defined before
    this function is called.

    Parameters
    ----------
    search_log_path : str
        Path to the search log file.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    AssertionError
        If a labelled configuration's directory does not contain exactly one
        ``config*`` file.
    """
    log_dir = os.path.dirname(search_log_path)

    with open(search_log_path, "r") as log_file:
        search_log_lines = log_file.readlines()

    rows = []
    for line in search_log_lines:
        try:
            config_number = int(line.split(":")[0])
        except ValueError:
            # Not a configuration line (header, blank line, free text, ...).
            continue

        # Decide whether the line is kept before touching the filesystem, so
        # lines without a recognised outcome cost nothing and cannot fail.
        label = _label_for_log_line(line)
        if label is None:
            continue

        config_dir_path = os.path.join(log_dir, f"{config_number:0>4}")
        config_paths = glob(f"{config_dir_path}/config*")
        assert len(config_paths) == 1, (
            f"expected exactly one config* file in {config_dir_path}, found {len(config_paths)}"
        )

        # Key order is kept stable because it determines the column order.
        row = {
            'Configuration Number': config_number,
            'Configuration Path': config_paths[0],
            '32-bit %': _percent_32bit(config_paths[0]),
            'Cost': _cost_from_log_line(line),
            'Label': label,
        }

        try:
            # NOTE(security): unpickling executes arbitrary code; only read
            # errors.pckl files from trusted sources.
            errors_df = pd.read_pickle(os.path.join(config_dir_path, "errors.pckl"))
        except FileNotFoundError:
            # No error data for this configuration: leave the metric keys out
            # rather than reusing a previous configuration's frame.
            errors_df = None

        if errors_df is not None:
            for column_name in errors_df.columns:
                if column_name == "time":
                    continue
                metric = LONG_NAME_MAP[column_name].long_name
                row[f"{metric}, Relative Error (L2 Norm)"] = np.linalg.norm(errors_df[column_name], ord=2)

        rows.append(row)

    return pd.DataFrame(rows)

In [None]:
# Dataset opened from the configuration-0000 history file; get_MPAS_data reads
# this global to look up the long_name of each error column.
history_file = "./prose_logs/0000/history.2014-09-10_00.00.00.nc"
LONG_NAME_MAP = xr.open_dataset(history_file)

# One row per labelled configuration found in the search log.
search_log_file = "./prose_logs/__search_log_12hr.txt"
df = get_MPAS_data(search_log_file)

In [None]:
# Speedup vs. L2-norm relative error for the "Pressure" metric.
fig = plot_performance_vs_correctness_L2(
    df,
    "Pressure",
)
fig.show()

In [None]:
# Speedup vs. L2-norm relative error for the "Kinetic energy at a cell center" metric.
fig = plot_performance_vs_correctness_L2(
    df,
    "Kinetic energy at a cell center",
)
fig.show()