In [28]:
import os

def parse_time(time_str):
    """Convert a duration string with a ``us`` or ``ms`` suffix to microseconds.

    Args:
        time_str: Text such as ``"1789us"`` or ``"148ms"``. Everything before
            the two-character unit suffix must be accepted by ``int``.

    Returns:
        The duration as an integer number of microseconds (``ms`` values are
        multiplied by 1000).

    Raises:
        AssertionError: If ``time_str`` does not end in ``us`` or ``ms``.
        ValueError: If the part before the suffix is not an integer literal.
    """
    unit = time_str[-2:]
    if unit == "us":
        return int(time_str[:-2])
    if unit == "ms":
        return int(time_str[:-2]) * 1000
    # Raise explicitly instead of `assert False`: assert statements are
    # stripped under `python -O`, which would make this silently return None.
    raise AssertionError(f"Unknown time format {time_str}")

all_data = {}
benchmarks = ["carbon", "silicon", "fstar", "verus", "dafny", "smt-comp"]
for dir in benchmarks:
    dir_data = {}
    print(f"Finding *.data files in ./data_v1/{dir}")
    for root, dirs, files in os.walk(f"./data_v1/{dir}_eval"):
        for file in files:
            if file.endswith(".data"):
                data_str = open(os.path.join(root, file), "r").read()
                data_lines = data_str.split("\n")

                first_line = data_lines[0].split(" ")
                if first_line[2] == "b":
                    print(f"Skipping {dir} {file}")
                    continue # log file was not generated
                assert len(first_line) == 3 and first_line[0] == "[LOGFILE]" and first_line[2][-1] == "b"
                log_size = int(first_line[2][:-1])
                data = { "log_size": log_size, "axiom_profiler": {}, "smt_scope": {} }

                assert data_lines[1] == "[tool] smt-scope"
                assert data_lines[2] == "Z3 4.8.7"

                i = 3
                current = data["smt_scope"]
                while i < len(data_lines):
                    line = data_lines[i].split(" ")
                    # ['[Parse] 1789us', '[Graph] 752us', '[Analysis] 202us', '[Loops] 0 true, 0 false', '[Branching] 0']
                    # ['[Parse] 148ms', '[Graph] 1ms', '[Analysis] 16ms', '[Loops] 0 true, 0 false', '']
                    match line[0]:
                        case "[tool]":
                            assert len(line) == 2 and line[1] == "axiom-profiler"
                            current = data["axiom_profiler"]
                        case "[Parse]":
                            parse = {}
                            time = line[1]
                            if time == "Err":
                                assert len(line) == 4 and line[2][-1] == "‰"
                                time = line[3]
                                parse["err"] = float(line[2][:-1]) / 1030
                                if parse["err"] > 1:
                                    print(line)
                                    assert False
                            parse["time"] = parse_time(time)
                            current["parse"] = parse
                        case "[Graph]":
                            current["graph"] = {}
                            current["graph"]["time"] = parse_time(line[1])
                        case "[Analysis]":
                            analysis = {}
                            time = line[1]
                            if time == "Err":
                                time = line[2]
                                analysis["err"] = " ".join(line[3:])
                            analysis["time"] = parse_time(time)
                            current["analysis"] = analysis
                        case "[Loops]":
                            assert len(line) == 5 and line[2] == "true," and line[4] == "false"
                            current["loops"] = (int(line[1]), int(line[3]))
                        case "[Branching]":
                            assert len(line) == 2
                            current["branching"] = int(line[1])
                        case "":
                            break
                        case _ if data_lines[i].startswith("This log file contains multiple checks; they will be merged and displayed as one, but the data could be invalid, confusing, or both."):
                            current["warn_multiple_checks"] = True
                        case _:
                            print(f"Unknown line {data_lines[i]}")
                            assert False
                    i += 1

                # print(data)
                dir_data[os.path.join(root, file)] = data
    all_data[dir] = dir_data
    print(f"{dir} files: {len(dir_data)}")


In [39]:
import matplotlib.pyplot as plt
import numpy as np

In [103]:
# Collect one (size, total time) point per file and tool.
# ap: axiom-profiler points; "exs"/"eys" hold the files whose parse step
#     reported an "err" fraction. ss: smt-scope points.
ap = {"xs": [], "ys": [], "exs": [], "eys": []}
ss = {"xs": [], "ys": []}
for benchmark in benchmarks:
    dir_data = all_data[benchmark]
    print(f"Processing {benchmark}")
    for data in dir_data.values():
        size = data["log_size"]
        for tool in ["axiom_profiler", "smt_scope"]:
            steps = data[tool]
            # Every point is anchored on the parse step. Without it there is
            # nothing to plot, and adding later steps to `ys[-1]` would corrupt
            # the previous file's point.
            if "parse" not in steps:
                continue
            this_size = size

            if tool == "axiom_profiler":
                # key "err" exists and is a float
                if "err" in steps["parse"]:
                    # Scale the size by the recorded parse "err" fraction.
                    this_size *= steps["parse"]["err"]
                    xs, ys = ap["exs"], ap["eys"]
                else:
                    xs, ys = ap["xs"], ap["ys"]
            else:
                xs, ys = ss["xs"], ss["ys"]

            # Sum parse + graph + analysis; each time is divided by 1000
            # (parse_time yields microseconds, so the total is in ms).
            total_time = 0.0
            for step in ["parse", "graph", "analysis"]:
                if step not in steps:
                    continue
                if tool != "axiom_profiler":
                    assert "err" not in steps[step]
                total_time += steps[step]["time"] / 1000
            # presumably log_size is in bytes (header suffix "b"), so this is kb
            xs.append(this_size / 1024)
            ys.append(total_time)

# Scatter plot of total time against log file size, one colour per tool.
fig, ax = plt.subplots()
marker_size = 10
marker_shape = "."
marker_edgecolors = "none"
ax.scatter(ap["xs"], ap["ys"], color="red", label="axiom-profiler", s=marker_size, marker=marker_shape, edgecolors=marker_edgecolors)
if len(ap["exs"]) > 0:
    ax.scatter(ap["exs"], ap["eys"], color="orange", label="axiom-profiler (err)", s=marker_size, marker=marker_shape, edgecolors=marker_edgecolors)
ax.scatter(ss["xs"], ss["ys"], color="blue", label="smt-scope", s=marker_size, marker=marker_shape, edgecolors=marker_edgecolors)

# Fit polynomials in log-log space (degree 2 for axiom-profiler, degree 1 for
# smt-scope) and draw them back in linear coordinates.
ap_log = {"xs": np.log(ap["xs"]), "ys": np.log(ap["ys"])}
ap_line = np.poly1d(np.polyfit(ap_log["xs"], ap_log["ys"], 2))
ax.plot(np.unique(ap["xs"]), np.exp(ap_line(np.unique(ap_log["xs"]))), color="black")
ss_log = {"xs": np.log(ss["xs"]), "ys": np.log(ss["ys"])}
ss_line = np.poly1d(np.polyfit(ss_log["xs"], ss_log["ys"], 1))
ax.plot(np.unique(ss["xs"]), np.exp(ss_line(np.unique(ss_log["xs"]))), color="black")

# Evaluate both fits at a reference size: x is in kb, so 200 * 1024 kb = 200 mb.
interest = 200 * 1024
ap_interest = np.exp(ap_line(np.log(interest)))
ss_interest = np.exp(ss_line(np.log(interest)))
print(f"Interest point: {interest / 1024}mb, ap: {ap_interest / 1000}s, ss: {ss_interest / 1000}s")
ax.axvline(interest, color="black", linestyle="dashed", alpha=0.4)
ax.axhline(ap_interest, color="black", linestyle="dashed", alpha=0.4)
ax.axhline(ss_interest, color="black", linestyle="dashed", alpha=0.4)
# Solid segment marking the gap between the two fits at the reference size.
ax.plot([interest, interest], [ss_interest, ap_interest], color="black")

ax.set_xlabel("Log file size (kb)")
ax.set_ylabel("Time taken (ms)")
ax.set_title("Time")
ax.set_xscale('log')
ax.set_yscale('log')
ax.legend(markerscale=5)
plt.savefig("./data_v1/all.pdf")
plt.show()
