This is the notebook, which contains the results of running our evaluation.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import math
import scipy.stats as stats
from pathlib import Path

def pretty(ax):
    """Apply the notebook's minimal gray look to a Matplotlib axes.

    Hides the right and top spines, moves all four spines 5 points outward
    and colors them gray, and colors the major tick labels and tick marks of
    both axes gray.
    """
    for side in ("right", "top"):
        ax.spines[side].set_visible(False)

    for side in ("left", "bottom", "right", "top"):
        spine = ax.spines[side]
        spine.set_position(("outward", 5))
        spine.set_color("gray")

    for axis in (ax.yaxis, ax.xaxis):
        for tick in axis.get_major_ticks():
            # Both label slots and both tick-mark slots of every major tick.
            for artist in (tick.label1, tick.label2, tick.tick1line, tick.tick2line):
                artist.set_color("gray")


# Full

This section covers the evaluation where we preserve the full bug. We start by loading the data and indexing by `name`, `predicate`, and `strategy`. The data have been computed and put in `result/full/result.csv` by our evaluation framework.

In [None]:
# Load the full-bug results; each row is keyed by (name, predicate, strategy).
results = pd.read_csv("result/full/result.csv").set_index(["name", "predicate","strategy"])


A single line of our data looks like this. We store the following data: 

*  `bugs`, which contains the number of lines in the cleaned-up bug report,

*  `initial-scc` and `scc`, which contain the number of strongly connected components before and after reduction,

*  `initial-classes` and `classes`, which contain the number of classes before and after reduction,

*  `initial-bytes` and `bytes`, which contain the number of bytes before and after reduction,

*  `iters`, which contains the number of invocations of the predicate, 

*  `searches`, the number of binary searches made by the algorithm,

*  `time`, which records the time to reach the final successful solution,

*  `status`, which records whether the reduction completed correctly,

*  `verify`, which records information about whether the bug is preserved.

Here is an example:


In [None]:
# Show the row for one (name, predicate, strategy) combination.
results.loc["url0067cdd33d_goldolphin_Mi", "cfr", "logic"]

In [None]:
# Overview of the benchmark suite: one histogram per metric on a shared
# y-axis (number of benchmarks), each with its geometric mean marked.
fig, axes = plt.subplots(1, 3, figsize=(10,3), sharey=True)

# Per-benchmark values, taken from one strategy's column of the
# strategy-unstacked table.
bugs = results["bugs"].unstack("strategy")["classes"]
initial_bytes = results["initial-bytes"].unstack("strategy")["classes"]
initial_classes = results["initial-classes"].unstack("strategy")["classes"]
initial_variables = results["initial-scc"].unstack("strategy")["logic"]

number_of_benchmarks = len(bugs.index)

# Optional keys: "xformat" formats x tick labels, "format" formats the
# geometric-mean annotation (falls back to "xformat"), "splits" gives
# explicit bin edges (default: 11 evenly spaced edges over the data range).
diagrams = [
    { "title": "Histogram of Classes"
    , "data": initial_classes
    , "xlabel": "Classes"
    },
    { "title": "Histogram of Bytes (in MB)"
    , "data": initial_bytes
    , "xformat" : lambda x, pos: f'{x / 1000000 :0.2f}'
    , "format" : lambda x, pos: f'{x / 1000 :0.0f} KB'
    , "xlabel": "Bytes (in MB)"
    },
#    { "title": "Histogram of Variables"
#    , "data": initial_variables
#    , "xformat" : lambda x, pos: f'{x / 1000 :0.1f} k'
#    , "xlabel": "Variables"
#    },
    { "title": "Histogram of Bugs"
    , "data": bugs
    , "xlabel": "Bugs"
    , "splits": [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    },

]

# Fixed spelling ("Bencmarks"); matches the label used by draw_diagram.
axes[0].set_ylabel("Benchmarks")
for ax, diagram in zip(axes, diagrams):
    #ax.set_title(diagram["title"])

    pretty(ax)

    data = diagram["data"]
    xlim = (data.min(), data.max())
    splits = diagram.get("splits",np.linspace(*xlim, 11).round(0))

    # Every second bin edge gets a major (labelled) tick, all edges a minor one.
    ax.set_xlim(*xlim)
    ax.set_xticks(splits[::2])
    ax.set_xticks(splits, minor=True)

    ylim = (0, number_of_benchmarks)
    ax.set_ylim(*ylim)
    ax.set_yticks(np.linspace(*ylim, 8).round(0))
    if ax != axes[0]:
        # Only the left-most panel keeps its y-axis decoration.
        ax.spines["left"].set_visible(False)
        for x in ax.yaxis.get_major_ticks():
            x.set_visible(False)
    ax.set_xlabel(diagram["xlabel"])

    blocks = ax.hist(data, splits, color="black", rwidth=0.75)

    xformat = diagram.get("xformat", lambda x, pos: f'{x:0.0f}')
    ax.xaxis.set_major_formatter(plt.FuncFormatter(xformat))
    #ax.xaxis.set_tick_params(rotation=70)

    # Dotted vertical line at the geometric mean, annotated to its right.
    gmean = stats.gmean(data)
    v = ax.vlines(gmean, *ylim)
    v.set_color("gray")
    v.set_linestyle(":")

    t = ax.text(gmean + xlim[1] * 0.05, ylim[1] * 0.94, "GM " + diagram.get("format", xformat)(gmean, 0))
    t.set_color("gray")

fig.tight_layout()
fig.subplots_adjust(wspace=0.18)
fig.savefig("benchmarks.eps")

In [None]:
# Display the whole results table.
results

We are testing four strategies: 

- classes
- logic+approx
- logic+graph
- logic


In [None]:
# Strategies that are compared, in the order they are listed and drawn.
strategies = ["logic", "classes"]

# Palette of hex colours.
p = ["#0a1058", "#ee4242", "#ff9135", "#9857ff", "#4cb2ff"]

# Line / bar colour per strategy.
colors = {
    "classes": "#5F99E7",
    "logic": "#1956A7",
}

# Fill colour per strategy.
shade = {
    "classes": "#F6F1B0",
    "logic": "#B0B6F6",
}

# Human-readable legend label per strategy key.
labels = {
    "jreduce": "J-Reduce",
    "classes": "J-Reduce",
    "items+hdd": "HDD",
    "logic+ddmin": "ddmin+",
    "logic+approx": "binary+",
    "logic": "Our Reducer",
}

# Matplotlib line style per strategy key.
styles = {
    "jreduce": ":",
    "classes": "-.",
    "items+hdd": ":",
    "logic+ddmin": "--",
    "logic+approx": "-.",
    "logic": "-",
}


## Sanity Checks

Before we go on to evaluate the code we check that the system is working correctly. First we check that the status is "success". We find the following distribution of statuses:


In [None]:
# Horizontal bar per strategy: percentage of its runs whose status is "timeout".
fig, ax = plt.subplots(1, figsize=(14,2))
timeouts = (results.status == "timeout").groupby("strategy").mean()

ax.set_xlim(0, 100)
pretty(ax)

x = ax.barh(
    y=[labels[s] for s in strategies],
    width=[100 * timeouts[s] for s in strategies],
    color=[colors[s] for s in strategies],
)

The following is a list of all the experiments that failed:

In [None]:
# Report every "classes" run that ended with fewer classes than the matching
# "logic" run, but only while the benchmark's initial size keeps getting
# smaller: m tracks the smallest `initial-bytes` printed so far.
m = 10000000
for i in results.index:
    # i is a (name, predicate, strategy) index tuple.
    (b, p, x) = i
    if x != "classes": continue
    if results.classes.loc[(b, p, x)] < results.classes.loc[(b, p, "logic")]:
        if results["initial-bytes"][i] < m:
            m = results["initial-bytes"][i]
            # key, initial bytes, final bytes for "classes", final bytes for "logic"
            print('/'.join(i), results["initial-bytes"][i], results.bytes[i], results.bytes[(b, p, "logic")])
            print(results.loc[(b, p, "logic")])

## Comparative reduction

In our comparative reduction results we will update the times of all timeout items.

In [None]:
# Value written into `time` for every run whose status is "timeout".
TIMEOUT = 3600
# Work on a copy so `results` keeps the recorded times.
full = results.copy()
full.loc[full.status == "timeout", "time"] = TIMEOUT

In our first experiment we are going to look at comparative final size, and time. We use the geometric mean, so that we can compare the results:

In [None]:
# Geometric mean of time, final bytes and final classes per strategy,
# with rows ordered as in `strategies`.
r = full[["time", "bytes", "classes"]].groupby("strategy").agg(stats.gmean).loc[strategies]
r.round(1)

### Graphical Results

In [None]:
def draw_diagram(full):
    """Draw three cumulative-count panels comparing the reduction strategies.

    For each strategy, every panel plots the sorted per-benchmark values of
    one metric (x) against the running benchmark count (y): time spent,
    predicate invocations made, and final number of classes relative to
    ``initial_classes``.

    Reads the notebook-level names ``strategies``, ``labels``, ``styles``,
    ``colors``, ``TIMEOUT``, ``initial_classes``, ``number_of_benchmarks``
    and ``pretty``.

    Parameters:
        full: results frame indexed by (name, predicate, strategy) that has
            the columns ``time``, ``iters`` and ``classes``.

    Returns:
        The Matplotlib figure holding the three panels.
    """
    fig, axes = plt.subplots(1, 3, figsize=(10,3.5), sharey=True)

    # Optional keys: "xlim" (default: 0 .. largest plotted value), "xticks"
    # (default: 7 evenly spaced ticks over xlim), "percent" (adds a secondary
    # axis with completion rates).
    diagrams = [
        { "title": "Finished Programs over Time"
        , "xformat": lambda x, pos: f'{x:0.0f}'
        # Runs at or above TIMEOUT are dropped; a single TIMEOUT point is
        # appended so every curve extends to the right edge.
        , "data": lambda s: list(sorted(d for d in full["time"].unstack("strategy")[s] if d < TIMEOUT)) + [TIMEOUT]
        , "xlim": (0, TIMEOUT)
        , "xlabel": "Time Spend (s)"
        , "percent": False
        },
        { "title": "Finished Programs over Invocations"
        , "xformat": lambda x, pos: f'{x:0.0f}'
        , "data": lambda s: sorted(full["iters"].unstack("strategy")[s])
        , "xlim": (0, 1500)
        , "xlabel": "Invocations Made"
        },
        # This panel plots classes / initial_classes, so it is labelled as
        # classes (it used to say "Invocations" / "Bytes").
        { "title": "Finished Programs over Final Relative Size"
        , "xformat": lambda x, pos: f'{x*100:0.0f}%'
        , "data": lambda s: sorted(full["classes"].unstack("strategy")[s] / initial_classes)
        , "xticks": np.linspace(0,1, 6)
        , "xlabel": "Final Relative Size (Classes)"
        },
    ]

    for diagram, ax in zip(diagrams, axes.flatten()):
        maxx = 0
        pretty(ax)

        # Plot strategies in order of increasing mean value.
        strats = sorted(strategies, key=lambda s: np.mean(diagram["data"](s)))
        for s in strats:
            data = diagram["data"](s)
            ax.plot(data, list(range(1, len(data) + 1)),
                    label=labels[s],
                    linestyle=styles[s],
                    color=colors[s])
            maxx = max(maxx, max(data))

        xlim = diagram.get("xlim", (0, maxx))
        ax.set_xlim(*xlim)
        xtics = diagram.get("xticks", np.linspace(*xlim, 7))
        ax.set_xticks(xtics)

        ylim = 0, number_of_benchmarks
        ax.set_yticks(np.linspace(*ylim, 7).round())
        ax.set_yticks([], minor=True)
        ax.set_ylim(*ylim)
        if ax == axes[0]:
            ax.set_ylabel("Benchmarks")

        if diagram.get("percent", False):
            ax2 = ax.twinx()
            pretty(ax2)

            # NOTE(review): tick positions are hard-coded; no diagram above
            # currently enables "percent".
            yticks = [227, 200]
            strats = sorted(strategies, key=lambda s: -len(diagram["data"](s)))
            ytickslabels = [f"{(len(diagram['data'](s)) - 1) / number_of_benchmarks * 100:0.0f} %" for s in strats]
            ax2.set_yticks(yticks)
            ax2.set_yticklabels(ytickslabels)
            #ax2.set_ylabel("Completion Rate")

        ax.xaxis.set_major_formatter(plt.FuncFormatter(diagram.get("xformat", lambda x, pos: f'{x:0.0f}')))

        # Dotted guide line at half of the benchmarks.
        v = ax.hlines(round(number_of_benchmarks/2),*xlim)
        v.set_color("gray")
        v.set_linestyle(":")

        if ax == axes[1]:
            # `data` is the last curve plotted in this panel.
            t = axes[1].text(xlim[1] * 0.47, round(len(data)/2) + 5, "MEDIAN")
            t.set_color("gray")

        ax.set_xlabel(diagram["xlabel"])

    axes[2].legend(loc="lower right")

    fig.tight_layout()

    return fig

# Render the comparison for the full-bug results and save it.
fig = draw_diagram(full)
fig.savefig("timings.eps")

In [None]:
# Geometric mean of time and predicate invocations per strategy.
full[["time", "iters"]].groupby("strategy").agg(stats.gmean).round(1)

The graphs are formatted like the previous article: In the top row we have the number of programs that complete before a certain time and iterations. In the bottom row we have the number of programs that have been reduced to a size below a certain number of bytes or classes.


In [None]:
# Largest number of predicate invocations seen for each strategy.
full.iters.unstack("strategy").max()

In [None]:
from pathlib import Path

# Largest number of predicate invocations seen for each strategy.
iter_maxes = full.iters.unstack("strategy").max()

# Entries two levels below result/full/ (url*/<something>); only the ones
# named after a known predicate are processed.
glob = Path("result/full/").glob("url*/*")
# One frame per processed entry, one column per strategy, holding the best
# (smallest) successful size so far:
#   tdf* are indexed by the bucketed `time` column, idf* by the `folder` column;
#   *C frames hold `classes`, *B frames hold `bytes`.
idfCs = []
idfBs = []
tdfCs = []
tdfBs = []
j = 0 
for b in glob:
    if not b.name in {"cfr", "procyon", "fernflower"}: continue
    j += 1
    #if j > 10: break
    try:
        idfC = pd.DataFrame()
        idfB = pd.DataFrame()
        tdfC = pd.DataFrame()
        tdfB = pd.DataFrame()
        metrics = list(b.glob("*/workfolder/metrics.csv"))
        if not metrics: continue
        for i in metrics:
            # The first path component below b is the strategy name.
            strat, *_ = i.relative_to(b).parts
            if strat == "logic+single": continue
            # print(i)
            m = pd.read_csv(i)
            
            # Bucket `time` into steps of 60, numbered from 1
            # (presumably seconds -> minutes; confirm against the metrics format).
            m.time = m.time.floordiv(60) + 1
            # Smallest successful size per bucket, extended to buckets 0..60
            # and turned into a running minimum.
            x = m[m.judgment == "success"]\
                .groupby("time")[["classes", "bytes"]]\
                .min()\
                .reindex(index=range(0, 61))\
                .expanding().min()
        
            # Buckets before the first success fall back to the first metrics row.
            tdfC = tdfC.assign(**{strat: x["classes"].fillna(m.loc[0]["classes"])})
            tdfB = tdfB.assign(**{strat: x["bytes"].fillna(m.loc[0]["bytes"])})
        
            # Same running minimum, keyed by `folder` instead of time
            # (presumably the invocation number; confirm).
            y = m[m.judgment == "success"]\
                .groupby("folder")[["classes", "bytes"]]\
                .min()\
                .reindex(index=range(0, int(iter_maxes.max())))\
                .expanding().min()
            
            idfC = idfC.assign(**{strat: y["classes"].fillna(m.loc[0]["classes"])})
            idfB = idfB.assign(**{strat: y["bytes"].fillna(m.loc[0]["bytes"])})
        
        idfCs.append(idfC)
        idfBs.append(idfB)
        tdfCs.append(tdfC)
        tdfBs.append(tdfB)
    # TypeErrors are re-raised; any other failure is reported and the entry skipped.
    except TypeError: raise
    except Exception as e :
        print("WARNING", type(e), e, i)
        continue

# (classes frames, bytes frames) for the time-indexed and iteration-indexed views.
times = (tdfCs, tdfBs)
iters = (idfCs, idfBs)

In [None]:
# 10th percentile of the byte counts across all loaded frames, per time bucket.
pd.concat(times[1]).groupby("time").quantile(0.1)

In [None]:
# 2x2 figure: top row shows the geometric-mean size left over time (classes,
# bytes); bottom row shows the same data as a reduction factor.
fig, axes = plt.subplots(2, 2, figsize=(10,7))

# Geometric mean over all loaded frames, per time bucket and strategy.
tbytes = pd.concat(times[1]).groupby("time").agg(stats.gmean)

classgroup = pd.concat(times[0]).groupby("time")
tclasses = classgroup.agg(stats.gmean)

# Quartiles; only used by the commented-out "quantiles" entries below.
tclasses1 = classgroup.quantile(0.25)
tclasses2 = classgroup.quantile(0.75)


# Keys read by the loop below: "data" (frame with one column per strategy),
# "format" (y tick formatter), "best" (min or max: picks the value for the
# dotted guide line and the drawing order), "percent" (selects the kind of
# secondary axis), and the optional "ylim", "yscale", "yticks", "yticks2",
# "quantiles", "ylabel".
diagrams = [
    { "title": "Mean Classes Left Over Time (s)"
    , "data":  tclasses
    # , "quantiles": (tclasses1, tclasses2)
    , "format": lambda x, pos: f"{x:0.0f}"
    , "percent": True
    , "ylabel": "Classes Left"
    , "best": min
    },
    { "title": "Mean Bytes Left Over Time (s)"
    , "data":  tbytes
    , "format": lambda x, pos: f"{x / 1000:0.0f} KB"
    , "percent": True
    , "ylabel": "Bytes Left"
    , "best": min
    },
    { "title": "Mean Reduction of Classes Over Time (s)"
    # rdiv: column maximum divided by the value, i.e. "times smaller".
    , "data":  tclasses.rdiv(tclasses.max())
    #, "quantiles": (tclasses1.rdiv(tclasses.max()), tclasses2.rdiv(tclasses.max()))
    , "format": lambda x, pos: f"x {x:0.0f}"
    , "ylim": (1, 20)
    , "yscale": "linear"
    , "yticks": np.linspace(1, 20, 10)
    , "ylabel": "Times Smaller (Classes)"
    #, "yticks2": 
    #     zip([11, 10, 9, 5, 4],
    #         [f"{1/max(tclasses.rdiv(tclasses.max())[s])* 100:0.1f}%" for s in strategies ]
    #     )
    , "percent": False
    , "best": max
    },
    { "title": "Mean Reduction of Bytes Over Time (s)"
    , "data":  tbytes.rdiv(tbytes.max())
    , "ylim": (1, 20)
    , "yscale": "linear"
    , "yticks": np.linspace(1, 20, 10)
    , "format": lambda x, pos: f"x {x:0.0f}"
    , "ylabel": "Times Smaller (Bytes)"
    , "percent": False
    , "best": max
    }
    
   # ("Mean Percentage of Classes Left", dfCs, lambda x: x.mean()), 
   # ("Mean Percentage of Bytes Left", dfBs, lambda x: x.mean()),
   # ("Moving Geometric Mean of Relative Reduction of Classes", times[0], lambda x: x.agg(stats.gmean)), 
   # ("Moving Geometric Mean of Relative Reduction of Bytes", times[1], lambda x: x.agg(stats.gmean)),
   # ("Median Percentage of Classes Left", dfCs, lambda x: x.median()), 
   # ("Median Percentage of Bytes Left", dfBs, lambda x: x.median()),  
]

for ax, diagram in zip(axes.flatten(), diagrams):
    data = diagram["data"]
   
    pretty(ax)
    # Draw strategies in descending order of their "best" value.
    for s in sorted(strategies, key=lambda x: -diagram["best"](data[x])):
        # The index is the time bucket; multiplying by 60 gives the x position.
        ax.plot(data.index * 60, data[s], label=labels[s], color=colors[s], linestyle=styles[s])
        
        # Dotted guide line at this strategy's best value.
        v = ax.hlines(diagram["best"](data[s]),(data.index * 60).min(), (data.index * 60).max())
        v.set_color("lightgray")
        v.set_linestyle(":")
        
        quantiles = diagram.get("quantiles", None)
        if quantiles:
            low,high = quantiles
            ax.fill_between(low.index * 60, low[s], high[s], label=labels[s], color=shade[s], linestyle=styles[s])
            #ax.plot(high.index * 60, high[s], label=labels[s], color=colors[s], linestyle=styles[s])
            
        
    ylim = diagram.get("ylim", (0, data[strategies].max().max()))
    ax.set_ylim(*ylim)
    ax.set_yscale(diagram.get("yscale", "linear"))
    yticks = diagram.get("yticks", np.linspace(*ylim, 6).round())
    ax.set_yticks([],minor=True)
    ax.set_yticks(yticks)
    yformat = diagram["format"]
    ax.yaxis.set_major_formatter(plt.FuncFormatter(yformat))
    
    ax.set_ylabel(diagram.get("ylabel"))
    
    
    #ax.set_yscale("log")
    #ticks = list(np.linspace(m["logic"].min(), 0.4, 7))
    #ticks.append(1)
    
    ax.set_title(diagram["title"])
    
    if diagram.get("percent", False):
        # Secondary axis with percent tick labels; its limits are left at
        # Matplotlib's defaults.
        ax2 = ax.twinx()
        ax2.set_ylabel("Percentage Left")
        pretty(ax2)
        ax2.spines['right'].set_visible(True)
        ax2.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(1, 0))
    else: 
        # Secondary axis on the same scale, with one tick per strategy at its
        # best reduction factor d, labelled with 1/d as a percentage.
        ax2 = ax.twinx()
        pretty(ax2)
        ax2.spines['right'].set_visible(True)
        ax2.set_ylabel("Percentage Left")
        ax2.set_ylim(*ylim)
        ax2.set_yscale(diagram.get("yscale", "linear"))
        yticks, ytickslabels = zip(
            *diagram.get("yticks2",
                        [ (d, f"{1/d * 100:0.1f}%") for d in (diagram["best"](data[s]) for s in strategies)
                        ]
            ))
        ax2.set_yticks(yticks)
        ax2.set_yticklabels(ytickslabels)
        ax2.set_yticks([],minor=True)
        
        # Both y-axes are flipped for the reduction-factor panels.
        ax2.invert_yaxis()
        ax.invert_yaxis()
      
    ax.set_xlim(0,3600)
    ax.set_xticks(np.linspace(0,3600, 7))
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x:0.0f}'))
 
axes[0][0].legend()
fig.tight_layout()
fig.savefig("by-time.eps")
fig.savefig("by-time.pdf")
    

In [None]:
# Average decrease of the "items+hdd" geometric-mean byte count per time
# bucket over the last `last_xmin` buckets, and the number of buckets it would
# need at that rate to close the gap to "logic+ddmin" at bucket 60.
# (The statements were previously duplicated verbatim and printed twice.)
# NOTE(review): requires "items+hdd" and "logic+ddmin" columns in `tbytes`.
last_xmin = 10
bytepm = (tbytes["items+hdd"][60 - last_xmin] - tbytes["items+hdd"][60]) / last_xmin
mins = (tbytes["items+hdd"][59] - tbytes["logic+ddmin"][60]) / bytepm
print (bytepm, mins)

In [None]:
# First ten time buckets, keeping only values below the "classes" value at
# bucket 60 (other cells become NaN).
tbytes[(tbytes < tbytes["classes"][60])].iloc[0:10]

In [None]:
# Median of every numeric column per strategy. numeric_only=True skips string
# columns such as `status`, which newer pandas versions refuse to aggregate.
full.groupby("strategy").median(numeric_only=True)

In [None]:
from pathlib import Path

# Largest number of predicate invocations seen for each strategy.
iter_maxes = full.iters.unstack("strategy").max()

# Build one long table with a row per (name, predicate, strategy, time step):
# the best successful size so far and the highest `folder` value reached.
glob = Path("result/full/").glob("url*/*")
columns = ["name", "predicate", "strategy", "time", "classes", "bytes", "iters"]
# Frames are collected and concatenated once at the end; DataFrame.append was
# removed in pandas 2.0 and copied the whole table on every call.
frames = []
j = 0
for b in glob:
    name, predicate = b.relative_to("result/full").parts
    if not predicate in {"cfr", "procyon", "fernflower"}: continue
    print(name, predicate)
    j += 1
    #if j > 1: break
    try:
        metrics = list(b.glob("*/workfolder/metrics.csv"))
        if not metrics: continue
        for i in metrics:
            # The first path component below b is the strategy name.
            strat, *_ = i.relative_to(b).parts
            m = pd.read_csv(i)

            # Bucket `time` into steps of 15, numbered from 1.
            m["time"] = m.time.floordiv(15) + 1
            # Running minimum of the successful sizes over buckets 0..240;
            # buckets before the first success use the first metrics row.
            x = m[m.judgment == "success"]\
                .groupby("time")[["classes", "bytes"]]\
                .min()\
                .reindex(index=range(0, 241))\
                .expanding().min()\
                .fillna(m.loc[0][["classes","bytes"]])

            x = pd.DataFrame(x, columns=columns)
            x["time"] = x.index * 15
            x["name"] = name
            x["strategy"] = strat
            x["predicate"] = predicate
            # Running maximum of `folder` per bucket (0 before the first row).
            x["iters"] = m.groupby("time")\
                .folder.max().reindex(index=range(0,241)).expanding().max().fillna(0)

            frames.append(x)
    except Exception as e :
        print("WARNING", e, i)
        continue

data = pd.concat(frames, sort=True) if frames else pd.DataFrame(columns=columns)
data

In [None]:
from matplotlib.animation import FuncAnimation


# Two side-by-side panels for the animated reduction distributions.
fig, axes = plt.subplots(1, 2, figsize=(10,4))
fig.set_tight_layout(True)


def handler(ax, tp):
    """Set up one animated panel for column *tp* and return its frame updater.

    Draws, per strategy, the sorted ratio of the best size so far to each
    benchmark's largest recorded size against the benchmark count, plus a
    dotted vertical line at the ratio of the geometric means. The returned
    ``update(i)`` redraws both for the data up to time ``i * 15``.

    Reads the notebook-level names ``data``, ``strategies``, ``colors``,
    ``styles``, ``labels`` and ``pretty``.
    """
    keys = ["name", "predicate","strategy"]

    def best_until(limit):
        # Smallest `tp` per run among rows with time <= limit, one column per strategy.
        return data[data.time <= limit].groupby(keys)[tp].min().unstack('strategy')

    pretty(ax)

    # Largest recorded value of `tp` per benchmark; used as the baseline.
    benchmarks = data.groupby(["name", "predicate"])[tp].max()
    total = len(benchmarks)

    ax.set_ylim(0, total)
    ax.set_yticks(np.linspace(0, total, 8).round())

    #ax.set_xscale("log")
    ax.set_xlim(0,1)
    ax.set_xticks(np.linspace(0,1, 5))

    t = best_until(0)
    ranks = list(range(1, total + 1))
    curves = []
    for s in strategies:
        curve, *_ = ax.plot(sorted(t[s] / benchmarks), ranks,
                            color=colors[s],
                            linestyle=styles[s],
                            label=labels[s])
        curves.append(curve)

    gmean = stats.gmean(benchmarks)
    lines = [ax.vlines(stats.gmean(t[s]) / gmean, 0, total) for s in strategies]
    for s, marker in zip(strategies, lines):
        marker.set_color(colors[s])
        marker.set_linestyle(":")

    def update(i):
        t = best_until(i * 15)
        for curve, s in zip(curves, strategies):
            curve.set_xdata(sorted(t[s] / benchmarks))

        for marker, s in zip(lines, strategies):
            # Move both end points of the vertical line to the new x position.
            position = stats.gmean(t[s]) / gmean
            seg = marker.get_segments()
            seg[0][0,0] = position
            seg[0][1,0] = position
            marker.set_segments(seg)

        ax.set_title(f"Distribution of Reduction of {tp} After {i*15:4.0f} s")
        return lines,curves,ax

    return update


# One frame updater per panel: classes on the left, bytes on the right.
updates = []
for ax, tp in zip(axes, ["classes", "bytes"]):
    updates.append(handler(ax, tp))
    

# `ax` is the last axes of the loop above, so the legend goes on the bytes panel.
ax.legend()

def updateboth(i):
    # Advance both panels to frame i and return the combined result tuples.
    print(f"update {i}")
    return updates[0](i) + updates[1](i)

#updateboth(30)

# 241 frames; each frame covers 15 more units of time (see handler's update).
anim = FuncAnimation(fig, updateboth, frames=np.arange(0, 241), interval=200)
anim.save('bytes.gif', dpi=80, writer='imagemagick')

In [None]:
# import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

# Stand-alone FuncAnimation demo on random data.
fig, ax = plt.subplots()
fig.set_tight_layout(True)

# Plot a scatter that persists (isn't redrawn) and the initial line.
x = np.arange(0, 20, 0.1)
# Points scattered around y = x with normally distributed noise (sigma 3.0).
ax.scatter(x, x + np.random.normal(0, 3.0, len(x)))
line, = ax.plot(x, x - 5, 'r-', linewidth=2)

def update(i):
    """Advance the demo animation to frame *i*.

    Shifts the red line up by ``i``, shows the frame number as the x-label,
    and returns the artists that have to be redrawn for this frame.
    """
    caption = f'timestep {i}'
    print(caption)
    line.set_ydata(x - 5 + i)
    ax.set_xlabel(caption)
    return line, ax

# Ten frames, 200 ms apart, written as a GIF through the imagemagick writer.
anim = FuncAnimation(fig, update, frames=np.arange(0, 10), interval=200)
anim.save('line.gif', dpi=80, writer='imagemagick')
#if __name__ == '__main__':
#    # FuncAnimation will call the 'update' function for each frame; here
#    # animating over 10 frames, with an interval of 200ms between frames.
#    
#    if len(sys.argv) > 1 and sys.argv[1] == 'save':
#        
#    else:
#        # plt.show() will just loop the animation forever.
# plt.show()

In [None]:
from pathlib import Path



# Largest number of predicate invocations seen for each strategy.
iter_maxes = full.iters.unstack("strategy").max()
# range() needs a true int. When unstack() pads missing runs with NaN the
# maximum is a float, which used to raise a TypeError that the broad except
# below swallowed for every benchmark.
max_iters = int(iter_maxes.max())

# For every processed entry, one frame per metric with a column per strategy:
# the best reduction factor (first metrics row / smallest successful size)
# reached so far, indexed by the `folder` column.
glob = Path("result/full/").glob("url*/*")
dfCs = []
dfBs = []
j = 0
for b in glob:
    if not b.name in {"cfr", "procyon", "fernflower"}: continue
    j += 1
    #if j > 10: break
    try:
        dfC = pd.DataFrame()
        dfB = pd.DataFrame()
        metrics = list(b.glob("*/workfolder/metrics.csv"))
        if not metrics: continue
        for i in metrics:
            # The first path component below b is the strategy name.
            strat, *_ = i.relative_to(b).parts
            # print(i)
            m = pd.read_csv(i)
            # rdiv divides the first row by each minimum; rows before the
            # first success are filled with a factor of 1.
            x = m[m.judgment == "success"]\
                .groupby("folder")[["classes", "bytes"]]\
                .min()\
                .rdiv(m.iloc[0][["classes", "bytes"]])\
                .reindex(index=range(0, max_iters))\
                .expanding().max()\
                .fillna(1)

            dfC = dfC.assign(**{strat: x["classes"]})
            dfB = dfB.assign(**{strat: x["bytes"]})

        dfCs.append(dfC)
        dfBs.append(dfB)
    except Exception as e:
        print(e)
        print("WARNING", i)
        continue

In [None]:
# Geometric mean of the reduction factor per invocation, for classes and bytes.
fig, axes = plt.subplots(1, 2, figsize=(11,4), sharex=True)

graphs = [ 
  #  ("Mean Percentage of Classes Left", dfCs, lambda x: x.mean()), 
  #  ("Mean Percentage of Bytes Left", dfBs, lambda x: x.mean()),
    ("Moving Geometric Mean of Relative Reduction of Classes", dfCs, lambda x: x.agg(stats.gmean)), 
    ("Moving Geometric Mean of Relative Reduction of Bytes", dfBs, lambda x: x.agg(stats.gmean)),
 #   ("Median Percentage of Classes Left", dfCs, lambda x: x.median()), 
  #  ("Median Percentage of Bytes Left", dfBs, lambda x: x.median()),  
]

for ax, (title, dfc, fn) in zip(axes.flatten(), graphs):
    m = fn(pd.concat(dfc).groupby("folder"))
    for s in strategies:
        # Plot each strategy only up to its own largest invocation count.
        # (`maxes` was undefined here; the per-strategy maxima live in
        # `iter_maxes`, cast to int because range() rejects floats.)
        indices = m.index[range(0, int(iter_maxes[s]))]
        ax.plot(indices, m[s].iloc[indices], label=s, color=colors[s])

        # Dotted guide line at the best factor, spanning the iteration axis
        # (it used to end at 3600, the limit of the time-based plots).
        v = ax.hlines(max(m[s]), 0, iter_maxes.max())
        v.set_color("lightgray")
        v.set_linestyle(":")

    ax.set_ylim(0,m["logic"].max() * 1.1)
    ax.set_xlim(0,iter_maxes.max())
    ax.set_xticks(np.linspace(0,iter_maxes.max(), 7))
    ax.set_title(title)

    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'x {x:0.0f}'))
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x:0.0f}'))

    pretty(ax)

# `ax` is the last panel of the loop above.
ax.legend()
fig.tight_layout()
fig.savefig("by-iters.pdf")
    

In [None]:
# Largest number of predicate invocations seen for each strategy.
full.iters.unstack("strategy").max()

In [None]:
# 2x2 figure: mean (top) and median (bottom) share left over time, for
# classes (left) and bytes (right).
# NOTE(review): this groups by "time", so it expects the frames in
# dfCs / dfBs to be indexed by a level named "time" — confirm which loader
# is meant to feed this cell.
fig, axes = plt.subplots(2, 2, figsize=(11,6), sharey=True, sharex=True)

graphs = [
    ("Mean Percentage of Classes Left", dfCs, lambda x: x.mean()), 
    ("Mean Percentage of Bytes Left", dfBs, lambda x: x.mean()),
    ("Median Percentage of Classes Left", dfCs, lambda x: x.median()), 
    ("Median Percentage of Bytes Left", dfBs, lambda x: x.median()),  
]

for ax, (title, dfc, fn) in zip(axes.flatten(), graphs):
    m = fn(pd.concat(dfc, keys=range(0, len(dfc))).groupby("time"))
    for s in strategies:
        ax.plot(m.index * 60, m[s], label=s, color=colors[s])

        # Dotted guide line at the smallest value reached by this strategy.
        v = ax.hlines(min(m[s]),0, 3600)
        v.set_color("lightgray")
        v.set_linestyle(":")

    ax.set_ylim(0,1)
    ax.set_xlim(0,3600)
    ax.set_xticks(np.linspace(0,3600, 7))
    ax.set_title(title)

    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x*100:0.0f}%'))
    # Was "set_maj  or_formatter", which is a syntax error.
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x:0.0f} s'))

    pretty(ax)

# `ax` is the last panel of the loop above.
ax.legend()
fig.tight_layout()
fig.savefig("new-approach.pdf")
    

## Extra evaluation

Here I have left some space for some extra interesting questions: 

The first question is how the size of the input in bytes affects the time to set up and run the predicate. In this case it is fernflower.

The interesting thing here is that the execution time of the predicate is dependent on the size of the input, and by testing small inputs it can be up to 10 times faster than testing the large inputs.

In [None]:
# Scatter the predicate's setup time and run time against the input size for
# one benchmark. The cell is best-effort: if anything goes wrong (for example
# the metrics file is missing) a warning is printed and the notebook continues.
try:
    metrics = pd.read_csv("result/full/url0e7ea11f42_rbouckaert_DensiTree/fernflower/logic/workfolder/metrics.csv")

    fig, axes = plt.subplots(2, 1, figsize=(7,7), sharex=True)

    for key, ax in zip(["setup time", "run time"], axes):
        ax.scatter(metrics.bytes, metrics[key])
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x:0.1f} s'))
        ax.set_xlim(0, metrics.bytes.max() * 1.1)
        ax.set_ylim(0, metrics[key].max() * 1.1)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.set_title(key)
        pretty(ax)

    # Format the x tick labels on the bottom panel.
    axes[-1].xaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x/1000:0.0f} Kb'))

    fig.tight_layout()
except Exception as e:
    # A bare "except: pass" hid every failure (and also caught
    # KeyboardInterrupt); report the problem instead.
    print("WARNING", type(e), e)

# Part

Here we analyse given only 1 bug being preserved by Javac

In [None]:
# Load the "part" results; each row is keyed by (name, predicate, strategy).
part_results = pd.read_csv("result/part/result.csv").set_index(["name", "predicate","strategy"])

In [None]:
# Horizontal bar per strategy: percentage of its "part" runs whose status is "timeout".
fig, ax = plt.subplots(1, figsize=(14,2))
timeouts = (part_results.status == "timeout").groupby("strategy").mean()

ax.set_xlim(0, 100)
pretty(ax)

x = ax.barh(
    y=strategies,
    width=[100 * timeouts[s] for s in strategies],
    color=[colors[s] for s in strategies],
)

## Comparative reduction

In our first experiment we are going to look at comparative final size, and time. We use the geometric mean, so that we can compare the results:

In [None]:
# Value written into `time` for every run whose status is "timeout".
TIMEOUT = 3600
# Work on a copy so `part_results` keeps the recorded times.
part = part_results.copy()
# One .loc call selecting rows and column together. The chained form
# `part.time.loc[mask] = ...` may write to a temporary and leave `part`
# unchanged.
part.loc[part.status == "timeout", "time"] = TIMEOUT

In [None]:
# Geometric mean of final bytes, classes and time:
# rows are the metrics, columns the strategies.
keyvalues = part.filter(["bytes", "classes", "time"], axis=1)\
    .unstack("strategy")\
    .agg(stats.gmean)\
    .unstack()

# Geometric mean of the initial sizes (taken from the "classes" strategy),
# with the labels renamed to "bytes" / "classes" so they line up with the
# rows of `keyvalues`.
# str.lstrip("initial-") strips any of the characters i, n, t, a, l, - rather
# than the prefix (e.g. "initial-iters" would become "ers"), so the prefix is
# removed explicitly.
v = part.filter(["initial-bytes", "initial-classes"], axis=1).unstack("strategy")\
    .agg(stats.gmean)\
    .unstack()["classes"]\
    .rename(lambda a: a[len("initial-"):] if a.startswith("initial-") else a)

Geometric Averages

In [None]:
# Key values rounded to one decimal, columns in reverse `strategies` order.
keyvalues.round(1)[list(reversed(strategies))]

We can compare them on how much reduction each of them has made.

In [None]:
# Final bytes/classes as a percentage of the initial geometric means in `v`.
(keyvalues.loc[["bytes","classes"]].div(v, axis='rows') * 100).round(1)[list(reversed(strategies))]

### Graphical Results

In [None]:
# Same three-panel comparison, drawn for the "part" results.
fig = draw_diagram(part)