In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

In [None]:
# Load the experiment summary and key every row by (name, predicate, strategy)
# so later cells can select and unstack on those index levels.
results = (
    pd.read_csv("result/result.csv")
    .set_index(["name", "predicate", "strategy"])
)

In [None]:
# Show every run whose `verify` column is anything other than "success".
not_verified = results["verify"] != "success"
results[not_verified]

In [None]:
def investigate(name, predicate, strategy, source_file="src/mi/task/FlattenTask.java"):
    """Print diagnostic files for one (name, predicate, strategy) run.

    Reads from ``result/<name>/<predicate>/<strategy>/workfolder`` and prints,
    in order:

    1. ``final/sandbox/<predicate>/compiler.out.txt``;
    2. the path and contents of every ``*.java`` file below
       ``final/sandbox/<predicate>/src`` (in sorted path order);
    3. a ``=======`` separator;
    4. ``source_file`` as found in ``initial/sandbox/<predicate>``;
    5. if the run's ``verify`` value is not ``"success"``: the stdout lines of
       ``reduction/<verify>/stdout`` that do not occur in ``initial/stdout``.

    Parameters
    ----------
    name, predicate, strategy : str
        Index key of the run in the module-level ``results`` frame; the same
        three values are used as path components below ``result/``.
    source_file : str, optional
        Path, relative to the initial sandbox of the predicate, of the source
        file to print. Defaults to the file the notebook originally inspected.

    Raises
    ------
    FileNotFoundError
        If any of the files read via ``Path.read_text`` is missing.
    KeyError
        If ``(name, predicate, strategy)`` is not present in ``results``.
    """
    folder = Path("result") / name / predicate / strategy / "workfolder"
    final = folder / "final/sandbox" / predicate
    print((final / "compiler.out.txt").read_text())

    # Sort so the dump order does not depend on filesystem enumeration order.
    for java_file in sorted(final.glob("src/**/*.java")):
        print(java_file)
        print(java_file.read_text())

    print("=======")

    print((folder / "initial/sandbox" / predicate / source_file).read_text())

    # `results` is the module-level frame indexed by (name, predicate, strategy).
    v = results.loc[(name, predicate, strategy)].verify
    if v != "success":
        # The non-success value is used as a directory name under "reduction".
        # NOTE(review): assumes `verify` is always a string here - confirm it
        # can never be NaN, which would make the path join fail.
        a = set((folder / "reduction" / v / "stdout").read_text().splitlines())
        b = set((folder / "initial" / "stdout").read_text().splitlines())

        # Lines that appear only in the reduction's stdout.
        print(a - b)
        

# Inspect one specific run in detail.
investigate(
    name="url0067cdd33d_goldolphin_Mi",
    predicate="fernflower",
    strategy="deep+i2m",
)



In [None]:
# For each (name, predicate) pair: True only if every strategy of that pair
# has status "success". Vectorized comparison instead of a per-element lambda.
success = results.status.eq("success").groupby(level=["name", "predicate"]).all()

# Pie chart of how often each status value occurs over all runs, drawn on the
# axes created here rather than relying on the implicit "current axes".
fig, ax = plt.subplots(figsize=(7,7))
# Bound to a name only to keep the Axes repr out of the cell output.
pie_ax = results.status.value_counts().plot.pie(ax=ax)

In [None]:
# Horizontal bar chart, one panel per metric, comparing every strategy against
# the "classes" strategy for each successful (name, predicate) pair.
fig, axes = plt.subplots(1, 2, figsize=(14,7), sharey=True)

# One panel per entry; each entry is a column name of `results`.
metrics = [ "bytes", "classes" ] 

# Keep only rows selected by the `success` mask computed in an earlier cell.
sucessfull = results[success[results.index]]

# One y position per row of the unstacked frame, labelled "<a>:<b>" from the
# two remaining index values.
labels = [f"{a}:{b}" for a, b in sucessfull.unstack("strategy").index]
x = np.arange(len(labels))
# Total vertical extent shared by all bars belonging to one label.
total_width = 0.75
  
for metric, ax in zip(metrics, list(axes)): 
    # Rows: remaining index levels, columns: strategies.
    m = sucessfull[metric].unstack("strategy")
    # 1 / (strategy / classes): the "classes" strategy's value divided by each
    # other strategy's value, per row.
    rest = 1 / m.drop(["classes"], axis=1).div(m.classes, axis=0, level=0)

    # Vertical slot reserved for each strategy within one label's group.
    width = total_width / len(rest.columns)
   
    # Running extremes of the plotted ratios, used for the x limits below.
    minx, maxx = 1, 0
    for n, i in enumerate(rest.columns):
        # Centre of the n-th strategy's slot around each label position.
        offset = x - (total_width/2 - width*n - width/2)
        # Each bar spans from 1 to the ratio: it starts at min(ratio, 1) and
        # its length is |ratio - 1|. The `x` inside the comprehensions is
        # local to them and does not rebind the outer `x` array.
        ax.barh(offset,
                [ 1 - x if x < 1 else x - 1 for x in rest[i] ], 
                height=width * 0.75, 
                left=[min(x, 1) for x in rest[i]], 
                label=i)
        minx = min(minx, rest[i].min())
        maxx = max(maxx, rest[i].max())

        
    ax.set_ylim(-total_width, len(labels) -1 + total_width)
    ax.set_xlabel(metric)
    # Pad the data range by a factor of two on either side.
    ax.set_xlim(minx/2,maxx*2)
    ax.set_xscale("log")
    #ax.set_xticks([1/64, 1/32, 1/16, 1/8, 1/4, 1/2, 1,2, 4, 8])
    ax.xaxis.set_major_locator(plt.LogLocator(10))
    # Tick labels show the reciprocal of the axis value followed by "x".
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, y: f"{1/x}x")) #plt.LogFormatterMathtext(2))
    #ax.set_xticklabels([1/8, 1/4, 1/2,1,2, 4, 8])
   
    # Reference line at ratio 1 (strategy equal to "classes").
    ax.axvline(1, ls='-', color='lightgray', lw=1)

    # Hide every spine of the panel.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.spines["left"].set_position(("outward", 5))
    ax.spines["bottom"].set_position(("outward", 5))

# `ax` is the last axes from the loop above; the figure was created with
# sharey=True.
ax.set_yticks(x)
ax.set_yticklabels(labels)
    
# Legend only on the last panel.
ax.legend()
fig.tight_layout()

In [None]:
# List each (first, second) index pair that is not marked True in `success`,
# printed once per pair as "first/second".
not_successful = results[success[results.index] != True]
failed_pairs = {(key[0], key[1]) for key in not_successful.index}
for first, second in failed_pairs:
    print(f"{first}/{second}")

In [None]:
# 2x2 grid of cumulative curves: for each metric, every strategy's values are
# sorted ascending and plotted against their rank (1..n).
fig, axes = plt.subplots(2, 2, figsize=(9,7), sharey=True)

strategy_keys = ["deep", "deep+m2m", "deep+i2m", "classes"]

# Tick-label formatters for the metrics that carry a unit; every other metric
# falls back to a plain integer format.
unit_formats = {
    "bytes": lambda x, pos: f'{x/1000:0.0f} Kb',
    "time": lambda x, pos: f'{x:0.0f} s',
}

for lb, ax in zip(["time", "iters", "bytes", "classes"], axes.flatten()):
    # Rows: remaining index levels, columns: strategies (successful rows only).
    per_strategy = results[success[results.index]][lb].unstack("strategy")

    maxx = 0
    for key in strategy_keys:
        column = per_strategy[key]
        ranks = list(range(1, len(column) + 1))
        ax.plot(sorted(column), ranks, label=key)
        maxx = max(maxx, max(column))

    # `key` is still the last strategy from the loop above.
    ylim = (1, len(per_strategy[key]))
    ax.set_yticks(np.linspace(*ylim, 6))
    ax.set_ylim(*ylim)

    # The time panel uses a fixed 0..3600 range; the others end at the largest
    # value that was plotted.
    xlim = (0, 3600) if lb == "time" else (0, maxx)
    ax.set_xticks(np.linspace(*xlim, 5))
    ax.set_xlim(*xlim)

    label_format = unit_formats.get(lb, lambda x, pos: f'{x:0.0f}')
    ax.xaxis.set_major_formatter(plt.FuncFormatter(label_format))

    ax.set_xlabel(lb)

    for side in ("right", "top"):
        ax.spines[side].set_visible(False)

    for side in ("left", "bottom"):
        ax.spines[side].set_position(("outward", 5))
        ax.spines[side].set_color("gray")

    # Grey tick marks and tick labels on both axes.
    for axis in (ax.yaxis, ax.xaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_color("gray")
            tick.tick1line.set_color("gray")

# Legend only on the last panel of the grid.
ax.legend()
fig.tight_layout()

In [None]:
# "bugs" values of the "classes" strategy, restricted to successful rows.
successful_by_strategy = results[success[results.index]].unstack("strategy")
successful_by_strategy["bugs"]["classes"]

In [None]:
# All rows recorded for one specific project name.
project_name = "urlde8e6ba918_Michael_Heinzelmann_IT_Consulting_dog4sql"
results.loc[project_name]

In [None]:
# Per-step measurements of one reduction run.
# NOTE(review): this rebinds `metrics`, which an earlier cell uses for a list
# of column names; re-running that earlier cell resets it.
metrics = pd.read_csv("result/urlde8e6ba918_Michael_Heinzelmann_IT_Consulting_dog4sql/procyon/reduce/deep/workfolder/metrics.csv")

# Two stacked scatter plots sharing the x axis (bytes): setup time on top,
# run time below.
fig, axes = plt.subplots(2, 1, figsize=(7,7), sharex=True)


for key, ax in zip(["setup time", "run time"], axes):
    ax.scatter(metrics.bytes, metrics[key])
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x:0.1f} s'))
    # Start both axes at zero and leave 10% headroom above the largest value.
    ax.set_xlim(0, metrics.bytes.max() * 1.1)
    ax.set_ylim(0, metrics[key].max() * 1.1)
    # Label each panel so the two plots can be told apart.
    ax.set_ylabel(key)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    for spine in [ax.spines['left'], ax.spines['bottom']]:
        spine.set_position(("outward", 5))
        spine.set_color("gray")

    # Grey tick marks and tick labels on both axes.
    for axis in [ax.yaxis, ax.xaxis]:
        for tick in axis.get_major_ticks():
            tick.label1.set_color("gray")
            tick.tick1line.set_color("gray")

# The original referenced an undefined name `ax1` here (NameError on a fresh
# kernel). The x axis is shared, so format it on the bottom axes.
axes[-1].xaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: f'{x/1000:0.0f} Kb'))

fig.tight_layout()