## Throughput

In [1]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display
from ipywidgets import Layout, Button, Box
from nested_dict import nested_dict

import json
import os, glob
import pandas as pd
import pandas.io.json as pdjson
import seaborn as sns
import ipywidgets as widgets

sns.set(style="whitegrid")

In [2]:
# Initialization
artifacts_dir = "/home/guest/artifacts/"
selected_files = []
selected_baseline = ""
enable_gui = True

In [3]:
# Parameters cell
if enable_gui == False:
    print(enable_gui)
    print(artifacts_dir)
    print(selected_benches)
    print(selected_baseline)

In [4]:
if enable_gui:
    from IPython.core.display import display, HTML
    display(HTML("<style>.container { width:100% !important; }</style>"))

In [5]:
bench_files = []

artifacts_path = artifacts_dir.split("/artifacts/")[0]

for root, dirs, files in os.walk(artifacts_dir):
    for file in files:
        if file.endswith(".orun.bench"):
            # print(root)
            f = root.split("/artifacts/")[1]
            # print(f)
            if (len (f.split("/")) <= 5):
                bench_files.append((os.path.join(root, file)))

In [6]:
len(bench_files)

0

In [7]:
nd = nested_dict(2, list)
for x in bench_files:
    l = x.split("/artifacts/")[1]
    d = l.split("/")
    host      = d[0]
    repo      = d[1]
    commit    = d[2]
    variant   = d[3]
    timestamp = d[4]
    ocaml     = d[5]
    value      = commit + " " + variant + " " + timestamp + " " + ocaml
    nd[host][repo].append(value)
benches = nd.to_dict()    

In [8]:
benches

{}

In [9]:
def f(x):
    return x

def disp(benches):
    def select_repo(host):
        repoW.options = benches[host]
    
    def select_commit(repo):
        commitW.options = repo

    def select_variant(commit):
        return None

    hostW = widgets.Dropdown(options=benches.keys(), description='Host', disabled=False)
    hostS = hostW.value
    hostD = widgets.interactive(select_repo, host=hostW)

    repoW = widgets.Dropdown(options=benches[hostS].keys(), description='Repository', disabled=False)
    repoS = repoW.value
    repoD = widgets.interactive(select_commit, repo=repoW)

    commitW = widgets.Dropdown(options=benches[hostS][repoS], description='Commit', disabled=False)
    commitS = commitW.value
    commitD = widgets.interactive(select_variant, commit=commitW)

    items_layout = Layout( width='auto' )
    
    box_layout = Layout(display='flex',
                       flex_flow='row wrap',
                       align_items='flex-start',
                       #border='solid',
                       width='100%')
    
    items = [hostD, repoD, commitD]
    box = Box(children=items, layout=box_layout)    
    
    # display(hostD, repoD, commitD)
    display(box)
    return (hostD, repoD, commitD)

def get_filename(h, r, c):
    host = h.children[0].value
    # print(host)
    
    repos = list(benches[host].keys())
    repo= repos[r.children[0].index]
    # print(repo)
    
    entries = list(benches[host][repo])
    commit_last = entries[c.children[0].index]

    commit_list = commit_last.split(" ")
    filename = os.path.join(artifacts_path, 'artifacts', host, repo, '/'.join(commit_list))
    print(filename)
    return(filename)

In [10]:
comparisons = interactive(f, x=widgets.IntText(value=2, 
                                               description='Comparisons', 
                                               disabled=False))
display(comparisons)

interactive(children=(IntText(value=2, description='Comparisons'), Output()), _dom_classes=('widget-interact',…

In [11]:
print(comparisons.result)

2


In [12]:
matrix = [[0 for x in range(4)] for y in range(comparisons.result)]
    
for i in range(comparisons.result):
    matrix[i][0], matrix[i][1], matrix[i][2] = disp(benches)

KeyError: None

In [None]:
for i in range(comparisons.result):
    matrix[i][3] = get_filename(matrix[i][0], matrix[i][1], matrix[i][2])

In [None]:
for i in range (comparisons.result):
    print(matrix[i][3])
    selected_files.append(matrix[i][3])

In [None]:
if enable_gui == False:
    for i in selected_benches:
        commit_list = i[2].split(" ")
        selected_files.append(os.path.join(artifacts_dir, i[0], i[1], '/'.join(commit_list)))

In [None]:
data_frames = []

for file in selected_files:
    with open(file) as f:
        data = []
        for l in f:
            data.append(json.loads(l))
        df = pdjson.json_normalize(data)
        df['variant'] = os.path.basename(file).replace(".orun.bench","")
        data_frames.append(df)

df = pd.concat (data_frames, sort=False)
df = df.sort_values(['name']) 
# Uncomment the following to display all the lines in pandas output
pd.set_option('display.max_rows', df.shape[0]+1)
df

### Drop some benchmarks


In [None]:
df = df[(df.name != 'alt-ergo.fill.why') &         #multicore version does not exist
        (df.name != 'alt-ergo.yyll.why') &         #multicore version does not exist
        (df.name != 'frama-c.slevel') &            #multicore version does not exist
        (df.name != 'js_of_ocaml.frama-c_byte')]   #multicore version does not exist
throughput_df = df
df

### Selection example

```
select * from df where variant = '4.10.0+trunk' and time_secs > 10
```

In [None]:
baseline_h, baseline_r, baseline_c = disp(benches)

In [None]:
selected_baseline = baseline_c.children[0].value.split(" ")[3].split(".orun.bench")[0]

print(selected_baseline)

In [None]:
df.loc[(df['variant'] == selected_baseline) & (df['time_secs'] > 10)]

In [None]:
df.loc[df['name'] == 'LU_decomposition.1024']

### Projection example

```
select name, variant, time_secs from df
```

In [None]:
df.filter(['name','variant','time_secs'])

## Time

In [None]:
g = sns.catplot (x='name', y='time_secs', hue='variant', data = df, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
def create_column(df, variant, metric):
    df = pd.DataFrame.copy(df)
    variant_metric_name = list([ zip(df[metric], df[x], df['name']) 
              for x in df.columns.array if x == "variant" ][0])
    name_metric = {n:t for (t, v, n) in variant_metric_name if v == variant}
    return name_metric

def add_display_name(df,variant, metric):
    name_metric = create_column(pd.DataFrame.copy(df), variant, metric)
    disp_name = [name+" ("+str(round(name_metric[name], 2))+")" for name in df["name"]]
    df["display_name"] = pd.Series(disp_name, index=df.index)
    return df

def normalise(df,variant,topic,additionalTopics=[]):
    df = add_display_name(df,variant,topic)
    df = df.sort_values(["name","variant"])
    grouped = df.filter(items=['name',topic,'variant','display_name']+additionalTopics).groupby('variant')
    ndata_frames = []
    for group in grouped:
        (v,data) = group
        if(v != variant):
            data['b'+topic] = grouped.get_group(variant)[topic].values
            data[['n'+topic]] = data[[topic]].div(grouped.get_group(variant)[topic].values, axis=0)
            for t in additionalTopics:
                print(variant, t)
                data[[t]] = grouped.get_group(variant)[t].values
            ndata_frames.append(data)
    df = pd.concat (ndata_frames)
    return df

def plot_normalised(df,variant,topic):
    df = pd.DataFrame.copy(df)
    df.sort_values(by=[topic],inplace=True)
    df[topic] = df[topic] - 1
    g = sns.catplot (x="display_name", y=topic, hue='variant', data = df, kind ='bar', aspect=4, bottom=1)
    g.set_xticklabels(rotation=90)
    g.ax.legend(loc=8)
    g._legend.remove()
    g.ax.set_xlabel("Benchmarks")
    return g
    # g.ax.set_yscale('log')

In [None]:
ndf = normalise(df, selected_baseline, 'time_secs')
plot_normalised(ndf, selected_baseline,'ntime_secs')

## Top heap words

In [None]:
g = sns.catplot (x='name', y='gc.top_heap_words', hue='variant', data = df, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
ndf = normalise(df, selected_baseline, 'gc.top_heap_words')
plot_normalised(ndf, selected_baseline,'ngc.top_heap_words')

## Max RSS (KB)

In [None]:
g = sns.catplot (x='name', y='maxrss_kB', hue='variant', data = df, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
ndf = normalise(df, selected_baseline,'maxrss_kB')
plot_normalised(ndf, selected_baseline,'nmaxrss_kB')

## Major Collections

In [None]:
g = sns.catplot (x='name', y='gc.major_collections', hue='variant', data = df, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
ndf = normalise(df, selected_baseline,'gc.major_collections')
plot_normalised(ndf, selected_baseline,'ngc.major_collections')
ndf

## Major words

In [None]:
g = sns.catplot (x='name', y='gc.major_words', hue='variant', data = df, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
ndf = normalise(df, selected_baseline,'gc.major_words')
plot_normalised(ndf, selected_baseline,'ngc.major_words')

## Minor Collections

In [None]:
g = sns.catplot (x='name', y='gc.minor_collections', hue='variant', data = df, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)

In [None]:
ndf = normalise(df, selected_baseline, 'gc.minor_collections')
plot_normalised(ndf, selected_baseline, 'ngc.minor_collections')

# Latency

In [None]:
data_frames = []

for file in glob.glob("*.pausetimes_*.bench"):
    with open(file) as f:
        data = []
        for l in f:
            data.append(json.loads(l))
        ldf = pdjson.json_normalize(data)
        ldf['variant'] = file.replace(".pausetimes_multicore.bench","").replace(".pausetimes_trunk.bench","")
        data_frames.append(ldf)

df2 = pd.concat(data_frames, sort=False)
df2 = df2.sort_values(['name'])

## Drop some benchmarks
df2 = df2[(df2.name != 'alt-ergo.fill.why') & #multicore version does not exist
        (df2.name != 'alt-ergo.yyll.why') & #multicore version does not exist
        (df2.name != 'frama-c.slevel') &    #multicore version does not exist
        (df2.name != 'js_of_ocaml.frama-c_byte') &    #multicore version does not exist
        (df2.name != 'cpdf.merge')]         #Not a macro benchmark. Will be removed from subsequent runs.
df2.count()
latency_df = df2
df2

The latency distributions `distr_latency` are a list of latencies at `[10,20,30,40,50,60,70,80,90,95,99,99.9]`th percentiles.

## Max latency

In [None]:
df2.filter(["name","variant","max_latency"])

In [None]:
def plotLatencyAt(df,at,aspect):
    fdf = df.filter(["name","variant",at + "_latency"])
    fdf.sort_values(by=[at + '_latency'],inplace=True)
    fdf[at + "_latency"] = fdf[at + "_latency"] / 1000.0
    g = sns.catplot (x='name', y=at+'_latency', hue='variant', data = fdf, kind ='bar', aspect=aspect)
    g.set_xticklabels(rotation=90)
    g.ax.set_ylabel(at + " latency (microseconds)")
    g.ax.set_xlabel("Benchmarks")
    g.ax.set_yscale('log')
    return g

plotLatencyAt(df2,"max",4)

## 99.9th percentile latency

In [None]:
def getLatencyAt(df,percentile,idx):
    groups = df.groupby('variant')
    ndfs = []
    for group in groups:
        (v,df) = group
        for i, row in df.iterrows():
            df.at[i,percentile+"_latency"] = list(df.at[i,"distr_latency"])[idx]
        ndfs.append(df)
    return pd.concat(ndfs)

df2 = getLatencyAt(df2,"99.9",-1)
plotLatencyAt(df2,"99.9",4)

## 99th percentile latency

In [None]:
df2 = getLatencyAt(df2,"99",-2)
plotLatencyAt(df2,"99",4)