In [1]:
import common
import importlib
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import socket

# Lift pandas' display truncation so dataframes are rendered with every column and every row
for option_name in ("display.max_columns", "display.max_rows"):
    pd.set_option(option_name, None)

In [2]:
# Row filter shared by both views: omp_threads is null, cluster is `stones2`, and no reader threads.
RUN_FILTER = "omp_threads.isnull() and cluster == 'stones2' and num_threads_read == 0"

# graphalytics inserts
data_ins = common.import_gfe("view_graphalytics_inserts").query(RUN_FILTER).copy() # data from the experiments
# Replace NaT with 0, otherwise the records are ignored in the group by.
# The result is assigned back to the column: calling fillna(inplace=True) on the column
# selection is a chained in-place update, which newer pandas versions warn about and which
# does not modify the dataframe under Copy-on-Write.
data_ins["build_frequency"] = data_ins["build_frequency"].fillna(pd.Timedelta(0))

# graphalytics load
data_load = common.import_gfe("view_graphalytics_load").query(RUN_FILTER).copy()
# shape the dataframe as data_ins
data_load["build_frequency"] = pd.Timedelta(0)
data_load["build_frequency_secs"] = np.nan # np.nan is the canonical spelling; the np.NaN alias no longer exists in NumPy 2.0
data_load["num_snapshots_created"] = 0
data_load["num_threads_write"] = 0

# Median completion time for each configuration, pivoted so that each algorithm becomes a column
data = (
    pd.concat([data_ins, data_load])
    .groupby(["library", "compiler_family", "graph", "build_frequency", "num_threads_read", "num_threads_write", "algorithm"])
    .agg(completion_time=("median_secs", "median"), count=("median_secs", "count"))
    .unstack("algorithm")["completion_time"]
)
# Index level 1 (`compiler_family`) is referred to as `compiler` from here on
data.index = data.index.set_names("compiler", level=1)

List of available executions:

In [5]:
def fn_join_threads(grp):
    """Render the distinct values of `grp` as a single comma-separated string."""
    return ", ".join(str(item) for item in grp.unique())

# For each (library, compiler) pair, list the writer-thread counts that have results
data.reset_index().groupby(["library", "compiler"]).agg(num_threads=("num_threads_write", fn_join_threads))

Unnamed: 0_level_0,Unnamed: 1_level_0,num_threads
library,compiler,Unnamed: 2_level_1
csr,gcc,0
csr-lcc,gcc,0
csr-lcc-numa,gcc,0
csr-numa,gcc,0
g1_v4-bw-sp-ignore-build,gcc,"3, 6, 12"
g1_v4-ref-ignore-build,gcc,"3, 6, 12, 20"
livegraph_ro,gcc,20
llama6,gcc,16
llama6-ref,gcc,16
stinger5,gcc,"20, 40"


### CSR

In [8]:
# LCC timings of the `csr-lcc` library, indexed by graph only
csr_lcc_sorted = data.loc[("csr-lcc", "gcc")].droplevel([1,2,3]).rename(columns = {"lcc": "lcc_sorted"})["lcc_sorted"]
csr = (
    data.loc[("csr", "gcc")]
    .droplevel([1,2,3])
    .rename(columns = {"lcc": "lcc_std"})
    .assign(lcc_sorted = csr_lcc_sorted)
)
# reorder the columns
csr = csr[["bfs", "cdlp", "lcc_std", "lcc_sorted", "pagerank", "sssp", "wcc"]]
# prepend the key `csr` as the outermost index level `library`
csr = pd.concat([csr], names=["library"], keys=["csr"])

In [9]:
# Show the CSR baseline: median completion times per graph (rows) and algorithm (columns)
csr

Unnamed: 0_level_0,algorithm,bfs,cdlp,lcc_std,lcc_sorted,pagerank,sssp,wcc
library,graph,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
csr,dota-league,1.235302,2.721085,488.513049,70.919549,1.585912,1.732922,1.144053
csr,graph500-22,1.639669,13.167843,449.72993,19.488179,2.252329,2.988205,1.648705
csr,graph500-24,2.509866,52.147986,3271.091807,108.531754,5.164838,9.470398,3.196715
csr,graph500-26,5.02545,110.386501,,704.976753,19.204755,36.034476,9.659982
csr,uniform-22,1.610202,14.723076,8.904657,2.186826,2.389066,3.528628,1.55821
csr,uniform-24,2.402618,64.695137,34.872056,6.121123,6.595718,10.972599,3.632904
csr,uniform-26,4.180822,175.494414,142.640311,22.798893,23.739186,44.953436,10.600717


### Other systems

In [10]:
def _single_library(selection, label):
    """Drop every index level of `selection` except level 2 and prepend a `library` level set to `label`."""
    return pd.concat([selection.droplevel([0, 1, 3, 4, 5])], names=["library"], keys=[label])

# Selection keys follow the index order of `data`:
# (library, compiler, graph, build_frequency, num_threads_read, num_threads_write)
every = slice(None)
stinger = _single_library(data.loc[("stinger5-ref", "gcc", every, every, 0, 40)], "stinger")
llama = _single_library(data.loc[("llama6-ref", "gcc", every, "00:00:10", 0, 16)], "llama") # dynamic scheduling in OpenMP
graphone = _single_library(data.loc[("g1_v4-ref-ignore-build", "gcc", every, every, 0, 12)], "graphone")
livegraph = _single_library(data.loc[("livegraph_ro", "gcc", every, every, every, 20)], "livegraph")

competitors = pd.concat([stinger, llama, graphone, livegraph]).rename(columns = {"lcc": "lcc_std"})
# No `lcc_sorted` measurement is recorded for these systems.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
competitors["lcc_sorted"] = np.nan

### Teseo

In [11]:
def _teseo_runs(library, lcc_name):
    """Select the gcc results of `library`, indexed by graph only, with column `lcc` renamed to `lcc_name`."""
    return data.loc[(library, "gcc")].droplevel([1,2,3]).rename(columns={"lcc": lcc_name})

def _teseo_entry(std_runs, sorted_runs, label):
    """Append the `lcc_sorted` column of `sorted_runs` to `std_runs` and prepend the `library` level `label`."""
    merged = pd.concat([std_runs, sorted_runs["lcc_sorted"]], axis=1)
    return pd.concat([merged], names=["library"], keys=[label])

teseo9 = _teseo_runs("teseo-ta.9", "lcc_std")
teseo9_dv = _teseo_runs("teseo-ta-dv.9", "lcc_std")
teseo9_lcc = _teseo_runs("teseo-ta-lcc.9", "lcc_sorted")
teseo9_lcc_dv = _teseo_runs("teseo-ta-lcc-dv.9", "lcc_sorted")
teseo_logical = _teseo_entry(teseo9, teseo9_lcc, "teseo_logical")
teseo_real = _teseo_entry(teseo9_dv, teseo9_lcc_dv, "teseo_real")

### Dataset

In [12]:
# Stack all systems into one table indexed by (library, graph)
ds = pd.concat([csr, competitors, teseo_logical, teseo_real])
# Per graph and algorithm, the smallest completion time among all systems except the CSR baseline.
# The built-in group-wise min() replaces apply(lambda t: t.min()), and the `axis` argument of
# groupby is omitted because it is deprecated in recent pandas releases.
best_excl_csr = ds.query("library != 'csr'").groupby(level="graph").min()
ds = pd.concat([ds, pd.concat([best_excl_csr], names=["library"], keys=["best_excl_csr"])])

In [13]:
# Show the combined table, indexed by (library, graph), including the `best_excl_csr` rows
ds

Unnamed: 0_level_0,Unnamed: 1_level_0,bfs,cdlp,lcc_std,lcc_sorted,pagerank,sssp,wcc
library,graph,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
csr,dota-league,1.235302,2.721085,488.513049,70.919549,1.585912,1.732922,1.144053
csr,graph500-22,1.639669,13.167843,449.72993,19.488179,2.252329,2.988205,1.648705
csr,graph500-24,2.509866,52.147986,3271.091807,108.531754,5.164838,9.470398,3.196715
csr,graph500-26,5.02545,110.386501,,704.976753,19.204755,36.034476,9.659982
csr,uniform-22,1.610202,14.723076,8.904657,2.186826,2.389066,3.528628,1.55821
csr,uniform-24,2.402618,64.695137,34.872056,6.121123,6.595718,10.972599,3.632904
csr,uniform-26,4.180822,175.494414,142.640311,22.798893,23.739186,44.953436,10.600717
stinger,dota-league,1.357981,2.009458,1565.04254,,2.558133,2.5919,1.324046
stinger,graph500-22,1.810404,5.970912,1329.964047,,3.593634,4.241988,2.25952
stinger,graph500-24,3.133861,26.748108,,,10.85473,15.577805,5.814058


### Create the output file

In [83]:
# Row of the CSR baseline for the graph `dota-league`.
# NOTE(review): looks like a scratch value -- no visible cell reads this module-level `d0`; confirm before removing.
d0 = ds.loc[("csr", "dota-league")]

In [84]:
# Destination of the generated LaTeX table, located under the user's home directory
home_dir = os.environ["HOME"]
path = home_dir + "/workspace/eclipse/paper_gfe/table_graphalytics_auto.tex"

In [85]:
def fmtsecs(value):
    """Format a duration in seconds for the table.

    Values of 20 or more are shown as whole seconds (fraction truncated);
    smaller values are shown with two decimals. The result ends with " s".
    """
    template, shown = ("{} s", int(value)) if value >= 20 else ("{:.2f} s", value)
    return template.format(shown)

In [86]:
# Open the output file. The handle `f` is a module-level global: the write_* helpers below
# write to it, and it is closed at the end of this cell.
f = open(path, "w")
# Table preamble. Every row emitted below has 18 cells (9 per half: graph, system and 7
# algorithms), so the column spec declares exactly 9 + 9 columns.
f.write(r"""
% This file was autogenerated by the script graphalytics_gen_table.ipynb

\resizebox{\textwidth}{!}{ % the table should use 100% of the available width
    \begin{tabular}[t]{clccccccc|clccccccc}
    \toprule
        Graph  & System            & BFS       & CDLP       & LCC         & LCC (opt)   & PageRank     & SSSP        & WCC             
      & Graph  & System            & BFS       & CDLP       & LCC         & LCC (opt)   & PageRank     & SSSP        & WCC  \\   
    \midrule
""")

def write_csr(graph0, graph1, name0, name1, num_libraries):
    """Write the table row with the absolute CSR baseline timings of two graphs, side by side.

    graph0, graph1: keys of the two graphs in `ds`, for the left and right half of the row.
    name0, name1: labels of the two graphs, typeset rotated in a multirow cell.
    num_libraries: number of table rows spanned by the rotated graph label.
    """
    label_cell = "\\parbox[t]{2mm}{\\multirow{%s}{*}{\\rotatebox[origin=c]{90}{%s}}} & %% %s \n"
    # (graph key, label, terminator): the left half ends with a column separator,
    # the right half ends the table row
    halves = (
        (graph0, name0, " &\n"),
        (graph1, name1, "\\\\\n"),
    )
    for graph, name, terminator in halves:
        f.write(label_cell % (num_libraries, name, graph))
        f.write("CSR (baseline) ") # without &
        baseline = ds.loc[("csr", graph)]
        for secs in baseline:
            # a missing measurement is reported as DNF
            f.write("& DNF" if np.isnan(secs) else "& $%s$ " % (fmtsecs(secs)))
        f.write(terminator)
    
def write_speedup_single0(library, graph):
    """Write one cell per algorithm with the completion time of `library` relative to CSR.

    library: value of the `library` index level in `ds`.
    graph: value of the `graph` index level in `ds`.

    Each cell holds completion_time(library) / completion_time(csr) with two decimals and an
    `x` suffix. Cells whose completion time is not above the `best_excl_csr` entry are wrapped
    in the LaTeX macro `winner` (presumably defined in the including document -- TODO confirm).
    A NaN ratio is written as `N/A` for `lcc_sorted` on libraries whose name does not start
    with `teseo`, otherwise as `DNF`. Note that the ratio is also NaN when the CSR value is NaN.
    """
    completionTime = ds.loc[(library, graph)]
    speedup = completionTime / ds.loc[("csr", graph)]
    for column in speedup.index:
        f.write(" & ")
        value = speedup[column]
        best = ds.loc[("best_excl_csr", graph), column]
        if np.isnan(value):
            not_measured = column == "lcc_sorted" and not library.startswith("teseo")
            f.write("N/A" if not_measured else "DNF")
        elif completionTime[column] <= best:
            # raw string: `\w` is not a valid escape sequence in a regular string literal
            f.write(r"$\winner{" + "{:.2f}x".format(value) + "}$")
        else:
            f.write("${:.2f}x$".format(value))
    
def write_speedup_single(label, library, graph0, graph1):
    """Write one full table row for `library`: its label and relative timings for both graphs."""
    # The left half starts after the (empty) graph-label column. The right half additionally
    # skips its own graph-label column.
    for lead, graph in ((" & ", graph0), (" & & ", graph1)):
        f.write(lead + label)
        write_speedup_single0(library, graph)
    f.write(" \\\\ \n")
    
def write_speedup(graph0, graph1, include_llama = True):
    """Write the relative-timing rows of all systems for the pair of graphs.

    include_llama: when false, the LLAMA row is omitted.
    """
    # (label shown in the table, key of the library in `ds`), in output order
    systems = [("Stinger", "stinger")]
    if include_llama:
        systems.append(("LLAMA", "llama"))
    systems.extend([
        ("GraphOne", "graphone"),
        ("LiveGraph", "livegraph"),
        ("Teseo, log. vtx", "teseo_logical"),
        ("Teseo, real vtx", "teseo_real"),
    ])
    for label, library in systems:
        write_speedup_single(label, library, graph0, graph1)
    

# Emit the three row groups and the table footer. The handle is closed in `finally`, so a
# failing lookup (e.g. a library or graph missing from `ds`) does not leave the file open.
try:
    # The multirow span counts the CSR row plus one row per system (6, or 5 without LLAMA)
    write_csr("dota-league", "graph500-22", "DOTA League", "Graph500 SF 22", 7)
    write_speedup("dota-league", "graph500-22")
    f.write("\\midrule \n")
    write_csr("uniform-24", "graph500-24", "Uniform SF 24", "Graph500 SF 24", 7)
    write_speedup("uniform-24", "graph500-24")
    f.write("\\midrule \n")
    write_csr("uniform-26", "graph500-26", "Uniform SF 26", "Graph500 SF 26", 6)
    write_speedup("uniform-26", "graph500-26", include_llama = False)

    f.write(r"""
        \bottomrule
    \end{tabular}
} % end fo resizebox
""")
finally:
    f.close()