In [95]:
%%capture
%pip install pandas plotly humanize nbformat tabulate numpy

In [96]:
import pandas as pd
import numpy as np
import plotly.express as px
import datetime as dt
import humanize
import tabulate
import copy

In [97]:
def duration_to_string(x):
    """Format a duration given in seconds as a short human-readable string.

    The display unit is picked from the magnitude of ``x``: below one
    millisecond the value is shown in microseconds, below one second in
    milliseconds, below one minute in seconds, and otherwise in minutes (or
    larger units). The unit names produced by ``humanize.precisedelta`` are
    abbreviated to 'µsec', 'msec', 'sec' and 'min'.

    Args:
        x: Duration in seconds. May be NaN.

    Returns:
        The formatted string, or ``None`` when ``x`` is NaN.
    """
    # NaN is the only value that compares unequal to itself.
    if x != x:
        return None

    # Each threshold is "one of the next larger unit", expressed in seconds.
    # The microsecond cut-off is 1 ms (0.001 s); using 1 µs here would push
    # every duration between 1 µs and 1 ms into the millisecond branch, where
    # it is displayed as a tiny fraction such as "0.01 msec".
    if x < 0.001:
        minimum_unit, long_name, short_name = 'microseconds', 'microsecond', 'µsec'
    elif x < 1:
        minimum_unit, long_name, short_name = 'milliseconds', 'millisecond', 'msec'
    elif x < 60:
        minimum_unit, long_name, short_name = 'seconds', 'second', 'sec'
    else:
        minimum_unit, long_name, short_name = 'minutes', 'minute', 'min'

    text = humanize.precisedelta(dt.timedelta(seconds=x), minimum_unit=minimum_unit)
    return text.replace(long_name, short_name)


In [98]:
# Benchmark report files to load; the loading cell reads them as JSON records.
results_paths: list[str] = ['report/results.json']

In [99]:
# Read every report listed in `results_paths` and stack them into one frame.
# Each file is read on its own: unpacking the list straight into a single
# `pd.read_json(*results_paths, ...)` call only works for exactly one path,
# because any further path is passed as an extra positional argument.
benches = pd.concat(
    [pd.read_json(path, orient='records') for path in results_paths],
    ignore_index=True,
)

# Switch to the human-readable column names used as plot and table labels.
benches = benches.rename(columns={
    'backend': 'Backend Engine',
    'operation': 'Query',
    'dataset': 'Dataset',
    'iterations': 'Iterations',
    'dataset_size': 'Dataset Size, bytes',
    'seconds': 'Benchmark Duration, seconds',
})

# Per-iteration time, both as a number (for plotting) and as display text.
benches['Iteration Duration, seconds'] = benches['Benchmark Duration, seconds'] / benches['Iterations']
benches['Iteration Duration'] = benches['Iteration Duration, seconds'].apply(duration_to_string)

# Any non-empty `error` marks a failed run. Failed runs get the largest
# per-iteration duration in the frame (presumably so they remain visible as the
# tallest bars on the charts) and a generic 'ERROR' label.
failed = benches['error'] != ''
slowest = benches['Iteration Duration, seconds'].max()
benches.loc[failed, 'Iteration Duration, seconds'] = slowest
benches.loc[failed, 'Iteration Duration'] = 'ERROR'

# Known failure kinds get a more specific label than the generic 'ERROR'.
benches.loc[benches['error'] == 'NotImplementedError()', 'Iteration Duration'] = 'Not Implemented'
benches.loc[benches['error'] == 'TimeOut', 'Iteration Duration'] = 'TimeOut'


In [100]:
# Draw one grouped bar chart per query: datasets on the x axis, one bar per
# backend, bar height = per-iteration duration on a logarithmic scale.
#
# Group by the scalar column name, not by a one-element list: with
# `groupby(['Query'])` pandas >= 2.0 yields 1-tuples such as `('Close',)` as
# group keys, so the `name == 'Close'` check would never match and the chart
# title would be a tuple instead of the query name.
for name, group in benches.groupby('Query'):
    # The 'Close' query is not charted.
    if name == 'Close':
        continue

    # Docs:
    # https://plotly.com/python/bar-charts/
    fig = px.bar(
        group,
        title=name,
        x='Dataset',
        y='Iteration Duration, seconds',
        color='Backend Engine',
        barmode='group',
        text='Iteration Duration',
        log_y=True,
    )
    # Center the title and place a horizontal legend at the top of the plot.
    fig.update_layout(
        title_x=0.5,
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1,
            x=0.5,
            xanchor='center',
        ),
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=True),
        bargap=0.30,
        bargroupgap=0.10,
    )
    # Rotate the duration labels and let them extend past the axis area.
    fig.update_traces(
        textfont_size=14,
        textangle=-60,
        textposition='auto',
        cliponaxis=False,
    )
    fig.show()

## Exporting a Table

For every combination of backend and operation, we want to show the span of potential relative speedups over the baseline backend.
Backends will be the columns. Rows will be the operations.

In [107]:
# Backends to compare against the baseline (table columns). The list is fixed
# by hand; the alternative is list(benches['Backend Engine'].unique()).
backends = ['Snap', 'RetworkX', 'IGraph', 'CuGraph']
operations = list(benches['Query'].unique())
datasets = list(benches['Dataset'].unique())

# pairwise_speedups[i][j] holds one speedup per dataset for operations[i] run
# on backends[j], relative to the baseline backend.
pairwise_speedups: list[list[list[float]]] = []
backend_baseline = 'NetworkX'

# Index the benchmark rows by (query, backend, dataset) for direct lookup.
benches_dict = {(d['Query'], d['Backend Engine'], d['Dataset']): d for d in benches.to_records()}

for operation in operations:
    cols = []
    for backend in backends:
        multiples: list[float] = []

        for dataset in datasets:
            # `.get` returns None for a combination that was never benchmarked,
            # so the guard below can skip it; plain indexing would raise
            # KeyError before the None check is ever reached.
            baseline_result = benches_dict.get((operation, backend_baseline, dataset))
            improved_result = benches_dict.get((operation, backend, dataset))
            if baseline_result is None or improved_result is None:
                continue

            # A failed run on either side makes the ratio meaningless.
            if len(baseline_result['error']) or len(improved_result['error']):
                continue

            # Values above 1 mean `backend` was faster than the baseline.
            speedup: float = baseline_result['Iteration Duration, seconds'] / improved_result['Iteration Duration, seconds']
            multiples.append(speedup)

        cols.append(multiples)
    pairwise_speedups.append(cols)

len(pairwise_speedups)

7

In [108]:
def map2d(func, mat):
    """Apply ``func`` to every cell of a 2-D nested sequence.

    Args:
        func: Callable invoked once per cell.
        mat: Iterable of rows, each row an iterable of cells.

    Returns:
        A new list of lists with the same layout as ``mat``, holding the
        results of ``func``.
    """
    mapped_rows = []
    for row in mat:
        mapped_rows.append(list(map(func, row)))
    return mapped_rows

In [102]:
def describe(results: list):
    """Summarize a list of numbers as a two-line text snippet.

    Args:
        results: The values to summarize.

    Returns:
        An empty string when ``results`` is empty. Otherwise a string whose
        first line reports the mean and the number of values, and whose second
        line reports the standard deviation and the minimum/maximum range.
    """
    count = len(results)
    if count == 0:
        return ''

    average, spread = np.mean(results), np.std(results)
    lowest, highest = np.min(results), np.max(results)
    return f'x̅ = {average:.2f}, N = {count}\nσ = {spread:.2f}, {lowest:.4f} ≤ x ≤ {highest:.2f}'

Don't forget to prepend an index column that names the target operation of each row.

In [106]:
# Turn every list of speedups into its text summary and put the operation name
# in front of each row. `headers` has one entry fewer than the rows have
# columns, which leaves the leading operation column without a header.
mat = [
    [operations[index], *cells]
    for index, cells in enumerate(map2d(describe, pairwise_speedups))
]
print(tabulate.tabulate(mat, headers=backends))

                             Snap                                   RetworkX                         IGraph                             CuGraph
---------------------------  -------------------------------------  -------------------------------  ---------------------------------  -----------------------------------
Parse                        x̅ = 6.82, N = 10                       x̅ = 14.59, N = 10                x̅ = 2.89, N = 10                   x̅ = 162.47, N = 10
                             σ = 1.21, 4.3278 ≤ x ≤ 8.80            σ = 5.24, 0.8521 ≤ x ≤ 22.51     σ = 0.54, 2.0694 ≤ x ≤ 3.66        σ = 164.98, 0.3150 ≤ x ≤ 498.48
PageRank                     x̅ = 5.46, N = 10                                                        x̅ = 26.94, N = 10                  x̅ = 151.30, N = 10
                             σ = 3.39, 1.1757 ≤ x ≤ 11.57                                            σ = 20.20, 2.9756 ≤ x ≤ 80.03      σ = 161.42, 0.3707 ≤ x ≤ 527.15
Community Detection            