In [57]:
import sys
import os

# Link BigPy
sys.path.insert(0, '/home/av/AI-Lab/BigPy/')
from bigpy.bench_report import *  # nopep8
from bigpy.inmem_md import InMemMD  # nopep8
import bigpy.pretty_print as pp  # nopep8

In [58]:
import plotly.express as px
import pandas as pd
import numpy as np

In [59]:
# Benchmark result sources and the label each one gets in the charts.
# The two lists are parallel: backend_sources[i] is read from '<source>.json'
# and tagged with backend_names[i].
backend_sources = [
    'numpy_blis',
    'numpy_openblas',
    'numpy_mkl',
    'cupy',
    'cupy_tf32',
    'cupy_cub',
    'cupy_cutensor',
    'cupy_cub_cutensor',
    'cupy_cub_cutensor_tf32',
]
backend_names = [
    'NumPy on BLIS',
    'NumPy on OpenBLAS',
    'NumPy on MKL',
    'CuPy',
    'CuPy +TF32',
    'CuPy +CUB',
    'CuPy +TCs',
    'CuPy +CUB+TCs',
    'CuPy +CUB+TCs+TF32',
]
# zip() silently truncates to the shorter list, which would drop a backend
# without any error, so check the pairing up front.
assert len(backend_sources) == len(backend_names), \
    'backend_sources and backend_names must have the same length'

# Load every file, tag its rows with the backend label, and concatenate once.
# Concatenating inside the loop re-copies the growing frame on every iteration
# and starts from an empty DataFrame, which recent pandas versions warn about.
parts = []
for file, name in zip(backend_sources, backend_names):
    part = frame_google_benchmarks(file + '.json')
    part['Backend'] = name
    parts.append(part)
benches = pd.concat(parts, ignore_index=True)

In [60]:
# Time per iteration: the 'seconds' column divided by the 'iterations' column.
benches['mean_duration'] = benches['seconds'].div(benches['iterations'])

# Distinct operation ids, in order of first appearance in the loaded data.
workloads = pd.unique(benches['operation']).tolist()

# Distinct problem sizes in ascending order.
sizes = list(benches['size'].unique())
sizes.sort()

In [61]:
# Chart titles keyed by operation id. Keying by id (rather than relying on a
# positional list) keeps each title attached to the right workload even if the
# order of operations in the benchmark files changes.
workload_titles = {
    'matrix_multiply': 'Matrix Multiply',
    'moving_average': 'Rows Moving Average',
    'pearson_correlations': 'Pearson Correlation of Rows',
    'fft2d': '2D FFT',
    'singular_decomposition': 'Singular Values Decomposition',
    'flat_median': 'Array Median',
    'flat_sort': 'Array Sorting',
    'flat_sum': 'Array Summation',
}
# One title per entry of `workloads`, in the same order; an operation without
# a registered title falls back to its raw id instead of being dropped.
workloads_names = [workload_titles.get(workload, workload) for workload in workloads]
workloads

['matrix_multiply',
 'moving_average',
 'pearson_correlations',
 'fft2d',
 'singular_decomposition',
 'flat_median',
 'flat_sort',
 'flat_sum']

In [62]:
# Sanity check: the distinct backend labels present in the loaded data, alphabetically.
sorted(set(benches['Backend']))

['CuPy',
 'CuPy +CUB',
 'CuPy +CUB+TCs',
 'CuPy +CUB+TCs+TF32',
 'CuPy +TCs',
 'CuPy +TF32',
 'NumPy on BLIS',
 'NumPy on MKL',
 'NumPy on OpenBLAS']

In [63]:
# For every workload, chart each backend's speedup over the baseline backend
# (the first entry of `backend_names`) against problem size.
baseline_backend = backend_names[0]
for workload, workload_name in zip(workloads, workloads_names):

    # Order rows by size (stable sort) so that baseline and measurement rows
    # are paired by problem size rather than by their order in the result files.
    # NOTE(review): assumes `filtered` returns a DataFrame slice of `benches`
    # that still carries the 'size' column - confirm against bigpy.bench_report.
    baseline_rows = filtered(benches, operation=workload, Backend=baseline_backend)
    baseline_rows = baseline_rows.sort_values('size', kind='mergesort')
    baseline_sizes = baseline_rows['size'].to_numpy()
    baselines = baseline_rows['mean_duration'].to_numpy()

    speedups_dfs = []
    for backend in backend_names:
        backend_rows = filtered(benches, operation=workload, Backend=backend)
        backend_rows = backend_rows.sort_values('size', kind='mergesort')
        measurements = backend_rows['mean_duration'].to_numpy()

        # A speedup is only meaningful between runs of the same problem size.
        assert np.array_equal(baseline_sizes, backend_rows['size'].to_numpy()), \
            '{} and {} were not measured on the same sizes for {}'.format(
                backend, baseline_backend, workload)

        speedups = baselines / measurements
        df = pd.DataFrame({
            'Speedup over BLIS': speedups,
            # x values come from the rows being plotted, so a workload that
            # covers only some of the global sizes still lines up.
            'Matrix Elements': np.square(baseline_sizes),
            'Backend': [backend] * len(speedups),
        })
        speedups_dfs.append(df)

    speedups_df = pd.concat(speedups_dfs, ignore_index=True)
    fig = px.line(
        speedups_df,
        x='Matrix Elements',
        y='Speedup over BLIS',
        color='Backend',
        title=workload_name,
        log_x=True,
        log_y=True,
    )
    fig.show()
    # fig.write_image('chart_' + workload + '.svg')

    # For every workload, print a table
    # best_cpu = filtered(benches, operation=workload, Backend='NumPy on MKL')['mean_duration'].to_numpy()
    # best_gpu = filtered(benches, operation=workload, Backend='CuPy +CUB+TCs+TF32')['mean_duration'].to_numpy()
    # speedups = best_cpu / best_gpu
    # speedups = ['{0:.1f}x'.format(x) for x in speedups]
    # speedups = ' | '.join(speedups)
    