In [2]:
import sys
import os

# Link BigPy
# The checkout location can be overridden with the BIGPY_PATH environment
# variable, so the notebook also runs on machines where BigPy lives elsewhere;
# the original hard-coded location remains the default.
sys.path.insert(0, os.environ.get('BIGPY_PATH', '/home/av/AI-Lab/BigPy/'))
# NOTE(review): the star import is presumably what provides
# `frame_google_benchmarks` and `filtered` used by later cells - confirm and
# consider importing those names explicitly.
from bigpy.bench_report import *  # nopep8
from bigpy.inmem_md import InMemMD  # nopep8
import bigpy.pretty_print as pp  # nopep8

In [30]:
import plotly.express as px
import pandas as pd
import numpy as np

In [32]:
# Benchmark report files (named without the '.json' extension) and the
# human-readable label of each backend; the two lists are paired by position.
backend_sources = ['numpy_blis', 'numpy_openblas', 'numpy_mkl', 'cupy', 'cupy_tf32']
backend_names = ['NumPy on BLIS', 'NumPy on OpenBLAS', 'NumPy on MKL', 'CuPy with F32', 'CuPy with TF32']
# zip() silently drops unmatched entries, so make a length mismatch loud.
assert len(backend_sources) == len(backend_names), 'every report file needs a backend label'

# Load every report, tag its rows with the backend label and concatenate once
# at the end: calling pd.concat inside the loop would re-copy all previously
# accumulated rows on every iteration.
parts = []
for file, name in zip(backend_sources, backend_names):
    part = frame_google_benchmarks(file + '.json')
    part['Backend'] = name
    parts.append(part)
benches = pd.concat(parts, ignore_index=True)

In [33]:
# Per-iteration duration: the 'seconds' column divided by 'iterations'.
benches['mean_duration'] = benches['seconds'].div(benches['iterations'])

# Distinct operations (in order of first appearance) and distinct sizes
# (ascending); later cells iterate over both.
workloads = pd.unique(benches['operation']).tolist()
sizes = sorted(pd.unique(benches['size']))

In [34]:
# Chart titles keyed by the operation identifier found in the benchmark data,
# so every title stays attached to its workload even if the order of
# operations in the reports changes.
workload_titles = {
    'matrix_multiply': 'Matrix Multiply',
    'moving_average': 'Rows Moving Average',
    'pearson_correlations': 'Pearson Correlation of Rows',
    'fft2d': '2D FFT',
    'singular_decomposition': 'Singular Values Decomposition',
    'flat_median': 'Array Median',
    'flat_sort': 'Array Sorting',
}
# Titles in the same order as `workloads`; an operation without an entry
# falls back to its raw identifier instead of shifting the other titles.
workloads_names = [workload_titles.get(workload, workload) for workload in workloads]
workloads

['matrix_multiply',
 'moving_average',
 'pearson_correlations',
 'fft2d',
 'singular_decomposition',
 'flat_median',
 'flat_sort']

In [35]:
# Sanity check: alphabetical list of the backend labels present in the data.
sorted(pd.unique(benches['Backend']))

['CuPy with F32',
 'CuPy with TF32',
 'NumPy on BLIS',
 'NumPy on MKL',
 'NumPy on OpenBLAS']

In [41]:
def _durations_by_size(workload, backend):
    """Return the mean durations of one workload on one backend, ordered by size.

    The matching rows are sorted by their 'size' column and checked against
    the notebook-level `sizes` list, so arrays returned for different backends
    line up element by element and match the x-axis values of the charts.

    NOTE(review): assumes `filtered` returns a DataFrame that keeps the
    'size' column of `benches` - confirm against bigpy.bench_report.
    """
    rows = filtered(benches, operation=workload, Backend=backend).sort_values('size')
    measured_sizes = rows['size'].tolist()
    assert measured_sizes == list(sizes), (
        '{} / {}: expected exactly one measurement per size, got sizes {}'.format(
            backend, workload, measured_sizes))
    return rows['mean_duration'].to_numpy()


# Speedups are reported relative to the first backend; the column label
# 'Speedup over BLIS' relies on that first backend being the BLIS build.
baseline_backend = backend_names[0]
# Backends compared in the per-workload CPU vs GPU ratio printout.
best_cpu_backend = 'NumPy on MKL'
best_gpu_backend = 'CuPy with TF32'

for workload, workload_name in zip(workloads, workloads_names):

    # For every workload generate a performance chart
    baselines = _durations_by_size(workload, baseline_backend)
    speedups_dfs = []
    for backend in backend_names:
        measurements = _durations_by_size(workload, backend)
        speedups_dfs.append(pd.DataFrame({
            'Speedup over BLIS': baselines / measurements,
            'Matrix Elements': np.square(sizes),
            'Backend': [backend] * len(measurements),
        }))

    speedups_df = pd.concat(speedups_dfs, ignore_index=True)
    fig = px.line(
        speedups_df,
        x='Matrix Elements',
        y='Speedup over BLIS',
        color='Backend',
        title=workload_name,
        log_y=True,
        log_x=True,
    )
    fig.show()
    fig.write_image('chart_' + workload + '.svg')

    # For every workload, print the sizes and the CPU-to-GPU duration ratio
    # (values above 1 mean the GPU backend was faster).
    best_cpu = _durations_by_size(workload, best_cpu_backend)
    best_gpu = _durations_by_size(workload, best_gpu_backend)
    print(sizes)
    print(best_cpu / best_gpu)
    

[512, 1024, 2048, 4096, 8192, 16384]
[ 8.31277676 17.89669765 25.13376635 21.02900716 19.86265239 23.37981007]


[512, 1024, 2048, 4096, 8192, 16384]
[10.91917032 19.85752349 48.97636315 59.26873098 59.37655407 53.85907313]


[512, 1024, 2048, 4096, 8192, 16384]
[2.48655281 1.96642439 1.44922829 1.1383998  1.45580572 1.38934951]


[512, 1024, 2048, 4096, 8192, 16384]
[  79.6298464   319.31204218 1064.86611723 1482.78049641 1311.22906762
 1661.94513442]


[512, 1024, 2048, 4096, 8192, 16384]
[0.80642345 0.72751829 0.57604938 0.46030402 0.50983903 0.63757857]


[512, 1024, 2048, 4096, 8192, 16384]
[11.16442819 30.30332849 58.84609703 72.19952481 99.39219713 83.49855611]


[512, 1024, 2048, 4096, 8192, 16384]
[  87.71729714  274.3733074   547.28636634  788.7560285   920.4883158
 1008.53688104]
