In [1]:
import numpy as np
import pandas as pd
import os.path as path

# Locations of the input data and of the archived benchmark results.
data_path = "../data/"
results_path = "../results/archive/"
runtimes_path = results_path + "T4_18-6-3_sm52.csv"

# Kernel list shipped with the data directory.
kernels = pd.read_csv(data_path + 'kernel_list.csv')

if path.exists(runtimes_path):
    run_times = pd.read_csv(runtimes_path, low_memory=False)
    # Rewrite the recorded paths so they match the local layout:
    # 'step2/' becomes '<data_path>kernels/' and every '/main.cu' is removed.
    run_times['path'] = run_times['path'].map(
        lambda raw: raw.replace('step2/', data_path + 'kernels/').replace('/main.cu', '')
    )
else:
    # No results file on disk: fall back to an empty frame with the expected columns.
    run_times = pd.DataFrame(columns=['path', "function", "time", 'blocks', 'matrix'])

Add each kernel's directory path to the `kernels` dataframe as a new `kernel_path` column

In [2]:
# Build each kernel's directory as <data_path>kernels/<Repo>/<underdirectory>.
# Column-wise string concatenation replaces the row-by-row iterrows() loop,
# which constructs a Series per row and is slow on a frame with thousands of rows.
kernels['kernel_path'] = (
    data_path + "kernels/"
    + kernels["Repo"].astype(str) + "/"
    + kernels["underdirectory"].astype(str)
)

Explore the dataset

In [3]:
# Preview the first rows of the kernel list.
kernels.head()

Unnamed: 0,Repo,underdirectory,function,status,variables,kernel_path
0,1,0,euclidean_kernel,runs,"[('const float *', 'vg_a'), ('size_t', 'pitch_...",../data/kernels/1/0
1,1,1,euclidean_kernel_same,runs,"[('const float *', 'vg_a'), ('size_t', 'pitch_...",../data/kernels/1/1
2,1,2,maximum_kernel,runs,"[('const float *', 'vg_a'), ('size_t', 'pitch_...",../data/kernels/1/2
3,1,3,maximum_kernel_same,runs,"[('const float *', 'vg_a'), ('size_t', 'pitch_...",../data/kernels/1/3
4,1,4,manhattan_kernel,runs,"[('const float *', 'vg_a'), ('size_t', 'pitch_...",../data/kernels/1/4


In [4]:
# Number of rows in the kernel list.
print(len(kernels))

20258


In [5]:
# Preview the first rows of the recorded run times.
run_times.head()

Unnamed: 0,path,function,blocks,matrix,time
0,../data/kernels/1/0,euclidean_kernel,"(1, 1024)","(1016, 1016)",2970.211154
1,../data/kernels/1/0,euclidean_kernel,"(1, 1024)","(1232, 1232)",2786.452857
2,../data/kernels/1/0,euclidean_kernel,"(1, 1024)","(16, 16)",3124.981053
3,../data/kernels/1/0,euclidean_kernel,"(1, 1024)","(1680, 1680)",2591.644286
4,../data/kernels/1/0,euclidean_kernel,"(1, 1024)","(2024, 2024)",2476.894286


The matrix sizes and block sizes that are reported are:

In [6]:
# Block shapes kept for the analysis, stored as the strings used in the
# 'blocks' column: square blocks of side 8..32 (step 8), followed by
# one-row blocks of width 64..1024 (step 64).
allowed_blocks = (
    ['({0}, {0})'.format(side) for side in range(8, 33, 8)]
    + ['(1, {})'.format(width) for width in range(64, 1025, 64)]
)

# Square matrix sizes kept for the analysis, as the strings used in the
# 'matrix' column.
allowed_matrix = [
    '({0}, {0})'.format(size)
    for size in (240, 496, 784, 1016, 1232, 1680, 2024)
]

In [7]:
# Keep only the measurements whose matrix size AND block size are both in the
# reported sets.
#
# The rows are selected with a boolean mask rather than by passing index
# labels to .iloc: .iloc is purely positional, so feeding it labels is only
# correct while run_times still carries its default 0..n-1 index and would
# silently pick the wrong rows otherwise.
matrix_ok = run_times['matrix'].isin(allowed_matrix)
block_ok = run_times['blocks'].isin(allowed_blocks)

# Index labels of the matching rows, kept under their original names.
ok_matrix = run_times.index[matrix_ok]
ok_block  = run_times.index[block_ok]
ok_index  = run_times.index[matrix_ok & block_ok]

proc_run_times = run_times[matrix_ok & block_ok]

In [8]:
# Report how many rows the matrix/block filter dropped and how many remain.
print(f'Removed {len(run_times) - len(proc_run_times)} unrelated rows. Left {len(proc_run_times)} rows')

Removed 402141 unrelated rows. Left 2531763 rows


Pivot the block sizes into columns, so that each row holds the times of one kernel / matrix-size combination across all block sizes

In [9]:
# Pivot the long table into one row per (path, function, matrix) with one
# column per block size.
df = (
    proc_run_times
    .set_index(['path', 'function', 'matrix', 'blocks'])
    .unstack('blocks')
)

# Order the (value column, block) column keys by block label first.
df = df.reindex(columns=sorted(df.columns, key=lambda col: tuple(reversed(col))))

# Flatten the two-level column keys to plain 'block_<label>' names.
df.columns = ['block_{}'.format(block) for _value_col, block in df.columns]

Now we can index into this dataframe by position and compare the times of one kernel / matrix-size combination (here row 50), normalised by its fastest time:

In [10]:
# Ratio of the smallest time in row 50 to each block size's time:
# the fastest block size scores 1.0, slower ones score less.
(df.iloc[50].min() / df.iloc[50])

block_(1, 1024)    0.997701
block_(1, 128)     0.931484
block_(1, 192)     0.889089
block_(1, 256)     0.958916
block_(1, 320)     0.961311
block_(1, 384)     0.870674
block_(1, 448)     0.988039
block_(1, 512)     0.960920
block_(1, 576)     0.972484
block_(1, 64)      0.903603
block_(1, 640)     0.994653
block_(1, 704)     0.992841
block_(1, 768)     0.968683
block_(1, 832)     0.996149
block_(1, 896)     0.937261
block_(1, 960)     0.899451
block_(16, 16)     1.000000
block_(24, 24)     0.961750
block_(32, 32)     0.929681
block_(8, 8)       0.883821
Name: (../data/kernels/1/15, gpuFindMax, (1680, 1680)), dtype: float64

The block size with the best (lowest) time can be obtained as follows:

In [11]:
# Column label with the highest min/time ratio, i.e. the fastest block size for row 50.
(df.iloc[50].min() / df.iloc[50]).idxmax()

'block_(16, 16)'

or...

In [12]:
# Same answer more directly: the column label holding the smallest time in row 50.
(df.iloc[50]).idxmin()

'block_(16, 16)'

To get the information for the indexed element, we can use:

In [13]:
# The row's name is its index key: (path, function, matrix).
df.iloc[50].name

('../data/kernels/1/15', 'gpuFindMax', '(1680, 1680)')

The first element is the kernel path and the second is the function name, so we can look up the matching row in `kernels` with:

In [19]:
# Look up the kernel-list entry for row 50 of df. The row's index key starts
# with (kernel path, function name), which together identify the kernel.
row_key = df.iloc[50].name
kernels.loc[(kernels['function'] == row_key[1]) & (kernels['kernel_path'] == row_key[0])]

Unnamed: 0,Repo,underdirectory,function,status,variables,kernel_path
15,1,15,gpuFindMax,runs,"[('int', 'n'), ('float *', 'data'), ('int', 't...",../data/kernels/1/15
