In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import *
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

In [2]:
# Name of the benchmark whose results are loaded; it selects the CSV file below.
# NOTE: this assignment shadows the built-in `type` for the rest of the notebook.
type = "random_access"
data = pd.read_csv(f"./data/{type}_results.csv")

# Maps each array_type code found in the CSV to the display name used as a legend label.
ars = {
    "SQ": "SQarray",
    "sliced": "Sliced",
    "mono": "Monolithic",
    "base": "Base",
    "HAT": "HAT",
    "MSarray_2": "Multisliced, r=2",
    "MSarray_3": "Multisliced, r=3",
    "MSarray_4": "Multisliced, r=4",
    "geo": "Geometric",
}

# Mean of the remaining columns for every (array_type, element_size) pair.
grouped = data.groupby(['array_type', 'element_size'])
avg = grouped.mean().reset_index()

# One averaged frame per array type, keyed by display name.
arrays = {}
for ar, display_name in ars.items():
    arrays[display_name] = avg.loc[avg["array_type"] == ar]

In [7]:
# Display the averaged rows stored under the "SQarray" key of `arrays`.
arrays["SQarray"]

Unnamed: 0,array_type,element_size,time
512,SQ,16,0.025239
513,SQ,32,0.028275
514,SQ,48,0.032296
515,SQ,64,0.031591
516,SQ,80,0.035489
...,...,...,...
635,SQ,1984,0.281286
636,SQ,2000,0.339627
637,SQ,2016,0.285460
638,SQ,2032,0.343737


In [3]:
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral10

def visualise_position(arrays, title='avg run time for insertion'):
    """Plot every entry of ``arrays`` as a line on a single Bokeh figure.

    Parameters
    ----------
    arrays : mapping
        Maps a legend label (str) to an object exposing ``element_size`` and
        ``time`` as pandas Series. Entries are drawn in iteration order.
    title : str, optional
        Figure title. Defaults to the title this function has always used.

    Notes
    -----
    Line colours cycle through four values and marker styles through three
    (no marker, circle, triangle), so neighbouring series differ in at least
    one of the two. The figure is displayed with ``show``; nothing is returned.
    """
    p = figure(
        title=title,
        plot_width=900,
        x_axis_label='size of each element (bytes)',
        y_axis_label='average time (s)',
    )
    colors = ["blue", "red", "green", "orange"]
    # None means "line only"; the other entries draw markers on top of the line.
    marker_drawers = [None, p.circle, p.triangle]

    for i, (key, frame) in enumerate(arrays.items()):
        color = colors[i % len(colors)]
        p.line(frame.element_size, frame.time, color=color, legend_label=key)

        draw_marker = marker_drawers[i % len(marker_drawers)]
        if draw_marker is None:
            continue
        # Mark every 4th row by position. The slice stops before the last row,
        # as the original slicing did.
        xspace = frame.element_size.iloc[0:-1:4]
        yspace = frame.time.iloc[0:-1:4]
        draw_marker(xspace, yspace, size=6, color=color, legend_label=key)

    p.legend.location = "top_left"
    show(p)
# Plot the averaged timings of every array type held in `arrays`.
visualise_position(arrays)

In [24]:
def visualise_with_base(r1, r2, r3, r4, r5=None, r6=None, r7=None, r8=None,
                        title='run time of different array types',
                        y_axis_label='average time'):
    """Draw up to eight result frames as coloured lines on one Bokeh figure.

    Every positional slot has a fixed colour and legend label, so the order
    of the arguments decides how each series is labelled:

        r1 "SQarray", r2 "sliced", r3 "monolithic", r4 "base",
        r5 "HAT", r6 "MS, r=2", r7 "MS, r=3", r8 "MS, r=4"

    Parameters
    ----------
    r1 .. r8 : objects exposing ``element_size`` and ``time``
        Series to plot. Any slot that is None is skipped; r5-r8 default to None.
    title : str, optional
        Figure title.
    y_axis_label : str, optional
        Label of the y axis; override it when plotting something other than
        an average (for example a standard deviation).

    The figure is displayed with ``show``; nothing is returned.
    """
    # (frame, colour, legend label) for each slot, in signature order.
    series = (
        (r1, 'blue', "SQarray"),
        (r2, 'red', "sliced"),
        (r3, 'green', "monolithic"),
        (r4, 'black', "base"),
        (r5, 'yellow', "HAT"),
        (r6, 'orange', "MS, r=2"),
        (r7, 'purple', "MS, r=3"),
        (r8, 'indigo', "MS, r=4"),
    )
    p = figure(
        title=title,
        plot_width=900,
        x_axis_label='size of each element',
        y_axis_label=y_axis_label,
    )
    for frame, color, label in series:
        if frame is None:
            continue
        p.line(frame.element_size, frame.time, color=color, legend_label=label)
    p.legend.location = "top_left"
    show(p)
# Frames are looked up in `arrays` (keyed by display name) so that this cell
# runs on a fresh kernel. The argument order follows the legend labels fixed
# inside visualise_with_base: SQarray, sliced, monolithic, base, HAT,
# MS r=2, MS r=3, MS r=4.
visualise_with_base(
    arrays["SQarray"],
    arrays["Sliced"],
    arrays["Monolithic"],
    arrays["Base"],
    arrays["HAT"],
    arrays["Multisliced, r=2"],
    arrays["Multisliced, r=3"],
    arrays["Multisliced, r=4"],
)

In [16]:
# Standard deviation of the timings for every (array_type, element_size) pair.
volatility = data.groupby(['array_type', 'element_size']).std().reset_index()
SQ_vol = volatility[volatility["array_type"] == "SQ"]
sliced_vol = volatility[volatility["array_type"] == "sliced"]
mono_vol = volatility[volatility["array_type"] == "mono"]
base_vol = volatility[volatility["array_type"] == "base"]
HAT_vol = volatility[volatility["array_type"] == "HAT"]
MS2_vol = volatility[volatility["array_type"] == "MSarray_2"]
MS3_vol = volatility[volatility["array_type"] == "MSarray_3"]
MS4_vol = volatility[volatility["array_type"] == "MSarray_4"]

# visualise_with_base assigns legend labels by position (SQarray, sliced,
# monolithic, base, HAT, MS r=2, MS r=3, MS r=4), so each frame must sit in
# the slot of its own array type.
# NOTE: the figure title and y-axis label come from visualise_with_base and
# still describe run time / average time, although these values are
# standard deviations.
visualise_with_base(SQ_vol, sliced_vol, mono_vol, base_vol, HAT_vol, MS2_vol, MS3_vol, MS4_vol)

In [5]:
def visualise_log(r1, r2, r3, r4):
    """Plot four result frames on log2-log2 axes in one Bokeh figure.

    Both coordinates are transformed with ``np.log2`` before plotting, so the
    axes show exponents rather than raw sizes and times; the axis labels say
    so explicitly.

    Parameters
    ----------
    r1, r2, r3, r4 : objects exposing ``element_size`` and ``time``
        Series to plot. Their legend labels are fixed by position:
        "SQarray", "sliced", "monolithic", "base".

    The figure is displayed with ``show``; nothing is returned.
    """
    # (frame, colour, legend label) for each slot, in signature order.
    series = (
        (r1, 'blue', "SQarray"),
        (r2, 'red', "sliced"),
        (r3, 'green', "monolithic"),
        (r4, 'black', "base"),
    )
    p = figure(
        title='run time of different array types (log2-log2)',
        plot_width=900,
        x_axis_label='log2(size of each element)',
        y_axis_label='log2(average time)',
    )
    for frame, color, label in series:
        p.line(np.log2(frame.element_size), np.log2(frame.time),
               color=color, legend_label=label)
    p.legend.location = "top_left"
    show(p)
# Frames are looked up in `arrays` (keyed by display name) so that this cell
# runs on a fresh kernel. The order follows the legend labels fixed inside
# visualise_log: SQarray, sliced, monolithic, base.
visualise_log(
    arrays["SQarray"],
    arrays["Sliced"],
    arrays["Monolithic"],
    arrays["Base"],
)

In [45]:
# Read the MS3 results file and average its columns per (array_type, element_size).
MS3_ins = pd.read_csv("./data/MS3_results.csv")
avgMS3 = (
    MS3_ins
    .groupby(['array_type', 'element_size'])
    .mean()
    .reset_index()
)
def visualise_MS3(r1, label="Monolithic", title='avg run time of incone'):
    """Plot a single result frame as a blue line on its own Bokeh figure.

    Parameters
    ----------
    r1 : object exposing ``element_size`` and ``time``
        The series to plot.
    label : str, optional
        Legend label of the line. Defaults to the label this function has
        always used.
    title : str, optional
        Figure title. Defaults to the original title.
        NOTE(review): 'incone' in the default looks like a typo - confirm
        the intended wording.

    The figure is displayed with ``show``; nothing is returned.
    """
    p = figure(
        title=title,
        plot_width=900,
        x_axis_label='size of each element (bytes)',
        y_axis_label='average time (s)',
    )
    p.line(r1.element_size, r1.time, color='blue', legend_label=label)
    p.legend.location = "top_left"
    show(p)
# Plot only the averaged rows whose array_type is 'mono'.
# NOTE(review): the saved output of the element_size == 1568 lookup on avgMS3
# lists a single MSarray_3 row, so this filter may select nothing - confirm
# which array_type values the MS3 results file actually contains.
mono_rows = avgMS3[avgMS3['array_type'] == 'mono']
visualise_MS3(mono_rows)

In [20]:
# Show the averaged rows recorded for an element size of 1568.
avgMS3.loc[avgMS3["element_size"].eq(1568)]

Unnamed: 0,array_type,element_size,time
97,MSarray_3,1568,0.386794


In [22]:
# Standard deviation of the MS3 results per (array_type, element_size).
volMS3 = (
    MS3_ins
    .groupby(['array_type', 'element_size'])
    .std()
    .reset_index()
)
# NOTE(review): the frame is passed without filtering on array_type, and
# visualise_MS3 titles the figure as an average run time - confirm both are
# intended for this standard-deviation plot.
visualise_MS3(r1=volMS3)

In [13]:
# Compare the main results against the separately loaded avgMS3 frame.
# NOTE(review): SQ_data, sliced_data, mono_data, base_data, MS2, MS3 and MS4
# are not assigned in any visible cell - confirm they exist on a fresh kernel.
# NOTE(review): visualise_with_base labels series by position, so here MS3 is
# drawn under the "HAT" label and avgMS3 under "MS, r=3" - confirm this
# ordering is intended.
visualise_with_base(
    r1=SQ_data,
    r2=sliced_data,
    r3=mono_data,
    r4=base_data,
    r5=MS3,
    r6=MS2,
    r7=avgMS3,
    r8=MS4,
)