# Python vs C++ vs GPU   -  Visualized

<img src="./imgs/py_v_cpp.png" width="900"/>

## Note the scale change between the 2 charts

<img src="./imgs/bokeh_gpu_plot.png" width="900"/>

# My storage: PCIe NVME SSD

<img src="./imgs/nvme.jpg" alt="nvme" style="width: 220px;"/>

# My GPU: NVidia GeForce 1060 (laptop edition)

<img src="./imgs/geforce_1060_laptop.png" alt="gpu" style="width: 180px;"/>

# GPU Specs

<img src="./imgs/gpu_specs.png" alt="gpu" style="width:400px;"/>

# CUDA Version - 10.1

In [44]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Fri_Feb__8_19:08:17_PST_2019
Cuda compilation tools, release 10.1, V10.1.105


# Analysis: Need Faster Storage

### * The C++ execution is much faster than Python
### * The GPU execution is ridiculously faster than both C++ and Python
### * Need more larger and faster storage capacity to peg GPU
### * Python maxes out around 60 thousand pixels processes per second
### * C++ maxes out around 4 millions pixels processed per second
### * Despite being PCIe NVMe storage, my old GeForce 1060 mobile edition is not fed fast enough

# Load Results from GPU, Python, and C++ executions

In [45]:
import pandas as pd
df = pd.read_csv('./results/py_v_cpp_v_gpu.csv')
df

Unnamed: 0,time,recursion_count,image_size,pixels_calculated,gpu_seconds,gpu_pixels_per_second,cpp_seconds,cpp_pixels_per_second,python_seconds,python_pixels_per_second
0,1554397000.0,1,810000,810000,0.179029,4524401.0,0.359895,2250654.0,15.595147,51939.235038
1,1554397000.0,10,810000,8015910,0.217322,36884930.0,2.518677,3182588.0,130.327321,61505.983053
2,1554397000.0,50,810000,38243550,0.17056,224223300.0,9.951778,3842886.0,600.306362,63706.721145
3,1554397000.0,100,810000,72047100,0.168422,427776100.0,16.87111,4270442.0,1122.440159,64187.920783
4,1554399000.0,200,810000,127334200,0.163306,779726500.0,29.678302,4290481.0,1980.219002,64303.089659
5,1554401000.0,400,810000,195628400,0.159782,1224346000.0,45.243556,4323895.0,3043.072407,64286.475596
6,1554406000.0,500,810000,212635500,0.152472,1394591000.0,58.215669,3652548.0,3560.143256,59726.669599


# Shape Data for Visualization

### drop unused columns

In [46]:
df = df.drop(['time','image_size','cpp_seconds','gpu_seconds','python_seconds','pixels_calculated'], axis=1)
df

Unnamed: 0,recursion_count,gpu_pixels_per_second,cpp_pixels_per_second,python_pixels_per_second
0,1,4524401.0,2250654.0,51939.235038
1,10,36884930.0,3182588.0,61505.983053
2,50,224223300.0,3842886.0,63706.721145
3,100,427776100.0,4270442.0,64187.920783
4,200,779726500.0,4290481.0,64303.089659
5,400,1224346000.0,4323895.0,64286.475596
6,500,1394591000.0,3652548.0,59726.669599


### rename columns

In [47]:
df['recursions'] = df.recursion_count
df['Python'] = df.python_pixels_per_second
df['Cpp']    = df.cpp_pixels_per_second
df['GPU']    = df.gpu_pixels_per_second
df = df.drop(['recursion_count','cpp_pixels_per_second','python_pixels_per_second'], axis=1)
df

Unnamed: 0,gpu_pixels_per_second,recursions,Python,Cpp,GPU
0,4524401.0,1,51939.235038,2250654.0,4524401.0
1,36884930.0,10,61505.983053,3182588.0,36884930.0
2,224223300.0,50,63706.721145,3842886.0,224223300.0
3,427776100.0,100,64187.920783,4270442.0,427776100.0
4,779726500.0,200,64303.089659,4290481.0,779726500.0
5,1224346000.0,400,64286.475596,4323895.0,1224346000.0
6,1394591000.0,500,59726.669599,3652548.0,1394591000.0


### convert recursion counts to labels

In [48]:
df.recursions = [str(i) for i in df.recursions.tolist()]
df.recursions.tolist()

['1', '10', '50', '100', '200', '400', '500']

### pixels to megapixels

In [49]:
df.Python = [round(i/1000000,4) for i in df.Python ]
df.Cpp = [ round(i/1000000,4) for i in df.Cpp ] 
df.GPU = [ round(i/1000000,4) for i in df.GPU ] 
df

Unnamed: 0,gpu_pixels_per_second,recursions,Python,Cpp,GPU
0,4524401.0,1,0.0519,2.2507,4.5244
1,36884930.0,10,0.0615,3.1826,36.8849
2,224223300.0,50,0.0637,3.8429,224.2233
3,427776100.0,100,0.0642,4.2704,427.7761
4,779726500.0,200,0.0643,4.2905,779.7265
5,1224346000.0,400,0.0643,4.3239,1224.3462
6,1394591000.0,500,0.0597,3.6525,1394.5914


# Visualize Comparison

In [50]:
import numpy as np
from bokeh.core.properties import value
from bokeh.io import show, output_notebook, reset_output
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure
from bokeh.palettes import Spectral6
from bokeh.transform import dodge
from bokeh.transform import factor_cmap
reset_output()
output_notebook()

In [52]:
x_max  = max(df.recursions)
y_max  = max(df.GPU) + 20
source = ColumnDataSource(data=df)
width  = 0.20
alpha  = 0.80
ticker_buffer = 0.22

p = figure(
    x_range=df.recursions, y_range=(0, y_max), 
    plot_height=600,plot_width=970,
    title="Megapixels Processed per Second"
)
p.vbar(
    x=dodge('recursions', 0-ticker_buffer, range=p.x_range), 
    top='Python', 
    width=width, source=source,alpha=alpha,
    color="#e84d60",legend=value("Python")
)
p.vbar(
    x=dodge('recursions',  0,  range=p.x_range), 
    top='Cpp', 
    width=width, source=source,alpha=alpha,
    color="#718dbf", legend=value("C++")
)
p.vbar(
    x=dodge('recursions',  ticker_buffer,  range=p.x_range), 
    top='GPU', 
    width=width, source=source,alpha=alpha,
    color="green", legend=value("GPU")
)

p.x_range.range_padding = 0.01
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "vertical"
p.xaxis.axis_label = "Recursions"
p.xaxis.axis_line_width = 0.1
p.yaxis.axis_line_width = 0.1
p.yaxis.major_label_text_color = "orange"
p.yaxis.major_label_orientation = "vertical"

show(p)

# NEXT - easy "Pooling Convolution" with GPU

http://localhost:8888/notebooks/5_Calling_GPU_CUDA_code_from_Python.ipynb

![raw](./imgs/rari.jpg)