# Comparison  -  GPU - C++ - Python

### code from previous notebooks

#### from http://localhost:8888/notebooks/1_Image_Metadata.ipynb

In [10]:
from PIL import Image
import ctypes
from ctypes import *
import numpy as np

In [11]:
def image_meta(image_file):    
    height = !identify -format '%h' {image_file}
    height = int(height[0])
    width = !identify -format '%w' {image_file}
    width = int(width[0])
    has_alpha_channel = !identify -format '%A' {image_file}
    has_alpha_channel = has_alpha_channel[0]
    channel_cnt = 3
    if has_alpha_channel:
        channel_cnt = 4
    # only 3 channels, red, green, blue, no alpha
    channel_size = width * height
    
    pixels = list(Image.open(image_file).getdata())
    red = list()
    green = list()
    blue = list()
    # will discard alpha channel
    for quadruple in pixels:
        red.append(quadruple[0])
        green.append(quadruple[1])
        blue.append(quadruple[2])
    
    return [height, width, red, green, blue]   

In [12]:
def display_channel(pixels, width):
    from PIL import Image
    height = (int) (len(pixels)/width)
    img = Image.new('L', (width, height))
    img.putdata(pixels)
    display(img)

#### from http://localhost:8888/notebooks/2_Py_Convolution.ipynb

In [13]:
def PY_apply_simple_kernel_func(mat, w, kernel_funct):
    
    # determine height
    h=(int)(len(mat)/w)
    
    # list for convolution
    new_image = list()
    
    # slide window over matrix
    for i,g in enumerate(mat):
    
        # out of bounds?
        if i >= (w*(h-1)): 
            break
        if i!= 0 and (i+1) % w == 0:
            continue
        
        # grab window of pixels
        window = [mat[i], mat[(i+1)], mat[(i+w)], mat[(i+1+w)]]
        
        # apply function
        output = kernel_funct(window)
        
        # round half to even
        output = int(round(output))
        
        # add pixel to convolution
        new_image.append(output)
    return new_image

In [14]:
# recursify the function
def PY_recurse_pooling(mat, width, kernel_funct, recurse_cnt):
    m = mat
    w = width
    for i in range(recurse_cnt):    
        m = PY_apply_simple_kernel_func(m, w, kernel_funct)
        w = w - 1
    return m

#### from http://localhost:8888/notebooks/3_CPP_Convolution.ipynb

In [15]:
def native_recurse_pooling(mat, width, recurse_cnt, use_max, function_ptr):    
    height = (int)(len(mat) / width)
    # create pointer array
    ptr_param = pointer((ctypes.c_int * len(mat))(*mat))

    # calculate array size after function call
    returned_height = height - recurse_cnt
    returned_width  = width - recurse_cnt
    returned_convolution_array_size = returned_height * returned_width
    
    # define return type of integer pointer array
    function_ptr.restype = ctypes.POINTER(ctypes.c_int * (returned_convolution_array_size))
    # conver boolean to int
    f=0
    if use_max:
       f=1 
    # actuall call to C++ code
    ptr_array_convolution = function_ptr(ptr_param, width, height, recurse_cnt, f)
    # convert int pointer array to Python list
    ret_convo = np.ctypeslib.as_array( ptr_array_convolution.contents ,shape=(1,)).astype(int).tolist()
    # return Python list of convolution pixels
    return ret_convo

## Let's compare the performance between C++ and Python

In [16]:
image_file = './imgs/green_911_turbo_86.jpeg'
[height, width, red,green,blue] = image_meta(image_file)
channel_size = height * width

print('image height  : ', height)
print('image width   : ', width)
print('chanel size   : ', channel_size)
print('total pixels  : ', channel_size * 3) # 3 channels

image height  :  600
image width   :  800
chanel size   :  480000
total pixels  :  1440000


### verify results visually

In [17]:
recurse_cnt = 1
CPP_pooling = cdll.LoadLibrary('./cpp/cmake-build-debug/libconvolution.so').recurse_convolution
GPU_pooling = cdll.LoadLibrary('/home/will/cuda-workspace/simple_pooling/src/gpu.so').pooling

display_channel(native_recurse_pooling(green, width, recurse_cnt, False, CPP_function_ptr), width - (recurse_cnt))
display_channel(native_recurse_pooling(green, width, recurse_cnt, False, GPU_pooling), width - (recurse_cnt))
display_channel(PY_recurse_pooling (green, width, np.mean, recurse_cnt), width - (recurse_cnt))

NameError: name 'CPP_function_ptr' is not defined

### let's test performance at different scales

In [None]:
import time

print('mat_size\t,\trecurse_cnt,\tcpp_time,\tpy_time')

for recurse_cnt in [1,5,10,15,20,25]:
    
    print(len(green),  end='', flush=True)
    print('\t\t,\t',   end='', flush=True)
    
    print(recurse_cnt, end='', flush=True)
    print(',\t\t',   end='', flush=True)
    
    start = time.time()
    c_conv = CPP_recurse_pooling(green, width, recurse_cnt, False)    
    secs = round(time.time() - start,2)
    print(secs, end='', flush=True)
    print(',\t\t',   end='', flush=True)
    
    start = time.time()
    p_conv = PY_recurse_pooling(green, width, np.mean, recurse_cnt)    
    secs = round(time.time() - start,2)
    print(secs, end='', flush=True)
        
    print()
    
    if recurse_cnt == 25:
        display_channel(c_conv, width - (recurse_cnt))
        display_channel(p_conv, width - (recurse_cnt))
    

In [None]:
import time

print('mat_size\t,\trecurse_cnt,\tcpp_time,\tpy_time')

for recurse_cnt in [30, 40, 50]:
        
    print(len(green),  end='', flush=True)
    print('\t\t,\t',   end='', flush=True)
    
    print(recurse_cnt, end='', flush=True)
    print(',\t\t',   end='', flush=True)
    
    start = time.time()
    c_conv = CPP_recurse_pooling(green, width, recurse_cnt, False)    
    secs = round(time.time() - start,2)
    print(secs, end='', flush=True)
    print(',\t\t',   end='', flush=True)
    
    start = time.time()
    p_conv = PY_recurse_pooling(green, width, np.mean, recurse_cnt)    
    secs = round(time.time() - start,2)
    print(secs, end='', flush=True)
        
    print()
    
    if recurse_cnt == 50:
        display_channel(c_conv, width - (recurse_cnt))
        display_channel(p_conv, width - (recurse_cnt))


In [None]:
import time

print('mat_size\t,\trecurse_cnt,\tcpp_time,\tpy_time')

for recurse_cnt in [200]:
        
    print(len(green),  end='', flush=True)
    print('\t\t,\t',   end='', flush=True)
    
    print(recurse_cnt, end='', flush=True)
    print(',\t\t',   end='', flush=True)        
    
    start = time.time()
    c_conv = CPP_recurse_pooling(green, width, recurse_cnt, False)    
    secs = round(time.time() - start,2)
    print(secs, end='', flush=True)
    print(',\t\t',   end='', flush=True)
    
    display_channel(c_conv, width - (recurse_cnt))
    
    start = time.time()
    p_conv = PY_recurse_pooling(green, width, np.mean, recurse_cnt)    
    secs = round(time.time() - start,2)
    print(secs, end='', flush=True)
        
    print()
    
    display_channel(p_conv, width - (recurse_cnt))