# PyOpenCL basics.

In [1]:
#%install_ext https://github.com/dpsanders/ipython_extensions/tree/master/section_numbering
#%load_ext secnum
#%secnum

In [2]:
# to install, execute in a cell: 
#%install_ext https://raw.github.com/minrk/ipython_extensions/master/nbtoc.py
# Optional for index view
#%load_ext nbtoc
#%nbtoc

## Versions

%install_ext http://raw.github.com/jrjohansson/version_information/master/version_information.py
%load_ext version_information
%version_information numpy, scipy, matplotlib, sympy, pyopencl

In [3]:
import pyopencl as pycl
import numpy as np

In [4]:
pycl.VERSION

(2016, 2, 1)

In [5]:
vCL = pycl.get_cl_header_version()
print "OpenCL version {}.{}".format(vCL[0],vCL[1])

OpenCL version 1.2


## Exploring your GPU device.

Platform

In [6]:
platforms = pycl.get_platforms()

In [7]:
print platforms

[<pyopencl.Platform 'NVIDIA CUDA' at 0x1fd1c10>]


In [10]:
plCUDA = platforms[0]
#plAMD  = platforms[0]

In [11]:
#devCL = plAMD.get_devices()
devCU = plCUDA.get_devices()

In [13]:
#devCL, 
devCU

[<pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x20c4630>,
 <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x225c7f0>,
 <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2610>,
 <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2660>]

In [19]:
devs = []
i = 0
for pl in platforms:
    print pl.name
    print pl.version
    print pl.vendor
    print pl.extensions
    print pl.profile
    print '______________'
    print '              '
    devs.append(pl.get_devices()[i])
    i+=1

NVIDIA CUDA
OpenCL 1.2 CUDA 8.0.0
NVIDIA Corporation
cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_nv_copy_opts
FULL_PROFILE
______________
              


In [20]:
devs

[<pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x20c4630>]

In [21]:
a=devCU[0]

In [22]:
a.platform.name

'NVIDIA CUDA'

Devices and properties

In [24]:
for device in devCU:
        print("---------------------------------------------------------------")
        print("Device name:", device.name)
        print("Device type:", pycl.device_type.to_string(device.type))
        print("Device memory: ", device.global_mem_size//1024//1024, 'MB')
        print("Device max clock speed:", device.max_clock_frequency, 'MHz')
        print("Device compute units:", device.max_compute_units)
        print("Device max work group size:", device.max_work_group_size)
        if device.platform.name =='NVIDIA CUDA':
            print("Device warp size:", device.warp_size_nv)
        print("====== IMAGE ======")
        print('Device image support:', device.image_support)
        print('Device image 2D max dimensions: [', device.image2d_max_height,',',device.image2d_max_width,']')
        print('Device image 3D max dimensions: [', device.image3d_max_height,',',device.image3d_max_width,',',device.image3d_max_depth,']')

---------------------------------------------------------------
('Device name:', 'GeForce GTX 1080')
('Device type:', 'GPU')
('Device memory: ', 8114L, 'MB')
('Device max clock speed:', 1847, 'MHz')
('Device compute units:', 20)
('Device max work group size:', 1024L)
('Device warp size:', 32)
('Device image support:', 1)
('Device image 2D max dimensions: [', 32768L, ',', 16384L, ']')
('Device image 3D max dimensions: [', 16384L, ',', 16384L, ',', 16384L, ']')
---------------------------------------------------------------
('Device name:', 'GeForce GTX 1080')
('Device type:', 'GPU')
('Device memory: ', 8114L, 'MB')
('Device max clock speed:', 1847, 'MHz')
('Device compute units:', 20)
('Device max work group size:', 1024L)
('Device warp size:', 32)
('Device image support:', 1)
('Device image 2D max dimensions: [', 32768L, ',', 16384L, ']')
('Device image 3D max dimensions: [', 16384L, ',', 16384L, ',', 16384L, ']')
---------------------------------------------------------------
('Device

In [25]:
devs[0].double_fp_config

63L

## Default Context and Arrays

**Default Context**

In [26]:
ctx = pycl.Context(devCU)
queue = pycl.CommandQueue(ctx)

In [27]:
ctx = pycl.Context

In [28]:
ctx = pycl.create_some_context

In [29]:
dtype = np.float32
N = 10

In [30]:
import pyopencl.array as cl_array

These alredy in GPU Device (Default: device = 0)

In [31]:
a_gpu = cl_array.to_device(queue, np.random.rand(N).astype(dtype))
b_gpu = cl_array.to_device(queue, np.ones(N).astype(dtype))
c_gpu = cl_array.to_device(queue, np.zeros(N).astype(dtype))

  ´a_gpu´ is a special data struct that manage numpy type array in the Device. This is a powerful tool shuch as we can use these structures as the classic numpy case (formally these data is a map in the DEVICE). This data structure is called ARRAY and is different from the linear mem alloc of OpenCL.

### Array properties

In [32]:
print 'Classic print: ',a_gpu
print 'Context own  : ',a_gpu.context.devices
print 'Data struct  : ',a_gpu.data
print 'Kernel dims? : ',a_gpu.get_sizes(queue)
print 'Type         : ',a_gpu.dtype
print 'Size (Bytes) : ',a_gpu.nbytes
print 'Length       : ',a_gpu.size
print 'Shape        : ',a_gpu.shape
print 'Show astype  : ',a_gpu.view(dtype=np.int16)

Classic print:  [ 0.7385717   0.11840069  0.71266776  0.89199919  0.80601317  0.7953859
  0.78386664  0.90044475  0.43459898  0.56051666]
Context own  :  [<pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x20c4630>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x225c7f0>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2610>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2660>]
Data struct  :  <pyopencl.cffi_cl.Buffer object at 0x7fed54051a50>
Kernel dims? :  ((32,), (32,))
Type         :  float32
Size (Bytes) :  40
Length       :  10
Shape        :  (10,)
Show astype  :  [  4873  16189  31760  15858  29029  16182  23055  16228  22241  16206
 -24983  16203 -21636  16200 -31860  16230 -31806  16094  32261  16143]


This class of array structures in DEVICE have many operation ready for use (All in parallel optimized)

In [33]:
(a_gpu+b_gpu).view() #works as print

array([ 1.73857164,  1.11840069,  1.7126677 ,  1.89199924,  1.80601311,
        1.79538584,  1.78386664,  1.90044475,  1.43459892,  1.5605166 ], dtype=float32)

In [34]:
c_gpu = a_gpu*b_gpu #direct asignation between arrays

In [35]:
print c_gpu, a_gpu

[ 0.7385717   0.11840069  0.71266776  0.89199919  0.80601317  0.7953859
  0.78386664  0.90044475  0.43459898  0.56051666] [ 0.7385717   0.11840069  0.71266776  0.89199919  0.80601317  0.7953859
  0.78386664  0.90044475  0.43459898  0.56051666]


In [36]:
assert (c_gpu.get()).all() == (a_gpu.get()).all() #get to evaluate the boolen in HOST

In [37]:
pycl.characterize.usable_local_mem_size(devs[0], nargs=None)

49152L

More dimension!

In [38]:
d_gpu = cl_array.to_device(queue, np.ones([N,N,N,N]).astype(dtype))
e_gpu = cl_array.to_device(queue, np.random.rand(N*N*N*N).astype(dtype))

In [39]:
#print 'Classic print: ',d_gpu
print 'Context own  : ',d_gpu.context.devices
print 'Data struct  : ',d_gpu.data
print 'Kernel dims? : ',d_gpu.get_sizes(queue)
print 'Type         : ',d_gpu.dtype
print 'Size (Bytes) : ',d_gpu.nbytes
print 'Length       : ',d_gpu.size
print 'Shape        : ',d_gpu.shape
#print 'Show astype  : ',d_gpu.view(dtype=np.int16) #may not func poperly?

Context own  :  [<pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x20c4630>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x225c7f0>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2610>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2660>]
Data struct  :  <pyopencl.cffi_cl.Buffer object at 0x7fed6cc83e50>
Kernel dims? :  ((10016,), (32,))
Type         :  float32
Size (Bytes) :  40000
Length       :  10000
Shape        :  (10, 10, 10, 10)


In [40]:
e_gpu=e_gpu.reshape([N,N,N,N])

In [41]:
print 'Context own  : ',e_gpu.context.devices
print 'Data struct  : ',e_gpu.data
print 'Kernel dims? : ',e_gpu.get_sizes(queue)
print 'Type         : ',e_gpu.dtype
print 'Size (Bytes) : ',e_gpu.nbytes
print 'Length       : ',e_gpu.size
print 'Shape        : ',e_gpu.shape

Context own  :  [<pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x20c4630>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x225c7f0>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2610>, <pyopencl.Device 'GeForce GTX 1080' on 'NVIDIA CUDA' at 0x22a2660>]
Data struct  :  <pyopencl.cffi_cl.Buffer object at 0x7fed2071e150>
Kernel dims? :  ((10016,), (32,))
Type         :  float32
Size (Bytes) :  40000
Length       :  10000
Shape        :  (10, 10, 10, 10)


Other form of constructing arrays

In [42]:
other_gpu = cl_array.zeros_like(a_gpu)
other_gpu = cl_array.empty_like(a_gpu)
other2_gpu = cl_array.arange(queue,1.,10.,0.5,dtype=dtype)

In [43]:
other2_gpu

array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
        6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5], dtype=float32)

### Array math implemented

**REDUCTIONS**

In [44]:
result = pycl.array.sum(d_gpu).get() # Get function copy data to HOST
result_gpu = pycl.array.sum(e_gpu)

In [45]:
type(result),result

(numpy.ndarray, array(10000.0, dtype=float32))

In [46]:
type(result_gpu)

pyopencl.array.Array

In [47]:
pycl.array.dot(e_gpu,d_gpu)

array(4987.8896484375, dtype=float32)

In [48]:
print pycl.array.min(e_gpu).get()
print pycl.array.max(e_gpu).get()

7.30476604076e-05
0.999942243099


**FUNCTIONS**

In [49]:
from pyopencl import clmath 

In [50]:
clmath.cos(a_gpu)

array([ 0.7394309 ,  0.99299884,  0.75662023,  0.62785727,  0.69238055,
        0.70000923,  0.70818889,  0.62126154,  0.90703899,  0.84698057], dtype=float32)

In [51]:
clmath.sqrt(a_gpu)

array([ 0.85940194,  0.34409401,  0.8441965 ,  0.94445705,  0.89778233,
        0.89184409,  0.88536245,  0.94891763,  0.6592412 ,  0.7486766 ], dtype=float32)

**KERNELS**