In [37]:
from time import time
import pyopencl as cl
import numpy as np

In [38]:
# Benchmark inputs: two float32 vectors of N_ELEMENTS uniform samples in [0, 1).
# A locally seeded generator keeps the data identical on every
# Restart & Run All without touching numpy's global random state.
N_ELEMENTS = 1000
rng = np.random.RandomState(42)
a = rng.rand(N_ELEMENTS).astype(np.float32)
b = rng.rand(N_ELEMENTS).astype(np.float32)
# CPU
def cpu_sum(a,b):
    """Naive CPU baseline: ``c[i] = sum_j (a[i] + b[j])``.

    The double Python loop is intentional; this function exists to provide a
    slow, easy-to-verify reference timing, so it is not vectorised.

    Parameters
    ----------
    a, b : 1-D numpy arrays
        Input vectors. They may have different lengths.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``a``; element ``i`` holds the
        accumulated sum of ``a[i] + b[j]`` over every ``j``.

    Side effects
    ------------
    Prints the wall-clock time spent in the loop.
    """
    n_a = a.shape[0]
    n_b = b.shape[0]
    # The loop accumulates with +=, so the output must start at zero.
    # np.empty_like would leave arbitrary memory contents in the result.
    c_cpu = np.zeros_like(a)
    cpu_start_time = time()
    for i in range(n_a):
        for j in range(n_b):
            c_cpu[i] += a[i] + b[j]
    cpu_end_time = time()
    print("CPU time: ", cpu_end_time - cpu_start_time)
    return c_cpu
# GPU
def gpu_sum(a,b):
    """OpenCL implementation of ``c[i] = sum_j (a[i] + b[j])``.

    Runs one work-item per element of ``a`` on the first device of the first
    OpenCL platform. Each work-item loops over all of ``b``.

    Parameters
    ----------
    a, b : 1-D numpy arrays
        Input vectors. They are converted to contiguous float32 if they are
        not already, because the kernel reads raw 32-bit floats.

    Returns
    -------
    numpy.ndarray
        float32 array shaped like ``a`` holding the per-element sums.

    Side effects
    ------------
    Prints the device-side kernel time (from OpenCL profiling events) and the
    host wall-clock time for launch plus read-back. Context creation, buffer
    upload and kernel compilation happen before the host timer starts and are
    therefore not included in "GPU Total Time".
    """
    # No copy is made when the inputs are already contiguous float32.
    a = np.ascontiguousarray(a, dtype=np.float32)
    b = np.ascontiguousarray(b, dtype=np.float32)

    platform = cl.get_platforms()[0]
    device = platform.get_devices()[0]
    context = cl.Context([device])
    # PROFILING_ENABLE is required for event.profile.start/end below.
    queue = cl.CommandQueue(context, device, properties=cl.command_queue_properties.PROFILING_ENABLE)

    mf = cl.mem_flags
    a_buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
    b_buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
    # The kernel only ever stores to c, so WRITE_ONLY is sufficient and the
    # buffer does not need to be initialised.
    c_buffer = cl.Buffer(context, mf.WRITE_ONLY, a.nbytes)

    # Accumulate in a private variable and store once: this avoids reading the
    # uninitialised output buffer and saves a global-memory round trip per
    # iteration. n is the length of b, passed in rather than hard-coded.
    kernel = """
    __kernel void sum(__global const float *a,
                      __global const float *b,
                      __global float *c,
                      const int n) {
        int i = get_global_id(0);
        float a_i = a[i];
        float acc = 0.0f;
        for (int j = 0; j < n; j++) {
            acc += a_i + b[j];
        }
        c[i] = acc;
    }
    """
    program = cl.Program(context, kernel).build()

    gpu_start_time = time()
    # Scalar kernel arguments must be passed as sized numpy types.
    event = program.sum(queue, a.shape, None, a_buffer, b_buffer, c_buffer, np.int32(b.shape[0]))
    event.wait()
    # Profiling timestamps are in nanoseconds.
    elapsed = 1e-9 * (event.profile.end - event.profile.start)
    print("GPU Kernel Time: {0} s".format(elapsed)) 
    c_gpu = np.empty_like(a)
    # enqueue_copy takes (queue, dest, src): read the device buffer into host memory.
    cl.enqueue_copy(queue, c_gpu, c_buffer).wait()
    gpu_end_time = time()
    print("GPU Total Time: ", gpu_end_time - gpu_start_time)
    return c_gpu

In [39]:
# Run the pure-Python loop implementation on the two input vectors;
# cpu_sum prints its own wall-clock time for the loop.
c_cpu = cpu_sum(a,b)

CPU time:  0.2631964683532715


In [40]:
# Run the OpenCL implementation on the same inputs; gpu_sum prints the
# profiled kernel time and the host-side time for launch plus copy.
c_gpu = gpu_sum(a,b)

GPU Kernel Time: 8.6601e-05 s
GPU Total Time:  0.02225518226623535


  warn("Non-empty compiler output encountered. Set the "
