forked from boostorg/compute
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_command_queue.cpp
352 lines (285 loc) · 10.7 KB
/
test_command_queue.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#define BOOST_TEST_MODULE TestCommandQueue
#include <boost/test/unit_test.hpp>
#include <iostream>
#include <boost/compute/kernel.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/fill.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/utility/dim.hpp>
#include <boost/compute/utility/source.hpp>
#include <boost/compute/detail/diagnostic.hpp>
#include "check_macros.hpp"
#include "context_setup.hpp"
namespace bc = boost::compute;
namespace compute = boost::compute;
BOOST_AUTO_TEST_CASE(get_context)
{
BOOST_VERIFY(queue.get_context() == context);
BOOST_VERIFY(queue.get_info<CL_QUEUE_CONTEXT>() == context.get());
}
BOOST_AUTO_TEST_CASE(get_device)
{
BOOST_VERIFY(queue.get_info<CL_QUEUE_DEVICE>() == device.get());
}
BOOST_AUTO_TEST_CASE(equality_operator)
{
compute::command_queue queue1(context, device);
BOOST_CHECK(queue1 == queue1);
compute::command_queue queue2 = queue1;
BOOST_CHECK(queue1 == queue2);
compute::command_queue queue3(context, device);
BOOST_CHECK(queue1 != queue3);
}
BOOST_AUTO_TEST_CASE(event_profiling)
{
bc::command_queue queue(context, device, bc::command_queue::enable_profiling);
int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
bc::buffer buffer(context, sizeof(data));
bc::event event =
queue.enqueue_write_buffer_async(buffer,
0,
sizeof(data),
static_cast<const void *>(data));
queue.finish();
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
}
BOOST_AUTO_TEST_CASE(kernel_profiling)
{
// create queue with profiling enabled
boost::compute::command_queue queue(
context, device, boost::compute::command_queue::enable_profiling
);
// input data
int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
boost::compute::buffer buffer(context, sizeof(data));
// copy input data to device
queue.enqueue_write_buffer(buffer, 0, sizeof(data), data);
// setup kernel
const char source[] =
"__kernel void iscal(__global int *buffer, int alpha)\n"
"{\n"
" buffer[get_global_id(0)] *= alpha;\n"
"}\n";
boost::compute::program program =
boost::compute::program::create_with_source(source, context);
program.build();
boost::compute::kernel kernel(program, "iscal");
kernel.set_arg(0, buffer);
kernel.set_arg(1, 2);
// execute kernel
size_t global_work_offset = 0;
size_t global_work_size = 8;
boost::compute::event event =
queue.enqueue_nd_range_kernel(kernel,
size_t(1),
&global_work_offset,
&global_work_size,
0);
// wait until kernel is finished
event.wait();
// check profiling information
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
// read results back to host
queue.enqueue_read_buffer(buffer, 0, sizeof(data), data);
// check results
BOOST_CHECK_EQUAL(data[0], 2);
BOOST_CHECK_EQUAL(data[1], 4);
BOOST_CHECK_EQUAL(data[2], 6);
BOOST_CHECK_EQUAL(data[3], 8);
BOOST_CHECK_EQUAL(data[4], 10);
BOOST_CHECK_EQUAL(data[5], 12);
BOOST_CHECK_EQUAL(data[6], 14);
BOOST_CHECK_EQUAL(data[7], 16);
}
BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue)
{
// create cl_command_queue
cl_command_queue cl_queue;
#ifdef BOOST_COMPUTE_CL_VERSION_2_0
if (device.check_version(2, 0)){ // runtime check
cl_queue =
clCreateCommandQueueWithProperties(context, device.id(), 0, 0);
} else
#endif // BOOST_COMPUTE_CL_VERSION_2_0
{
// Suppress deprecated declarations warning
BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
cl_queue =
clCreateCommandQueue(context, device.id(), 0, 0);
BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
}
BOOST_VERIFY(cl_queue);
// create boost::compute::command_queue
boost::compute::command_queue queue(cl_queue);
// check queue
BOOST_CHECK(queue.get_context() == context);
BOOST_CHECK(cl_command_queue(queue) == cl_queue);
// cleanup cl_command_queue
clReleaseCommandQueue(cl_queue);
}
#ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_AUTO_TEST_CASE(write_buffer_rect)
{
REQUIRES_OPENCL_VERSION(1, 1);
// skip this test on AMD GPUs due to a buggy implementation
// of the clEnqueueWriteBufferRect() function
if(device.vendor() == "Advanced Micro Devices, Inc." &&
device.type() & boost::compute::device::gpu){
std::cerr << "skipping write_buffer_rect test on AMD GPU" << std::endl;
return;
}
int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
boost::compute::buffer buffer(context, 8 * sizeof(int));
// copy every other value to the buffer
size_t buffer_origin[] = { 0, 0, 0 };
size_t host_origin[] = { 0, 0, 0 };
size_t region[] = { sizeof(int), sizeof(int), 1 };
queue.enqueue_write_buffer_rect(
buffer,
buffer_origin,
host_origin,
region,
sizeof(int),
0,
2 * sizeof(int),
0,
data
);
// check output values
int output[4];
queue.enqueue_read_buffer(buffer, 0, 4 * sizeof(int), output);
BOOST_CHECK_EQUAL(output[0], 1);
BOOST_CHECK_EQUAL(output[1], 3);
BOOST_CHECK_EQUAL(output[2], 5);
BOOST_CHECK_EQUAL(output[3], 7);
}
#endif // BOOST_COMPUTE_CL_VERSION_1_1
static bool nullary_kernel_executed = false;
static void nullary_kernel()
{
nullary_kernel_executed = true;
}
BOOST_AUTO_TEST_CASE(native_kernel)
{
cl_device_exec_capabilities exec_capabilities =
device.get_info<CL_DEVICE_EXECUTION_CAPABILITIES>();
if(!(exec_capabilities & CL_EXEC_NATIVE_KERNEL)){
std::cerr << "skipping native_kernel test: "
<< "device does not support CL_EXEC_NATIVE_KERNEL"
<< std::endl;
return;
}
compute::vector<int> vector(1000, context);
compute::fill(vector.begin(), vector.end(), 42, queue);
BOOST_CHECK_EQUAL(nullary_kernel_executed, false);
queue.enqueue_native_kernel(&nullary_kernel);
queue.finish();
BOOST_CHECK_EQUAL(nullary_kernel_executed, true);
}
BOOST_AUTO_TEST_CASE(copy_with_wait_list)
{
int data1[] = { 1, 3, 5, 7 };
int data2[] = { 2, 4, 6, 8 };
compute::buffer buf1(context, 4 * sizeof(int));
compute::buffer buf2(context, 4 * sizeof(int));
compute::event write_event1 =
queue.enqueue_write_buffer_async(buf1, 0, buf1.size(), data1);
compute::event write_event2 =
queue.enqueue_write_buffer_async(buf2, 0, buf2.size(), data2);
compute::event read_event1 =
queue.enqueue_read_buffer_async(buf1, 0, buf1.size(), data2, write_event1);
compute::event read_event2 =
queue.enqueue_read_buffer_async(buf2, 0, buf2.size(), data1, write_event2);
read_event1.wait();
read_event2.wait();
CHECK_HOST_RANGE_EQUAL(int, 4, data1, (2, 4, 6, 8));
CHECK_HOST_RANGE_EQUAL(int, 4, data2, (1, 3, 5, 7));
}
#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)
{
using boost::compute::dim;
using boost::compute::uint_;
const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
__kernel void foo(__global int *output1, __global int *output2)
{
output1[get_global_id(0)] = get_local_id(0);
output2[get_global_id(1)] = get_local_id(1);
}
);
compute::kernel kernel =
compute::kernel::create_with_source(source, "foo", context);
compute::vector<uint_> output1(4, context);
compute::vector<uint_> output2(4, context);
kernel.set_arg(0, output1);
kernel.set_arg(1, output2);
queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(1, 1));
CHECK_RANGE_EQUAL(int, 4, output1, (0, 0, 0, 0));
CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
// Maximum number of work-items that can be specified in each
// dimension of the work-group to clEnqueueNDRangeKernel.
std::vector<size_t> max_work_item_sizes =
device.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
if(max_work_item_sizes[0] < size_t(2)) {
return;
}
queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 1));
CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
if(max_work_item_sizes[1] < size_t(2)) {
return;
}
queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 2));
CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
CHECK_RANGE_EQUAL(int, 4, output2, (0, 1, 0, 1));
}
#endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
#ifdef BOOST_COMPUTE_CL_VERSION_2_1
BOOST_AUTO_TEST_CASE(get_default_device_queue)
{
REQUIRES_OPENCL_VERSION(2, 1);
boost::compute::command_queue default_device_queue(
context, device,
boost::compute::command_queue::on_device |
boost::compute::command_queue::on_device_default |
boost::compute::command_queue::enable_out_of_order_execution
);
BOOST_CHECK_NO_THROW(queue.get_info<CL_QUEUE_DEVICE_DEFAULT>());
BOOST_CHECK_EQUAL(
queue.get_default_device_queue(),
default_device_queue
);
}
BOOST_AUTO_TEST_CASE(set_as_default_device_queue)
{
REQUIRES_OPENCL_VERSION(2, 1);
boost::compute::command_queue new_default_device_queue(
context, device,
boost::compute::command_queue::on_device |
boost::compute::command_queue::enable_out_of_order_execution
);
new_default_device_queue.set_as_default_device_queue();
BOOST_CHECK_EQUAL(
queue.get_default_device_queue(),
new_default_device_queue
);
}
#endif
BOOST_AUTO_TEST_SUITE_END()