Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[UR] Add handles to opencl adapter #17572

Open
wants to merge 1 commit into
base: sycl
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions sycl/source/detail/buffer_impl.cpp
Original file line number Diff line number Diff line change
@@ -86,17 +86,17 @@ buffer_impl::getNativeVector(backend BackendName) const {

auto Adapter = Platform->getAdapter();

if (Platform->getBackend() == backend::opencl) {
__SYCL_OCL_CALL(clRetainMemObject, ur::cast<cl_mem>(NativeMem));
}

ur_native_handle_t Handle = 0;
// When doing buffer interop we don't know what device the memory should be
// resident on, so pass nullptr for Device param. Buffer interop may not be
// supported by all backends.
Adapter->call<UrApiKind::urMemGetNativeHandle>(NativeMem, /*Dev*/ nullptr,
&Handle);
Handles.push_back(Handle);

if (Platform->getBackend() == backend::opencl) {
__SYCL_OCL_CALL(clRetainMemObject, ur::cast<cl_mem>(Handle));
}
}

addInteropObject(Handles);
7 changes: 4 additions & 3 deletions sycl/source/kernel.cpp
Original file line number Diff line number Diff line change
@@ -22,9 +22,10 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) {
ur_kernel_handle_t hKernel = nullptr;
ur_native_handle_t nativeHandle =
reinterpret_cast<ur_native_handle_t>(ClKernel);
Adapter->call<detail::UrApiKind::urKernelCreateWithNativeHandle>(
nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(),
nullptr, nullptr, &hKernel);
Adapter
->call<errc::invalid, detail::UrApiKind::urKernelCreateWithNativeHandle>(
nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(),
nullptr, nullptr, &hKernel);
impl = std::make_shared<detail::kernel_impl>(
hKernel, detail::getSyclObjImpl(SyclContext), nullptr, nullptr);
// This is a special interop constructor for OpenCL, so the kernel must be
5 changes: 5 additions & 0 deletions unified-runtime/source/adapters/opencl/adapter.hpp
Original file line number Diff line number Diff line change
@@ -7,6 +7,8 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "logger/ur_logger.hpp"
#include "platform.hpp"

#include "CL/cl.h"
#include "logger/ur_logger.hpp"
@@ -18,6 +20,9 @@ struct ur_adapter_handle_t_ {
std::mutex Mutex;
logger::Logger &log = logger::get_logger("opencl");

std::vector<std::unique_ptr<ur_platform_handle_t_>> URPlatforms;
uint32_t NumPlatforms = 0;

// Function pointers to core OpenCL entry points which may not exist in older
// versions of the OpenCL-ICD-Loader are tracked here and initialized by
// dynamically loading the symbol by name.
100 changes: 59 additions & 41 deletions unified-runtime/source/adapters/opencl/command_buffer.cpp
Original file line number Diff line number Diff line change
@@ -10,13 +10,18 @@

#include "command_buffer.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include "kernel.hpp"
#include "memory.hpp"
#include "queue.hpp"

/// The ur_exp_command_buffer_handle_t_ destructor calls CL release
/// command-buffer to free the underlying object.
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
urQueueRelease(hInternalQueue);

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->CLContext;
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clReleaseCommandBufferKHR)>(
@@ -44,7 +49,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
UR_RETURN_ON_FAILURE(
urQueueCreate(hContext, hDevice, &QueueProperties, &Queue));

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->CLContext;
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCreateCommandBufferKHR)>(
@@ -54,7 +59,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
const bool IsUpdatable = pCommandBufferDesc->isUpdatable;

ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
cl_device_id CLDevice = cl_adapter::cast<cl_device_id>(hDevice);
cl_device_id CLDevice = hDevice->CLDevice;
CL_RETURN_ON_FAILURE(
getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities));
bool DeviceSupportsUpdate = UpdateCapabilities > 0;
@@ -68,16 +73,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0};

cl_int Res = CL_SUCCESS;
auto CLCommandBuffer = clCreateCommandBufferKHR(
1, cl_adapter::cast<cl_command_queue *>(&Queue), Properties, &Res);
const cl_command_queue CLQueue = Queue->CLQueue;
auto CLCommandBuffer =
clCreateCommandBufferKHR(1, &CLQueue, Properties, &Res);
CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer);

try {
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder);
*phCommandBuffer = URCommandBuffer.release();
} catch (...) {
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

CL_RETURN_ON_FAILURE(Res);
@@ -102,7 +110,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_ASSERT(!hCommandBuffer->IsFinalized, UR_RESULT_ERROR_INVALID_OPERATION);
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clFinalizeCommandBufferKHR)>(
@@ -134,7 +142,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
UR_ASSERT(!(phCommandHandle && !hCommandBuffer->IsUpdatable),
UR_RESULT_ERROR_INVALID_OPERATION);

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandNDRangeKernelKHR)>(
@@ -162,10 +170,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
hCommandBuffer->CLCommandBuffer, nullptr, Properties,
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList,
RetSyncPoint, OutCommandHandle));
hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->CLKernel,
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, WaitListSize,
SyncPointWaitList, RetSyncPoint, OutCommandHandle));

try {
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -225,7 +232,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
(void)phEventWaitList;
(void)phEvent;
(void)phCommand;
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferKHR)>(
@@ -238,10 +245,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint,
nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory,
hDstMem->CLMemory, srcOffset, dstOffset, size, WaitListSize,
SyncPointWaitList, RetSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
@@ -268,7 +274,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z};
size_t OpenCLRegion[3]{region.width, region.height, region.depth};

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferRectKHR)>(
@@ -281,11 +287,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint,
nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory,
hDstMem->CLMemory, OpenCLOriginRect, OpenCLDstRect, OpenCLRegion,
srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, WaitListSize,
SyncPointWaitList, RetSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
@@ -377,7 +382,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
[[maybe_unused]] ur_event_handle_t *phEvent,
[[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandFillBufferKHR)>(
@@ -390,9 +395,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
WaitListSize, SyncPointWaitList, RetSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->CLMemory,
pPattern, patternSize, offset, size, WaitListSize, SyncPointWaitList,
RetSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
@@ -448,21 +453,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clEnqueueCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache,
cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR));

const uint32_t NumberOfQueues = 1;

cl_event Event;
std::vector<cl_event> CLWaitEvents(numEventsInWaitList);
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->CLEvent;
}
cl_command_queue CLQueue = hQueue->CLQueue;
CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR(
NumberOfQueues, cl_adapter::cast<cl_command_queue *>(&hQueue),
hCommandBuffer->CLCommandBuffer, numEventsInWaitList,
cl_adapter::cast<const cl_event *>(phEventWaitList),
cl_adapter::cast<cl_event *>(phEvent)));

NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer,
numEventsInWaitList, CLWaitEvents.data(), &Event));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer if we could avoid creating a cl_event when the user hasn't passed a phEvent; this is exactly the kind of in-order queue use case that there is currently an effort to optimize DPC++ for.

if (phEvent) {
try {
auto UREvent =
std::make_unique<ur_event_handle_t_>(Event, hQueue->Context, hQueue);
*phEvent = UREvent.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}
return UR_RESULT_SUCCESS;
}

@@ -502,11 +520,11 @@ void updateKernelArgs(std::vector<cl_mutable_dispatch_arg_khr> &CLArgs,
for (uint32_t i = 0; i < NumMemobjArgs; i++) {
const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg =
ArgMemobjList[i];
cl_mem arg_value = URMemObjArg.hNewMemObjArg->CLMemory;
cl_mutable_dispatch_arg_khr CLArg{
URMemObjArg.argIndex, // arg_index
sizeof(cl_mem), // arg_size
cl_adapter::cast<const cl_mem *>(
&URMemObjArg.hNewMemObjArg) // arg_value
&arg_value // arg_value
};

CLArgs.push_back(CLArg);
@@ -550,7 +568,7 @@ ur_result_t validateCommandDesc(
// Verify that the device supports updating the aspects of the kernel that
// the user is requesting.
ur_device_handle_t URDevice = CommandBuffer->hDevice;
cl_device_id CLDevice = cl_adapter::cast<cl_device_id>(URDevice);
cl_device_id CLDevice = URDevice->CLDevice;

ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0;
CL_RETURN_ON_FAILURE(
@@ -602,7 +620,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
validateCommandDesc(hCommandBuffer, pUpdateKernelLaunch[i]));
}

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;

cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clUpdateMutableCommandsKHR)>(
@@ -658,8 +677,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
updateNDRange(CLLocalWorkSize, CommandWorkDim, LocalWorkSizePtr);
}

cl_mutable_command_khr CLCommand =
cl_adapter::cast<cl_mutable_command_khr>(Command->CLMutableCommand);
cl_mutable_command_khr CLCommand = Command->CLMutableCommand;
Config = cl_mutable_dispatch_config_khr{
CLCommand,
static_cast<cl_uint>(CLArgs.size()), // num_args
@@ -737,7 +755,7 @@ ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp(
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR =
nullptr;
UR_RETURN_ON_FAILURE(
14 changes: 0 additions & 14 deletions unified-runtime/source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
@@ -156,20 +156,6 @@ extern thread_local char ErrorMessage[MaxMessageSize];
// Utility function for setting a message and warning
[[maybe_unused]] void setErrorMessage(const char *Message,
ur_result_t ErrorCode);

/// Converts Value from type From to type To, validating the conversion at
/// compile time.
///
/// A pointer source requires a pointer destination and is converted with
/// reinterpret_cast. Any other source must match the destination in both size
/// and signedness, and is converted with static_cast.
///
/// \tparam To   Destination type; must be named explicitly by the caller.
/// \tparam From Source type; deduced from Value.
/// \param Value The value to convert.
/// \return Value converted to To.
template <class To, class From> To cast(From Value) {
  constexpr bool SourceIsPointer = std::is_pointer_v<From>;

  if constexpr (!SourceIsPointer) {
    // Non-pointer path: only conversions that keep width and signedness are
    // accepted.
    static_assert(sizeof(From) == sizeof(To), "Cast failed size check");
    static_assert(std::is_signed_v<From> == std::is_signed_v<To>,
                  "Cast failed sign check");
    return static_cast<To>(Value);
  } else {
    // Pointer path: the destination has to be a pointer as well.
    static_assert(SourceIsPointer == std::is_pointer_v<To>,
                  "Cast failed pointer check");
    return reinterpret_cast<To>(Value);
  }
}
} // namespace cl_adapter

namespace cl_ext {
Loading
Oops, something went wrong.