Skip to content

[UR] Add handles to opencl adapter #17572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions sycl/source/detail/buffer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,17 @@ buffer_impl::getNativeVector(backend BackendName) const {

auto Adapter = Platform->getAdapter();

if (Platform->getBackend() == backend::opencl) {
__SYCL_OCL_CALL(clRetainMemObject, ur::cast<cl_mem>(NativeMem));
}

ur_native_handle_t Handle = 0;
// When doing buffer interop we don't know what device the memory should be
// resident on, so pass nullptr for Device param. Buffer interop may not be
// supported by all backends.
Adapter->call<UrApiKind::urMemGetNativeHandle>(NativeMem, /*Dev*/ nullptr,
&Handle);
Handles.push_back(Handle);

if (Platform->getBackend() == backend::opencl) {
__SYCL_OCL_CALL(clRetainMemObject, ur::cast<cl_mem>(Handle));
}
}

addInteropObject(Handles);
Expand Down
7 changes: 4 additions & 3 deletions sycl/source/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) {
ur_kernel_handle_t hKernel = nullptr;
ur_native_handle_t nativeHandle =
reinterpret_cast<ur_native_handle_t>(ClKernel);
Adapter->call<detail::UrApiKind::urKernelCreateWithNativeHandle>(
nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(),
nullptr, nullptr, &hKernel);
Adapter
->call<errc::invalid, detail::UrApiKind::urKernelCreateWithNativeHandle>(
nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(),
nullptr, nullptr, &hKernel);
impl = std::make_shared<detail::kernel_impl>(
hKernel, detail::getSyclObjImpl(SyclContext), nullptr, nullptr);
// This is a special interop constructor for OpenCL, so the kernel must be
Expand Down
5 changes: 5 additions & 0 deletions unified-runtime/source/adapters/opencl/adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "logger/ur_logger.hpp"
#include "platform.hpp"

#include "CL/cl.h"
#include "logger/ur_logger.hpp"
Expand All @@ -18,6 +20,9 @@ struct ur_adapter_handle_t_ {
std::mutex Mutex;
logger::Logger &log = logger::get_logger("opencl");

std::vector<std::unique_ptr<ur_platform_handle_t_>> URPlatforms;
uint32_t NumPlatforms = 0;

// Function pointers to core OpenCL entry points which may not exist in older
// versions of the OpenCL-ICD-Loader are tracked here and initialized by
// dynamically loading the symbol by name.
Expand Down
89 changes: 49 additions & 40 deletions unified-runtime/source/adapters/opencl/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@

#include "command_buffer.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include "kernel.hpp"
#include "memory.hpp"
#include "queue.hpp"

/// The ur_exp_command_buffer_handle_t_ destructor calls the OpenCL
/// clReleaseCommandBufferKHR entry point to free the underlying object.
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
urQueueRelease(hInternalQueue);

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->CLContext;
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clReleaseCommandBufferKHR)>(
Expand All @@ -43,7 +48,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
UR_RETURN_ON_FAILURE(
urQueueCreate(hContext, hDevice, &QueueProperties, &Queue));

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->CLContext;
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCreateCommandBufferKHR)>(
Expand All @@ -53,7 +58,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
const bool IsUpdatable = pCommandBufferDesc->isUpdatable;

ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
cl_device_id CLDevice = cl_adapter::cast<cl_device_id>(hDevice);
cl_device_id CLDevice = hDevice->CLDevice;
CL_RETURN_ON_FAILURE(
getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities));
bool DeviceSupportsUpdate = UpdateCapabilities > 0;
Expand All @@ -67,16 +72,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0};

cl_int Res = CL_SUCCESS;
auto CLCommandBuffer = clCreateCommandBufferKHR(
1, cl_adapter::cast<cl_command_queue *>(&Queue), Properties, &Res);
const cl_command_queue CLQueue = Queue->CLQueue;
auto CLCommandBuffer =
clCreateCommandBufferKHR(1, &CLQueue, Properties, &Res);
CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer);

try {
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder);
*phCommandBuffer = URCommandBuffer.release();
} catch (...) {
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

CL_RETURN_ON_FAILURE(Res);
Expand All @@ -101,7 +109,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_ASSERT(!hCommandBuffer->IsFinalized, UR_RESULT_ERROR_INVALID_OPERATION);
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clFinalizeCommandBufferKHR)>(
Expand Down Expand Up @@ -133,7 +141,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
UR_ASSERT(!(phCommandHandle && !hCommandBuffer->IsUpdatable),
UR_RESULT_ERROR_INVALID_OPERATION);

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandNDRangeKernelKHR)>(
Expand Down Expand Up @@ -161,10 +169,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
hCommandBuffer->CLCommandBuffer, nullptr, Properties,
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList,
RetSyncPoint, OutCommandHandle));
hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->CLKernel,
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, WaitListSize,
SyncPointWaitList, RetSyncPoint, OutCommandHandle));

try {
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
Expand Down Expand Up @@ -224,7 +231,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
(void)phEventWaitList;
(void)phEvent;
(void)phCommand;
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferKHR)>(
Expand All @@ -237,10 +244,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint,
nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory,
hDstMem->CLMemory, srcOffset, dstOffset, size, WaitListSize,
SyncPointWaitList, RetSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand All @@ -267,7 +273,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z};
size_t OpenCLRegion[3]{region.width, region.height, region.depth};

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferRectKHR)>(
Expand All @@ -280,11 +286,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint,
nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory,
hDstMem->CLMemory, OpenCLOriginRect, OpenCLDstRect, OpenCLRegion,
srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, WaitListSize,
SyncPointWaitList, RetSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -376,7 +381,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
[[maybe_unused]] ur_event_handle_t *phEvent,
[[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandFillBufferKHR)>(
Expand All @@ -389,9 +394,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
IsInOrder ? nullptr : pSyncPointWaitList;
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
WaitListSize, SyncPointWaitList, RetSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->CLMemory,
pPattern, patternSize, offset, size, WaitListSize, SyncPointWaitList,
RetSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -447,21 +452,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clEnqueueCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache,
cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR));

const uint32_t NumberOfQueues = 1;

cl_event Event;
std::vector<cl_event> CLWaitEvents(numEventsInWaitList);
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->CLEvent;
}
cl_command_queue CLQueue = hQueue->CLQueue;
CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR(
NumberOfQueues, cl_adapter::cast<cl_command_queue *>(&hQueue),
hCommandBuffer->CLCommandBuffer, numEventsInWaitList,
cl_adapter::cast<const cl_event *>(phEventWaitList),
cl_adapter::cast<cl_event *>(phEvent)));
NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer,
numEventsInWaitList, CLWaitEvents.data(), ifUrEvent(phEvent, Event)));

UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent));
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -501,11 +510,11 @@ void updateKernelArgs(std::vector<cl_mutable_dispatch_arg_khr> &CLArgs,
for (uint32_t i = 0; i < NumMemobjArgs; i++) {
const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg =
ArgMemobjList[i];
cl_mem arg_value = URMemObjArg.hNewMemObjArg->CLMemory;
cl_mutable_dispatch_arg_khr CLArg{
URMemObjArg.argIndex, // arg_index
sizeof(cl_mem), // arg_size
cl_adapter::cast<const cl_mem *>(
&URMemObjArg.hNewMemObjArg) // arg_value
&arg_value // arg_value
};

CLArgs.push_back(CLArg);
Expand Down Expand Up @@ -549,7 +558,7 @@ ur_result_t validateCommandDesc(
// Verify that the device supports updating the aspects of the kernel that
// the user is requesting.
ur_device_handle_t URDevice = CommandBuffer->hDevice;
cl_device_id CLDevice = cl_adapter::cast<cl_device_id>(URDevice);
cl_device_id CLDevice = URDevice->CLDevice;

ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0;
CL_RETURN_ON_FAILURE(
Expand Down Expand Up @@ -601,7 +610,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
validateCommandDesc(hCommandBuffer, pUpdateKernelLaunch[i]));
}

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;

cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clUpdateMutableCommandsKHR)>(
Expand Down Expand Up @@ -657,8 +667,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
updateNDRange(CLLocalWorkSize, CommandWorkDim, LocalWorkSizePtr);
}

cl_mutable_command_khr CLCommand =
cl_adapter::cast<cl_mutable_command_khr>(Command->CLMutableCommand);
cl_mutable_command_khr CLCommand = Command->CLMutableCommand;
Config = cl_mutable_dispatch_config_khr{
CLCommand,
static_cast<cl_uint>(CLArgs.size()), // num_args
Expand Down Expand Up @@ -736,7 +745,7 @@ ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp(
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR =
nullptr;
UR_RETURN_ON_FAILURE(
Expand Down
14 changes: 0 additions & 14 deletions unified-runtime/source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,20 +156,6 @@ extern thread_local char ErrorMessage[MaxMessageSize];
// Utility function for setting a message and warning
[[maybe_unused]] void setErrorMessage(const char *Message,
ur_result_t ErrorCode);

/// Converts \p Value from type \p From to type \p To with compile-time
/// sanity checks.
///
/// Pointer sources are converted with reinterpret_cast and require the
/// destination to be a pointer as well. Non-pointer sources are converted
/// with static_cast and require matching size and signedness.
template <class To, class From> To cast(From Value) {
  constexpr bool SourceIsPointer = std::is_pointer_v<From>;

  if constexpr (!SourceIsPointer) {
    // Value-like types: only allow conversions that cannot truncate or
    // silently flip signedness.
    static_assert(sizeof(From) == sizeof(To), "Cast failed size check");
    static_assert(std::is_signed_v<From> == std::is_signed_v<To>,
                  "Cast failed sign check");
    return static_cast<To>(Value);
  } else {
    // Pointer types: both ends of the conversion must be pointers.
    static_assert(SourceIsPointer == std::is_pointer_v<To>,
                  "Cast failed pointer check");
    return reinterpret_cast<To>(Value);
  }
}
} // namespace cl_adapter

namespace cl_ext {
Expand Down
Loading