Skip to content

Commit 804b2ac

Browse files
authored
Expose DMatrix API for CUDA columnar and array. (dmlc#7217)
* Use JSON-encoded configurations. * Expose them in the header file.
1 parent 68a2c7b commit 804b2ac

File tree

4 files changed

+57
-24
lines changed

4 files changed

+57
-24
lines changed

include/xgboost/c_api.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,36 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
210210
DMatrixHandle* out,
211211
int nthread);
212212

213+
/*!
214+
* \brief Create DMatrix from CUDA columnar format. (cuDF)
215+
* \param data Array of JSON encoded __cuda_array_interface__ for each column.
216+
* \param json_config JSON encoded configuration. Required values are:
217+
*
218+
* - missing
219+
* - nthread
220+
*
221+
* \param out created dmatrix
222+
* \return 0 when success, -1 when failure happens
223+
*/
224+
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
225+
char const* json_config,
226+
DMatrixHandle *out);
227+
228+
/*!
229+
* \brief Create DMatrix from CUDA array.
230+
* \param data JSON encoded __cuda_array_interface__ for array data.
231+
* \param json_config JSON encoded configuration. Required values are:
232+
*
233+
* - missing
234+
* - nthread
235+
*
236+
* \param out created dmatrix
237+
* \return 0 when success, -1 when failure happens
238+
*/
239+
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
240+
char const* json_config,
241+
DMatrixHandle *out);
242+
213243
/*
214244
* ========================== Begin data callback APIs =========================
215245
*

python-package/xgboost/data.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -421,17 +421,17 @@ def _transform_cudf_df(data, feature_names, feature_types, enable_categorical):
421421

422422
def _from_cudf_df(
423423
data, missing, nthread, feature_names, feature_types, enable_categorical
424-
):
424+
) -> Tuple[ctypes.c_void_p, Any, Any]:
425425
data, feature_names, feature_types = _transform_cudf_df(
426426
data, feature_names, feature_types, enable_categorical
427427
)
428428
_, interfaces_str = _cudf_array_interfaces(data)
429429
handle = ctypes.c_void_p()
430+
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
430431
_check_call(
431-
_LIB.XGDMatrixCreateFromArrayInterfaceColumns(
432+
_LIB.XGDMatrixCreateFromCudaColumnar(
432433
interfaces_str,
433-
ctypes.c_float(missing),
434-
ctypes.c_int(nthread),
434+
config,
435435
ctypes.byref(handle),
436436
)
437437
)
@@ -469,11 +469,11 @@ def _from_cupy_array(data, missing, nthread, feature_names, feature_types):
469469
data = _transform_cupy_array(data)
470470
interface_str = _cuda_array_interface(data)
471471
handle = ctypes.c_void_p()
472+
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
472473
_check_call(
473-
_LIB.XGDMatrixCreateFromArrayInterface(
474+
_LIB.XGDMatrixCreateFromCudaArrayInterface(
474475
interface_str,
475-
ctypes.c_float(missing),
476-
ctypes.c_int(nthread),
476+
config,
477477
ctypes.byref(handle)))
478478
return handle, feature_names, feature_types
479479

src/c_api/c_api.cc

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -169,19 +169,17 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
169169
}
170170

171171
#ifndef XGBOOST_USE_CUDA
172-
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
173-
bst_float missing,
174-
int nthread,
175-
DMatrixHandle* out) {
172+
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
173+
char const* c_json_config,
174+
DMatrixHandle *out) {
176175
API_BEGIN();
177176
common::AssertGPUSupport();
178177
API_END();
179178
}
180179

181-
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
182-
bst_float missing,
183-
int nthread,
184-
DMatrixHandle* out) {
180+
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
181+
char const* c_json_config,
182+
DMatrixHandle *out) {
185183
API_BEGIN();
186184
common::AssertGPUSupport();
187185
API_END();

src/c_api/c_api.cu

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,28 @@ void XGBoostAPIGuard::RestoreGPUAttribute() {
2626

2727
using namespace xgboost; // NOLINT
2828

29-
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
30-
bst_float missing,
31-
int nthread,
32-
DMatrixHandle* out) {
29+
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
30+
char const* c_json_config,
31+
DMatrixHandle *out) {
3332
API_BEGIN();
34-
std::string json_str{c_json_strs};
33+
std::string json_str{data};
34+
auto config = Json::Load(StringView{c_json_config});
35+
float missing = GetMissing(config);
36+
auto nthread = get<Integer const>(config["nthread"]);
3537
data::CudfAdapter adapter(json_str);
3638
*out =
3739
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
3840
API_END();
3941
}
4042

41-
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
42-
bst_float missing, int nthread,
43-
DMatrixHandle* out) {
43+
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
44+
char const* c_json_config,
45+
DMatrixHandle *out) {
4446
API_BEGIN();
45-
std::string json_str{c_json_strs};
47+
std::string json_str{data};
48+
auto config = Json::Load(StringView{c_json_config});
49+
float missing = GetMissing(config);
50+
auto nthread = get<Integer const>(config["nthread"]);
4651
data::CupyAdapter adapter(json_str);
4752
*out =
4853
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));

0 commit comments

Comments
 (0)