@@ -308,35 +308,40 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data, char const *
308
308
* used by JVM packages. It uses `XGBoostBatchCSR` to accept batches for CSR formatted
309
309
* input, and concatenate them into 1 final big CSR. The related functions are:
310
310
*
311
- * - \ ref XGBCallbackSetData
312
- * - \ ref XGBCallbackDataIterNext
313
- * - \ ref XGDMatrixCreateFromDataIter
311
+ * - @ref XGBCallbackSetData
312
+ * - @ref XGBCallbackDataIterNext
313
+ * - @ref XGDMatrixCreateFromDataIter
314
314
*
315
- * Another set is used by external data iterator. It accept foreign data iterators as
315
+ * Another set is used by external data iterator. It accepts foreign data iterators as
316
316
* callbacks. There are 2 different scenarios where users might want to pass in callbacks
317
- * instead of raw data. First it's the Quantile DMatrix used by hist and GPU Hist. For
318
- * this case, the data is first compressed by quantile sketching then merged. This is
319
- * particular useful for distributed setting as it eliminates 2 copies of data. 1 by a
320
- * `concat` from external library to make the data into a blob for normal DMatrix
321
- * initialization, another by the internal CSR copy of DMatrix. The second use case is
322
- * external memory support where users can pass a custom data iterator into XGBoost for
323
- * loading data in batches. There are short notes on each of the use cases in respected
324
- * DMatrix factory function.
317
+ * instead of raw data. The first is the Quantile DMatrix used by the hist and GPU-based
318
+ * hist tree methods. For this case, the data is first compressed by quantile sketching
319
+ * then merged. This is particularly useful for distributed settings as it eliminates 2
320
+ * copies of data: the first one by a `concat` from an external library to make the data into a
321
+ * blob for normal DMatrix initialization, another one by the internal CSR copy of
322
+ * DMatrix.
323
+ *
324
+ * The second use case is external memory support where users can pass a custom data
325
+ * iterator into XGBoost for loading data in batches. For both cases, the iterator is only
326
+ * used during the construction of the DMatrix and can be safely freed after construction
327
+ * finishes. There are short notes on each of the use cases in the respective DMatrix factory
328
+ * function.
325
329
*
326
330
* Related functions are:
327
331
*
328
332
* # Factory functions
329
- * - \ref XGDMatrixCreateFromCallback for external memory
330
- * - \ref XGQuantileDMatrixCreateFromCallback for quantile DMatrix
333
+ * - @ref XGDMatrixCreateFromCallback for external memory
334
+ * - @ref XGQuantileDMatrixCreateFromCallback for quantile DMatrix
335
+ * - @ref XGExtMemQuantileDMatrixCreateFromCallback for external memory quantile DMatrix
331
336
*
332
337
* # Proxy that callers can use to pass data to XGBoost
333
- * - \ ref XGProxyDMatrixCreate
334
- * - \ ref XGDMatrixCallbackNext
335
- * - \ ref DataIterResetCallback
336
- * - \ ref XGProxyDMatrixSetDataCudaArrayInterface
337
- * - \ ref XGProxyDMatrixSetDataCudaColumnar
338
- * - \ ref XGProxyDMatrixSetDataDense
339
- * - \ ref XGProxyDMatrixSetDataCSR
338
+ * - @ref XGProxyDMatrixCreate
339
+ * - @ref XGDMatrixCallbackNext
340
+ * - @ref DataIterResetCallback
341
+ * - @ref XGProxyDMatrixSetDataCudaArrayInterface
342
+ * - @ref XGProxyDMatrixSetDataCudaColumnar
343
+ * - @ref XGProxyDMatrixSetDataDense
344
+ * - @ref XGProxyDMatrixSetDataCSR
340
345
* - ... (data setters)
341
346
*
342
347
* @{
@@ -346,7 +351,7 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data, char const *
346
351
347
352
/*! \brief handle to an external data iterator */
348
353
typedef void *DataIterHandle; // NOLINT(*)
349
- /* ! \ brief handle to a internal data holder. */
354
+ /** @brief handle to an internal data holder. */
350
355
typedef void *DataHolderHandle; // NOLINT(*)
351
356
352
357
@@ -473,7 +478,7 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
473
478
*/
474
479
475
480
/**
476
- * @brief Create a Quantile DMatrix with data iterator.
481
+ * @brief Create a Quantile DMatrix with a data iterator.
477
482
*
478
483
* Short note for how to use the second set of callbacks for (GPU)Hist tree method:
479
484
*
@@ -494,7 +499,13 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
494
499
* - missing: Which value represents a missing value
495
500
* - nthread (optional): Number of threads used for initializing DMatrix.
496
501
* - max_bin (optional): Maximum number of bins for building histogram. Must be consistent with
497
- the corresponding booster training parameter.
502
+ * the corresponding booster training parameter.
503
+ * - max_quantile_blocks (optional): For GPU-based inputs, XGBoost handles incoming
504
+ * batches with multiple growing sub-streams. This parameter sets the maximum number
505
+ * of batches before XGBoost can cut the sub-stream and create a new one. This can
506
+ * help bound the memory usage. By default, XGBoost grows new sub-streams
507
+ * exponentially until batches are exhausted. Only used for the training dataset and
508
+ * the default is None (unbounded).
498
509
* @param out The created Quantile DMatrix.
499
510
*
500
511
* @return 0 when success, -1 when failure happens
@@ -509,7 +520,7 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand
509
520
*
510
521
* @since 3.0.0
511
522
*
512
- * @note This is still under development, not ready for test yet .
523
+ * @note This is experimental and subject to change.
513
524
*
514
525
* @param iter A handle to external data iterator.
515
526
* @param proxy A DMatrix proxy handle created by @ref XGProxyDMatrixCreate.
@@ -521,12 +532,18 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand
521
532
* - cache_prefix: The path of cache file, caller must initialize all the directories in this path.
522
533
* - nthread (optional): Number of threads used for initializing DMatrix.
523
534
* - max_bin (optional): Maximum number of bins for building histogram. Must be consistent with
524
- the corresponding booster training parameter.
535
+ * the corresponding booster training parameter.
525
536
* - on_host (optional): Whether the data should be placed on host memory. Used by GPU inputs.
526
537
* - min_cache_page_bytes (optional): The minimum number of bytes for each internal GPU
527
538
* page. Set to 0 to disable page concatenation. Automatic configuration if the
528
539
* parameter is not provided or set to None.
529
- * @param out The created Quantile DMatrix.
540
+ * - max_quantile_blocks (optional): For GPU-based inputs, XGBoost handles incoming
541
+ * batches with multiple growing sub-streams. This parameter sets the maximum number
542
+ * of batches before XGBoost can cut the sub-stream and create a new one. This can
543
+ * help bound the memory usage. By default, XGBoost grows new sub-streams
544
+ * exponentially until batches are exhausted. Only used for the training dataset and
545
+ * the default is None (unbounded).
546
+ * @param out The created Quantile DMatrix.
530
547
*
531
548
* @return 0 when success, -1 when failure happens
532
549
*/
0 commit comments