Open
Description
Description
The `num_thread` argument does not do anything when using `lgb.train()` in R.
Reproducible example
Using a modified reprex from #4192:
library(lightgbm)
library(microbenchmark)
library(nycflights13)
# Take the flights data from nycflights13 and coerce it to a plain data.frame.
flights <- as.data.frame(nycflights13::flights)

# Five numeric columns form the feature matrix; `dep_delay` is the label.
feature_cols <- c("year", "sched_dep_time", "distance", "hour", "minute")
feature_matrix <- as.matrix(flights[, feature_cols])
delay_label <- flights[["dep_delay"]]

# A single Dataset object is built once and reused by every training run below.
dtrain <- lgb.Dataset(
  data = feature_matrix
  , label = delay_label
  , free_raw_data = FALSE
)

# For each requested thread count from 1 to 8, time five identical training
# runs and print the timing summary in seconds.
for (num_thread in seq_len(8L)) {
  print(paste0("num_thread: ", num_thread))
  timing <- microbenchmark::microbenchmark({
    lgb.train(
      params = list(
        num_thread = num_thread
        , objective = "regression_l2"
        , num_leaves = 31L
        , max_depth = 8L
        , learning_rate = 0.01
        , min_data_in_leaf = 1
      )
      , data = dtrain
      , nrounds = 1000L
      , verbose = -1L
    )
  }, times = 5, unit = "s")
  print(timing)
}
results in this output:
[1] "num_thread: 1"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.360725 4.388432 4.425465 4.432834 4.454532 4.490802 5
[1] "num_thread: 2"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.346324 4.359153 4.372369 4.365649 4.393762 4.396955 5
[1] "num_thread: 3"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.35831 4.369259 4.368202 4.369422 4.370649 4.37337 5
[1] "num_thread: 4"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.36882 4.369379 4.395463 4.396702 4.417304 4.425108 5
[1] "num_thread: 5"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.384001 4.385201 4.401785 4.40377 4.414547 4.421405 5
[1] "num_thread: 6"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.327206 4.345328 4.369879 4.352958 4.387374 4.43653 5
[1] "num_thread: 7"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.270927 4.321445 4.355477 4.382068 4.383753 4.419192 5
[1] "num_thread: 8"
Unit: seconds
expr
{ lgb.train(params = list(num_thread = num_thread, objective = "regression_l2", num_leaves = 31L, max_depth = 8L, learning_rate = 0.01, min_data_in_leaf = 1), data = dtrain, nrounds = 1000L, verbose = -1L) }
min lq mean median uq max neval
4.35626 4.382811 4.395294 4.393897 4.412376 4.431127 5
As you can see, it doesn't matter whether I request one thread or eight: training always takes roughly 4.35–4.45 s. Monitoring CPU utilization during the runs confirms that only a single core is ever in use.
Environment info
> sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
setting value
version R version 4.5.0 (2025-04-11)
os macOS Sequoia 15.5
system aarch64, darwin20
ui RStudio
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Los_Angeles
date 2025-05-23
rstudio 2025.05.0+496 Mariposa Orchid (desktop)
pandoc NA
quarto 1.6.42 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
data.table 1.17.2 2025-05-12 [1] CRAN (R 4.5.0)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.0)
lattice 0.22-6 2024-03-20 [1] CRAN (R 4.5.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.5.0)
lightgbm * 4.6.0 2025-02-13 [1] CRAN (R 4.5.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.5.0)
Matrix 1.7-3 2025-03-11 [1] CRAN (R 4.5.0)
microbenchmark * 1.5.0 2024-09-04 [1] CRAN (R 4.5.0)
nycflights13 * 1.0.2 2021-04-12 [1] CRAN (R 4.5.0)
pillar 1.10.2 2025-04-05 [1] CRAN (R 4.5.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
rlang 1.1.6 2025-04-11 [1] CRAN (R 4.5.0)
rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.5.0)
sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.0)
tibble 3.2.1 2023-03-20 [1] CRAN (R 4.5.0)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.5.0)
[1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────