-
Notifications
You must be signed in to change notification settings - Fork 7
/
crew_monitor_slurm.R
124 lines (123 loc) · 4.02 KB
/
crew_monitor_slurm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#' @title `r lifecycle::badge("experimental")` Create a SLURM monitor object.
#' @export
#' @family slurm
#' @description Create an `R6` object to monitor SLURM cluster jobs.
#' @inheritParams crew_monitor_cluster
crew_monitor_slurm <- function(
verbose = TRUE,
command_list = as.character(Sys.which("squeue")),
command_terminate = as.character(Sys.which("scancel"))
) {
out <- crew_class_monitor_slurm$new(
verbose = verbose,
command_list = command_list,
command_terminate = command_terminate
)
out$validate()
out
}
#' @title `r lifecycle::badge("experimental")` SLURM monitor class
#' @export
#' @family slurm
#' @description SLURM monitor `R6` class
#' @details See [crew_monitor_slurm()].
crew_class_monitor_slurm <- R6::R6Class(
classname = "crew_class_monitor_slurm",
inherit = crew_class_monitor_cluster,
cloneable = FALSE,
public = list(
#' @description List SLURM jobs.
#' @details This function loads the entire SLURM queue for all users,
#' so it may take several seconds to execute.
#' It is intended for interactive use, and
#' should especially be avoided in scripts where it is called
#' frequently. It requires SLURM version 20.02 or higher,
#' along with the YAML plugin.
#' @return A `tibble` with one row per SLURM job and columns with
#' specific details.
#' @param user Character of length 1, user name of the jobs to list.
jobs = function(user = ps::ps_username()) {
# Cannot be tested with automated tests.
# Tested in tests/slurm/monitor.R.
# nocov start
crew::crew_assert(
user,
is.character(.),
length(.) == 1L,
!anyNA(.),
nzchar(.),
message = "'user' must be `NULL` or a character vector of length 1"
)
text <- system2(
private$.command_list,
args = shQuote(c("--yaml")),
stdout = TRUE,
stderr = if_any(private$.verbose, "", FALSE),
wait = TRUE
)
monitor_cols <- c("job_id", "partition", "name", "user_name", "job_state",
"start_time", "node_count", "state_reason")
yaml <- yaml::read_yaml(text = text)
out <- map(
yaml$jobs,
~ tibble::new_tibble(
c(
map(.x[monitor_cols], ~ unlist(.x) %||% NA),
list(
nodes = paste(
unlist(.x$job_resources$nodes),
collapse = ","
) %||% NA
)
)
)
)
out <- do.call(vctrs::vec_rbind, out)
out <- out[out$user_name == user, ]
out <- out[which(out$job_state != "CANCELLED"), ]
out$job_id <- as.character(out$job_id)
out$start_time <- as.POSIXct(out$start_time, origin = "1970-01-01")
out
# nocov end
},
#' @description Terminate one or more SLURM jobs.
#' @return `NULL` (invisibly).
#' @param jobs Character vector of job names or job IDs to terminate.
#' Ignored if `all` is set to `TRUE`.
#' @param all Logical of length 1, whether to terminate all the jobs
#' under your user name. This terminates ALL your SLURM jobs,
#' regardless of whether `crew.cluster` launched them,
#' so use with caution!
terminate = function(jobs = NULL, all = FALSE) {
# Cannot be tested with automated tests.
# Tested in tests/slurm/monitor.R.
# nocov start
crew::crew_assert(
jobs %||% "x",
is.character(.),
!anyNA(.),
nzchar(.),
message = paste(
"'jobs' must be `NULL` or a character vector of",
"valid job names or IDs."
)
)
crew::crew_assert(
all,
isTRUE(.) || isFALSE(.),
message = "'all' must be TRUE or FALSE."
)
args <- shQuote(if_any(all, c("-u", ps::ps_username()), jobs))
stream <- if_any(private$.verbose, "", FALSE)
system2(
command = private$.command_terminate,
args = args,
stdout = stream,
stderr = stream,
wait = TRUE
)
invisible()
# nocov end
}
)
)