/
sim-diagnostics.R
244 lines (217 loc) · 7.66 KB
/
sim-diagnostics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#' Convert a NONMEM run to a simulation
#'
#' @description
#'
#' `r lifecycle::badge("stable")`
#'
#' Replaces $EST with $SIM.
#'
#' @param m A nm object.
#' @param seed Numeric (default = `12345`). seed value to include in $SIM.
#' @param subpr Numeric (default = `1`). SUBPR value to include in $SIM.
#'
#' @details Will only change $EST/$SIM, therefore it will not be sufficient to
#' change a categorical estimation control file to simulation. You will likely
#' need to perform a `manual edit` for categorical data simulation.
#'
#' @return An nm object with modified `ctl_contents` field.
#'
#' @examples
#'
#' \dontrun{
#'
#' ## requires NONMEM to be installed
#'
#' m1s <- m1 %>%
#' child(run_id = "m1s") %>%
#' update_parameters(m1) %>%
#' convert_to_simulation(subpr = 50) %>%
#' run_nm()
#'
#' m1s %>% nm_render("Scripts/basic_vpc.Rmd")
#' m1s %>% nm_render("Scripts/basic_ppc.Rmd")
#'
#' }
#'
#' @export
convert_to_simulation <- function(m, seed = 12345, subpr = 1) {
UseMethod("convert_to_simulation")
}
#' @export
convert_to_simulation.nm_generic <- function(m, seed = 12345, subpr = 1) {
## comment out $EST
old_target <- target(m)
m <- m %>%
target("$EST") %>%
comment_out() %>%
untarget()
m <- m %>%
target("$COV") %>%
comment_out() %>%
untarget()
if (any(grepl("\\$SIM", ctl_contents(m)))) { ## if there is a $SIM
m <- m %>% uncomment(pattern = "\\$SIM")
} else { ## if there is NOT a $SIM
m <- m %>% insert_dollar(dollar = "SIM", "$SIM (1234) ONLYSIM SUBPR=1", after_dollar = "SIGMA")
}
m <- m %>% gsub_ctl("(^.*\\$SIM.*)\\([0-9]+\\)(.*$)", paste0("\\1(", seed, ")\\2"))
m <- m %>% gsub_ctl("(N?SUBPR.*\\=\\s*)[0-9]+", paste0("\\1", subpr))
m <- m %>% target(old_target)
m
}
#' @export
convert_to_simulation.nm_list <- Vectorize_nm_list(
convert_to_simulation.nm_generic,
SIMPLIFY = FALSE
)
#' PPC functions: process data from simulation and plot
#'
#' @description
#'
#' `r lifecycle::badge("stable")`
#'
#' @param r An nm object (a simulation run).
#' @param FUN Statistic function accepting a NONMEM dataset `data.frame` as an
#' argument and returns `data.frame` with a column `"statistic"`.
#' @param ... Additional arguments for `FUN`.
#' @param pre_proc Function to apply to dataset prior to compute statistics.
#' @param max_mod_no Integer. Maximum model number to read (set low for debugging).
#' @param DV Character (default = `"DV"`).
#' @param statistic Character (default = `"statistic"`). Name of statistic column
#' returned by FUN.
#' @param group,var1,var2 Grouping variables for plotting.
#'
#' @return The function `ppc_data()` return a `data.frame` with observed and
#' predicted statistics. The `ppc_*_plot()` plotting functions return `ggplot`
#' objects.
#'
#' @seealso [nm_render()]
#' @examples
#'
#' ## requires NONMEM to be installed
#' \dontrun{
#'
#' idEXPstat <- function(d, ...) { ## example individual statistic function
#' ## arg = nonmem dataset data.frame
#' ## return data.frame with statistic column
#' d %>%
#' group_by(ID, ...) %>%
#' filter(is.na(AMT)) %>%
#' summarise(
#' AUC = AUC(time = TIME, conc = DV),
#' CMAX = max(DV, na.rm = TRUE),
#' TMAX = TIME[which.max(DV)]
#' ) %>%
#' tidyr::gather(key = "exposure", value = "statistic", AUC:TMAX) %>%
#' ungroup()
#' }
#'
#' EXPstat <- function(d, ...) { ## example summary statistic function
#' ## arg = nonmem dataset data.frame
#' ## return data.frame with statistic column
#' d %>%
#' idEXPstat(...) %>% ## reuse idEXPstat for individual stats
#' ## summarise over study and any other variables (...)
#' group_by(exposure, ...) %>%
#' summarise(
#' median = median(statistic, na.rm = TRUE),
#' cv = 100 * sd(statistic, na.rm = TRUE) / mean(statistic, na.rm = TRUE)
#' ) %>%
#' tidyr::gather(key = "type", value = "statistic", median:cv)
#' }
#'
#' dppc <- m1s %>% ppc_data(EXPstat)
#'
#' dppc %>% ppc_whisker_plot()
#' dppc %>% ppc_forest_plot()
#' }
#' @rdname ppc
#' @export
ppc_data <- function(r, FUN, ..., pre_proc = identity, max_mod_no = NA, DV = "DV", statistic = "statistic") {
if (!"..." %in% names(formals(FUN))) stop("FUN must have ... in arguments")
if (length(names(formals(FUN))) < 2) stop("FUN must have at least two arguments (a data.frame and ...")
if (length(unique(data_path(r))) > 1) stop("non-unique datasets")
dorig <- input_data(r[1], filter = TRUE)
dsims <- output_table(r) %>%
dplyr::bind_rows() %>%
dplyr::filter(.data$INNONMEM)
total_rows <- nrow(dorig)
nsim <- nrow(dsims) / nrow(dorig)
dsims$mod_no <- rep(1:nsim, each = total_rows)
if (!is.na(max_mod_no)) {
dsims <- dsims[dsims$mod_no <= max_mod_no, ]
nsim <- nrow(dsims) / nrow(dorig)
}
dsims <- dsims[, c("DV_OUT", "mod_no")]
names(dsims)[1] <- DV
dorig2 <- dorig
dorig2[[DV]] <- NULL
dorig2$ORD <- 1:nrow(dorig2)
dsims$ORD <- rep(1:nrow(dorig2), length = nrow(dsims))
nrow(dsims)
dsims <- dplyr::right_join(dorig2, dsims)
nrow(dsims)
## two datasets dorig and dsims ready now, apply function
dorig <- pre_proc(dorig)
stat_orig <- FUN(dorig, ...)
if (!inherits(stat_orig, "data.frame")) stop("FUN must return a data.frame", call. = FALSE)
if (!statistic %in% names(stat_orig)) stop("statistic must be a column of FUN output", call. = FALSE)
names(stat_orig)[names(stat_orig) %in% statistic] <- paste0(statistic, "_true")
## if ... are present
## useful for modifying data items before stat function is applied
current_call <- as.list(match.call())
supplied_args <- names(current_call[-1])
total_args <- names(formals(ppc_data))
dots_present <- any(!supplied_args %in% total_args)
stat_sim <- dsims %>%
dplyr::group_by(.data$mod_no) %>%
pre_proc() %>%
tidyr::nest() %>%
dplyr::mutate(statistic = lapply(.data$data, FUN, ...)) %>%
# dplyr::mutate(statistic = purrr::map(.data$data, FUN, ...)) %>%
tidyr::unnest(statistic)
stat_sim$data <- NULL
merge(stat_sim, stat_orig)
}
#' @name ppc
#' @param d Output from [ppc_data()].
#'
#' @export
ppc_whisker_plot <- function(d, group, var1, var2, statistic = "statistic") {
requireNamespace("ggplot2")
requireNamespace("rlang")
do_facet_wrap <- xor(!missing(var1), !missing(var2))
do_facet_grid <- !missing(var1) & !missing(var2)
var1 <- rlang::enquo(var1)
var2 <- rlang::enquo(var2)
group <- rlang::enquo(group)
p <- ggplot2::ggplot(d, ggplot2::aes_string(y = statistic)) +
ggplot2::aes(x = !!group) +
ggplot2::theme_bw() +
ggplot2::stat_summary(
fun.ymin = function(x) stats::quantile(x, 0.025, na.rm = TRUE),
fun.ymax = function(x) stats::quantile(x, 0.975, na.rm = TRUE),
geom = "errorbar"
) +
ggplot2::geom_point(ggplot2::aes_string(y = paste0(statistic, "_true")), colour = "red")
if (do_facet_wrap) p <- p + ggplot2::facet_wrap(dplyr::vars(!!var1), scales = "free")
if (do_facet_grid) p <- p + ggplot2::facet_grid(dplyr::vars(!!var1), dplyr::vars(!!var2), scales = "free")
p
}
#' @name ppc
#' @export
ppc_histogram_plot <- function(d, var1, var2, statistic = "statistic") {
requireNamespace("ggplot2")
requireNamespace("rlang")
do_facet_wrap <- xor(!missing(var1), !missing(var2))
do_facet_grid <- !missing(var1) & !missing(var2)
var1 <- rlang::enquo(var1)
var2 <- rlang::enquo(var2)
p <- ggplot2::ggplot(d, ggplot2::aes_string(x = statistic)) +
ggplot2::theme_bw() +
ggplot2::geom_histogram() +
ggplot2::geom_vline(ggplot2::aes_string(xintercept = paste0(statistic, "_true")), colour = "red")
if (do_facet_wrap) p <- p + ggplot2::facet_wrap(dplyr::vars(!!var1), scales = "free")
if (do_facet_grid) p <- p + ggplot2::facet_grid(dplyr::vars(!!var1), dplyr::vars(!!var2), scales = "free")
p
}