Merge pull request #43 from wjakethompson/ecpe-case-study

Add ECPE Case Study
wjakethompson · Dec 22, 2023 · 7b0337f · 7b0337f
2 parents 6d4e28e + 975a6af
commit 7b0337f
Show file tree

Hide file tree

Showing 29 changed files with 1,402 additions and 2,380 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -63,7 +63,7 @@ Suggests:
     testthat (>= 3.0.0)
 Additional_repositories: https://mc-stan.org/r-packages/
 Config/testthat/edition: 3
-Config/Needs/website: wjakethompson/wjake, showtext
+Config/Needs/website: wjakethompson/wjake, showtext, ggdist, english
 Encoding: UTF-8
 Language: en-US
 LazyData: true

diff --git a/NAMESPACE b/NAMESPACE
@@ -13,6 +13,8 @@ S3method(waic,measrfit)
 export("%>%")
 export(":=")
 export(.data)
+export(E)
+export(Pr)
 export(add_criterion)
 export(add_fit)
 export(add_reliability)
@@ -38,6 +40,15 @@ export(prior)
 export(prior_)
 export(prior_string)
 export(reliability)
+export(rvar_mad)
+export(rvar_max)
+export(rvar_mean)
+export(rvar_median)
+export(rvar_min)
+export(rvar_prod)
+export(rvar_sd)
+export(rvar_sum)
+export(rvar_var)
 export(waic)
 import(Rcpp)
 import(methods)
@@ -46,7 +57,18 @@ importFrom(loo,loo)
 importFrom(loo,loo_compare)
 importFrom(loo,waic)
 importFrom(magrittr,"%>%")
+importFrom(posterior,E)
+importFrom(posterior,Pr)
 importFrom(posterior,as_draws)
+importFrom(posterior,rvar_mad)
+importFrom(posterior,rvar_max)
+importFrom(posterior,rvar_mean)
+importFrom(posterior,rvar_median)
+importFrom(posterior,rvar_min)
+importFrom(posterior,rvar_prod)
+importFrom(posterior,rvar_sd)
+importFrom(posterior,rvar_sum)
+importFrom(posterior,rvar_var)
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,as_label)

diff --git a/NEWS.md b/NEWS.md
@@ -12,6 +12,11 @@
   * A new article on model evaluation has been added.
   * The model estimation article has been updated to use the same data set as the model evaluation article.
   * More detailed installation instructions have been added to the getting started vignette (#23).
+  * A case study demonstrating a full DCM-based analysis using data from the ECPE (`?ecpe_data`) has been added.
+
+* measr now reexports functions from [posterior](https://mc-stan.org/posterior/) for conducting mathematical operations on `posterior::rvar()` objects.
+
+* Respondent estimates are now returned as `posterior::rvar()` objects when not summarized.
 
 # measr 0.3.1
 

diff --git a/R/methods.R b/R/methods.R
@@ -18,10 +18,13 @@
 #'   (e.g., `NA`, `"."`, `-99`, etc.). The default is `NA`.
 #' @param summary Should summary statistics be returned instead of the raw
 #'   posterior draws? Only relevant if the model was estimated with
-#'   `method = "mcmc"`. Default is `TRUE`.
+#'   `method = "mcmc"`. Default is `FALSE`.
 #' @param probs The percentiles to be computed by the `[stats::quantile()]`
 #'   function. Only relevant if the model was estimated with `method = "mcmc"`.
 #'   Only used if `summary` is `TRUE`.
+#' @param force If respondent estimates have already been added to the model
+#'   object with [add_respondent_estimates()], should they be recalculated.
+#'   Default is `FALSE`.
 #' @param ... Unused.
 #'
 #' @return A list with two elements: `class_probabilities` and
@@ -39,29 +42,44 @@
 #' @export
 predict.measrdcm <- function(object, newdata = NULL, resp_id = NULL,
                              missing = NA, summary = TRUE,
-                             probs = c(0.025, 0.975), ...) {
+                             probs = c(0.025, 0.975), force = FALSE, ...) {
   model <- check_model(object, required_class = "measrdcm", name = "object")
 
+  if ((!is.null(model$respondent_estimates) &&
+       length(model$respondent_estimates) > 0) &&
+      !force && summary) {
+    return(model$respondent_estimates)
+  }
+
   summary <- check_logical(summary, allow_na = FALSE, name = "summary")
   probs <- check_double(probs, lb = 0, ub = 1, inclusive = TRUE, name = "probs")
   if (!is.null(newdata)) {
     resp_id <- check_character(resp_id, name = "resp_id", allow_null = TRUE)
     score_data <- check_newdata(newdata, identifier = resp_id, model = model,
                                 missing = missing, name = "newdata")
+    resp_lookup <- score_data %>%
+      dplyr::rename(orig_resp = "resp_id") %>%
+      dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
+      dplyr::distinct(.data$orig_resp, .data$resp_id)
   } else {
     score_data <- model$data$data
+    resp_lookup <- model$data$data %>%
+      dplyr::rename(orig_resp = "resp_id") %>%
+      dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
+      dplyr::distinct(.data$orig_resp, .data$resp_id)
   }
+  attr_lookup <- tibble::tibble(real_names = colnames(model$data$qmatrix)) %>%
+    dplyr::filter(.data$real_names != "item_id") %>%
+    dplyr::mutate(att_id = paste0("att", seq_len(dplyr::n())))
 
   clean_qmatrix <- model$data$qmatrix %>%
     dplyr::select(-"item_id") %>%
     dplyr::rename_with(~glue::glue("att{1:(ncol(model$data$qmatrix) - 1)}"))
   stan_data <- create_stan_data(dat = score_data, qmat = clean_qmatrix,
                                 type = model$type)
-  stan_draws <- if (model$method == "mcmc") {
-    get_mcmc_draws(model)
-  } else if (model$method == "optim") {
-    get_optim_draws(model)
-  }
+  stan_draws <- switch(model$method,
+                       "mcmc" = get_mcmc_draws(model),
+                       "optim" = get_optim_draws(model))
 
   stan_pars <- create_stan_gqs_params(backend = model$backend,
                                       draws = stan_draws)
@@ -79,51 +97,22 @@ predict.measrdcm <- function(object, newdata = NULL, resp_id = NULL,
   )
 
   # get mastery information -----
-  class_probs <- extract_class_probs(model = gqs_model,
-                                     attr = ncol(clean_qmatrix))
-  attr_probs <- extract_attr_probs(model = gqs_model, qmat = clean_qmatrix)
+  ret_list <- calculate_probs(model = gqs_model,
+                              qmat = clean_qmatrix,
+                              method = model$method,
+                              resp_lookup = resp_lookup,
+                              attr_lookup = attr_lookup,
+                              resp_id = model$data$resp_id)
 
-  if (!is.null(newdata)) {
-    resp_lookup <- score_data %>%
-      dplyr::rename(orig_resp = "resp_id") %>%
-      dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
-      dplyr::distinct(.data$orig_resp, .data$resp_id)
-  } else {
-    resp_lookup <- model$data$data %>%
-      dplyr::rename(orig_resp = "resp_id") %>%
-      dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
-      dplyr::distinct(.data$orig_resp, .data$resp_id)
+  if (!summary) {
+    no_summary_list <- calculate_probs_no_summary(ret_list = ret_list,
+                                                  method = model$method)
+    return(no_summary_list)
   }
-  attr_lookup <- tibble::tibble(real_names = colnames(model$data$qmatrix)) %>%
-    dplyr::filter(.data$real_names != "item_id") %>%
-    dplyr::mutate(att_id = paste0("att", seq_len(dplyr::n())))
-
-  class_probs <- class_probs %>%
-    dplyr::left_join(resp_lookup, by = c("resp_id")) %>%
-    dplyr::mutate(resp_id = .data$orig_resp) %>%
-    dplyr::select(-"orig_resp") %>%
-    dplyr::rename(!!model$data$resp_id := "resp_id")
-
-  attr_probs <- attr_probs %>%
-    tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw",
-                                  "resp_id")) %>%
-    dplyr::left_join(resp_lookup, by = c("resp_id")) %>%
-    dplyr::left_join(attr_lookup, by = c("name" = "att_id")) %>%
-    dplyr::mutate(resp_id = .data$orig_resp) %>%
-    dplyr::select(-"orig_resp") %>%
-    dplyr::rename(!!model$data$resp_id := "resp_id") %>%
-    dplyr::select(".chain", ".iteration", ".draw", !!model$data$resp_id,
-                  "real_names", "value") %>%
-    tidyr::pivot_wider(names_from = "real_names", values_from = "value")
-
-  ret_list <- list(class_probabilities = class_probs,
-                   attribute_probabilities = attr_probs)
-
-  if (!summary) return(ret_list)
-
-  summary_list <- lapply(ret_list, summarize_probs, probs = probs,
-                         id = model$data$resp_id,
-                         optim = model$method == "optim")
 
+  summary_list <- calculate_probs_summary(ret_list = ret_list,
+                                          probs = probs,
+                                          id = model$data$resp_id,
+                                          method = model$method)
   return(summary_list)
 }
diff --git a/R/model-evaluation.R b/R/model-evaluation.R
@@ -227,7 +227,8 @@ add_respondent_estimates <- function(x, probs = c(0.025, 0.975),
   run_pred <- length(model$respondent_estimates) == 0 || overwrite
 
   if (run_pred) {
-    model$respondent_estimates <- stats::predict(model, probs = probs)
+    model$respondent_estimates <- stats::predict(model, summary = TRUE,
+                                                 probs = probs)
   }
 
   # re-save model object (if applicable)

diff --git a/R/ppmc.R b/R/ppmc.R
@@ -93,14 +93,14 @@
 #'
 #' @export
 #' @examplesIf measr_examples()
-#' cmds_mdm_dina <- measr_dcm(
+#' mdm_dina <- measr_dcm(
 #'   data = mdm_data, missing = NA, qmatrix = mdm_qmatrix,
 #'   resp_id = "respondent", item_id = "item", type = "dina",
 #'   method = "mcmc", seed = 63277, backend = "rstan",
-#'   iter = 700, warmup = 500, chains = 2
+#'   iter = 700, warmup = 500, chains = 2, refresh = 0
 #' )
 #'
-#' fit_ppmc(cmds_mdm_dina, model_fit = "raw_score", item_fit = NULL)
+#' fit_ppmc(mdm_dina, model_fit = "raw_score", item_fit = NULL)
 fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
                      return_draws = 0,
                      model_fit = c("raw_score"),
@@ -202,7 +202,8 @@ fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
 
   item_level_fit <- if (!is.null(item_fit)) {
     resp_prob <- extract_class_probs(model = gqs_model,
-                                     attr = ncol(clean_qmatrix))
+                                     attr = ncol(clean_qmatrix),
+                                     method = model$method)
     pi_draws <- posterior::subset_draws(stan_draws, variable = "pi")
 
     ppmc_item_fit(model = model,
@@ -283,7 +284,11 @@ ppmc_rawscore_chisq <- function(model, post_data, probs, return_draws) {
   if (return_draws > 0) {
     raw_score_res <- raw_score_res %>%
       dplyr::mutate(
-        samples = list(chisq_ppmc %>%
+        rawscore_samples = list(raw_score_post %>%
+                                  tidyr::nest(raw_scores = -".draw") %>%
+                                  dplyr::slice_sample(prop = return_draws) %>%
+                                  dplyr::select(-".draw")),
+        chisq_samples = list(chisq_ppmc %>%
                          dplyr::slice_sample(prop = return_draws) %>%
                          dplyr::pull("chisq")),
         .before = "ppp")
@@ -319,8 +324,25 @@ ppmc_conditional_probs <- function(model, attr, resp_prob, pi_draws, probs,
   all_profiles <- profile_labels(attributes = attr)
 
   obs_class <- resp_prob %>%
-    tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw", "resp_id"),
-                        names_to = "class_label", values_to = "prob") %>%
+    dplyr::mutate(dplyr::across(dplyr::where(posterior::is_rvar),
+                                ~lapply(.x,
+                                        function(x) {
+                                          posterior::as_draws_df(x) %>%
+                                            tibble::as_tibble()
+                                        })
+                                )) %>%
+    tidyr::unnest(-"resp_id", names_sep = "_") %>%
+    dplyr::select("resp_id",
+                  dplyr::all_of(paste0(all_profiles$class[1], "_",
+                                       c(".chain", ".iteration", ".draw"))),
+                  dplyr::ends_with("_x")) %>%
+    dplyr::rename_with(function(x) {
+      x <- sub("_x", "", x)
+      x <- sub("\\[[0-9,]*\\]_", "", x)
+    }) %>%
+    tidyr::pivot_longer(cols = -c("resp_id", ".chain", ".iteration", ".draw"),
+                        names_to = "class_label",
+                        values_to = "prob") %>%
     dplyr::mutate(max_class = .data$prob == max(.data$prob),
                   .by = c(".draw", "resp_id")) %>%
     dplyr::filter(.data$max_class) %>%

diff --git a/R/reexports.R b/R/reexports.R
@@ -0,0 +1,43 @@
+#' @importFrom posterior E
+#' @export
+posterior::E
+
+#' @importFrom posterior Pr
+#' @export
+posterior::Pr
+
+#' @importFrom posterior rvar_median
+#' @export
+posterior::rvar_median
+
+#' @importFrom posterior rvar_sum
+#' @export
+posterior::rvar_sum
+
+#' @importFrom posterior rvar_prod
+#' @export
+posterior::rvar_prod
+
+#' @importFrom posterior rvar_min
+#' @export
+posterior::rvar_min
+
+#' @importFrom posterior rvar_max
+#' @export
+posterior::rvar_max
+
+#' @importFrom posterior rvar_mean
+#' @export
+posterior::rvar_mean
+
+#' @importFrom posterior rvar_sd
+#' @export
+posterior::rvar_sd
+
+#' @importFrom posterior rvar_var
+#' @export
+posterior::rvar_var
+
+#' @importFrom posterior rvar_mad
+#' @export
+posterior::rvar_mad