Skip to content

Commit

Permalink
Merge pull request #43 from wjakethompson/ecpe-case-study
Browse files Browse the repository at this point in the history
Add ECPE Case Study
  • Loading branch information
wjakethompson committed Dec 22, 2023
2 parents 6d4e28e + 975a6af commit 7b0337f
Show file tree
Hide file tree
Showing 29 changed files with 1,402 additions and 2,380 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Suggests:
testthat (>= 3.0.0)
Additional_repositories: https://mc-stan.org/r-packages/
Config/testthat/edition: 3
Config/Needs/website: wjakethompson/wjake, showtext
Config/Needs/website: wjakethompson/wjake, showtext, ggdist, english
Encoding: UTF-8
Language: en-US
LazyData: true
Expand Down
22 changes: 22 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ S3method(waic,measrfit)
export("%>%")
export(":=")
export(.data)
export(E)
export(Pr)
export(add_criterion)
export(add_fit)
export(add_reliability)
Expand All @@ -38,6 +40,15 @@ export(prior)
export(prior_)
export(prior_string)
export(reliability)
export(rvar_mad)
export(rvar_max)
export(rvar_mean)
export(rvar_median)
export(rvar_min)
export(rvar_prod)
export(rvar_sd)
export(rvar_sum)
export(rvar_var)
export(waic)
import(Rcpp)
import(methods)
Expand All @@ -46,7 +57,18 @@ importFrom(loo,loo)
importFrom(loo,loo_compare)
importFrom(loo,waic)
importFrom(magrittr,"%>%")
importFrom(posterior,E)
importFrom(posterior,Pr)
importFrom(posterior,as_draws)
importFrom(posterior,rvar_mad)
importFrom(posterior,rvar_max)
importFrom(posterior,rvar_mean)
importFrom(posterior,rvar_median)
importFrom(posterior,rvar_min)
importFrom(posterior,rvar_prod)
importFrom(posterior,rvar_sd)
importFrom(posterior,rvar_sum)
importFrom(posterior,rvar_var)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,as_label)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
* A new article on model evaluation has been added.
* The model estimation article has been updated to use the same data set as the model evaluation article.
* More detailed installation instructions have been added to the getting started vignette (#23).
* A case study demonstrating a full DCM-based analysis using data from the ECPE (`?ecpe_data`) has been added.

* measr now reexports functions from [posterior](https://mc-stan.org/posterior/) for conducting mathematical operations on `posterior::rvar()` objects.

* Respondent estimates are now returned as `posterior::rvar()` objects when not summarized.

# measr 0.3.1

Expand Down
89 changes: 39 additions & 50 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@
#' (e.g., `NA`, `"."`, `-99`, etc.). The default is `NA`.
#' @param summary Should summary statistics be returned instead of the raw
#' posterior draws? Only relevant if the model was estimated with
#' `method = "mcmc"`. Default is `TRUE`.
#' `method = "mcmc"`. Default is `FALSE`.
#' @param probs The percentiles to be computed by the `[stats::quantile()]`
#' function. Only relevant if the model was estimated with `method = "mcmc"`.
#' Only used if `summary` is `TRUE`.
#' @param force If respondent estimates have already been added to the model
#' object with [add_respondent_estimates()], should they be recalculated.
#' Default is `FALSE`.
#' @param ... Unused.
#'
#' @return A list with two elements: `class_probabilities` and
Expand All @@ -39,29 +42,44 @@
#' @export
predict.measrdcm <- function(object, newdata = NULL, resp_id = NULL,
missing = NA, summary = TRUE,
probs = c(0.025, 0.975), ...) {
probs = c(0.025, 0.975), force = FALSE, ...) {
model <- check_model(object, required_class = "measrdcm", name = "object")

if ((!is.null(model$respondent_estimates) &&
length(model$respondent_estimates) > 0) &&
!force && summary) {
return(model$respondent_estimates)
}

summary <- check_logical(summary, allow_na = FALSE, name = "summary")
probs <- check_double(probs, lb = 0, ub = 1, inclusive = TRUE, name = "probs")
if (!is.null(newdata)) {
resp_id <- check_character(resp_id, name = "resp_id", allow_null = TRUE)
score_data <- check_newdata(newdata, identifier = resp_id, model = model,
missing = missing, name = "newdata")
resp_lookup <- score_data %>%
dplyr::rename(orig_resp = "resp_id") %>%
dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
dplyr::distinct(.data$orig_resp, .data$resp_id)
} else {
score_data <- model$data$data
resp_lookup <- model$data$data %>%
dplyr::rename(orig_resp = "resp_id") %>%
dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
dplyr::distinct(.data$orig_resp, .data$resp_id)
}
attr_lookup <- tibble::tibble(real_names = colnames(model$data$qmatrix)) %>%
dplyr::filter(.data$real_names != "item_id") %>%
dplyr::mutate(att_id = paste0("att", seq_len(dplyr::n())))

clean_qmatrix <- model$data$qmatrix %>%
dplyr::select(-"item_id") %>%
dplyr::rename_with(~glue::glue("att{1:(ncol(model$data$qmatrix) - 1)}"))
stan_data <- create_stan_data(dat = score_data, qmat = clean_qmatrix,
type = model$type)
stan_draws <- if (model$method == "mcmc") {
get_mcmc_draws(model)
} else if (model$method == "optim") {
get_optim_draws(model)
}
stan_draws <- switch(model$method,
"mcmc" = get_mcmc_draws(model),
"optim" = get_optim_draws(model))

stan_pars <- create_stan_gqs_params(backend = model$backend,
draws = stan_draws)
Expand All @@ -79,51 +97,22 @@ predict.measrdcm <- function(object, newdata = NULL, resp_id = NULL,
)

# get mastery information -----
class_probs <- extract_class_probs(model = gqs_model,
attr = ncol(clean_qmatrix))
attr_probs <- extract_attr_probs(model = gqs_model, qmat = clean_qmatrix)
ret_list <- calculate_probs(model = gqs_model,
qmat = clean_qmatrix,
method = model$method,
resp_lookup = resp_lookup,
attr_lookup = attr_lookup,
resp_id = model$data$resp_id)

if (!is.null(newdata)) {
resp_lookup <- score_data %>%
dplyr::rename(orig_resp = "resp_id") %>%
dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
dplyr::distinct(.data$orig_resp, .data$resp_id)
} else {
resp_lookup <- model$data$data %>%
dplyr::rename(orig_resp = "resp_id") %>%
dplyr::mutate(resp_id = as.integer(.data$orig_resp)) %>%
dplyr::distinct(.data$orig_resp, .data$resp_id)
if (!summary) {
no_summary_list <- calculate_probs_no_summary(ret_list = ret_list,
method = model$method)
return(no_summary_list)
}
attr_lookup <- tibble::tibble(real_names = colnames(model$data$qmatrix)) %>%
dplyr::filter(.data$real_names != "item_id") %>%
dplyr::mutate(att_id = paste0("att", seq_len(dplyr::n())))

class_probs <- class_probs %>%
dplyr::left_join(resp_lookup, by = c("resp_id")) %>%
dplyr::mutate(resp_id = .data$orig_resp) %>%
dplyr::select(-"orig_resp") %>%
dplyr::rename(!!model$data$resp_id := "resp_id")

attr_probs <- attr_probs %>%
tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw",
"resp_id")) %>%
dplyr::left_join(resp_lookup, by = c("resp_id")) %>%
dplyr::left_join(attr_lookup, by = c("name" = "att_id")) %>%
dplyr::mutate(resp_id = .data$orig_resp) %>%
dplyr::select(-"orig_resp") %>%
dplyr::rename(!!model$data$resp_id := "resp_id") %>%
dplyr::select(".chain", ".iteration", ".draw", !!model$data$resp_id,
"real_names", "value") %>%
tidyr::pivot_wider(names_from = "real_names", values_from = "value")

ret_list <- list(class_probabilities = class_probs,
attribute_probabilities = attr_probs)

if (!summary) return(ret_list)

summary_list <- lapply(ret_list, summarize_probs, probs = probs,
id = model$data$resp_id,
optim = model$method == "optim")

summary_list <- calculate_probs_summary(ret_list = ret_list,
probs = probs,
id = model$data$resp_id,
method = model$method)
return(summary_list)
}
3 changes: 2 additions & 1 deletion R/model-evaluation.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ add_respondent_estimates <- function(x, probs = c(0.025, 0.975),
run_pred <- length(model$respondent_estimates) == 0 || overwrite

if (run_pred) {
model$respondent_estimates <- stats::predict(model, probs = probs)
model$respondent_estimates <- stats::predict(model, summary = TRUE,
probs = probs)
}

# re-save model object (if applicable)
Expand Down
36 changes: 29 additions & 7 deletions R/ppmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@
#'
#' @export
#' @examplesIf measr_examples()
#' cmds_mdm_dina <- measr_dcm(
#' mdm_dina <- measr_dcm(
#' data = mdm_data, missing = NA, qmatrix = mdm_qmatrix,
#' resp_id = "respondent", item_id = "item", type = "dina",
#' method = "mcmc", seed = 63277, backend = "rstan",
#' iter = 700, warmup = 500, chains = 2
#' iter = 700, warmup = 500, chains = 2, refresh = 0
#' )
#'
#' fit_ppmc(cmds_mdm_dina, model_fit = "raw_score", item_fit = NULL)
#' fit_ppmc(mdm_dina, model_fit = "raw_score", item_fit = NULL)
fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),
return_draws = 0,
model_fit = c("raw_score"),
Expand Down Expand Up @@ -202,7 +202,8 @@ fit_ppmc <- function(model, ndraws = NULL, probs = c(0.025, 0.975),

item_level_fit <- if (!is.null(item_fit)) {
resp_prob <- extract_class_probs(model = gqs_model,
attr = ncol(clean_qmatrix))
attr = ncol(clean_qmatrix),
method = model$method)
pi_draws <- posterior::subset_draws(stan_draws, variable = "pi")

ppmc_item_fit(model = model,
Expand Down Expand Up @@ -283,7 +284,11 @@ ppmc_rawscore_chisq <- function(model, post_data, probs, return_draws) {
if (return_draws > 0) {
raw_score_res <- raw_score_res %>%
dplyr::mutate(
samples = list(chisq_ppmc %>%
rawscore_samples = list(raw_score_post %>%
tidyr::nest(raw_scores = -".draw") %>%
dplyr::slice_sample(prop = return_draws) %>%
dplyr::select(-".draw")),
chisq_samples = list(chisq_ppmc %>%
dplyr::slice_sample(prop = return_draws) %>%
dplyr::pull("chisq")),
.before = "ppp")
Expand Down Expand Up @@ -319,8 +324,25 @@ ppmc_conditional_probs <- function(model, attr, resp_prob, pi_draws, probs,
all_profiles <- profile_labels(attributes = attr)

obs_class <- resp_prob %>%
tidyr::pivot_longer(cols = -c(".chain", ".iteration", ".draw", "resp_id"),
names_to = "class_label", values_to = "prob") %>%
dplyr::mutate(dplyr::across(dplyr::where(posterior::is_rvar),
~lapply(.x,
function(x) {
posterior::as_draws_df(x) %>%
tibble::as_tibble()
})
)) %>%
tidyr::unnest(-"resp_id", names_sep = "_") %>%
dplyr::select("resp_id",
dplyr::all_of(paste0(all_profiles$class[1], "_",
c(".chain", ".iteration", ".draw"))),
dplyr::ends_with("_x")) %>%
dplyr::rename_with(function(x) {
x <- sub("_x", "", x)
x <- sub("\\[[0-9,]*\\]_", "", x)
}) %>%
tidyr::pivot_longer(cols = -c("resp_id", ".chain", ".iteration", ".draw"),
names_to = "class_label",
values_to = "prob") %>%
dplyr::mutate(max_class = .data$prob == max(.data$prob),
.by = c(".draw", "resp_id")) %>%
dplyr::filter(.data$max_class) %>%
Expand Down
43 changes: 43 additions & 0 deletions R/reexports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' @importFrom posterior E
#' @export
posterior::E

#' @importFrom posterior Pr
#' @export
posterior::Pr

#' @importFrom posterior rvar_median
#' @export
posterior::rvar_median

#' @importFrom posterior rvar_sum
#' @export
posterior::rvar_sum

#' @importFrom posterior rvar_prod
#' @export
posterior::rvar_prod

#' @importFrom posterior rvar_min
#' @export
posterior::rvar_min

#' @importFrom posterior rvar_max
#' @export
posterior::rvar_max

#' @importFrom posterior rvar_mean
#' @export
posterior::rvar_mean

#' @importFrom posterior rvar_sd
#' @export
posterior::rvar_sd

#' @importFrom posterior rvar_var
#' @export
posterior::rvar_var

#' @importFrom posterior rvar_mad
#' @export
posterior::rvar_mad

0 comments on commit 7b0337f

Please sign in to comment.