Skip to content

Commit

Permalink
Merge pull request #139 from weecology/juniper_active
Browse files Browse the repository at this point in the history
v0.11.0
  • Loading branch information
juniperlsimonis committed Sep 15, 2019
2 parents 314b52f + 94408e0 commit c66cc46
Show file tree
Hide file tree
Showing 29 changed files with 664 additions and 376 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: portalcasting
Title: Functions Used in Predicting Portal Rodent Dynamics
Version: 0.10.0
Version: 0.11.0
Authors@R: c(
person(c("Juniper", "L."), "Simonis",
email = "juniper.simonis@weecology.org", role = c("aut", "cre"),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -38,6 +38,7 @@ export(download_climate_casts)
export(download_destin)
export(download_message)
export(download_url)
export(ensemble_casts)
export(error_if_deep)
export(evalplot_species)
export(extract_data_sets)
Expand Down Expand Up @@ -68,6 +69,7 @@ export(model_controls)
export(model_template)
export(models_path)
export(models_to_cast)
export(most_abundant_species)
export(na_conformer)
export(name_from_url)
export(named_null_list)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Expand Up @@ -2,6 +2,20 @@

Version numbers follow [Semantic Versioning](https://semver.org/).


# [portalcasting 0.11.0](https://github.com/weecology/portalcasting/releases/tag/v0.11.0)
*2019-09-14*

### Ensembling reintroduced
* Associated with the reconfiguration of portalcasting from v0.8.1 to 0.9.0, ensembling was removed temporarily.
* A basic ensemble is reintroduced, now as an unweighted average across all selected models, allowing us to have an ensemble but not have it be tied to AIC weighting (because AIC weighting is no longer possible with the split between interpolated and non-interpolated data for model fitting).
* In a major departure from v0.8.1 and earlier, the ensemble's output is not saved like the actual models'. Rather, it is only calculated when needed on the fly.
* In plotting, it is now the default to use the ensemble for `plot_cast_ts` and `plot_cast_point` and for the ensemble to be included in `plot_casts_err_lead` and `plot_casts_cov_RMSE`.

### Return of `most_abundant_species`
* Function used to select the most common species.
* Now uses the actual data and not the casts to determine the species.

# [portalcasting 0.10.0](https://github.com/weecology/portalcasting/releases/tag/v0.10.0)
*2019-09-13*

Expand Down
14 changes: 12 additions & 2 deletions R/args.R
Expand Up @@ -44,8 +44,10 @@
#' \code{cleanup},
#' \code{covariatesTF},
#' \code{effort},
#' \code{ensemble},
#' \code{fillweight},
#' \code{interpolate},
#' \code{include_interp},
#' \code{na_drop},
#' \code{nadot},
#' \code{NULLname},
Expand Down Expand Up @@ -81,10 +83,11 @@
#' \code{freq},
#' \code{level},
#' \code{main},
#' \code{method} (if not \code{NULL}, must be \code{"unwtavg"}),
#' \code{model} (inputted values are checked via
#' \code{\link{verify_models}}),
#' \code{name},
#' \code{output} (if not \code{NULL}, must be \code{"abundances"}),
#' \code{output} (if not \code{NULL}, must be \code{"abundance"}),
#' \code{path},
#' \code{plots} (if not \code{NULL}, must be \code{"all"} or
#' \code{"longterm"}),
Expand Down Expand Up @@ -200,7 +203,7 @@
#' \code{ndates} (must be positive),
#' \code{nmoons} (must be non-negative),
#' \code{start_moon} (must be positive),
#' \code{topx} (must be non-negative).
#' \code{topx} (must be positive).
#'
#' Must be length-1 \code{integer}-conformable values can be \code{NULL},
#' and can be \code{NA}:
Expand All @@ -209,6 +212,7 @@
#'
#' Must be \code{integer}-conformable values, can be any length, can be
#' \code{NULL}, but cannot be \code{NA}:
#' \code{cast_groups} (must be non-negative),
#' \code{cast_ids} (must be non-negative),
#' \code{end_moons} (must be positive),
#' \code{target_moons} (must be positive).
Expand Down Expand Up @@ -483,6 +487,7 @@ check_arg_list <- function(){
}


avail_methods <- c("unwtavg")
avail_outputs <- c("abundance")
avail_plots <- c("all", "longterm")
avail_species <- c("BA", "DM", "DO", "DS", "NA", "NA.", "OL", "OT", "PB",
Expand All @@ -500,6 +505,7 @@ check_arg_list <- function(){
append_cast_csv = arg_logical(),
arg_checks = arg_logical(),
cast = arg_cast(),
cast_groups = arg_nonnegintnum(NULL),
cast_ids = arg_nonnegintnum(NULL),
cast_id = arg_nonnegintnum(),
cast_cov = arg_df(),
Expand Down Expand Up @@ -534,6 +540,7 @@ check_arg_list <- function(){
downloads = arg_list(),
downloads_versions = arg_character(NULL),
effort = arg_logical(),
ensemble = arg_logical(),
end = arg_date(),
end_moon = arg_posintnum(),
end_moons = arg_posintnum(NULL),
Expand All @@ -552,6 +559,7 @@ check_arg_list <- function(){
hist_cov = arg_df(),
hist_tab = arg_df(),
interpolate = arg_logical(null = TRUE),
include_interp = arg_logical(null = TRUE),
lag = arg_nonnegintnum(null = TRUE, na = TRUE),
lat = arg_numeric(),
lead = arg_posintnum(),
Expand All @@ -561,6 +569,7 @@ check_arg_list <- function(){
lon = arg_numeric(),
main = arg_character(),
metadata = arg_list(),
method = arg_character(vals = avail_methods),
min_lag = arg_nonnegintnum(na = TRUE),
min_observed = arg_posintnum(),
min_plots = arg_posintnum(),
Expand Down Expand Up @@ -611,6 +620,7 @@ check_arg_list <- function(){
target_cols = arg_character(NULL),
time = arg_character(),
title = arg_character(),
topx = arg_posintnum(),
total = arg_logical(),
treatment = arg_character(vals = avail_treatments),
type = arg_character(),
Expand Down
180 changes: 180 additions & 0 deletions R/ensembling.R
@@ -0,0 +1,180 @@
#' @title Combine (ensemble) casts
#'
#' @description Combine multiple casts' output into a single ensemble.
#' Presently, only a general average ensemble is available.
#'
#' @details A pre-loaded table of casts can be input, but if not (default),
#' the table will be efficiently (as defined by the inputs) loaded and
#' trimmed. \cr
#' The casts can be trimmed specifically using the \code{cast_ids} input,
#' otherwise, all relevant casts from the stated \code{cast_groups}
#' will be included.
#'
#' @param main \code{character} value of the name of the main component of
#' the directory tree.
#'
#' @param method \code{character} value of the name of the ensemble method
#' to use. Presently, only \code{"unwtavg"} (unweighted average) is allowed.
#'
#' @param cast_groups \code{integer} (or integer \code{numeric}) value
#' of the cast group to combine with an ensemble. If \code{NULL} (default),
#' the most recent cast group is ensembled.
#'
#' @param cast_ids \code{integer} (or integer \code{numeric}) values
#' representing the casts of interest for restricting ensembling, as indexed
#' within the directory in the \code{casts} sub folder.
#' See the casts metadata file (\code{casts_metadata.csv}) for summary
#' information.
#'
#' @param end_moon \code{integer} (or integer \code{numeric})
#' newmoon number of the forecast origin. Default value is
#' \code{NULL}, which equates to no selection with respect to
#' \code{end_moon}.
#'
#' @param cast_tab Optional \code{data.frame} of cast table outputs. If not
#' input, will be loaded.
#'
#' @param models \code{character} value(s) of the name of the model to
#' include. Default value is \code{NULL}, which equates to no selection with
#' respect to \code{model}. \code{NULL} translates to all \code{models}
#' in the table.
#'
#' @param data_set \code{character} value of the rodent data set to include
#' Default value is \code{NULL}, which equates to the first data set
#' encountered.
#'
#' @param include_interp \code{logical} indicator of if the basic data set
#' names should also be inclusive of the associated interpolated data sets.
#'
#' @param arg_checks \code{logical} value of if the arguments should be
#' checked using standard protocols via \code{\link{check_args}}. The
#' default (\code{arg_checks = TRUE}) ensures that all inputs are
#' formatted correctly and provides directed error messages if not.
#'
#' @param species \code{character} vector of the species code(s)
#' or \code{"total"} for the total across species) to be plotted
#' \code{NULL} translates to the species defined by
#' \code{evalplot_species}.
#'
#' @return \code{NULL}. Plot is generated.
#'
#' @examples
#' \donttest{
#' setup_dir()
#' portalcast(models = c("AutoArima", "ESSS"), end_moons = 515)
#' ensemble_casts()
#' }
#'
#' @export
#'
ensemble_casts <- function(main = ".", method = "unwtavg",
cast_groups = NULL, cast_ids = NULL,
cast_tab = NULL, end_moon = NULL,
models = NULL, data_set = NULL,
include_interp = TRUE,
species = NULL, arg_checks = TRUE){

check_args(arg_checks = arg_checks)
if(is.null(cast_tab)){
cast_choices <- select_casts(main = main, cast_ids = cast_ids,
models = models, end_moons = end_moon,
data_sets = data_set,
include_interp = include_interp,
arg_checks = arg_checks)
if(NROW(cast_choices) == 0){
stop("no casts available for request")
}else{
cast_tab <- read_cast_tabs(main = main, cast_ids = cast_choices$cast_id,
arg_checks = arg_checks)
cast_tab <- add_obs_to_cast_tab(main = main, cast_tab = cast_tab,
arg_checks = arg_checks)
cast_tab <- add_err_to_cast_tab(main = main, cast_tab = cast_tab,
arg_checks = arg_checks)
cast_tab <- add_lead_to_cast_tab(main = main, cast_tab = cast_tab,
arg_checks = arg_checks)
cast_tab <- add_covered_to_cast_tab(main = main, cast_tab = cast_tab,
arg_checks = arg_checks)
}
}
cast_tab$data_set <- gsub("_interp", "", cast_tab$data_set)
cast_ids <- ifnull(cast_ids, unique(cast_tab$cast_id))
models <- ifnull(models, unique(cast_tab$model))
data_set <- ifnull(data_set, "controls")
species <- ifnull(species,
base_species(total = TRUE, arg_checks = arg_checks))
end_moon <- ifnull(end_moon, max(unique(na.omit(cast_tab)$end_moon)))
cast_groups <- ifnull(cast_groups, unique(cast_tab$cast_group))
cast_id_in <- cast_tab$cast_id %in% cast_ids
model_in <- cast_tab$model %in% models
data_set_in <- cast_tab$data_set == data_set
species_in <- cast_tab$species %in% species
end_moon_in <- cast_tab$end_moon %in% end_moon
cast_group_in <- cast_tab$cast_group %in% cast_groups
all_in <- cast_id_in & model_in & data_set_in & species_in & end_moon_in &
cast_group_in
if(sum(all_in) == 0){
stop("no casts available for requested")
}
cast_tab <- cast_tab[all_in, ]
nspecies <- length(species)
moons <- unique(cast_tab$moon)
nmoons <- length(moons)
nmodels <- length(models)
weight <- 1/nmodels
estimate <- rep(NA, nspecies * nmoons)
mvar <- rep(NA, nspecies * nmoons)
l_pi <- rep(NA, nspecies * nmoons)
u_pi <- rep(NA, nspecies * nmoons)
obs <- rep(NA, nspecies * nmoons)
error <- rep(NA, nspecies * nmoons)
end_moon_id <- rep(NA, nspecies * nmoons)
ecast_id <- rep(NA, nspecies * nmoons)
species_id <- rep(NA, nspecies * nmoons)
moon_id <- rep(NA, nspecies * nmoons)
covered <- rep(NA, nspecies * nmoons)
nmodels <- length(models)
counter <- 1
for(i in 1:nspecies){
for(j in 1:nmoons){
species_in <- cast_tab$species %in% species[i]
moon_in <- cast_tab$moon %in% moons[j]
all_in <- species_in & moon_in
pcast_tab <- cast_tab[all_in, ]

estimates <- na.omit(pcast_tab$estimate)
if(length(estimates) > 0){
mvars <- ((na.omit(pcast_tab$upper_pi) - na.omit(pcast_tab$estimate))/
(na.omit(pcast_tab$confidence_level))) ^2
estimate[counter] <- sum(estimates * weight)
wt_ss <- sum(weight * (estimates - estimate[counter]) ^ 2)
if(nmodels > 1){
mvar[counter] <- sum((mvars * weight) + (wt_ss / (nmodels - 1)))
}else{
mvar[counter] <- mvars
}
CL <- unique(na.omit(pcast_tab$confidence_level))
u_pi[counter] <- estimate[counter] + sqrt(mvar[counter] * CL)
l_pi[counter] <- estimate[counter] - sqrt(mvar[counter] * CL)
obs[counter] <- unique(pcast_tab$obs)
error[counter] <- estimate[counter] - obs[counter]
end_moon_id[counter] <- unique(pcast_tab$end_moon)
ecast_id[counter] <- as.numeric(paste0(9999, min(pcast_tab$cast_id)))
moon_id[counter] <- moons[j]
species_id[counter] <- species[i]
covered[counter] <- estimate[counter] >= l_pi[counter] &
estimate[counter] <= u_pi[counter]
}
counter <- counter + 1

}
}

ensemble_name <- paste0("ensemble_", method)

lead <- moon_id - end_moon_id
data.frame(model = ensemble_name, moon = moon_id, species = species_id,
estimate = estimate, var = mvar, lower_pi = l_pi,
upper_pi = u_pi, obs = obs, error = error,
end_moon = end_moon_id, lead = lead, data_set = data_set,
cast_id = ecast_id, covered = covered, stringsAsFactors = FALSE)
}

0 comments on commit c66cc46

Please sign in to comment.