Permalink
Browse files

Merge branch 'anova-cca-use-permutest'

- anova.cca by = "terms" uses directly permutest(..., by="term") and
  runs mainly in compiled code (hence is faster)
- by = "margin" redesigned: no update() of formula, no partial term
  (hence robust against scoping problems), works with NA data
- by = "axis" uses now 'forward' tests instead of marginal (which
  were biased) at the price of being 3-4 times slower
- anova.cca lists of models mangled long model formulae
- adonis2 bug fix: was not adapted to new permutation design
- model.frame is more robust with subsets
- model.matrix finds result directly from the result object and avoids
  scoping problems; works even when model.frame fails with scope
- ordConstrained gained new method="pass" that can refit a model instead
  of update() of formula (which has scoping issues)
  • Loading branch information...
2 parents 66e011b + c9e0557 commit 3ae94eb93f69cbd0420697cc6cbe259c908a799b @jarioksa jarioksa committed Jun 7, 2017
View
@@ -230,6 +230,7 @@ S3method(logLik, radfit.frame)
# model.frame, model.matrix: stats
S3method(model.frame, cca)
S3method(model.matrix, cca)
+S3method(model.matrix, rda)
# multipart: vegan
S3method(multipart, default)
S3method(multipart, formula)
View
@@ -78,6 +78,8 @@
formula[[2]] <- NULL # to remove the lhs
rhs.frame <- model.frame(formula, data, drop.unused.levels = TRUE) # to get the data frame of rhs
rhs <- model.matrix(formula, rhs.frame) # and finally the model.matrix
+ assign <- attr(rhs, "assign") ## assign attribute
+ sol$terminfo$assign <- assign[assign > 0]
rhs <- rhs[,-1, drop=FALSE] # remove the (Intercept) to get rank right
rhs <- scale(rhs, scale = FALSE, center = TRUE) # center
qrhs <- qr(rhs)
View
@@ -1,66 +1,60 @@
-### Implementation of by-cases for vegan 2.2 versions of
-### anova.cca. These are all internal functions that are not intended
-### to be called by users in normal sessions, but they should be
-### called from anova.cca (2.2). Therefore the user interface is rigid
-### and input is not checked. The 'permutations' should be a
-### permutation matrix.
+### Implementation of by-cases for anova.cca. These are all internal
+### functions that are not intended to be called by users in normal
+### sessions, but they should be called from anova.cca. Therefore the
+### user interface is rigid and input is not checked. The
+### 'permutations' should be a permutation matrix.
-### by = terms builds models as a sequence of adding terms and submits
-### this to anova.ccalist
+### by = "terms" calls directly permutest.cca which decomposes the
+### inertia between successive terms within compiled C code.
`anova.ccabyterm` <-
function(object, permutations, model, parallel)
{
- ## We need term labels but without Condition() terms
- trms <- terms(object)
- trmlab <- attr(trms, "term.labels")
- trmlab <- trmlab[trmlab %in% attr(terms(object$terminfo),
- "term.labels")]
- ntrm <- length(trmlab)
- m0 <- update(object, paste(".~.-", paste(trmlab, collapse = "-")))
- mods <- list(m0)
- for (i in seq_along(trmlab)) {
- fla <- paste(". ~ . + ", trmlab[i])
- mods[[i+1]] <- update(mods[[i]], fla)
- }
## The result
- sol <- anova.ccalist(mods, permutations = permutations,
- model = model, parallel = parallel)
+ sol <- permutest(object, permutations = permutations,
+ model = model, by = "terms", parallel = parallel)
## Reformat
- out <- data.frame(c(sol[-1, 3], sol[ntrm+1, 1]),
- c(sol[-1, 4], sol[ntrm+1, 2]),
- c(sol[-1, 5], NA),
- c(sol[-1, 6], NA))
+ EPS <- sqrt(.Machine$double.eps)
+ Pval <- (colSums(sweep(sol$F.perm, 2, sol$F.0 - EPS, ">=")) + 1) /
+ (sol$nperm + 1)
+ out <- data.frame(sol$df, sol$chi, c(sol$F.0, NA), c(Pval, NA))
+
if (inherits(object, c("capscale", "dbrda")) && object$adjust == 1)
varname <- "SumOfSqs"
else if (inherits(object, "rda"))
varname <- "Variance"
else
varname <- "ChiSquare"
- dimnames(out) <- list(c(trmlab, "Residual"),
+ dimnames(out) <- list(c(sol$termlabels, "Residual"),
c("Df", varname, "F", "Pr(>F)"))
head <- paste0("Permutation test for ", object$method, " under ",
model, " model\n",
"Terms added sequentially (first to last)\n",
howHead(attr(permutations, "control")))
mod <- paste("Model:", c(object$call))
attr(out, "heading") <- c(head, mod)
- attr(out, "F.perm") <- attr(sol, "F.perm")
+ attr(out, "F.perm") <- sol$F.perm
class(out) <- c("anova.cca", "anova","data.frame")
out
}
-## by = margin: this is not a anova.ccalist case, but we omit each
-## term in turn and compare against the complete model.
+## by = "margin": we omit each term in turn and compare against the
+## complete model. This does not involve partial terms (Conditions) on
+## other variables, but the permutations remain similar in "direct"
+## and "reduced" (default) models (perhaps this model should not be
+## used with "full" models?). This is basically similar decomposition
+## as by="term", but compares models without each term in turn against
+## the complete model in separate calls to permutest.cca. From vegan
+## 2.5-0 this does not update model formula -- this avoids scoping
+## issues and makes the function more robust when embedded in other
+## functions. Instead, we call ordConstrained with method="pass" with
+## modified constraint matrix.
`anova.ccabymargin` <-
function(object, permutations, scope, ...)
{
EPS <- sqrt(.Machine$double.eps)
nperm <- nrow(permutations)
- ## Refuse to handle models with missing data
- if (!is.null(object$na.action))
- stop("by = 'margin' models cannot handle missing data")
## We need term labels but without Condition() terms
if (!is.null(scope) && is.character(scope))
trms <- scope
@@ -79,8 +73,21 @@
## (vegan 2.0) where other but 'nm' were partialled out within
## Condition(). Now we only fit the model without 'nm' and compare
## the difference against the complete model.
- mods <- lapply(trmlab, function(nm, ...)
- permutest(update(object, paste(".~.-", nm)),
+ Y <- ordiYbar(object, "init")
+ X <- model.matrix(object)
+ ## we must have Constraints to get here, but we may also have
+ ## Conditions
+ if (!is.null(object$pCCA)) {
+ Z <- X$Conditions
+ X <- X$Constraints
+ } else {
+ Z <- NULL
+ }
+ ass <- object$terminfo$assign
+ if (is.null(ass))
+ stop("old time result object: update() your model")
+ mods <- lapply(unique(ass), function(i, ...)
+ permutest(ordConstrained(Y, X[, ass != i, drop=FALSE], Z, "pass"),
permutations, ...), ...)
## Chande in df
Df <- sapply(mods, function(x) x$df[2]) - dfbig
@@ -119,7 +126,20 @@
out
}
-### Marginal test for axes
+### by = "axis" uses partial model: we use the original constraints,
+### but add previous axes 1..(k-1) to Conditions when evaluating the
+### significance of axis k which is compared against the first
+### eigenvalue of the permutations. To avoid scoping issues, this
+### calls directly ordConstrained() with modified Conditions (Z) and
+### original Constraints (X) instead of updating formula. This
+### corresponds to "forward" model in Legendre, Oksanen, ter Braak
+### (2011).
+
+### In 2.2-x to 2.4-3 we used "marginal model" where original
+### Constraints were replaced with LC scores axes (object$CCA$u), and
+### all but axis k were used as Conditions when evaluating the
+### significance of axis k. My (J.Oksanen) simulations showed that
+### this gave somewhat biased results.
`anova.ccabyaxis` <-
function(object, permutations, model, parallel, cutoff = 1)
@@ -137,45 +157,52 @@
resdf <- nobs(object) - length(eig) - max(object$pCCA$rank, 0) - 1
Fstat <- eig/object$CA$tot.chi*resdf
Df <- rep(1, length(eig))
- ## Marginal P-values
- LC <- object$CCA$u
- ## missing values?
- if (!is.null(object$na.action))
- LC <- napredict(structure(object$na.action,
- class = "exclude"), LC)
- ## subset?
- if (!is.null(object$subset)) {
- tmp <- matrix(NA, nrow = length(object$subset),
- ncol = ncol(LC))
- tmp[object$subset,] <- LC
- LC <- tmp
- object <- update(object, subset = object$subset)
+
+ ## collect header and varname here: 'object' is modified later
+ if (inherits(object, c("capscale", "dbrda")) && object$adjust == 1)
+ varname <- "SumOfSqs"
+ else if (inherits(object, "rda"))
+ varname <- "Variance"
+ else
+ varname <- "ChiSquare"
+
+ head <- paste0("Permutation test for ", object$method, " under ",
+ model, " model\n",
+ "Forward tests for axes\n",
+ howHead(attr(permutations, "control")))
+ head <- c(head, paste("Model:", c(object$call)))
+
+ ## constraints and model matrices
+ Y <- object$Ybar
+ if (is.null(Y))
+ stop("old style result object does not work: update() your model")
+ if (!is.null(object$pCCA))
+ Z <- qr.X(object$pCCA$QR)
+ else
+ Z <- NULL
+ X <- model.matrix(object)
+ if (!is.null(object$pCCA)) {
+ Z <- X$Conditions
+ X <- X$Constraints
+ } else {
+ Z <- NULL
}
- LC <- as.data.frame(LC)
- fla <- reformulate(names(LC))
+ LC <- object$CCA$u
+
Pvals <- rep(NA, ncol(LC))
F.perm <- matrix(ncol = ncol(LC), nrow = nperm)
- environment(object$terms) <- environment()
- ## in dbrda, some axes can be imaginary, but we only want to have
- ## an analysis of real-valued dimensions, and we must adjust data
- if (ncol(LC) < length(eig)) {
- eig <- eig[seq_len(ncol(LC))]
- Df <- Df[seq_len(ncol(LC))]
- Fstat <- Fstat[seq_len(ncol(LC))]
- }
+ axnams <- colnames(LC)
for (i in seq_along(eig)) {
- part <- paste("~ . +Condition(",
- paste(names(LC)[-i], collapse = "+"), ")")
- upfla <- update(fla, part)
- ## only one axis, and cannot partial out?
- if (length(eig) == 1)
+ if (i > 1) {
+ object <- ordConstrained(Y, X, cbind(Z, LC[, seq_len(i-1)]), "pass")
+ }
+ if (length(eig) == i) {
mod <- permutest(object, permutations, model = model,
parallel = parallel)
- else
- mod <-
- permutest(update(object, upfla, data = LC),
- permutations, model = model,
- parallel = parallel)
+ } else {
+ mod <- permutest(object, permutations, model = model,
+ parallel = parallel, first = TRUE)
+ }
Pvals[i] <- (sum(mod$F.perm >= mod$F.0 - EPS) + 1) / (nperm + 1)
F.perm[ , i] <- mod$F.perm
if (Pvals[i] > cutoff)
@@ -184,19 +211,8 @@
out <- data.frame(c(Df, resdf), c(eig, object$CA$tot.chi),
c(Fstat, NA), c(Pvals,NA))
rownames(out) <- c(names(eig), "Residual")
- if (inherits(object, c("capscale", "dbrda")) && object$adjust == 1)
- varname <- "SumOfSqs"
- else if (inherits(object, "rda"))
- varname <- "Variance"
- else
- varname <- "ChiSquare"
colnames(out) <- c("Df", varname, "F", "Pr(>F)")
- head <- paste0("Permutation test for ", object$method, " under ",
- model, " model\n",
- "Marginal tests for axes\n",
- howHead(attr(permutations, "control")))
- mod <- paste("Model:", c(object$call))
- attr(out, "heading") <- c(head, mod)
+ attr(out, "heading") <- head
attr(out, "F.perm") <- F.perm
class(out) <- c("anova.cca", "anova", "data.frame")
out
View
@@ -86,7 +86,8 @@
c("ResDf", paste0("Res", varname), "Df",
varname, "F", "Pr(>F)"))
## Collect header information
- formulae <- sapply(object, function(z) deparse(formula(z)))
+ formulae <- sapply(object,
+ function(z) deparse(formula(z), width.cutoff = 500))
head <- paste0("Permutation tests for ", method, " under ",
mods[[big]]$model, " model\n",
howHead(attr(permutations, "control")))
View
@@ -143,7 +143,6 @@
sol$terminfo <- ordiTerminfo(d, data)
sol$call$formula <- formula(d$terms, width.cutoff = 500)
sol$call$formula[[2]] <- formula[[2]]
- sol$method <- "capscale"
sol$sqrt.dist <- sqrt.dist
if (!is.na(X$ac) && X$ac > 0) {
sol$ac <- X$ac
View
@@ -32,7 +32,6 @@
if (!is.null(sol$pCCA) && sol$pCCA$tot.chi == 0)
pCCA$rank <- 0
sol <- c(list(call = call,
- method = "cca",
inertia = "mean squared contingency coefficient"),
sol)
class(sol) <- "cca"
View
@@ -134,7 +134,6 @@
sol$terminfo <- ordiTerminfo(d, data)
sol$call$formula <- formula(d$terms, width.cutoff = 500)
sol$call$formula[[2]] <- formula[[2]]
- sol$method <- "dbrda"
sol$sqrt.dist <- sqrt.dist
if (!is.na(ac) && ac > 0) {
sol$ac <- ac
View
@@ -1,17 +1,20 @@
`model.frame.cca` <-
- function (formula, ...)
+ function (formula, ...)
{
if (inherits(formula, "prc"))
stop("model.frame does not work with 'prc' results")
call <- formula$call
- m <- match(c("formula", "data", "na.action", "subset"), names(call),
+ m <- match(c("formula", "data", "na.action", "subset"), names(call),
0)
call <- call[c(1, m)]
+ ## subset must be evaluated before ordiParseFormula
+ if (!is.null(call$subset))
+ call$subset <- formula$subset
call[[1]] <- as.name("ordiParseFormula")
out <- eval(call, environment(), parent.frame())
mf <- out$modelframe
attr(mf, "terms") <- out$terms.expand
- if (!is.null(out$na.action))
+ if (!is.null(out$na.action))
attr(mf, "na.action") <- out$na.action
mf
}
View
@@ -1,19 +1,43 @@
`model.matrix.cca` <-
- function (object, ...)
+ function(object, ...)
{
- if (inherits(object, "prc"))
- stop("model.matrix does not work with 'prc' results")
- call <- object$call
- m <- match(c("formula", "data", "na.action", "subset"), names(call),
- 0)
- call <- call[c(1, m)]
- call[[1]] <- as.name("ordiParseFormula")
- out <- eval(call, environment(), parent.frame())[c("Z", "Y")]
+ X <- Z <- NULL
+ w <- 1/sqrt(object$rowsum)
+ if (!is.null(object$pCCA))
+ Z <- w * qr.X(object$pCCA$QR)
+ if (!is.null(object$CCA)) {
+ X <- qr.X(object$CCA$QR)
+ ## First columns come from Z
+ if (!is.null(Z))
+ X <- X[, -seq_len(ncol(Z)), drop = FALSE]
+ X <- w * X
+ }
m <- list()
- if (!is.null(out$Z))
- m$Conditions <- out$Z
- if (!is.null(out$Y))
- m$Constraints <- out$Y
+ if (!is.null(Z))
+ m$Conditions <- Z
+ if (!is.null(X))
+ m$Constraints <- X
+ if (length(m) == 1)
+ m <- m[[1]]
+ m
+}
+
+`model.matrix.rda` <-
+ function(object, ...)
+{
+ X <- Z <- NULL
+ if (!is.null(object$pCCA))
+ Z <- qr.X(object$pCCA$QR)
+ if (!is.null(object$CCA)) {
+ X <- qr.X(object$CCA$QR)
+ if (!is.null(Z))
+ X <- X[, -seq_len(ncol(Z)), drop=FALSE]
+ }
+ m <- list()
+ if (!is.null(Z))
+ m$Conditions <- Z
+ if (!is.null(X))
+ m$Constraints <- X
if (length(m) == 1)
m <- m[[1]]
m
Oops, something went wrong.

0 comments on commit 3ae94eb

Please sign in to comment.