Merge branch 'biased-anova-cca' (github issue #542)
This implements re-weighted permutation tests with residualized
permutations as suggested by ter Braak, C.J.F. & te Beest, D.E. Testing
environmental effects on taxonomic composition with canonical
correspondence analysis: alternative permutation tests are not equal.
Environ Ecol Stat 29, 849–868 (2022).

This also fixes some smaller details, such as a clumsy interface that
made parallel execution slower than serial execution.
jarioksa committed Mar 11, 2023
2 parents ee3d62a + 1887fad commit 0e7e5f0
Showing 7 changed files with 325 additions and 42 deletions.
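For context, a minimal sketch (not part of the commit) of the user-facing test this change affects, assuming the standard vegan interface in which anova() dispatches to the permutest.cca code modified below; the data and formula follow the example in R/getFcore.R:

library(vegan)
data(mite, mite.env)
## weighted (CCA) model with a Condition() term: permuting rows here also
## permutes the row weights, which is the case addressed by this commit
mod <- cca(mite ~ SubsDens + WatrCont + Condition(Topo + Shrub),
           data = mite.env)
anova(mod, permutations = 999)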
3 changes: 2 additions & 1 deletion NAMESPACE
@@ -111,7 +111,8 @@ importFrom(lattice, densityplot, qqmath, bwplot, cloud, lpolygon, panel.abline,
panel.xyplot,prepanel.default.cloud, splom, xyplot,
trellis.par.get)
importFrom(parallel, mclapply, makeCluster, stopCluster, clusterEvalQ,
parApply, parLapply, parSapply, parRapply, parCapply)
parApply, parLapply, parSapply, parRapply, parCapply,
splitIndices)
importFrom(MASS, isoMDS, sammon, Shepard, mvrnorm, lda)
importFrom(cluster, daisy, ellipsoidhull)
## 's' must be imported in mgcv < 1.8-0 (not needed later)
67 changes: 67 additions & 0 deletions R/getFcore.R
@@ -0,0 +1,67 @@
## getF.c: essential commands in R

#' Inspect changing getF.c
#'
#' This version permutes Y & w, does not permute Z, but reweights Z &
#' X. This is the first version in this branch that fixes problems
#' with weights in most cases, but fails in the most extreme of Cajo's
#' tests (Pinho). This is point-to-point identical to C code as
#' implemented in branch biased-anova-cca v2.6-3-29-gaaf6f700. This is
#' similar to vegan tests prior to 2.5-1.
#'
#' @examples
#' library(vegan)
#' data(mite, mite.env)
#' perm <- shuffleSet(nrow(mite), 999)
#' mod <- cca(mite ~ SubsDens + WatrCont + Condition(Topo + Shrub),
#' data=mite.env)
#' getFcore(mod, perm)
#' ## evenness of row weights
#' diversity(rowSums(mite), "inv") # virtual N
#' nrow(mite) # N in unweighted model
#'
#' @param m fitted constrained ordination model
#' @param p permutation matrix

`getFcore` <-
function(m, p)
{
## data
Y <- ordiYbar(m, "partial") # reduced model
QZ <- m$pCCA$QR
QR <- m$CCA$QR
w <- weights(m)

## Alternative C-level setup (kept for reference, not used)
## Z <- .Call(vegan:::test_qrXw, QZ, w, 0)
## X <- .Call(vegan:::test_qrXw, QR, w, ncol(Z))

## permutations
if (missing(p))
p <- matrix(seq_len(nrow(Y)), nrow = 1)
niter <- nrow(p)
ss <- numeric(niter)

## Set up before the loop
Z <- qr.X(QZ) # weighted Z
X <- .Call(vegan:::test_qrXw, QR, w, ncol(Z)) # unweighted [ZX]

for (iter in seq_len(niter)) {
## permute Y & w
Yperm <- Y[p[iter,],]
Zperm <- Z[p[iter,],, drop = FALSE]
wperm <- w[p[iter,]]
## Partial
QZ <- qr(Zperm)
Yperm <- qr.resid(QZ, Yperm)
## Constrained
Xrew <- .Call(vegan:::do_wcentre, X, wperm)
Xrew <- qr.resid(QZ, Xrew) # "residualized predictor" X
QR <- qr(Xrew)
Yfit <- qr.fitted(QR, Yperm)
Yres <- qr.resid(QR, Yperm)
ss[iter] <- sum(Yfit^2)/sum(Yres^2)
}
list(P = (sum(ss >= m$CCA$tot.chi/m$CA$tot.chi) + 1) / (niter + 1),
ss = ss)
}
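A follow-up note (not part of the file above): the degrees-of-freedom scaling of a pseudo-F statistic is the same constant for every permutation, so comparing the raw sum-of-squares ratios in ss is equivalent to comparing pseudo-F values, and the returned P is the usual (hits + 1)/(permutations + 1) estimate. A hedged check, reusing mod and perm from the @examples block:

res <- getFcore(mod, perm)
ratio.obs <- mod$CCA$tot.chi / mod$CA$tot.chi          # observed SS ratio
P.byhand <- (sum(res$ss >= ratio.obs) + 1) / (length(res$ss) + 1)
stopifnot(all.equal(P.byhand, res$P))                  # matches the returned P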
58 changes: 35 additions & 23 deletions R/permutest.cca.R
@@ -31,16 +31,22 @@ permutest.default <- function(x, ...)
model <- match.arg(model)
## special cases
isCCA <- !inherits(x, "rda") # weighting
if (isCCA)
w <- x$rowsum # works with any na.action, weights(x) won't
else
w <- NULL
isPartial <- !is.null(x$pCCA) # handle conditions
isDB <- inherits(x, c("dbrda")) # only dbrda is distance-based
## C function to get the statistics in one loop
getF <- function(indx, E, Q, QZ, effects, first, isPartial, isDB, q, r)
getF <- function(indx, E, Q, QZ, effects, w, first, isPartial, isCCA,
isDB, q, r)
{
# q is the rank(s) of the effect(s) - 1
# r is the rank of the residual term - 1
if (!is.matrix(indx))
indx <- matrix(indx, nrow = 1)
out <- .Call(do_getF, indx, E, Q, QZ, effects, first, isPartial, isDB)
out <- .Call(do_getF, indx, E, Q, QZ, effects, w, first, isPartial,
isCCA, isDB)
p <- length(effects)
if (!isPartial && !first)
out[, p + 1] <- Chi.tot - rowSums(out[,seq_len(p), drop=FALSE])
@@ -56,17 +62,19 @@ permutest.default <- function(x, ...)
out
}
## end getF
## wrapper to getF for mcapply
getFmcapply <- function(i, permutations,
E, Q, QZ, effects, first, isPartial, isDB, q, r) {
getF(permutations[i,], E = E, Q = Q, QZ = QZ, effects = effects,
first = first, isPartial = isPartial, isDB = isDB, q = q, r = r)
## wrapper to getF for parallel lapply
getFlapply <- function(i, permutations, E, Q, QZ, effects, w, first,
isPartial, isCCA, isDB, q, r) {
getF(permutations[i,], E = E, Q = Q, QZ = QZ, effects = effects, w = w,
first = first, isPartial = isPartial, isCCA = isCCA, isDB = isDB,
q = q, r = r)
}
## wrapper to getF for cluster workers (called via parLapply)
getFparRapply <- function(i, E, Q, QZ, effects, first, isPartial, isDB,
q, r) {
getF(i, E = E, Q = Q, QZ = QZ, effects = effects, first = first,
isPartial = isPartial, isDB = isDB, q = q, r = r)
getFparRapply <- function(i, permutations, E, Q, QZ, effects, w, first,
isPartial, isCCA, isDB, q, r) {
getF(permutations[i,], E = E, Q = Q, QZ = QZ, effects = effects, w = w,
first = first, isPartial = isPartial, isCCA = isCCA, isDB = isDB,
q = q, r = r)
}
## QR decomposition
Q <- x$CCA$QR
@@ -162,30 +170,34 @@ permutest.default <- function(x, ...)
if (hasClus || parallel > 1) {
if(.Platform$OS.type == "unix" && !hasClus) {
tmp <- do.call(rbind,
mclapply(seq_len(nperm), getFmcapply,
mclapply(splitIndices(nperm, parallel),
getFlapply,
mc.cores = parallel,
permutations = permutations,
E = E, Q = Q, QZ = QZ, effects = effects,
permutations = permutations,
E = E, Q = Q, QZ = QZ,
effects = effects, w = w,
first = first, isPartial = isPartial,
isDB = isDB, q = q, r = r))
isCCA = isCCA, isDB = isDB, q = q, r = r))
} else {
## if hasClus, do not set up and stop a temporary cluster
if (!hasClus) {
parallel <- makeCluster(parallel)
}
tmp <- parRapply(parallel, permutations,
getFparRapply,
E = E, Q = Q, QZ = QZ, effects = effects,
first = first, isPartial = isPartial,
isDB = isDB, q = q, r = r)
tmp <- matrix(tmp, nrow = nperm, byrow = TRUE)
tmp <- do.call(rbind,
parLapply(parallel,
splitIndices(nperm, length(parallel)),
getFparRapply,
permutations=permutations,
E = E, Q = Q, QZ = QZ, effects = effects, w = w,
first = first, isPartial = isPartial,
isCCA = isCCA, isDB = isDB, q = q, r = r))
if (!hasClus)
stopCluster(parallel)
}
} else {
tmp <- getF(permutations, E = E, Q = Q, QZ = QZ, effects = effects,
first = first, isPartial = isPartial, isDB = isDB,
q = q, r = r)
w = w, first = first, isPartial = isPartial, isCCA = isCCA,
isDB = isDB, q = q, r = r)
}
if ((p <- length(effects)) > 1) {
num <- tmp[,seq_len(p)]
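An illustration (not part of the commit) of the chunking scheme used above: splitIndices() from the parallel package hands each worker a block of permutation rows, so the compiled do_getF routine is entered once per block instead of once per permutation, which is what made the earlier row-by-row dispatch slower than serial execution. A minimal sketch:

library(parallel)
nperm <- 999
chunks <- splitIndices(nperm, 4)   # four roughly equal blocks of row indices
lengths(chunks)                    # block sizes, summing to nperm
## each block i is then evaluated in one worker call as
## permutations[chunks[[i]], , drop = FALSE]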
5 changes: 4 additions & 1 deletion R/wcmdscale.R
@@ -30,7 +30,10 @@
m <- as.matrix(d^2)
n <- nrow(m)
if (missing(w))
w <- rep(1, n)
w <- rep(1/n, n)
## take care that weights sum up to 1
w <- w/sum(w)
## weighted Gower double centring
m <- .Call(do_wcentre, m, w)
m <- t(.Call(do_wcentre, t(m), w))
e <- eigen(-m/2, symmetric = TRUE)
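A hedged pure-R sketch (not part of the commit) of the weighted Gower double centring done by the two do_wcentre calls above, assuming the centring subtracts weighted row and column means of the squared dissimilarities; any additional internal scaling applied by the C routine is not reproduced here:

wGower <- function(d, w) {
    m <- as.matrix(d^2)
    w <- w / sum(w)                  # weights summing to 1, as above
    rowmean <- drop(m %*% w)         # weighted mean of each row
    colmean <- drop(w %*% m)         # weighted mean of each column
    grand <- sum(w * rowmean)        # weighted grand mean
    m <- sweep(m, 1, rowmean)        # centre rows
    m <- sweep(m, 2, colmean)        # centre columns
    -(m + grand) / 2                 # Gower-centred matrix passed to eigen()
}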