Skip to content

Commit

Permalink
Performance optimization of GuessSigma_x -- avoiding to calculate the…
Browse files Browse the repository at this point in the history
… vcv matrix every call.
  • Loading branch information
Venelin Mitov committed Sep 4, 2019
1 parent 222f69b commit 330fdeb
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 36 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -82,8 +82,10 @@ importFrom(PCMBase,PCMTreeSetPartition)
importFrom(PCMBase,PCMTreeSplitAtNode)
importFrom(PCMBase,PCMTreeTableAncestors)
importFrom(PCMBase,PCMTreeToString)
importFrom(PCMBase,PCMTreeVCV)
importFrom(PCMBase,PCMVar)
importFrom(PCMBase,TruePositiveRate)
importFrom(PCMBase,UpperChol)
importFrom(PCMBase,is.Fixed)
importFrom(PCMBase,is.Global)
importFrom(PCMBase,is.PCM)
Expand Down
63 changes: 38 additions & 25 deletions R/GuessInitVecParams.R
Expand Up @@ -15,7 +15,9 @@
#' vectors should be within the lower and upper bound of the model.
#' @param res an n x p matrix where p is the number of parameters in o.
#' Internal use only.
#' @param ... additional arguments passed to implementations.
#' @param ... additional arguments passed to implementations. Currently an
#' N x N matrix argument named treeVCVMat can be passed which is equal to the
#' phylogenetic covariance matrix for tree.
#' @return an n x p matrix where p is the number of parameters in o.
#'
#' @description This is an S3 generic function that returns a
Expand Down Expand Up @@ -161,7 +163,7 @@ GuessInitVecParams.PCM <- function(
# Copy all arguments into a list
# We establish arguments$<argument-name> as a convention for accessing the
# original argument value.
arguments <- as.list(environment())
arguments <- c(as.list(environment()), list(...))

res2 <- try(do.call(GuessParameters, arguments), silent = TRUE)

Expand Down Expand Up @@ -210,7 +212,7 @@ GuessInitVecParams.BM <- function(
# Copy all arguments into a list
# We establish arguments$<argument-name> as a convention for accessing the
# original argument value.
arguments <- as.list(environment())
arguments <- c(as.list(environment()), list(...))

res2 <- try(do.call(GuessParameters, arguments), silent = TRUE)

Expand Down Expand Up @@ -259,7 +261,7 @@ GuessInitVecParams.OU <- function(
# Copy all arguments into a list
# We establish arguments$<argument-name> as a convention for accessing the
# original argument value.
arguments <- as.list(environment())
arguments <- c(as.list(environment()), list(...))

res2 <- try(do.call(GuessParameters, arguments), silent = TRUE)

Expand Down Expand Up @@ -308,7 +310,7 @@ GuessInitVecParams.DOU <- function(
# Copy all arguments into a list
# We establish arguments$<argument-name> as a convention for accessing the
# original argument value.
arguments <- as.list(environment())
arguments <- c(as.list(environment()), list(...))

res2 <- try(do.call(GuessParameters, arguments), silent = TRUE)

Expand Down Expand Up @@ -360,7 +362,7 @@ GuessInitVecParams.MixedGaussian <- function(
# Copy all arguments into a list
# We establish arguments$<argument-name> as a convention for accessing the
# original argument value.
arguments <- as.list(environment())
arguments <- c(as.list(environment()), list(...))

res2 <- try(do.call(GuessParameters, arguments), silent = TRUE)

Expand Down Expand Up @@ -508,7 +510,7 @@ GuessX0 <- function(
}
}

#' @importFrom PCMBase is.Global PCMTreeGetTipsInRegime PCM PCMDefaultModelTypes PCMVar PCMTreeGetTipsInRegime
#' @importFrom PCMBase is.Global PCMTreeGetTipsInRegime PCM PCMDefaultModelTypes PCMVar PCMTreeGetTipsInRegime UpperChol PCMTreeVCV
GuessSigma_x <- function(
o, regimes,
X = NULL,
Expand All @@ -518,21 +520,17 @@ GuessSigma_x <- function(
...) {

Sigma_x <- SDSigma_x <- o$Sigma_x
argsExtra <- list(...)

if(!(is.null(Sigma_x) || is.Fixed(Sigma_x)) && !is.null(X) && !is.null(tree) ) {

rootDists <- PCMTreeNodeTimes(tree, tipsOnly = TRUE)

# We use this model to build a phylogenetic variance covariance matrix of
# the tree. This is a matrix in which element (i,j) is equal to the shared
# root-distance of the nodes i and j.
modelCov <- PCM(
model = PCMDefaultModelTypes()["A"], regimes = PCMRegimes(tree), k = 1L)

modelCov$Sigma_x[] <- 1.0

# Phylogenetic variance covariance matrix
CovMat <- PCMVar(tree, modelCov, internal = FALSE)
if(!is.null(argsExtra$treeVCVMat)) {
treeVCVMat <- argsExtra$treeVCVMat
} else {
treeVCVMat <- PCMTreeVCV(tree)
}

CalculateRateMatrixBM <- function(X, C, rootDists) {
N <- ncol(X)
Expand All @@ -546,16 +544,27 @@ GuessSigma_x <- function(
BetaHat <- solve(t(D) %*% Cinv %*% D) %*% t(D) %*% Cinv %*% Y
# rate matrix
R <- t(Y - D%*%BetaHat) %*% Cinv %*% (Y - D%*%BetaHat) / N
chol(R)
if(getOption("PCMBase.Transpose.Sigma_x", FALSE)) {
chol(R)
} else {
UpperChol(R)
}
}

NMax <- getOption("PCMBase.MaxNForGuessSigma_x", 1000L)

if(is.Global(Sigma_x)) {
# Sigma_x has a global scope for the entire tree
tipsNA <- apply(X, 2, function(x) any(is.na(x)))
idxTips <- seq_len(PCMTreeNumTips(tree))[!tipsNA]

if(length(idxTips) > NMax) {
idxTips <- sample(idxTips, size = NMax)
}

Sigma_x[] <- CalculateRateMatrixBM(
X[, idxTips, drop=FALSE],
CovMat[idxTips, idxTips, drop = FALSE],
treeVCVMat[idxTips, idxTips, drop = FALSE],
rootDists[idxTips])
SDSigma_x[,] <- sqrt(abs(Sigma_x[,]) * 0.01)
} else {
Expand All @@ -568,9 +577,13 @@ GuessSigma_x <- function(
X[, tipsInRegime, drop=FALSE], 2, function(x) any(is.na(x)))
tipsInRegime <- tipsInRegime[!tipsNA]

if(length(tipsInRegime) > NMax) {
tipsInRegime <- sample(tipsInRegime, size = NMax)
}

Sigma_x[,, r] <- CalculateRateMatrixBM(
X[, tipsInRegime, drop=FALSE],
CovMat[tipsInRegime, tipsInRegime, drop = FALSE],
treeVCVMat[tipsInRegime, tipsInRegime, drop = FALSE],
rootDists[tipsInRegime])

SDSigma_x[,, r] <- sqrt(abs(Sigma_x[,, r]) * 0.01)
Expand Down Expand Up @@ -706,17 +719,17 @@ GuessParameters <- function(

for(name in paramNames) {
if(name == "X0") {
P <- GuessX0(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc)
P <- GuessX0(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc, ...)
} else if(name == "Sigma_x") {
P <- GuessSigma_x(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc)
P <- GuessSigma_x(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc, ...)
} else if(name == "Sigmae_x") {
P <- GuessSigmae_x(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc)
P <- GuessSigmae_x(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc, ...)
} else if(name == "Theta") {
P <- GuessTheta(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc)
P <- GuessTheta(o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc, ...)
} else {
P <- GuessDefault(
name, paramDefaultValues[[name]], paramSDValues[[name]],
o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc)
o = o, regimes = regimes, X = X, tree = tree, SE = SE, tableAnc = tableAnc, ...)
}

if(length(P$mean) > 0L) {
Expand Down
13 changes: 11 additions & 2 deletions R/PCMFit.R
Expand Up @@ -143,8 +143,13 @@ PCMFit <- function(

chunk <- function(x,n) split(x, cut(seq_along(x), n, labels = FALSE))

globalFuns <- unname(unlist(
sapply(ls(.GlobalEnv), function(n) if(is.function(.GlobalEnv[[n]])) n else NULL)))

valParInitOptim <- foreach(
is = chunk(seq_len(nrow(matParInit)), 8L), .combine = c) %op% {
is = chunk(seq_len(nrow(matParInit)), 8L),
.combine = c,
.export = globalFuns) %op% {

lik <- PCMCreateLikelihood(
X = X, tree = tree, model = model, SE = SE,
Expand Down Expand Up @@ -345,7 +350,11 @@ runOptim <- function(
`%do%`
}

listCallsOptim <- foreach(iOptimTry = seq_len(nrow(matParInit))) %op% {
globalFuns <- unname(unlist(
sapply(ls(.GlobalEnv), function(n) if(is.function(.GlobalEnv[[n]])) n else NULL)))

listCallsOptim <- foreach(iOptimTry = seq_len(nrow(matParInit)),
.export = globalFuns) %op% {
memoMaxLoglik <- new.env()

lik <- PCMCreateLikelihood(
Expand Down
5 changes: 4 additions & 1 deletion R/PCMFitMixed.R
Expand Up @@ -16,7 +16,7 @@
#'
#' @importFrom foreach foreach when %do% %dopar% %:%
#' @importFrom data.table data.table rbindlist is.data.table setkey :=
#' @importFrom PCMBase PCMTree PCMTreeSetLabels PCMTreeSetPartition PCMTreeEvalNestedEDxOnTree PCMTreeNumTips PCMTreeListCladePartitions PCMTreeListAllPartitions PCMTreeToString MixedGaussian PCMOptions PCMTreeTableAncestors PCMTreeSplitAtNode PCMGetVecParamsRegimesAndModels MGPMDefaultModelTypes PCMGenerateModelTypes is.Transformable
#' @importFrom PCMBase PCMTree PCMTreeSetLabels PCMTreeSetPartition PCMTreeEvalNestedEDxOnTree PCMTreeNumTips PCMTreeListCladePartitions PCMTreeListAllPartitions PCMTreeToString MixedGaussian PCMOptions PCMTreeTableAncestors PCMTreeSplitAtNode PCMGetVecParamsRegimesAndModels MGPMDefaultModelTypes PCMGenerateModelTypes is.Transformable PCMTreeVCV
#' @importFrom stats logLik coef AIC
#' @return an S3 object of class PCMFitModelMappings.
#'
Expand Down Expand Up @@ -126,6 +126,7 @@ PCMFitMixed <- function(

preorderTree <- PCMTreePreorder(tree)
tableAncestors <- PCMTreeTableAncestors(tree, preorder = preorderTree)
treeVCVMat <- PCMTreeVCV(tree)

# 1. (fitsToClades) Perform a fit of each model-type to each clade
if(is.null(arguments$listPartitions) || arguments$listPartitions == "all") {
Expand Down Expand Up @@ -166,6 +167,7 @@ PCMFitMixed <- function(
argumentsFitsToClades$X <- X
argumentsFitsToClades$tree <- tree
argumentsFitsToClades$modelTypes <- modelTypes
argumentsFitsToClades$treeVCVMat <- treeVCVMat
argumentsFitsToClades$SE <- SE
argumentsFitsToClades$listPartitions <- as.list(cladeRoots)
argumentsFitsToClades$listAllowedModelTypesIndices <-
Expand Down Expand Up @@ -256,6 +258,7 @@ PCMFitMixed <- function(
argumentsStep2$X <- X
argumentsStep2$tree <- tree
argumentsStep2$modelTypes <- modelTypes
argumentsStep2$treeVCVMat <- treeVCVMat
argumentsStep2$SE <- SE
argumentsStep2$skipNodes <- skipNodes
argumentsStep2$fitMappingsPrev <- NULL
Expand Down
9 changes: 5 additions & 4 deletions R/PCMFitModelMappingsToPartitions.R
@@ -1,6 +1,7 @@
#' @importFrom utils tail
PCMFitModelMappingsToCladePartitions <- function(
X, tree, modelTypes,
treeVCVMat = NULL,
SE = matrix(0.0, nrow(X), PCMTreeNumTips(tree)),

scoreFun = AIC,
Expand Down Expand Up @@ -263,14 +264,14 @@ PCMFitModelMappingsToCladePartitions <- function(
n = 1L,
argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit,
argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit,
X = X, tree = tree, SE = SE,
X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE,
varyParams = FALSE)
matParInitGuessVaryParams <- GuessInitVecParams(
o = modelForFit,
n = argsConfigOptim$numGuessInitVecParams,
argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit,
argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit,
X = X, tree = tree, SE = SE,
X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE,
varyParams = TRUE)

matParInit <- rbind(matParInitRunif,
Expand Down Expand Up @@ -305,14 +306,14 @@ PCMFitModelMappingsToCladePartitions <- function(
n = 1L,
argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit,
argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit,
X = X, tree = tree, SE = SE,
X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE,
varyParams = FALSE)
matParInitGuessVaryParams <- GuessInitVecParams(
o = modelForFit,
n = argsConfigOptim$numGuessInitVecParams,
argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit,
argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit,
X = X, tree = tree, SE = SE,
X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE,
varyParams = TRUE)
matParInitJitter <-
jitterModelParams(
Expand Down
2 changes: 2 additions & 0 deletions R/PCMFitRecursiveCladePartition.R
@@ -1,5 +1,6 @@
PCMFitRecursiveCladePartition <- function(
X, tree, modelTypes,
treeVCVMat = NULL,
SE = matrix(0.0, nrow(X), PCMTreeNumTips(tree),
dimnames=list(NULL, as.character(1:PCMTreeNumTips(tree)))),

Expand Down Expand Up @@ -419,6 +420,7 @@ PCMFitRecursiveCladePartition <- function(
argumentsFitsToTree$X <- X
argumentsFitsToTree$tree <- tree
argumentsFitsToTree$modelTypes <- modelTypes
argumentsFitsToTree$treeVCVMat <- treeVCVMat
argumentsFitsToTree$SE <- SE
argumentsFitsToTree$listPartitions = listPartitions
argumentsFitsToTree$listAllowedModelTypesIndices =
Expand Down
14 changes: 11 additions & 3 deletions R/ParametricBootstrap.R
Expand Up @@ -556,9 +556,15 @@ ExtractBSDataForTraitRegression <- function(
sapply(.I, function(i) {
if( !is.null(listStatsForEpochs[[i]]) ) {
matrixSlopes <- matrix(NA_real_, k, k)
matrixSlopes[upper.tri(matrixSlopes)] <-

bsSlopeVals <-
listStatsForEpochs[[i]][[as.character(epoch)]]$
statsForBackboneNodes[[nodeAtEpochLab]]$slopeMean

if(sum(upper.tri(matrixSlopes)) == length(bsSlopeVals)) {
matrixSlopes[upper.tri(matrixSlopes)] <- bsSlopeVals
}

matrixSlopes[traitIndexX, traitIndexY]
} else {
NA_real_
Expand All @@ -573,9 +579,11 @@ ExtractBSDataForTraitRegression <- function(
sapply(.I, function(i) {
if( !is.null(listStatsForEpochs[[i]]) ) {
matrixIntercepts <- matrix(NA_real_, k, k)
matrixIntercepts[upper.tri(matrixIntercepts)] <-
listStatsForEpochs[[i]][[as.character(epoch)]]$
bsIntercVals <- listStatsForEpochs[[i]][[as.character(epoch)]]$
statsForBackboneNodes[[nodeAtEpochLab]]$interceptMean
if(sum(upper.tri(matrixIntercepts)) == length(bsIntercVals)) {
matrixIntercepts[upper.tri(matrixIntercepts)] <- bsIntercVals
}
matrixIntercepts[traitIndexX, traitIndexY]
} else {
NA_real_
Expand Down
4 changes: 3 additions & 1 deletion man/GuessInitVecParams.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 330fdeb

Please sign in to comment.