From d891c4ce172d7809a1661da48aa8b89cd6b475d0 Mon Sep 17 00:00:00 2001 From: Venelin Mitov Date: Fri, 6 Sep 2019 18:51:22 +0200 Subject: [PATCH] Implemented multiple calls instead of a single call to GuessInitVecParams in PCMFitModelMappingsToPartitions. Added variable definitions at the beginning of some functions to fix check warnings. --- NAMESPACE | 8 ++++++ R/GuessInitVecParams.R | 29 +++++++++++++------ R/PCMFitMixed.R | 4 ++- R/PCMFitModelMappingsToPartitions.R | 43 +++++++++++++++++++---------- R/ParametricBootstrap.R | 28 ++++++++++++++++++- 5 files changed, 88 insertions(+), 24 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 25f93f3..df68a53 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,7 @@ export(UpdateTableFits) export(is.PCMFit) export(jitterModelParams) import(data.table) +importFrom(PCMBase,Args_MixedGaussian_MGPMDefaultModelTypes) importFrom(PCMBase,FalsePositiveRate) importFrom(PCMBase,MGPMDefaultModelTypes) importFrom(PCMBase,MixedGaussian) @@ -50,6 +51,7 @@ importFrom(PCMBase,PCMGetVecParamsRegimesAndModels) importFrom(PCMBase,PCMInfo) importFrom(PCMBase,PCMLik) importFrom(PCMBase,PCMMean) +importFrom(PCMBase,PCMNumRegimes) importFrom(PCMBase,PCMNumTraits) importFrom(PCMBase,PCMOptions) importFrom(PCMBase,PCMParamCount) @@ -60,23 +62,29 @@ importFrom(PCMBase,PCMParamLowerLimit) importFrom(PCMBase,PCMParamRandomVecParams) importFrom(PCMBase,PCMParamSetByName) importFrom(PCMBase,PCMParamUpperLimit) +importFrom(PCMBase,PCMRegimes) importFrom(PCMBase,PCMTree) importFrom(PCMBase,PCMTreeBackbonePartition) importFrom(PCMBase,PCMTreeDtNodes) importFrom(PCMBase,PCMTreeEvalNestedEDxOnTree) importFrom(PCMBase,PCMTreeExtractClade) +importFrom(PCMBase,PCMTreeGetDaughters) importFrom(PCMBase,PCMTreeGetLabels) importFrom(PCMBase,PCMTreeGetPartNames) importFrom(PCMBase,PCMTreeGetPartRegimes) importFrom(PCMBase,PCMTreeGetPartition) importFrom(PCMBase,PCMTreeGetPartsForNodes) +importFrom(PCMBase,PCMTreeGetTipsInPart) importFrom(PCMBase,PCMTreeGetTipsInRegime) importFrom(PCMBase,PCMTreeInsertSingletonsAtEpoch) importFrom(PCMBase,PCMTreeListAllPartitions) importFrom(PCMBase,PCMTreeListCladePartitions) +importFrom(PCMBase,PCMTreeMatchLabels) +importFrom(PCMBase,PCMTreeMatrixNodesInSamePart) importFrom(PCMBase,PCMTreeNearestNodesToEpoch) importFrom(PCMBase,PCMTreeNodeTimes) importFrom(PCMBase,PCMTreeNumTips) +importFrom(PCMBase,PCMTreePreorder) importFrom(PCMBase,PCMTreeSetLabels) importFrom(PCMBase,PCMTreeSetPartition) importFrom(PCMBase,PCMTreeSplitAtNode) diff --git a/R/GuessInitVecParams.R b/R/GuessInitVecParams.R index db0e0b2..a67fca4 100644 --- a/R/GuessInitVecParams.R +++ b/R/GuessInitVecParams.R @@ -115,7 +115,7 @@ #' # likelihood at completely random parameter values #' vecRand <- PCMParamRandomVecParams(modelBM.ab.Guess, k = 2, R = 2) #' likFunBM(vecRand) -#' @importFrom PCMBase PCMLik +#' @importFrom PCMBase PCMLik PCMNumRegimes PCMRegimes #' @export GuessInitVecParams <- function( o, regimes = as.character(PCMRegimes(o)), oParent = o, accessExpr = "", @@ -424,6 +424,7 @@ EnforceBounds <- function(vecs, lowerVecParams, upperVecParams) { vecs } +#' @importFrom PCMBase PCMTreeGetDaughters PCMTreeGetTipsInPart GuessX0 <- function( o, regimes, X = NULL, @@ -551,16 +552,30 @@ GuessSigma_x <- function( } } - NMax <- getOption("PCMBase.MaxNForGuessSigma_x", 1000L) + SampleTips <- function(vecTips) { + NMax <- getOption("PCMBase.MaxNForGuessSigma_x", 0.25) + if(NMax <= 1) { + # NMax is specified as a fraction of the number of tips + sampSize <- NMax * length(vecTips) + if(sampSize < 20 && length(vecTips) > 20) { + # forcefully prevent using too few tips for the guess + sampSize <- 20 + } else if(sampSize > 1000) { + # forcefully prevent inverting to big matrices + sampSize <- 1000 + } + vecTips <- sample(vecTips, size = as.integer(sampSize)) + } else if(length(vecTips) > NMax) { + vecTips <- sample(vecTips, size = NMax) + } + } if(is.Global(Sigma_x)) { # Sigma_x has a global scope for the entire tree tipsNA <- apply(X, 2, function(x) any(is.na(x))) idxTips <- seq_len(PCMTreeNumTips(tree))[!tipsNA] - if(length(idxTips) > NMax) { - idxTips <- sample(idxTips, size = NMax) - } + idxTips <- SampleTips(idxTips) Sigma_x[] <- CalculateRateMatrixBM( X[, idxTips, drop=FALSE], @@ -577,9 +592,7 @@ GuessSigma_x <- function( X[, tipsInRegime, drop=FALSE], 2, function(x) any(is.na(x))) tipsInRegime <- tipsInRegime[!tipsNA] - if(length(tipsInRegime) > NMax) { - tipsInRegime <- sample(tipsInRegime, size = NMax) - } + tipsInRegime <- SampleTips(tipsInRegime) Sigma_x[,, r] <- CalculateRateMatrixBM( X[, tipsInRegime, drop=FALSE], diff --git a/R/PCMFitMixed.R b/R/PCMFitMixed.R index 4959352..55ce070 100644 --- a/R/PCMFitMixed.R +++ b/R/PCMFitMixed.R @@ -16,7 +16,7 @@ #' #' @importFrom foreach foreach when %do% %dopar% %:% #' @importFrom data.table data.table rbindlist is.data.table setkey := -#' @importFrom PCMBase PCMTree PCMTreeSetLabels PCMTreeSetPartition PCMTreeEvalNestedEDxOnTree PCMTreeNumTips PCMTreeListCladePartitions PCMTreeListAllPartitions PCMTreeToString MixedGaussian PCMOptions PCMTreeTableAncestors PCMTreeSplitAtNode PCMGetVecParamsRegimesAndModels MGPMDefaultModelTypes PCMGenerateModelTypes is.Transformable PCMTreeVCV +#' @importFrom PCMBase PCMTree PCMTreeSetLabels PCMTreeSetPartition PCMTreeEvalNestedEDxOnTree PCMTreeNumTips PCMTreeListCladePartitions PCMTreeListAllPartitions PCMTreeToString MixedGaussian PCMOptions PCMTreeTableAncestors PCMTreeSplitAtNode PCMGetVecParamsRegimesAndModels MGPMDefaultModelTypes PCMGenerateModelTypes is.Transformable PCMTreeVCV Args_MixedGaussian_MGPMDefaultModelTypes PCMTreeMatchLabels PCMTreePreorder #' @importFrom stats logLik coef AIC #' @return an S3 object of class PCMFitModelMappings. #' @@ -72,6 +72,8 @@ PCMFitMixed <- function( debug = FALSE ) { + # Prevent no visible binding warnings: + if( !is.null(listPartitions) ) { maxCladePartitionLevel = 1L } diff --git a/R/PCMFitModelMappingsToPartitions.R b/R/PCMFitModelMappingsToPartitions.R index 6d5c829..7950eb1 100644 --- a/R/PCMFitModelMappingsToPartitions.R +++ b/R/PCMFitModelMappingsToPartitions.R @@ -265,14 +265,22 @@ PCMFitModelMappingsToCladePartitions <- function( argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit, argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit, X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE, + tableAnc = tableAncestorsTree, varyParams = FALSE) - matParInitGuessVaryParams <- GuessInitVecParams( - o = modelForFit, - n = argsConfigOptim$numGuessInitVecParams, - argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit, - argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit, - X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE, - varyParams = TRUE) + matParInitGuessVaryParams <- do.call( + rbind, lapply( + seq_len(argsConfigOptim$numGuessInitVecParams / 100), + function(i) { + GuessInitVecParams( + o = modelForFit, + n = 100, + argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit, + argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit, + X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE, + tableAnc = tableAncestorsTree, + varyParams = TRUE) + })) + matParInit <- rbind(matParInitRunif, matParInitGuess, @@ -307,14 +315,21 @@ PCMFitModelMappingsToCladePartitions <- function( argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit, argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit, X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE, + tableAnc = tableAncestorsTree, varyParams = FALSE) - matParInitGuessVaryParams <- GuessInitVecParams( - o = modelForFit, - n = argsConfigOptim$numGuessInitVecParams, - argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit, - argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit, - X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE, - varyParams = TRUE) + matParInitGuessVaryParams <- do.call( + rbind, lapply( + seq_len(argsConfigOptim$numGuessInitVecParams / 100), + function(i) { + GuessInitVecParams( + o = modelForFit, + n = 100, + argsPCMParamLowerLimit = argsConfigOptim$argsPCMParamLowerLimit, + argsPCMParamUpperLimit = argsConfigOptim$argsPCMParamUpperLimit, + X = X, tree = tree, treeVCVMat = treeVCVMat, SE = SE, + tableAnc = tableAncestorsTree, + varyParams = TRUE) + })) matParInitJitter <- jitterModelParams( modelForFit, diff --git a/R/ParametricBootstrap.R b/R/ParametricBootstrap.R index a0eb135..9e0e02e 100644 --- a/R/ParametricBootstrap.R +++ b/R/ParametricBootstrap.R @@ -36,7 +36,7 @@ CollectBootstrapResults <- function( })) } -#' @importFrom PCMBase TruePositiveRate FalsePositiveRate PCMTreeDtNodes PCMTreeSetLabels PCMMean PCMVar PCMTreeNearestNodesToEpoch PCMTreeGetPartsForNodes PCMTreeGetLabels PCMNumTraits PCMTreeGetPartNames +#' @importFrom PCMBase TruePositiveRate FalsePositiveRate PCMTreeDtNodes PCMTreeSetLabels PCMMean PCMVar PCMTreeNearestNodesToEpoch PCMTreeGetPartsForNodes PCMTreeGetLabels PCMNumTraits PCMTreeGetPartNames PCMTreeMatrixNodesInSamePart CollectBootstrapResultInternal <- function( id, resultFile, @@ -47,6 +47,9 @@ CollectBootstrapResultInternal <- function( minLength = 0.2, verbose = FALSE) { + # Prevent check problems by creating variables with no visible binding: + median <- mapping <- regime <- endNode <- SEs <- fitMappings <- NULL + if(file.exists(resultFile)) { # this should load an object called fitMappings load(resultFile) @@ -268,6 +271,9 @@ ExtractTimeSeriesForTraitValue <- function( traitIndex = 1L, traitName = paste0("Trait_", traitIndex)) { + # prevent no visible binding warnings during check: + timeInterval <- timeNode <- partFinal <- NULL + if(is.null(backboneTree)) { tree <- attr(model, "tree") if(is.null(tree)) { @@ -343,6 +349,8 @@ ExtractTimeSeriesForTraitRegression <- function( traitNameX = paste0("Trait_", traitIndexX), traitNameY = paste0("Trait_", traitIndexY) ) { + # prevent no visible binding warnings during check: + timeInterval <- timeNode <- partFinal <- NULL if(is.null(backboneTree)) { tree <- attr(model, "tree") if(is.null(tree)) { @@ -426,6 +434,11 @@ ExtractBSDataForTraitValue <- function( traitIndex = 1L, traitName = paste0("Trait_", traitIndex) ) { + + # Prevent no visible binding warnings during check. + listStatsForEpochs <- Id <- quantile <- timeInterval <- timeNode <- + partFinal <- NULL + mI <- PCMInfo(X = NULL, tree = inferredBackboneTree, model = inferredModel) inferredMeans <- PCMMean( inferredBackboneTree, inferredModel, metaI = mI, internal = TRUE) @@ -523,6 +536,10 @@ ExtractBSDataForTraitRegression <- function( traitNameX = paste0("Trait_", traitIndexX), traitNameY = paste0("Trait_", traitIndexY) ) { + # prevent no visible binding warnings during check + quantile <- Id <- timeInterval <- timeNode <- partFinal <- + listStatsForEpochs <- NULL + nodeTimesBackbone <- structure(PCMTreeNodeTimes(inferredBackboneTree), names = PCMTreeGetLabels(inferredBackboneTree)) @@ -671,6 +688,10 @@ ExtractBSDataModelTypeFreqs <- function( epochs, inferredBackboneTree) { + # prevent no visible binding warnings during check: + mapping <- regime <- endNodeLab <- listStatsForEpochs <- Id <- + timeInterval <- timeNode <- partFinal <- startTime <- endTime <- NULL + nodeTimesBackbone <- structure(PCMTreeNodeTimes(inferredBackboneTree), names = PCMTreeGetLabels(inferredBackboneTree)) @@ -749,6 +770,9 @@ ExtractBSDataTprWithinParts <- function( epochs, inferredBackboneTree) { + # prevent no visible binding warnings during check: + listStatsForEpochs <- Id <- timeInterval <- timeNode <- partFinal <- NULL + nodeTimesBackbone <- structure(PCMTreeNodeTimes(inferredBackboneTree), names = PCMTreeGetLabels(inferredBackboneTree)) @@ -808,6 +832,8 @@ ExtractBSDataTprFprGlobal <- function( inferredBackboneTree, epochs) { + # prevent no visible binding warnings during check: + listStatsForEpochs <- Id <- startTime <- endTime <- NULL dtNodesBackboneTree <- PCMTreeDtNodes(inferredBackboneTree) res <- rbindlist(lapply(epochs, function(epoch) {