Skip to content

Commit

Permalink
cleaning up check()
Browse files Browse the repository at this point in the history
 - fixed s3 generic arguments mismatch
 - removed call to party:::newinputs
 - removed test.R
 - added newdata to all methods for proximity
  • Loading branch information
zmjones committed Mar 23, 2015
1 parent d389d3e commit 74ecc90
Show file tree
Hide file tree
Showing 16 changed files with 84 additions and 77 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
README.md
makefile
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@ Imports:
reshape2,
Rcpp (>= 0.11.1),
Matrix,
randomForestCI,
randomForest,
randomForestSRC,
party
Suggests:
randomForest,
randomForestSRC,
scales,
doParallel,
testthat
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export(var_est)
export(variable_importance)
import(assertthat)
import(ggplot2)
import(party)
importFrom(Matrix,Matrix)
importFrom(Matrix,colSums)
importFrom(Matrix,rowMeans)
Expand Down
22 changes: 13 additions & 9 deletions R/ci.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ var_est <- function(fit, df) UseMethod("var_est", fit)
#'
#' @param fit an object of class 'randomForest' returned from \code{randomForest} with \code{keep.inbag = TRUE}
#' @param df dataframe to be used for prediction
#' @param ... additional arguments to be passed to predict.randomForest
#'
#' @return a dataframe with two columns: 'prediction' and 'variance', where the former is the prediction calculated using the inbag data, and the variance is calculated using the bias corrected infinitesimal bootstrap from Wager, Efron, and Tibsharani (2014).
#'
Expand All @@ -28,21 +29,23 @@ var_est <- function(fit, df) UseMethod("var_est", fit)
#' var_est(fit, swiss)
#' }
#' @export
var_est.randomForest <- function(fit, df) {
var_est.randomForest <- function(fit, df, ...) {
info <- installed.packages(fields = c("Package", "Version"))
info <- info[, c("Package", "Version")]
if (!"randomForestCI" %in% info)
stop("install randomForestCI from http://github.com/swager/randomForestCI")
if (!info[info[, 1] == "randomForest", "Version"] == "4.6-11")
stop("install fixed randomForest from http://github.com/swager/randomForest")
out <- randomForestCI::randomForestInfJack(fit, df)
colnames(out) <- c("prediction", "variance")
out
pred <- predict(fit, newdata = df, predict.all = TRUE, ...)
data.frame("prediction" = pred$aggregate,
"variance" = inf_jackknife(pred$individual, fit$ntree, fit$inbag))

}
#' Variance estimation for RandomForest objects from package \code{party}
#'
#' Calculates the variance of predictions from regression using RandomForest using a slightly modified version of the code from randomForestCI (\url{https://github.com/swager/randomForestCI})
#'
#' @import party
#' @param fit an object of class 'RandomForest' returned from \code{cforest}
#' @param df dataframe to be used for prediction
#'
Expand All @@ -58,22 +61,23 @@ var_est.randomForest <- function(fit, df) {
#' }
#' @export
var_est.RandomForest <- function(fit, df) {
new_df <- party:::newinputs(fit, df)
new_df <- initVariableFrame(df)
pred <- sapply(1:length(fit@ensemble), function(i) {
sapply(.Call("R_predictRF_weights",
fit@ensemble[i], fit@where[i], fit@weights[i], new_df, 0, FALSE, PACKAGE = "party"),
function(w) w %*% fit@responses@predict_trafo / sum(w))
})
data.frame("prediction" = predict(fit, newdata = df),
"variance" = inf_jackknife(pred, length(fit@ensemble),
Matrix::Matrix(do.call(cbind, fit@weights), sparse = TRUE)))
Matrix(do.call(cbind, fit@weights), sparse = TRUE)))
}
#' Variance estimation for rfsrc objects from package \code{randomForestSRC}
#'
#' Calculates the variance of predictions from regression using RandomForest using a slightly modified version of the code from randomForestCI (\url{https://github.com/swager/randomForestCI})
#'
#' @param fit an predict object of class 'rfsrc' returned from \code{rfsrc}
#' @param df dataframe to be used for prediction
#' @param ... additional arguments to be passed to predict.rfsrc
#'
#' @return a dataframe with two columns: 'prediction' and 'variance', where the former is the prediction calculated using the inbag data, and the variance is calculated using the bias corrected infinitesimal bootstrap from Wager, Efron, and Tibsharani (2014).
#'
Expand All @@ -86,9 +90,9 @@ var_est.RandomForest <- function(fit, df) {
#' var_est(fit, swiss)
#' }
#' @export
var_est.rfsrc <- function(fit, df) {
var_est.rfsrc <- function(fit, df, ...) {
if (is.null(fit$pd_membership) | is.null(fit$pd_predicted)) {
pred <- predict(fit, newdata = df, outcome = "train")
pred <- predict(fit, newdata = df, outcome = "train", ...)
fit$pd_membership <- pred$membership
fit$pd_predicted <- pred$predicted
}
Expand Down Expand Up @@ -125,7 +129,7 @@ inf_jackknife <- function(pred, B, N) {
## covariance between number of times obs. i appears in b and difference between tree
## and mean across trees (across in bag and out bag)
C <- N %*% t(pred_center) - Matrix(N_avg, nrow(N), 1) %*%
Matrix::Matrix(owSums(pred_center), 1, nrow(pred_center))
Matrix(rowSums(pred_center), 1, nrow(pred_center))
raw_IJ <- colSums(C^2) / B^2
N_var <- mean(rowMeans(N^2) - N_avg^2)
boot_var <- rowMeans(pred_center^2)
Expand Down
7 changes: 5 additions & 2 deletions R/imp.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ variable_importance <- function(fit, ...) UseMethod("variable_importance")
#' if type is "accuracy" importance must be set to TRUE in the call to randomForest
#' @param class_levels logical, when TRUE class level specific importances are returned
#' response variable must be a factor and importance = TRUE in the call to randomForest
#' @param ... further arguments to be passed to nothing
#'
#'
#' @return a data.frame of class "importance"
#'
Expand All @@ -29,7 +31,7 @@ variable_importance <- function(fit, ...) UseMethod("variable_importance")
#' plot_imp(imp)
#' }
#' @export
variable_importance.randomForest <- function(fit, type = "accuracy", class_levels) {
variable_importance.randomForest <- function(fit, type = "accuracy", class_levels, ...) {
if (ncol(fit$importance) == 1 & type != "gini")
stop("set importance = TRUE in call to randomForest")
if (is.null(fit$localImportance) & type == "local")
Expand Down Expand Up @@ -113,6 +115,7 @@ variable_importance.RandomForest <- function(fit, conditional = FALSE, auc = FAL
#' @param type character equal to "permute", "random", "permute.ensemble", or "random.ensemble"
#' this the \code{permute} argument must equal this value in the call to rfsrc
#' @param class_levels logical, when TRUE class level specific importances are returned otherwise the overal importance is returned
#' @param ... further arguments to be passed to nothing
#'
#' @return a data.frame of class "importance"
#'
Expand All @@ -125,7 +128,7 @@ variable_importance.RandomForest <- function(fit, conditional = FALSE, auc = FAL
#' variable_importance(fit, "random", TRUE)
#' }
#' @export
variable_importance.rfsrc <- function(fit, type = "permute", class_levels = FALSE) {
variable_importance.rfsrc <- function(fit, ..., type = "permute", class_levels = FALSE) {
if (!type %in% as.character(fit$call))
stop(paste("call rfsrc with importance =", type))

Expand Down
9 changes: 6 additions & 3 deletions R/pd.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ partial_dependence <- function(fit, ...) UseMethod("partial_dependence", fit)
#' @param empirical logical indicator of whether or not only values in the data should be sampled
#' @param parallel logical indicator of whether a parallel backend should be used if registered
#' @param type with classification, default "" gives most probable class for classification and "prob" gives class probabilities
#' @param ... additional arguments to be passed to nothing
#'
#' @return a dataframe with columns for each predictor in `var` and the fitted value for
#' each set of values taken by the values of 'var' averaged within the values of predictors
Expand Down Expand Up @@ -57,7 +58,7 @@ partial_dependence <- function(fit, ...) UseMethod("partial_dependence", fit)
#' @export
partial_dependence.randomForest <- function(fit, df, var, cutoff = 10, interaction = FALSE,
ci = TRUE, confidence = .95,
empirical = TRUE, parallel = FALSE, type = "") {
empirical = TRUE, parallel = FALSE, type = "", ...) {
pkg <- "randomForest"
y_class <- attr(fit$terms, "dataClasses")[1] ## what type is y
if (!y_class %in% c("integer", "numeric") & ci) ci <- FALSE
Expand Down Expand Up @@ -150,6 +151,7 @@ partial_dependence.randomForest <- function(fit, df, var, cutoff = 10, interacti
#' @param empirical logical indicator of whether or not only values in the data should be sampled
#' @param parallel logical indicator of whether a parallel backend should be used if registered
#' @param type with classification, default "" gives most probable class for classification and "prob" gives class probabilities
#' @param ... additional arguments to be passed to nothing
#'
#' @return a dataframe with columns for each predictor in `var` and the fitted value for
#' each set of values taken by the values of 'var' averaged within the values of predictors
Expand Down Expand Up @@ -190,7 +192,7 @@ partial_dependence.randomForest <- function(fit, df, var, cutoff = 10, interacti
#' @export
partial_dependence.RandomForest <- function(fit, var, cutoff = 10, interaction = FALSE,
ci = TRUE, confidence = .95,
empirical = TRUE, parallel = FALSE, type = "") {
empirical = TRUE, parallel = FALSE, type = "", ...) {
pkg <- "party"
## get y from the fit object
y <- get("response", fit@data@env)
Expand Down Expand Up @@ -303,6 +305,7 @@ partial_dependence.RandomForest <- function(fit, var, cutoff = 10, interaction =
#' @param empirical logical indicator of whether or not only values in the data should be sampled
#' @param parallel logical indicator of whether a parallel backend should be used if registered
#' @param type with classification, default "" gives most probable class for classification and "prob" gives class probabilities
#' @param ... additional arguments to be passed to nothing
#'
#' @return a dataframe with columns for each predictor in `var` and the fitted value for
#' each set of values taken by the values of 'var' averaged within the values of predictors
Expand Down Expand Up @@ -337,7 +340,7 @@ partial_dependence.RandomForest <- function(fit, var, cutoff = 10, interaction =
#' @export
partial_dependence.rfsrc <- function(fit, var, cutoff = 10, interaction = FALSE,
ci = TRUE, confidence = .95,
empirical = TRUE, parallel = FALSE, type = "") {
empirical = TRUE, parallel = FALSE, type = "", ...) {
pkg <- "randomForestSRC"
y <- fit$yvar
if (!(class(y) %in% c("numeric", "integer"))) ci <- FALSE
Expand Down
2 changes: 1 addition & 1 deletion R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ plot_pd <- function(pd, geom = "line", title = "", facet_var) {
p <- ggplot(pd, aes_string(colnames(pd)[1], colnames(pd)[2]))
if (geom == "line") {
p <- p + geom_point() + geom_line()
if (atts$ci) p <- p + geom_ribbon(aes(ymin = low, ymax = high), alpha = .25)
if (atts$ci) p <- p + geom_ribbon(aes_string(ymin = "low", ymax = "high"), alpha = .25)
} else if (geom == "bar")
p <- p + geom_bar(stat = "identity")
else stop("Unsupported geom")
Expand Down
42 changes: 31 additions & 11 deletions R/prox.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,60 @@
#' Extracts proximity matrices from random forest objects from the party, randomForest or randomForestSRC packages
#'
#' @param fit object of class 'RandomForest', 'randomForest', or 'rfsrc'
#' @param newdata new data with the same columns as the data used for \code{fit}
#' @param ... arguments to be passed to \code{extract_proximity}
#'
#' @export
extract_proximity <- function(fit, ...) UseMethod("extract_proximity")
extract_proximity <- function(fit, newdata) UseMethod("extract_proximity")
#' Extract proximity matrix from \code{randomForest} objects
#'
#' @param fit object of class randomForest called with proximity or oob.prox = TRUE
#' @param newdata new data with the same columns as the data used for \code{fit}
#' @param ... additional arguments to pass to predict.randomForest
#'
#' @return an n by n matrix
#' @export
extract_proximity.randomForest <- function(fit) {
if (is.null(fit$proximity))
stop("call randomForest with proximity or oob.prox = TRUE")
fit$proximity
extract_proximity.randomForest <- function(fit, newdata = NULL, ...) {
if (!is.null(newdata)) {
pred <- predict(fit, newdata = newdata, proximity = TRUE, ...)
if (!is.null(pred$oob.prox))
out <- pred$oob.prox
else if (!is.null(pred$prox))
out <- pred$prox
else stop("not sure what is up")
} else {
if (is.null(fit$proximity))
stop("call randomForest with proximity or oob.prox = TRUE")
fit$proximity
}
}
#' Extract proximity matrix from objects returned from \code{cforest}
#'
#' @importFrom party proximity
#' @param fit object of class 'RandomForest' from \code{cforest}
#' @param newdata a data.frame with the same columns as the training data
#' @param ... additional arguments to pass to proximity
#'
#' @return an n by n matrix
#' @export
extract_proximity.RandomForest <- function(fit, newdata = NULL) {
proximity(out, newdata)
extract_proximity.RandomForest <- function(fit, newdata = NULL, ...) {
proximity(fit, newdata, ...)
}
#' Extract proximity matrix from objects returned from \code{rfsrc}
#'
#' @param fit object of class 'rfsrc' from \code{rfsrc}
#' @param newdata new data with the same columns as the data used for \code{fit}
#' @param ... additional arguments to pass to predict.rfsrc
#'
#' @return an n by n matirx
#' @export
extract_proximity.rfsrc <- function(fit) {
if (is.null(fit$proximity))
stop("call rfsrc with proximity equal to TRUE, \"inbag\", \"oob\", or \"all\"")
fit$proximity
extract_proximity.rfsrc <- function(fit, newdata = NULL, ...) {
if (!is.null(newdata)) {
pred <- predict(fit, newdata = newdata, proximity = TRUE, ...)
out <- pred$prox
} else {
if (is.null(fit$proximity))
stop("call rfsrc with proximity equal to TRUE, \"inbag\", \"oob\", or \"all\"")
fit$proximity
}
}
4 changes: 3 additions & 1 deletion man/extract_proximity.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
\alias{extract_proximity}
\title{Methods to extract proximity matrices from random forests}
\usage{
extract_proximity(fit, ...)
extract_proximity(fit, newdata)
}
\arguments{
\item{fit}{object of class 'RandomForest', 'randomForest', or 'rfsrc'}

\item{newdata}{new data with the same columns as the data used for \code{fit}}

\item{...}{arguments to be passed to \code{extract_proximity}}
}
\description{
Expand Down
4 changes: 3 additions & 1 deletion man/extract_proximity.randomForest.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
\alias{extract_proximity.RandomForest}
\title{Extract proximity matrix from objects returned from \code{cforest}}
\usage{
\method{extract_proximity}{RandomForest}(fit, newdata = NULL)
\method{extract_proximity}{RandomForest}(fit, newdata = NULL, ...)
}
\arguments{
\item{fit}{object of class 'RandomForest' from \code{cforest}}

\item{newdata}{a data.frame with the same columns as the training data}

\item{...}{additional arguments to pass to proximity}
}
\value{
an n by n matrix
Expand Down
6 changes: 5 additions & 1 deletion man/extract_proximity.rfsrc.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@
\alias{extract_proximity.rfsrc}
\title{Extract proximity matrix from objects returned from \code{rfsrc}}
\usage{
\method{extract_proximity}{rfsrc}(fit)
\method{extract_proximity}{rfsrc}(fit, newdata = NULL, ...)
}
\arguments{
\item{fit}{object of class 'rfsrc' from \code{rfsrc}}

\item{newdata}{new data with the same columns as the data used for \code{fit}}

\item{...}{additional arguments to pass to predict.rfsrc}
}
\value{
an n by n matirx
Expand Down
4 changes: 3 additions & 1 deletion man/partial_dependence.randomForest.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
\usage{
\method{partial_dependence}{RandomForest}(fit, var, cutoff = 10,
interaction = FALSE, ci = TRUE, confidence = 0.95, empirical = TRUE,
parallel = FALSE, type = "")
parallel = FALSE, type = "", ...)
}
\arguments{
\item{fit}{an object of class 'RandomForest' returned from \code{cforest}}
Expand All @@ -27,6 +27,8 @@ partial dependence calculation}
\item{parallel}{logical indicator of whether a parallel backend should be used if registered}

\item{type}{with classification, default "" gives most probable class for classification and "prob" gives class probabilities}

\item{...}{additional arguments to be passed to nothing}
}
\value{
a dataframe with columns for each predictor in `var` and the fitted value for
Expand Down
4 changes: 3 additions & 1 deletion man/partial_dependence.rfsrc.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
\usage{
\method{partial_dependence}{rfsrc}(fit, var, cutoff = 10,
interaction = FALSE, ci = TRUE, confidence = 0.95, empirical = TRUE,
parallel = FALSE, type = "")
parallel = FALSE, type = "", ...)
}
\arguments{
\item{fit}{an object of class 'rfsrc' returned from \code{rfsrc}}
Expand All @@ -28,6 +28,8 @@ partial dependence calculation}
\item{parallel}{logical indicator of whether a parallel backend should be used if registered}

\item{type}{with classification, default "" gives most probable class for classification and "prob" gives class probabilities}

\item{...}{additional arguments to be passed to nothing}
}
\value{
a dataframe with columns for each predictor in `var` and the fitted value for
Expand Down
4 changes: 3 additions & 1 deletion man/var_est.rfsrc.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
\alias{var_est.rfsrc}
\title{Variance estimation for rfsrc objects from package \code{randomForestSRC}}
\usage{
\method{var_est}{rfsrc}(fit, df)
\method{var_est}{rfsrc}(fit, df, ...)
}
\arguments{
\item{fit}{an predict object of class 'rfsrc' returned from \code{rfsrc}}

\item{df}{dataframe to be used for prediction}

\item{...}{additional arguments to be passed to predict.rfsrc}
}
\value{
a dataframe with two columns: 'prediction' and 'variance', where the former is the prediction calculated using the inbag data, and the variance is calculated using the bias corrected infinitesimal bootstrap from Wager, Efron, and Tibsharani (2014).
Expand Down
4 changes: 3 additions & 1 deletion man/variable_importance.rfsrc.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
\alias{variable_importance.rfsrc}
\title{Variable importance for rfsrc objects}
\usage{
\method{variable_importance}{rfsrc}(fit, type = "permute",
\method{variable_importance}{rfsrc}(fit, ..., type = "permute",
class_levels = FALSE)
}
\arguments{
\item{fit}{an object of class 'rfsrc' returned from \code{rfsrc}}

\item{...}{further arguments to be passed to nothing}

\item{type}{character equal to "permute", "random", "permute.ensemble", or "random.ensemble"
this the \code{permute} argument must equal this value in the call to rfsrc}

Expand Down
Loading

0 comments on commit 74ecc90

Please sign in to comment.