Skip to content

Commit

Permalink
Merge pull request #149 from zachmayer/100%
Browse files Browse the repository at this point in the history
100%
  • Loading branch information
zachmayer committed Jun 24, 2015
2 parents 330b74c + d8a266a commit daeb768
Show file tree
Hide file tree
Showing 14 changed files with 402 additions and 111 deletions.
33 changes: 25 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
language: r
warnings_are_errors: true
sudo: required
# Sample .travis.yml for R projects from https://github.com/craigcitro/r-travis

language: c

env:
global:
Expand All @@ -12,11 +12,28 @@ env:
- NOT_CRAN="true"
- NOT_CRAN="false"

r_github_packages:
- jimhester/covr@4cae287abde5e2cf0c3c73aedeaec4d902dfd3ed
- jimhester/lintr@21607f469c2ce4f999d04a836f9348038a7282e2
- hadley/devtools@52bc15bb7eb87105bbf799d7f784054921146c02
- hadley/testthat@c67018fa4970ee3390ea2056efe56726626b07e3
before_install:
- curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
- chmod 755 ./travis-tool.sh
- ./travis-tool.sh bootstrap

install:
- ./travis-tool.sh install_deps
- ./travis-tool.sh github_package igraph/rigraph
- ./travis-tool.sh github_package jimhester/covr@4cae287abde5e2cf0c3c73aedeaec4d902dfd3ed
- ./travis-tool.sh github_package jimhester/lintr@21607f469c2ce4f999d04a836f9348038a7282e2
- ./travis-tool.sh github_package hadley/devtools@52bc15bb7eb87105bbf799d7f784054921146c02
- ./travis-tool.sh github_package hadley/testthat@c67018fa4970ee3390ea2056efe56726626b07e3

script: ./travis-tool.sh run_tests

on_failure:
- ./travis-tool.sh dump_logs

after_success:
- Rscript -e 'library(covr);coveralls()'

notifications:
email:
on_success: change
on_failure: change
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: caretEnsemble
Type: Package
Title: Ensembles of Caret Models
Version: 1.0.2
Version: 1.0.3
Date: 2015-01-14
Authors@R: c(person(c("Zachary", "A."), "Deane-Mayer", role = c("aut", "cre"),
email = "zach.mayer@gmail.com"),
Expand All @@ -22,6 +22,7 @@ Suggests:
testthat,
lintr,
randomForest,
glmnet,
rpart,
kernlab,
nnet,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2 (4.1.1): do not edit by hand

S3method(autoplot,caretEnsemble)
S3method(getMetric,train)
S3method(plot,caretEnsemble)
S3method(plot,caretStack)
S3method(predict,caretEnsemble)
Expand Down
6 changes: 0 additions & 6 deletions R/OptRMSE.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,5 @@ greedOptRMSE <- function(X, Y, iter = 100L){
}
weights2 <- weights/sum(weights)
maxtest <- sqrt(sum((X %*% weights2 - Y) ^ 2L, na.rm=TRUE))
if(stopper < maxtest){
testresult <- round(maxtest/stopper, 5) * 100
wstr <- paste0("Optimized weights not better than best model. Ensembled result is ",
testresult, "%", " of best model RMSE. Try more iterations.")
message(wstr)
}
return(weights)
}
49 changes: 10 additions & 39 deletions R/caretEnsemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,6 @@ predict.caretEnsemble <- function(object, keepNA = TRUE, se = FALSE, return_weig
}
modtype <- extractModelTypes(object$models)
preds <- predict(object$models, ...)
if(!anyNA(preds)){
keepNA <- TRUE
}
if(keepNA == TRUE){
if(anyNA(preds)){
message("Predictions being made only for cases with complete data")
Expand Down Expand Up @@ -242,19 +239,21 @@ extractModRes <- function(ensemble){
return(modRes)
}

#' Extract a model accuracy metric from an S3 object.
#' @param x an object with model performanc metrics
#' @param metric a character, either "RMSE" or "AUC" indicating which metric to extract
#' @return A numeric representing the metric desired metric.
#' Extract accuracy metrics from a model
#' @rdname metrics
#' @export
getMetric <- function(x, metric){
getMetric <- function(x, ...){
UseMethod("getMetric")
}

#' Extract a model accuracy metric from a \code{\link{train}} object.
#' @param x a caretEnsemble object
#' @param metric Which metric to extract
#' @param ... Passed between metric functions
#' @return A numeric representing the metric desired metric.
#' @rdname metrics
getMetric.train <- function(x, metric= c("AUC", "RMSE")){
#' @export
getMetric.train <- function(x, metric= c("AUC", "RMSE"), ...){
if(missing(metric)){
metric <- ifelse(x$modelType == "Regression", "RMSE", "AUC")
warning("Metric not specified, so default is being chosen.")
Expand Down Expand Up @@ -451,7 +450,7 @@ varImpFrame <- function(x){
#' @return A numeric of the residuals.
residuals.caretEnsemble <- function(object, ...){
if(is.null(object$modelType)){
object$modelType <- extractModelTypes(object$models)
object$modelType <- extractModelTypes(object$models)[1]
}
if(object$modelType == "Regression"){
yhat <- predict(object)
Expand All @@ -469,31 +468,6 @@ residuals.caretEnsemble <- function(object, ...){
}
}

#' @keywords internal
residuals2.train <- function(object){
if(object$modelType == "Regression"){
y <- object$trainingData$.outcome
resid <- residuals(object)
yhat <- predict(object)
data <- data.frame(y = y, yhat = yhat, .resid = resid,
method = object$method)
return(data)
} else if(object$modelType == "Classification"){
yhat <- predict(object, type = "prob")
if (!is.null(ncol(yhat))){
yhat <- yhat[, 1]
}
y <- as.character(object$trainingData$.outcome)
z <- table(y)
prevOutcome <- names(z)[z == max(z)]
y <- ifelse(y == prevOutcome, 1, 0)
resid <- y - yhat
data <- data.frame(y = y, yhat = yhat, .resid = resid,
method = object$method)
return(data)
}
}

#' @title Calculate the residuals from all component models of a caretEnsemble.
#' @description This function calculates raw residuals for both regression and
#' classification \code{train} objects within a \code{\link{caretEnsemble}}.
Expand Down Expand Up @@ -534,7 +508,6 @@ multiResiduals <- function(object, ...){
#' @param data a data set, defaults to the data used to fit the model
#' @param ... additional arguments to pass to fortify
#' @return The original data with extra columns for fitted values and residuals
#' @importFrom digest digest
fortify.caretEnsemble <- function(model, data = NULL, ...){
data <- extractModFrame(model)
data$y <- model$models[[1]]$trainingData$.outcome
Expand Down Expand Up @@ -641,8 +614,6 @@ autoplot.caretEnsemble <- function(object, which = c(1:6), mfrow = c(3, 2),
xvars <- names(plotdf)[!names(plotdf) %in% c("(Intercept)", ".outcome", "y",
".fitted", ".resid")]
xvars <- sample(xvars, 2)
} else {
xvars <- names(plotdf)[xvars]
}
# TODO: Insert checks for length of xvars here
residOut <- multiResiduals(object)
Expand All @@ -669,7 +640,7 @@ autoplot.caretEnsemble <- function(object, which = c(1:6), mfrow = c(3, 2),
geom_smooth(se = FALSE) + scale_x_continuous(xvars[2]) +
scale_y_continuous("Residuals") +
labs(title = paste0("Residuals Against ", xvars[2])) + theme_bw()
grid.arrange(g1, g2, g3, g4, g5, g6, newpage=FALSE)
grid.arrange(g1, g2, g3, g4, g5, g6, ncol=2)
}

utils::globalVariables(c(".fitted", ".resid", "method", "id", "yhat",
Expand Down
5 changes: 1 addition & 4 deletions R/caretStack.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,5 @@ plot.caretStack <- function(x, ...){
#' dotplot.caretStack(meta_model)
#' }
dotplot.caretStack <- function(x, data=NULL, ...){
final <- list(x$ens_model)
names(final) <- paste(paste(x$ens_model$method, collapse="_"), "ENSEMBLE", sep="_")
base <- x$models
dotplot(resamples(c(final, base)), data=data, ...)
dotplot(resamples(x$models), data=data, ...)
}
14 changes: 8 additions & 6 deletions man/metrics.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,22 @@
\alias{getMetric}
\alias{getMetric.train}
\alias{getRMSE}
\title{Extract a model accuracy metric from an S3 object.}
\title{Extract accuracy metrics from a model}
\usage{
getMetric(x, metric)
getMetric(x, ...)

\method{getMetric}{train}(x, metric = c("AUC", "RMSE"))
\method{getMetric}{train}(x, metric = c("AUC", "RMSE"), ...)

getAUC(x)

getRMSE(x)
}
\arguments{
\item{x}{an object with model performanc metrics}
\item{x}{a caretEnsemble object}

\item{metric}{a character, either "RMSE" or "AUC" indicating which metric to extract}
\item{...}{Passed between metric functions}

\item{metric}{Which metric to extract}
}
\value{
A numeric representing the metric desired metric.
Expand All @@ -28,7 +30,7 @@ A numeric for the AUC of the best model
A numeric for the RMSE of the best model
}
\description{
Extract a model accuracy metric from an S3 object.
Extract accuracy metrics from a model

Extract a model accuracy metric from a \code{\link{train}} object.

Expand Down

2 comments on commit daeb768

@jknowles
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is looking really awesome Zach. I'm glad you put in the time to clean all this stuff up, it'll make the enhancements in the next few versions easier to add on without breaking things.

@zachmayer
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I've got a bunch of performance enhancements I want to make, so I can work with bigger datasets.

Please sign in to comment.