Skip to content

Commit

Permalink
Handle POSIXct in linear time
Browse files Browse the repository at this point in the history
  • Loading branch information
crowding authored and wibeasley committed Jan 3, 2014
1 parent f7e32ba commit 1d7c317
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 65 deletions.
120 changes: 65 additions & 55 deletions R/rbind-fill.r
Original file line number Diff line number Diff line change
Expand Up @@ -97,81 +97,82 @@ allocate_column <- function(example, nrows, dfs, var) {
#it. Inspection, even something as innocuous as is.matrix(column),
#will setting NAMED to 2 and forcing a copy on the
#next modification.
a <- attributes(example)
type <- typeof(example)
handler <- type

#this statement may be altered below
assignment <- quote(column[rows] <<- what)

if (inherits(example, "POSIXt")) {
#this should get folded in to general attribute handling as well...
tzone <- attr(example, "tzone")
column <- structure(as.POSIXct(rep(NA, nrows)), tzone=tzone)
} else {
a$names <- NULL
isList <- is.recursive(example)

a <- attributes(example)
a$names <- NULL
isList <- is.recursive(example)
if (is.array(example)) {

if (is.array(example)) {
a$dimnames <- NULL #todo: check if rbind handles these

a$dimnames <- NULL #todo: check if rbind handles these
# Check that all other args have consistent dims
df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
dims <- unique(lapply(dfs[df_has], function(df) dim(df[[var]])[-1]))
if (length(dims) > 1)
stop("Array variable ", var, " has inconsistent dims")

# Check that all other args have consistent dims
df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
dims <- unique(lapply(dfs[df_has], function(df) dim(df[[var]])[-1]))
if (length(dims) > 1)
stop("Array variable ", var, " has inconsistent dims")
# add empty args
assignment[[2]] <- as.call(
c(as.list(assignment[[2]]),
rep(list(quote(expr = )), length(dims[[1]]))))

# add empty args
assignment[[2]] <- as.call(
c(as.list(assignment[[2]]),
rep(list(quote(expr = )), length(dims[[1]]))))
if (length(dims[[1]]) == 0) { #is dropping dims necessary for 1d arrays?
a$dim <- NULL
} else {
a$dim <- c(nrows, dim(example)[-1])
}

if (length(dims[[1]]) == 0) { #is dropping dims necessary for 1d arrays?
a$dim <- NULL
} else {
a$dim <- c(nrows, dim(example)[-1])
}
length <- prod(a$dim)

length <- prod(a$dim)
} else {
length <- nrows
}

if (is.factor(example)) {
df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
isfactor <- vapply(dfs[df_has], function(df) is.factor(df[[var]]), FALSE)
if (all(isfactor)) {
#will be referenced by the mutator
levels <- unique(unlist(lapply(dfs[df_has],
function(df) levels(df[[var]]))))
class <- a$class
a$levels <- levels
a$class <- NULL #postpone setting class
handler <- "factor"
} else {
length <- nrows
#fall back on character
type <- "character"
handler <- "character"
a$class <- NULL
a$levels <- NULL
}
}

if (is.factor(example)) {
df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
isfactor <- vapply(dfs[df_has], function(df) is.factor(df[[var]]), FALSE)
if (all(isfactor)) {
#will be referenced by the mutator
levels <- unique(unlist(lapply(dfs[df_has],
function(df) levels(df[[var]]))))
class <- a$class
a$levels <- levels
a$class <- NULL #postpone setting class
handler <- "factor"
} else {
#fall back on character
type <- "character"
handler <- "character"
a$class <- NULL
a$levels <- NULL
}
}
if (inherits(example, "POSIXt")) {
tzone <- attr(example, "tzone")
a$class <- NULL
class <- c("POSIXct", "POSIXt")
type <- "double"
handler <- "time"
}

column <- vector(type, length)
if (!isList) {
column[] <- NA
}
attributes(column) <- a
column <- vector(type, length)
if (!isList) {
column[] <- NA
}
attributes(column) <- a

#It is especially important never to inspect the column when in the main
#rbind.fill loop. To avoid inspecting the column, we've done
#specialization (figuring out the array assignment form and data
#type) up front, and instead of returning the column, we return a
#mutator function that closes over the column.
#It is especially important never to inspect the column when in the
#main rbind.fill loop. To avoid that, we've done specialization
#(figuring out the array assignment form and data type) ahead of
#time, and instead of returning the column, we return a mutator
#function that closes over the column.
switch(
handler,
character = function(rows, what) {
Expand All @@ -190,6 +191,15 @@ allocate_column <- function(example, nrows, dfs, var) {
eval(assignment)
}
},
time = function(rows, what) {
if (nargs() == 0) {
class(column) <<- class
column
} else {
what <- as.POSIXct(what, tz=tzone)
eval(assignment)
}
},
function(rows, what) {
if(nargs() == 0) return(column)
eval(assignment)
Expand Down
22 changes: 12 additions & 10 deletions inst/tests/test-rbind.r
Original file line number Diff line number Diff line change
Expand Up @@ -230,25 +230,27 @@ get_rbind_times <- function(...) {
expect_linear_enough <- function(timings, size=2^10, threshold=0.03) {
#expect that no more than `threshold` of a `size` input's runtime is
#accounted for by quadratic behavior
model <- lm(user.self ~ size + I(size^2), timings)
p <- predict(model, newdata=data.frame(size=2^10), type="terms")
expect_that(p[2] / p[1] < 0.03, is_true(), NULL, NULL)
#don't understand what predict.lm does w/ built-in intercepts
timings <- mutate(timings, intercept=1)
model <- lm(user.self ~ size + I(size^2) - 1 + intercept, timings)
p <- predict(model, newdata=data.frame(size=2^10, intercept=1), type="terms")
expect_that(p[2] / p[1] < 0.2, is_true(), NULL, NULL)
}

test_that("rbind.fill performance linear", {
rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
classes=c("numeric", "character", "array"))
expect_linear_enough(rbind.times)
timings <- get_rbind_times(data.frame(size = 2^(1:10)),
classes=c("numeric", "character", "array"))
expect_linear_enough(timings)
})

test_that("rbind.fill performance linear with factors", {
rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
timings <- get_rbind_times(data.frame(size = 2^(1:10)),
classes=c("factor"))
expect_linear_enough(rbind.times)
expect_linear_enough(timings)
})

test_that("rbind.fill performance linear with times", {
rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
timings <- get_rbind_times(data.frame(size = 2^(1:10)),
classes=c("time"))
expect_linear_enough(rbind.times)
expect_linear_enough(timings)
})

0 comments on commit 1d7c317

Please sign in to comment.