-
Notifications
You must be signed in to change notification settings - Fork 315
/
aggregate.R
244 lines (227 loc) · 8.43 KB
/
aggregate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File src/library/stats/R/aggregate.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2018 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
aggregate <-
function(x, ...)
UseMethod("aggregate")
aggregate.default <-
function(x, ...)
{
if(is.ts(x))
aggregate.ts(as.ts(x), ...)
else
aggregate.data.frame(as.data.frame(x), ...)
}
aggregate.data.frame <-
function(x, by, FUN, ..., simplify = TRUE, drop = TRUE)
{
if(!is.data.frame(x)) x <- as.data.frame(x)
## Do this here to avoid masking by non-function (could happen)
FUN <- match.fun(FUN)
if(NROW(x) == 0L) stop("no rows to aggregate")
if(NCOL(x) == 0L) {
## fake it
x <- data.frame(x = rep(1, NROW(x)))
return(aggregate.data.frame(x, by, function(x) 0L)[seq_along(by)])
}
if(!is.list(by))
stop("'by' must be a list")
if(is.null(names(by)) && length(by))
names(by) <- paste0("Group.", seq_along(by))
else {
nam <- names(by)
ind <- which(!nzchar(nam))
names(by)[ind] <- paste0("Group.", ind)
}
if(any(lengths(by) != NROW(x)))
stop("arguments must have same length")
y <- as.data.frame(by, stringsAsFactors = FALSE)
keep <- complete.cases(by)
y <- y[keep, , drop = FALSE]
x <- x[keep, , drop = FALSE]
nrx <- NROW(x)
## Generate a group identifier vector with integers and dots.
ident <- function(x) {
y <- as.factor(x)
l <- length(levels(y))
s <- as.character(seq_len(l))
n <- nchar(s)
levels(y) <- paste0(strrep("0", n[l] - n), s)
y # levels used for drop = FALSE
}
grp <- lapply(y, ident)
multi.y <- !drop && ncol(y)
if(multi.y) {
lev <- lapply(grp, levels)
y <- as.list(y)
for (i in seq_along(y)) {
z <- y[[i]][match(lev[[i]], grp[[i]])]
if(is.factor(z) && any(keep <- is.na(z)))
z[keep] <- levels(z)[keep]
y[[i]] <- z
}
eGrid <- function(L)
expand.grid(L, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
y <- eGrid(y)
}
grp <- if(ncol(y)) {
names(grp) <- NULL
do.call(paste, c(rev(grp), list(sep = ".")))
} else
integer(nrx)
if(multi.y) {
lev <- as.list(eGrid(lev))
names(lev) <- NULL
lev <- do.call(paste, c(rev(lev), list(sep = ".")))
} else
y <- y[match(sort(unique(grp)), grp, 0L), , drop = FALSE]
z <- lapply(x,
function(e) {
## In case of a common length > 1, sapply() gives
## the transpose of what we need ...
ans <- lapply(X = unname(split(e, grp)), FUN = FUN, ...)
if(simplify &&
length(len <- unique(lengths(ans))) == 1L) {
## this used to lose classes
if(len == 1L) {
cl <- lapply(ans, oldClass)
cl1 <- cl[[1L]]
ans <- unlist(ans, recursive = FALSE, use.names = FALSE)
if (!is.null(cl1) &&
all(vapply(cl, identical, NA, y = cl1)))
class(ans) <- cl1
} else if(len > 1L)
ans <- matrix(unlist(ans, recursive = FALSE, use.names = FALSE),
ncol = len,
byrow = TRUE,
dimnames =
if(!is.null(nms <- names(ans[[1L]])))
list(NULL, nms) ## else NULL
)
}
ans
})
len <- length(y)
if(multi.y) {
keep <- match(lev, sort(unique(grp)))
for(i in seq_along(z))
y[[len + i]] <- if(is.matrix(z[[i]]))
z[[i]][keep, , drop = FALSE]
else z[[i]][keep]
} else
for(i in seq_along(z))
y[[len + i]] <- z[[i]]
names(y) <- c(names(by), names(x))
row.names(y) <- NULL
y
}
aggregate.formula <-
function(formula, data, FUN, ..., subset, na.action = na.omit)
{
if(missing(formula) || !inherits(formula, "formula"))
stop("'formula' missing or incorrect")
if(length(formula) != 3L)
stop("'formula' must have both left and right hand sides")
m <- match.call(expand.dots = FALSE)
if(is.matrix(eval(m$data, parent.frame())))
m$data <- as.data.frame(data)
m$... <- m$FUN <- NULL
## need stats:: for non-standard evaluation
m[[1L]] <- quote(stats::model.frame)
if (formula[[2L]] == ".") {
## LHS is a dot, expand it ...
##rhs <- unlist(strsplit(deparse(formula[[3L]]), " *[:+] *"))
## <NOTE>
## Note that this will not do quite the right thing in case the
## RHS contains transformed variables, such that
## setdiff(rhs, names(data))
## is non-empty ...
##lhs <- sprintf("cbind(%s)",
## paste(setdiff(names(data), rhs), collapse = ","))
## formula[[2L]] <- parse(text = lhs)[[1L]]
## </NOTE>
## New logic May 2012 --pd
## Dot expansion:
## lhs ends up as quote(cbind(v1, v2, ....)) using all variables in
## data, except those that are used on the RHS.
## This version uses terms() to get the rhs variables, which means
## that it will NOT remove a variable from the expansion if a
## transformation of it is on the RHS of the formula.
rhs <- as.list(attr(terms(formula[-2L]),"variables")[-1])
lhs <- as.call(c(quote(cbind),
setdiff(lapply(names(data), as.name),
rhs)
)
)
formula[[2L]] <- lhs
m[[2L]] <- formula
}
mf <- eval(m, parent.frame())
if(is.matrix(mf[[1L]])) {
## LHS is a cbind() combo, convert to data frame and fix names.
## Commented out May 2012 (seems to work without it) -- pd
##lhs <- setNames(as.data.frame(mf[[1L]]),
## as.character(m[[2L]][[2L]])[-1L])
lhs <- as.data.frame(mf[[1L]])
aggregate.data.frame(lhs, mf[-1L], FUN = FUN, ...)
}
else
aggregate.data.frame(mf[1L], mf[-1L], FUN = FUN, ...)
}
aggregate.ts <-
function(x, nfrequency = 1, FUN = sum, ndeltat = 1,
ts.eps = getOption("ts.eps"), ...)
{
x <- as.ts(x)
ofrequency <- tsp(x)[3L]
## do this here to avoid masking by non-function (could happen)
FUN <- match.fun(FUN)
## Set up the new frequency, and make sure it is an integer.
if(missing(nfrequency))
nfrequency <- 1 / ndeltat
if((nfrequency > 1) &&
(abs(nfrequency - round(nfrequency)) < ts.eps))
nfrequency <- round(nfrequency)
if(nfrequency == ofrequency)
return(x)
ratio <- ofrequency /nfrequency
if(abs(ratio - round(ratio)) > ts.eps)
stop(gettextf("cannot change frequency from %g to %g",
ofrequency, nfrequency), domain = NA)
## The desired result is obtained by applying FUN to blocks of
## length ofrequency/nfrequency, for each of the variables in x.
## We first get the new start and end right, and then break x into
## such blocks by reshaping it into an array and setting dim.
## avoid e.g. 1.0 %/% 0.2
## https://stat.ethz.ch/pipermail/r-devel/2010-April/057225.html
len <- trunc((ofrequency / nfrequency) + ts.eps)
mat <- is.matrix(x)
if(mat) cn <- colnames(x)
## nstart <- ceiling(tsp(x)[1L] * nfrequency) / nfrequency
## x <- as.matrix(window(x, start = nstart))
nstart <- tsp(x)[1L]
## Can't use nstart <- start(x) as this causes problems if
## you get a vector of length 2.
x <- as.matrix(x)
nend <- floor(nrow(x) / len) * len
x <- apply(array(c(x[1 : nend, ]),
dim = c(len, nend / len, ncol(x))),
MARGIN = c(2L, 3L), FUN = FUN, ...)
if(!mat) x <- as.vector(x)
else colnames(x) <- cn
ts(x, start = nstart, frequency = nfrequency)
}