lengths(x) now also works (trivially) for atomic vectors --> can use …

…it safely in many places git-svn-id: https://svn.r-project.org/R/trunk@68292 00db46b3-68df-0310-9c12-caf00c1e9a41
wch · May 1, 2015 · 8305f52 · 8305f52
1 parent 4250fad
commit 8305f52
Show file tree

Hide file tree

Showing 13 changed files with 95 additions and 63 deletions.
diff --git a/doc/NEWS.Rd b/doc/NEWS.Rd
@@ -34,6 +34,10 @@
 
       \item \code{str(x)} now displays \code{"Time-Series"} also for
       matrix (multivariate) time-series, i.e. when \code{is.ts(x)} is true.
+
+      \item \code{lengths(x)} now also works (trivially) for atomic
+      \code{x} and hence can be used more generally as an efficient
+      replacement of \code{sapply(x, length)} and similar.
     }
   }
 

diff --git a/src/library/base/R/apply.R b/src/library/base/R/apply.R
@@ -84,28 +84,28 @@ apply <- function(X, MARGIN, FUN, ...)
 
     ans.names <- names(ans[[1L]])
     if(!ans.list)
-	ans.list <- any(unlist(lapply(ans, length)) != l.ans)
+	ans.list <- any(lengths(ans) != l.ans)
     if(!ans.list && length(ans.names)) {
         all.same <- vapply(ans, function(x) identical(names(x), ans.names), NA)
         if (!all(all.same)) ans.names <- NULL
     }
     len.a <- if(ans.list) d2 else length(ans <- unlist(ans, recursive = FALSE))
     if(length(MARGIN) == 1L && len.a == d2) {
 	names(ans) <- if(length(dn.ans[[1L]])) dn.ans[[1L]] # else NULL
-	return(ans)
+	ans
     }
-    if(len.a == d2)
-	return(array(ans, d.ans, dn.ans))
-    if(len.a && len.a %% d2 == 0L) {
+    else if(len.a == d2)
+	array(ans, d.ans, dn.ans)
+    else if(len.a && len.a %% d2 == 0L) {
         if(is.null(dn.ans)) dn.ans <- vector(mode="list", length(d.ans))
 	dn1 <- list(ans.names)
 	if(length(dn.call) && !is.null(n1 <- names(dn <- dn.call[1])) &&
 	   nzchar(n1) && length(ans.names) == length(dn[[1]]))
 	    names(dn1) <- n1
 	dn.ans <- c(dn1, dn.ans)
-	return(array(ans, c(len.a %/% d2, d.ans),
-		     if(!is.null(names(dn.ans)) || !all(vapply(dn.ans, is.null, NA)))
-			 dn.ans))
-    }
-    return(ans)
+	array(ans, c(len.a %/% d2, d.ans),
+	      if(!is.null(names(dn.ans)) || !all(vapply(dn.ans, is.null, NA)))
+		  dn.ans)
+    } else
+	ans
 }
diff --git a/src/library/base/R/sapply.R b/src/library/base/R/sapply.R
@@ -1,7 +1,7 @@
 #  File src/library/base/R/sapply.R
 #  Part of the R package, http://www.R-project.org
 #
-#  Copyright (C) 1995-2012 The R Core Team
+#  Copyright (C) 1995-2015 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -26,7 +26,7 @@
 ##' @return x itself, or an array if the simplification "is sensible"
 simplify2array <- function(x, higher = TRUE)
 {
-    if(length(common.len <- unique(unlist(lapply(x, length)))) > 1L)
+    if(length(common.len <- unique(lengths(x))) > 1L)
         return(x)
     if(common.len == 1L)
         unlist(x, recursive = FALSE)

diff --git a/src/library/base/R/tapply.R b/src/library/base/R/tapply.R
@@ -1,7 +1,7 @@
 #  File src/library/base/R/tapply.R
 #  Part of the R package, http://www.R-project.org
 #
-#  Copyright (C) 1995-2012 The R Core Team
+#  Copyright (C) 1995-2015 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -41,7 +41,7 @@ tapply <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE)
     if (is.null(FUN)) return(group)
     ans <- lapply(X = split(X, group), FUN = FUN, ...)
     index <- as.integer(names(ans))
-    if (simplify && all(unlist(lapply(ans, length)) == 1L)) {
+    if (simplify && all(lengths(ans) == 1L)) {
 	ansmat <- array(dim = extent, dimnames = namelist)
 	ans <- unlist(ans, recursive = FALSE)
     } else {

diff --git a/src/library/base/man/lengths.Rd b/src/library/base/man/lengths.Rd
@@ -5,15 +5,19 @@
 
 \name{lengths}
 \alias{lengths}
-\title{Lengths of List Elements}
+\title{Lengths of List or Vector Elements}
+\encoding{UTF-8}
 \description{
-  Get the length of each element in a list as an integer or numeric vector.
+  Get the length of each element of a \code{\link{list}} or atomic
+  vector (\code{\link{is.atomic}}) as an integer or numeric vector.
 }
 \usage{
 lengths(x, use.names = TRUE)
 }
 \arguments{
-  \item{x}{a \code{\link{list}} or list-like such as an \code{\link{expression}}.}
+  \item{x}{a \code{\link{list}}, list-like such as an
+    \code{\link{expression}} or an atomic vector (for which the result
+    is trivial).}
   \item{use.names}{logical indicating if the result should inherit the
     \code{\link{names}} from \code{x}.}
 }
@@ -23,6 +27,13 @@ lengths(x, use.names = TRUE)
   \code{length(x[[i]])} is called for all \code{i}, so any methods on
   \code{length} are considered.
 }
+\note{
+  One raison d'\enc{être}{etre} of \code{lengths(x)} is its use as a
+  more efficient version of \code{sapply(x, length)} and similar
+  \code{*apply} calls to \code{\link{length}}.  This is the reason why
+  \code{x} may be an atomic vector, even though \code{lengths(x)} is
+  trivial in that case.
+}
 \value{
   A non-negative \code{\link{integer}} of length \code{length(x)},
   except when any element has a length of more than

diff --git a/src/library/parallel/R/clusterApply.R b/src/library/parallel/R/clusterApply.R
@@ -1,7 +1,7 @@
 #  File src/library/parallel/R/clusterApply.R
 #  Part of the R package, http://www.R-project.org
 #
-#  Copyright (C) 1995-2013 The R Core Team
+#  Copyright (C) 1995-2015 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -99,7 +99,7 @@ clusterMap <- function (cl = NULL, fun, ..., MoreArgs = NULL, RECYCLE = TRUE,
     args <- list(...)
     if (length(args) == 0) stop("need at least one argument")
     .scheduling <- match.arg(.scheduling)
-    n <- sapply(args, length)
+    n <- lengths(args)
     if (RECYCLE) {
         vlen <- max(n)
         if(vlen && min(n) == 0L)
@@ -290,7 +290,7 @@ parApply <- function(cl = NULL, X, MARGIN, FUN, ...)
 
     ans.names <- names(ans[[1L]])
     if(!ans.list)
-	ans.list <- any(unlist(lapply(ans, length)) != l.ans)
+	ans.list <- any(lengths(ans) != l.ans)
     if(!ans.list && length(ans.names)) {
         all.same <- vapply(ans, function(x) identical(names(x), ans.names), NA)
         if (!all(all.same)) ans.names <- NULL

diff --git a/src/library/stats/R/aggregate.R b/src/library/stats/R/aggregate.R
@@ -53,7 +53,7 @@ function(x, by, FUN, ..., simplify = TRUE)
 
     nrx <- NROW(x)
 
-    if(any(unlist(lapply(by, length)) != nrx))
+    if(any(lengths(by) != nrx))
         stop("arguments must have same length")
 
     y <- as.data.frame(by, stringsAsFactors = FALSE)
@@ -84,7 +84,7 @@ function(x, by, FUN, ..., simplify = TRUE)
                     ## the transpose of what we need ...
                     ans <- lapply(X = split(e, grp), FUN = FUN, ...)
                     if(simplify &&
-                       length(len <- unique(sapply(ans, length))) == 1L) {
+                       length(len <- unique(lengths(ans))) == 1L) {
                         ## this used to lose classes
                         if(len == 1L) {
                             cl <- lapply(ans, oldClass)

diff --git a/src/library/stats/R/aov.R b/src/library/stats/R/aov.R
@@ -1,7 +1,7 @@
 #  File src/library/stats/R/aov.R
 #  Part of the R package, http://www.R-project.org
 #
-#  Copyright (C) 1995-2014 The R Core Team
+#  Copyright (C) 1995-2015 The R Core Team
 #  Copyright (C) 1998 B. D. Ripley
 #
 #  This program is free software; you can redistribute it and/or modify
@@ -350,7 +350,7 @@ summary.aov <- function(object, intercept = FALSE, split,
                 nmi <- nmeffect[1 + uasgn[i]]
                 nmrows <- c(nmrows, nmi)
                 if(!missing(split) && !is.na(int <- match(nmi, ns))) {
-                    df <- c(df, unlist(lapply(split[[int]], length)))
+		    df <- c(df, lengths(split[[int]]))
                     if(is.null(nms <- names(split[[int]])))
                         nms <- paste0("C", seq_along(split[[int]]))
                     ss <- c(ss, unlist(lapply(split[[int]],

diff --git a/src/library/stats/R/dummy.coef.R b/src/library/stats/R/dummy.coef.R
@@ -2,7 +2,7 @@
 #  Part of the R package, http://www.R-project.org
 #
 #  Copyright (C) 1998 B. D. Ripley
-#  Copyright (C) 1998-2013 The R Core Team
+#  Copyright (C) 1998-2015 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -31,7 +31,7 @@ dummy.coef.lm <- function(object, use.na=FALSE, ...)
     Terms <- delete.response(Terms)
     vars <- all.vars(Terms) # e.g. drops I(.), ...
     nxl <- setNames(rep.int(1, length(vars)), vars)
-    tmp <- vapply(xl, length, 1L)
+    tmp <- lengths(xl)
     nxl[names(tmp)] <- tmp
     lterms <- apply(facs, 2L, function(x) prod(nxl[x > 0]))
     nl <- sum(lterms)
@@ -97,7 +97,7 @@ dummy.coef.aovlist <- function(object, use.na = FALSE, ...)
 	return(as.list(coef(object)))
     }
     nxl <- setNames(rep.int(1, length(vars)), vars)
-    tmp <- unlist(lapply(xl, length))
+    tmp <- lengths(xl)
     nxl[names(tmp)] <- tmp
     lterms <- apply(facs, 2L, function(x) prod(nxl[x > 0]))
     nl <- sum(lterms)
@@ -160,7 +160,7 @@ print.dummy_coef <- function(x, ..., title)
 {
     terms <- names(x)
     n <- length(x)
-    nm <- max(vapply(x, length, 1L))
+    nm <- max(lengths(x))
     ans <- matrix("", 2L*n, nm)
     rn <- rep.int("", 2L*n)
     line <- 0

diff --git a/src/library/stats/R/lm.R b/src/library/stats/R/lm.R
@@ -1,7 +1,7 @@
 #  File src/library/stats/R/lm.R
 #  Part of the R package, http://www.R-project.org
 #
-#  Copyright (C) 1995-2014 The R Core Team
+#  Copyright (C) 1995-2015 The R Core Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -582,7 +582,7 @@ anova.lm <- function(object, ...)
         nmeffects <- c("(Intercept)", attr(object$terms, "term.labels"))
         tlabels <- nmeffects[1 + unique(asgn)]
         ss <- c(unlist(lapply(split(comp^2,asgn), sum)), ssr)
-        df <- c(unlist(lapply(split(asgn,  asgn), length)), dfr)
+        df <- c(lengths(split(asgn,  asgn)), dfr)
     } else {
         ss <- ssr
         df <- dfr

diff --git a/src/library/tools/R/QC.R b/src/library/tools/R/QC.R
@@ -305,7 +305,7 @@ function(x, ...)
           })
     }
 
-    as.character(unlist(lapply(which(sapply(x, length) > 0L), .fmt)))
+    as.character(unlist(lapply(which(lengths(x) > 0L), .fmt)))
 }
 
 ### * codoc
@@ -964,7 +964,7 @@ function(package, lib.loc = NULL)
     aliases <- lapply(db, .Rd_get_metadata, "alias")
     named_class <- lapply(aliases, grepl, pattern="-class$")
     nClass <- sApply(named_class, sum)
-    oneAlias <- sApply(aliases, length) == 1L
+    oneAlias <- lengths(aliases, use.names=FALSE) == 1L
     idx <- oneAlias | nClass == 1L
     if(!any(idx)) return(bad_Rd_objects)
     db <- db[idx]
@@ -1118,7 +1118,7 @@ function(package, lib.loc = NULL)
     ## we do the vectorized metadata computations first, and try to
     ## subscript whenever possible.
     aliases <- lapply(db, .Rd_get_metadata, "alias")
-    idx <- sapply(aliases, length) == 1L
+    idx <- lengths(aliases) == 1L
     if(!any(idx)) return(bad_Rd_objects)
     db <- db[idx]
     aliases <- aliases[idx]
@@ -1146,7 +1146,7 @@ function(package, lib.loc = NULL)
         re <- "\\\\code\\{([^}]*)\\}( *, *)?"
         m <- gregexpr(re, s)
         add <- regmatches(s, m)
-        lens <- sapply(add, length)
+        lens <- lengths(add)
         add <- sub(re, "\\1", unlist(add))
         ## The old code base simply dropped the \code markup via
         ##   gsub("\\\\code\\{(.*)\\}:?", "\\1", s)
@@ -1195,7 +1195,7 @@ function(package, lib.loc = NULL)
 
     Rd_var_names <- lapply(db, .get_data_frame_var_names)
 
-    idx <- (sapply(Rd_var_names, length) > 0L)
+    idx <- (lengths(Rd_var_names) > 0L)
     if(!length(idx)) return(bad_Rd_objects)
     aliases <- unlist(aliases[idx])
     Rd_var_names <- Rd_var_names[idx]
@@ -1714,7 +1714,7 @@ function(package, dir, lib.loc = NULL)
 
         ## Determine function names in the \usage.
         exprs <- db_usages[[docObj]]
-        exprs <- exprs[sapply(exprs, length) > 1L]
+        exprs <- exprs[lengths(exprs) > 1L]
         ## Ordinary functions.
         functions <-
             as.character(sapply(exprs,
@@ -1827,7 +1827,7 @@ function(package, dir, file, lib.loc = NULL,
                     pkgDLL <- unclass(DLLs[[1L]])$name # different for data.table
                     if(registration) {
                         reg <- getDLLRegisteredRoutines(DLLs[[1L]])
-                        have_registration <- sum(sapply(reg, length)) > 0L
+                        have_registration <- sum(lengths(reg)) > 0L
                     }
                 }
             }
@@ -3313,7 +3313,7 @@ function(x, ...)
     if(length(x$bad_maintainer))
         writeLines(c(gettext("Malformed maintainer field."), ""))
 
-    if(any(as.integer(sapply(x$bad_depends_or_suggests_or_imports, length)) > 0L )) {
+    if(any(as.integer(lengths(x$bad_depends_or_suggests_or_imports)) > 0L )) {
         bad <- x$bad_depends_or_suggests_or_imports
         writeLines(gettext("Malformed Depends or Suggests or Imports or Enhances field."))
         if(length(bad$bad_dep_entry)) {
@@ -3356,7 +3356,7 @@ function(x, ...)
 
     xx<- x; xx$bad_Title <- xx$bad_Description <- NULL
 
-    if(any(as.integer(sapply(xx, length)) > 0L))
+    if(any(as.integer(lengths(xx)) > 0L))
         writeLines(c(strwrap(gettextf("See section 'The DESCRIPTION file' in the 'Writing R Extensions' manual.")),
                      ""))
 
@@ -3401,7 +3401,7 @@ function(dfile)
             }
         }
         if(!status) {
-            llinks <- llinks[sapply(llinks, length) > 1L]
+            llinks <- llinks[lengths(llinks) > 1L]
             if(length(llinks)) links <- sapply(llinks, `[[`, 1L)
         }
         ## and check if we can actually link to these.
@@ -3605,7 +3605,7 @@ function(x, ...)
           c(gettext("Fields with non-ASCII values:"),
             .pretty_format(x$fields_with_non_ASCII_values))
       },
-      if(any(as.integer(sapply(x, length)) > 0L)) {
+      if(any(as.integer(lengths(x)) > 0L)) {
           c(strwrap(gettextf("See section 'The DESCRIPTION file' in the 'Writing R Extensions' manual.")),
             "")
       })
@@ -4570,7 +4570,7 @@ function(x, ...)
           .pretty_format(x[[i]]))
     }
 
-    as.character(unlist(lapply(which(sapply(x, length) > 0L), .fmt)))
+    as.character(unlist(lapply(which(lengths(x) > 0L), .fmt)))
 }
 
 ### * .check_package_ASCII_code
@@ -4697,7 +4697,7 @@ function(dir)
                                     OS_subdirs = c("unix", "windows")),
                collect_parse_woes)
     Sys.setlocale("LC_CTYPE", "C")
-    structure(out[sapply(out, length) > 0L],
+    structure(out[lengths(out) > 0L],
               class = "check_package_code_syntax")
 }
 
@@ -5947,7 +5947,7 @@ function(package, dir, lib.loc = NULL)
                         },
                                  error = function(e) character())
                     })
-        names(txts)[sapply(x, length) > 0L]
+        names(txts)[lengths(x) > 0L]
     }
 
     if(!missing(package)) {
@@ -6222,7 +6222,7 @@ function(cfile, dir = NULL)
         writeLines(sprintf("entry %d: invalid type %s",
                            pos, sQuote(entries)))
     }
-    pos <- which(!ind & (sapply(bad, length) > 0L))
+    pos <- which(!ind & (lengths(bad) > 0L))
     if(length(pos)) {
         writeLines(strwrap(sprintf("entry %d (%s): missing required field(s) %s",
                                    pos,
@@ -7373,7 +7373,7 @@ function(package, dir, lib.loc = NULL)
             files_with_duplicated_names
 
     files_grouped_by_aliases <-
-        split(rep.int(files, sapply(aliases, length)),
+        split(rep.int(files, lengths(aliases)),
               unlist(aliases, use.names = FALSE))
     files_with_duplicated_aliases <-
         files_grouped_by_aliases[sapply(files_grouped_by_aliases,