Skip to content

Commit d53b098

Browse files
author
murdoch
committed
Fix handling of encodings in vignettes.
git-svn-id: https://svn.r-project.org/R/trunk@67165 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 3541402 commit d53b098

File tree

7 files changed

+108
-79
lines changed

7 files changed

+108
-79
lines changed

doc/NEWS.Rd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,9 @@
411411
\item Loading packages incorrectly defining an S4 generic followed
412412
by a function of the same name caused an erroneous cyclic
413413
namespace dependency error.
414+
415+
\item Declared vignette encodings are now always passed to the
416+
vignette engine.
414417
}
415418
}
416419
}

doc/manual/R-exts.texi

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3140,15 +3140,26 @@ need to be declared to @LaTeX{} via a line like
31403140
@end example
31413141
@noindent
31423142
(It is also possible to use the more recent @samp{inputenx} @LaTeX{}
3143-
package.) If the encoding is UTF-8, this can also be declared using
3143+
package.) For files where this line is not needed (e.g. chapters
3144+
included within the body of a larger document, or non-Sweave
3145+
vignettes), the encoding may be declared using a comment like
3146+
@example
3147+
%!\VignetteEncoding@{UTF-8@}
3148+
@end example
3149+
@noindent
3150+
If the encoding is UTF-8, this can also be indicated using
31443151
the declaration
31453152
@example
31463153
%!\SweaveUTF8
31473154
@end example
31483155
@noindent
3149-
but be aware that @LaTeX{} may require the @samp{usepackage} declaration.
3150-
@command{R CMD check} will warn about any non-@acronym{ASCII}
3151-
vignettes it finds which do not have one of these declarations.
3156+
If no declaration is given in the vignette, it will be assumed to be
3157+
in the encoding declared for the package. If there is no encoding
3158+
declared in either place, then it is an error to use non-@acronym{ASCII}
3159+
characters in the vignette.
3160+
3161+
In any case, be aware that @LaTeX{} may require the @samp{usepackage}
3162+
declaration.
31523163

31533164
@code{Sweave()} will also parse and evaluate the @R{} code in each
31543165
chunk. The @R{} output will also be in the current locale (or @acronym{UTF-8}

src/library/tools/R/Vignettes.R

Lines changed: 77 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,15 @@ function(package, dir, lib.loc = NULL,
166166
file <- basename(file)
167167
name <- vigns$names[i]
168168
engine <- vignetteEngine(vigns$engines[i])
169-
169+
enc <- vigns$encodings[i]
170+
if (enc == "non-ASCII")
171+
stop(gettextf("Vignette '%s' is non-ASCII but has no declared encoding", name),
172+
domain = NA)
170173
if(tangle) {
171174
message(" Running ", sQuote(file))
172175
.eval_with_capture({
173176
result$tangle[[file]] <- tryCatch({
174-
engine$tangle(file, quiet = TRUE)
177+
engine$tangle(file, quiet = TRUE, encoding = enc)
175178
setwd(startdir) # in case a vignette changes the working dir
176179
find_vignette_product(name, by = "tangle", main = FALSE, engine = engine)
177180
}, error = function(e) e)
@@ -181,7 +184,7 @@ function(package, dir, lib.loc = NULL,
181184
setwd(startdir) # in case a vignette changes the working dir then errored out
182185
.eval_with_capture({
183186
result$weave[[file]] <- tryCatch({
184-
engine$weave(file, quiet = TRUE)
187+
engine$weave(file, quiet = TRUE, encoding = enc)
185188
setwd(startdir)
186189
find_vignette_product(name, by = "weave", engine = engine)
187190
}, error = function(e) e)
@@ -413,8 +416,11 @@ function(package, dir, subdirs = NULL, lib.loc = NULL, output = FALSE,
413416
stopifnot(length(names) == length(docs),
414417
length(engines) == length(docs),
415418
length(patterns) == length(docs), !anyDuplicated(docs))
416-
417-
z <- list(docs=docs, names=names, engines=engines, patterns=patterns,
419+
420+
defaultEncoding <- .get_package_metadata(dir)["Encoding"]
421+
encodings <- vapply(docs, getVignetteEncoding, "", default = defaultEncoding)
422+
423+
z <- list(docs=docs, names=names, engines=engines, patterns=patterns, encodings = encodings,
418424
dir = docdir, pkgdir = dir, msg = msg)
419425

420426
if (output) {
@@ -501,10 +507,14 @@ buildVignettes <-
501507
file <- basename(vigns$docs[i])
502508
name <- vigns$names[i]
503509
engine <- vignetteEngine(vigns$engine[i])
504-
510+
enc <- vigns$encodings[i]
511+
if (enc == "non-ASCII")
512+
stop(gettextf("Vignette '%s' is non-ASCII but has no declared encoding",
513+
file), domain = NA, call. = FALSE)
514+
505515
output <- tryCatch({
506516
## FIXME: run this in a separate process
507-
engine$weave(file, quiet = quiet)
517+
engine$weave(file, quiet = quiet, encoding = enc)
508518
setwd(startdir)
509519
find_vignette_product(name, by = "weave", engine = engine)
510520
}, error = function(e) {
@@ -522,7 +532,7 @@ buildVignettes <-
522532
if (tangle) { # This is set for all engines as of 3.0.2
523533
output <- tryCatch({
524534
## FIXME: run this in a separate process
525-
engine$tangle(file, quiet = quiet)
535+
engine$tangle(file, quiet = quiet, encoding = enc)
526536
setwd(startdir)
527537
find_vignette_product(name, by = "tangle", main = FALSE, engine = engine)
528538
}, error = function(e) {
@@ -586,7 +596,8 @@ buildVignettes <-
586596
buildVignette <-
587597
function(file, dir = ".", weave = TRUE, latex = TRUE, tangle = TRUE,
588598
quiet = TRUE, clean = TRUE, keep = character(),
589-
engine = NULL, buildPkg = NULL, ...)
599+
engine = NULL, buildPkg = NULL,
600+
encoding = getVignetteEncoding(file), ...)
590601
{
591602
if (!file_test("-f", file))
592603
stop(gettextf("file '%s' not found", file), domain = NA)
@@ -615,6 +626,9 @@ buildVignette <-
615626
file, paste(engine$package, engine$name, sep="::")),
616627
domain = NA)
617628

629+
if (encoding == "non-ASCII")
630+
stop(gettextf("Vignette '%s' is non-ASCII but has no declared encoding", name))
631+
618632
# Set output directory temporarily
619633
file <- file_path_as_absolute(file)
620634
olddir <- setwd(dir)
@@ -631,7 +645,7 @@ buildVignette <-
631645

632646
# Weave
633647
final <- if (weave) {
634-
engine$weave(file, quiet = quiet, ...)
648+
engine$weave(file, quiet = quiet, encoding = encoding, ...)
635649
setwd(tdir) # In case weave/vignette changed it
636650
output <- find_vignette_product(name, by = "weave", engine = engine)
637651

@@ -645,7 +659,7 @@ buildVignette <-
645659

646660
# Tangle
647661
sources <- if (tangle) {
648-
engine$tangle(file, quiet = quiet, ...)
662+
engine$tangle(file, quiet = quiet, encoding = encoding, ...)
649663
setwd(tdir) # In case tangle changed it
650664
find_vignette_product(name, by = "tangle", main = FALSE, engine = engine)
651665
} # else NULL
@@ -675,72 +689,70 @@ buildVignette <-
675689
unique(keep)
676690
}
677691

678-
### * .getVignetteEncoding
692+
### * getVignetteEncoding
679693

680694
getVignetteEncoding <- function(file, ...)
681695
{
682-
## Look for inputen[cx] first, then %\SweaveUTF8. Complain about
683-
## inconsistencies.
684-
685696
lines <- readLines(file, warn = FALSE)
686-
result1 <- .getVignetteEncoding(lines, ...)
687-
688-
poss <- grep("^[[:space:]]*%+[[:space:]]*\\\\SweaveUTF8[[:space:]]*$", lines, useBytes = TRUE)
689-
if (length(poss)) {
690-
result <- "UTF-8"
691-
if (!(result1 %in% c("", "non-ASCII", "UTF-8")))
692-
stop(gettextf("Inconsistent encoding specifications: %s with %%\\SweaveUTF8", result1), domain = NA)
693-
} else
694-
result <- result1
695-
result
697+
.getVignetteEncoding(lines, ...)
696698
}
697699

698-
.getVignetteEncoding <- function(lines, convert = FALSE)
700+
.getVignetteEncoding <- function(lines, default = NA)
699701
{
700702
res <- .get_vignette_metadata(lines, "Encoding")[1L]
701703

702704
if(is.na(res)) {
703-
## Look for input enc lines using inputenc or inputenx
704-
## Note, multiple encodings are excluded.
705-
poss <-
706-
grep("^[[:space:]]*\\\\usepackage\\[([[:alnum:]]+)\\]\\{inputen[cx]\\}",
707-
lines, useBytes = TRUE)
708-
## Check it is in the preamble
709-
start <- grep("^[[:space:]]*\\\\begin\\{document\\}",
710-
lines, useBytes = TRUE)
711-
if(length(start)) poss <- poss[poss < start[1L]]
712-
if(!length(poss)) {
713-
asc <- iconv(lines, "latin1", "ASCII")
714-
ind <- is.na(asc) | asc != lines
715-
if(any(ind)) return("non-ASCII")
716-
return("") # or "ASCII"
705+
poss <- grep("^[[:space:]]*%+[[:space:]]*\\\\SweaveUTF8[[:space:]]*$", lines, useBytes = TRUE)
706+
if (length(poss))
707+
res <- "UTF-8"
708+
else {
709+
## Look for input enc lines using inputenc or inputenx
710+
## Note, multiple encodings are excluded.
711+
poss <-
712+
grep("^[[:space:]]*\\\\usepackage\\[([[:alnum:]]+)\\]\\{inputen[cx]\\}",
713+
lines, useBytes = TRUE)
714+
## Check it is in the preamble
715+
start <- grep("^[[:space:]]*\\\\begin\\{document\\}",
716+
lines, useBytes = TRUE)
717+
if(length(start))
718+
poss <- poss[poss < start[1L]]
719+
if(length(poss)) {
720+
poss <- lines[poss[1L]]
721+
res <- gsub("^[[:space:]]*\\\\usepackage\\[([[:alnum:]]+)\\].*", "\\1",
722+
poss) # This line should be ASCII.
723+
## see Rd2latex.R.
724+
## Currently utf8, utf8x, latin1, latin9 and ansinew are in use.
725+
res <- switch(res,
726+
"utf8" =, "utf8x" = "UTF-8",
727+
"latin1" =, "iso-8859-1" = "latin1",
728+
"latin2" =, "iso-8859-2" = "latin2",
729+
"latin9" =, "iso-8859-15" = "latin-9", # only form known to GNU libiconv
730+
"latin10" =, "iso-8859-16" = "latin10",
731+
"cyrillic" =, "iso-8859-5" = "ISO-8859-5", # inputenx
732+
"koi8-r" = "KOI8-R", # inputenx
733+
"arabic" = "ISO-8859-6", # Not clear next 3 are known to latex
734+
"greek" =, "iso-8859-7" = "ISO-8859-7",
735+
"hebrew" =, "iso-8859-8" = "ISO-8859-8",
736+
"ansinew" = "CP1252",
737+
"applemac" = "macroman",
738+
## assume these only get used on Windows
739+
"cp1250" = "CP1250",
740+
"cp1252" = "CP1252",
741+
"cp1257" = "CP1257",
742+
"unknown")
743+
} else if (!is.na(default)) {
744+
res <- default
745+
} else { # Nothing else has indicated an encoding, maybe it's just ASCII
746+
asc <- iconv(lines, "latin1", "ASCII")
747+
ind <- is.na(asc) | asc != lines
748+
if(any(ind))
749+
res <- "non-ASCII"
750+
else
751+
res <- "" # or "ASCII"
752+
}
717753
}
718-
poss <- lines[poss[1L]]
719-
res <- gsub("^[[:space:]]*\\\\usepackage\\[([[:alnum:]]+)\\].*", "\\1",
720-
poss) # This line should be ASCII.
721754
}
722-
if (convert) {
723-
## see Rd2latex.R.
724-
## Currently utf8, utf8x, latin1, latin9 and ansinew are in use.
725-
switch(res,
726-
"utf8" =, "utf8x" = "UTF-8",
727-
"latin1" =, "iso-8859-1" = "latin1",
728-
"latin2" =, "iso-8859-2" = "latin2",
729-
"latin9" =, "iso-8859-15" = "latin-9", # only form known to GNU libiconv
730-
"latin10" =, "iso-8859-16" = "latin10",
731-
"cyrillic" =, "iso-8859-5" = "ISO-8859-5", # inputenx
732-
"koi8-r" = "KOI8-R", # inputenx
733-
"arabic" = "ISO-8859-6", # Not clear next 3 are known to latex
734-
"greek" =, "iso-8859-7" = "ISO-8859-7",
735-
"hebrew" =, "iso-8859-8" = "ISO-8859-8",
736-
"ansinew" = "CP1252",
737-
"applemac" = "macroman",
738-
## assume these only get used on Windows
739-
"cp1250" = "CP1250",
740-
"cp1252" = "CP1252",
741-
"cp1257" = "CP1257",
742-
"unknown")
743-
} else res
755+
res
744756
}
745757

746758
### * .build_vignette_index

src/library/tools/R/admin.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,8 +530,7 @@ function(dir, outDir, encoding = "")
530530
if (!is.null(vigns$sources) && !is.null(vigns$sources[file][[1]]))
531531
next
532532
file <- basename(file)
533-
enc <- getVignetteEncoding(file, TRUE)
534-
if(enc %in% c("non-ASCII", "unknown")) enc <- encoding
533+
enc <- vigns$encodings[i]
535534

536535
cat(" ", sQuote(basename(file)),
537536
if(nzchar(enc)) paste("using", sQuote(enc)), "\n")

src/library/tools/R/check.R

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2670,7 +2670,8 @@ setRlibs <-
26702670
sQuote(basename(bad_vignettes))),
26712671
"", ""), collapse = "\n"))
26722672
}
2673-
encs <- vapply(vigns$docs, getVignetteEncoding, "")
2673+
defaultEncoding <- .get_package_metadata(pkgdir)["Encoding"]
2674+
encs <- vapply(vigns$docs, getVignetteEncoding, "", default = defaultEncoding)
26742675
bad_vignettes <- vigns$docs[encs == "non-ASCII"]
26752676
if(nb <- length(bad_vignettes)) {
26762677
if(!any) warningLog(Log)
@@ -2746,8 +2747,9 @@ setRlibs <-
27462747
## If the vignettes declare an encoding, are they actually in it?
27472748
## (We don't check the .tex, though)
27482749
bad_vignettes <- character()
2749-
for (v in vigns$docs) {
2750-
enc <- getVignetteEncoding(v, TRUE)
2750+
for (i in seq_along(vigns$docs)) {
2751+
v <- vigns$docs[i]
2752+
enc <- vigns$encodings[i]
27512753
if (enc %in% c("", "non-ASCII", "unknown")) next
27522754
lines <- readLines(v, warn = FALSE) # some miss final newline
27532755
lines2 <- iconv(lines, enc, "UTF-16LE", toRaw = TRUE)
@@ -2785,8 +2787,7 @@ setRlibs <-
27852787
for (i in seq_along(vigns$docs)) {
27862788
file <- vigns$docs[i]
27872789
name <- vigns$names[i]
2788-
enc <- getVignetteEncoding(file, TRUE)
2789-
if(enc %in% c("non-ASCII", "unknown")) enc <- def_enc
2790+
enc <- vigns$encodings[i]
27902791
cat(" ", sQuote(basename(file)),
27912792
if(nzchar(enc)) paste("using", sQuote(enc)),
27922793
"...")

src/library/tools/man/buildVignette.Rd

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
\usage{
1212
buildVignette(file, dir = ".", weave = TRUE, latex = TRUE, tangle = TRUE,
1313
quiet = TRUE, clean = TRUE, keep = character(),
14-
engine = NULL, buildPkg = NULL, ...)
14+
engine = NULL, buildPkg = NULL,
encoding = getVignetteEncoding(file), ...)
1515
}
1616
\arguments{
1717
\item{file}{character; the vignette source file}
@@ -28,7 +28,9 @@ buildVignette(file, dir = ".", weave = TRUE, latex = TRUE, tangle = TRUE,
2828
\item{engine}{\code{NULL} or character; name of vignette engine to
2929
use. Overrides any \code{\\VignetteEngine\{\}} markup in the vignette.}
3030
\item{buildPkg}{\code{NULL} or character; an optional package in which to find
31-
the vignette engine}
31+
the vignette engine.}
32+
\item{encoding}{the encoding to assume for the file. If not specified, it will
33+
be inferred from the file contents.}
3234
\item{...}{Additional arguments passed to weave and tangle.}
3335

3436
}

src/library/utils/R/Sweave.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ SweaveReadFile <- function(file, syntax, encoding = "")
216216

217217
if (encoding != "bytes") {
218218
## now sort out an encoding, if needed.
219-
enc <- tools:::.getVignetteEncoding(text, convert = TRUE)
219+
enc <- tools:::.getVignetteEncoding(text,
220+
default = if (identical(encoding, "")) NA else encoding)
220221
if (enc == "non-ASCII") {
221222
enc <- if (nzchar(encoding)) {
222223
encoding

0 commit comments

Comments
 (0)