/
iconv.Rd
84 lines (74 loc) · 2.81 KB
/
iconv.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
\name{iconv}
\alias{iconv}
\alias{iconvlist}
\title{Convert Character Vector between Encodings}
\description{
This uses system facilities to convert a character vector between
encodings: the \sQuote{i} stands for \sQuote{internationalization}.
}
\usage{
iconv(x, from, to, sub=NA)
iconvlist()
}
\arguments{
\item{x}{A character vector.}
\item{from}{A character string describing the current encoding.}
\item{to}{A character string describing the target encoding.}
\item{sub}{A character string. If not \code{NA} it is used to replace
any non-convertible bytes in the input. (This would normally be a
single character, but can be more.) If \code{"byte"}, the indication is
\code{"<xx>"} with the hex code of the byte.}
}
\details{
The names of encodings and which ones are available (and indeed, if
any are) are platform-dependent. On systems that support \R's
\code{iconv} you can use \code{""} for the encoding of the current
locale, as well as \code{"latin1"} and \code{"UTF-8"}.
#ifdef unix
On many platforms \code{iconvlist} provides an alphabetical list of
the supported encodings. On others, the information is on the man
page for \code{iconv(5)} or elsewhere in the man pages (and beware
that the system command \code{iconv} may not support the same set of
encodings as the C functions \R calls).
Unfortunately, the names are rarely common across platforms.
#endif
#ifdef windows
\code{iconvlist} provides an alphabetical list of the supported encodings.
#endif
Elements of \code{x} which cannot be converted (perhaps because they
are invalid or because they cannot be represented in the target
encoding) will be returned as \code{NA} unless \code{sub} is specified.
Some versions of \code{iconv} will allow transliteration by appending
\code{//TRANSLIT} to the \code{to} encoding: see the examples.
}
\value{
A character vector of the same length and the same attributes as \code{x}.
}
\note{
Not all platforms support these functions. See also
\code{\link{capabilities}("iconv")}.
}
\seealso{
\code{\link{localeToCharset}}, \code{\link{file}}.
}
\examples{\dontrun{
iconvlist()
## convert from Latin-2 to UTF-8: two of the glibc iconv variants.
iconv(x, "ISO_8859-2", "UTF-8")
iconv(x, "LATIN2", "UTF-8")
## Both x below are in latin1 and will only display correctly in a
## latin1 locale.
(x <- "fa\xE7ile")
charToRaw(xx <- iconv(x, "latin1", "UTF-8"))
## in a UTF-8 locale, print(xx)
iconv(x, "latin1", "ASCII") # NA
iconv(x, "latin1", "ASCII", "?") # "fa?ile"
iconv(x, "latin1", "ASCII", "") # "faile"
iconv(x, "latin1", "ASCII", "byte") # "fa<e7>ile"
# Extracts from R help files
(x <- c("Ekstr\xf8m", "J\xf6reskog", "bi\xdfchen Z\xfcrcher"))
iconv(x, "latin1", "ASCII//TRANSLIT")
iconv(x, "latin1", "ASCII", sub="byte")
}}
\keyword{ character }
\keyword{ utilities }