Skip to content

Commit

Permalink
port r46505:6 from trunk
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.r-project.org/R/branches/R-2-7-branch@46507 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information
ripley committed Sep 8, 2008
1 parent 700c5c4 commit b624f33
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 22 deletions.
17 changes: 17 additions & 0 deletions src/gnuwin32/CHANGES
Expand Up @@ -13,6 +13,23 @@
**************************************************


CHANGES IN R VERSION 2.7.2 patched


BUG FIXES

o iconv() was incorrectly identifying R running in an Eastern
European locale as using ISO-8859-2 (aka Latin-2) rather than
CP1250. The two encodings place some characters at different
code points, notably s and z with caron. This was seen in tcltk
output from Rcmdr in 2.7.x, but might have occurred elsewhere
(and earlier).

This also affected localeToCharset(), which incorrectly
returned "ISO8859-2".



CHANGES IN R VERSION 2.7.2


Expand Down
10 changes: 5 additions & 5 deletions src/library/utils/R/iconv.R
Expand Up @@ -62,13 +62,13 @@ localeToCharset <- function(locale = Sys.getlocale("LC_CTYPE"))
## 1256 -> ISO 8859-6
## 1257 -> ISO 8859-13
switch(x[2],
"1250" = return("ISO8859-2"),
# this is quite wrong "1250" = return("ISO8859-2"),
# this is quite wrong "1251" = return("KOI8-U"),
"1252" = return("ISO8859-1"),
"1253" = return("ISO8859-7"),
"1254" = return("ISO8859-9"),
"1255" = return("ISO8859-8"),
"1256" = return("ISO8859-6"),
# "1253" = return("ISO8859-7"),
# "1254" = return("ISO8859-9"),
# "1255" = return("ISO8859-8"),
# "1256" = return("ISO8859-6"),
"1257" = return("ISO8859-13")
)
return(paste("CP", x[2], sep=""))
Expand Down
27 changes: 11 additions & 16 deletions src/main/localecharset.c
Expand Up @@ -613,7 +613,7 @@ char *locale2charset(const char *locale)
#ifdef Win32
/*
## PUTTY suggests mapping Windows code pages as
## 1250 -> ISO 8859-2
## 1250 -> ISO 8859-2: this is WRONG
## 1251 -> KOI8-U
## 1252 -> ISO 8859-1
## 1253 -> ISO 8859-7
Expand All @@ -623,21 +623,16 @@ char *locale2charset(const char *locale)
## 1257 -> ISO 8859-13
*/
switch(cp = atoi(enc)) {
case 1250:
return "ISO8859-2";
/* case 1251: return "KOI8-U"; This is not anywhere near the same */
case 1252:
return "ISO8859-1";
case 1253:
return "ISO8859-7";
case 1254:
return "ISO8859-9";
case 1255:
return "ISO8859-8";
case 1256:
return "ISO8859-6";
case 1257:
return "ISO8859-13";
/* case 1250: return "ISO8859-2"; */
/* case 1251: return "KOI8-U"; This is not anywhere near the same */
case 1252: return "ISO8859-1";
/*
case 1253: return "ISO8859-7";
case 1254: return "ISO8859-9";
case 1255: return "ISO8859-8";
case 1256: return "ISO8859-6";
*/
case 1257: return "ISO8859-13";
default:
sprintf(charset, "CP%u", cp);
return charset;
Expand Down
17 changes: 16 additions & 1 deletion src/main/sysutils.c
Expand Up @@ -141,13 +141,15 @@ wchar_t *filenameToWchar(const SEXP fn, const Rboolean expand)
static wchar_t filename[PATH_MAX+1];
void *obj;
const char *from = "", *inbuf;
char cpx[7];
char *outbuf;
size_t inb, outb, res;

if(!strlen(CHAR(fn))) {
wcscpy(filename, L"");
return filename;
}
if(localeCP > 0) {snprintf(cpx, 7, "CP%d", localeCP); from = cpx;}
if(IS_LATIN1(fn)) from = "latin1";
if(IS_UTF8(fn)) from = "UTF-8";
obj = Riconv_open("UCS-2LE", from);
Expand Down Expand Up @@ -882,7 +884,10 @@ const wchar_t *wtransChar(SEXP x)
obj = utf8_wobj;
knownEnc = TRUE;
} else {
obj = Riconv_open("UCS-2LE", "");
/* This gets used in reading environment variables early on */
char to[7] = "";
if (localeCP > 0) snprintf(to, 7, "CP%d", localeCP);
obj = Riconv_open("UCS-2LE", to);
if(obj == (void *)(-1)) error(_("unsupported conversion"));
}

Expand Down Expand Up @@ -962,7 +967,17 @@ const char *reEnc(const char *x, cetype_t ce_in, cetype_t ce_out, int subst)
}

switch(ce_out) {
#ifdef Win32
case CE_NATIVE:
{
/* avoid possible misidentification of CP1250 as LATIN-2 */
sprintf(buf, "CP%d", localeCP);
tocode = buf;
break;
}
#else
case CE_NATIVE: tocode = ""; break;
#endif
case CE_LATIN1: tocode = "latin1"; break;
case CE_UTF8: tocode = "UTF-8"; break;
default: return x;
Expand Down

0 comments on commit b624f33

Please sign in to comment.