Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

experimental branch R-NaString with string NAs

git-svn-id: https://svn.r-project.org/R/branches/R-NaString@16321 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information...
commit 8d8d8336c3e34df36b3c535fac16c8a14da7cc83 1 parent 054bf30
tlumley authored
View
33 src/library/base/R/all.equal.R
@@ -56,25 +56,32 @@ function(target, current, tolerance = .Machine$double.eps ^ .5,
if(is.na(xy) || xy > tolerance)
paste("Mean", what, if(cplx)"Mod", "difference:", format(xy)) else TRUE
}
-
-all.equal.character <- function(target, current, ...)
+"all.equal.character" <-
+ function (target, current, ...)
{
lt <- length(target)
lc <- length(current)
- if(lt != lc) {
- msg <- paste("Lengths (", lt, ", ", lc,
- ") differ (string compare on first ", ll <- min(lt, lc),
- ")", sep = "")
- ll <- seq(length = ll)
- target <- target[ll]
- current <- current[ll]
- } else msg <- NULL
- ne <- target != current
- if(!any(ne) && is.null(msg)) TRUE
- else if(any(ne)) c(msg, paste(sum(ne), "string mismatches"))
+ if (lt != lc) {
+ msg <- paste("Lengths (", lt, ", ", lc, ") differ (string compare on first ",
+ ll <- min(lt, lc), ")", sep = "")
+ ll <- seq(length = ll)
+ target <- target[ll]
+ current <- current[ll]
+ }
+ else msg <- NULL
+ nas<-is.na(target)
+ if (any(nas!=is.na(current)))
+ return(paste("`is.NA' value mismatches:", sum(is.na(current)),
+ "in current,", sum(out), " in target"))
+ ne <- !nas & target != current
+ if (!any(ne) && is.null(msg))
+ TRUE
+ else if (any(ne))
+ c(msg, paste(sum(ne), "string mismatches"))
else msg
}
+
all.equal.factor <- function(target, current, ...)
{
if(!inherits(current, "factor"))
View
2  src/library/base/R/dataframe.R
@@ -331,7 +331,7 @@ function(..., row.names = NULL, check.rows = FALSE, check.names = TRUE) {
if(is.matrix(i))
return(as.matrix(x)[i])
y <- NextMethod("[")
- if(any(names(y) == "NA"))
+ if(any(is.na(names(y))))
stop("undefined columns selected")
return(structure(y, class = class(x), row.names = row.names(x)))
}
View
2  src/library/base/R/factor.R
@@ -88,7 +88,7 @@ as.vector.factor <- function(x, type="any")
as.character.factor <- function(x,...)
{
cx <- levels(x)[x]
- if("NA" %in% levels(x)) cx[is.na(x)] <- "<NA>"
+ #if("NA" %in% levels(x)) cx[is.na(x)] <- "<NA>"
cx
}
View
2  src/library/base/R/files.R
@@ -86,7 +86,7 @@ format.octmode <- function(x, ...)
y <- floor(y/8)
ans0 <- paste(z, ans0, sep="")
}
- ans <- rep("NA", length(x))
+ ans <- rep(as.character(NA), length(x))
ans[!isna] <- ans0
ans
}
View
2  src/library/base/R/read.fwf.R
@@ -3,7 +3,7 @@ read.fwf <- function(file, widths, sep = "\t", as.is = FALSE,
{
doone <- function(x) {
x <- substring(x, first, last)
- x[nchar(x)==0] <- "NA"
+ x[nchar(x)==0] <- as.character(NA)
x
}
FILE <- tempfile("Rfwf.")
View
2  src/library/base/R/readtable.R
@@ -12,7 +12,7 @@ count.fields <- function(file, sep = "", quote = "\"'", skip = 0,
}
-type.convert <- function(x, na.strings = "NA", as.is = FALSE, dec = ".")
+type.convert <- function(x, na.strings = as.character(NA), as.is = FALSE, dec = ".")
.Internal(type.convert(x, na.strings, as.is, dec))
read.table <-
View
1  src/library/base/R/unix/update.packages.R
@@ -70,6 +70,7 @@ download.packages <- function(pkgs, destdir, available=NULL,
for(p in unique(pkgs))
{
ok <- (available[,"Package"] == p) | (available[,"Bundle"] == p)
+ ok <- ok & !is.na(ok)
if(!any(ok))
warning(paste("No package \"", p, "\" on CRAN.", sep=""))
else{
View
1  src/library/base/R/windows/update.packages.R
@@ -70,6 +70,7 @@ download.packages <- function(pkgs, destdir, available=NULL,
for(p in unique(pkgs))
{
ok <- (available[,"Package"] == p) | (available[,"Bundle"] == p)
+ ok <- ok & !is.na(ok)
if(!any(ok))
warning(paste("No package \"", p, "\" on CRAN.", sep=""))
else{
View
24 src/main/coerce.c
@@ -71,8 +71,8 @@ double R_strtod(const char *c, char **end)
{
double x;
- if (strncmp(c, "NA", 2) == 0){
- x = NA_REAL; *end = (char *)c + 2; /* coercion for -Wall */
+ if (strncmp(c, "<NA>", 4) == 0){
+ x = NA_REAL; *end = (char *)c + 4; /* coercion for -Wall */
}
else if (strncmp(c, "NaN", 3) == 0) {
x = R_NaN; *end = (char *)c + 3;
@@ -281,28 +281,40 @@ SEXP StringFromLogical(int x, int *warn)
{
int w;
formatLogical(&x, 1, &w);
- return mkChar(EncodeLogical(x, w));
+ if (x==NA_LOGICAL)
+ return NA_STRING;
+ else
+ return mkChar(EncodeLogical(x, w));
}
SEXP StringFromInteger(int x, int *warn)
{
int w;
formatInteger(&x, 1, &w);
- return mkChar(EncodeInteger(x, w));
+ if (x==NA_INTEGER)
+ return NA_STRING;
+ else
+ return mkChar(EncodeInteger(x, w));
}
SEXP StringFromReal(double x, int *warn)
{
int w, d, e;
formatReal(&x, 1, &w, &d, &e, 0);
- return mkChar(EncodeReal(x, w, d, e));
+ if (ISNAN(x))
+ return NA_STRING; /* NA and NaN. Is this right?*/
+ else
+ return mkChar(EncodeReal(x, w, d, e));
}
SEXP StringFromComplex(Rcomplex x, int *warn)
{
int wr, dr, er, wi, di, ei;
formatComplex(&x, 1, &wr, &dr, &er, &wi, &di, &ei, 0);
- return mkChar(EncodeComplex(x, wr, dr, er, wi, di, ei));
+ if (ISNAN(x.r) || ISNAN(x.i))
+ return NA_STRING; /* again, NA or NaN */
+ else
+ return mkChar(EncodeComplex(x, wr, dr, er, wi, di, ei));
}
/* Conversion between the two list types (LISTSXP and VECSXP). */
View
4 src/main/dstruct.c
@@ -95,8 +95,8 @@ SEXP mkChar(const char *name)
{
SEXP c;
- if (streql(name, "NA"))
- return (NA_STRING);
+ /* if (streql(name, "NA"))
+ return (NA_STRING);*/
c = allocString(strlen(name));
strcpy(CHAR(c), name);
return c;
View
2  src/main/printutils.c
@@ -293,7 +293,7 @@ char *EncodeString(char *s, int w, int quote, int right)
for(i=0 ; i<b ; i++) *q++ = ' ';
}
if(quote) *q++ = quote;
- if (s == CHAR(NA_STRING) )
+ if (s == CHAR(NA_STRING) )
p = CHAR(R_print.na_string);
else p = s;
while(*p) {
View
8 src/main/printvector.c
@@ -141,8 +141,12 @@ static void printStringVector(SEXP * x, int n, int quote, int indx)
if (i > 0 && width + w + R_print.gap > R_print.width) {
DO_newline;
}
- Rprintf("%*s%s", R_print.gap, "",
- EncodeString(CHAR(x[i]), w, quote, Rprt_adj_left));
+ if (x[i]==NA_STRING)
+ Rprintf("%*s%s", R_print.gap, "",
+ EncodeString(CHAR(x[i]), w, 0, Rprt_adj_left));
+ else
+ Rprintf("%*s%s", R_print.gap, "",
+ EncodeString(CHAR(x[i]), w, quote, Rprt_adj_left));
width += w + R_print.gap;
}
Rprintf("\n");
View
40 src/main/relop.c
@@ -420,25 +420,34 @@ static SEXP string_relop(RELOP_TYPE code, SEXP s1, SEXP s2)
switch (code) {
case EQOP:
for (i = 0; i < n; i++) {
- if (strcmp(CHAR(STRING_ELT(s1, i % n1)),
- CHAR(STRING_ELT(s2, i % n2))) == 0)
- LOGICAL(ans)[i] = 1;
+ if ((STRING_ELT(s1, i % n1)==NA_STRING) ||
+ (STRING_ELT(s2, i % n2)==NA_STRING))
+ LOGICAL(ans)[i]=NA_LOGICAL;
+ else if (strcmp(CHAR(STRING_ELT(s1, i % n1)),
+ CHAR(STRING_ELT(s2, i % n2))) == 0)
+ LOGICAL(ans)[i] = 1;
else
- LOGICAL(ans)[i] = 0;
+ LOGICAL(ans)[i] = 0;
}
break;
case NEOP:
for (i = 0; i < n; i++) {
- if (streql(CHAR(STRING_ELT(s1, i % n1)),
+ if ((STRING_ELT(s1, i % n1)==NA_STRING) ||
+ (STRING_ELT(s2, i % n2)==NA_STRING))
+ LOGICAL(ans)[i]=NA_LOGICAL;
+ else if (streql(CHAR(STRING_ELT(s1, i % n1)),
CHAR(STRING_ELT(s2, i % n2))) != 0)
- LOGICAL(ans)[i] = 0;
+ LOGICAL(ans)[i] = 0;
else
- LOGICAL(ans)[i] = 1;
+ LOGICAL(ans)[i] = 1;
}
break;
case LTOP:
for (i = 0; i < n; i++) {
- if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
+ if ((STRING_ELT(s1, i % n1)==NA_STRING) ||
+ (STRING_ELT(s2, i % n2)==NA_STRING))
+ LOGICAL(ans)[i]=NA_LOGICAL;
+ else if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
CHAR(STRING_ELT(s2, i % n2))) < 0)
LOGICAL(ans)[i] = 1;
else
@@ -447,7 +456,10 @@ static SEXP string_relop(RELOP_TYPE code, SEXP s1, SEXP s2)
break;
case GTOP:
for (i = 0; i < n; i++) {
- if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
+ if ((STRING_ELT(s1, i % n1)==NA_STRING) ||
+ (STRING_ELT(s2, i % n2)==NA_STRING))
+ LOGICAL(ans)[i]=NA_LOGICAL;
+ else if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
CHAR(STRING_ELT(s2, i % n2))) > 0)
LOGICAL(ans)[i] = 1;
else
@@ -456,7 +468,10 @@ static SEXP string_relop(RELOP_TYPE code, SEXP s1, SEXP s2)
break;
case LEOP:
for (i = 0; i < n; i++) {
- if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
+ if ((STRING_ELT(s1, i % n1)==NA_STRING) ||
+ (STRING_ELT(s2, i % n2)==NA_STRING))
+ LOGICAL(ans)[i]=NA_LOGICAL;
+ else if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
CHAR(STRING_ELT(s2, i % n2))) <= 0)
LOGICAL(ans)[i] = 1;
else
@@ -465,7 +480,10 @@ static SEXP string_relop(RELOP_TYPE code, SEXP s1, SEXP s2)
break;
case GEOP:
for (i = 0; i < n; i++) {
- if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
+ if ((STRING_ELT(s1, i % n1)==NA_STRING) ||
+ (STRING_ELT(s2, i % n2)==NA_STRING))
+ LOGICAL(ans)[i]=NA_LOGICAL;
+ else if (STRCMP(CHAR(STRING_ELT(s1, i % n1)),
CHAR(STRING_ELT(s2, i % n2))) >= 0)
LOGICAL(ans)[i] = 1;
else
View
26 src/main/saveload.c
@@ -910,7 +910,10 @@ static void NewMakeLists (SEXP obj, SEXP sym_list, SEXP env_list)
static void OutCHARSXP (FILE *fp, SEXP s)
{
R_assert(TYPEOF(s) == CHARSXP);
- OutString(fp, CHAR(s));
+ if (CHAR(s)==CHAR(NA_STRING)) /* blech */
+ OutString(fp,(char *) 0);
+ else
+ OutString(fp, CHAR(s));
}
static void NewWriteVec (SEXP s, SEXP sym_list, SEXP env_list, FILE *fp)
@@ -1081,6 +1084,7 @@ static SEXP InCHARSXP (FILE *fp)
/* FIXME: rather than use strlen, use actual length of string when
* sized strings get implemented in R's save/load code. */
tmp = InString(fp);
+ if (tmp==0) return NA_STRING;
len = strlen(tmp);
AllocBuffer(len);
s = allocVector(CHARSXP, len);
@@ -1264,6 +1268,10 @@ static int InIntegerAscii(FILE *fp)
static void OutStringAscii(FILE *fp, char *x)
{
int i, nbytes;
+ if (x==0) { /* String NA */
+ fprintf(fp,"%d",-1);
+ return;
+ }
nbytes = strlen(x);
fprintf(fp, "%d ", nbytes);
for (i = 0; i < nbytes; i++) {
@@ -1303,6 +1311,7 @@ static char *InStringAscii(FILE *fp)
/* FIXME : Ultimately we need to replace */
/* this with a real string allocation. */
/* All buffers must die! */
+ if (nbytes== -1) return( (char *) 0); /* String NA */
if (nbytes >= buflen) {
char *newbuf = realloc(buf, nbytes + 1);
if (newbuf == NULL)
@@ -1432,6 +1441,7 @@ static char *InStringBinary(FILE *fp)
static char *buf = NULL;
static int buflen = 0;
int nbytes = InIntegerBinary(fp);
+ if (nbytes==-1) return((char *) 0); /* String NA */
if (nbytes >= buflen) {
char *newbuf = realloc(buf, nbytes + 1);
if (newbuf == NULL)
@@ -1482,6 +1492,7 @@ static void OutIntegerBinary(FILE *fp, int i)
static void OutStringBinary(FILE *fp, char *s)
{
+ if (s==0) OutIntegerBinary(fp, -1); /* String NA */
int n = strlen(s);
OutIntegerBinary(fp, n);
if (fwrite(s, sizeof(char), n, fp) != n)
@@ -1558,7 +1569,12 @@ static int InIntegerXdr(FILE *fp)
static void OutStringXdr(FILE *fp, char *s)
{
- unsigned int n = strlen(s);
+ unsigned int n;
+ if (s==0) { /* String NA */
+ OutIntegerXdr(fp, -1);
+ return;
+ }
+ n = strlen(s);
OutIntegerXdr(fp, n);
if (!xdr_bytes(&xdrs, &s, &n, n)) {
xdr_destroy(&xdrs);
@@ -1570,7 +1586,11 @@ static char *InStringXdr(FILE *fp)
{
static char *buf = NULL;
static int buflen = 0;
- unsigned int nbytes = InIntegerXdr(fp);
+ int nb=InIntegerXdr(fp);
+ unsigned int nbytes;
+ if (nb==-1)
+ return( (char *) 0); /* String NA */
+ nbytes = (unsigned int) nb;
if (nbytes >= buflen) {
char *newbuf = realloc(buf, nbytes + 1);
if (newbuf == NULL)
View
5 src/main/unique.c
@@ -121,7 +121,10 @@ static int cequal(SEXP x, int i, SEXP y, int j)
static int sequal(SEXP x, int i, SEXP y, int j)
{
- return !strcmp(CHAR(STRING_ELT(x, i)), CHAR(STRING_ELT(y, j)));
+ if (STRING_ELT(x,i)!=NA_STRING && STRING_ELT(y,j)!=NA_STRING)
+ return !strcmp(CHAR(STRING_ELT(x, i)), CHAR(STRING_ELT(y, j)));
+ else
+ return STRING_ELT(x,i)==STRING_ELT(y,j);
}
/* Choose M to be the smallest power of 2 */
Please sign in to comment.
Something went wrong with that request. Please try again.