-
Notifications
You must be signed in to change notification settings - Fork 309
/
identical.c
423 lines (397 loc) · 13.5 KB
/
identical.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 2001-2023 The R Core Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, a copy is available at
* https://www.R-project.org/Licenses/
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <Defn.h>
/* -> Rinternals.h which exports R_compute_identical() */
/* Implementation of identical(x, y) */
/* How are R "double"s compared : */
typedef enum {
bit_NA__num_bit = 0,// S's default - look at bit pattern, also for NA/NaN's
bit_NA__num_eq = 1,// bitwise comparison for NA / NaN; '==' for other numbers
single_NA__num_bit = 2,// one kind of NA or NaN; for num, use 'bit'comparison
single_NA__num_eq = 3,// one kind of NA or NaN; for num, use '==' : R's DEFAULT
} ne_strictness_type;
/* NOTE: ne_strict = NUM_EQ + (SINGLE_NA * 2) = NUM_EQ | (SINGLE_NA << 1) */
static Rboolean neWithNaN(double x, double y, ne_strictness_type str);
static R_INLINE int asFlag(SEXP x, const char *name)
{
int val = asLogical(x);
if (val == NA_LOGICAL)
error(_("invalid '%s' value"), name);
return val;
}
/* .Internal(identical(..)) */
attribute_hidden SEXP do_identical(SEXP call, SEXP op, SEXP args, SEXP env)
{
int nargs = length(args);
/* avoid problems with earlier (and future) versions captured in S4
methods: but this should be fixed where it is caused, in
'methods'!
checkArity(op, args); */
if (nargs < 5)
error("%d arguments passed to .Internal(%s) which requires %d",
length(args), PRIMNAME(op), PRIMARITY(op));
SEXP x = CAR(args); args = CDR(args);
SEXP y = CAR(args); args = CDR(args);
int num_as_bits = ! asFlag(CAR(args), "num.eq"); args = CDR(args);
int NA_as_bits = ! asFlag(CAR(args), "single.NA"); args = CDR(args);
int attr_by_order = ! asFlag(CAR(args), "attrib.as.set"); args = CDR(args);
int use_bytecode = FALSE;
if (nargs >= 6)
use_bytecode = ! asFlag(CAR(args), "ignore.bytecode");
int use_cloenv = TRUE;
if (nargs >= 7)
use_cloenv = ! asFlag(CADR(args), "ignore.environment");
int use_srcref = FALSE;
if (nargs >= 8)
use_srcref = ! asFlag(CADDR(args), "ignore.srcref");
int extptr_as_ref = FALSE;
if (nargs >= 9)
extptr_as_ref = asFlag(CADDDR(args), "extptr.as.ref");
int flags = 0;
if (num_as_bits) flags |= IDENT_NUM_AS_BITS;
if (NA_as_bits) flags |= IDENT_NA_AS_BITS;
if (attr_by_order) flags |= IDENT_ATTR_BY_ORDER;
if (use_bytecode) flags |= IDENT_USE_BYTECODE;
if (use_cloenv) flags |= IDENT_USE_CLOENV;
if (use_srcref) flags |= IDENT_USE_SRCREF;
if (extptr_as_ref) flags |= IDENT_EXTPTR_AS_REF;
return ScalarLogical(R_compute_identical(x, y, flags));
}
#define NUM_EQ (!(flags & IDENT_NUM_AS_BITS))
#define SINGLE_NA (!(flags & IDENT_NA_AS_BITS))
#define ATTR_AS_SET (!(flags & IDENT_ATTR_BY_ORDER))
#define IGNORE_BYTECODE (!(flags & IDENT_USE_BYTECODE))
#define IGNORE_ENV (!(flags & IDENT_USE_CLOENV))
#define IGNORE_SRCREF (!(flags & IDENT_USE_SRCREF))
#define EXTPTR_AS_REF (flags & IDENT_EXTPTR_AS_REF)
/* do the two objects compute as identical?
Also used in unique.c */
Rboolean
R_compute_identical(SEXP x, SEXP y, int flags)
{
if(x == y) /* same pointer */
return TRUE;
if(TYPEOF(x) != TYPEOF(y) ||
OBJECT(x) != OBJECT(y) ||
IS_S4_OBJECT(x) != IS_S4_OBJECT(y))
return FALSE;
/* Skip attribute checks for CHARSXP
-- such attributes are used for the cache. */
if(TYPEOF(x) == CHARSXP) {
/* This matches NAs */
return Seql(x, y);
}
SEXP ax, ay;
if (IGNORE_SRCREF && TYPEOF(x) == CLOSXP) {
/* Remove "srcref" attribute - and below, treat body(x), body(y) */
SEXP x_ = PROTECT(duplicate(x)), y_ = PROTECT(duplicate(y));
setAttrib(x_, R_SrcrefSymbol, R_NilValue);
setAttrib(y_, R_SrcrefSymbol, R_NilValue);
ax = ATTRIB(x_); ay = ATTRIB(y_);
UNPROTECT(2);
}
else {
ax = ATTRIB(x); ay = ATTRIB(y);
}
if(ax != R_NilValue || ay != R_NilValue) {
if(ax == R_NilValue || ay == R_NilValue)
return FALSE;
/* Attributes are tagged pairlists with unique non-empty non-NA tags.
This code still includes a check and if they are not pairlists,
they are not compared, with a warning (could be turned into an error
or removed). */
if(TYPEOF(ax) != LISTSXP || TYPEOF(ay) != LISTSXP) {
warning(_("ignoring non-pairlist attributes"));
} else if (!ATTR_AS_SET) {
/* ax, ay might be fresh allocations from duplicating into
x_, y_) above, so need to be protected from possible
allocations in getAttrib and recursive calls to
R_compute_identical in the loop. */
PROTECT(ax);
PROTECT(ay);
while (ax != R_NilValue) {
if (ay == R_NilValue) {
UNPROTECT(2); /* ax, ay */
return FALSE;
}
/* Need to check for R_RowNamesSymbol and treat specially */
if (TAG(ax) == R_RowNamesSymbol) {
SEXP atrx = PROTECT(getAttrib(x, R_RowNamesSymbol));
SEXP atry = PROTECT(getAttrib(y, R_RowNamesSymbol));
if (!R_compute_identical(atrx, atry, flags)) {
UNPROTECT(4); /* atrx, atry, ax, ay */
return FALSE;
}
UNPROTECT(2); /* atrx, atry */
} else if (!R_compute_identical(CAR(ax), CAR(ay), flags)) {
UNPROTECT(2); /* ax, ay */
return FALSE;
}
if (!R_compute_identical(PRINTNAME(TAG(ax)),
PRINTNAME(TAG(ay)), flags)) {
UNPROTECT(2); /* ax, ay */
return FALSE;
}
ax = CDR(ax);
ay = CDR(ay);
}
UNPROTECT(2); /* ax, ay */
if (ay != R_NilValue)
return FALSE;
} else /* ATTR_AS_SET */ {
/* This code is not very efficient, but then neither is using
pairlists for attributes. If long attribute lists become more
common (and they are used for S4 slots) we should store them in
a hash table. */
SEXP elx, ely;
if(length(ax) != length(ay)) return FALSE;
/* They are the same length and should have
unique non-empty non-NA tags */
/* ax, ay might be fresh allocations from duplicating into
x_, y_) above, so need to be protected from possible
allocations in getAttrib and recursive calls to
R_compute_identical in the loop. */
PROTECT(ax);
PROTECT(ay);
for(elx = ax; elx != R_NilValue; elx = CDR(elx)) {
const char *tx = CHAR(PRINTNAME(TAG(elx)));
for(ely = ay; ely != R_NilValue; ely = CDR(ely))
if(streql(tx, CHAR(PRINTNAME(TAG(ely))))) {
/* We need to treat row.names specially here */
if(streql(tx, "row.names")) {
SEXP
atrx = PROTECT(getAttrib(x, R_RowNamesSymbol)),
atry = PROTECT(getAttrib(y, R_RowNamesSymbol));
if(!R_compute_identical(atrx, atry, flags)) {
UNPROTECT(4); /* atrx, atry, ax, ay */
return FALSE;
} else
UNPROTECT(2); /* atrx, atry */
} else
if(!R_compute_identical(CAR(elx), CAR(ely),
flags)) {
UNPROTECT(2); /* ax, ay */
return FALSE;
}
break;
}
if(ely == R_NilValue) {
UNPROTECT(2); /* ax, ay */
return FALSE;
}
}
UNPROTECT(2); /* ax, ay */
}
}
switch (TYPEOF(x)) {
case NILSXP:
return TRUE;
case LGLSXP:
if (XLENGTH(x) != XLENGTH(y)) return FALSE;
/* Use memcmp (which is ISO C90) to speed up the comparison */
return memcmp((void *)LOGICAL(x), (void *)LOGICAL(y),
xlength(x) * sizeof(int)) == 0 ? TRUE : FALSE;
case INTSXP:
if (XLENGTH(x) != XLENGTH(y)) return FALSE;
/* Use memcmp (which is ISO C90) to speed up the comparison */
return memcmp((void *)INTEGER(x), (void *)INTEGER(y),
xlength(x) * sizeof(int)) == 0 ? TRUE : FALSE;
case REALSXP:
{
R_xlen_t n = XLENGTH(x);
if(n != XLENGTH(y)) return FALSE;
else {
double *xp = REAL(x), *yp = REAL(y);
int ne_strict = NUM_EQ | (SINGLE_NA << 1);
for(R_xlen_t i = 0; i < n; i++)
if(neWithNaN(xp[i], yp[i], ne_strict)) return FALSE;
}
return TRUE;
}
case CPLXSXP:
{
R_xlen_t n = XLENGTH(x);
if(n != XLENGTH(y)) return FALSE;
else {
Rcomplex *xp = COMPLEX(x), *yp = COMPLEX(y);
int ne_strict = NUM_EQ | (SINGLE_NA << 1);
for(R_xlen_t i = 0; i < n; i++)
if(neWithNaN(xp[i].r, yp[i].r, ne_strict) ||
neWithNaN(xp[i].i, yp[i].i, ne_strict))
return FALSE;
}
return TRUE;
}
case STRSXP:
{
R_xlen_t i, n = XLENGTH(x);
if(n != XLENGTH(y)) return FALSE;
for(i = 0; i < n; i++) {
/* This special-casing for NAs is not needed */
Rboolean na1 = (STRING_ELT(x, i) == NA_STRING),
na2 = (STRING_ELT(y, i) == NA_STRING);
if(na1 ^ na2) return FALSE;
if(na1 && na2) continue;
if (! Seql(STRING_ELT(x, i), STRING_ELT(y, i))) return FALSE;
}
return TRUE;
}
case CHARSXP: /* Probably unreachable, but better safe than sorry... */
{
/* This matches NAs */
return Seql(x, y);
}
case VECSXP:
case EXPRSXP:
{
R_xlen_t i, n = XLENGTH(x);
if(n != XLENGTH(y)) return FALSE;
for(i = 0; i < n; i++)
if(!R_compute_identical(VECTOR_ELT(x, i),VECTOR_ELT(y, i), flags))
return FALSE;
return TRUE;
}
case LANGSXP:
case LISTSXP:
case DOTSXP:
{
while (x != R_NilValue) {
if(y == R_NilValue)
return FALSE;
if(!R_compute_identical(CAR(x), CAR(y), flags))
return FALSE;
if(!R_compute_identical(PRINTNAME(TAG(x)), PRINTNAME(TAG(y)), flags))
return FALSE;
x = CDR(x);
y = CDR(y);
}
return(y == R_NilValue);
}
case CLOSXP:
{
if(!R_compute_identical(FORMALS(x), FORMALS(y), flags)) return FALSE;
if(IGNORE_BYTECODE) {
if(IGNORE_SRCREF) {
SEXP x_ = PROTECT(R_body_no_src(x)),
y_ = PROTECT(R_body_no_src(y));
Rboolean id_body = R_compute_identical(x_, y_, flags);
UNPROTECT(2);
if(!id_body) return FALSE;
} else if(!R_compute_identical(BODY_EXPR(x), BODY_EXPR(y), flags))
return FALSE;
} else { // !IGNORE_BYTECODE: use byte code for comparison of function bodies :
if(!R_compute_identical(BODY(x), BODY(y), flags)) return FALSE;
}
// now, formals and body are equal, check the environment(.)s:
return (IGNORE_ENV || CLOENV(x) == CLOENV(y) ? TRUE : FALSE);
}
case SPECIALSXP:
case BUILTINSXP:
return(PRIMOFFSET(x) == PRIMOFFSET(y) ? TRUE : FALSE);
case ENVSXP:
case SYMSXP:
case WEAKREFSXP: /**** is this the best approach? */
return(x == y ? TRUE : FALSE);
case BCODESXP:
return R_compute_identical(BCODE_CODE(x), BCODE_CODE(y), flags) &&
R_compute_identical(BCODE_EXPR(x), BCODE_EXPR(y), flags) &&
R_compute_identical(BCODE_CONSTS(x), BCODE_CONSTS(y), flags);
case EXTPTRSXP:
if (EXTPTR_AS_REF)
return x == y ? TRUE : FALSE;
else
return (EXTPTR_PTR(x) == EXTPTR_PTR(y) ? TRUE : FALSE);
case RAWSXP:
if (XLENGTH(x) != XLENGTH(y)) return FALSE;
/* Use memcmp (which is ISO C90) to speed up the comparison */
return memcmp((void *)RAW(x), (void *)RAW(y),
XLENGTH(x) * sizeof(Rbyte)) == 0 ? TRUE : FALSE;
case PROMSXP:
{
// args are evaluated -- but can be seen from DOTSXP dissection
/* test for equality of the substituted expression -- or should
we require both expression and environment to be identical? */
SEXP sy = PROTECT(substitute(PREXPR(y), PRENV(y)));
SEXP sx = PROTECT(substitute(PREXPR(x), PRENV(x)));
Rboolean ans = R_compute_identical(sx, sy, flags);
UNPROTECT(2); /* sx, sy */
return ans;
}
case OBJSXP:
/* attributes already tested, so all slots identical */
return TRUE;
default:
/* these are all supposed to be types that represent constant
entities, so no further testing required ?? */
printf("Unknown Type in identical(): %s (%x)\n", R_typeToChar(x), TYPEOF(x));
return TRUE;
}
}
/**
* [N]ot [E]qual (x, y) <==> x "!=" y
* where the NA/NaN and "-0." / "+0." cases treatment depend on 'str'.
*
* @param x
* @param y the two "number"s to be compared
* @param str a "strictness" indicator, one of 2*2 (one|bit)_NA__num_(eq|bit)
* "SINGLE_NA" means: x and y differ in the case
* that one, but not both are NaN. Two NaN values are judged
* identical for this purpose, but NA != NaN
*
* "NUM_EQ" means: (x != y) is used when both are not NA or NaN
* whereas "bit_NA" and "num_bit" use the bitwise memory comparison memcmp();
* notably "*_num_bit" will differentiate '+0.' and '-0.'.
*
* @return FALSE or TRUE indicating if x or y differ
*/
static Rboolean neWithNaN(double x, double y, ne_strictness_type str)
{
switch (str) {
case single_NA__num_eq:
case single_NA__num_bit:
if(R_IsNA(x))
return(R_IsNA(y) ? FALSE : TRUE);
if(R_IsNA(y))
return(R_IsNA(x) ? FALSE : TRUE);
if(ISNAN(x))
return(ISNAN(y) ? FALSE : TRUE);
case bit_NA__num_eq:
case bit_NA__num_bit:
; /* do nothing */
}
switch (str) {
case single_NA__num_eq:
return(x != y);
case bit_NA__num_eq:
if(!ISNAN(x) && !ISNAN(y))
return(x != y);
else /* bitwise check for NA/NaN's */
return memcmp((const void *) &x,
(const void *) &y, sizeof(double)) ? TRUE : FALSE;
case bit_NA__num_bit:
case single_NA__num_bit:
return memcmp((const void *) &x,
(const void *) &y, sizeof(double)) ? TRUE : FALSE;
default: /* Wall */
return FALSE;
}
}