Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make RegExp#[Symbol.*] methods call exec #411

Merged
merged 5 commits into from
Aug 30, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/core-js/internals/advance-string-index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
'use strict';
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment noting that this is the AdvanceStringIndex abstract operation, along with a link to the spec.

var at = require('../internals/string-at')(true);
module.exports = function (S, index, unicode) {
return index + (unicode ? at(S, index).length : 1);
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ var fails = require('../internals/fails');
var requireObjectCoercible = require('../internals/require-object-coercible');
var wellKnownSymbol = require('../internals/well-known-symbol');

module.exports = function (KEY, length, exec) {
module.exports = function (KEY, length, exec, sham) {
var SYMBOL = wellKnownSymbol(KEY);
var methods = exec(requireObjectCoercible, SYMBOL, ''[KEY]);
var stringMethod = methods[0];
Expand All @@ -16,13 +16,17 @@ module.exports = function (KEY, length, exec) {
return ''[KEY](O) != 7;
})) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure that this test also covers all engines that do not call the .exec method internally? Maybe it makes sense to extend it?

redefine(String.prototype, KEY, stringMethod);
hide(RegExp.prototype, SYMBOL, length == 2
redefine(RegExp.prototype, SYMBOL, length == 2
// 21.2.5.8 RegExp.prototype[@@replace](string, replaceValue)
// 21.2.5.11 RegExp.prototype[@@split](string, limit)
? function (string, arg) { return regexMethod.call(string, this, arg); }
// 21.2.5.6 RegExp.prototype[@@match](string)
// 21.2.5.9 RegExp.prototype[@@search](string)
: function (string) { return regexMethod.call(string, this); }
);
// TODO: This line makes the tests fail:
// ReferenceError: Can't find variable: Reflect
// hide(RegExp.prototype[SYMBOL], 'name', '[Symbol.' + KEY + ']');
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In many engines, a function's .name property is non-writable and non-configurable, so I think it would be better to just remove it.

if (sham) hide(RegExp.prototype[SYMBOL], 'sham', true);
}
};
45 changes: 38 additions & 7 deletions packages/core-js/modules/es.string.match.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,42 @@
'use strict';

var anObject = require('../internals/an-object');
var toLength = require('../internals/to-length');
var advanceStringIndex = require('../internals/advance-string-index');
var nativeExec = RegExp.prototype.exec;

// @@match logic
require('../internals/fix-regexp-well-known-symbol-logic')('match', 1, function (defined, MATCH, nativeMatch) {
// `String.prototype.match` method
// https://tc39.github.io/ecma262/#sec-string.prototype.match
return [function match(regexp) {
var O = defined(this);
var matcher = regexp == undefined ? undefined : regexp[MATCH];
return matcher !== undefined ? matcher.call(regexp, O) : new RegExp(regexp)[MATCH](String(O));
}, nativeMatch];
return [
// `String.prototype.match` method
// https://tc39.github.io/ecma262/#sec-string.prototype.match
function match(regexp) {
var O = defined(this);
var matcher = regexp == undefined ? undefined : regexp[MATCH];
return matcher !== undefined ? matcher.call(regexp, O) : new RegExp(regexp)[MATCH](String(O));
},
// `RegExp.prototype[@@match]` method
// https://tc39.github.io/ecma262/#sec-regexp.prototype-@@match
function Symbol$match(regexp) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For better compression, use named functions only when they should have the same explicit .name property (like match above).

if (regexp.exec === nativeExec) return nativeMatch.call(this, regexp);

var rx = anObject(regexp);
var S = String(this);

if (!rx.global) return rx.exec(S);

var fullUnicode = rx.unicode;
rx.lastIndex = 0;
var A = [];
var n = 0;
var result;
while ((result = rx.exec(S)) !== null) {
Copy link
Owner

@zloirock zloirock Jun 30, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In RegExpExec, an own .exec should be called only if it's callable. Maybe it makes sense to implement the RegExpExec abstract operation?

var matchStr = String(result[0]);
A[n] = matchStr;
if (matchStr === '') rx.lastIndex = advanceStringIndex(S, toLength(rx.lastIndex), fullUnicode);
n++;
}
return n === 0 ? null : A;
}
];
});
118 changes: 111 additions & 7 deletions packages/core-js/modules/es.string.replace.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,117 @@
'use strict';

var anObject = require('../internals/an-object');
var toObject = require('../internals/to-object');
var toLength = require('../internals/to-length');
var toInteger = require('../internals/to-integer');
var advanceStringIndex = require('../internals/advance-string-index');
var nativeExec = RegExp.prototype.exec;
var max = Math.max;
var min = Math.min;
var floor = Math.floor;
var SUBSTITUTION_SYMBOLS = /\$([$&`']|\d\d?|<[^>]*>)/g;
var SUBSTITUTION_SYMBOLS_NO_NAMED = /\$([$&`']|\d\d?)/g;

// Converts a value to a string, except `undefined`, which is passed through untouched
var maybeToString = function (it) {
  if (it === undefined) return it;
  return String(it);
};

// @@replace logic
require('../internals/fix-regexp-well-known-symbol-logic')('replace', 2, function (defined, REPLACE, nativeReplace) {
// `String.prototype.replace` method
// https://tc39.github.io/ecma262/#sec-string.prototype.replace
return [function replace(searchValue, replaceValue) {
var O = defined(this);
var replacer = searchValue == undefined ? undefined : searchValue[REPLACE];
return replacer !== undefined
return [
// `String.prototype.replace` method
// https://tc39.github.io/ecma262/#sec-string.prototype.replace
function replace(searchValue, replaceValue) {
var O = defined(this);
var replacer = searchValue == undefined ? undefined : searchValue[REPLACE];
return replacer !== undefined
? replacer.call(searchValue, O, replaceValue)
: nativeReplace.call(String(O), searchValue, replaceValue);
}, nativeReplace];
},
// `RegExp.prototype[@@replace]` method
// https://tc39.github.io/ecma262/#sec-regexp.prototype-@@replace
//
// `this` is converted to the subject string; `regexp` is the pattern object and
// `replaceValue` is either a replacer function or a value that is stringified and
// used as a replacement template. Returns the subject string with the match(es) replaced.
function Symbol$replace(regexp, replaceValue) {
  // `exec` has not been replaced, so the native implementation can be used as is
  if (regexp.exec === nativeExec) return nativeReplace.call(this, regexp, replaceValue);

  var rx = anObject(regexp);
  var S = String(this);

  var functionalReplace = typeof replaceValue === 'function';
  if (!functionalReplace) replaceValue = String(replaceValue);

  var global = rx.global;
  if (global) {
    var fullUnicode = rx.unicode;
    rx.lastIndex = 0;
  }

  // First pass: collect every match result (only the first one for non-global patterns)
  var results = [];
  while (true) {
    var result = rx.exec(S);
    if (result === null) break;

    results.push(result);
    if (!global) break;

    // An empty match does not move `lastIndex`; step over it to avoid looping forever
    var matchStr = String(result[0]);
    if (matchStr === '') rx.lastIndex = advanceStringIndex(S, toLength(rx.lastIndex), fullUnicode);
  }

  // Second pass: build the output from the untouched slices and the replacements
  var accumulatedResult = '';
  var nextSourcePosition = 0;
  for (var i = 0; i < results.length; i++) {
    result = results[i];

    var matched = String(result[0]);
    // Clamp the reported index into [0, S.length]
    var position = max(min(toInteger(result.index), S.length), 0);
    // A user-supplied `exec` may return any array-like object, so the captures are
    // read by index up to `length` instead of calling Array methods on the result
    var captures = [];
    for (var j = 1; j < result.length; j++) captures.push(maybeToString(result[j]));
    var namedCaptures = result.groups;
    if (functionalReplace) {
      var replacerArgs = [matched].concat(captures, position, S);
      if (namedCaptures !== undefined) replacerArgs.push(namedCaptures);
      var replacement = String(replaceValue.apply(undefined, replacerArgs));
    } else {
      replacement = getSubstitution(matched, S, position, captures, namedCaptures, replaceValue);
    }
    // Results positioned before the already consumed part of `S` are ignored
    if (position >= nextSourcePosition) {
      accumulatedResult += S.slice(nextSourcePosition, position) + replacement;
      nextSourcePosition = position + matched.length;
    }
  }
  return accumulatedResult + S.slice(nextSourcePosition);
}
];

// `GetSubstitution` abstract operation
// https://tc39.github.io/ecma262/#sec-getsubstitution
// Expands the `$` patterns of a replacement template for a single match.
//   matched       - the matched substring
//   str           - the whole subject string
//   position      - index of the match inside `str`
//   captures      - capture values (`undefined` for groups that did not participate)
//   namedCaptures - the `groups` object of the match, or `undefined`
//   replacement   - the replacement template
// Returns the expanded replacement text.
function getSubstitution(matched, str, position, captures, namedCaptures, replacement) {
  var tailPos = position + matched.length;
  var m = captures.length;
  // `$<name>` is only recognised when the match carries named groups
  var symbols = SUBSTITUTION_SYMBOLS_NO_NAMED;
  if (namedCaptures !== undefined) {
    namedCaptures = toObject(namedCaptures);
    symbols = SUBSTITUTION_SYMBOLS;
  }
  return nativeReplace.call(replacement, symbols, function (match, ch) {
    var capture;
    // `charAt` instead of index access: string indexing is unavailable in ES3 engines
    switch (ch.charAt(0)) {
      case '$': return '$';
      case '&': return matched;
      case '`': return str.slice(0, position);
      case "'": return str.slice(tailPos);
      case '<':
        capture = namedCaptures[ch.slice(1, -1)];
        break;
      default: // \d\d?
        var n = +ch;
        // `$0` / `$00` are not capture references: keep the pattern verbatim, `$` included
        if (n === 0) return match;
        if (n > m) {
          // Two digits exceeding the capture count: try `$n` followed by a literal digit
          var f = floor(n / 10);
          if (f === 0) return match;
          if (f <= m) return captures[f - 1] === undefined ? ch.charAt(1) : captures[f - 1] + ch.charAt(1);
          return match;
        }
        capture = captures[n - 1];
    }
    return capture === undefined ? '' : capture;
  });
}
});
35 changes: 28 additions & 7 deletions packages/core-js/modules/es.string.search.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,32 @@
'use strict';

var anObject = require('../internals/an-object');
var sameValue = require('../internals/same-value');
var nativeExec = RegExp.prototype.exec;

// @@search logic
require('../internals/fix-regexp-well-known-symbol-logic')('search', 1, function (defined, SEARCH, nativeSearch) {
// `String.prototype.search` method
// https://tc39.github.io/ecma262/#sec-string.prototype.search
return [function search(regexp) {
var O = defined(this);
var searcher = regexp == undefined ? undefined : regexp[SEARCH];
return searcher !== undefined ? searcher.call(regexp, O) : new RegExp(regexp)[SEARCH](String(O));
}, nativeSearch];
return [
// `String.prototype.search` method
// https://tc39.github.io/ecma262/#sec-string.prototype.search
function search(regexp) {
var O = defined(this);
var searcher = regexp == undefined ? undefined : regexp[SEARCH];
return searcher !== undefined ? searcher.call(regexp, O) : new RegExp(regexp)[SEARCH](String(O));
},
// `RegExp.prototype[@@search]` method
// https://tc39.github.io/ecma262/#sec-regexp.prototype-@@search
//
// Runs a single `exec` from position 0 against the stringified `this` and returns the
// index of the match, or -1 when there is none. `lastIndex` is put back afterwards.
function Symbol$search(regexp) {
  // `exec` is untouched: defer to the native method
  if (regexp.exec === nativeExec) return nativeSearch.call(this, regexp);

  var rx = anObject(regexp);
  var S = String(this);

  // Searching always starts at 0, whatever `lastIndex` currently holds
  var savedLastIndex = rx.lastIndex;
  if (!sameValue(savedLastIndex, 0)) rx.lastIndex = 0;

  var found = rx.exec(S);

  // Undo any `lastIndex` movement caused by the `exec` call
  if (!sameValue(rx.lastIndex, savedLastIndex)) rx.lastIndex = savedLastIndex;

  if (found === null) return -1;
  return found.index;
}
];
});
88 changes: 77 additions & 11 deletions packages/core-js/modules/es.string.split.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
'use strict';

var isRegExp = require('../internals/is-regexp');
var anObject = require('../internals/an-object');
var speciesConstructor = require('../internals/species-constructor');
var advanceStringIndex = require('../internals/advance-string-index');
var toLength = require('../internals/to-length');
var nativeExec = RegExp.prototype.exec;
var arrayPush = [].push;
var min = Math.min;
var LENGTH = 'length';

// Whether the engine accepts the sticky (`y`) RegExp flag: constructing a RegExp
// with it must succeed, any exception is treated as "not supported"
var SUPPORTS_Y = (function () {
  try {
    return Boolean(new RegExp('x', 'y'));
  } catch (error) {
    return false;
  }
})();

// @@split logic
require('../internals/fix-regexp-well-known-symbol-logic')('split', 2, function (defined, SPLIT, nativeSplit) {
var isRegExp = require('../internals/is-regexp');
var internalSplit = nativeSplit;
var arrayPush = [].push;
var LENGTH = 'length';
if (
'abbc'.split(/(b)*/)[1] == 'c' ||
'test'.split(/(?:)/, -1)[LENGTH] != 4 ||
Expand Down Expand Up @@ -62,13 +73,68 @@ require('../internals/fix-regexp-well-known-symbol-logic')('split', 2, function
return separator === undefined && limit === 0 ? [] : nativeSplit.call(this, separator, limit);
};
}
// `String.prototype.split` method
// https://tc39.github.io/ecma262/#sec-string.prototype.split
return [function split(separator, limit) {
var O = defined(this);
var splitter = separator == undefined ? undefined : separator[SPLIT];
return splitter !== undefined

return [
// `String.prototype.split` method
// https://tc39.github.io/ecma262/#sec-string.prototype.split
function split(separator, limit) {
var O = defined(this);
var splitter = separator == undefined ? undefined : separator[SPLIT];
return splitter !== undefined
? splitter.call(separator, O, limit)
: internalSplit.call(String(O), separator, limit);
}, internalSplit];
});
},
// `RegExp.prototype[@@split]` method
// https://tc39.github.io/ecma262/#sec-regexp.prototype-@@split
//
// NOTE: This cannot be properly polyfilled in engines that don't support
// the 'y' flag.
//
// `this` is converted to the subject string `S`; `regexp` is the separator object and
// `limit` the optional maximum number of entries in the returned array.
// Returns an array with the pieces of `S` and the capture values of each separator match.
function Symbol$split(regexp, limit) {
// We can never use `internalSplit` if exec has been changed, because
// internalSplit contains workarounds for things which might have been
// purposely changed by the developer.
if (regexp.exec === nativeExec) return internalSplit.call(this, regexp, limit);

var rx = anObject(regexp);
var S = String(this);
// Constructor used to build the working copy of the separator.
// NOTE(review): `speciesConstructor` is defined elsewhere; presumably it resolves
// `rx.constructor[@@species]` with `RegExp` as the fallback - confirm.
var C = speciesConstructor(rx, RegExp);

var unicodeMatching = rx.unicode;
// Flags of the working copy are rebuilt from the individual properties of `rx`;
// the copy is sticky where supported and global otherwise.
var flags = (rx.ignoreCase ? 'i' : '') +
(rx.multiline ? 'm' : '') +
(rx.unicode ? 'u' : '') +
(SUPPORTS_Y ? 'y' : 'g');

// '^(?:' + rx.source + ')' is needed, in combination with some S slicing, to
// simulate the 'y' flag.
var splitter = new C(SUPPORTS_Y ? rx : '^(?:' + rx.source + ')', flags);
// `>>> 0` coerces the limit to an unsigned 32-bit integer; an undefined limit
// becomes the largest such value (0xffffffff)
var lim = limit === undefined ? 0xffffffff : limit >>> 0;
if (lim === 0) return [];
// Empty subject: a one-element array holding `S`, unless the splitter matches it
if (S.length === 0) return splitter.exec(S) === null ? [S] : [];
// p - start of the piece that has not been emitted yet
// q - position currently tested for a separator match
var p = 0;
var q = 0;
var A = [];
while (q < S.length) {
// With sticky support the match is attempted at `q` inside `S`; otherwise the
// anchored pattern is run from index 0 of the remainder `S.slice(q)`
splitter.lastIndex = SUPPORTS_Y ? q : 0;
var z = splitter.exec(SUPPORTS_Y ? S : S.slice(q));
// e - end of the match, translated back to an index in `S` and clamped to `S.length`
var e;
if (
z === null ||
(e = min(toLength(splitter.lastIndex + (SUPPORTS_Y ? 0 : q)), S.length)) === p
) {
// No match at `q`, or the match ends where the pending piece starts: move on
q = advanceStringIndex(S, q, unicodeMatching);
} else {
// Emit the piece before the separator, then every capture of the match,
// returning as soon as `lim` entries have been collected
A.push(S.slice(p, q));
if (A.length === lim) return A;
for (var i = 1; i <= z.length - 1; i++) {
A.push(z[i]);
if (A.length === lim) return A;
}
// Continue right after the separator
q = p = e;
}
}
// Whatever follows the last separator is the final piece
A.push(S.slice(p));
return A;
}
];
}, !SUPPORTS_Y);
6 changes: 1 addition & 5 deletions packages/core-js/modules/esnext.string.match-all.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ var isRegExp = require('../internals/is-regexp');
var getFlags = require('../internals/regexp-flags');
var hide = require('../internals/hide');
var speciesConstructor = require('../internals/species-constructor');
var at = require('../internals/string-at')(true);
var advanceStringIndex = require('../internals/advance-string-index');
var MATCH_ALL = require('../internals/well-known-symbol')('matchAll');
var IS_PURE = require('../internals/is-pure');
var REGEXP_STRING = 'RegExp String';
Expand Down Expand Up @@ -37,10 +37,6 @@ var matchAllIterator = function (R, O) {
return new $RegExpStringIterator(matcher, S, global, fullUnicode);
};

var advanceStringIndex = function (S, index, unicode) {
return index + (unicode ? at(S, index).length : 1);
};

var regExpExec = function (R, S) {
var exec = R.exec;
var result;
Expand Down
18 changes: 18 additions & 0 deletions tests/helpers/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,21 @@ export function timeLimitedPromise(time, fn) {
}),
]);
}

// This function is used to force RegExp.prototype[Symbol.*] methods
// to not use the native implementation.
export function patchRegExp$exec(run) {
return assert => {
const originalExec = RegExp.prototype.exec;
// eslint-disable-next-line no-extend-native
RegExp.prototype.exec = function (...args) {
return originalExec.apply(this, args);
};
try {
return run(assert);
} finally {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC in very old IE try / finally does not work without catch.

// eslint-disable-next-line no-extend-native
RegExp.prototype.exec = originalExec;
}
};
}
Loading