Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Make RegExp character class match JSC.

  • Loading branch information...
commit 94bb378ee558ed52411894754a0e1851fc243ed8 1 parent 72f5bcb
lrn@chromium.org authored

Showing 2 changed files with 37 additions and 11 deletions. Show diff stats Hide diff stats

  1. +25 11 src/parser.cc
  2. +12 0 test/mjsunit/regexp.js
36 src/parser.cc
@@ -4449,6 +4449,22 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
4449 4449 }
4450 4450
4451 4451
  4452 +static const uc16 kNoCharClass = 0;
  4453 +
  4454 +// Adds range or pre-defined character class to character ranges.
  4455 +// If char_class is not kNoCharClass, it's interpreted as a class
  4456 +// escape (i.e., 's' means whitespace, from '\s').
  4457 +static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
  4458 + uc16 char_class,
  4459 + CharacterRange range) {
  4460 + if (char_class != kNoCharClass) {
  4461 + CharacterRange::AddClassEscape(char_class, ranges);
  4462 + } else {
  4463 + ranges->Add(range);
  4464 + }
  4465 +}
  4466 +
  4467 +
4452 4468 RegExpTree* RegExpParser::ParseCharacterClass() {
4453 4469 static const char* kUnterminated = "Unterminated character class";
4454 4470 static const char* kRangeOutOfOrder = "Range out of order in character class";
@@ -4462,12 +4478,8 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
4462 4478 }
4463 4479 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
4464 4480 while (has_more() && current() != ']') {
4465   - uc16 char_class = 0;
  4481 + uc16 char_class = kNoCharClass;
4466 4482 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
4467   - if (char_class) {
4468   - CharacterRange::AddClassEscape(char_class, ranges);
4469   - continue;
4470   - }
4471 4483 if (current() == '-') {
4472 4484 Advance();
4473 4485 if (current() == kEndMarker) {
@@ -4475,15 +4487,17 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
4475 4487 // following code report an error.
4476 4488 break;
4477 4489 } else if (current() == ']') {
4478   - ranges->Add(first);
  4490 + AddRangeOrEscape(ranges, char_class, first);
4479 4491 ranges->Add(CharacterRange::Singleton('-'));
4480 4492 break;
4481 4493 }
4482   - CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED);
4483   - if (char_class) {
4484   - ranges->Add(first);
  4494 + uc16 char_class_2 = kNoCharClass;
  4495 + CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
  4496 + if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
  4497 + // Either end is an escaped character class. Treat the '-' verbatim.
  4498 + AddRangeOrEscape(ranges, char_class, first);
4485 4499 ranges->Add(CharacterRange::Singleton('-'));
4486   - CharacterRange::AddClassEscape(char_class, ranges);
  4500 + AddRangeOrEscape(ranges, char_class_2, next);
4487 4501 continue;
4488 4502 }
4489 4503 if (first.from() > next.to()) {
@@ -4491,7 +4505,7 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
4491 4505 }
4492 4506 ranges->Add(CharacterRange::Range(first.from(), next.to()));
4493 4507 } else {
4494   - ranges->Add(first);
  4508 + AddRangeOrEscape(ranges, char_class, first);
4495 4509 }
4496 4510 }
4497 4511 if (!has_more()) {
12 test/mjsunit/regexp.js
@@ -202,6 +202,17 @@ assertFalse(re.test('\n'));
202 202 assertFalse(re.test('a'));
203 203 assertFalse(re.test('Z'));
204 204
  205 +// First - is treated as range operator, second as literal minus.
  206 +// This follows the specification in parsing, but doesn't throw on
  207 +// the \s at the beginning of the range.
  208 +re = /[\s-0-9]/;
  209 +assertTrue(re.test(' '));
  210 +assertTrue(re.test('\xA0'));
  211 +assertTrue(re.test('-'));
  212 +assertTrue(re.test('0'));
  213 +assertTrue(re.test('9'));
  214 +assertFalse(re.test('1'));
  215 +
205 216 // Test beginning and end of line assertions with or without the
206 217 // multiline flag.
207 218 re = /^\d+/;
@@ -647,3 +658,4 @@ assertEquals(4, re.exec("zimzamzumba").index);
647 658 assertEquals(["bc"], re.exec("zimzomzumbc"));
648 659 assertFalse(re.test("c"));
649 660 assertFalse(re.test(""));
  661 +

0 comments on commit 94bb378

Please sign in to comment.
Something went wrong with that request. Please try again.