Skip to content

Commit 605456f

Browse files
committed
Added intersection support.
1 parent 2f0f6e6 commit 605456f

File tree

5 files changed

+250
-226
lines changed

5 files changed

+250
-226
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package com.semmle.js.ast.regexp;
2+
3+
import com.semmle.js.ast.SourceLocation;
4+
import java.util.List;
5+
6+
/**
 * A regular expression AST term that groups a list of operand terms under the
 * term kind name "Intersection".
 *
 * <p>NOTE(review): presumably this models the {@code &&} operator inside a
 * {@code v}-flag character class (e.g. {@code [a&&b]}) -- confirm against the parser.
 */
public class Intersection extends RegExpTerm {
7+
// Operand terms; this is the same list instance that was passed to the constructor.
private final List<RegExpTerm> intersections;
8+
9+
/**
 * Creates an intersection term.
 *
 * @param loc source location forwarded to the {@code RegExpTerm} constructor
 * @param intersections operand terms; stored by reference, not copied
 */
public Intersection(SourceLocation loc, List<RegExpTerm> intersections) {
10+
super(loc, "Intersection");
11+
this.intersections = intersections;
12+
}
13+
14+
/** Dispatches to {@code v.visit(this)} so the visitor's {@code Intersection} overload is selected. */
@Override
15+
public void accept(Visitor v) {
16+
v.visit(this);
17+
}
18+
19+
/**
 * Returns the operand terms of this intersection.
 *
 * @return the list given at construction time (no defensive copy is made)
 */
public List<RegExpTerm> getIntersections() {
20+
return intersections;
21+
}
22+
}

javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,6 @@ public interface Visitor {
6363
public void visit(UnicodePropertyEscape nd);
6464

6565
public void visit(CharacterClassQuotedString nd);
66+
67+
public void visit(Intersection nd);
6668
}

javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import com.semmle.js.ast.regexp.Group;
2424
import com.semmle.js.ast.regexp.HexEscapeSequence;
2525
import com.semmle.js.ast.regexp.IdentityEscape;
26+
import com.semmle.js.ast.regexp.Intersection;
2627
import com.semmle.js.ast.regexp.Literal;
2728
import com.semmle.js.ast.regexp.NamedBackReference;
2829
import com.semmle.js.ast.regexp.NonWordBoundary;
@@ -94,6 +95,7 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) {
9495
termkinds.put("ZeroWidthNegativeLookbehind", 26);
9596
termkinds.put("UnicodePropertyEscape", 27);
9697
termkinds.put("CharacterClassQuotedString", 28);
98+
termkinds.put("Intersection", 29);
9799
}
98100

99101
private static final String[] errmsgs =
@@ -352,6 +354,14 @@ public void visit(CharacterClassQuotedString nd) {
352354
Label lbl = extractTerm(nd, parent, idx);
353355
visit(nd.getTerm(), lbl, 0);
354356
}
357+
358+
// Extracts an Intersection term and then each of its operands.
// The term itself is extracted first with the current `parent` and `idx`; the
// label returned by extractTerm is then passed along with every operand.
// NOTE(review): presumably `lbl` acts as the parent label and `i` as the child
// index of each operand -- confirm against the three-argument visit helper.
@Override
359+
public void visit(Intersection nd) {
360+
Label lbl = extractTerm(nd, parent, idx);
361+
// Operands are numbered consecutively from 0 in list order.
int i = 0;
362+
for (RegExpTerm element : nd.getIntersections())
363+
visit(element, lbl, i++);
364+
}
355365
}
356366

357367
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import com.semmle.js.ast.regexp.Group;
2020
import com.semmle.js.ast.regexp.HexEscapeSequence;
2121
import com.semmle.js.ast.regexp.IdentityEscape;
22+
import com.semmle.js.ast.regexp.Intersection;
2223
import com.semmle.js.ast.regexp.NamedBackReference;
2324
import com.semmle.js.ast.regexp.NonWordBoundary;
2425
import com.semmle.js.ast.regexp.OctalEscape;
@@ -37,6 +38,7 @@
3738
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
3839
import java.util.ArrayList;
3940
import java.util.Arrays;
41+
import java.util.Collections;
4042
import java.util.List;
4143

4244
/** A parser for ECMAScript 2018 regular expressions. */
@@ -563,10 +565,16 @@ private RegExpTerm parseCharacterClass() {
563565
return this.finishTerm(new CharacterClass(loc, elements, inverted));
564566
}
565567

568+
/**
 * Kind of nested character class recognised while parsing.
 * parseNestedCharacterClass starts out with STANDARD and switches to
 * INTERSECTION once it matches a {@code &&} token.
 */
private enum CharacterClassType {
569+
// Plain character class: elements are passed to CharacterClass directly.
STANDARD,
570+
// Class containing `&&`: elements are wrapped in a single Intersection term.
INTERSECTION,
571+
}
572+
566573
// The ECMAScript 2024 `v` flag allows nested character classes.
567574
private RegExpTerm parseNestedCharacterClass() {
568575
SourceLocation loc = new SourceLocation(pos());
569576
List<RegExpTerm> elements = new ArrayList<>();
577+
CharacterClassType classType = CharacterClassType.STANDARD;
570578

571579
this.match("[");
572580
boolean inverted = this.match("^");
@@ -578,11 +586,23 @@ private RegExpTerm parseNestedCharacterClass() {
578586
if (lookahead("[")) {
579587
elements.add(parseNestedCharacterClass());
580588
}
589+
else if (lookahead("&&")) {
590+
this.match("&&");
591+
classType = CharacterClassType.INTERSECTION;
592+
}
581593
else {
582594
elements.add(this.parseCharacterClassElement());
583595
}
584596
}
585-
return this.finishTerm(new CharacterClass(loc, elements, inverted));
597+
598+
// Create appropriate RegExpTerm based on the detected class type
599+
switch (classType) {
600+
case INTERSECTION:
601+
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new Intersection(loc, elements)), inverted));
602+
case STANDARD:
603+
default:
604+
return this.finishTerm(new CharacterClass(loc, elements, inverted));
605+
}
586606
}
587607

588608
private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");

0 commit comments

Comments
 (0)