Skip to content

Commit

Permalink
Added intersection support.
Browse files Browse the repository at this point in the history
  • Loading branch information
Napalys committed Mar 2, 2025
1 parent 2f0f6e6 commit 605456f
Showing 5 changed files with 250 additions and 226 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.semmle.js.ast.regexp;

import com.semmle.js.ast.SourceLocation;
import java.util.List;

/**
 * A regular-expression AST term that groups the operand terms of an intersection.
 *
 * <p>The term kind passed to the superclass is the string {@code "Intersection"}.
 */
public class Intersection extends RegExpTerm {
  /** The operand terms, exactly as handed to the constructor. */
  private final List<RegExpTerm> intersections;

  /**
   * Creates an intersection term.
   *
   * @param loc the source location associated with this term
   * @param intersections the operand terms; the list is stored as-is, not copied
   */
  public Intersection(SourceLocation loc, List<RegExpTerm> intersections) {
    super(loc, "Intersection");
    this.intersections = intersections;
  }

  /**
   * Returns the operand terms.
   *
   * @return the same list instance that was passed to the constructor
   */
  public List<RegExpTerm> getIntersections() {
    return this.intersections;
  }

  /** Hands this term to the visitor's {@code visit(Intersection)} overload. */
  @Override
  public void accept(Visitor v) {
    v.visit(this);
  }
}
Original file line number Diff line number Diff line change
@@ -63,4 +63,6 @@ public interface Visitor {
public void visit(UnicodePropertyEscape nd);

public void visit(CharacterClassQuotedString nd);

/** Visits an {@link Intersection} term. */
public void visit(Intersection nd);
}
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@
import com.semmle.js.ast.regexp.Group;
import com.semmle.js.ast.regexp.HexEscapeSequence;
import com.semmle.js.ast.regexp.IdentityEscape;
import com.semmle.js.ast.regexp.Intersection;
import com.semmle.js.ast.regexp.Literal;
import com.semmle.js.ast.regexp.NamedBackReference;
import com.semmle.js.ast.regexp.NonWordBoundary;
@@ -94,6 +95,7 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) {
termkinds.put("ZeroWidthNegativeLookbehind", 26);
termkinds.put("UnicodePropertyEscape", 27);
termkinds.put("CharacterClassQuotedString", 28);
termkinds.put("Intersection", 29);
}

private static final String[] errmsgs =
@@ -352,6 +354,14 @@ public void visit(CharacterClassQuotedString nd) {
Label lbl = extractTerm(nd, parent, idx);
visit(nd.getTerm(), lbl, 0);
}

/**
 * Extracts an intersection term, then visits each of its operands in list order, passing the
 * label returned by {@code extractTerm} together with a zero-based running index.
 */
@Override
public void visit(Intersection nd) {
  Label intersectionLabel = extractTerm(nd, parent, idx);
  int childIndex = 0;
  for (RegExpTerm operand : nd.getIntersections()) {
    visit(operand, intersectionLabel, childIndex);
    childIndex++;
  }
}
}

public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {
22 changes: 21 additions & 1 deletion javascript/extractor/src/com/semmle/js/parser/RegExpParser.java
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
import com.semmle.js.ast.regexp.Group;
import com.semmle.js.ast.regexp.HexEscapeSequence;
import com.semmle.js.ast.regexp.IdentityEscape;
import com.semmle.js.ast.regexp.Intersection;
import com.semmle.js.ast.regexp.NamedBackReference;
import com.semmle.js.ast.regexp.NonWordBoundary;
import com.semmle.js.ast.regexp.OctalEscape;
@@ -37,6 +38,7 @@
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/** A parser for ECMAScript 2018 regular expressions. */
@@ -563,10 +565,16 @@ private RegExpTerm parseCharacterClass() {
return this.finishTerm(new CharacterClass(loc, elements, inverted));
}

/** The kind of character class detected while parsing a nested character class. */
private enum CharacterClassType {
  /** No {@code &&} was encountered; the elements form an ordinary character class. */
  STANDARD,

  /** A {@code &&} was encountered; the elements are wrapped in an {@code Intersection}. */
  INTERSECTION
}

// The ECMAScript 2024 `v` flag allows nested character classes and `&&` intersections inside them.
private RegExpTerm parseNestedCharacterClass() {
SourceLocation loc = new SourceLocation(pos());
List<RegExpTerm> elements = new ArrayList<>();
CharacterClassType classType = CharacterClassType.STANDARD;

this.match("[");
boolean inverted = this.match("^");
@@ -578,11 +586,23 @@ private RegExpTerm parseNestedCharacterClass() {
if (lookahead("[")) {
elements.add(parseNestedCharacterClass());
}
else if (lookahead("&&")) {
this.match("&&");
classType = CharacterClassType.INTERSECTION;
}
else {
elements.add(this.parseCharacterClassElement());
}
}
return this.finishTerm(new CharacterClass(loc, elements, inverted));

// Create appropriate RegExpTerm based on the detected class type
switch (classType) {
case INTERSECTION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new Intersection(loc, elements)), inverted));
case STANDARD:
default:
return this.finishTerm(new CharacterClass(loc, elements, inverted));
}
}

private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
Loading
Oops, something went wrong.

0 comments on commit 605456f

Please sign in to comment.