Skip to content

Commit

Permalink
Added fallback for parsing RegExp with unknown flags.
Browse files Browse the repository at this point in the history
  • Loading branch information
Napalys committed Mar 3, 2025
1 parent 430514b commit 78aa5dc
Showing 3 changed files with 137 additions and 137 deletions.
Original file line number Diff line number Diff line change
@@ -580,6 +580,27 @@ public Label visit(Identifier nd, Context c) {
return key;
}

/**
 * Reports whether the given node is a variable declaration in which at least one declarator is
 * initialized by an invocation expression whose callee is named {@code RegExp}.
 *
 * <p>The callee matches when it is either an identifier called {@code RegExp} or a member
 * expression whose property is an identifier called {@code RegExp}.
 *
 * @param parent the node to inspect; may be {@code null}
 * @return {@code true} if such a declarator is found, {@code false} otherwise (including when
 *     {@code parent} is {@code null} or is not a variable declaration)
 */
public boolean isRegExpCall(Node parent) {
  // `instanceof` evaluates to false for null, so this guard also covers a missing node.
  if (!(parent instanceof VariableDeclaration)) {
    return false;
  }

  for (VariableDeclarator decl : ((VariableDeclaration) parent).getDeclarations()) {
    if (!(decl.getInit() instanceof InvokeExpression)) {
      continue;
    }
    Expression target = ((InvokeExpression) decl.getInit()).getCallee();

    boolean namedRegExp;
    if (target instanceof Identifier) {
      // Plain call target, e.g. `RegExp(...)`.
      namedRegExp = "RegExp".equals(((Identifier) target).getName());
    } else if (target instanceof MemberExpression) {
      // Qualified call target: only the property name is examined, not the object.
      MemberExpression member = (MemberExpression) target;
      namedRegExp =
          member.getProperty() instanceof Identifier
              && "RegExp".equals(((Identifier) member.getProperty()).getName());
    } else {
      namedRegExp = false;
    }

    if (namedRegExp) {
      return true;
    }
  }
  return false;
}

@Override
public Label visit(Literal nd, Context c) {
Label key = super.visit(nd, c);
@@ -600,7 +621,12 @@ public Label visit(Literal nd, Context c) {
SourceMap sourceMap =
SourceMap.legacyWithStartPos(
SourceMap.fromString(nd.getRaw()).offsetBy(0, offsets), startPos);
regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false, source.substring(source.lastIndexOf('/'), source.length()));

boolean isRegExprCall = isRegExpCall(contextManager.getCurrentStatement());
// If the regular expression was created using RegExp(), the flags might be unknown.
// In this case, we will also attempt to parse it using the "v" (Unicode sets) flag.
String flagsStr = isRegExprCall ? null : source.substring(source.lastIndexOf('/') + 1);
regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false, flagsStr);
} else if (nd.isStringLiteral()
&& !c.isInsideType()
&& nd.getRaw().length() < 1000
14 changes: 13 additions & 1 deletion javascript/extractor/src/com/semmle/js/parser/RegExpParser.java
Original file line number Diff line number Diff line change
@@ -75,7 +75,7 @@ public List<Error> getErrors() {
private String flags;

/** Parse the given string as a regular expression. */
public Result parse(String src) {
public Result tryParse(String src) {
this.src = src;
this.pos = 0;
this.errors = new ArrayList<>();
@@ -88,6 +88,18 @@ public Result parse(String src) {
return new Result(root, errors);
}

/**
 * Parse the given string as a regular expression.
 *
 * <p>If the flags are unknown ({@code flags == null}) and the first attempt reports errors, the
 * source is parsed a second time with the {@code v} flag enabled. The second result is returned
 * only if it is error-free; otherwise the result of the first attempt is returned.
 *
 * @param src the regular expression source to parse
 * @return the parse result, including any errors encountered
 */
public Result parse(String src) {
  Result res = tryParse(src);
  if (flags != null || res.getErrors().isEmpty()) {
    return res;
  }

  // Flags are unknown and the plain parse failed: retry with the `v` flag enabled.
  flags = "v";
  try {
    Result resultWithV = tryParse(src);
    // Only prefer the `v` result if it is strictly better, i.e. free of errors.
    return resultWithV.getErrors().isEmpty() ? resultWithV : res;
  } finally {
    // Put the flags back to "unknown" so the temporary `v` does not leak into later
    // parse(src) calls on this instance and silently disable the fallback.
    flags = null;
  }
}

public Result parse(String src, String flags) {
this.flags = flags;
return parse(src);
Original file line number Diff line number Diff line change
@@ -125,159 +125,121 @@ enclosing_stmt(#20039,#20030)
expr_containers(#20039,#20001)
literals("/[[abc]&&[[bcd]--[[c][d]]]]/","/[[abc]&&[[bcd]--[[c][d]]]]/",#20039)
#20040=*
regexpterm(#20040,1,#20039,0,"[[abc]&&[[bcd]--[[c][d]]]]")
regexpterm(#20040,23,#20039,0,"[[abc]&&[[bcd]--[[c][d]]]]")
#20041=@"loc,{#10000},1,27,1,52"
locations_default(#20041,#10000,1,27,1,52)
hasLocation(#20040,#20041)
#20042=*
regexpterm(#20042,23,#20040,0,"[[abc]")
#20043=@"loc,{#10000},1,27,1,32"
locations_default(#20043,#10000,1,27,1,32)
hasLocation(#20042,#20043)
#20044=*
regexpterm(#20044,14,#20042,0,"[")
#20045=@"loc,{#10000},1,28,1,28"
locations_default(#20045,#10000,1,28,1,28)
hasLocation(#20044,#20045)
regexp_const_value(#20044,"[")
#20046=*
regexpterm(#20046,14,#20042,1,"a")
#20047=@"loc,{#10000},1,29,1,29"
locations_default(#20047,#10000,1,29,1,29)
hasLocation(#20046,#20047)
regexp_const_value(#20046,"a")
#20048=*
regexpterm(#20048,14,#20042,2,"b")
#20049=@"loc,{#10000},1,30,1,30"
locations_default(#20049,#10000,1,30,1,30)
hasLocation(#20048,#20049)
regexp_const_value(#20048,"b")
#20050=*
regexpterm(#20050,14,#20042,3,"c")
#20051=@"loc,{#10000},1,31,1,31"
locations_default(#20051,#10000,1,31,1,31)
hasLocation(#20050,#20051)
regexp_const_value(#20050,"c")
#20052=*
regexpterm(#20052,14,#20040,1,"&&")
#20053=@"loc,{#10000},1,33,1,34"
locations_default(#20053,#10000,1,33,1,34)
hasLocation(#20052,#20053)
regexp_const_value(#20052,"&&")
regexpterm(#20042,29,#20040,0,"[[abc]&&[[bcd]--[[c][d]]]]")
hasLocation(#20042,#20041)
#20043=*
regexpterm(#20043,23,#20042,0,"[abc]")
#20044=@"loc,{#10000},1,28,1,32"
locations_default(#20044,#10000,1,28,1,32)
hasLocation(#20043,#20044)
#20045=*
regexpterm(#20045,14,#20043,0,"a")
#20046=@"loc,{#10000},1,29,1,29"
locations_default(#20046,#10000,1,29,1,29)
hasLocation(#20045,#20046)
regexp_const_value(#20045,"a")
#20047=*
regexpterm(#20047,14,#20043,1,"b")
#20048=@"loc,{#10000},1,30,1,30"
locations_default(#20048,#10000,1,30,1,30)
hasLocation(#20047,#20048)
regexp_const_value(#20047,"b")
#20049=*
regexpterm(#20049,14,#20043,2,"c")
#20050=@"loc,{#10000},1,31,1,31"
locations_default(#20050,#10000,1,31,1,31)
hasLocation(#20049,#20050)
regexp_const_value(#20049,"c")
#20051=*
regexpterm(#20051,23,#20042,1,"[[bcd]--[[c][d]]]")
#20052=@"loc,{#10000},1,35,1,51"
locations_default(#20052,#10000,1,35,1,51)
hasLocation(#20051,#20052)
#20053=*
regexpterm(#20053,30,#20051,0,"[[bcd]--[[c][d]]]")
hasLocation(#20053,#20052)
#20054=*
regexpterm(#20054,23,#20040,2,"[[bcd]")
#20055=@"loc,{#10000},1,35,1,40"
locations_default(#20055,#10000,1,35,1,40)
regexpterm(#20054,23,#20053,0,"[bcd]")
#20055=@"loc,{#10000},1,36,1,40"
locations_default(#20055,#10000,1,36,1,40)
hasLocation(#20054,#20055)
#20056=*
regexpterm(#20056,14,#20054,0,"[")
#20057=@"loc,{#10000},1,36,1,36"
locations_default(#20057,#10000,1,36,1,36)
regexpterm(#20056,14,#20054,0,"b")
#20057=@"loc,{#10000},1,37,1,37"
locations_default(#20057,#10000,1,37,1,37)
hasLocation(#20056,#20057)
regexp_const_value(#20056,"[")
regexp_const_value(#20056,"b")
#20058=*
regexpterm(#20058,14,#20054,1,"b")
#20059=@"loc,{#10000},1,37,1,37"
locations_default(#20059,#10000,1,37,1,37)
regexpterm(#20058,14,#20054,1,"c")
#20059=@"loc,{#10000},1,38,1,38"
locations_default(#20059,#10000,1,38,1,38)
hasLocation(#20058,#20059)
regexp_const_value(#20058,"b")
regexp_const_value(#20058,"c")
#20060=*
regexpterm(#20060,14,#20054,2,"c")
#20061=@"loc,{#10000},1,38,1,38"
locations_default(#20061,#10000,1,38,1,38)
regexpterm(#20060,14,#20054,2,"d")
#20061=@"loc,{#10000},1,39,1,39"
locations_default(#20061,#10000,1,39,1,39)
hasLocation(#20060,#20061)
regexp_const_value(#20060,"c")
regexp_const_value(#20060,"d")
#20062=*
regexpterm(#20062,14,#20054,3,"d")
#20063=@"loc,{#10000},1,39,1,39"
locations_default(#20063,#10000,1,39,1,39)
regexpterm(#20062,23,#20053,1,"[[c][d]]")
#20063=@"loc,{#10000},1,43,1,50"
locations_default(#20063,#10000,1,43,1,50)
hasLocation(#20062,#20063)
regexp_const_value(#20062,"d")
#20064=*
regexpterm(#20064,14,#20040,3,"--")
#20065=@"loc,{#10000},1,41,1,42"
locations_default(#20065,#10000,1,41,1,42)
hasLocation(#20064,#20065)
regexp_const_value(#20064,"--")
#20066=*
regexpterm(#20066,23,#20040,4,"[[c]")
#20067=@"loc,{#10000},1,43,1,46"
locations_default(#20067,#10000,1,43,1,46)
hasLocation(#20066,#20067)
#20068=*
regexpterm(#20068,14,#20066,0,"[")
#20069=@"loc,{#10000},1,44,1,44"
locations_default(#20069,#10000,1,44,1,44)
hasLocation(#20068,#20069)
regexp_const_value(#20068,"[")
#20070=*
regexpterm(#20070,14,#20066,1,"c")
#20071=@"loc,{#10000},1,45,1,45"
locations_default(#20071,#10000,1,45,1,45)
hasLocation(#20070,#20071)
regexp_const_value(#20070,"c")
#20072=*
regexpterm(#20072,23,#20040,5,"[d]")
#20073=@"loc,{#10000},1,47,1,49"
locations_default(#20073,#10000,1,47,1,49)
hasLocation(#20072,#20073)
#20074=*
regexpterm(#20074,14,#20072,0,"d")
#20075=@"loc,{#10000},1,48,1,48"
locations_default(#20075,#10000,1,48,1,48)
hasLocation(#20074,#20075)
regexp_const_value(#20074,"d")
#20076=*
regexpterm(#20076,14,#20040,6,"]")
#20077=@"loc,{#10000},1,50,1,50"
locations_default(#20077,#10000,1,50,1,50)
hasLocation(#20076,#20077)
regexp_const_value(#20076,"]")
#20078=*
regexpterm(#20078,14,#20040,7,"]")
#20079=@"loc,{#10000},1,51,1,51"
locations_default(#20079,#10000,1,51,1,51)
hasLocation(#20078,#20079)
regexp_const_value(#20078,"]")
#20080=*
regexpterm(#20080,14,#20040,8,"]")
#20081=@"loc,{#10000},1,52,1,52"
locations_default(#20081,#10000,1,52,1,52)
hasLocation(#20080,#20081)
regexp_const_value(#20080,"]")
#20082=*
regexp_parse_errors(#20082,#20040,"unexpected character")
hasLocation(#20082,#20077)
#20083=*
regexp_parse_errors(#20083,#20040,"unexpected character")
hasLocation(#20083,#20079)
#20084=*
regexp_parse_errors(#20084,#20040,"unexpected character")
hasLocation(#20084,#20081)
#20085=*
exprs(#20085,79,#20035,1,"notKnownFlags")
hasLocation(#20085,#20021)
enclosing_stmt(#20085,#20030)
expr_containers(#20085,#20001)
literals("notKnownFlags","notKnownFlags",#20085)
#20086=@"var;{notKnownFlags};{#20000}"
variables(#20086,"notKnownFlags",#20000)
bind(#20085,#20086)
#20087=*
entry_cfg_node(#20087,#20001)
#20088=@"loc,{#10000},1,1,1,0"
locations_default(#20088,#10000,1,1,1,0)
hasLocation(#20087,#20088)
#20089=*
exit_cfg_node(#20089,#20001)
hasLocation(#20089,#20027)
regexpterm(#20064,31,#20062,0,"[[c][d]]")
hasLocation(#20064,#20063)
#20065=*
regexpterm(#20065,23,#20064,0,"[c]")
#20066=@"loc,{#10000},1,44,1,46"
locations_default(#20066,#10000,1,44,1,46)
hasLocation(#20065,#20066)
#20067=*
regexpterm(#20067,14,#20065,0,"c")
#20068=@"loc,{#10000},1,45,1,45"
locations_default(#20068,#10000,1,45,1,45)
hasLocation(#20067,#20068)
regexp_const_value(#20067,"c")
#20069=*
regexpterm(#20069,23,#20064,1,"[d]")
#20070=@"loc,{#10000},1,47,1,49"
locations_default(#20070,#10000,1,47,1,49)
hasLocation(#20069,#20070)
#20071=*
regexpterm(#20071,14,#20069,0,"d")
#20072=@"loc,{#10000},1,48,1,48"
locations_default(#20072,#10000,1,48,1,48)
hasLocation(#20071,#20072)
regexp_const_value(#20071,"d")
#20073=*
exprs(#20073,79,#20035,1,"notKnownFlags")
hasLocation(#20073,#20021)
enclosing_stmt(#20073,#20030)
expr_containers(#20073,#20001)
literals("notKnownFlags","notKnownFlags",#20073)
#20074=@"var;{notKnownFlags};{#20000}"
variables(#20074,"notKnownFlags",#20000)
bind(#20073,#20074)
#20075=*
entry_cfg_node(#20075,#20001)
#20076=@"loc,{#10000},1,1,1,0"
locations_default(#20076,#10000,1,1,1,0)
hasLocation(#20075,#20076)
#20077=*
exit_cfg_node(#20077,#20001)
hasLocation(#20077,#20027)
successor(#20030,#20034)
successor(#20085,#20035)
successor(#20039,#20085)
successor(#20073,#20035)
successor(#20039,#20073)
successor(#20037,#20039)
successor(#20035,#20032)
successor(#20034,#20037)
successor(#20032,#20089)
successor(#20087,#20030)
successor(#20032,#20077)
successor(#20075,#20030)
numlines(#10000,1,1,0)
filetype(#10000,"javascript")

0 comments on commit 78aa5dc

Please sign in to comment.