Skip to content

Commit 59e400d

Browse files
Merge pull request #7723 from joefarebrother/redos
Java: Add ReDoS queries
2 parents 4bef451 + 64227c9 commit 59e400d

39 files changed

+5716
-60
lines changed

config/identical-files.json

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -475,20 +475,23 @@
475475
"python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll",
476476
"ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll"
477477
],
478-
"ReDoS Util Python/JS/Ruby": [
478+
"ReDoS Util Python/JS/Ruby/Java": [
479479
"javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
480480
"python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
481-
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
481+
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll",
482+
"java/ql/lib/semmle/code/java/security/performance/ReDoSUtil.qll"
482483
],
483-
"ReDoS Exponential Python/JS/Ruby": [
484+
"ReDoS Exponential Python/JS/Ruby/Java": [
484485
"javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",
485486
"python/ql/lib/semmle/python/security/performance/ExponentialBackTracking.qll",
486-
"ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll"
487+
"ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll",
488+
"java/ql/lib/semmle/code/java/security/performance/ExponentialBackTracking.qll"
487489
],
488-
"ReDoS Polynomial Python/JS/Ruby": [
490+
"ReDoS Polynomial Python/JS/Ruby/Java": [
489491
"javascript/ql/lib/semmle/javascript/security/performance/SuperlinearBackTracking.qll",
490492
"python/ql/lib/semmle/python/security/performance/SuperlinearBackTracking.qll",
491-
"ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll"
493+
"ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll",
494+
"java/ql/lib/semmle/code/java/security/performance/SuperlinearBackTracking.qll"
492495
],
493496
"BadTagFilterQuery Python/JS/Ruby": [
494497
"javascript/ql/lib/semmle/javascript/security/BadTagFilterQuery.qll",

java/ql/lib/semmle/code/java/PrintAst.qll

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
import java
10+
import semmle.code.java.regex.RegexTreeView
1011

1112
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
1213

@@ -132,6 +133,9 @@ private newtype TPrintAstNode =
132133
} or
133134
TImportsNode(CompilationUnit cu) {
134135
shouldPrint(cu, _) and exists(Import i | i.getCompilationUnit() = cu)
136+
} or
137+
TRegExpTermNode(RegExpTerm term) {
138+
exists(StringLiteral str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
135139
}
136140

137141
/**
@@ -164,6 +168,19 @@ class PrintAstNode extends TPrintAstNode {
164168
*/
165169
Location getLocation() { none() }
166170

171+
/**
172+
* Holds if this node is at the specified location.
173+
* The location spans column `startcolumn` of line `startline` to
174+
* column `endcolumn` of line `endline` in file `filepath`.
175+
* For more information, see
176+
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
177+
*/
178+
predicate hasLocationInfo(
179+
string filepath, int startline, int startcolumn, int endline, int endcolumn
180+
) {
181+
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
182+
}
183+
167184
/**
168185
* Gets the value of the property of this node, where the name of the property
169186
* is `key`.
@@ -290,6 +307,47 @@ final class AnnotationPartNode extends ExprStmtNode {
290307
}
291308
}
292309

310+
/**
311+
* A node representing a `StringLiteral`.
312+
* If it is used as a regular expression, then it has a single child, the root of the parsed regular expression.
313+
*/
314+
final class StringLiteralNode extends ExprStmtNode {
315+
StringLiteralNode() { element instanceof StringLiteral }
316+
317+
override PrintAstNode getChild(int childIndex) {
318+
childIndex = 0 and
319+
result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
320+
}
321+
}
322+
323+
/**
324+
* A node representing a regular expression term.
325+
*/
326+
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
327+
RegExpTerm term;
328+
329+
RegExpTermNode() { this = TRegExpTermNode(term) }
330+
331+
/** Gets the `RegExpTerm` for this node. */
332+
RegExpTerm getTerm() { result = term }
333+
334+
override PrintAstNode getChild(int childIndex) {
335+
result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
336+
}
337+
338+
override string toString() {
339+
result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
340+
}
341+
342+
override Location getLocation() { result = term.getLocation() }
343+
344+
override predicate hasLocationInfo(
345+
string filepath, int startline, int startcolumn, int endline, int endcolumn
346+
) {
347+
term.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
348+
}
349+
}
350+
293351
/**
294352
* A node representing a `LocalVariableDeclExpr`.
295353
*/

java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ private module Frameworks {
142142
private import semmle.code.java.frameworks.jOOQ
143143
private import semmle.code.java.frameworks.JMS
144144
private import semmle.code.java.frameworks.RabbitMQ
145+
private import semmle.code.java.regex.RegexFlowModels
145146
}
146147

147148
private predicate sourceModelCsv(string row) {
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
/**
2+
* Defines configurations and steps for handling regexes
3+
*/
4+
5+
import java
6+
import semmle.code.java.dataflow.ExternalFlow
7+
private import semmle.code.java.dataflow.DataFlow
8+
private import semmle.code.java.dataflow.DataFlow2
9+
private import RegexFlowModels
10+
private import semmle.code.java.security.SecurityTests
11+
12+
private class ExploitableStringLiteral extends StringLiteral {
13+
ExploitableStringLiteral() { this.getValue().matches(["%+%", "%*%", "%{%}%"]) }
14+
}
15+
16+
/**
17+
* Holds if `kind` is an external sink kind that is relevant for regex flow.
18+
* `full` is true if sinks with this kind match against the full string of its input.
19+
* `strArg` is the index of the argument to methods with this sink kind that contains the string to be matched against,
20+
* where -1 is the qualifier; or -2 if no such argument exists.
21+
*/
22+
private predicate regexSinkKindInfo(string kind, boolean full, int strArg) {
23+
sinkModel(_, _, _, _, _, _, _, kind, _) and
24+
exists(string fullStr, string strArgStr |
25+
(
26+
full = true and fullStr = "f"
27+
or
28+
full = false and fullStr = ""
29+
) and
30+
(
31+
strArgStr.toInt() = strArg
32+
or
33+
strArg = -2 and
34+
strArgStr = ""
35+
)
36+
|
37+
kind = "regex-use[" + fullStr + strArgStr + "]"
38+
)
39+
}
40+
41+
/** A sink that is relevant for regex flow. */
42+
private class RegexFlowSink extends DataFlow::Node {
43+
boolean full;
44+
int strArg;
45+
46+
RegexFlowSink() {
47+
exists(string kind |
48+
regexSinkKindInfo(kind, full, strArg) and
49+
sinkNode(this, kind)
50+
)
51+
}
52+
53+
/** Holds if a regex that flows here is matched against a full string (rather than a substring). */
54+
predicate matchesFullString() { full = true }
55+
56+
/** Gets the string expression that a regex that flows here is matched against, if any. */
57+
Expr getStringArgument() {
58+
exists(MethodAccess ma |
59+
this.asExpr() = argOf(ma, _) and
60+
result = argOf(ma, strArg)
61+
)
62+
}
63+
}
64+
65+
private Expr argOf(MethodAccess ma, int arg) {
66+
arg = -1 and result = ma.getQualifier()
67+
or
68+
result = ma.getArgument(arg)
69+
}
70+
71+
/**
72+
* A unit class for adding additional regex flow steps.
73+
*
74+
* Extend this class to add additional flow steps that should apply to regex flow configurations.
75+
*/
76+
class RegexAdditionalFlowStep extends Unit {
77+
/**
78+
* Holds if the step from `node1` to `node2` should be considered a flow
79+
* step for regex flow configurations.
80+
*/
81+
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
82+
}
83+
84+
// TODO: This may be able to be done with models-as-data if query-specific flow steps become supported.
85+
private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
86+
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
87+
exists(MethodAccess ma, Method m, string package, string type, string name, int arg |
88+
ma.getMethod().getSourceDeclaration().overrides*(m) and
89+
m.hasQualifiedName(package, type, name) and
90+
node1.asExpr() = argOf(ma, arg) and
91+
node2.asExpr() = ma
92+
|
93+
package = "java.util.regex" and
94+
type = "Pattern" and
95+
(
96+
name = ["asMatchPredicate", "asPredicate", "matcher"] and
97+
arg = -1
98+
or
99+
name = "compile" and
100+
arg = 0
101+
)
102+
or
103+
package = "java.util.function" and
104+
type = "Predicate" and
105+
name = ["and", "or", "not", "negate"] and
106+
arg = [-1, 0]
107+
)
108+
}
109+
}
110+
111+
private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
112+
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
113+
exists(MethodAccess ma, Method m, string package, string type, string name, int arg |
114+
ma.getMethod().getSourceDeclaration().overrides*(m) and
115+
m.hasQualifiedName(package, type, name) and
116+
node1.asExpr() = argOf(ma, arg) and
117+
node2.asExpr() = ma
118+
|
119+
package = "com.google.common.base" and
120+
type = "Splitter" and
121+
(
122+
name = "on" and
123+
m.getParameterType(0).(RefType).hasQualifiedName("java.util.regex", "Pattern") and
124+
arg = 0
125+
or
126+
name = "withKeyValueSeparator" and
127+
m.getParameterType(0).(RefType).hasQualifiedName("com.google.common.base", "Splitter") and
128+
arg = 0
129+
or
130+
name = "onPattern" and
131+
arg = 0
132+
or
133+
name = ["limit", "omitEmptyStrings", "trimResults", "withKeyValueSeparator"] and
134+
arg = -1
135+
)
136+
)
137+
}
138+
}
139+
140+
private class RegexFlowConf extends DataFlow2::Configuration {
141+
RegexFlowConf() { this = "RegexFlowConfig" }
142+
143+
override predicate isSource(DataFlow::Node node) {
144+
node.asExpr() instanceof ExploitableStringLiteral
145+
}
146+
147+
override predicate isSink(DataFlow::Node node) { node instanceof RegexFlowSink }
148+
149+
override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
150+
any(RegexAdditionalFlowStep s).step(node1, node2)
151+
}
152+
153+
override predicate isBarrier(DataFlow::Node node) {
154+
node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass
155+
}
156+
}
157+
158+
/**
159+
* Holds if `regex` is used as a regex, with the mode `mode` (if known).
160+
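* If regex mode is not known, `mode` will be `"None"`. `match_full_string` is `true` if `regex` flows to a use that matches it against a full string (as though it was implicitly surrounded by `^` and `$`), and `false` otherwise.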
161+
*
162+
* As an optimisation, only regexes containing an infinite repetition quantifier (`+`, `*`, or `{x,}`)
163+
* and that therefore may be relevant for ReDoS queries are considered.
164+
*/
165+
predicate usedAsRegex(StringLiteral regex, string mode, boolean match_full_string) {
166+
any(RegexFlowConf c).hasFlow(DataFlow2::exprNode(regex), _) and
167+
mode = "None" and // TODO: proper mode detection
168+
(if matchesFullString(regex) then match_full_string = true else match_full_string = false)
169+
}
170+
171+
/**
172+
* Holds if `regex` is used as a regular expression that is matched against a full string,
173+
* as though it was implicitly surrounded by ^ and $.
174+
*/
175+
private predicate matchesFullString(StringLiteral regex) {
176+
exists(RegexFlowConf c, RegexFlowSink sink |
177+
sink.matchesFullString() and
178+
c.hasFlow(DataFlow2::exprNode(regex), sink)
179+
)
180+
}
181+
182+
/**
183+
* Holds if the string literal `regex` is a regular expression that is matched against the expression `str`.
184+
*
185+
* As an optimisation, only regexes containing an infinite repetition quantifier (`+`, `*`, or `{x,}`)
186+
* and that therefore may be relevant for ReDoS queries are considered.
187+
*/
188+
predicate regexMatchedAgainst(StringLiteral regex, Expr str) {
189+
exists(RegexFlowConf c, RegexFlowSink sink |
190+
str = sink.getStringArgument() and
191+
c.hasFlow(DataFlow2::exprNode(regex), sink)
192+
)
193+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/** Definitions of data flow steps for determining flow of regular expressions. */
2+
3+
import java
4+
import semmle.code.java.dataflow.ExternalFlow
5+
6+
private class RegexSinkCsv extends SinkModelCsv {
7+
override predicate row(string row) {
8+
row =
9+
[
10+
//"namespace;type;subtypes;name;signature;ext;input;kind"
11+
"java.util.regex;Matcher;false;matches;();;Argument[-1];regex-use[f]",
12+
"java.util.regex;Pattern;false;asMatchPredicate;();;Argument[-1];regex-use[f]",
13+
"java.util.regex;Pattern;false;compile;(String);;Argument[0];regex-use[]",
14+
"java.util.regex;Pattern;false;compile;(String,int);;Argument[0];regex-use[]",
15+
"java.util.regex;Pattern;false;matcher;(CharSequence);;Argument[-1];regex-use[0]",
16+
"java.util.regex;Pattern;false;matches;(String,CharSequence);;Argument[0];regex-use[f1]",
17+
"java.util.regex;Pattern;false;split;(CharSequence);;Argument[-1];regex-use[0]",
18+
"java.util.regex;Pattern;false;split;(CharSequence,int);;Argument[-1];regex-use[0]",
19+
"java.util.regex;Pattern;false;splitAsStream;(CharSequence);;Argument[-1];regex-use[0]",
20+
"java.util.function;Predicate;false;test;(Object);;Argument[-1];regex-use[0]",
21+
"java.lang;String;false;matches;(String);;Argument[0];regex-use[f-1]",
22+
"java.lang;String;false;split;(String);;Argument[0];regex-use[-1]",
23+
"java.lang;String;false;split;(String,int);;Argument[0];regex-use[-1]",
24+
"java.lang;String;false;replaceAll;(String,String);;Argument[0];regex-use[-1]",
25+
"java.lang;String;false;replaceFirst;(String,String);;Argument[0];regex-use[-1]",
26+
"com.google.common.base;Splitter;false;onPattern;(String);;Argument[0];regex-use[]",
27+
"com.google.common.base;Splitter;false;split;(CharSequence);;Argument[-1];regex-use[0]",
28+
"com.google.common.base;Splitter;false;splitToList;(CharSequence);;Argument[-1];regex-use[0]",
29+
"com.google.common.base;Splitter$MapSplitter;false;split;(CharSequence);;Argument[-1];regex-use[0]",
30+
]
31+
}
32+
}

0 commit comments

Comments
 (0)