Skip to content

Commit a760ed8

Browse files
authored
Merge pull request #5388 from tausbn/python-api-graph-builtins
Python: Support built-ins in API graphs
2 parents dfc0e9b + 978200e commit a760ed8

File tree

9 files changed

+120
-8
lines changed

9 files changed

+120
-8
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* API graphs now contain nodes for built-in functions and classes. For instance, `API::builtin("open")` is the API graph node corresponding to the built-in `open` function.

python/ql/src/semmle/python/ApiGraphs.qll

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ module API {
216216
*/
217217
Node moduleImport(string m) { result = Impl::MkModuleImport(m) }
218218

219+
/** Gets a node corresponding to the built-in with the given name, if any. */
220+
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
221+
219222
/**
220223
* Provides the actual implementation of API graphs, cached for performance.
221224
*
@@ -300,11 +303,18 @@ module API {
300303
MkRoot() or
301304
/** An abstract representative for imports of the module called `name`. */
302305
MkModuleImport(string name) {
303-
imports(_, name)
306+
// Ignore the module name `__builtin__` for Python 2, as we alias `__builtin__` to `builtins` elsewhere
307+
(name != "__builtin__" or major_version() = 3) and
308+
(
309+
imports(_, name)
310+
or
311+
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
312+
// `foo` and `foo.bar`:
313+
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
314+
)
304315
or
305-
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
306-
// `foo` and `foo.bar`:
307-
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
316+
// The `builtins` module should always be implicitly available
317+
name = "builtins"
308318
} or
309319
/** A use of an API member at the node `nd`. */
310320
MkUse(DataFlow::Node nd) { use(_, _, nd) }
@@ -339,6 +349,24 @@ module API {
339349
)
340350
}
341351

352+
private import semmle.python.types.Builtins as Builtins
353+
354+
/**
355+
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
356+
*
357+
* Currently this is an over-approximation, and does not account for things like overwriting a
358+
* built-in with a different value.
359+
*/
360+
private DataFlow::Node likely_builtin(string name) {
361+
result.asCfgNode() =
362+
any(NameNode n |
363+
n.isGlobal() and
364+
n.isLoad() and
365+
name = n.getId() and
366+
name = any(Builtins::Builtin b).getName()
367+
)
368+
}
369+
342370
/**
343371
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
344372
* `lbl` in the API graph.
@@ -369,6 +397,10 @@ module API {
369397
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
370398
)
371399
)
400+
or
401+
// Built-ins, treated as members of the module `builtins`
402+
base = MkModuleImport("builtins") and
403+
lbl = Label::member(any(string name | ref = likely_builtin(name)))
372404
}
373405

374406
/**
@@ -381,6 +413,11 @@ module API {
381413
imports(ref, name)
382414
)
383415
or
416+
// Ensure the Python 2 `__builtin__` module gets the name of the Python 3 `builtins` module.
417+
major_version() = 2 and
418+
nd = MkModuleImport("builtins") and
419+
imports(ref, "__builtin__")
420+
or
384421
nd = MkUse(ref)
385422
}
386423

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --lang=2 --max-import-depth=1
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def python2_style():
2+
from __builtin__ import open #$ use=moduleImport("builtins").getMember("open")
3+
open("hello.txt") #$ use=moduleImport("builtins").getMember("open").getReturn()

python/ql/test/experimental/dataflow/ApiGraphs-py2/use.expected

Whitespace-only changes.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import python
2+
import semmle.python.dataflow.new.DataFlow
3+
import TestUtilities.InlineExpectationsTest
4+
import semmle.python.ApiGraphs
5+
6+
class ApiUseTest extends InlineExpectationsTest {
7+
ApiUseTest() { this = "ApiUseTest" }
8+
9+
override string getARelevantTag() { result = "use" }
10+
11+
private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) {
12+
n = a.getAUse() and l = n.getLocation()
13+
}
14+
15+
override predicate hasActualResult(Location location, string element, string tag, string value) {
16+
exists(API::Node a, DataFlow::Node n | relevant_node(a, n, location) |
17+
tag = "use" and
18+
// Only report the longest path on this line:
19+
value =
20+
max(API::Node a2, Location l2 |
21+
relevant_node(a2, _, l2) and
22+
l2.getFile() = location.getFile() and
23+
l2.getStartLine() = location.getStartLine()
24+
|
25+
a2.getPath()
26+
) and
27+
element = n.toString()
28+
)
29+
}
30+
}

python/ql/test/experimental/dataflow/ApiGraphs/test.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,42 @@ def my_internal_method(self): #$ use=moduleImport("pflask").getMember("views").g
100100
pass
101101

102102
int_instance = IntMyView() #$ use=moduleImport("pflask").getMember("views").getMember("View").getASubclass().getReturn()
103+
104+
105+
# Built-ins
106+
107+
def use_of_builtins():
108+
for x in range(5): #$ use=moduleImport("builtins").getMember("range").getReturn()
109+
if x < len([]): #$ use=moduleImport("builtins").getMember("len").getReturn()
110+
print("Hello") #$ use=moduleImport("builtins").getMember("print").getReturn()
111+
raise Exception("Farewell") #$ use=moduleImport("builtins").getMember("Exception").getReturn()
112+
113+
def imported_builtins():
114+
import builtins #$ use=moduleImport("builtins")
115+
def open(f):
116+
return builtins.open(f) #$ MISSING: use=moduleImport("builtins").getMember("open").getReturn()
117+
118+
def redefine_print():
119+
def my_print(x):
120+
import builtins #$ use=moduleImport("builtins")
121+
builtins.print("I'm printing", x) #$ use=moduleImport("builtins").getMember("print").getReturn()
122+
print = my_print
123+
print("these words")
124+
125+
def local_redefine_range():
126+
range = 5
127+
return range
128+
129+
def global_redefine_range():
130+
global range
131+
range = 6
132+
return range #$ SPURIOUS: use=moduleImport("builtins").getMember("range")
133+
134+
def obscured_print():
135+
p = print #$ use=moduleImport("builtins").getMember("print")
136+
p("Can you see me?") #$ use=moduleImport("builtins").getMember("print").getReturn()
137+
138+
def python2_style():
139+
# In Python 3, `__builtin__` has no special meaning.
140+
from __builtin__ import open #$ use=moduleImport("__builtin__").getMember("open")
141+
open("hello.txt") #$ use=moduleImport("__builtin__").getMember("open").getReturn()

python/ql/test/experimental/dataflow/ApiGraphs/test1.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
33
try:
44
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
5-
except AttributeError as e:
6-
print(e) # module 'mypkg' has no attribute 'bar'
5+
except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError")
6+
print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar'

python/ql/test/experimental/dataflow/ApiGraphs/test5.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
44
try:
55
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
6-
except AttributeError as e:
7-
print(e) # module 'mypkg' has no attribute 'bar'
6+
except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError")
7+
print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar'
88

99
from mypkg import bar as _bar #$ use=moduleImport("mypkg").getMember("bar")
1010
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") // <module 'mypkg.bar' ...

0 commit comments

Comments
 (0)