Skip to content

Python: Add type-tracking flow for class (instance) attributes #16670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,29 @@ module TypeTrackingInput implements Shared::TypeTrackingInput {
not nodeFrom instanceof DataFlowPublic::IterableElementNode
or
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, content)
or
// class-level attribute store
classmethodSoreStep(nodeFrom, nodeTo, content)
}

/** Holds if `write` writes to the `attrName` attribute of the class parameter of a classmethod on `cls`. */
private predicate classmethodStoreOnCls(
DataFlowPublic::AttrWrite write, Class cls, string attrName
) {
exists(DataFlowDispatch::DataFlowClassmethod writeMethod |
writeMethod.getClass() = cls and
write.getObject().getALocalSource() =
writeMethod.getParameter(any(DataFlowDispatch::ParameterPosition p | p.isSelf())) and
write.getAttributeName() = attrName
)
}

private predicate classmethodSoreStep(Node nodeFrom, Node nodeTo, Content content) {
exists(Class cls, DataFlowPublic::AttrWrite write |
classmethodStoreOnCls(write, cls, content.(DataFlowPublic::AttributeContent).getAttribute()) and
nodeFrom = write.getValue() and
nodeTo = DataFlowPublic::exprNode(cls.getParent())
)
}

/**
Expand Down Expand Up @@ -273,6 +296,74 @@ module TypeTrackingInput implements Shared::TypeTrackingInput {
*/
predicate loadStoreStep(Node nodeFrom, Node nodeTo, Content loadContent, Content storeContent) {
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent)
or
// flow from class/self -> cls/self/instance, for the relevant attributes.
// Using loadStoreStep is more powerful than a potential solution utilizing levelStepNoCall targeting attribute read; with this setup, a flow summary that reads a class attribute will actually work!
exists(Class cls, string attrName, boolean storeOnClass |
loadContent.(DataFlowPublic::AttributeContent).getAttribute() = attrName and
storeContent.(DataFlowPublic::AttributeContent).getAttribute() = attrName and
(
// class attribute
storeOnClass = true and
nodeFrom = DataFlowPublic::exprNode(cls.getParent()) and
(
exists(DataFlowPublic::AttrWrite write | write.accesses(nodeFrom, attrName))
or
classmethodStoreOnCls(_, cls, attrName)
)
or
// `self.foo = <value>` in normal method
storeOnClass = false and
exists(DataFlowPublic::AttrWrite write |
instanceMethodStoreOnSelf(write, cls, attrName) and nodeFrom = write.getObject()
)
) and
(
// cls in classmethod on same class
storeOnClass = true and
exists(DataFlowDispatch::DataFlowClassmethod classMethod |
classMethod.getClass() = cls and
nodeTo = classMethod.getParameter(any(DataFlowDispatch::ParameterPosition p | p.isSelf()))
)
or
// self in (plain) method on same class
//
// TODO: handle subclasses
storeOnClass in [true, false] and
exists(DataFlowDispatch::DataFlowMethod instanceMethod |
not instanceMethod instanceof DataFlowDispatch::DataFlowClassmethod and
not instanceMethod instanceof DataFlowDispatch::DataFlowStaticmethod
|
instanceMethod.getClass() = cls and
nodeTo =
instanceMethod.getParameter(any(DataFlowDispatch::ParameterPosition p | p.isSelf()))
)
or
// instantiation of class
//
// TODO: proper tracking of class (we can't just use type-tracking right now,
// since we're using a late-inlined relation in a recursive setting, which is
// not supported)
storeOnClass in [true, false] and
nodeTo.(DataFlowPublic::CallCfgNode).getFunction().getALocalSource() =
DataFlowPublic::exprNode(cls.getParent())
)
)
}

/** Holds if `write` writes to the `attrName` attribute of the "self" parameter of a normal method on `cls`. */
private predicate instanceMethodStoreOnSelf(
DataFlowPublic::AttrWrite write, Class cls, string attrName
) {
exists(DataFlowDispatch::DataFlowMethod writeMethod |
not writeMethod instanceof DataFlowDispatch::DataFlowClassmethod and
not writeMethod instanceof DataFlowDispatch::DataFlowStaticmethod
|
writeMethod.getClass() = cls and
write.getObject().getALocalSource() =
writeMethod.getParameter(any(DataFlowDispatch::ParameterPosition p | p.isSelf())) and
write.getAttributeName() = attrName
)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@ testFailures
failures
debug_callableNotUnique
pointsTo_found_typeTracker_notFound
| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method |
| code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def |
| code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar |
| code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo |
| code/func_defined_outside_class.py:22:1:22:15 | ControlFlowNode for Attribute() | outside |
| code/func_defined_outside_class.py:24:1:24:14 | ControlFlowNode for Attribute() | outside_sm |
| code/func_defined_outside_class.py:25:1:25:14 | ControlFlowNode for Attribute() | outside_cm |
| code/func_defined_outside_class.py:38:11:38:21 | ControlFlowNode for _gen() | B._gen |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ def __init__(self, func):
self.direct_ref = my_func

def later(self):
self.indirect_ref() # $ pt=my_func MISSING: tt=my_func
self.direct_ref() # $ pt=my_func MISSING: tt=my_func
self.indirect_ref() # $ pt,tt=my_func
self.direct_ref() # $ pt,tt=my_func

foo = Foo(my_func) # $ tt=Foo.__init__
foo.later() # $ pt,tt=Foo.later
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def bar_on_super(cls):
b = B()
b.foo() # $ pt,tt=B.foo
b.foo_on_super() # $ pt,tt=B.foo_on_super
b.od() # $ pt=outside_def
b.od() # $ pt,tt=outside_def
b.sm() # $ pt,tt=B.sm

print("="*10, "static method")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def foo(self):

a = A()
a.foo_ref() # $ pt=A.foo
a.outside_ref() # $ pt=outside
a.outside_ref() # $ pt,tt=outside

a.outside_sm() # $ pt=outside_sm
a.outside_cm() # $ pt=outside_cm
Expand Down
112 changes: 102 additions & 10 deletions python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_global_attribute_read():

def test_local_attribute_assignment():
# Same as `test_global_attribute_assignment`, but the assigned variable is not global
# In this case, we don't want flow going to the `ModuleVariableNode` for `local_var`
# In this case, we don't want flow going to the `ModuleVariableNode` for `local_var`
# (which is referenced in `test_local_attribute_read`).
local_var = object() # $ tracked=foo
local_var.foo = tracked # $ tracked tracked=foo
Expand All @@ -76,7 +76,7 @@ class MyClass: # $tracked=field

lookup = MyClass.field # $tracked tracked=field
instance = MyClass() # $tracked=field
lookup2 = instance.field # MISSING: tracked
lookup2 = instance.field # $ tracked tracked=field

# ------------------------------------------------------------------------------
# Dynamic attribute access
Expand Down Expand Up @@ -143,22 +143,22 @@ def dunder_dict_indirect_read():
# Tracking of attribute on class instance
# ------------------------------------------------------------------------------

# attribute set in method
# attribute set in constructor (so is always called)
# inspired by https://github.com/github/codeql/pull/6023

class MyClass2(object):
def __init__(self): # $ tracked=foo
self.foo = tracked # $ tracked=foo tracked

def print_foo(self): # $ MISSING: tracked=foo
print(self.foo) # $ MISSING: tracked=foo tracked
def print_foo(self): # $ tracked=foo
print(self.foo) # $ tracked=foo tracked

def possibly_uncalled_method(self): # $ MISSING: tracked=foo
print(self.foo) # $ MISSING: tracked=foo tracked
def possibly_uncalled_method(self): # $ tracked=foo
print(self.foo) # $ tracked=foo tracked

instance = MyClass2()
print(instance.foo) # $ MISSING: tracked=foo tracked
instance.print_foo() # $ MISSING: tracked=foo
instance = MyClass2() # $ tracked=foo
print(instance.foo) # $ tracked=foo tracked
instance.print_foo() # $ tracked=foo


# attribute set from outside of class
Expand All @@ -177,3 +177,95 @@ def possibly_uncalled_method(self): # $ MISSING: tracked=foo
instance.print_self() # $ tracked=foo
instance.foo = tracked # $ tracked=foo tracked
instance.print_foo() # $ tracked=foo


# attribute set from method on class (which may or may not be called for a specific instance)

class MyClass4(object):
def set_foo(self): # $ tracked=foo
self.foo = tracked # $ tracked=foo tracked

def print_foo(self): # $ tracked=foo
print(self.foo) # $ tracked=foo tracked

def possibly_uncalled_method(self): # $ tracked=foo
print(self.foo) # $ tracked=foo tracked

instance = MyClass4() # $ tracked=foo
instance.set_foo() # $ tracked=foo
instance.print_foo() # $ tracked=foo
print(instance.foo) # $ tracked=foo tracked


# class-level attributes

class MyClass5(object): # $ tracked=foo tracked=bar
foo = tracked # $ tracked
# bar is set from a classmethod
bar = None

def on_self(self): # $ tracked=bar tracked=foo
print(self.foo) # $ tracked=foo tracked tracked=bar
print(self.bar) # $ tracked=bar tracked tracked=foo

@staticmethod
def on_classref():
print(MyClass5.foo) # $ tracked=foo tracked tracked=bar
print(MyClass5.bar) # $ tracked=foo tracked=bar tracked

@classmethod
def on_cls(cls): # $ tracked=bar tracked=foo
print(cls.foo) # $ tracked=foo tracked tracked=bar
print(cls.bar) # $ tracked=bar tracked tracked=foo

@classmethod
def set_bar(cls): # $ tracked=bar tracked=foo
cls.bar = tracked # $ tracked=bar tracked tracked=foo

instance = MyClass5() # $ tracked=foo tracked=bar
print(instance.foo) # $ tracked=foo tracked tracked=bar
print(instance.bar) # $ tracked=bar tracked tracked=foo


# shadowing of class-level attribute by instance attribute

class MyClass6(object): # $ int=foo
foo = int() # $ int

def set_instance_foo(self): # $ str=foo int=foo
self.foo = str() # $ str str=foo int=foo

def use_im(self): # $ int=foo str=foo
print(self.foo) # $ int int=foo str str=foo

@classmethod
def use_cls(cls): # $ int=foo
print(cls.foo) # $ int int=foo


print(MyClass6.foo) # $ int int=foo

instance = MyClass6() # $ int=foo str=foo
print(instance.foo) # $ int int=foo str str=foo
instance.set_instance_foo() # $ int=foo str=foo
print(instance.foo) # $ int int=foo str str=foo


# attributes flowing between subclass and base class

class BaseClass(object):
def set_foo(self): # $ tracked=foo
self.foo = tracked # $ tracked=foo tracked

def use_foo(self): # $ tracked=foo
print(self.foo) # $ tracked=foo tracked

def use_bar(self): # $ tracked=foo MISSING: tracked=bar
print(self.bar) # $ tracked=foo MISSING: tracked tracked=bar

class SubClass(BaseClass): # $ MISSING: tracked=foo
def also_use_foo(self): # $ tracked=bar
print(self.foo) # $ tracked=bar MISSING: tracked=foo tracked

def set_bar(self): # $ tracked=bar
self.bar = tracked # $ tracked=bar tracked
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ async def baz3(request): # $ requestHandler

class MyCustomHandlerClass:

async def foo_handler(self, request): # $ MISSING: requestHandler
async def foo_handler(self, request): # $ requestHandler
return web.Response(text="MyCustomHandlerClass.foo") # $ HttpResponse

my_custom_handler = MyCustomHandlerClass()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def method2(self):

x = MyClass()
x.base_method()
x.method1()
x.cls_method()
x.static()
x.method2()
x.method1() # $ resolved=method1
x.cls_method() # $ resolved=cls_method
x.static() # $ resolved=static
x.method2() # $ resolved=method2
Loading