# Tracking Origins

_Brief abstract/introduction/motivation.  State what the chapter is about in 1-2 paragraphs._
_Then, have an introduction video:_

In [None]:
from bookutils import YouTubeVideo
# Embed the chapter's introduction video.
YouTubeVideo("w4u5gCgPlmg")

**Prerequisites**

* _Refer to earlier chapters as notebooks, as in:_ [Earlier Chapter](Fuzzer.ipynb).

In [None]:
import bookutils

## Synopsis

<!-- Automatically generated. Do not edit. -->



_For those only interested in using the code in this chapter (without wanting to know how it works), give an example.  This will be copied to the beginning of the chapter (before the first section) as text with rendered input and output._

You can use `int_fuzzer()` as:

```python
print(int_fuzzer())
```
```python
=> 76.5

```


## Approach 1: Wrap Data

In [None]:
def middle(x, y, z):
    """Return one of `x`, `y`, `z`, selected by pairwise comparisons.

    NOTE(review): the name suggests the median of the three arguments, but the
    `elif x < z` branch below returns `y` where the median would be `x`
    (e.g. `middle(2, 1, 3)` yields 1). This looks like a deliberately seeded
    defect for the dependency-tracking demo -- confirm before changing it.
    """
    if y < z:
        if x < y:
            return y
        elif x < z:
            return y
    else:
        if x > y:
            return y
        elif x > z:
            return x
    # Reached when neither inner comparison of the chosen branch holds.
    return z

In [None]:
# Sample call: with x=2, y=1, z=3 the `elif x < z` branch is taken,
# so `y` (i.e. 1) is returned.
m = middle(2, 1, 3)
m

In [None]:
import inspect

## Instrumenting Assignments

In [None]:
import ast
import astor

In [None]:
from bookutils import rich_output

In [None]:
if rich_output():
    from showast import show_ast
else:
    def show_ast(tree):
        """Plain-text fallback for `showast.show_ast`: print the dump of `tree`."""
        # `ast.dump()` only returns a string; without `print()` the fallback
        # silently showed nothing.
        print(ast.dump(tree))

In [None]:
import math

In [None]:
def square_root(x):
    """Approximate the square root of `x` by repeated averaging (Newton's method).

    :param x: a non-negative number (checked by an assertion).
    :return: a value whose square is close to `x` (checked by an assertion).
    :raises AssertionError: if `x` is negative or the result is not close enough.
    """
    assert x >= 0  # precondition

    guess = x / 2
    if x == 0:
        # sqrt(0) is 0, which `guess` already equals; iterating from here
        # would evaluate `x / approx` with `approx == 0`.
        return guess

    approx = None
    # Iterate until the estimate no longer changes.
    while approx != guess:
        approx = guess
        guess = (approx + x / approx) / 2

    assert math.isclose(approx * approx, x)  # postcondition
    return approx

In [None]:
# Parse the source text of `square_root` into an AST and display it.
square_root_tree = ast.parse(inspect.getsource(square_root))
show_ast(square_root_tree)

In [None]:
from ast import NodeTransformer, Subscript, Constant, Name, Load, Store, \
    Assign, Attribute, If, With, withitem, Return, Index, Str

In [None]:
# Name of the global variable through which instrumented code reads and
# writes its variables (see `make_data_access()` and `Slicer.instrument()`).
DATA_STORE = '_data'

In [None]:
# Show the AST of a subscript access such as `_data['x']`.
print(ast.dump(ast.parse(f"{DATA_STORE}['x']")))

In [None]:
def make_data_access(id, ctx):
    """Build the AST for `DATA_STORE[id]`.

    :param id: the variable name, used as the string key of the subscript.
    :param ctx: the expression context of the access (`Load()` or `Store()`).
    :return: an `ast.Subscript` node; it carries no source location.
    """
    return Subscript(
        value=Name(id=DATA_STORE, ctx=Load()),
        # `Constant` replaces `Str`, which has been deprecated since Python 3.8.
        # `Index` is kept so that interpreters before 3.9 still get the slice
        # wrapper they require.
        slice=Index(value=Constant(value=id)),
        ctx=ctx
    )

In [None]:
class AccessTransformer(NodeTransformer):
    """Rewrite every name `v` into a data-store access `DATA_STORE['v']`."""

    def visit_Name(self, node):
        # Keep the original context so loads stay loads and stores stay stores.
        variable, context = node.id, node.ctx
        return make_data_access(variable, context)

In [None]:
# Rewrite all variable accesses and print the resulting source.
# Note: `NodeTransformer.visit()` modifies `square_root_tree` in place.
new_square_root_tree = AccessTransformer().visit(square_root_tree)
print(astor.to_source(new_square_root_tree))

In [None]:
class SaveArgsTransformer(NodeTransformer):
    """Prepend `DATA_STORE['arg'] = arg` for every parameter of a function."""

    def visit_FunctionDef(self, node):
        # All `ast.arg` children of the argument list are the named parameters.
        parameter_names = [
            child.arg
            for child in ast.iter_child_nodes(node.args)
            if isinstance(child, ast.arg)
        ]

        # One store per parameter, in declaration order.
        prologue = [
            Assign(
                targets=[make_data_access(parameter, Store())],
                value=Name(id=parameter, ctx=Load())
            )
            for parameter in parameter_names
        ]

        node.body = prologue + node.body
        return node

In [None]:
# Add the argument-saving assignments to the already rewritten tree.
new_square_root_tree = SaveArgsTransformer().visit(new_square_root_tree)
show_ast(new_square_root_tree)

In [None]:
# Show the transformed tree as Python source.
print(astor.to_source(new_square_root_tree))

In [None]:
class SaveReturnTransformer(NodeTransformer):
    """Route every returned value through the data store.

    `return e` becomes
    `DATA_STORE['<return value>'] = e; return DATA_STORE['<return value>']`.
    """
    # Key under which the returned value is stored.
    RETURN_VALUE = '<return value>'

    def visit_Return(self, node):
        value = node.value
        if value is None:
            # A bare `return` has no value expression, but `Assign` requires
            # one -- store an explicit `None` instead of building a tree that
            # cannot be compiled.
            value = ast.copy_location(Constant(value=None), node)

        assign_node = Assign(
                targets=[make_data_access(self.RETURN_VALUE, Store())],
                value=value
            )
        return_node = Return(
                value=make_data_access(self.RETURN_VALUE, Load())
            )
        # Both replacement statements take the location of the original `return`.
        ast.copy_location(assign_node, node)
        ast.copy_location(return_node, node)

        # Returning a list replaces the single statement by two.
        return [
            assign_node,
            return_node
        ]

In [None]:
# Additionally route return values through the data store.
new_square_root_tree = SaveReturnTransformer().visit(new_square_root_tree)
show_ast(new_square_root_tree)

In [None]:
# Show the transformed tree as Python source.
print(astor.to_source(new_square_root_tree))

In [None]:
class ControlTransformer(NodeTransformer):
    """Wrap the branches of `if` and `while` statements into `with DATA_STORE:` blocks."""

    def make_with(self, block):
        """Return `block` wrapped in a single `with DATA_STORE:` statement.

        An empty block stays empty.
        """
        if not block:
            return []

        context = withitem(
            context_expr=Name(id=DATA_STORE, ctx=Load()),
            optional_vars=None)
        return [With(items=[context], body=block)]

    def _wrap_branches(self, node):
        # Wrap first, then descend, so nested statements inside the new
        # `with` blocks are processed as well.
        node.body = self.make_with(node.body)
        node.orelse = self.make_with(node.orelse)
        return self.generic_visit(node)

    # `if` and `while` share the same shape: a body and an optional `else`.
    visit_If = _wrap_branches
    visit_While = _wrap_branches

In [None]:
# Apply the control transformer first, as done for the other transformers;
# otherwise the tree shown here is unchanged from the previous step.
new_square_root_tree = ControlTransformer().visit(new_square_root_tree)
show_ast(new_square_root_tree)

In [None]:
# Show the transformed tree as Python source.
print(astor.to_source(new_square_root_tree))

In [None]:
def print_ast_ids(tree):
    """Debugging aid: print every node of `tree`, followed by its source if available."""
    for current in ast.walk(tree):
        print(current)
        try:
            rendered = astor.to_source(current)
        except AttributeError:
            # Not every node type can be turned back into source.
            print("(No source)\n")
        else:
            print(rendered)

In [None]:
# print_ast_ids(new_square_root_tree)

In [None]:
class DataStore(dict):
    """Dictionary holding the variables of instrumented code.

    Reads of names that were never stored fall back to this module's globals
    and then to the builtins, mirroring ordinary Python name lookup. The store
    can also be used as a context manager that does nothing; subclasses hook
    into `__enter__` / `__exit__` to track control flow.
    """

    def __init__(self, *args):
        super().__init__(*args)

    def __getitem__(self, name):
        """Return the value stored under `name`, or the global/builtin of that name.

        :raises KeyError: if `name` is neither stored nor a global nor a builtin.
        """
        if name in self:
            return super().__getitem__(name)

        module_globals = globals()
        if name in module_globals:
            return module_globals[name]

        # The access transformer rewrites *every* name, so builtins such as
        # `len` or `print` arrive here too; `globals()` does not contain them.
        import builtins
        try:
            return getattr(builtins, name)
        except AttributeError:
            raise KeyError(name) from None

    def __setitem__(self, name, value):
        return super().__setitem__(name, value)

    def __repr__(self):
        return super().__repr__()

    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised inside the block propagate.
        pass

In [None]:
class DataLogger(DataStore):
    """`DataStore` that prints every read and write with the location causing it."""

    def caller_location(self):
        """Return "<function name>:<line number>" of the code accessing the store.

        Must be called directly from `__getitem__` / `__setitem__`: it walks
        up exactly two frames from its own.
        """
        accessor_frame = inspect.currentframe().f_back
        origin = accessor_frame.f_back
        return "{}:{}".format(origin.f_code.co_name, origin.f_lineno)

    def __getitem__(self, name):
        where = self.caller_location()
        print(where, "reading", name)
        return super().__getitem__(name)

    def __setitem__(self, name, value):
        where = self.caller_location()
        print(where, "storing", name)
        return super().__setitem__(name, value)

In [None]:
import itertools

In [None]:
class DataTracker(DataStore):
    """`DataStore` that records data and control dependencies between variables."""

    def __init__(self, *args):
        super().__init__(*args)
        # name -> location of the most recent store to that name
        self.origins = {}
        # (name, location) -> set of (name, location) pairs read before the store
        self.data_dependencies = {}
        # (name, location) -> set of (name, location) pairs controlling the store
        self.control_dependencies = {}
        # names read at `last_read_location`
        self.last_read = []
        self.last_read_location = None
        # stack with one list of read names per entered `with` block
        self.control = [[]]

    def caller_location(self):
        """Return `(function name, line number)` of the code accessing the store.

        Must be called directly from `__getitem__` / `__setitem__`: it walks
        up exactly two frames from its own.
        """
        accessor_frame = inspect.currentframe().f_back
        origin = accessor_frame.f_back
        return (origin.f_code.co_name, origin.f_lineno)

    def __getitem__(self, name):
        location = self.caller_location()
        if self.last_read_location != location:
            # First read at a new location: start a fresh list of reads.
            self.last_read_location = location
            self.last_read = []
        self.last_read.append(name)
        return super().__getitem__(name)

    def __setitem__(self, name, value):
        location = self.caller_location()
        target = (name, location)

        # Extend (or create) the data dependencies of this store with the
        # origins of the most recently read variables.
        data_deps = self.data_dependencies.setdefault(target, set())
        data_deps.update(
            (var_read, self.origins[var_read])
            for var_read in self.last_read
            if var_read in self.origins
        )

        # Likewise for everything read by the enclosing control structures.
        control_deps = self.control_dependencies.setdefault(target, set())
        control_deps.update(
            (var_read, self.origins[var_read])
            for var_read in itertools.chain.from_iterable(self.control)
            if var_read in self.origins
        )

        self.origins[name] = location

        return super().__setitem__(name, value)

    def __enter__(self):
        # The names read last (presumably the condition just evaluated -- confirm)
        # control everything stored inside the block.
        self.control.append(self.last_read)

    def __exit__(self, exc_type, exc_value, traceback):
        self.control.pop()

In [None]:
class Instrumenter(object):
    """Context manager that instruments the given items on entry and
    rebinds their original definitions in this module's globals on exit."""

    def __init__(self, *items_to_instrument, log=False):
        self.items_to_instrument = items_to_instrument
        self.log = log

    def __enter__(self):
        """Instrument every item; return the instrumenter itself."""
        for target in self.items_to_instrument:
            self.instrument(target)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Rebind each item's name to the original object."""
        module_namespace = globals()
        module_namespace.update(
            (target.__name__, target) for target in self.items_to_instrument
        )

    def instrument(self, item):
        """Hook for subclasses; the base class only logs if requested."""
        if not self.log:
            return
        print("Instrumenting", item)

In [None]:
# The base `Instrumenter` changes nothing (it only logs when `log=True`),
# so this block has no visible effect.
with Instrumenter(square_root) as ins:
    pass

In [None]:
class Slicer(Instrumenter):
    """Instrumenter that rewrites functions to track their dependencies."""

    def instrument(self, item):
        """Re-define `item` in this module's globals in instrumented form
        and install a fresh `DataTracker` as the data store."""
        tree = ast.parse(inspect.getsource(item))

        # The order matters: names are rewritten first, the other
        # transformers then add their own data-store accesses.
        for transformer_class in (AccessTransformer,
                                  SaveArgsTransformer,
                                  SaveReturnTransformer,
                                  ControlTransformer):
            transformer_class().visit(tree)

        # Newly created nodes have no line numbers yet.
        ast.fix_missing_locations(tree)

        if self.log:
            print(f"Instrumenting {item}:")
            print(astor.to_source(tree))

        instrumented_code = compile(tree, '<string>', 'exec')
        exec(instrumented_code, globals())
        globals()[DATA_STORE] = DataTracker()

    def _tracker(self):
        # The store currently installed under the `DATA_STORE` name.
        return globals()[DATA_STORE]

    def data_dependencies(self):
        """Return the data dependencies recorded by the current data store."""
        return self._tracker().data_dependencies

    def control_dependencies(self):
        """Return the control dependencies recorded by the current data store."""
        return self._tracker().control_dependencies

In [None]:
# Run the instrumented `square_root` while the slicer is active;
# `log=True` prints the instrumented source.
with Slicer(square_root, log=True) as slicer:
    y = square_root(9)
y

In [None]:
# The data store still holds the variables of the tracked run.
_data

In [None]:
# For each variable, the location of its most recent store.
_data.origins

In [None]:
# Data dependencies recorded during the run.
slicer.data_dependencies()

In [None]:
# Control dependencies recorded during the run.
slicer.control_dependencies()

In [None]:
# Outside the `with` block, `square_root` is bound to the original function
# again (restored by `Instrumenter.__exit__()`).
square_root(9)

In [None]:
from graphviz import Digraph, nohtml

In [None]:
import html

In [None]:
# ignore
# Node fill color and font used for the dependency graphs below.
STEP_COLOR = 'peachpuff'
FONT_NAME = 'Courier'

In [None]:
# ignore
def graph(comment="default"):
    """Create an empty `Digraph` with the node and edge styling used in this chapter."""
    node_style = {
        'style': 'filled',
        'shape': 'box',
        'fillcolor': STEP_COLOR,
        'fontname': FONT_NAME,
    }
    edge_style = {'fontname': FONT_NAME}
    return Digraph(name='', comment=comment, graph_attr={},
                   node_attr=node_style, edge_attr=edge_style)

In [None]:
# ignore
def display_dependencies(data_dependencies, control_dependencies=None):
    """Draw the given dependencies as a graph and display it.

    :param data_dependencies: mapping `(name, location)` -> set of
        `(name, location)` nodes it depends on, drawn as solid edges.
        `location` is a `(function name, line number)` pair.
    :param control_dependencies: optional mapping of the same shape, drawn as
        dashed edges. Nodes without an entry get no dashed edges.
    """
    if control_dependencies is None:
        control_dependencies = {}

    def node_id(node):
        return html.escape(repr(node))

    def label(node):
        (name, location) = node
        code_name, lineno = location
        fun = globals()[code_name]
        source_lines, _ = inspect.getsourcelines(fun)
        # Locations recorded by `Slicer` are relative to the function's own
        # source text (it compiles the output of `inspect.getsource()`, whose
        # first line is line 1), so index from there and not from the
        # function's offset within its file or cell.
        source = source_lines[lineno - 1].strip()
        # The label is Graphviz HTML-like markup: `<`, `>` and `&` in the
        # source line must be escaped just like the name.
        return (f'<<B>{html.escape(name)}</B><BR/>'
                f'<FONT POINT-SIZE="8.0">{html.escape(source, quote=False)}</FONT>>')

    g = graph()
    for d in data_dependencies:
        g.node(node_id(d), label=label(d))
        for t in data_dependencies[d]:
            g.edge(node_id(t), node_id(d))
        # `.get()`: a node may have no control dependencies at all, e.g.
        # when the caller passes data dependencies only.
        for t in control_dependencies.get(d, ()):
            g.edge(node_id(t), node_id(d), style='dashed')

    display(g)

In [None]:
# Visualize the dependencies recorded for `square_root(9)`.
display_dependencies(slicer.data_dependencies(),
                     slicer.control_dependencies()
                    )

* TODO: Differentiate control and data deps by color
* TODO: Arrange nodes by location

In [None]:
# Track the dependencies of the `middle()` call from above.
with Slicer(middle, log=True) as slicer:
    y = middle(2, 1, 3)

In [None]:
# Control dependencies recorded for the `middle()` run.
slicer.control_dependencies()

In [None]:
# Visualize the dependencies recorded for `middle(2, 1, 3)`.
display_dependencies(slicer.data_dependencies(),
                     slicer.control_dependencies()
                    )

## Synopsis

_For those only interested in using the code in this chapter (without wanting to know how it works), give an example.  This will be copied to the beginning of the chapter (before the first section) as text with rendered input and output._

You can use `int_fuzzer()` as:

## Lessons Learned

* _Lesson one_
* _Lesson two_
* _Lesson three_

## Next Steps

_Link to subsequent chapters (notebooks) here, as in:_

* [use _mutations_ on existing inputs to get more valid inputs](MutationFuzzer.ipynb)
* [use _grammars_ (i.e., a specification of the input format) to get even more valid inputs](Grammars.ipynb)
* [reduce _failing inputs_ for efficient debugging](Reducer.ipynb)


## Background

_Cite relevant works in the literature and put them into context, as in:_

The idea of ensuring that each expansion in the grammar is used at least once goes back to Burkhardt \cite{Burkhardt1967}, to be later rediscovered by Paul Purdom \cite{Purdom1972}.

```bibtex
@INPROCEEDINGS{6899220,
  author={Z. {Chen} and L. {Chen} and Y. {Zhou} and Z. {Xu} and W. C. {Chu} and B. {Xu}},
  booktitle={2014 IEEE 38th Annual Computer Software and Applications Conference}, 
  title={Dynamic Slicing of Python Programs}, 
  year={2014},
  volume={},
  number={},
  pages={219-228},
  doi={10.1109/COMPSAC.2014.30}
}
```



## Exercises

_Close the chapter with a few exercises such that people have things to do.  To make the solutions hidden (to be revealed by the user), have them start with_

```
**Solution.**
```

_Your solution can then extend up to the next title (i.e., any markdown cell starting with `#`)._

_Running `make metadata` will automatically add metadata to the cells such that the cells will be hidden by default, and can be uncovered by the user.  The button will be introduced above the solution._

### Exercise 1: _Title_

_Text of the exercise_

In [None]:
# Some code that is part of the exercise
pass

_Some more text for the exercise_

**Solution.** _Some text for the solution_

In [None]:
# Some code for the solution
2 + 2

_Some more text for the solution_

### Exercise 2: _Title_

_Text of the exercise_

**Solution.** _Solution for the exercise_

In [None]:
import traceback

In [None]:
class tint(int):
    """An `int` that remembers the code locations it was derived from.

    `slice` is a list of `(function name, line number)` pairs: the location
    where this value was created, followed by any locations passed in.
    """

    def __new__(cls, value, *args, **kw):
        # Only `value` determines the integer; the rest is for `__init__()`.
        return int.__new__(cls, value)

    def __init__(self, value, slice=None, **kwargs):
        origin = self.current_location()
        self.slice = [origin]
        if slice is not None:
            self.slice.extend(slice)

    def current_location(self):
        """Return `(function name, line number)` of the nearest calling frame
        that is not a method running on a `tint`."""
        frame = inspect.currentframe()
        while True:
            # Skip frames of `tint` methods (this one included).
            receiver = frame.f_locals.get('self')
            if not isinstance(receiver, tint):
                break
            frame = frame.f_back

        return (frame.f_code.co_name, frame.f_lineno)

In [None]:
class tint(tint):
    def __repr__(self):
        """Represent a `tint` like the plain integer it holds."""
        plain_repr = int.__repr__
        return plain_repr(self)

In [None]:
class tint(tint):
    def __str__(self):
        """Convert a `tint` to a string like the plain integer it holds."""
        plain_str = int.__str__
        return plain_str(self)

In [None]:
# A `tint` displays like a plain integer.
x = tint(2)
x

In [None]:
# The location(s) this value originates from.
x.slice

In [None]:
# Comparison is inherited from `int`.
x == 2

In [None]:
# Check the type of the tracked value.
type(x)

In [None]:
class tint(tint):
    def create(self, x):
        """Return a new `tint` for `x` that inherits this value's slice."""
        derived = tint(x, slice=self.slice)
        return derived

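The special methods that implement Python's operators are described in the [Python data model](https://docs.python.org/3/reference/datamodel.html). We overload them so that results of operations on a `tint` are `tint` values again.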

In [None]:
class tint(tint):
    # Addition: the result is a `tint` carrying this operand's slice.
    def __add__(self, x):
        total = int(self) + x
        return self.create(total)

    def __radd__(self, x):
        total = x + int(self)
        return self.create(total)

In [None]:
# Adding to a `tint` yields a `tint` again.
x = tint(2)
x = x + 2
type(x)

In [None]:
# The value after the addition.
x

In [None]:
class tint(tint):
    # Subtraction, with `self` as left (`__sub__`) or right (`__rsub__`) operand.
    def __sub__(self, x):
        difference = int(self) - x
        return self.create(difference)

    def __rsub__(self, x):
        difference = x - int(self)
        return self.create(difference)

In [None]:
class tint(tint):
    # Multiplication, with `self` as left or right operand.
    def __mul__(self, x):
        product = int(self) * x
        return self.create(product)

    def __rmul__(self, x):
        product = x * int(self)
        return self.create(product)

In [None]:
class tint(tint):
    # Matrix multiplication (`@`).
    # NOTE(review): plain `int` does not implement `@`, so this only works
    # if `x` supplies the reflected operation -- confirm whether it is needed.
    def __matmul__(self, x):
        result = int(self) @ x
        return self.create(result)

    def __rmatmul__(self, x):
        result = x @ int(self)
        return self.create(result)

In [None]:
class tint(tint):
    # True division (`/`). The quotient is passed to `create()` and thus
    # converted to an integer by `tint`, dropping any fractional part.
    def __truediv__(self, x):
        quotient = int(self) / x
        return self.create(quotient)

    def __rtruediv__(self, x):
        quotient = x / int(self)
        return self.create(quotient)

In [None]:
class tint(tint):
    # Floor division (`//`), with `self` as left or right operand.
    def __floordiv__(self, x):
        quotient = int(self) // x
        return self.create(quotient)

    def __rfloordiv__(self, x):
        quotient = x // int(self)
        return self.create(quotient)

In [None]:
class tint(tint):
    # Modulo (`%`), with `self` as left or right operand.
    def __mod__(self, x):
        remainder = int(self) % x
        return self.create(remainder)

    def __rmod__(self, x):
        remainder = x % int(self)
        return self.create(remainder)

In [None]:
class tint(tint):
    # `divmod()` yields a (quotient, remainder) pair. A tuple cannot be turned
    # into a `tint`, so each component is wrapped on its own.
    def __divmod__(self, x):
        """Return `divmod(self, x)` as a pair of `tint` values."""
        quotient, remainder = divmod(int(self), x)
        return self.create(quotient), self.create(remainder)

    def __rdivmod__(self, x):
        """Return `divmod(x, self)` as a pair of `tint` values."""
        quotient, remainder = divmod(x, int(self))
        return self.create(quotient), self.create(remainder)

In [None]:
class tint(tint):
    # Exponentiation (`**`), with `self` as base or as exponent.
    def __pow__(self, x):
        power = int(self) ** x
        return self.create(power)

    def __rpow__(self, x):
        power = x ** int(self)
        return self.create(power)

In [None]:
class tint(tint):
    # Left shift (`<<`), with `self` as left or right operand.
    def __lshift__(self, x):
        shifted = int(self) << x
        return self.create(shifted)

    def __rlshift__(self, x):
        shifted = x << int(self)
        return self.create(shifted)

In [None]:
class tint(tint):
    # Right shift (`>>`), with `self` as left or right operand.
    def __rshift__(self, x):
        shifted = int(self) >> x
        return self.create(shifted)

    def __rrshift__(self, x):
        shifted = x >> int(self)
        return self.create(shifted)

In [None]:
class tint(tint):
    # Bitwise AND (`&`), with `self` as left or right operand.
    def __and__(self, x):
        masked = int(self) & x
        return self.create(masked)

    def __rand__(self, x):
        masked = x & int(self)
        return self.create(masked)

In [None]:
class tint(tint):
    # Bitwise XOR (`^`), with `self` as left or right operand.
    def __xor__(self, x):
        toggled = int(self) ^ x
        return self.create(toggled)

    def __rxor__(self, x):
        toggled = x ^ int(self)
        return self.create(toggled)

In [None]:
class tint(tint):
    # Bitwise OR (`|`), with `self` as left or right operand.
    def __or__(self, x):
        combined = int(self) | x
        return self.create(combined)

    def __ror__(self, x):
        combined = x | int(self)
        return self.create(combined)

In [None]:
class tint(tint):
    # Unary operators; each result is a `tint` carrying this value's slice.
    def __neg__(self):
        return self.create(-int(self))

    def __pos__(self):
        return self.create(+int(self))

    def __abs__(self):
        return self.create(abs(int(self)))

    def __invert__(self):
        # `~x` is the bitwise complement (-x - 1), not the negation.
        return self.create(~int(self))

In [None]:
class tint(tint):
    def __index__(self):
        """Return the plain `int` value, e.g. for use as a sequence index."""
        plain_value = int(self)
        return plain_value

In [None]:
# Mix regular and reflected operators: (2 + 3) - (3 + 2).
x = tint(2)
y = x + 3 - (3 + x)

In [None]:
# Value, type, and recorded origins of the result.
y, type(y), y.slice

In [None]:
# Call `middle()` with tracked integers; the returned object is one of the
# arguments, so its slice tells which one was returned.
x = tint(2)
y = tint(1)
z = tint(3)
m = middle(x, y, z)
m, m.slice

In [None]:
# Compute a square root on a tracked integer.
x = tint(4)
y = square_root(x)

In [None]:
# Locations that contributed to the result.
y.slice

In [None]:
from ExpectError import ExpectError

In [None]:
# This call is expected to raise an error.
# NOTE(review): presumably because `tint` arithmetic truncates intermediate
# results to integers, so the postcondition of `square_root()` fails -- confirm.
with ExpectError():
    y = square_root(tint(2))

Next steps:

* implement `tfloat`
* instrument assignments

```python
with Slicer() as slicer:
    x = foobar(blue)
slicer
```
x was initialized from ...
blue was just there :-)