# Repairing Code

_Brief abstract/introduction/motivation.  State what the chapter is about in 1-2 paragraphs._
_Then, have an introduction video:_

In [None]:
from bookutils import YouTubeVideo
YouTubeVideo("w4u5gCgPlmg")

**Prerequisites**

* _Refer to earlier chapters as notebooks here, as here:_ [Earlier Chapter](Fuzzer.ipynb).

In [None]:
import bookutils

## Synopsis

<!-- Automatically generated. Do not edit. -->



_For those only interested in using the code in this chapter (without wanting to know how it works), give an example.  This will be copied to the beginning of the chapter (before the first section) as text with rendered input and output._

You can use `int_fuzzer()` as:

```python
print(int_fuzzer())
```
```python
=> 76.5

```


## Fixing Things Manually

\todo{Add}

In [None]:
from StatisticalDebugger import middle

In [None]:
# ignore
from bookutils import print_content

In [None]:
# ignore
import inspect

In [None]:
# ignore
# Look up the line at which `middle()` starts, so the listing below is
# numbered from `first_lineno`
_, first_lineno = inspect.getsourcelines(middle)
middle_source = inspect.getsource(middle)
print_content(middle_source, '.py', start_line_number=first_lineno)

## Automatic Code Repair

1. Have a set of tests.
2. Localize the defect.
3. Apply random insert/delete/swap operations.
4. Evolve.

Let's do this on `middle` first.

## A Test Suite

A set of passing and failing tests for `middle()`.

In [None]:
import random

In [None]:
def middle_testcase():
    """Return a random test input for `middle()`.

    The result is a tuple `(x, y, z)` of three integers, each drawn
    independently (in this order) with `random.randrange(10)`.
    """
    return tuple(random.randrange(10) for _ in range(3))

In [None]:
[middle_testcase() for i in range(5)]

In [None]:
def middle_test(x, y, z):
    """Check `middle(x, y, z)` against the median of its arguments.

    Raises `AssertionError` if the result differs from the second element
    of the sorted arguments; returns `None` otherwise.
    """
    expected = sorted([x, y, z])[1]
    assert middle(x, y, z) == expected

In [None]:
def middle_passing_testcase():
    """Return a random `(x, y, z)` for which `middle_test()` passes.

    Draws inputs from `middle_testcase()` until one of them does not
    raise `AssertionError`.
    """
    while True:
        candidate = middle_testcase()
        try:
            middle_test(*candidate)
        except AssertionError:
            # This input fails; draw another one
            continue
        return candidate

In [None]:
# Show one input on which `middle()` satisfies `middle_test()`
(x, y, z) = middle_passing_testcase()
m = middle(x, y, z)
print(f"middle({x}, {y}, {z}) = {m}")

In [None]:
def middle_failing_testcase():
    """Return a random `(x, y, z)` for which `middle_test()` fails.

    Draws inputs from `middle_testcase()` until one of them raises
    `AssertionError`; the loop only ends once such an input is found.
    """
    while True:
        candidate = middle_testcase()
        try:
            middle_test(*candidate)
        except AssertionError:
            return candidate

In [None]:
# Show one input on which `middle()` violates `middle_test()`
(x, y, z) = middle_failing_testcase()
m = middle(x, y, z)
print(f"middle({x}, {y}, {z}) = {m}")

In [None]:
MIDDLE_TESTS = 100

In [None]:
MIDDLE_PASSING_TESTCASES = [middle_passing_testcase() for i in range(MIDDLE_TESTS)]

In [None]:
MIDDLE_FAILING_TESTCASES = [middle_failing_testcase() for i in range(MIDDLE_TESTS)]

## Locating the Defect

In [None]:
from StatisticalDebugger import OchiaiDebugger, TarantulaDebugger, CoverageCollector

In [None]:
debugger = OchiaiDebugger(CoverageCollector)

# Run `middle()` on every passing input inside `collect_pass()` ...
for x, y, z in MIDDLE_PASSING_TESTCASES:
    with debugger.collect_pass():
        m = middle(x, y, z)

# ... and on every failing input inside `collect_fail()`.
# Only the execution matters here; the result `m` is not used.
for x, y, z in MIDDLE_FAILING_TESTCASES:
    with debugger.collect_fail():
        m = middle(x, y, z)

In [None]:
debugger

In [None]:
debugger.rank()

We thus focus on the following line:

In [None]:
# ignore
# Take the first entry of the ranking; it is used as a line number below
# NOTE(review): assumes `rank()` yields plain line numbers, best first - confirm
lineno = debugger.rank()[0]
lines, first_lineno = inspect.getsourcelines(middle)
print(lineno, end="")
# `lines` starts at `first_lineno`, hence the offset when indexing
print_content(lines[lineno - first_lineno], '.py')

In [None]:
debugger.suspiciousness(lineno)

In [None]:
debugger.suspiciousness(first_lineno)

## Random Code Mutations

* delete a statement
* insert a statement (from the same source)
* replace a statement with another statement (from the same source)

In [None]:
import ast
import astor

In [None]:
from bookutils import rich_output

In [None]:
if rich_output():
    from showast import show_ast
else:
    def show_ast(tree):
        """Fallback for plain-text output: print a textual dump of `tree`."""
        # `ast.dump()` only returns a string; without `print()`, calls such
        # as `show_ast(middle_tree())` would display nothing.
        print(ast.dump(tree))

In [None]:
from ast import NodeTransformer, NodeVisitor

In [None]:
def middle_tree():
    """Parse the source code of `middle()` and return it as a new AST."""
    source = inspect.getsource(middle)
    return ast.parse(source)
show_ast(middle_tree())

### Picking Statements

Let us start with a source of potential statements.

In [None]:
class StatementVisitor(NodeVisitor):
    """Collect the statements found in the bodies of an AST.

    After `visit(tree)`, `self.statements` holds every `ast.stmt` found in
    a `body` or `orelse` list of a visited node. Direct children of
    modules and classes are not collected, but are traversed.
    """

    def __init__(self):
        self.statements = []
        super().__init__()

    def add_statements(self, node, attr):
        """Append the statements stored in attribute `attr` of `node`.

        A missing attribute contributes nothing. Elements that are not
        statements are skipped.
        """
        elems = getattr(node, attr, [])
        if not isinstance(elems, list):
            elems = [elems]

        # `body`/`orelse` of `ast.Lambda` and `ast.IfExp` hold expressions.
        # Collecting them would let a mutator place an expression where a
        # statement is required, yielding an AST that cannot be compiled.
        self.statements += [elem for elem in elems
                            if isinstance(elem, ast.stmt)]

    def visit_Node(self, node):
        # Any node other than the ones listed below
        self.add_statements(node, 'body')
        self.add_statements(node, 'orelse')

    def visit_Module(self, node):
        # Module children are defs, classes and globals - don't add.
        # Calling the inherited `generic_visit()` bypasses `visit_Node()`.
        super().generic_visit(node)

    def visit_ClassDef(self, node):
        # Class children are defs and globals - don't add
        super().generic_visit(node)

    def generic_visit(self, node):
        # Collect this node's statements first, then descend into children
        self.visit_Node(node)
        super().generic_visit(node)

In [None]:
def all_statements(tree, tp=None):
    """Return the statements a `StatementVisitor` collects from `tree`.

    If `tp` is given, only elements that are instances of `tp` are
    returned.
    """
    collector = StatementVisitor()
    collector.visit(tree)

    if tp is None:
        return collector.statements
    return [stmt for stmt in collector.statements if isinstance(stmt, tp)]

In [None]:
all_statements(middle_tree(), ast.Return)

We can randomly pick an element:

In [None]:
import random

In [None]:
random_node = random.choice(all_statements(middle_tree()))
astor.to_source(random_node)

### Mutating Statements

In [None]:
import copy

In [None]:
class RandomNodeMutator(NodeTransformer):
    """Node transformer holding the settings for random mutations."""

    def __init__(self, suspiciousness_func=None,
                 statements=None, mutation_rate=0.1, log=False):
        """Set up the mutator.

        `suspiciousness_func`: called with a line number; if omitted,
            a function returning 1.0 for every line is used.
        `statements`: list of statements kept in `self.statements`;
            if omitted, a new empty list is used.
        `mutation_rate`: stored as `self.mutation_rate`.
        `log`: stored as `self.log`.
        """
        super().__init__()

        def always_suspicious(lineno):
            return 1.0

        self.suspiciousness_func = (always_suspicious
                                    if suspiciousness_func is None
                                    else suspiciousness_func)
        self.statements = [] if statements is None else statements
        self.mutation_rate = mutation_rate
        self.log = log

        # Number of mutations applied so far
        self.mutations = 0

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def node_suspiciousness(self, node):
        """Return the suspiciousness of `node`, based on its line number.

        Nodes without a `lineno` attribute get a suspiciousness of 0.
        """
        if hasattr(node, 'lineno'):
            return self.suspiciousness_func(node.lineno)
        return 0

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    # Node types that are never mutated themselves
    SKIP_LIST = {ast.Module, ast.ClassDef,
                 ast.FunctionDef, ast.AsyncFunctionDef}

    def should_mutate(self, node):
        """Decide randomly whether `node` is to be mutated.

        Only statements outside `SKIP_LIST` qualify. For those, two random
        draws are made: one against the node's suspiciousness and one
        against `self.mutation_rate`. Both must succeed.
        """
        if not isinstance(node, ast.stmt):
            return False
        if isinstance(node, tuple(self.SKIP_LIST)):
            return False

        # Both draws are always made, in this order
        suspicion_draw = random.random()
        is_suspicious = suspicion_draw <= self.node_suspiciousness(node)
        is_chosen = random.random() <= self.mutation_rate
        return is_suspicious and is_chosen

In [None]:
import re

In [None]:
RE_SPACE = re.compile(r'[ \t\n]+')

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def format_node(self, node):
        """Return a short, quoted one-line rendering of `node` for logging.

        `None` is returned as is. A list is rendered element by element,
        joined by "; ". Source text longer than 20 characters is cut off
        and marked with "...".
        """
        if node is None:
            return None
        if isinstance(node, list):
            return "; ".join([self.format_node(item) for item in node])

        text = astor.to_source(node)
        text = RE_SPACE.sub(' ', text).strip()
        shown = (text[:20] + "...") if len(text) > 20 else text
        return repr(shown)

    def generic_visit(self, node):
        """Visit `node`, replacing it by a mutation if it is selected.

        Nodes not selected by `should_mutate()` are traversed as usual.
        A selected node is handed to a randomly chosen operator (insert,
        swap, or delete), whose result is returned in its place.
        """
        if self.should_mutate(node):
            operators = [self.insert, self.swap, self.delete]
            mutate_op = random.choice(operators)
            replacement = mutate_op(node)
            self.mutations += 1

            if self.log:
                print(f"{mutate_op.__name__}: {self.format_node(node)} becomes {self.format_node(replacement)}")

            return replacement

        return super().generic_visit(node)

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def choose_statement(self):
        """Return a deep copy of a random element of `self.statements`."""
        picked = random.choice(self.statements)
        return copy.deepcopy(picked)

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def swap(self, node):
        """Return a randomly chosen statement to take the place of `node`.

        The replacement gets the source location of `node`. If it is a
        compound statement, its blocks are emptied first.
        """
        replacement = self.choose_statement()

        # Imported `if P: X` becomes `if P: pass`
        if isinstance(replacement, ast.stmt):
            emptied_blocks = (('body', [ast.Pass()]),
                              ('orelse', []),
                              ('finalbody', []))
            for attr, emptied in emptied_blocks:
                if hasattr(replacement, attr):
                    setattr(replacement, attr, emptied)

        ast.copy_location(replacement, node)
        return replacement

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def insert(self, node):
        """Combine `node` with a randomly chosen statement.

        Returns either a single node or a list of nodes:
        * a chosen compound statement wraps `node` as its only body element;
        * otherwise the chosen statement is placed after `node` -
          or before it (or instead of it) if `node` is a `return`.
        """
        extra = self.choose_statement()

        # Inserted `if P: X` becomes: `if P: node`
        wraps_node = isinstance(extra, ast.stmt) and hasattr(extra, 'body')
        if wraps_node:
            extra.body = [node]
            for attr in ('orelse', 'finalbody'):
                if hasattr(extra, attr):
                    setattr(extra, attr, [])
            ast.copy_location(extra, node)
            return extra

        if not isinstance(node, ast.Return):
            return [node, extra]

        # Do not insert after `return`
        if isinstance(extra, ast.Return):
            return extra
        return [extra, node]

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def delete(self, node):
        """Return the replacement for a deleted `node`.

        A statement is replaced by a `pass` at the same location, which
        avoids empty bodies. Any other node is replaced by `None`.
        """
        if not isinstance(node, ast.stmt):
            return None

        return ast.copy_location(ast.Pass(), node)

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def visit(self, node):
        """Visit `node`; fill `self.statements` from it if still empty."""
        # Without a statement source, use the statements of the visited tree
        self.statements = self.statements or all_statements(node)
        return super().visit(node)

In [None]:
class RandomNodeMutator(RandomNodeMutator):
    def mutate(self, tree):
        """Return a mutated deep copy of `tree`; `tree` itself is unchanged.

        The copy is visited repeatedly until at least one mutation has
        been applied. Missing source locations are filled in at the end.
        """
        mutant = copy.deepcopy(tree)
        self.mutations = 0

        while True:
            mutant = self.visit(mutant)
            if self.mutations != 0:
                break

        return ast.fix_missing_locations(mutant)

In [None]:
mutator = RandomNodeMutator(log=True)
new_tree = mutator.mutate(middle_tree())

In [None]:
print_content(astor.to_source(new_tree), '.py')

## Fitness

In [None]:
WEIGHT_PASSING = 0.5
WEIGHT_FAILING = 0.5

In [None]:
def middle_fitness(tree):
    """Return the fitness of `tree` as a candidate definition of `middle()`.

    `tree` is compiled and executed in the global namespace, so that it
    redefines `middle`. The fitness is the weighted sum of
    * the ratio of `MIDDLE_PASSING_TESTCASES` passing `middle_test()`
      (weight `WEIGHT_PASSING`), and
    * the ratio of `MIDDLE_FAILING_TESTCASES` passing `middle_test()`
      (weight `WEIGHT_FAILING`).

    Returns 0 if `tree` cannot be compiled. The original `middle` is
    restored before returning, even if running the candidate raises.
    """
    original_middle = middle

    try:
        code = compile(tree, '<fitness>', 'exec')
    except (ValueError, TypeError, SyntaxError):
        # Malformed ASTs are reported with any of these exceptions
        return 0  # Compilation error

    def passed_ratio(testcases):
        """Return the ratio of `testcases` on which `middle_test()` passes."""
        passed = 0
        for x, y, z in testcases:
            try:
                middle_test(x, y, z)
            except Exception:
                # A mutant may fail in arbitrary ways (say, by raising
                # `NameError`). Any exception counts as a failed test
                # rather than aborting the whole evaluation.
                continue
            passed += 1
        return passed / len(testcases)

    try:
        # Define the new function. Note that any other global name
        # defined by `tree` stays defined afterwards.
        exec(code, globals())

        # Test how many of the passing and failing runs pass
        passing_ratio = passed_ratio(MIDDLE_PASSING_TESTCASES)
        failing_ratio = passed_ratio(MIDDLE_FAILING_TESTCASES)
    finally:
        # Never leave a mutant installed as the global `middle`
        globals()['middle'] = original_middle

    return (WEIGHT_PASSING * passing_ratio +
            WEIGHT_FAILING * failing_ratio)

In [None]:
middle_fitness(middle_tree())

In [None]:
middle_fitness(ast.parse("def middle(x, y, z): return z"))

In [None]:
from StatisticalDebugger import middle_fixed

In [None]:
middle_fixed_source = \
    inspect.getsource(middle_fixed).replace('middle_fixed', 'middle').strip()

In [None]:
middle_fitness(ast.parse(middle_fixed_source))

## Population

In [None]:
MIDDLE_POPSIZE = 20

In [None]:
# Initial population: the unchanged tree of `middle()`, plus
# `MIDDLE_POPSIZE - 1` mutated variants of it
MIDDLE_POPULATION = [ middle_tree() ] + [ mutator.mutate(middle_tree()) for i in range(MIDDLE_POPSIZE - 1) ]

In [None]:
MIDDLE_POPULATION.sort(key=middle_fitness, reverse=True)

Highest fitness:

In [None]:
print(astor.to_source(MIDDLE_POPULATION[0]), middle_fitness(MIDDLE_POPULATION[0]))

Lowest fitness:

In [None]:
print(astor.to_source(MIDDLE_POPULATION[-1]), middle_fitness(MIDDLE_POPULATION[-1]))

## Evolution

In [None]:
def evolve():
    """Advance the global `MIDDLE_POPULATION` by one generation.

    The population is sorted by `middle_fitness()`, best first. Its fitter
    half survives; mutated copies of the survivors refill the population
    to its previous size. `MIDDLE_POPULATION` is rebound to the new list.
    """
    global MIDDLE_POPULATION

    statements = all_statements(middle_tree())
    mutator = RandomNodeMutator(statements=statements)

    n = len(MIDDLE_POPULATION)

    MIDDLE_POPULATION.sort(key=middle_fitness, reverse=True)

    # Keep at least one survivor, so a population of one does not die out
    survivors = MIDDLE_POPULATION[:max(1, n // 2)]

    # Refill to exactly `n` trees. For an even `n`, each survivor is
    # mutated once, in order; for an odd `n`, the best one is mutated twice.
    offspring = [mutator.mutate(survivors[i % len(survivors)])
                 for i in range(n - len(survivors))]

    MIDDLE_POPULATION = survivors + offspring

In [None]:
evolve()

In [None]:
tree = MIDDLE_POPULATION[0]
print(astor.to_source(tree), middle_fitness(tree))

In [None]:
evolve()

In [None]:
evolve()

In [None]:
evolve()

In [None]:
evolve()

In [None]:
tree = MIDDLE_POPULATION[0]
print(astor.to_source(tree), middle_fitness(tree))

In [None]:
# Run ten more generations, printing the fitness of the first tree in the
# population after each one. `best_tree` stays bound for the cells below.
for i in range(10):
    evolve()
    best_tree = MIDDLE_POPULATION[0]
    print(middle_fitness(best_tree), end=" ")

In [None]:
print_content(astor.to_source(best_tree), '.py')

## Simplifying

Or should we just use `DeltaDebugger` on code to minimize the whole thing?

In [None]:
from DeltaDebugger import DeltaDebugger

In [None]:
middle_lines = astor.to_source(best_tree).split('\n')

In [None]:
def test_middle_lines(lines):
    """Raise `AssertionError` if `lines` form a candidate with fitness >= 1.0.

    `lines` is a list of source lines; they are joined with newlines and
    parsed before being rated by `middle_fitness()`.
    """
    candidate = ast.parse("\n".join(lines))
    fitness = middle_fitness(candidate)
    assert fitness < 1.0  # "Fail" only while fitness is 1.0

In [None]:
# Run the "failing" test under `DeltaDebugger`; `dd` is queried for the
# reduced `lines` argument afterwards
with DeltaDebugger() as dd:
    test_middle_lines(middle_lines)

In [None]:
# Join the remaining lines into source text again
# NOTE(review): presumably `min_args()` maps argument names to their
# reduced values - confirm against `DeltaDebugger`
reduced_source = "\n".join(dd.min_args()['lines'])
reduced_source = astor.to_source(ast.parse(reduced_source))  # normalize
print_content(reduced_source, '.py')

We can present the difference to the original as a patch:

In [None]:
print(middle_source)

In [None]:
simple_source = astor.to_source(ast.parse(middle_source))  # normalize

In [None]:
print(simple_source)

In [None]:
from ChangeDebugger import diff, patch

In [None]:
import urllib

In [None]:
# `import urllib` alone does not guarantee that the `parse` submodule is
# loaded; import it explicitly before using `urllib.parse.unquote()`.
import urllib.parse

# Print each patch between the original and the reduced source,
# with URL-style `%xx` escapes decoded for readability
for p in diff(simple_source, reduced_source):
    print(urllib.parse.unquote(str(p)))

## Crossover

Pick two ASTs. Pick two functions. Do a crossover on any body sequence.

In [None]:
def p1():
    # Sample input for the crossover demo: the source of this function is
    # parsed into an AST, whose body gets crossed with the one of `p2()`.
    # Hence comments only - a docstring would become part of that body.
    def inner():
        print(a)
        print(b)
        print(c)
    
    a = 1
    b = 2
    c = 3

In [None]:
def p2():
    # Counterpart of `p1()` for the crossover demo, with the same shape but
    # different names. Comments only - a docstring would become part of
    # the parsed body.
    def inner():
        print(x)
        print(y)
        print(z)

    x = 1
    y = 2
    z = 3

In [None]:
class RandomBodyCrossover:
    """Crossover operator exchanging parts of two statement lists."""

    def __init__(self, log=True):
        """`log`: stored as `self.log`."""
        self.log = log

    def cross_bodies(self, body_1, body_2):
        """Cross the lists `body_1` and `body_2` at their midpoints.

        Returns a pair of new lists: the first half of `body_1` followed
        by the second half of `body_2`, and the first half of `body_2`
        followed by the second half of `body_1`.
        """
        assert isinstance(body_1, list)
        assert isinstance(body_2, list)

        mid_1, mid_2 = len(body_1) // 2, len(body_2) // 2
        head_1, tail_1 = body_1[:mid_1], body_1[mid_1:]
        head_2, tail_2 = body_2[:mid_2], body_2[mid_2:]

        return head_1 + tail_2, head_2 + tail_1

In [None]:
tree_p1 = ast.parse(inspect.getsource(p1))
tree_p2 = ast.parse(inspect.getsource(p2))

In [None]:
body_p1 = tree_p1.body[0].body
body_p2 = tree_p2.body[0].body
body_p1

In [None]:
crosser = RandomBodyCrossover()
tree_p1.body[0].body, tree_p2.body[0].body = crosser.cross_bodies(body_p1, body_p2)

In [None]:
print_content(astor.to_source(tree_p1), '.py')

In [None]:
print_content(astor.to_source(tree_p2), '.py')

In [None]:
class RandomBodyCrossover(RandomBodyCrossover):
    # In modules and class defs, the ordering of elements does not matter (much)
    SKIP_LIST = {ast.Module, ast.ClassDef}

    def can_cross(self, tree, body_attr='body'):
        """Return True if attribute `body_attr` of `tree` can be crossed.

        This holds if `tree` is not an instance of a `SKIP_LIST` class and
        the attribute is a list of at least two elements. The result is
        always a `bool`.
        """
        if isinstance(tree, tuple(self.SKIP_LIST)):
            return False

        body = getattr(tree, body_attr, [])
        # The attribute may also hold a single expression (as in
        # `ast.Lambda` or `ast.IfExp`), which has no length.
        return isinstance(body, list) and len(body) >= 2

In [None]:
class RandomBodyCrossover(RandomBodyCrossover):
    def crossover_attr(self, t1, t2, body_attr):
        """Cross attribute `body_attr` of `t1` and `t2`, or of their children.

        If both attributes can be crossed (see `can_cross()`), they are
        crossed in place. Otherwise, pairs of children are tried: first
        those sharing the same `name`, then all pairs in random order.

        Returns True after the first successful crossover, and False if
        none is possible.
        """
        assert isinstance(t1, ast.AST)
        assert isinstance(t2, ast.AST)
        assert isinstance(body_attr, str)

        body_1 = getattr(t1, body_attr, None)
        body_2 = getattr(t2, body_attr, None)
        if not body_1 or not body_2:
            return False
        if not isinstance(body_1, list) or not isinstance(body_2, list):
            # Single expressions (lambda bodies etc.) have nothing to cross
            return False

        if self.can_cross(t1, body_attr) and self.can_cross(t2, body_attr):
            if self.log:
                print(f"Crossing {t1}.{body_attr} and {t2}.{body_attr}")

            new_body_1, new_body_2 = self.cross_bodies(body_1, body_2)
            setattr(t1, body_attr, new_body_1)
            setattr(t2, body_attr, new_body_2)
            return True

        # Strategy 1: Find matches in class/function of same name
        for child_1 in body_1:
            if hasattr(child_1, 'name'):
                for child_2 in body_2:
                    if hasattr(child_2, 'name') and child_1.name == child_2.name:
                        if self._try_crossover(child_1, child_2):
                            return True

        # Strategy 2: Find matches anywhere
        for child_1 in random.sample(body_1, len(body_1)):
            for child_2 in random.sample(body_2, len(body_2)):
                if self._try_crossover(child_1, child_2):
                    return True

        return False

    def _try_crossover(self, t1, t2):
        """Return True if `crossover(t1, t2)` succeeds, False otherwise."""
        # `crossover()` reports failure by raising `CannotCrossError`.
        # Without catching it here, the first pair of children that cannot
        # be crossed would abort the search over all remaining pairs.
        try:
            self.crossover(t1, t2)
        except CannotCrossError:
            return False
        return True

In [None]:
class CannotCrossError(ValueError):
    """Raised by `RandomBodyCrossover.crossover()` if no crossover is found."""

In [None]:
class RandomBodyCrossover(RandomBodyCrossover):
    def crossover(self, t1, t2):
        """Do a crossover of ASTs `t1` and `t2`, modifying them in place.

        The attributes `body`, `orelse`, and `finalbody` are tried in this
        order; the first successful crossover ends the search.
        Returns the pair `(t1, t2)`.
        Raises `CannotCrossError` if no crossover is found.
        """
        assert isinstance(t1, ast.AST)
        assert isinstance(t2, ast.AST)

        # `any()` stops at the first attribute that could be crossed
        crossed = any(self.crossover_attr(t1, t2, body_attr)
                      for body_attr in ['body', 'orelse', 'finalbody'])
        if not crossed:
            raise CannotCrossError("No crossover found")

        return t1, t2

In [None]:
def p1():
    # Second sample input for the crossover demo: one `if` with three
    # statements. Its source is parsed into an AST, hence comments only -
    # a docstring would become part of the parsed body.
    if True:
        print(1)
        print(2)
        print(3)

In [None]:
def p2():
    # Counterpart of `p1()`: two `if` statements with two statements each.
    # Only the source of this function is parsed; `a` to `d` are not
    # defined in the code shown here.
    if True:
        print(a)
        print(b)
    if False:
        print(c)
        print(d)

In [None]:
crosser = RandomBodyCrossover(log=True)
tree_p1 = ast.parse(inspect.getsource(p1))
tree_p2 = ast.parse(inspect.getsource(p2))
crosser.crossover(tree_p1, tree_p2)

In [None]:
print_content(astor.to_source(tree_p1), '.py')

In [None]:
print_content(astor.to_source(tree_p2), '.py')

## A Repairer Class

In [None]:
class Repairer(OchiaiDebugger):
    """Placeholder subclass of `OchiaiDebugger`; adds no behavior of its own."""

Use as:

```python
repairer = Repairer()
with repairer.collect_pass():
    func(...)
with repairer.collect_fail():
    func(...)
repairer.repair()
```

## Synopsis

We can repair things!

## Lessons Learned

* _Lesson one_
* _Lesson two_
* _Lesson three_

## Next Steps

_Link to subsequent chapters (notebooks) here, as in:_

* [use _mutations_ on existing inputs to get more valid inputs](MutationFuzzer.ipynb)
* [use _grammars_ (i.e., a specification of the input format) to get even more valid inputs](Grammars.ipynb)
* [reduce _failing inputs_ for efficient debugging](Reducer.ipynb)


## Background

_Cite relevant works in the literature and put them into context, as in:_

The idea of ensuring that each expansion in the grammar is used at least once goes back to Burkhardt \cite{Burkhardt1967}, to be later rediscovered by Paul Purdom \cite{Purdom1972}.

## Exercises

_Close the chapter with a few exercises such that people have things to do.  To make the solutions hidden (to be revealed by the user), have them start with_

```
**Solution.**
```

_Your solution can then extend up to the next title (i.e., any markdown cell starting with `#`)._

_Running `make metadata` will automatically add metadata to the cells such that the cells will be hidden by default, and can be uncovered by the user.  The button will be introduced above the solution._

### Exercise 1: _Title_

_Text of the exercise_

In [None]:
# Some code that is part of the exercise
pass

_Some more text for the exercise_

**Solution.** _Some text for the solution_

In [None]:
# Some code for the solution
2 + 2

_Some more text for the solution_

### Exercise 2: _Title_

_Text of the exercise_

**Solution.** _Solution for the exercise_

In [None]:
class PrunePassTransformer(NodeTransformer):
    """Remove superfluous `pass` statements from an AST, in place."""

    def prune_pass(self, body, allow_empty=False):
        """Return `body` without its `pass` statements.

        `body`: a list of statements. Anything that is not a list (such as
            the expression in the `body` of a lambda) and any empty list
            is returned unchanged.
        `allow_empty`: if False (default), a list consisting only of `pass`
            statements is reduced to a single `pass` rather than to an
            empty list.
        """
        if not isinstance(body, list) or not body:
            return body

        # Get rid of `pass` statements
        new_body = [stmt for stmt in body if not isinstance(stmt, ast.Pass)]
        if not new_body and not allow_empty:
            # Oops - we need at least one `pass`
            new_node = ast.Pass()
            ast.copy_location(new_node, body[0])
            new_body = [new_node]

        return new_body

    def visit_Node(self, node):
        """Prune the `body` and `orelse` of `node`, if present."""
        if hasattr(node, 'body'):
            node.body = self.prune_pass(node.body)
        if hasattr(node, 'orelse'):
            # An `else` branch may be empty, so `else: pass` can go entirely
            node.orelse = self.prune_pass(node.orelse, allow_empty=True)
        return node

    def generic_visit(self, node):
        # Prune the children first, then the node itself
        super().generic_visit(node)
        return self.visit_Node(node)

In [None]:
prune_passes = PrunePassTransformer()
prune_passes.visit(best_tree)
print_content(astor.to_source(best_tree), '.py')

In [None]:
class PruneReturnTransformer(NodeTransformer):
    """Remove statements following a `return` from an AST, in place."""

    def ends_in_return(self, body):
        """Return True if the list `body` ends in a `return` statement."""
        return len(body) > 0 and isinstance(body[-1], ast.Return)

    def prune_returns(self, body):
        """Return `body`, cut off after the first statement that returns.

        A statement "returns" if it is a `return`, or an `if` whose two
        branches both end in a `return`. Anything that is not a list (such
        as `None` or the expression in the `body` of a lambda) is
        returned unchanged.
        """
        if not isinstance(body, list):
            return body

        # Get rid of statements after `return`
        for i, stmt in enumerate(body):
            if isinstance(stmt, ast.Return):
                return body[:i + 1]
            if isinstance(stmt, ast.If):
                body_returns = self.ends_in_return(stmt.body)
                orelse_returns = self.ends_in_return(stmt.orelse)
                if body_returns and orelse_returns:
                    return body[:i + 1]

        return body

    def visit_Node(self, node):
        """Prune the `body` and `orelse` of `node`, if present."""
        if hasattr(node, 'body'):
            node.body = self.prune_returns(node.body)
        if hasattr(node, 'orelse'):
            node.orelse = self.prune_returns(node.orelse)
        return node

    def generic_visit(self, node):
        # Prune the children first, then the node itself
        super().generic_visit(node)
        return self.visit_Node(node)

In [None]:
prune_returns = PruneReturnTransformer()
prune_returns.visit(best_tree)
print_content(astor.to_source(best_tree), '.py')