In [None]:
import bookutils.setup

# Project 1 - Mutation Fuzzing

Mutation fuzzers are effective at testing and perform well for unstructured or simple input formats. However, when dealing with complex structured inputs, their random mutations are inefficient. Consider, for example, the following SVG file:

```
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
  <path d="M17,30l19-12l43,54l-17,15zM14,84c12-20 46-61 61-72l13,19 c-17,10-50,50-60,64z" fill="#C30" stroke-linejoin="round" stroke-width="6" stroke="#C30"></path>
</svg>
```

A random mutation replacing `</svg>` with `=/sgv>` is perfectly possible; however, it would result in an invalid SVG. The same would happen if we added a `"` to any attribute. When sequentially applying multiple random mutations, the probability of generating an input that is a valid SVG file decreases significantly.

While fuzzers can run for days in a row to cover considerable behavior, the goal of this project is to utilize mutation fuzzing to cover as much code as possible during a specified number of generations. 

Our target is the [svglib](https://pypi.org/project/svglib/) SVG rendering library written in Python. For an easier integration with the library we provide a wrapper function __parse_svg(string)__, which receives a string with the SVG content and invokes the parsing library. To ensure that all converted elements are correct, the wrapper function internally converts the parsed SVG into PDF and PNG formats. Finally, the wrapper function returns an _RLG Drawing_ object if the conversion was successful. Note that, as implemented below, a failed conversion raises an exception (the wrapper asserts that the drawing is not `None`) rather than returning `None`.

In [None]:
import sys
import logging
import os
from svglib.svglib import svg2rlg
from reportlab.graphics import renderPDF, renderPM

In [None]:
# Required to run svglib on Python3
xrange = range

# Suppress all log records of severity ERROR and below
logging.disable(logging.ERROR)

# When True, the evaluation cells at the end of the notebook run the full experiment
RUN_EVALUATION = True
# When True, parse_svg() prints its call counter every 1000 invocations
DEBUG = True
# Number of parse_svg() calls so far (only updated while DEBUG is True)
COUNT = 0

In [None]:
def parse_svg(data):
    """Parse an SVG string with svglib and render the result to a PDF file.

    The SVG text is written to a temporary file in the current working
    directory, parsed with ``svg2rlg`` and rendered with
    ``renderPDF.drawToFile``. The temporary files are removed afterwards,
    whether or not the conversion succeeded.

    :param data: String with the SVG content
    :return: The drawing object returned by ``svg2rlg``
    :raises AssertionError: if ``svg2rlg`` returns ``None``. Any exception
        raised while writing, parsing or rendering propagates unchanged;
        this function never returns ``None``.
    """
    if DEBUG:
        # Progress indicator: print the call counter every 1000 invocations
        global COUNT
        if COUNT % 1000 == 0:
            print(COUNT)
        COUNT += 1

    pdf_file = 'tmp.pdf'
    png_file = 'tmp.png'
    svg_file = 'tmp.svg'
    try:
        # Write with an explicit encoding: the default of open() depends on
        # the platform locale, so fuzzed non-ASCII input could otherwise fail
        # here (or be stored in an unexpected encoding) before svglib runs.
        with open(svg_file, "w", encoding="utf-8") as f:
            f.write(data)

        drawing = svg2rlg(svg_file)

        assert(drawing is not None)

        renderPDF.drawToFile(drawing, pdf_file)
        # PNG rendering is currently disabled:
        #renderPM.drawToFile(drawing, png_file)

        return drawing
    finally:
        # Always clean up; a file may be missing if an earlier step failed
        for tmp_path in (svg_file, png_file, pdf_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

In [None]:
# Smoke test: run the wrapper once on the example SVG from the introduction
parse_svg("""
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
  <path d="M17,30l19-12l43,54l-17,15zM14,84c12-20 46-61 61-72l13,19 c-17,10-50,50-60,64z" fill="#C30" stroke-linejoin="round" stroke-width="6" stroke="#C30"></path>
</svg>
""")

## Auxiliary functions

The SVG format has a tree structure. In order to aid the fuzzer's implementation we provide an auxiliary function to convert an SVG string representation into Python's [ElementTree](https://docs.python.org/2/library/xml.etree.elementtree.html) for easier manipulation.

In [None]:
import sys
from lxml import etree

def svg_as_tree(data):
    """Parse an SVG document given as text and hand back the root of its tree.

    :param data: SVG document as a string
    :return: Root node of the parsed tree, as produced by ``etree.fromstring``
             (ElementTree API: https://docs.python.org/3/library/xml.etree.elementtree.html)
    """
    utf8_parser = etree.XMLParser(encoding='utf-8')
    # The parser is configured for UTF-8, so feed it UTF-8 encoded bytes
    raw_bytes = data.encode('utf-8')
    return etree.fromstring(raw_bytes, parser=utf8_parser)

The tree representation can be used to, for example, apply mutations on internal components of the nodes, as well as move, add or remove elements. The following code illustrates how to convert from a String into an [ElementTree](https://docs.python.org/2/library/xml.etree.elementtree.html).

In [None]:
# Example document with three child elements below the <svg> root.
# NOTE(review): `<cicle/>` is not a standard SVG element name - possibly
# intentional for the demo, possibly a typo for `<circle/>`; confirm.
svg_string = """<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
  <path d="M19,19h58v58h-58z" stroke="#000" fill="none" stroke-width="4"/>
  <path d="M17,30l19-12l43,54l-17,15zM14,84c12-20 46-61 61-72l13,19 c-17,10-50,50-60,64z" fill="#C30" stroke-linejoin="round" stroke-width="6" stroke="#C30"/>
  <cicle/>
</svg>
"""
# Parse the text and show the root node's tag and attributes
root_node = svg_as_tree(svg_string)
print("%s - %s" % (root_node.tag, root_node.attrib))

After converting the String representation into a tree, it is possible to iterate over the nodes:

In [None]:
# Printing immediate child nodes: iterating over a node yields its direct
# children; each child exposes its element type (tag) and attributes (attrib)
for child in root_node:
    print("%s - %s" % (child.tag, child.attrib))

It is also possible to access and manipulate the node type (_tag_) and its attributes (_attrib_):

In [None]:
# Accessing and changing properties: children are reachable by index and
# their attributes through the dict-like `attrib` mapping
first_child = root_node[0]
print("old value of stroke-width: %s" % first_child.attrib['stroke-width'])

# Attribute values are written as strings; this modifies the tree in place
first_child.attrib['stroke-width'] = "99"
print("new value of stroke-width: %s" % first_child.attrib['stroke-width'])

After the changes, the tree can be converted back into a string to be used in the _parse_svg()_ function.

In [None]:
# etree.tostring() returns bytes by default; request a text (str) result so
# the output can be passed directly to parse_svg(), which writes its input
# to a file opened in text mode
new_string = etree.tostring(root_node, encoding="unicode")
print(new_string)

# Fuzzer template

The basic template for our fuzzer is based on the [MutationCoverageFuzzer](MutationFuzzer.ipynb) from the lecture.

This template automatically loads a set of 10 SVG files as an initial seed.

In [None]:
from Coverage import Coverage
from MutationFuzzer import MutationCoverageFuzzer, FunctionCoverageRunner

In [None]:
class Project1MutationCoverageFuzzer(MutationCoverageFuzzer):
    """Mutation coverage fuzzer whose initial seed is the project's SVG files."""

    def __init__(self, min_mutations=2, max_mutations=10):
        """Load the seed SVG files and initialise the underlying fuzzer.

        :param min_mutations: Forwarded to ``MutationCoverageFuzzer``
        :param max_mutations: Forwarded to ``MutationCoverageFuzzer``
        """
        seed = self._get_initial_seed()
        super().__init__(seed, min_mutations, max_mutations)

    def _get_initial_seed(self):
        """Gets the initial seed for the fuzzer

        Reads every ``*.svg`` file in ``./data/svg-full`` (relative to the
        current working directory).

        :return: List of SVG in string format, ordered by file name
        """

        seed_dir = os.path.join(".", "data", "svg-full")
        # os.listdir() returns entries in arbitrary order, so sort them: the
        # experiment relies on fixed random seeds, which is only reproducible
        # if the seed list has the same order on every machine. Matching on
        # the suffix avoids picking up files such as "x.svg.bak".
        seed_files = sorted(
            f for f in os.listdir(seed_dir) if f.endswith(".svg"))

        seed = []
        for f in seed_files:
            # Explicit encoding: the default of open() is platform dependent
            with open(os.path.join(seed_dir, f), encoding="utf-8") as x:
                seed.append(x.read().strip())

        print("Seed size: " + str(len(seed)) + " files")
        return seed

## Fuzzing the _svglib_

To fuzz _svglib_ your fuzzer must execute it and inspect how much coverage it obtained with a specific input. With this goal we'll extend the [FunctionCoverageRunner](MutationFuzzer.ipynb) class from the lecture. The original class calculated coverage and was capable of handling exceptions, however, if the fuzzer triggered, for example, an infinite loop, it would not work. In this extension we add a configurable timeout for the command to ensure our library will always terminate.

In [None]:
from ExpectError import ExpectTimeout, ExpectError

class FunctionCoverageRunnerWithTimeout(FunctionCoverageRunner):
    """Coverage-tracking function runner that guards every run with a timeout."""

    def __init__(self, function, timeout=1):
        """Remember the timeout and delegate the rest to the parent runner.

        :param function: Passed on to ``FunctionCoverageRunner``
        :param timeout: Value handed to ``ExpectTimeout`` on every run
            (presumably seconds - confirm against ``ExpectTimeout``)
        """
        self._timeout = timeout
        super().__init__(function)

    def run(self, inp):
        """Execute the wrapped function on ``inp`` under error and timeout guards.

        :param inp: Input forwarded to ``run_function``
        :return: ``(result, outcome)``. The outcome becomes ``self.PASS`` only
            when ``run_function`` completes inside the guarded block; otherwise
            the initial values ``(None, self.FAIL)`` are returned.
        """
        # Start from a clean slate so a failed run does not report stale data
        self._coverage = []
        result, outcome = None, self.FAIL

        with ExpectError(mute=True), ExpectTimeout(self._timeout, mute=True):
            result = self.run_function(inp)
            outcome = self.PASS

        return result, outcome

In [None]:
# Module-level runner used by run_experiment(); relies on the default timeout
parse_svg_runner = FunctionCoverageRunnerWithTimeout(parse_svg)

We also define our experiment as a set of 5 runs, using random seeds 2000-2004, with 10000 trials each.

In [None]:
import datetime
import random

def run_experiment(fuzzer, start_seed=2000, end_seed=2005, trials=10000,
                   runner=None):
    """Run the fuzzer once per random seed and collect the resulting populations.

    For every seed in ``range(start_seed, end_seed)`` the global random
    generator is re-seeded, the fuzzer is reset and then executed for
    ``trials`` trials.

    :param fuzzer: Fuzzer providing ``reset()``, ``runs(runner, trials)`` and
        a ``population`` attribute
    :param start_seed: First random seed (inclusive)
    :param end_seed: Last random seed (exclusive)
    :param trials: Number of trials per seed
    :param runner: Runner handed to ``fuzzer.runs``; defaults to the
        module-level ``parse_svg_runner``
    :return: List with one population (a list) per seed, in seed order
    """
    if runner is None:
        # Looked up at call time so that rebinding the module-level runner
        # (as the evaluation cell does) still takes effect
        runner = parse_svg_runner

    print("Started fuzzing at %s" % str(datetime.datetime.now()))

    experiment_population = []
    for seed in range(start_seed, end_seed):
        print("Starting seed %d at %s" % (seed, str(datetime.datetime.now())))
        random.seed(seed)

        fuzzer.reset()
        fuzzer.runs(runner, trials)

        # Store a snapshot: if the fuzzer reuses its population list across
        # resets, storing the list itself would make all runs alias each other
        experiment_population.append(list(fuzzer.population))

    print("Finished fuzzing at %s" % str(datetime.datetime.now()))

    return experiment_population

We then initialize our fuzzer

In [None]:
# Uses the default mutation bounds (min_mutations=2, max_mutations=10)
mutation_fuzzer = Project1MutationCoverageFuzzer()

And execute it multiple times to test it. __Note:__ we're running this example with only 10 trials to demonstrate the functionality. The fuzzer should be executed for 10000 trials.

In [None]:
# Short demonstration run: only 10 trials per seed instead of the default 10000
experiment_population = run_experiment(mutation_fuzzer, trials=10)

## Obtaining the population coverage

In order to obtain the overall coverage achieved by the fuzzer's population we will adapt the [population_coverage](Coverage.ipynb) function from the lecture.

The following code calculates the overall coverage from a fuzzer's population:

In [None]:
import matplotlib.pyplot as plt

def population_coverage(population, function):
    """Measure the coverage reached by running ``function`` on each input.

    :param population: Iterable of inputs, processed in order
    :param function: Callable invoked with one input at a time
    :return: ``(all_coverage, cumulative_coverage)`` - the union of the
        coverage of all runs, and a list holding the size of that union after
        each input
    """
    covered_so_far = set()
    running_totals = []

    for entry in population:
        # Each call is traced and guarded by ExpectError / ExpectTimeout
        # (muted, timeout value 1)
        with Coverage() as cov, ExpectError(mute=True), ExpectTimeout(1, mute=True):
            function(entry)
        covered_so_far |= cov.coverage()
        running_totals.append(len(covered_so_far))

    return covered_so_far, running_totals

# Your code

Now extend the Project1MutationCoverageFuzzer class, implement your own custom mutations and fuzz _svglib_ to achieve a better coverage.

## Tips

* You can develop any type of mutation as well as use random mutations.
* The commands `with ExpectError(mute=True)` and `with ExpectTimeout(1, mute=True)` remove the error output. It may be useful to set `mute=False` for debugging.
* Your fuzzer will be restarted (`reset()`) after each execution.   
* We recommend that you extend the class `Project1MutationCoverageFuzzer` as `class Project1MutationCoverageFuzzer(Project1MutationCoverageFuzzer): ...` to reuse the implementations from the lecture.

In [None]:
class Project1MutationCoverageFuzzer(Project1MutationCoverageFuzzer):
    # Placeholder for the student's solution. The subclass deliberately reuses
    # the name of the template class it extends, so that the evaluation code,
    # which instantiates Project1MutationCoverageFuzzer(), picks up whatever
    # is defined here.
    # <Write your code here>
    pass

# Evaluation

Since our experiment consists of a set of executions, we'll calculate the coverage of all populations and return its average as the final result.

In [None]:
def evaluate(populations):
    """Compute and plot the coverage achieved by each population.

    For every population the cumulative coverage of ``parse_svg`` is measured
    with ``population_coverage``, plotted, and summarised on stdout.

    :param populations: Iterable of populations, one per experiment run
    :return: Tuple ``(average, total)`` - the average number of covered lines
        per run, and the number of distinct lines covered across all runs.
        ``(0.0, 0)`` if ``populations`` is empty.
    """
    global COUNT
    coverages = []
    seen_statements = set()

    for idx, population in enumerate(populations):
        # Restart parse_svg()'s debug call counter for every run
        COUNT = 0
        all_coverage, cumulative_coverage = population_coverage(
            population, parse_svg)

        seen_statements |= all_coverage
        coverages.append(len(all_coverage))

        # All runs are drawn onto the current figure
        plt.plot(cumulative_coverage)
        plt.title('Coverage of parse_svg() with random inputs')
        plt.xlabel('# of inputs')
        plt.ylabel('lines covered')
        print("Covered lines (run %d) %d" % (idx, len(all_coverage)))
        # cumulative_coverage has one entry per input of the population
        print("Unique elements (run %d) %d" % (idx, len(cumulative_coverage)))

    if not coverages:
        # Nothing to average; avoid dividing by zero
        return (0.0, 0)

    return (sum(coverages) / len(coverages), len(seen_statements))

In [None]:
# evaluate() returns an (average, total) tuple that fills both %d placeholders
print("Average coverage: %d - Total achieved coverage: %d" % evaluate(experiment_population))

## Evaluation scheme

* For the evaluation your fuzzer will be executed __five__ times with random seeds __2000-2004__ and __10000__ trials in each seed.
* In order to be approved your fuzzer should achieve an average coverage of __4400__ LOC (lines of code). 
* Bonus points will be awarded for fuzzers which reach a total of more than __5500__ unique library statements throughout the experiment, as well as for fuzzers which trigger an __exception__ from the library in any execution (a single bonus point will be awarded for exceptions, irrespective of the number of exceptions triggered). Examples include:
    ```    
    Exception ignored in: <bound method Image.__del__ of <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1280x853 at 0x1604B3F60>>
    Traceback (most recent call last):
      File "/etc/anaconda3/lib/python3.6/site-packages/PIL/Image.py", line 588, in __del__
        def __del__(self):
      File "<string>", line 5, in traceit
      File "<string>", line 16, in check_time
    TimeoutError: 
    ```
* The grades will be based on the average coverage achieved by your fuzzer.
* Students can be randomly selected to explain their code in order to demonstrate authorship.

__The following code will be used to evaluate your fuzzer (Note: Your fuzzer must be executable by the following code)__


In [None]:
if RUN_EVALUATION:
    print("Initializing evaluation")
    # Rebuild the runner and the fuzzer so the evaluation starts from fresh
    # objects; run_experiment() uses the module-level parse_svg_runner
    parse_svg_runner = FunctionCoverageRunnerWithTimeout(parse_svg)
    mutation_fuzzer = Project1MutationCoverageFuzzer()
    print("Running experiment")
    # Full experiment: default seeds (2000-2004) with 10000 trials each
    experiment_population = run_experiment(mutation_fuzzer, trials=10000)

In [None]:
if RUN_EVALUATION:
    print("Computing results")
    # evaluate() yields the average coverage per run and the number of
    # distinct lines covered across all runs
    avg_statements, total_statements = evaluate(experiment_population)

    print("Final result: Average coverage: %d - Total achieved coverage: %d" % (avg_statements, total_statements))