# Grammar Fuzzer

In [1]:
import random
import string
from collections import deque

import src.utils as utils

In [2]:
# Placeholder tokens that may appear in a grammar rule, mapped to the pool of
# characters a generator draws from when it meets that token.
ASCII_MAP = {
    '[__WHITESPACE__]': string.whitespace,
    '[__DIGIT__]': string.digits,
    '[__ASCII_LOWER__]': string.ascii_lowercase,
    '[__ASCII_UPPER__]': string.ascii_uppercase,
    '[__ASCII_PUNCT__]': string.punctuation,
    '[__ASCII_LETTER__]': string.ascii_letters,
    '[__ASCII_ALPHANUM__]': string.ascii_letters + string.digits,
    '[__ASCII_PRINTABLE__]': string.printable,
}

# Upper bound on the number of characters produced for a placeholder that
# carries a trailing '+' (the count is random.randrange(FUZZRANGE) + 1).
FUZZRANGE = 10

In [3]:
class Fuzzer:
    """Base class for grammar fuzzers.

    It only stores the grammar; concrete subclasses provide `fuzz`.
    """

    def __init__(self, grammar):
        """Keep a reference to `grammar` for use by subclasses."""
        self.grammar = grammar

    def fuzz(self, key='<start>', max_num=None, max_depth=None):
        """Generate an input starting from `key`; subclasses must override.

        Raises:
            NotImplementedError: always, because the base class has no
                generation strategy.
        """
        # `NotImplemented` is a non-callable constant meant for binary
        # operator dispatch; the exception type is `NotImplementedError`.
        raise NotImplementedError()

In [4]:
class LimitFuzzer(Fuzzer):
    """Fuzzer that precomputes a cost for every rule of its grammar."""

    def __init__(self, grammar):
        """Store `grammar` and build the per-rule cost table in `self.cost`."""
        super().__init__(grammar)
        # Per-symbol cost cache read and written by symbol_cost; it has to
        # exist before compute_cost starts asking for expansion costs.
        self.key_cost = {}
        self.cost = self.compute_cost(grammar)

    def compute_cost(self, grammar):
        """Return `{key: {str(rule): cost}}` for every rule in `grammar`.

        Each cost is obtained from `expansion_cost`, called with an empty
        `seen` set. Rules are indexed by their `str()` form.
        """
        table = {}
        for key in grammar:
            rule_costs = {}
            table[key] = rule_costs
            for rule in grammar[key]:
                rule_costs[str(rule)] = self.expansion_cost(grammar, rule, set())
            if len(grammar[key]) > 0:
                # NOTE(review): iterating the dict yields the rule-name
                # strings, not the costs, so this comparison against inf can
                # never filter anything and the assertion never fails.
                # Confirm whether `.values()` was intended.
                not_inf = [name for name in rule_costs if name != float('inf')]
                assert len(not_inf) > 0
        return table

In [5]:
class LimitFuzzer(LimitFuzzer):
    def _solve_symbol_costs(self, grammar):
        """Return the minimal expansion cost of every key in `grammar`.

        The cost of a symbol is the cheapest of its rules, and the cost of a
        rule is one more than its most expensive nonterminal (a rule without
        nonterminals costs 1). A symbol with no rules costs 0; a symbol that
        can never finish expanding keeps a cost of infinity.
        """
        costs = {symbol: float('inf') for symbol in grammar}
        # Relax until nothing improves. Costs only ever decrease and are
        # bounded below by 0, so the loop terminates.
        changed = True
        while changed:
            changed = False
            for symbol, rules in grammar.items():
                best = min((max((costs[token] for token in rule
                                 if token in grammar), default=0) + 1
                            for rule in rules), default=0)
                if best < costs[symbol]:
                    costs[symbol] = best
                    changed = True
        return costs

    def symbol_cost(self, grammar, symbol, seen):
        """Return the minimal expansion cost of `symbol`, cached in `self.key_cost`.

        Args:
            grammar: mapping from nonterminal to its list of rules.
            symbol: the symbol to cost; one that is not a key of `grammar`
                costs 0.
            seen: unused; kept so callers can keep passing the set of
                enclosing symbols.

        Costs are solved for the whole grammar at once instead of by a
        memoised depth-first search. A value computed while an enclosing
        symbol is still being expanded is only valid for that path, so
        caching it leaves mutually recursive symbols with a spurious
        infinite cost.
        """
        if symbol not in self.key_cost:
            if symbol in grammar:
                self.key_cost.update(self._solve_symbol_costs(grammar))
            else:
                self.key_cost[symbol] = 0
        return self.key_cost[symbol]

    def expansion_cost(self, grammar, tokens, seen):
        """Return the cost of the rule `tokens`.

        This is one more than the most expensive token that is a key of
        `grammar`, or 1 when the rule contains no such token.
        """
        return max((self.symbol_cost(grammar, token, seen)
                    for token in tokens if token in grammar), default=0) + 1

In [6]:
class LimitFuzzer(LimitFuzzer):
    def nonterminals(self, rule):
        """Return the tokens of `rule` for which `utils.is_nt` is true."""
        return [t for t in rule if utils.is_nt(t)]

    def _expand_ascii(self, token):
        """Return random text for an ASCII_MAP placeholder, else None.

        A token that is a key of ASCII_MAP yields one random character from
        its pool. The same key followed by '+' yields between 1 and
        FUZZRANGE random characters. Any other token yields None.
        """
        if token in ASCII_MAP:
            return random.choice(ASCII_MAP[token])
        if token and token[-1] == '+' and token[:-1] in ASCII_MAP:
            count = random.randrange(FUZZRANGE) + 1
            pool = ASCII_MAP[token[:-1]]
            return ''.join([random.choice(pool) for _ in range(count)])
        return None

    def iter_gen_key(self, key, max_depth):
        """Build a random derivation tree for `key` without recursion.

        Nodes are two-item lists `[symbol, children]`. Nonterminals are
        expanded breadth-first with a randomly chosen rule. From depth
        `max_depth` onwards only the minimum-cost rules of each key are
        eligible.

        Returns:
            The root node `[key, children]`.
        """
        def get_def(t):
            # A node whose children are None is still waiting to be expanded;
            # an empty list marks a finished leaf.
            text = self._expand_ascii(t)
            if text is not None:
                return [text, []]
            if utils.is_nt(t):
                return [t, None]
            return [t, []]

        # For every key keep only the rules whose cost is the minimum.
        cheap_grammar = {}
        for k in self.cost:
            rules = self.grammar[k]
            if rules:
                min_cost = min(self.cost[k][str(r)] for r in rules)
                cheap_grammar[k] = [r for r in rules
                                    if self.cost[k][str(r)] == min_cost]
            else:
                cheap_grammar[k] = []  # (No rules found)

        root = [key, None]
        # deque gives O(1) pops from the front; re-slicing a list on every
        # pop made the loop quadratic in the number of tree nodes.
        queue = deque([(0, root)])
        while queue:
            depth, item = queue.popleft()
            if item[1] is not None:
                continue
            grammar = self.grammar if depth < max_depth else cheap_grammar
            chosen_rule = random.choice(grammar[item[0]])
            expansion = [get_def(t) for t in chosen_rule]
            item[1] = expansion
            queue.extend((depth + 1, t) for t in expansion)

        return root

    def gen_key(self, key, depth, max_depth):
        """Recursively build a derivation tree for `key`.

        Returns a tuple `(symbol, children)`. Placeholders from ASCII_MAP and
        tokens that are not keys of the grammar become leaves with an empty
        child list.
        """
        text = self._expand_ascii(key)
        if text is not None:
            return (text, [])
        if key not in self.grammar:
            return (key, [])
        # NOTE(review): the cheapest rules are used here only when
        # depth > max_depth, whereas iter_gen_key switches at
        # depth >= max_depth. Confirm which threshold is intended.
        if depth > max_depth:
            clst = sorted([(self.cost[key][str(rule)], rule)
                           for rule in self.grammar[key]])
            rules = [r for c, r in clst if c == clst[0][0]]
        else:
            rules = self.grammar[key]
        v = self.gen_rule(random.choice(rules), depth + 1, max_depth)
        return (key, v)

    def gen_rule(self, rule, depth, max_depth):
        """Return the `gen_key` result for every token of `rule`, in order."""
        return [self.gen_key(token, depth, max_depth) for token in rule]

    def fuzz(self, key='<start>', max_depth=10):
        """Return the `utils.tree_to_str` rendering of a random tree for `key`."""
        return utils.tree_to_str(self.iter_gen_key(key=key, max_depth=max_depth))

In [7]:
import string

# Arithmetic-expression grammar. Every nonterminal maps to its list of
# alternative rules, and every rule is a list of tokens.
EXPR_GRAMMAR = {
    '<start>': [['<expr>']],
    '<expr>': [
        ['<term>', '+', '<expr>'],
        ['<term>', '-', '<expr>'],
        ['<term>'],
    ],
    '<term>': [
        ['<factor>', '*', '<term>'],
        ['<factor>', '/', '<term>'],
        ['<factor>'],
    ],
    '<factor>': [
        ['+', '<factor>'],
        ['-', '<factor>'],
        ['(', '<expr>', ')'],
        ['<integer>', '.', '<integer>'],
        ['<integer>'],
    ],
    '<integer>': [
        ['<digit>', '<integer>'],
        ['<digit>'],
    ],
    # One single-token rule per decimal digit, '0' through '9'.
    '<digit>': [[str(d)] for d in range(10)],
}

# Nonterminal at which generation starts.
EXPR_START = '<start>'

In [8]:
if __name__ == '__main__':
    # Demo: print the repr of ten random strings generated from EXPR_GRAMMAR.
    lf = LimitFuzzer(EXPR_GRAMMAR)
    for _ in range(10):
        print(repr(lf.fuzz(EXPR_START)))

'(5.5/-(4.6-2.5)/24.45*(5)+8.8*35.1-+7-(4.8)*4.9+9.8*5)/71-((7.4*1+8.9)/+-9.1)*0956*-5.616/5.4+++4/+559.5*2*--3.3*+9.8+(7.4*7.4/4.5)-+75'
'--816.65*-963.8/2.9/-(8/5-1.9+4)/722.2/+9.5*+7*6/1.2'
'-+-003/+4*2-64.264020+4*2/6.9/(1-9.6)/8.2*1-+4--7*(4.2)/(7.9)-(2.7-6)*1.4++8.2*1/9.7-8*1-0.1+7'
'+-+7/+8481.979*(9.41++2-0/1.5-1)/(-1.8*5.5)/(3.0+5-2)+-67.2543'
'234-+-(2/9.8*6.2-9.9*9.4-2)/(-+2.3)*436.291*9.942/+(5)/-7*1.8+((4)+7.3/6/8.8)/+9.56+991*+(0.0-5)*-1.7*+4+282/0*+0/6.4'
'++2.83*----(5+1)*-3+85*(--0.7--3+4*1.6-3-0.6)*+3/47.9/+6+(69.9/+6.9)+-(1/4.3+7.1-9)*3*5.2+7/3/5.4'
'-+++0--(99.38*4.6)*8.6+67896.6-+(0.6+5-3.7)*9.692*+-7/0.7*2/2+574*5/6.2--+1+(0.5)*2-2*0.8'
'5-(65.8/--7/(8.6))*6/-2*8'
'(((9.1)-9.2/7.2)*78*-+9-(1*8.6+3.9+3)--7.0*-3.4)*+5.408/+--54/(-4.3*1*7.5)/(6.1)*6.7+((8-9)*35*(8.4)*9-(3)/7+8.8-3/3.5+0)'
'-226+894202.87++(+3)'


# Done

In [9]:
#%tb