In [13]:
import query_tree

In [196]:
# coding: utf-8
import re
import unittest
import mmh3
import varbyte

# Query tokenizer pattern: a token is either a run of word characters
# (Unicode-aware because of re.U) or a single one of ( ) & | !.
# Anything else, such as whitespace, is not matched and is therefore skipped
# by findall.
SPLIT_RGX = re.compile(r'\w+|[\(\)&\|!]', re.U)

class QtreeTypeInfo(object):
    """Base class for query tokens: a raw value plus flags describing its kind.

    A token compares equal to another token holding the same ``value`` and to
    the bare value itself, so ``token == '&'`` works directly.

    Attributes are set from the constructor arguments:
        value: the raw token text.
        is_operator, is_bracket, is_term: kind flags, all False by default.
    """

    def __init__(self, value, op=False, bracket=False, term=False):
        self.value = value
        self.is_operator = op
        self.is_bracket = bracket
        self.is_term = term

    def __repr__(self):
        return repr(self.value)

    def __eq__(self, other):
        if isinstance(other, QtreeTypeInfo):
            return self.value == other.value
        return self.value == other

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; without this,
        # ``token != '&'`` would not be the inverse of ``token == '&'``.
        return not (self == other)

    def __hash__(self):
        # Keep hashing consistent with __eq__: a token equals its bare value,
        # so it must hash like that value. Defining __eq__ alone makes
        # instances unhashable on Python 3.
        return hash(self.value)


class QTreeTerm(QtreeTypeInfo):
    """Query token flagged as a term (``is_term`` is True)."""

    def __init__(self, term):
        QtreeTypeInfo.__init__(self, term, op=False, bracket=False, term=True)


class QTreeOperator(QtreeTypeInfo):
    """Query token flagged as an operator (``is_operator`` is True).

    Besides the base attributes it stores the operator's ``priority`` as
    reported by get_operator_prio and two child slots, ``left`` and ``right``.
    """

    def __init__(self, op):
        QtreeTypeInfo.__init__(self, op, op=True, bracket=False, term=False)
        # Both child slots start out empty.
        self.left = self.right = None
        self.priority = get_operator_prio(op)


class QTreeBracket(QtreeTypeInfo):
    """Query token flagged as a bracket (``is_bracket`` is True)."""

    def __init__(self, bracket):
        QtreeTypeInfo.__init__(self, bracket, op=False, bracket=True, term=False)


def get_operator_prio(s):
    """Return the priority of operator symbol *s*, or None if it is not one.

    Known symbols and their priorities: ``|`` -> 0, ``&`` -> 1, ``!`` -> 2.
    """
    known = (('|', 0), ('&', 1), ('!', 2))
    for symbol, prio in known:
        if s == symbol:
            return prio
    return None


def is_operator(s):
    """Return True when get_operator_prio knows a priority for *s*."""
    prio = get_operator_prio(s)
    return prio is not None


def tokenize_query(q):
    """Split query string *q* into a list of bracket, operator and term tokens.

    Every piece matched by SPLIT_RGX is lower-cased and UTF-8 encoded, then
    wrapped in QTreeBracket, QTreeOperator or QTreeTerm depending on its text.
    """
    # Normalise all pieces first, then classify them.
    pieces = [raw.lower().encode('utf-8') for raw in SPLIT_RGX.findall(q)]

    wrapped = []
    for piece in pieces:
        if piece == '(' or piece == ')':
            kind = QTreeBracket
        elif is_operator(piece):
            kind = QTreeOperator
        else:
            kind = QTreeTerm
        wrapped.append(kind(piece))
    return wrapped



def parse_query(q):
    """Tokenize query string *q* and pass the tokens to build_query_tree."""
    # NOTE(review): build_query_tree is not defined in the visible part of
    # this file -- confirm where it is supposed to come from.
    return build_query_tree(tokenize_query(q))


""" Convert a query tree back to a string. Needed for tests. """
def qtree2str(root, depth=0):
    """Render the query tree rooted at *root* as a string.

    A non-operator node renders as its ``value``. An operator node renders as
    its left child, the operator and its right child; ``!`` is written without
    surrounding spaces, every other operator with one space on each side.
    Operator nodes other than ``!`` are wrapped in brackets whenever
    *depth* is greater than zero, i.e. when they are not the top-level node.
    """
    if not root.is_operator:
        return root.value

    wrap = depth > 0 and root.value != '!'
    pieces = []
    if wrap:
        pieces.append('(')

    if root.left:
        pieces.append(qtree2str(root.left, depth + 1))

    pieces.append(root.value if root.value == '!' else ' ' + root.value + ' ')

    if root.right:
        pieces.append(qtree2str(root.right, depth + 1))

    if wrap:
        pieces.append(')')

    return ''.join(pieces)

    


In [72]:
# Scratch token list made of plain strings (not QtreeTypeInfo tokens), kept
# for manual experiments in the notebook.
tokens = [ '!','(', '!', 'a', '|', 'b', ')', '&', 'c']

In [279]:
def strip_brackets(tokens):
    """Parse a token sequence that may contain brackets into a nested-list tree.

    Tokens are pushed on a stack. Whenever a ``)`` arrives, everything back to
    the matching ``(`` is popped, parsed with parse_expression and pushed back
    as a single already-parsed item. What remains at the end is parsed the
    same way.

    Args:
        tokens: iterable of tokens that compare equal to their text
            (QtreeTypeInfo instances or plain strings).

    Returns:
        Whatever parse_expression returns for the fully reduced sequence.

    Raises:
        ValueError: if the query is empty, a bracket pair is empty, or the
            brackets are unbalanced.
    """
    stack = []
    for token in tokens:
        if not token == ')':
            stack.append(token)
            continue

        # Collect the group content back to the matching '('. Items are
        # compared with ``==`` rather than through ``.value`` because an
        # already-parsed inner group is a plain list without that attribute.
        inner_tokens = []
        while stack:
            item = stack.pop()
            if item == '(':
                break
            inner_tokens.append(item)
        else:
            raise ValueError("unbalanced brackets: ')' without matching '('")

        if not inner_tokens:
            raise ValueError("empty brackets '()' in query")

        inner_tokens.reverse()  # popped in reverse order
        stack.append(parse_expression(inner_tokens))

    if not stack:
        raise ValueError('empty query')
    if any(item == '(' for item in stack):
        raise ValueError("unbalanced brackets: '(' without matching ')'")
    return parse_expression(stack)

def is_not_list(obj):
    """Return True when *obj* is not an instance of list."""
    if isinstance(obj, list):
        return False
    return True

# Index of the last precedence level in operator_list.
max_priority = 2

# Operators ordered from tightest to loosest binding; the ``priority``
# argument of parse_expression is an index into this list.
operator_list = ['!', '&', '|']


def parse_expression(tokens, priority=0):
    """Fold a bracket-free token sequence into a nested-list expression tree.

    Operators are reduced one precedence level at a time, starting at
    ``operator_list[priority]`` and ending at ``operator_list[max_priority]``.
    Within a level the sequence is scanned left to right:

    * ``!`` followed by another ``!`` cancels out (both are removed);
    * ``!`` followed by an operand becomes ``[op, operand, None]``;
    * a binary operator becomes ``[op, left, right]``.

    Args:
        tokens: sequence of tokens and/or already-built nodes (lists). Tokens
            must compare equal to their operator text, as QtreeTypeInfo
            instances and plain strings do. The sequence is not modified.
        priority: index into operator_list of the first level to reduce.

    Returns:
        The first item left after all reductions: a nested list for an
        operator expression, or the token itself for a single operand.
        Items that are not joined by an operator (e.g. two adjacent terms)
        are not merged; only the first one is returned.

    Raises:
        ValueError: if the sequence is empty or an operator lacks an operand.
    """
    nodes = list(tokens)  # work on a copy so the caller's list stays intact
    if not nodes:
        raise ValueError('cannot parse an empty expression')

    for level in range(priority, max_priority + 1):
        operator = operator_list[level]
        i = 0
        while i < len(nodes):
            # Only ``==`` is used for comparisons: it is False for nested
            # list nodes and does not depend on tokens implementing ``!=``.
            if not nodes[i] == operator:
                i += 1
                continue

            if i + 1 >= len(nodes):
                raise ValueError(
                    'operator %r is missing its right operand' % (operator,))

            if operator == '!':
                if nodes[i + 1] == '!':
                    # '!!x' -> 'x': drop both and re-examine position i.
                    del nodes[i:i + 2]
                    continue
                nodes[i:i + 2] = [[nodes[i], nodes[i + 1], None]]
                i += 1
            else:
                if i == 0:
                    raise ValueError(
                        'operator %r is missing its left operand' % (operator,))
                nodes[i - 1:i + 2] = [[nodes[i], nodes[i - 1], nodes[i + 1]]]
                # The merged node sits at i - 1, so the next unscanned item
                # is now at index i; do not advance.

    if not nodes:
        # Only reachable when the whole input was cancelling '!' pairs.
        raise ValueError('expression contains no operand')
    return nodes[0]

def is_list(obj):
    """Return True when *obj* is an instance of list."""
    return isinstance(obj, list)

def evaluate(tree_as_list):
    """Evaluate a nested-list expression tree of the form ``[op, left, right]``.

    Child nodes that are lists are evaluated recursively first; any other
    child is handed to the operator helper unchanged. ``!`` dispatches to
    ``negation(left)``, ``&`` to ``intersection(left, right)`` and ``|`` to
    ``union(left, right)``.

    Args:
        tree_as_list: a ``[op, left, right]`` list, or a bare leaf (the result
            of parsing a query without operators).

    Returns:
        The result of the operator helper, or the leaf itself when
        *tree_as_list* is not a list -- the same way leaves are passed
        unchanged to the helpers.

    Raises:
        ValueError: if the node's operator is none of ``!``, ``&``, ``|``.
    """
    # NOTE(review): ``negation`` and ``intersection`` are not defined in the
    # visible part of this file, and the visible ``union`` takes three
    # parameters while it is called with two here -- confirm the helpers.
    if not isinstance(tree_as_list, list):
        return tree_as_list

    operator, left, right = tree_as_list[0], tree_as_list[1], tree_as_list[2]

    if isinstance(left, list):
        left = evaluate(left)
    if isinstance(right, list):
        right = evaluate(right)

    if operator == '!':
        return negation(left)
    if operator == '&':
        return intersection(left, right)
    if operator == '|':
        return union(left, right)

    # Fail loudly instead of silently returning None.
    raise ValueError('unknown operator in query tree: %r' % (operator,))

In [None]:
def union(right, left, index):
        """Unfinished stub; see the review notes below before relying on it.

        As written, the only reachable statement returns, as a set, the
        varbyte-decompressed entry of *index* keyed by
        ``mmh3.hash(root.value)``.

        NOTE(review): ``root`` is neither a parameter nor a local of this
        function, so the lookup needs a global ``root`` to exist -- confirm.
        NOTE(review): ``evaluate`` calls ``union(left, right)`` with two
        arguments in the opposite order, while this signature requires
        three -- confirm the intended signature.
        """
    
        return set(varbyte.decompress(index[mmh3.hash(root.value)]))
  
        # NOTE(review): everything below is unreachable (it follows the
        # return). It calls evaluate_query_tree, which is not defined in the
        # visible part of this file, and computes an intersection rather than
        # a union.
        left_set = evaluate_query_tree(left.value, index)
        right_set = evaluate_query_tree(right.value, index)
        return left_set.intersection(right_set)


In [280]:
# Demo: tokenize and parse a bracketed query; the recorded cell output below
# shows the resulting nested-list tree in [operator, left, right] form.
strip_brackets(tokenize_query('( !a | b ) & c'))

['&', ['|', ['!', 'a', None], 'b'], 'c']

In [281]:
# Demo: evaluate a parsed query. The recorded output below is a NameError
# because `intersection` was not defined when the cell ran.
evaluate(strip_brackets(tokenize_query('a & b')))

NameError: global name 'intersection' is not defined