# SQL to circuit ansätze

This notebook implements the part of the algorithm that translates join order benchmark queries into pregroup diagrams, and pregroup diagrams into circuit ansätze. A simple but not very realistic example can be found in the `sql_to_circuit_simple_example` notebook.

The following code generates diagrams for all the SELECT-FROM-WHERE queries in the join order benchmark. Running the code takes some time, and it also serves as a test suite for the code. The diagrams have already been generated in the folder `join-order-benchmark-diagrams`. The parameter `figsize` should be adjusted based on the length of the query, but currently it has constant dimensions.

Unfortunately, JOB queries produce circuits that are too large for the quantum computing resources we have available.

Besides producing the visual figures, the code performs a rewriting process using the snake removal procedure. The outcomes are serialized into the `data\pregroup_data` folder. As in the data generation case, this notebook exists for reproducibility reasons, and the user does not need to rerun it unless they want to change the underlying queries.

## Transformation 1: SQL to context-free grammar diagrams

In [14]:
from antlr4 import *
from SQLiteLexer import SQLiteLexer
from SQLiteParser import SQLiteParser
from SQLiteParserListener import SQLiteParserListener
import json
import os
import glob
from pathlib import Path
from discopy import Ty, Box, Functor
from functools import reduce
from discopy.utils import dumps, loads
# Absolute path of the current working directory; the query and diagram
# folder paths used in this notebook are built by appending to it.
this_folder = os.path.abspath(os.getcwd())

In [None]:
# Glob pattern, appended to `this_folder`, that selects the benchmark query
# files whose names start with a digit and end in `.sql`.
# NOTE(review): the hard-coded backslash separators presumably make this
# Windows-only -- confirm before running on another platform.
query_path = "\\join-order-benchmark-queries\\[0-9]*.sql"

In [15]:
def create_CFG_diagrams():
    """Parse every benchmark query and store its context-free grammar diagram.

    Each file matched by ``this_folder + query_path`` is lexed and parsed with
    the SQLite ANTLR grammar and walked with ``SQLiteParserListener``. The
    dagger of the listener's final diagram is then written to the
    ``join-order-benchmark-cfg-diagrams`` folder twice: as ``<stem>.png``
    (drawing) and as ``<stem>.json`` (serialized diagram).

    A query that fails at any step is reported on stdout together with the
    error, and processing continues with the next query.
    """
    output_prefix = this_folder + "\\join-order-benchmark-cfg-diagrams\\"

    for query in glob.glob(this_folder + query_path):
        base_name = Path(query).stem
        try:
            lexer = SQLiteLexer(FileStream(query))
            parser = SQLiteParser(CommonTokenStream(lexer))
            tree = parser.parse()
            listener = SQLiteParserListener(parser)
            ParseTreeWalker().walk(listener, tree)
            diagram = listener.get_final_diagram().dagger()
            diagram.draw(figsize=(100, 100), path=output_prefix + base_name + ".png")
            with open(output_prefix + base_name + ".json", 'w') as outfile:
                # Round-trip through json.loads so the file holds a JSON
                # document rather than a JSON-encoded string.
                json.dump(json.loads(dumps(diagram)), outfile)
        except Exception as error:
            # Best-effort batch run: one bad query must not stop the others.
            # Catching Exception (not a bare except) keeps Ctrl-C and
            # SystemExit working, and the error is shown so the failure can
            # be diagnosed.
            print("Query: ", base_name, " failed.", repr(error))

In [16]:
#%%capture
#create_CFG_diagrams()

## Transformation 2: Context-free grammar diagrams to pregroup grammar diagrams

In [17]:
from discopy.rigid import Diagram, Cup, Cap, Id
from discopy.grammar.pregroup import Word

# Rewriting rules read by object_mapping ("object_function" key) and
# arrow_mapping ("arrow_function" key). Reset first so a failed load does not
# leave data from an earlier run behind.
functor_data = None
# JSON text is UTF-8; state it explicitly instead of relying on the platform
# default encoding.
with open('rewriter_functor_data.json', encoding='utf-8') as json_file:
    functor_data = json.load(json_file)
    
def count_boxes(diagram, box_name):
    """Count the boxes called ``box_name`` reachable through ``diagram``.

    Every element obtained by iterating ``diagram`` has its ``fmap`` method
    called with a callback; the callback inspects the ``boxes`` attribute of
    whatever ``fmap`` hands it and tallies the entries whose ``name`` equals
    ``box_name``.

    Args:
        diagram: Iterable whose elements provide ``fmap``.
        box_name: Name to look for.

    Returns:
        The number of matching boxes seen by the callback.
    """
    total = 0

    def tally(node):
        # Called for its side effect only; it returns None.
        nonlocal total
        for candidate in node.boxes:
            if candidate.name == box_name:
                total += 1

    for part in diagram:
        part.fmap(tally)
    return total

def object_mapping(obj, num_of_result_columns, num_of_tables):
    """Map a context-free grammar type to its pregroup type.

    Rules are tried in this order:

    1. ``obj.name`` is a key of ``functor_data["object_function"]``: the
       listed entries are tensored together. An entry ``"x"`` becomes
       ``Ty("x")``; ``"x.l"`` / ``"x.r"`` become its left / right adjoint.
       Entries with any other suffix are ignored.
    2. ``"select-keyword"``: ``s`` followed by one ``n.l`` per result column.
    3. ``"from-keyword"``: ``s.r @ s`` followed by one ``n.l`` per table.
    4. Anything else is mapped to a type with the same name.

    Args:
        obj: Object whose ``name`` attribute selects the rule.
        num_of_result_columns: Number of ``n.l`` factors for rule 2.
        num_of_tables: Number of ``n.l`` factors for rule 3.

    Returns:
        The resulting ``Ty``.
    """
    dom_ty_name = obj.name
    object_rules = functor_data["object_function"]

    if dom_ty_name in object_rules:
        dom_ty = Ty()
        for spec in object_rules[dom_ty_name]:
            if "." not in spec:
                dom_ty = dom_ty @ Ty(spec)
                continue
            parts = spec.split(".")
            if parts[1] == "l":
                dom_ty = dom_ty @ Ty(parts[0]).l
            elif parts[1] == "r":
                dom_ty = dom_ty @ Ty(parts[0]).r
        return dom_ty

    # NOTE(review): these names use hyphens while arrow_mapping compares
    # against Ty('select_keyword') / Ty('from_keyword') with underscores --
    # confirm which spelling the CFG diagrams actually contain.
    if dom_ty_name == "select-keyword":
        dom_ty = Ty('s')
        for _ in range(num_of_result_columns):
            dom_ty = dom_ty @ Ty('n').l
        return dom_ty

    if dom_ty_name == "from-keyword":
        dom_ty = Ty('s').r @ Ty('s')
        for _ in range(num_of_tables):
            dom_ty = dom_ty @ Ty('n').l
        return dom_ty

    # The fallback used to read an undefined variable (`cod_name = dom_name`)
    # and therefore raised NameError for every unmapped type. That statement
    # reads as "codomain name = domain name", so unmapped types are kept
    # unchanged. NOTE(review): confirm this identity mapping is the intent.
    return Ty(dom_ty_name)

def _pregroup_ty(spec):
    """Turn a ``"x"``, ``"x.l"`` or ``"x.r"`` string into the matching ``Ty``.

    Returns ``None`` when the text after the first dot is neither ``l`` nor
    ``r``.
    """
    if "." not in spec:
        return Ty(spec)
    pieces = spec.split(".")
    if pieces[1] == "l":
        return Ty(pieces[0]).l
    if pieces[1] == "r":
        return Ty(pieces[0]).r
    return None


def _keyword_ty(prefix, count):
    """Return ``prefix`` tensored with ``count`` copies of ``n.l``."""
    widened = prefix
    for _ in range(count):
        widened = widened @ Ty('n').l
    return widened


def _noun_caps(count):
    """Return ``Diagram.caps(t.l, t)`` where ``t`` is ``count`` copies of ``n``."""
    nouns = Ty()
    for _ in range(count):
        nouns = nouns @ Ty('n')
    return Diagram.caps(nouns.l, nouns)


def arrow_mapping(box, num_of_result_columns, num_of_tables):
    """Map a context-free grammar box to a pregroup diagram.

    Rules are tried in this order:

    1. ``box.name`` is a key of ``functor_data["arrow_function"]``: the
       listed entries are tensored left to right. An ``"Id"`` entry adds an
       identity wire of its type (skipped when the adjoint suffix is
       unknown). ``"Cup"`` entries are consumed in pairs: the first one
       of a pair adds ``Cap(left, right)`` built from its own type and the
       type of the entry right after it, and the next ``"Cup"`` entry
       encountered is skipped.
    2. ``"select-clause"``: ``Id(s)`` next to caps over one ``n`` per result
       column.
    3. ``"from-clause"``: ``Id(s.r) @ Id(s)`` next to caps over one ``n`` per
       table.
    4. ``box.cod`` is exactly a ``Ty``: a box with the same name and empty
       codomain whose domain depends on ``box.dom`` (``e`` for literals,
       keyword-specific types for where/select/from, ``n`` otherwise).
    5. Otherwise the identity on the empty type.

    Args:
        box: The box to translate; ``name``, ``dom`` and ``cod`` are read.
        num_of_result_columns: Number of result columns of the query.
        num_of_tables: Number of tables of the query.

    Returns:
        The pregroup diagram assigned to ``box``.
    """
    name = box.name
    arrow_rules = functor_data["arrow_function"]

    if name in arrow_rules:
        entries = arrow_rules[name]
        result = Id(Ty())
        skip_partner = False
        for position, entry in enumerate(entries):
            kind = entry["box"]
            if kind == "Id":
                wire = _pregroup_ty(entry["type"])
                if wire is not None:
                    result = result @ Id(wire)
            elif kind == "Cup":
                if skip_partner:
                    # Second half of a pair: already used by the previous Cap.
                    skip_partner = False
                    continue
                skip_partner = True
                left = _pregroup_ty(entry["type"])
                right = _pregroup_ty(entries[position + 1]["type"])
                # The rule data says "Cup" but a Cap is emitted here.
                result = result @ Cap(left, right)
        return result

    if name == "select-clause":
        return Id(Ty('s')) @ _noun_caps(num_of_result_columns)

    if name == "from-clause":
        return Id(Ty('s').r) @ Id(Ty('s')) @ _noun_caps(num_of_tables)

    if type(box.cod) == Ty:
        source = box.dom
        if source == Ty('literal_value'):
            return Box(name, Ty('e'), Ty())
        if source == Ty('where_keyword'):
            return Box(name, Ty('s').r @ Ty('s') @ Ty('e').l, Ty())
        if source == Ty('select_keyword'):
            return Box(name, _keyword_ty(Ty('s'), num_of_result_columns), Ty())
        if source == Ty('from_keyword'):
            return Box(name, _keyword_ty(Ty('s').r @ Ty('s'), num_of_tables), Ty())
        return Box(name, Ty('n'), Ty())

    return Id(Ty())

In [18]:
def create_pregroup_grammar_diagrams():
    """Rewrite every stored CFG diagram into a pregroup diagram and draw it.

    For each ``[0-9]*.json`` file in ``join-order-benchmark-cfg-diagrams`` the
    serialized diagram is loaded, its ``result-column`` and ``table`` boxes
    are counted, and a ``Functor`` built from ``object_mapping`` and
    ``arrow_mapping`` is applied to it. The result is drawn to
    ``join-order-benchmark-pregroup-diagrams/<stem>.png``.
    """
    cfg_diagrams = glob.glob(this_folder + "\\join-order-benchmark-cfg-diagrams\\[0-9]*.json")

    for serialized_diagram in cfg_diagrams:
        base_name = Path(serialized_diagram).stem
        # Context manager so the handle is closed after reading; it used to
        # be left open for every file.
        with open(serialized_diagram, "r") as handle:
            diagram = loads(handle.read())

        # The keyword types depend on how many columns/tables the query has,
        # so the functor is rebuilt for every diagram.
        num_of_result_columns = count_boxes(diagram, "result-column")
        num_of_tables = count_boxes(diagram, "table")
        rewriter = Functor(
            ob=lambda x: object_mapping(x, num_of_result_columns, num_of_tables),
            ar=lambda f: arrow_mapping(f, num_of_result_columns, num_of_tables),
        )

        pregroup_diagram = rewriter(diagram)
        pregroup_diagram.draw(figsize=(100, 100), path=this_folder + "\\join-order-benchmark-pregroup-diagrams\\" + base_name + ".png")

In [19]:
# Run the CFG-to-pregroup rewriting over all serialized CFG diagrams.
# NOTE(review): the recorded output of this cell is the AxiomError shown below.
create_pregroup_grammar_diagrams()

AxiomError: company_type_id[::-1] >> mc[::-1] @ Id(n) does not compose with Id(n).