In [None]:
import os
from collections import defaultdict, namedtuple

In [None]:
# Divisor applied to every raw timing before it is plotted or printed as
# milliseconds (the name implies the raw values are nanoseconds).
ns2ms = 1_000_000

def get_vpg_result(file):
    """Load VPG timing results from a comma-separated file.

    The first line is treated as a header and skipped. Every other non-blank
    line must consist of a key followed by exactly three numeric fields, which
    are stored, in order, as ``Res(parse, extract, tree)``.

    Args:
        file: Path of the CSV file to read.

    Returns:
        dict mapping the first field of each row to a ``Res`` namedtuple of
        floats. If a key occurs more than once, the last row wins.

    Raises:
        TypeError: if a row does not carry exactly three value fields.
        ValueError: if a value field cannot be converted to ``float``.
    """
    Res = namedtuple('Res', 'parse extract tree')

    res_vpg = {}
    with open(file) as f:
        next(f, None)  # skip the header line (no-op for an empty file)
        for line in f:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no record; skipping them avoids a spurious TypeError.
            if not line.strip():
                continue
            k, *values = [t.strip() for t in line.split(",")]
            res_vpg[k] = Res(*[float(v) for v in values])

    return res_vpg

def get_result(file):
    """Read a result file laid out as repeating three-line records.

    After stripping whitespace, lines 0, 3, 6, ... are taken as names,
    lines 1, 4, 7, ... as the first timing and lines 2, 5, 8, ... as the
    second timing. Callers in this notebook read index 0 as the lexing time
    and index 1 as the parsing time.

    Args:
        file: Path of the result file.

    Returns:
        dict mapping each name to a ``(first_time, second_time)`` tuple of
        floats. A trailing record that lacks one of its timing lines is
        dropped, because the three columns are zipped together.
    """
    with open(file) as handle:
        lines = [raw.strip() for raw in handle.readlines()]

    # Convert both timing columns in full before pairing them up, so a
    # malformed number is reported even when it belongs to an unpaired record.
    first_times = list(map(float, lines[1::3]))
    second_times = list(map(float, lines[2::3]))

    return {
        label: (first, second)
        for label, first, second in zip(lines[0::3], first_times, second_times)
    }

def get_token(file):
    """Load per-input size information from a comma-separated file.

    The first line is treated as a header and skipped. Every other non-blank
    line must hold exactly three fields: a name, a file size and a token
    count (the latter two as integers).

    Args:
        file: Path of the token-info file.

    Returns:
        list of ``(name, file_size, token_num)`` tuples, sorted in ascending
        order of ``token_num``.

    Raises:
        ValueError: if a row does not have exactly three fields or a size
            field is not a valid integer.
    """
    tokens = []
    with open(file) as f:
        next(f, None)  # skip the header line (no-op for an empty file)
        for line in f:
            # Blank lines (e.g. a trailing newline) carry no record; skipping
            # them avoids a spurious unpacking error.
            if not line.strip():
                continue
            k, file_size, token_num = [t.strip() for t in line.split(",")]
            tokens.append((k, int(file_size), int(token_num)))

    # NOTE - sorted by number of tokens (tuple index 2), not by file size.
    tokens.sort(key=lambda x: x[2])

    return tokens

In [None]:
# Load the inputs for the XML benchmark comparison:
#   res_vpg    - name -> Res(parse, extract, tree) read from the VPG CSV
#   tokens     - list of (name, file_size, token_num) tuples
#   res_antlr  - name -> (lex_time, parse_time) for ANTLR
#   res_bison  - name -> (lex_time, parse_time) for Bison
# All paths are relative to the notebook's working directory.
res_vpg = get_vpg_result("../results/vpg.eval_xml.csv")
tokens = get_token("../token_info/xml.token_num")
res_antlr = get_result("../eval_antlr/xml/results/eval_antlr.xml.result")
res_bison = get_result("../eval_bison/xml/results/eval_bison.xml.result")

In [None]:
# Choose the quantity used for ordering and for the plot's x-axis:
# file size when True, number of tokens when False.
use_file_size = False

# Each entry of `tokens` is (name, file_size, token_num); pick the tuple index
# of the selected quantity and the matching axis label.
if use_file_size:
    size_index = 1
    size_name = "File Size"
else:
    size_index = 2
    size_name = "Number of Tokens"

tokens = sorted(tokens, key=lambda x: x[size_index])

# Skip the first two entries of the sorted list, i.e. the two smallest inputs
# (presumably too small to time meaningfully - TODO confirm).
names, file_size, token_num = zip(*tokens[2:])
sizes = file_size if use_file_size else token_num

x = sizes
xi = list(range(len(x)))

# Timing series in milliseconds, aligned with `names`.
# res_antlr / res_bison values are (lex, parse) tuples; index 1 is the parse time.
parse_antlr = [res_antlr[name][1] / ns2ms for name in names]
parse_bison = [res_bison[name][1] / ns2ms for name in names]
parse_vpg   = [res_vpg[name].parse / ns2ms for name in names]
extract_vpg = [res_vpg[name].extract / ns2ms for name in names]
# VPG parse + extract combined.
pe_vpg = [(res_vpg[name].parse+res_vpg[name].extract) / ns2ms for name in names]
tree_vpg    = [res_vpg[name].tree / ns2ms for name in names]
# VPG total: parse + extract + tree.
sum_vpg      = [(res_vpg[name].parse+res_vpg[name].extract+res_vpg[name].tree) / ns2ms for name in names]

In [None]:
# NOTE: these two assignments repeat ones already made in the previous cell.
# x holds the selected size values (plot x-axis); xi holds their positional
# indices 0..len(x)-1.
x = sizes
xi = list(range(len(x)))

In [None]:
import matplotlib.pyplot as plt
import scienceplots
import matplotlib as mpl
from matplotlib.ticker import ScalarFormatter
import matplotlib.ticker as ticker


# Plot parsing time against input size on log-log axes, one line per series,
# and save the figure under ../figures/ before showing it.
with plt.style.context(['science','ieee', 'high-vis']):

    # Explicit figure/axes handles instead of the implicit pyplot state.
    fig, ax = plt.subplots()
    ax.plot(x, parse_antlr, label="ANTLR")
    ax.plot(x, parse_bison, label="Bison")
    ax.plot(x, pe_vpg, label="VPG Parse + Extract")
    ax.plot(x, tree_vpg, label="VPG Conv")
    ax.plot(x, sum_vpg, label="VPG Sum")
    ax.set_xscale("log")
    ax.set_yscale("log")

    ax.set_xlabel(size_name)
    ax.set_ylabel("Parsing Time (ms)")
    ax.set_title("Parsing XML")
    ax.legend()

    # The output file name reflects which size measure is on the x-axis.
    if use_file_size:
        figure_path = '../figures/ParseXML_file_size.png'
    else:
        figure_path = '../figures/ParseXML_num_tokens.png'

    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(os.path.dirname(figure_path), exist_ok=True)
    fig.savefig(figure_path)

    plt.show()

In [None]:
def print_tables(names, sizes, table_names=("ORTCA", "SUAS", "address", "cd", "po")):
    """Print timing tables (rows joined with `` & ``) for selected inputs.

    Three groups of lines are printed, each with one line per entry of
    ``table_names``:

    1. ``name & size`` followed by seven ``<value> ms`` columns: ANTLR parse,
       Bison parse, VPG parse+extract, VPG tree, VPG parse+extract+tree,
       ANTLR lex, Bison lex; the line ends with ``\\\\``.
    2. ``size`` followed by four ``<value> ms`` columns: ANTLR lex,
       VPG parse+extract, VPG parse+extract+tree, and that VPG total plus the
       ANTLR lex time.
    3. ``size`` alone.

    Timings come from the module-level ``res_antlr``, ``res_bison`` and
    ``res_vpg`` mappings and are divided by ``ns2ms`` before formatting.

    Args:
        names: Input names; used only together with ``sizes`` to look up the
            size of each printed row.
        sizes: Size value for each entry of ``names`` (same order).
        table_names: Names of the inputs to print, in row order. Defaults to
            the five inputs this function originally hard-coded.

    Raises:
        KeyError: if an entry of ``table_names`` is missing from ``names`` or
            from one of the result mappings.
    """
    name_size = dict(zip(names, sizes))
    rows = list(table_names)
    row_sizes = [name_size[name] for name in rows]

    # res_antlr / res_bison values are (lex, parse) tuples.
    parse_antlr = [res_antlr[name][1] / ns2ms for name in rows]
    parse_bison = [res_bison[name][1] / ns2ms for name in rows]
    lex_antlr = [res_antlr[name][0] / ns2ms for name in rows]
    lex_bison = [res_bison[name][0] / ns2ms for name in rows]
    pe_vpg = [(res_vpg[name].parse+res_vpg[name].extract) / ns2ms for name in rows]
    conv_vpg = [res_vpg[name].tree / ns2ms for name in rows]
    sum_vpg = [(res_vpg[name].parse+res_vpg[name].extract+res_vpg[name].tree) / ns2ms for name in rows]

    # Table 1: full comparison row, terminated by a literal "\\".
    for i, (name, size) in enumerate(zip(rows, row_sizes)):
        print(( "{} & {}" + " & {:.0f} ms"*7 + "\\\\").format(name, size, parse_antlr[i],parse_bison[i],pe_vpg[i],conv_vpg[i], sum_vpg[i], lex_antlr[i], lex_bison[i]))

    # Table 2: VPG figures next to the ANTLR lexing time and their sum.
    for i, size in enumerate(row_sizes):
        print(( "{}"+" & {:.0f} ms"*4).format(size, lex_antlr[i], pe_vpg[i], sum_vpg[i], sum_vpg[i] + lex_antlr[i]))

    # Table 3: sizes only.
    for size in row_sizes:
        print(( "{}").format(size))