In [1]:
import numpy as np
import networkx as nx
import copy
import pickle
import sys
import time
sys.path.append("/home/ubuntu/CE_scheme/")
from BayesCard.Evaluation.utils import parse_query
from Schemas.stats.schema import gen_stats_light_schema
from Join_scheme.data_prepare import identify_key_values, process_stats_data

In [51]:
# Path template for the STATS CSV files; "{}" is left unformatted here and handed to the schema builder.
# NOTE(review): absolute, machine-specific paths -- consider a configurable data directory.
data_path = "/home/ubuntu/End-to-End-CardEst-Benchmark/datasets/stats_simplified/{}.csv"
schema = gen_stats_light_schema(data_path)
# identify_key_values returns all keys plus the groups of equivalent (joinable) keys printed below.
all_keys, equivalent_keys = identify_key_values(schema)
print(equivalent_keys)
# Previously trained model, restored from disk.
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
model_path = "/home/ubuntu/data_CE/CE_scheme_models/model_stats_sub_optimal_200.pkl"
with open(model_path, "rb") as f:
    new_BE = pickle.load(f)

{'posts.Id': {'comments.PostId', 'postLinks.PostId', 'posts.Id', 'votes.PostId', 'postHistory.PostId', 'tags.ExcerptPostId', 'postLinks.RelatedPostId'}, 'users.Id': {'badges.UserId', 'users.Id', 'comments.UserId', 'postHistory.UserId', 'posts.OwnerUserId', 'votes.UserId'}}


In [42]:
import numpy as np
import copy

from Join_scheme.join_graph import process_condition, get_join_hyper_graph
from Join_scheme.data_prepare import identify_key_values
from BayesCard.Evaluation.cardinality_estimation import timestamp_transorform, construct_table_query


class Factor:
    """
    A multidimensional conditional probability table over a list of variables.

    Attributes:
        variables: names of the variables; ``variables[i]`` labels axis ``i`` of ``pdfs``.
        equivalent_variables: optional alias for each variable, aligned by position with
            ``variables``; an empty list when no aliases were supplied.
        pdfs: array-like exposing ``shape`` with at least ``len(variables)`` axes.
        cardinalities: maps every variable (and its alias, when aliases are given) to the
            length of the corresponding axis of ``pdfs``.

    Args:
        variables: see above.
        pdfs: see above.
        equivalent_variables: aliases aligned with ``variables``, or ``None``/empty for none.
            When non-empty it must have at least ``len(variables)`` entries (an IndexError
            is raised otherwise).
    """
    def __init__(self, variables, pdfs, equivalent_variables=None):
        # Use a None sentinel instead of a mutable default: a default ``[]`` would be one
        # shared list stored on every Factor built without aliases.
        if equivalent_variables is None:
            equivalent_variables = []
        self.variables = variables
        self.equivalent_variables = equivalent_variables
        self.pdfs = pdfs
        self.cardinalities = dict()
        for i, var in enumerate(self.variables):
            self.cardinalities[var] = pdfs.shape[i]
            if len(equivalent_variables) != 0:
                # The alias shares the axis, hence the same cardinality.
                self.cardinalities[equivalent_variables[i]] = pdfs.shape[i]


class Bound_ensemble:
    """
    This the class where we store all the trained models and perform inference on the bound.
    """
    def __init__(self, bns, table_buckets, schema):
        """
        Store the trained per-table models and derive the key metadata from the schema.

        Args:
            bns: mapping indexed by table; each value is queried through ``query_id_prob``
                and passed to ``construct_table_query`` by the methods of this class.
            table_buckets: mapping indexed by table; each value exposes ``oned_bin_modes``,
                ``twod_bin_modes`` and ``id_attributes``.
            schema: schema object handed to ``identify_key_values``.
        """
        self.bns, self.table_buckets, self.schema = bns, table_buckets, schema
        key_names, key_groups = identify_key_values(schema)
        self.all_keys = key_names
        self.equivalent_keys = key_groups

    def parse_query_simple(self, query):
        """
        If your selection query contains no aggregation and nested sub-queries, you can use this function to parse a
        join query. Otherwise, use parse_query function.
        """
        query = query.replace(" where ", " WHERE ")
        query = query.replace(" from ", " FROM ")
        query = query.replace(" and ", " AND ")
        query = query.split(";")[0]
        query = query.strip()
        tables_all = {}
        join_cond = []
        table_query = {}
        join_keys = {}
        tables_str = query.split(" WHERE ")[0].split(" FROM ")[-1]
        for table_str in tables_str.split(","):
            table_str = table_str.strip()
            if " as " in table_str:
                tables_all[table_str.split(" as ")[-1]] = table_str.split(" as ")[0]
            else:
                tables_all[table_str.split(" ")[-1]] = table_str.split(" ")[0]

        # processing conditions
        conditions = query.split(" WHERE ")[-1].split(" AND ")
        for cond in conditions:
            table, cond, join, join_key = process_condition(cond, tables_all)
            if not join:
                attr = cond[0]
                op = cond[1]
                value = cond[2]
                if "Date" in attr:
                    assert "::timestamp" in value
                    value = timestamp_transorform(value.strip().split("::timestamp")[0])
                if table not in table_query:
                    table_query[table] = dict()
                construct_table_query(self.bns[table], table_query[table], attr, op, value)
            else:
                join_cond.append(cond)
                for tab in join_key:
                    if tab in join_keys:
                        join_keys[tab].add(join_key[tab])
                    else:
                        join_keys[tab] = set([join_key[tab]])

        return tables_all, table_query, join_cond, join_keys

    def get_all_id_conidtional_distribution(self, table_queries, join_keys, equivalent_group):
        """
        Build one Factor per joined table from that table's model.

        For every table in ``join_keys`` the table's model is asked (``query_id_prob``) for the
        id attributes and their probabilities under the table's filters (an empty filter dict
        when the table has none). Each returned id attribute is then mapped to the name of the
        equivalent-key group that contains it; these group names become the Factor's
        ``equivalent_variables``.

        Args:
            table_queries: table -> filter dict.
            join_keys: table -> collection of join keys of that table.
            equivalent_group: group name -> collection of keys belonging to the group.

        Returns:
            dict mapping each table in ``join_keys`` to its Factor.

        Raises:
            AssertionError: if the id attributes do not map one-to-one onto groups.
        """
        factors_by_table = {}
        for table, keys in join_keys.items():
            filters = table_queries[table] if table in table_queries else {}
            id_attrs, probs = self.bns[table].query_id_prob(filters, list(keys))
            group_names = [
                group
                for attr in id_attrs
                for group, members in equivalent_group.items()
                if attr in members
            ]
            # Every id attribute must belong to exactly one group.
            assert len(group_names) == len(id_attrs)
            factors_by_table[table] = Factor(id_attrs, probs, group_names)
        return factors_by_table

    def eliminate_one_key_group_general(self, tables, key_group, factors):
        rest_groups = dict()
        rest_group_tables = dict()
        for table in tables:
            assert key_group in factors[table].equivalent_variables
            temp = copy.deepcopy(factors[table].equivalent_variables)
            temp.remove(key_group)
            for keys in temp:
                if keys in rest_groups:
                    assert factors[table].cardinalities[keys] == rest_groups[keys]
                    rest_group_tables[keys].append(table)
                else:
                    rest_groups[keys] = factors[table].cardinalities[keys]
                    rest_group_tables[keys] = [table]

        new_factor_variables = []
        new_factor_cardinalities = []
        for key in rest_groups:
            new_factor_variables.append(key)
            new_factor_cardinalities.append(rest_groups[key])
        new_factor_pdf = np.zeros(tuple(new_factor_cardinalities))

    def eliminate_one_key_group(self, tables, key_group, factors, relevant_keys):
        """
        Eliminate one key group from the factors of ``tables``.

        This version only supports 2D distributions: a factor may carry ``key_group`` plus at
        most one other key group (the "rest group"), and a single rest group is tracked for
        the whole call.

        Args:
            tables: tables whose factors contain ``key_group``.
            key_group: name of the key group to eliminate.
            factors: table -> Factor; entries of tables that carry the rest group are
                replaced in place (see Returns).
            relevant_keys: ``relevant_keys[key_group][table]`` is the table's own key
                attribute belonging to ``key_group``; it is used to look up the bin modes.

        Returns:
            The value of ``compute_bound_oned`` when no table carries a rest group.
            Otherwise ``None``: the factor of every rest-group table is replaced by a 1-D
            Factor over the rest group holding the per-index bounds. Factors of tables that
            only carried ``key_group`` are left untouched.

        Raises:
            AssertionError: if a table's factor lacks ``key_group`` or if rest-group
                cardinalities disagree between tables.
        """
        rest_group = None
        rest_group_cardinalty = 0
        eliminated_tables = []
        rest_group_tables = []
        for table in tables:
            assert key_group in factors[table].equivalent_variables
            # Work on a copy so the factor's own variable list is not altered.
            temp = copy.deepcopy(factors[table].equivalent_variables)
            temp.remove(key_group)
            if len(temp) == 0:
                # key_group was this table's only variable: the table is fully eliminated.
                eliminated_tables.append(table)
            for key in temp:
                # NOTE(review): only the cardinality is compared here; the code does not check
                # that `key` is the same group as `rest_group` -- confirm this is intended.
                if rest_group:
                    assert factors[table].cardinalities[key] == rest_group_cardinalty
                    rest_group_tables.append(table)
                else:
                    rest_group = key
                    rest_group_cardinalty = factors[table].cardinalities[key]
                    rest_group_tables = [table]

        # Contributions of the fully eliminated tables; these are shared by every rest-group index.
        all_probs_eliminated = []
        all_modes_eliminated = []
        for table in eliminated_tables:
            bin_modes = self.table_buckets[table].oned_bin_modes[relevant_keys[key_group][table]]
            all_probs_eliminated.append(factors[table].pdfs)
            # Element-wise minimum of the bin modes and the factor's pdfs.
            all_modes_eliminated.append(np.minimum(bin_modes, factors[table].pdfs))
        if rest_group:
            new_factor_pdf = np.zeros(rest_group_cardinalty)
        else:
            # Nothing remains after the elimination: the bound is a single number.
            return self.compute_bound_oned(all_probs_eliminated, all_modes_eliminated)

        # One bound per index i of the rest group.
        for i in range(rest_group_cardinalty):
            rest_group_probs_eliminated = []
            rest_group_modes_eliminated = []
            for table in rest_group_tables:

                # idx_f: position of key_group among the factor's equivalent variables.
                idx_f = factors[table].equivalent_variables.index(key_group)
                # idx_b: position of the table's key attribute in the bucket's id_attributes.
                idx_b = self.table_buckets[table].id_attributes.index(relevant_keys[key_group][table])
                bin_modes = self.table_buckets[table].twod_bin_modes[relevant_keys[key_group][table]]
                # idx_f picks the pdfs slice (column i when key_group is axis 0, row i otherwise);
                # idx_b picks the bin_modes slice (column i when 0, row i otherwise).
                if idx_f == 0 and idx_b == 0:
                    rest_group_probs_eliminated.append(factors[table].pdfs[:, i])
                    rest_group_modes_eliminated.append(np.minimum(bin_modes[:, i], factors[table].pdfs[:, i]))
                elif idx_f == 0 and idx_b == 1:
                    rest_group_probs_eliminated.append(factors[table].pdfs[:, i])
                    rest_group_modes_eliminated.append(np.minimum(bin_modes[i, :], factors[table].pdfs[:, i]))
                elif idx_f == 1 and idx_b == 0:
                    rest_group_probs_eliminated.append(factors[table].pdfs[i, :])
                    rest_group_modes_eliminated.append(np.minimum(bin_modes[:, i], factors[table].pdfs[i, :]))
                else:
                    rest_group_probs_eliminated.append(factors[table].pdfs[i, :])
                    rest_group_modes_eliminated.append(np.minimum(bin_modes[i, :], factors[table].pdfs[i, :]))
            # Combine the shared contributions with the slices selected for index i.
            new_factor_pdf[i] = self.compute_bound_oned(all_probs_eliminated + rest_group_probs_eliminated, 
                                                        all_modes_eliminated + rest_group_modes_eliminated)

        # Every table that carried the rest group now shares the same 1-D factor over it.
        for table in rest_group_tables:
            factors[table] = Factor([rest_group], new_factor_pdf, [rest_group])

        return None

    def compute_bound_oned(self, all_probs, all_modes):
        all_probs = np.stack(all_probs, axis=0)
        #print(np.sum(all_probs != 0))
        all_modes = np.stack(all_modes, axis=0)
        #print(np.sum(all_modes != 0))
        multiplier = np.prod(all_modes, axis=0)
        non_zero_idx = np.where(multiplier != 0)[0]
        #print(len(non_zero_idx))
        min_number = np.amin(all_probs[:, non_zero_idx]/all_modes[:, non_zero_idx], axis=0)
        multiplier[non_zero_idx] = multiplier[non_zero_idx] * min_number
        #print("multipler:", np.sum(multiplier))
        return np.sum(multiplier)

    def get_optimal_elimination_order(self, equivalent_group, join_keys, factors):
        cardinalities = dict()
        lengths = dict()
        tables_involved = dict()
        relevant_keys = dict()
        for group in equivalent_group:
            relevant_keys[group] = dict()
            lengths[group] = len(equivalent_group[group])
            cardinalities[group] = []
            tables_involved[group] = set([])
            for keys in equivalent_group[group]:
                for table in join_keys:
                    if keys in join_keys[table]:
                        cardinalities[group].append(len(join_keys[table]))
                        tables_involved[group].add(table)
                        variables = factors[table].variables
                        variables[variables.index(keys)] = group
                        factors[table].variables = variables
                        relevant_keys[group][table] = keys
                        break
            cardinalities[group] = np.asarray(cardinalities[group])

        optimal_order = list(equivalent_group.keys())
        for i in range(len(optimal_order)):
            min_idx = i
            for j in range(i+1, len(optimal_order)):
                min_group = optimal_order[min_idx]
                curr_group = optimal_order[j]
                if np.max(cardinalities[curr_group]) < np.max(cardinalities[min_group]):
                    min_idx = j
                else:
                    min_max_tables = np.max(cardinalities[min_group])
                    min_num_max_tables = len(np.where(cardinalities[min_group] == min_max_tables)[0])
                    curr_max_tables = np.max(cardinalities[curr_group])
                    curr_num_max_tables = len(np.where(cardinalities[curr_group] == curr_max_tables)[0])
                    if curr_num_max_tables < min_num_max_tables:
                        min_idx = j
                    elif lengths[curr_group] < lengths[min_group]:
                        min_idx = j
            optimal_order[i], optimal_order[min_idx] = optimal_order[min_idx], optimal_order[i]
        return optimal_order, tables_involved, relevant_keys

    def get_cardinality_bound(self, query_str):
        """
        Compute the cardinality bound of a join query.

        The query is parsed, its join keys are grouped into equivalent-key groups, one factor
        is built per table, and the key groups are eliminated one by one in the order chosen
        by get_optimal_elimination_order.

        Args:
            query_str: SQL text accepted by parse_query_simple.

        Returns:
            The result of the last elimination step: a number when that step leaves no key
            group behind, otherwise ``None`` (see eliminate_one_key_group).

        Raises:
            ValueError: if the query yields no key group to eliminate (for example a query
                without any join condition). Previously this surfaced as an
                UnboundLocalError on ``res``.
        """
        tables_all, table_queries, join_cond, join_keys = self.parse_query_simple(query_str)
        equivalent_group = get_join_hyper_graph(join_keys, self.equivalent_keys)
        conditional_factors = self.get_all_id_conidtional_distribution(table_queries, join_keys, equivalent_group)
        optimal_order, tables_involved, relevant_keys = self.get_optimal_elimination_order(equivalent_group, join_keys,
                                                                            conditional_factors)
        if not optimal_order:
            raise ValueError("no join key group to eliminate in query: {!r}".format(query_str))

        res = None
        for key_group in optimal_order:
            tables = tables_involved[key_group]
            # Each step rewrites conditional_factors in place; only the last result is returned.
            res = self.eliminate_one_key_group(tables, key_group, conditional_factors, relevant_keys)
        return res


In [52]:
# Rebuild the ensemble with the Bound_ensemble class defined in this notebook, reusing the
# models, buckets and schema of the unpickled object.
BE = Bound_ensemble(new_BE.bns, new_BE.table_buckets, new_BE.schema)

In [53]:
# Single-query smoke test: a six-table join with filters, timed end to end.
query57 = "SELECT COUNT(*) FROM posts as p, postLinks as pl, postHistory as ph, votes as v, badges as b, users as u WHERE p.Id = pl.RelatedPostId AND u.Id = p.OwnerUserId AND u.Id = b.UserId AND u.Id = ph.UserId AND u.Id = v.UserId AND p.CommentCount>=0 AND p.CommentCount<=13 AND ph.PostHistoryTypeId=5 AND ph.CreationDate<='2014-08-13 09:20:10'::timestamp AND v.CreationDate>='2010-07-19 00:00:00'::timestamp AND b.Date<='2014-09-09 10:24:35'::timestamp AND u.Views>=0 AND u.DownVotes>=0 AND u.CreationDate>='2010-08-04 16:59:53'::timestamp AND u.CreationDate<='2014-07-22 15:15:22'::timestamp;"
t = time.time()
res = BE.get_cardinality_bound(query57)
# First print: elapsed seconds; second print: the estimated bound.
print(time.time() - t)
print(res)

0.0678858757019043
1167709503258.581


In [45]:
# Workload file: the evaluation loop expects each line to hold the SQL text and the true
# cardinality separated by "||".
# NOTE(review): absolute, machine-specific path -- consider a configurable workload directory.
query_file = "/home/ubuntu/End-to-End-CardEst-Benchmark/workloads/stats_CEB/sub_plan_queries/stats_CEB_sub_queries.sql"
with open(query_file, "r") as f:
    queries = f.readlines()


In [54]:
# Evaluate the bound on every workload query.
# `qerror` holds the plain ratio predicted / true cardinality (values below 1 mean the
# prediction is smaller than the true cardinality), not the symmetric q-error.
qerror = []
latency = []
pred = []
for i, query_str in enumerate(queries):
    # Text before "||" is the SQL; its last character is dropped before parsing.
    query = query_str.split("||")[0][:-1]
    print("========================")
    true_card = int(query_str.split("||")[-1])
    # perf_counter is monotonic and meant for measuring intervals; the latency is taken
    # once so the recorded and the printed value are the same number.
    t = time.perf_counter()
    res = BE.get_cardinality_bound(query)
    elapsed = time.perf_counter() - t
    ratio = res/true_card
    pred.append(res)
    latency.append(elapsed)
    qerror.append(ratio)
    print(f"estimating query {i}: predicted {res}, true_card {true_card}, qerror {ratio}, latency {elapsed}")

estimating query 0: predicted 79851.0, true_card 79851, qerror 1.0, latency 0.006472110748291016
estimating query 1: predicted 12029558.0, true_card 10220614, qerror 1.1769897581495594, latency 0.007853269577026367
estimating query 2: predicted 3936570.0, true_card 1458075, qerror 2.699840543181935, latency 0.00991678237915039
estimating query 3: predicted 4018726.3489689557, true_card 1709781, qerror 2.3504333882344906, latency 0.011612176895141602
estimating query 4: predicted 10535179.0, true_card 7491903, qerror 1.4062086762201806, latency 0.010989665985107422
estimating query 5: predicted 1028906.4177326594, true_card 428612, qerror 2.4005543888940566, latency 0.011984825134277344
estimating query 6: predicted 59711027.36463124, true_card 55900138, qerror 1.068173165594533, latency 0.011457204818725586
estimating query 7: predicted 11102.0, true_card 10972, qerror 1.0118483412322274, latency 0.008883237838745117
estimating query 8: predicted 60665445.0, true_card 699302, qerror 86

estimating query 59: predicted 33875.14264761277, true_card 26836, qerror 1.2623022301241902, latency 0.028447628021240234
estimating query 60: predicted 3682949.7701961263, true_card 2704241, qerror 1.3619162530987905, latency 0.006556510925292969
estimating query 61: predicted 34466.642735042726, true_card 32918, qerror 1.047045468589912, latency 0.005743265151977539
estimating query 62: predicted 88199.77975695922, true_card 86112, qerror 1.0242449340040787, latency 0.006495475769042969
estimating query 63: predicted 3419123.514235027, true_card 2488080, qerror 1.3742015989176501, latency 0.007988929748535156
estimating query 64: predicted 1596558.0, true_card 1056687, qerror 1.510909096071022, latency 0.003872394561767578
estimating query 65: predicted 31467.393518104083, true_card 20334, qerror 1.547525991841452, latency 0.005399942398071289
estimating query 66: predicted 74059.51791287246, true_card 62294, qerror 1.1888708047785093, latency 0.004387378692626953
estimating query 6

estimating query 138: predicted 3279394.53128791, true_card 298796, qerror 10.975362894041119, latency 0.008033514022827148
estimating query 139: predicted 339541802.5951356, true_card 6672465, qerror 50.88701141109554, latency 0.01325678825378418
estimating query 140: predicted 236895413.90759227, true_card 235010920, qerror 1.008018750395055, latency 0.0047876834869384766
estimating query 141: predicted 13685912.524602357, true_card 10967392, qerror 1.2478730152621842, latency 0.03125143051147461
estimating query 142: predicted 95834.92233764203, true_card 64129, qerror 1.4944084944041234, latency 0.007779121398925781
estimating query 143: predicted 12950459.231511563, true_card 12470361, qerror 1.0384991446126992, latency 0.031500816345214844
estimating query 144: predicted 159107.35384901142, true_card 140071, qerror 1.135905032797734, latency 0.00789332389831543
estimating query 145: predicted 30521.51415098377, true_card 20007, qerror 1.5255417679304129, latency 0.033969163894653

estimating query 208: predicted 129792.81583936054, true_card 130707, qerror 0.993005851556233, latency 0.004070281982421875
estimating query 209: predicted 971806.1618962524, true_card 443440, qerror 2.1915166919904663, latency 0.0048980712890625
estimating query 210: predicted 41392.49881996187, true_card 42002, qerror 0.9854887581534657, latency 0.003676176071166992
estimating query 211: predicted 78348.05250682085, true_card 79006, qerror 0.9916721832116656, latency 0.0027663707733154297
estimating query 212: predicted 268218798.31406698, true_card 105742741, qerror 2.5365220891528333, latency 0.007543087005615234
estimating query 213: predicted 3862363.7504616203, true_card 1423047, qerror 2.7141505167866, latency 0.005523681640625
estimating query 214: predicted 11784323.28323553, true_card 9976404, qerror 1.181219533935828, latency 0.005581378936767578
estimating query 215: predicted 971806.1618962524, true_card 443440, qerror 2.1915166919904663, latency 0.004864692687988281
est

estimating query 292: predicted 201981.33270332383, true_card 55293, qerror 3.6529277250885976, latency 0.006292819976806641
estimating query 293: predicted 69860.27914396262, true_card 66311, qerror 1.053524741656175, latency 0.00783991813659668
estimating query 294: predicted 1344065.4604461123, true_card 1228382, qerror 1.0941754767215022, latency 0.009340763092041016
estimating query 295: predicted 289474.6340988182, true_card 10594, qerror 27.32439438350181, latency 0.007925033569335938
estimating query 296: predicted 3307472.788772854, true_card 160411, qerror 20.61874054006804, latency 0.008213520050048828
estimating query 297: predicted 689139.8684369789, true_card 147586, qerror 4.669412196529338, latency 0.010567426681518555
estimating query 298: predicted 6158731.144236522, true_card 194687, qerror 31.634013284074037, latency 0.007197141647338867
estimating query 299: predicted 1698251.7994525228, true_card 262343, qerror 6.47340237571623, latency 0.009523630142211914
estima

estimating query 349: predicted 24330.466836517902, true_card 222, qerror 109.59669746179235, latency 0.005986690521240234
estimating query 350: predicted 3025153307.0, true_card 2809685334, qerror 1.0766875814855927, latency 0.005673408508300781
estimating query 351: predicted 1190634.5376374768, true_card 52366, qerror 22.73678603745707, latency 0.00519108772277832
estimating query 352: predicted 1338431.1228324878, true_card 3672, qerror 364.49649314610235, latency 0.0061833858489990234
estimating query 353: predicted 5573318.0, true_card 3829734, qerror 1.4552754838847815, latency 0.003925800323486328
estimating query 354: predicted 6871.026663198024, true_card 120, qerror 57.2585555266502, latency 0.0034155845642089844
estimating query 355: predicted 21942.51187099901, true_card 16, qerror 1371.406991937438, latency 0.00435948371887207
estimating query 356: predicted 514929.50668000197, true_card 4930, qerror 104.44817579716064, latency 0.004206418991088867
estimating query 357: p

estimating query 409: predicted 2388472140.910315, true_card 917236886, qerror 2.6039861429104314, latency 0.009217500686645508
estimating query 410: predicted 9882785.093590233, true_card 1129, qerror 8753.574042152553, latency 0.010612010955810547
estimating query 411: predicted 3800176.923071117, true_card 1501, qerror 2531.763439754242, latency 0.009380102157592773
estimating query 412: predicted 15494407.236987345, true_card 14692, qerror 1054.6152489101105, latency 0.007843732833862305
estimating query 413: predicted 4405654.695424814, true_card 12469, qerror 353.32863063796725, latency 0.00914907455444336
estimating query 414: predicted 349364493.8515893, true_card 9812, qerror 35605.83916139312, latency 0.010749578475952148
estimating query 415: predicted 3971957.147573416, true_card 1774105, qerror 2.238851222206925, latency 0.0055675506591796875
estimating query 416: predicted 60019048.0, true_card 54459286, qerror 1.1020902477494838, latency 0.003750324249267578
estimating q

estimating query 469: predicted 10767.011093671561, true_card 11097, qerror 0.9702632327360152, latency 0.0029916763305664062
estimating query 470: predicted 40550.0, true_card 42188, qerror 0.9611737934957808, latency 0.0060389041900634766
estimating query 471: predicted 957058.0, true_card 452644, qerror 2.114372442802732, latency 0.004122495651245117
estimating query 472: predicted 79851.0, true_card 79851, qerror 1.0, latency 0.004248619079589844
estimating query 473: predicted 283303.620727562, true_card 33295, qerror 8.508893849754077, latency 0.004118919372558594
estimating query 474: predicted 170698.20330954925, true_card 88490, qerror 1.9290112251050882, latency 0.011162281036376953
estimating query 475: predicted 3228989.2846910404, true_card 1032818, qerror 3.1263874997250634, latency 0.011228561401367188
estimating query 476: predicted 154525.46709698692, true_card 10828, qerror 14.27091495169809, latency 0.01320195198059082
estimating query 477: predicted 2544862.20550403

estimating query 548: predicted 2173259.546050367, true_card 78895, qerror 27.54622658026956, latency 0.012502193450927734
estimating query 549: predicted 81610.95425052656, true_card 32380, qerror 2.5204124228081084, latency 0.010969877243041992
estimating query 550: predicted 2010328.0390593763, true_card 738281, qerror 2.7229849326467517, latency 0.01066279411315918
estimating query 551: predicted 27742264.207650267, true_card 24804704, qerror 1.1184275453418138, latency 0.00679779052734375
estimating query 552: predicted 39231.26031992324, true_card 16394, qerror 2.3930255166477514, latency 0.011332035064697266
estimating query 553: predicted 78456.67300450394, true_card 312, qerror 251.46369552725622, latency 0.017757177352905273
estimating query 554: predicted 1178191.2083793245, true_card 7584, qerror 155.35221629474216, latency 0.017418861389160156
estimating query 555: predicted 13829900.163132004, true_card 644030, qerror 21.47399991169977, latency 0.013679027557373047
estima

estimating query 618: predicted 33392248.66475709, true_card 15012180, qerror 2.2243437438637885, latency 0.010593414306640625
estimating query 619: predicted 608245.596708066, true_card 46721, qerror 13.018676755807151, latency 0.015952587127685547
estimating query 620: predicted 340065.36655483954, true_card 79008, qerror 4.30418902585611, latency 0.010483980178833008
estimating query 621: predicted 644930104.8957077, true_card 510775350, qerror 1.2626492349243317, latency 0.00825643539428711
estimating query 622: predicted 2274347.3560206816, true_card 165958, qerror 13.704355053812902, latency 0.014013051986694336
estimating query 623: predicted 2159411.7502916884, true_card 328282, qerror 6.577917005171433, latency 0.008900880813598633
estimating query 624: predicted 982345.0956863471, true_card 137842, qerror 7.126602165423797, latency 0.013744354248046875
estimating query 625: predicted 417857836.7354766, true_card 42872686, qerror 9.746481401596267, latency 0.01715254783630371


estimating query 676: predicted 1591236.4973502457, true_card 3279466, qerror 0.48521207335287075, latency 0.015606880187988281
estimating query 677: predicted 6500663.340286832, true_card 11947976, qerror 0.5440807162892554, latency 0.010835886001586914
estimating query 678: predicted 158301.89233729104, true_card 28256, qerror 5.602416914541727, latency 0.0035331249237060547
estimating query 679: predicted 252464.5727167664, true_card 28803, qerror 8.765217953573114, latency 0.004040241241455078
estimating query 680: predicted 921814.5556375199, true_card 666440, qerror 1.383192118776664, latency 0.008043766021728516
estimating query 681: predicted 7242.675261376999, true_card 4556, qerror 1.589700452453248, latency 0.0039234161376953125
estimating query 682: predicted 67535.89868074309, true_card 47828, qerror 1.4120577628322968, latency 0.008811235427856445
estimating query 683: predicted 341459.541438348, true_card 183601, qerror 1.8597912943739305, latency 0.004084587097167969
es

estimating query 732: predicted 58674.894573930265, true_card 60960, qerror 0.9625146747691973, latency 0.01425790786743164
estimating query 733: predicted 314003.6909756358, true_card 163757, qerror 1.9174978228450434, latency 0.008083343505859375
estimating query 734: predicted 432667.78648483916, true_card 5794, qerror 74.67514437087317, latency 0.0075359344482421875
estimating query 735: predicted 4130067.715062114, true_card 9266, qerror 445.722827008646, latency 0.014707565307617188
estimating query 736: predicted 70531.83070391486, true_card 626, qerror 112.67065607654132, latency 0.013068437576293945
estimating query 737: predicted 3803382.122436616, true_card 191742, qerror 19.835936427264848, latency 0.014252901077270508
estimating query 738: predicted 38574.527059642154, true_card 9855, qerror 3.9142087325867227, latency 0.012457847595214844
estimating query 739: predicted 2658958.431672162, true_card 21714, qerror 122.45364426969522, latency 0.01572585105895996
estimating q

estimating query 800: predicted 13907777.568524092, true_card 3059536, qerror 4.545714634024274, latency 0.021754741668701172
estimating query 801: predicted 327785.9333070513, true_card 34054, qerror 9.625475224850277, latency 0.01455998420715332
estimating query 802: predicted 8869617.367398772, true_card 47513578, qerror 0.18667542502058618, latency 0.015785694122314453
estimating query 803: predicted 3453480.6026095096, true_card 660673, qerror 5.227216191080171, latency 0.013927459716796875
estimating query 804: predicted 13682382.550451802, true_card 10988114, qerror 1.2451984526600108, latency 0.013638973236083984
estimating query 805: predicted 125053.46915635437, true_card 2869, qerror 43.58782473208588, latency 0.006590843200683594
estimating query 806: predicted 3884080.0812328537, true_card 9270, qerror 418.9946150197253, latency 0.008179426193237305
estimating query 807: predicted 13907777.568524092, true_card 125401, qerror 110.90643271205248, latency 0.018964290618896484

estimating query 866: predicted 34411.488407382094, true_card 809, qerror 42.53583239478627, latency 0.03482985496520996
estimating query 867: predicted 333019.3684210526, true_card 30862, qerror 10.790595827265005, latency 0.00423884391784668
estimating query 868: predicted 2621662.114446653, true_card 155051, qerror 16.90838572112823, latency 0.0060117244720458984
estimating query 869: predicted 13498936.64586043, true_card 328480, qerror 41.09515540020832, latency 0.03976082801818848
estimating query 870: predicted 164678.94267264364, true_card 157979, qerror 1.0424103372767497, latency 0.01137995719909668
estimating query 871: predicted 6938405.095330548, true_card 33712, qerror 205.81410463130482, latency 0.007607460021972656
estimating query 872: predicted 13794036.939881247, true_card 98084, qerror 140.63493474859555, latency 0.0422818660736084
estimating query 873: predicted 335793.3187020594, true_card 34509, qerror 9.730601254804816, latency 0.010995864868164062
estimating qu

estimating query 925: predicted 2208696.191119099, true_card 365, qerror 6051.222441422189, latency 0.017429113388061523
estimating query 926: predicted 1633597.541939959, true_card 469, qerror 3483.150409253644, latency 0.019339323043823242
estimating query 927: predicted 391847.8003095712, true_card 2013, qerror 194.65861913043776, latency 0.016111373901367188
estimating query 928: predicted 6133135.160731153, true_card 16371, qerror 374.63411891339274, latency 0.015778779983520508
estimating query 929: predicted 2208696.191119099, true_card 469, qerror 4709.373541831767, latency 0.017822265625
estimating query 930: predicted 174305.0, true_card 174305, qerror 1.0, latency 0.0012233257293701172
estimating query 931: predicted 287391.0, true_card 33326, qerror 8.623627197983556, latency 0.0016450881958007812
estimating query 932: predicted 3627810.0, true_card 704085, qerror 5.1525170966573635, latency 0.003350973129272461
estimating query 933: predicted 953632.6018824622, true_card 8

estimating query 994: predicted 127929.72565425781, true_card 75613, qerror 1.691901202891802, latency 0.009149551391601562
estimating query 995: predicted 1295875.7020977847, true_card 762635, qerror 1.6992082740731604, latency 0.009097099304199219
estimating query 996: predicted 5371327.450536061, true_card 2243020, qerror 2.394685491228817, latency 0.00987100601196289
estimating query 997: predicted 5020635.418065559, true_card 99251, qerror 50.58523761035717, latency 0.007965087890625
estimating query 998: predicted 2780889.90040508, true_card 162684, qerror 17.093813161743505, latency 0.010720968246459961
estimating query 999: predicted 19824152.2032642, true_card 1307922, qerror 15.156983522919715, latency 0.013451337814331055
estimating query 1000: predicted 13790035.338640459, true_card 1541181, qerror 8.947706556621487, latency 0.011436223983764648
estimating query 1001: predicted 19900845.221676193, true_card 32280163, qerror 0.6165038640503827, latency 0.013779640197753906
e

estimating query 1071: predicted 5647381.5456999745, true_card 33326, qerror 169.45872729100324, latency 0.005158662796020508
estimating query 1072: predicted 4590575.218026762, true_card 520705, qerror 8.81607669991024, latency 0.005152463912963867
estimating query 1073: predicted 19845997.236660544, true_card 3814706, qerror 5.202497187636621, latency 0.011182308197021484
estimating query 1074: predicted 11621807.299604114, true_card 97054, qerror 119.74578378638813, latency 0.010432004928588867
estimating query 1075: predicted 19923079.0, true_card 15900001, qerror 1.2530237576714618, latency 0.022588491439819336
estimating query 1076: predicted 19817121.522676293, true_card 9387994, qerror 2.1109005313250404, latency 0.02069401741027832
estimating query 1077: predicted 205685.43937893916, true_card 11102, qerror 18.52688158700587, latency 0.007009983062744141
estimating query 1078: predicted 213576.50876744464, true_card 130148, qerror 1.6410279740560334, latency 0.0064001083374023

estimating query 1150: predicted 45036.71992450436, true_card 10773, qerror 4.1805179545627364, latency 0.005652666091918945
estimating query 1151: predicted 281521.0238189788, true_card 26711, qerror 10.539516447118372, latency 0.006089687347412109
estimating query 1152: predicted 3564085.048069958, true_card 410781, qerror 8.676362947823678, latency 0.006869792938232422
estimating query 1153: predicted 61664.631558748995, true_card 3495, qerror 17.64367140450615, latency 0.0065288543701171875
estimating query 1154: predicted 17692013.55782091, true_card 8159945, qerror 2.1681535301795427, latency 0.012465476989746094
estimating query 1155: predicted 5838764.630887736, true_card 206901, qerror 28.22008898404423, latency 0.005631208419799805
estimating query 1156: predicted 722485.2370331931, true_card 2861, qerror 252.52891892107414, latency 0.005360841751098633
estimating query 1157: predicted 19842697.872586213, true_card 3788320, qerror 5.237862132181604, latency 0.0121002197265625

estimating query 1216: predicted 4250163.71234788, true_card 12139474, qerror 0.3501110272444984, latency 0.01706075668334961
estimating query 1217: predicted 19114335.999578536, true_card 38686327, qerror 0.49408505489752325, latency 0.012934446334838867
estimating query 1218: predicted 132510.8072972169, true_card 58070, qerror 2.28191505591901, latency 0.008419036865234375
estimating query 1219: predicted 2762086.851194438, true_card 527280, qerror 5.2383683264952925, latency 0.0059583187103271484
estimating query 1220: predicted 711238.5694041156, true_card 589000, qerror 1.207535771484067, latency 0.004242897033691406
estimating query 1221: predicted 302615.67427833076, true_card 201156, qerror 1.5043830374352778, latency 0.0074770450592041016
estimating query 1222: predicted 159912.4653102188, true_card 112363, qerror 1.4231772497193809, latency 0.005934000015258789
estimating query 1223: predicted 866312.4166277862, true_card 377700, qerror 2.2936521488689072, latency 0.00635623

estimating query 1276: predicted 2693374.310746147, true_card 51472, qerror 52.32697992590431, latency 0.013244152069091797
estimating query 1277: predicted 4323137.061505344, true_card 4728606, qerror 0.9142519088089267, latency 0.013372421264648438
estimating query 1278: predicted 850819.8444403792, true_card 103316, qerror 8.235121805338759, latency 0.011874675750732422
estimating query 1279: predicted 852012.2486952301, true_card 175748, qerror 4.847920025805301, latency 0.013548135757446289
estimating query 1280: predicted 4086047.809685937, true_card 1760476, qerror 2.3209903512947276, latency 0.014911174774169922
estimating query 1281: predicted 408476.87628015643, true_card 46047, qerror 8.8708683797024, latency 0.013690471649169922
estimating query 1282: predicted 851479.5582101343, true_card 86270, qerror 9.869938080562585, latency 0.015267372131347656
estimating query 1283: predicted 850619.7641340627, true_card 186206, qerror 4.568165172626353, latency 0.015172481536865234


estimating query 1353: predicted 1171572.2882627135, true_card 4213, qerror 278.0850434993386, latency 0.015127897262573242
estimating query 1354: predicted 2546414.1975719677, true_card 1250330, qerror 2.0365936973214813, latency 0.015537023544311523
estimating query 1355: predicted 1884008.8517583632, true_card 12873, qerror 146.35351912983478, latency 0.015702486038208008
estimating query 1356: predicted 19420798.029242337, true_card 426751, qerror 45.5085003415161, latency 0.015403509140014648
estimating query 1357: predicted 18136933.12697208, true_card 14071, qerror 1288.9583630852164, latency 0.014499425888061523
estimating query 1358: predicted 3627802.6427640156, true_card 3318431, qerror 1.093228288538775, latency 0.01256871223449707
estimating query 1359: predicted 4023039.2808129224, true_card 48222, qerror 83.42746631854594, latency 0.016266345977783203
estimating query 1360: predicted 3777558.762987723, true_card 109211, qerror 34.58954467029624, latency 0.016570091247558

estimating query 1416: predicted 34356.087031634364, true_card 34664, qerror 0.9911172118519029, latency 0.03227663040161133
estimating query 1417: predicted 12381224.166966077, true_card 10993951, qerror 1.1261851328031276, latency 0.03370332717895508
estimating query 1418: predicted 13854540.769647695, true_card 11029918, qerror 1.2560873770455678, latency 0.00855255126953125
estimating query 1419: predicted 171008.0623367611, true_card 166415, qerror 1.027600050096212, latency 0.0040738582611083984
estimating query 1420: predicted 34690.23178944147, true_card 33533, qerror 1.034510237361449, latency 0.008883237838745117
estimating query 1421: predicted 13761050.200015416, true_card 10862842, qerror 1.2668001799175037, latency 0.01009225845336914
estimating query 1422: predicted 736213.4760917854, true_card 591108, qerror 1.2454804808796114, latency 0.0032036304473876953
estimating query 1423: predicted 77705.28188199812, true_card 56533, qerror 1.374511911308406, latency 0.004943370

estimating query 1483: predicted 1374144.2949182107, true_card 783881, qerror 1.7530011505805227, latency 0.008122920989990234
estimating query 1484: predicted 13752013.0, true_card 11015599, qerror 1.2484126373881257, latency 0.004898548126220703
estimating query 1485: predicted 161864.0011754522, true_card 164047, qerror 0.986692845193464, latency 0.006949901580810547
estimating query 1486: predicted 583926.5667129717, true_card 171377, qerror 3.407263324209034, latency 0.006353616714477539
estimating query 1487: predicted 11464.368571452476, true_card 11436, qerror 1.0024806375876596, latency 0.008678436279296875
estimating query 1488: predicted 34465.18006025474, true_card 32349, qerror 1.0654171708632334, latency 0.006120204925537109
estimating query 1489: predicted 163794899.9653568, true_card 70306921, qerror 2.329712319009914, latency 0.008584737777709961
estimating query 1490: predicted 1349829.447160726, true_card 697548, qerror 1.9351061821705833, latency 0.01076340675354003

estimating query 1547: predicted 1165893.024925073, true_card 151229, qerror 7.7094540394043, latency 0.010321855545043945
estimating query 1548: predicted 1081409.2966417673, true_card 204329, qerror 5.292490525778364, latency 0.007858514785766602
estimating query 1549: predicted 957012.7930149828, true_card 253539, qerror 3.7746176841234793, latency 0.008535146713256836
estimating query 1550: predicted 64437223.42016177, true_card 3778084, qerror 17.05552958064505, latency 0.009992361068725586
estimating query 1551: predicted 261446031.6451784, true_card 260664622, qerror 1.0029977587260706, latency 0.004439115524291992
estimating query 1552: predicted 13867626.0, true_card 11031325, qerror 1.2571133567363848, latency 0.0038115978240966797
estimating query 1553: predicted 93325.03212799379, true_card 57512, qerror 1.6227053854498852, latency 0.0050051212310791016
estimating query 1554: predicted 14049929.522379845, true_card 13983299, qerror 1.0047650073405314, latency 0.004496097564

estimating query 1608: predicted 736209.1581609147, true_card 589751, qerror 1.2483389738396622, latency 0.03127002716064453
estimating query 1609: predicted 134887.0, true_card 134887, qerror 1.0, latency 0.0025854110717773438
estimating query 1610: predicted 201411.36107514804, true_card 55626, qerror 3.620813308078022, latency 0.003942728042602539
estimating query 1611: predicted 58420.77529952938, true_card 53419, qerror 1.0936328890381584, latency 0.03078746795654297
estimating query 1612: predicted 9792.783905404502, true_card 9794, qerror 0.9998758326939454, latency 0.0017268657684326172
estimating query 1613: predicted 1454264.0877259036, true_card 1326706, qerror 1.0961464617827188, latency 0.03100109100341797
estimating query 1614: predicted 303187.0, true_card 303187, qerror 1.0, latency 0.002803802490234375
estimating query 1615: predicted 326887.0, true_card 326887, qerror 1.0, latency 0.02858424186706543
estimating query 1616: predicted 6187242.7710683895, true_card 13601

estimating query 1671: predicted 19403630.539907776, true_card 4040813, qerror 4.801912521046575, latency 0.00931859016418457
estimating query 1672: predicted 3627284.99999997, true_card 642530, qerror 5.645316172007487, latency 0.004821062088012695
estimating query 1673: predicted 953630.7266566264, true_card 864063, qerror 1.1036587918434495, latency 0.0058705806732177734
estimating query 1674: predicted 1315399.1478290977, true_card 428079, qerror 3.072795320090679, latency 0.011000394821166992
estimating query 1675: predicted 204620.2354873286, true_card 52257, qerror 3.915652170758532, latency 0.0056459903717041016
estimating query 1676: predicted 67537.50014722622, true_card 64699, qerror 1.0438723959756135, latency 0.0075838565826416016
estimating query 1677: predicted 1363166.4080950306, true_card 1260416, qerror 1.0815210280534606, latency 0.008948564529418945
estimating query 1678: predicted 34107154.895796776, true_card 1957551, qerror 17.423379976203314, latency 0.011070966

estimating query 1742: predicted 289448555.39535797, true_card 43927632, qerror 6.589213718494044, latency 0.01791238784790039
estimating query 1743: predicted 22370790.015909836, true_card 22310877, qerror 1.002685372516277, latency 0.005510807037353516
estimating query 1744: predicted 1408229.0, true_card 1331833, qerror 1.0573615460797263, latency 0.003635406494140625
estimating query 1745: predicted 2498.244444444444, true_card 1192, qerror 2.095842654735272, latency 0.013938426971435547
estimating query 1746: predicted 16254480.237268219, true_card 15975345, qerror 1.0174728769405743, latency 0.003345966339111328
estimating query 1747: predicted 165580.16266941943, true_card 167503, qerror 0.9885205797473444, latency 0.01378631591796875
estimating query 1748: predicted 72399.88888888889, true_card 68382, qerror 1.0587565278712072, latency 0.01237034797668457
estimating query 1749: predicted 7264755859.780199, true_card 7234622392, qerror 1.0041651749251654, latency 0.0053339004516

estimating query 1806: predicted 46912.90006204841, true_card 47100, qerror 0.9960276021666329, latency 0.0034465789794921875
estimating query 1807: predicted 3774494.6260283026, true_card 3115494, qerror 1.2115236383149197, latency 0.003609895706176758
estimating query 1808: predicted 79851.0, true_card 79851, qerror 1.0, latency 0.0012254714965820312
estimating query 1809: predicted 48436463.4007526, true_card 47376444, qerror 1.0223743977228978, latency 0.006288290023803711
estimating query 1810: predicted 11634149143.683376, true_card 11206879551, qerror 1.0381256522602003, latency 0.005456209182739258
estimating query 1811: predicted 16325379.0, true_card 16203910, qerror 1.0074962771331117, latency 0.003105640411376953
estimating query 1812: predicted 3774494.6260283026, true_card 3115494, qerror 1.2115236383149197, latency 0.003562450408935547
estimating query 1813: predicted 11634149143.683376, true_card 11206879551, qerror 1.0381256522602003, latency 0.0057010650634765625
esti

estimating query 1875: predicted 56159.92285516544, true_card 25523, qerror 2.200365272701698, latency 0.008214950561523438
estimating query 1876: predicted 1302432.4014505066, true_card 415486, qerror 3.1347203069429694, latency 0.007719993591308594
estimating query 1877: predicted 1112959.2052089958, true_card 243903, qerror 4.563122246175717, latency 0.00677943229675293
estimating query 1878: predicted 5747916.072169939, true_card 61110, qerror 94.0585186085737, latency 0.009736299514770508
estimating query 1879: predicted 1425901.4829976344, true_card 63711, qerror 22.380773853771476, latency 0.009750843048095703
estimating query 1880: predicted 15033122.598227939, true_card 878864, qerror 17.10517508764489, latency 0.009562492370605469
estimating query 1881: predicted 29409552.028957106, true_card 640157, qerror 45.94115510563363, latency 0.008998870849609375
estimating query 1882: predicted 1112959.2052089958, true_card 184562, qerror 6.030272782094883, latency 0.0100317001342773

estimating query 1951: predicted 235539.14294681486, true_card 5220, qerror 45.12244117755074, latency 0.01674795150756836
estimating query 1952: predicted 1618017.0633316664, true_card 188226, qerror 8.596140083366095, latency 0.017118453979492188
estimating query 1953: predicted 4898038.509363632, true_card 40734, qerror 120.24447658868836, latency 0.017494678497314453
estimating query 1954: predicted 4898038.509363632, true_card 35205, qerror 139.12905863836477, latency 0.018050193786621094
estimating query 1955: predicted 410972.9031485139, true_card 65529, qerror 6.2716187206963925, latency 0.006470918655395508
estimating query 1956: predicted 22391097.23979686, true_card 22337322, qerror 1.0024074166006498, latency 0.005441904067993164
estimating query 1957: predicted 69529.94021289643, true_card 35785, qerror 1.9429912033784107, latency 0.009879112243652344
estimating query 1958: predicted 4086.1193220783593, true_card 1933, qerror 2.113874455291443, latency 0.006121397018432617

estimating query 2012: predicted 529228.4164581095, true_card 599646, qerror 0.8825680759283135, latency 0.03221750259399414
estimating query 2013: predicted 143058.38735151308, true_card 128316, qerror 1.1148912633772334, latency 0.005807399749755859
estimating query 2014: predicted 59274.265655555675, true_card 40644, qerror 1.4583767753064578, latency 0.03220200538635254
estimating query 2015: predicted 68590.71141358672, true_card 55320, qerror 1.2398899387850093, latency 0.0067899227142333984
estimating query 2016: predicted 910.8364131646182, true_card 427, qerror 2.1331063540154993, latency 0.03372383117675781
estimating query 2017: predicted 77766387538.48917, true_card 78147491078, qerror 0.9951232786330856, latency 0.0052950382232666016
estimating query 2018: predicted 2766244535.922164, true_card 2738898533, qerror 1.009984306681201, latency 0.03334975242614746
estimating query 2019: predicted 4869543.267323472, true_card 1147749, qerror 4.242690054466152, latency 0.00713825

estimating query 2084: predicted 7436748293.278431, true_card 2263957167, qerror 3.284844961591286, latency 0.011410713195800781
estimating query 2085: predicted 1444912.267278917, true_card 868002, qerror 1.664641633635541, latency 0.007982254028320312
estimating query 2086: predicted 256734956.4799574, true_card 256479273, qerror 1.0009968972422867, latency 0.004656076431274414
estimating query 2087: predicted 17564070.610700723, true_card 15518956, qerror 1.13178171332535, latency 0.004412174224853516
estimating query 2088: predicted 94547.98133537905, true_card 57627, qerror 1.6406889363558583, latency 0.008425712585449219
estimating query 2089: predicted 1067097.9228848314, true_card 1427072, qerror 0.747753387975401, latency 0.009473323822021484
estimating query 2090: predicted 348367.8441235759, true_card 211843, qerror 1.6444623807422285, latency 0.008568763732910156
estimating query 2091: predicted 11641.502171970038, true_card 9610, qerror 1.211394606864728, latency 0.0119910

estimating query 2159: predicted 214013.99999999997, true_card 163984, qerror 1.3050907405600545, latency 0.01301431655883789
estimating query 2160: predicted 6048715.693742078, true_card 1141338, qerror 5.299670819461086, latency 0.0078067779541015625
estimating query 2161: predicted 75005.81147495113, true_card 56968, qerror 1.3166305904183249, latency 0.00804448127746582
estimating query 2162: predicted 4754555.151208107, true_card 134801, qerror 35.27091899324269, latency 0.013720512390136719
estimating query 2163: predicted 7513.445639628882, true_card 6710, qerror 1.1197385453992372, latency 0.010309219360351562
estimating query 2164: predicted 4754555.151208107, true_card 13909829, qerror 0.3418126240953866, latency 0.016249656677246094
estimating query 2165: predicted 262882.52183491475, true_card 260355, qerror 1.009707982696375, latency 0.013749122619628906
estimating query 2166: predicted 4500628.99566818, true_card 3483198, qerror 1.2920968017517753, latency 0.0074548721313

estimating query 2216: predicted 4272605.0859927405, true_card 173718, qerror 24.595062607172203, latency 0.035370826721191406
estimating query 2217: predicted 19528980.68044537, true_card 906966, qerror 21.53220813177712, latency 0.01565718650817871
estimating query 2218: predicted 242601.14377900944, true_card 9921, qerror 24.453295411653002, latency 0.010775566101074219
estimating query 2219: predicted 14416369.018856822, true_card 967844, qerror 14.895343690570817, latency 0.03451871871948242
estimating query 2220: predicted 19603752.410245225, true_card 10107224, qerror 1.9395783065899426, latency 0.01580190658569336
estimating query 2221: predicted 2717293.3519374803, true_card 122537, qerror 22.17528870412594, latency 0.010599851608276367
estimating query 2222: predicted 2331818.456588551, true_card 11678813, qerror 0.1996622821675928, latency 0.03752279281616211
estimating query 2223: predicted 643213.6858069575, true_card 139845, qerror 4.599475746769334, latency 0.03629899024

estimating query 2283: predicted 19817738.17118757, true_card 81958485, qerror 0.24180215350720025, latency 0.012895822525024414
estimating query 2284: predicted 832392.6591700651, true_card 847168, qerror 0.982559137231417, latency 0.006544351577758789
estimating query 2285: predicted 17959086.0, true_card 15900001, qerror 1.1295021931130695, latency 0.0029990673065185547
estimating query 2286: predicted 185838.81623788743, true_card 54737, qerror 3.395122426108253, latency 0.007765054702758789
estimating query 2287: predicted 149452.66306812348, true_card 119632, qerror 1.2492699534248652, latency 0.006186962127685547
estimating query 2288: predicted 1338630.0853600972, true_card 1177021, qerror 1.137303485120569, latency 0.006860494613647461
estimating query 2289: predicted 5371349.545410607, true_card 2247616, qerror 2.389798588998569, latency 0.004529476165771484
estimating query 2290: predicted 5094138.9085552655, true_card 172885, qerror 29.465476522285133, latency 0.00805807113

estimating query 2343: predicted 338585.7676307812, true_card 303416, qerror 1.1159126994976574, latency 0.03481793403625488
estimating query 2344: predicted 1394538.1799698793, true_card 1326566, qerror 1.0512391995346475, latency 0.032111167907714844
estimating query 2345: predicted 79851.0, true_card 79851, qerror 1.0, latency 0.0011036396026611328
estimating query 2346: predicted 277274.36368926027, true_card 18316, qerror 15.138368840863741, latency 0.007635593414306641
estimating query 2347: predicted 2762086.851194438, true_card 361425, qerror 7.642213048888256, latency 0.008804082870483398
estimating query 2348: predicted 711236.0622184487, true_card 253217, qerror 2.8088006027180192, latency 0.03339743614196777
estimating query 2349: predicted 11916163.865151381, true_card 5543342, qerror 2.1496353400442154, latency 0.012987613677978516
estimating query 2350: predicted 129488.35108064536, true_card 63198, qerror 2.0489311541606594, latency 0.012590408325195312
estimating query

estimating query 2399: predicted 6430387.248712353, true_card 44869387, qerror 0.14331346333553327, latency 0.03825092315673828
estimating query 2400: predicted 38941615.53990964, true_card 226229172, qerror 0.17213348391652, latency 0.03885221481323242
estimating query 2401: predicted 113996946.489083, true_card 299574955, qerror 0.3805289614049446, latency 0.04188394546508789
estimating query 2402: predicted 79850947.85011286, true_card 3596066, qerror 22.205084069678602, latency 0.041113853454589844
estimating query 2403: predicted 5726574.985902067, true_card 11214638, qerror 0.510633957681208, latency 0.013942956924438477
estimating query 2404: predicted 6430387.248712353, true_card 42793812, qerror 0.15026441787219968, latency 0.03893566131591797
estimating query 2405: predicted 38941615.53990964, true_card 139632376, qerror 0.27888672137119286, latency 0.041962623596191406
estimating query 2406: predicted 113996946.489083, true_card 319981016, qerror 0.35626159299738897, latency

estimating query 2461: predicted 105779768.97592029, true_card 1328499, qerror 79.62352171580129, latency 0.014326333999633789
estimating query 2462: predicted 5388569.325133598, true_card 12492036, qerror 0.4313603743323825, latency 0.013574361801147461
estimating query 2463: predicted 5236263.015668884, true_card 113409, qerror 46.17149446400977, latency 0.011312723159790039
estimating query 2464: predicted 6110946.0033638235, true_card 21582838, qerror 0.2831391313488904, latency 0.017461061477661133
estimating query 2465: predicted 4692940.180863893, true_card 182241, qerror 25.75128637827872, latency 0.015476465225219727
estimating query 2466: predicted 18829555.89622193, true_card 1785176, qerror 10.547730809859605, latency 0.01463770866394043
estimating query 2467: predicted 35493429.06422247, true_card 96498890, qerror 0.36781178585808055, latency 0.017841100692749023
estimating query 2468: predicted 16829528.569834143, true_card 975631, qerror 17.24989116769982, latency 0.0161

estimating query 2528: predicted 70142.96946858056, true_card 64243, qerror 1.0918383243089607, latency 0.0074694156646728516
estimating query 2529: predicted 331310.2141987304, true_card 357171, qerror 0.927595505230633, latency 0.008491992950439453
estimating query 2530: predicted 322886.15614350786, true_card 33758, qerror 9.564730023801998, latency 0.00596165657043457
estimating query 2531: predicted 2142007.409825555, true_card 117788, qerror 18.185277021645287, latency 0.006409168243408203
estimating query 2532: predicted 953605.3297434251, true_card 412822, qerror 2.3099673218564543, latency 0.009360790252685547
estimating query 2533: predicted 185670.30230000033, true_card 88722, qerror 2.0927199826424148, latency 0.014618158340454102
estimating query 2534: predicted 6632378.4381467365, true_card 51426, qerror 128.96936254320258, latency 0.004633426666259766
estimating query 2535: predicted 1746522.8101777385, true_card 265135, qerror 6.587296321412634, latency 0.00745677947998

estimating query 2588: predicted 320659.10374243197, true_card 32154, qerror 9.972603835990295, latency 0.015781641006469727
estimating query 2589: predicted 18621755.43964935, true_card 3482755, qerror 5.3468462293929235, latency 0.009430408477783203
estimating query 2590: predicted 3562569.460304135, true_card 649110, qerror 5.4883909665605755, latency 0.015544414520263672
estimating query 2591: predicted 884226.7953301966, true_card 790217, qerror 1.1189670626298809, latency 0.012958765029907227
estimating query 2592: predicted 36223079.11286827, true_card 1706906, qerror 21.221484436089785, latency 0.007252931594848633
estimating query 2593: predicted 1388078.7423824386, true_card 387888, qerror 3.5785555170111953, latency 0.009064674377441406
estimating query 2594: predicted 217559.2812562054, true_card 57968, qerror 3.7530927624931927, latency 0.015431880950927734
estimating query 2595: predicted 66586.89430284835, true_card 64156, qerror 1.0378903657155738, latency 0.01314735412

In [55]:
# Turn the collected q-errors into a NumPy array so the cells below can apply
# boolean masking and np.percentile to them.
# NOTE(review): `qerror` is presumably the per-query list filled by the
# evaluation loop whose log is printed above — that loop is not visible here; confirm.
qerror = np.asarray(qerror)

In [56]:
# Symmetrise the q-error on a private copy so `qerror` keeps the raw ratios.
# The log above prints qerror as predicted / true_card, so entries below 1 are
# under-estimates; each of those is replaced by its reciprocal.
temp_qerror = qerror.copy()
under_estimated = temp_qerror < 1
temp_qerror[under_estimated] = 1 / temp_qerror[under_estimated]

In [57]:
# Print the symmetrised q-error at the summary percentiles
# (the 100th percentile is the maximum).
for pct in (50, 90, 95, 99, 100):
    pct_value = np.percentile(temp_qerror, pct)
    print(f"q-error {pct}% percentile is {pct_value}")

q-error 50% percentile is 3.269330099670447
q-error 90% percentile is 45.854624152810196
q-error 95% percentile is 146.14350723707727
q-error 99% percentile is 1371.2066796610022
q-error 100% percentile is 438398.8024339036


In [50]:
# Repeat of the percentile report in the previous cell. The output saved under
# this cell does not match the previous cell's output — presumably it is left
# over from a different run; re-execute before relying on either set of numbers.
summary_percentiles = [50, 90, 95, 99, 100]
for level in summary_percentiles:
    print(f"q-error {level}% percentile is {np.percentile(temp_qerror, level)}")

q-error 50% percentile is 2.5953660431994563
q-error 90% percentile is 32.120346872131535
q-error 95% percentile is 84.13650230255473
q-error 99% percentile is 1072.511697880098
q-error 100% percentile is 981187.002587944


In [None]:
# Dump one prediction per line; opening with "w" truncates any existing file.
# NOTE(review): `pred` is defined outside this cell — presumably the list of
# estimated cardinalities produced by the evaluation loop; confirm.
with open("stats_CEB_sub_queries_CE_scheme.txt", "w") as f:
    f.writelines(str(p) + "\n" for p in pred)

In [None]:
# Load the SQL file as a list of lines (each line keeps its trailing newline).
with open("stats_CEB_exec.sql", "r") as f:
    queries = list(f)

In [None]:
# Rewrite the SQL file keeping only the text after the last "||" on each line;
# lines without "||" are written back unchanged.
# NOTE(review): this opens the same path the previous cell read from with "w",
# so the original contents are overwritten — keep a backup if the text before
# "||" is still needed.
with open("stats_CEB_exec.sql", "w") as f:
    f.writelines(q.split("||")[-1] for q in queries)