In [1]:
import pickle
import sys
sys.path.append("../")
from Join_scheme.data_prepare import process_stats_data
from BayesCard.Models.Bayescard_BN import Bayescard_BN
import time
import pandas as pd
import numpy as np
from BayesCard.Evaluation.cardinality_estimation import parse_query_single_table

In [2]:
from Join_scheme.data_prepare import process_stats_data
data_path = "/home/ubuntu/End-to-End-CardEst-Benchmark/datasets/stats_simplified/{}.csv"
model_folder = "/home/ubuntu/data_CE/saved_models"
data, null_values, key_attrs, table_buckets, equivalent_keys, schema, bin_size, all_bin_means, all_bin_width = process_stats_data(data_path,
                                                model_folder, 200, "sub_optimal", return_bin_means=True)

bucketizing equivalent key group: {'comments.PostId', 'postLinks.PostId', 'postLinks.RelatedPostId', 'votes.PostId', 'postHistory.PostId', 'posts.Id', 'tags.ExcerptPostId'}
bucketizing equivalent key group: {'posts.OwnerUserId', 'postHistory.UserId', 'votes.UserId', 'users.Id', 'badges.UserId', 'comments.UserId'}


In [3]:
all_bin_means['tags.ExcerptPostId'] = np.ones(48)
all_bin_width['tags.ExcerptPostId'] = np.ones(48) * len(data["tags"]["tags.ExcerptPostId"])/48
all_bin_means['posts.Id'] = np.ones(48)
all_bin_width['posts.Id'] = np.ones(48) * len(data["posts"]["posts.Id"])/48
all_bin_means['users.Id'] = np.ones(107)
all_bin_width['users.Id'] = np.ones(107) * len(data["users"]["users.Id"])/107


In [4]:
def learn_histogram(data, key_attrs, all_bin_means, all_bin_width, all_bin_size, bin_size=50):
    all_histogram = dict()
    all_boundary = dict()
    for table in data:
        all_histogram[table] = dict()
        all_boundary[table] = dict()
        for attr in data[table]:
            if attr in key_attrs[table]:
                assert all_bin_size[table][attr] == len(all_bin_means[attr]) == len(all_bin_width[attr])
            else:
                hist, curr_bins = np.histogram(data[table][attr].values, bins=bin_size)
                all_histogram[table][attr] = hist/np.sum(hist)
                all_boundary[table][attr] = curr_bins
    return all_histogram, all_boundary

In [5]:
all_histogram, all_boundary = learn_histogram(data, key_attrs, all_bin_means, all_bin_width, bin_size, bin_size=50)

In [15]:
import numpy as np
import copy

from Join_scheme.join_graph import process_condition, get_join_hyper_graph
from Join_scheme.data_prepare import identify_key_values
from BayesCard.Evaluation.cardinality_estimation import timestamp_transorform, construct_table_query

OPS = {
    '>': np.greater,
    '<': np.less,
    '>=': np.greater_equal,
    '<=': np.less_equal,
    '=': np.equal,
    '==': np.equal
}

class Bound_ensemble:
    """
    This the class where we store all the trained models and perform inference on the bound.
    """
    def __init__(self, hist, boundary, all_bin_means, all_key_size, schema):
        self.hist = hist
        self.boundary = boundary
        self.schema = schema
        self.all_bin_means = all_bin_means
        self.all_key_size = all_key_size
        self.all_keys, self.equivalent_keys = identify_key_values(schema)

    def parse_query_simple(self, query):
        """
        If your selection query contains no aggregation and nested sub-queries, you can use this function to parse a
        join query. Otherwise, use parse_query function.
        """
        query = query.replace(" where ", " WHERE ")
        query = query.replace(" from ", " FROM ")
        query = query.replace(" and ", " AND ")
        query = query.split(";")[0]
        query = query.strip()
        tables_all = {}
        join_cond = []
        table_probs = {}
        join_keys = {}
        tables_str = query.split(" WHERE ")[0].split(" FROM ")[-1]
        for table_str in tables_str.split(","):
            table_str = table_str.strip()
            if " as " in table_str:
                tables_all[table_str.split(" as ")[-1]] = table_str.split(" as ")[0]
            else:
                tables_all[table_str.split(" ")[-1]] = table_str.split(" ")[0]

        # processing conditions
        conditions = query.split(" WHERE ")[-1].split(" AND ")
        for cond in conditions:
            table, cond, join, join_key = process_condition(cond, tables_all)
            if table not in table_probs:
                table_probs[table] = 1
            if not join:
                attr = cond[0]
                op = cond[1]
                value = cond[2]
                if "Date" in attr:
                    assert "::timestamp" in value
                    value = timestamp_transorform(value.strip().split("::timestamp")[0])
                curr_prob = 0
                for i in range(0, len(self.boundary[table][attr])-1):
                    if OPS[op](self.boundary[table][attr][i], value):
                        curr_prob += self.hist[table][attr][i]
                table_probs[table] *= curr_prob
                #construct_table_query(self.bns[table], table_query[table], attr, op, value)
            else:
                join_cond.append(cond)
                for tab in join_key:
                    if tab in join_keys:
                        join_keys[tab].add(join_key[tab])
                    else:
                        join_keys[tab] = set([join_key[tab]])
        final_probs = 1
        for table in table_probs:
            final_probs *= table_probs[table]
        #print(final_probs)
        return tables_all, final_probs, join_cond, join_keys
    
    def multiply_hist_oned(self, all_probs, all_means):
        all_probs = np.stack(all_probs, axis=0)
        all_means = np.stack(all_means, axis=0)
        multiplier = np.prod(all_means, axis=0)
        min_number = np.amin(all_probs, axis=0)
        multiplier = multiplier * min_number
        return np.sum(multiplier)

    def eliminate_one_key_group(self, key_group, relevant_keys, res):
        all_means = []
        all_probs = []
        for key in relevant_keys:
            if res:
                hist = self.all_bin_means[key] * self.all_key_size[key]
                #print("key", np.sum(hist))
                ratio = res/np.sum(hist)
                all_means.append(self.all_bin_means[key]*ratio)
                all_probs.append(self.all_key_size[key])
            else:
                #print(key, np.sum(self.all_bin_means[key] * self.all_key_size[key]))
                all_means.append(self.all_bin_means[key])
                all_probs.append(self.all_key_size[key])
        return self.multiply_hist_oned(all_probs, all_means)
        

    def get_cardinality(self, query_str):
        tables_all, table_probs, join_cond, join_keys = self.parse_query_simple(query_str)
        equivalent_group = get_join_hyper_graph(join_keys, self.equivalent_keys)
        res = None
        for key_group in equivalent_group:
            res = self.eliminate_one_key_group(key_group, equivalent_group[key_group], res)
        if res <= 1:
            res = 1
        return res * table_probs

In [14]:
BE = Bound_ensemble(all_histogram, all_boundary, all_bin_means, all_bin_width, schema)

In [16]:
query_file = "/home/ubuntu/End-to-End-CardEst-Benchmark/workloads/stats_CEB/sub_plan_queries/stats_CEB_sub_queries.sql"
with open(query_file, "r") as f:
    queries = f.readlines()

In [17]:
qerror = []
latency = []
pred = []
for i, query_str in enumerate(queries):
    #if i == 10: break
    query = query_str.split("||")[0][:-1]
    print("========================")
    true_card = int(query_str.split("||")[-1])
    t = time.time()
    res = BE.get_cardinality(query)
    pred.append(res)
    latency.append(time.time() - t)
    qerror.append(res/true_card)
    print(f"estimating query {i}: predicted {res}, true_card {true_card}, qerror {res/true_card}, latency {time.time() - t}")

0.9999999999999997
badges.UserId 72862.0
users.Id 40325.00000000001
estimating query 0: predicted 47733.7567245126, true_card 79851, qerror 0.5977853342414321, latency 0.0025177001953125
0.9173001348211466
comments.UserId 170648.0
users.Id 40325.00000000001
badges.UserId 72862.0
estimating query 1: predicted 14362954.23812714, true_card 10220614, qerror 1.4052926994530015, latency 0.0025436878204345703
0.12985859910437597
postHistory.UserId 277348.0
users.Id 40325.00000000001
comments.UserId 170648.0
estimating query 2: predicted 34153855.38067131, true_card 1458075, qerror 23.423935929682155, latency 0.0014815330505371094
0.13792920434226688
postHistory.UserId 277348.0
users.Id 40325.00000000001
comments.UserId 170648.0
estimating query 3: predicted 36276489.43055708, true_card 1709781, qerror 21.21703857427184, latency 0.0017466545104980469
0.9173001348211468
comments.UserId 170648.0
users.Id 40325.00000000001
votes.UserId 29638.0
estimating query 4: predicted 10393431.07729698, true

estimating query 92: predicted 10379396.346368738, true_card 242937, qerror 42.72464197042335, latency 0.003493070602416992
0.8974362104245073
users.Id 40325.00000000001
comments.UserId 170648.0
estimating query 93: predicted 140066.8967822857, true_card 42543, qerror 3.292360594746156, latency 0.00237274169921875
0.9758701571887733
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
votes.UserId 29638.0
estimating query 94: predicted 2798236.3089985843, true_card 84481, qerror 33.12267029271178, latency 0.002359628677368164
0.9560288824069164
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
estimating query 95: predicted 68867.10349724685, true_card 40496, qerror 1.7005902681066487, latency 0.0020132064819335938
0.9770241241231732
users.Id 40325.00000000001
votes.UserId 29638.0
estimating query 96: predicted 28474.707672126802, true_card 244, qerror 116.69962160707706, latency 0.002622842788696289
0.8951658267571954
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
comments.Us

votes.UserId 29638.0
estimating query 162: predicted 0.0, true_card 2201378, qerror 0.0, latency 0.0022089481353759766
0.0
users.Id 40325.00000000001
comments.UserId 170648.0
badges.UserId 72862.0
estimating query 163: predicted 0.0, true_card 3321285, qerror 0.0, latency 0.0007789134979248047
0.0
users.Id 40325.00000000001
comments.UserId 170648.0
estimating query 164: predicted 0.0, true_card 24049, qerror 0.0, latency 0.0010790824890136719
1
users.Id 40325.00000000001
votes.UserId 29638.0
badges.UserId 72862.0
estimating query 165: predicted 993606.9525606718, true_card 1056687, qerror 0.940303942946844, latency 0.0031461715698242188
0.982120272783633
users.Id 40325.00000000001
votes.UserId 29638.0
estimating query 166: predicted 28623.23148006299, true_card 33891, qerror 0.844567332922103, latency 0.001674652099609375
0.982120272783633
users.Id 40325.00000000001
badges.UserId 72862.0
estimating query 167: predicted 46880.2901752659, true_card 77791, qerror 0.6026441384641655, laten

posts.Id 91976.0
votes.PostId 328064.0
postHistory.PostId 259700.0
estimating query 245: predicted 889476.313761421, true_card 1331347, qerror 0.6681025410816421, latency 0.0016906261444091797
0.1610092472428271
posts.Id 91976.0
votes.PostId 328064.0
estimating query 246: predicted 31616.849183583156, true_card 150600, qerror 0.20993923760679387, latency 0.002497434616088867
0.1610092472428271
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
estimating query 247: predicted 11598.227520040627, true_card 41569, qerror 0.27901146335106997, latency 0.0011701583862304688
0.1610092472428271
posts.Id 91976.0
votes.PostId 328064.0
postHistory.PostId 259700.0
estimating query 248: predicted 143213.9117190511, true_card 915414, qerror 0.15644715038119486, latency 0.0016901493072509766
0.1610092472428271
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
key 91976.0
key 259700.0
estimating query 249: predicted 3052.0833590462776, true_card 209583, qerror 0.014562647538427629, latency 0.0014

votes.UserId 29638.0
posts.OwnerUserId 90361.0
estimating query 335: predicted 0.0, true_card 222147, qerror 0.0, latency 0.0028939247131347656
0.0
postHistory.UserId 277348.0
users.Id 40325.00000000001
comments.UserId 170648.0
votes.UserId 29638.0
posts.OwnerUserId 90361.0
estimating query 336: predicted 0.0, true_card 60601561, qerror 0.0, latency 0.0016624927520751953
0.0
postHistory.UserId 277348.0
users.Id 40325.00000000001
comments.UserId 170648.0
estimating query 337: predicted 0.0, true_card 393181, qerror 0.0, latency 0.0009076595306396484
0.987550557930065
comments.UserId 170648.0
users.Id 40325.00000000001
badges.UserId 72862.0
estimating query 338: predicted 15462925.309775569, true_card 15890691, qerror 0.973080737003543, latency 0.0006823539733886719
0.987550557930065
comments.UserId 170648.0
users.Id 40325.00000000001
votes.UserId 29638.0
estimating query 339: predicted 11189400.578463415, true_card 11026771, qerror 1.0147486130312686, latency 0.0008251667022705078
0.983

users.Id 40325.00000000001
postHistory.UserId 277348.0
posts.OwnerUserId 90361.0
badges.UserId 72862.0
estimating query 439: predicted 0.0, true_card 2418433, qerror 0.0, latency 0.0028624534606933594
0.0
users.Id 40325.00000000001
postHistory.UserId 277348.0
posts.OwnerUserId 90361.0
comments.UserId 170648.0
badges.UserId 72862.0
estimating query 440: predicted 0.0, true_card 86914174, qerror 0.0, latency 0.003135204315185547
0.0
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
comments.UserId 170648.0
estimating query 441: predicted 0.0, true_card 311724, qerror 0.0, latency 0.0016667842864990234
1
users.Id 40325.00000000001
postHistory.UserId 277348.0
comments.UserId 170648.0
estimating query 442: predicted 263008038.0985751, true_card 263105194, qerror 0.999630733624267, latency 0.0012531280517578125
0.9815155727542544
users.Id 40325.00000000001
comments.UserId 170648.0
badges.UserId 72862.0
estimating query 443: predicted 15368430.375546824, true_card 15716615, qerror 0.977846

estimating query 525: predicted 47733.75672451261, true_card 79851, qerror 0.5977853342414323, latency 0.0032804012298583984
1
users.Id 40325.00000000001
votes.UserId 29638.0
badges.UserId 72862.0
estimating query 526: predicted 993606.9525606718, true_card 1056687, qerror 0.940303942946844, latency 0.002596139907836914
1.0
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
key 91976.0
key 1032.0
estimating query 527: predicted 56416.62350895754, true_card 593, qerror 95.13764504040057, latency 0.0020318031311035156
1
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
votes.UserId 29638.0
key 91976.0
key 1032.0
estimating query 528: predicted 89394372.95415582, true_card 70752, qerror 1263.4889890625823, latency 0.003373861312866211
1
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
badges.UserId 72862.0
key 91976.0
key 1032.0
estimating query 529: predicted 143903022.22486958, true_card 78986, qerror 1821.8801081820775, latency 0.002727985382080078
1.0
users.Id 40325.000000000

estimating query 598: predicted 0.0, true_card 1593915597881, qerror 0.0, latency 0.0028035640716552734
0.0
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
badges.UserId 72862.0
postHistory.UserId 277348.0
votes.UserId 29638.0
key 91976.0
key 10807.0
estimating query 599: predicted 0.0, true_card 17849233970, qerror 0.0, latency 0.0056993961334228516
0.00013690840161962412
posts.Id 91976.0
postLinks.RelatedPostId 10807.0
estimating query 600: predicted 1.479569096303278, true_card 7972, qerror 0.00018559572206513773, latency 0.0007259845733642578
0.0
users.Id 40325.00000000001
postHistory.UserId 277348.0
posts.OwnerUserId 90361.0
estimating query 601: predicted 0.0, true_card 416123, qerror 0.0, latency 0.0021839141845703125
0.00014903190326337188
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
votes.UserId 29638.0
estimating query 602: predicted 427.33808369760584, true_card 196751, qerror 0.002171974138365781, latency 0.0036613941192626953
0.00014922021154339948
users.Id 40

posts.OwnerUserId 90361.0
badges.UserId 72862.0
key 91976.0
key 153297.0
estimating query 667: predicted 65748119.7593148, true_card 846751, qerror 77.64752537560014, latency 0.0035920143127441406
1
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserId 72862.0
key 91976.0
key 328064.0
key 10807.0
estimating query 668: predicted 46660352332.809784, true_card 13327432, qerror 3501.0760011988646, latency 0.0025827884674072266
0.9796604055164005
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 328064.0
key 10807.0
estimating query 669: predicted 354837.35447703325, true_card 179357, qerror 1.978385869952292, latency 0.00263214111328125
0.9796604055164005
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserId 72862.0
key 91976.0
key 10807.0
estimating query 670: predicted 140976093.1078513, true_card 98079, qerror 1437.3728637919564, latency 0.0027768611907958984
0.9796604055164005
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserI

users.Id 40325.00000000001
key 91976.0
key 10807.0
estimating query 730: predicted 494.6125719485134, true_card 3757, qerror 0.13165093743638898, latency 0.0017104148864746094
0.008767142398551758
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserId 72862.0
key 91976.0
key 328064.0
estimating query 731: predicted 755156.8526501692, true_card 1120872, qerror 0.6737226486611934, latency 0.003914833068847656
0.008767142398551754
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 328064.0
estimating query 732: predicted 296.056324512767, true_card 60960, qerror 0.004856567003162188, latency 0.002520322799682617
0.008767142398551754
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
badges.UserId 72862.0
estimating query 733: predicted 31895.563351805915, true_card 163757, qerror 0.19477374006488832, latency 0.0019948482513427734
0.0
posts.Id 91976.0
votes.PostId 328064.0
postLinks.RelatedPostId 10807.0
comments.PostId 153297.0
estimating query 734: predict

postLinks.PostId 9502.0
comments.PostId 153297.0
key 40325.00000000001
key 170648.0
key 29638.0
estimating query 807: predicted 0.0, true_card 125401, qerror 0.0, latency 0.0039501190185546875
0.0
posts.Id 91976.0
postLinks.PostId 9502.0
comments.PostId 153297.0
key 40325.00000000001
key 170648.0
estimating query 808: predicted 0.0, true_card 1593, qerror 0.0, latency 0.0017151832580566406
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
key 40325.00000000001
key 29638.0
key 170648.0
estimating query 809: predicted 0.0, true_card 1994249, qerror 0.0, latency 0.004055976867675781
0.0
users.Id 40325.00000000001
comments.UserId 170648.0
key 153297.0
key 91976.0
key 259700.0
estimating query 810: predicted 0.0, true_card 24802, qerror 0.0, latency 0.003311634063720703
0.0
users.Id 40325.00000000001
comments.UserId 170648.0
votes.UserId 29638.0
key 153297.0
key 91976.0
estimating query 811: predicted 0.0, true_card 341092, qerror 0.0, latency 0.002779245376586914
0.

key 29638.0
estimating query 879: predicted 627468.8105800672, true_card 86857, qerror 7.224159372072109, latency 0.003221750259399414
0.33987126659203376
posts.Id 91976.0
postLinks.PostId 9502.0
comments.PostId 153297.0
key 40325.00000000001
key 170648.0
estimating query 880: predicted 8078.815486755405, true_card 30286, qerror 0.2667508250265933, latency 0.0034356117248535156
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
key 40325.00000000001
key 29638.0
key 170648.0
estimating query 881: predicted 0.0, true_card 311826, qerror 0.0, latency 0.003435373306274414
0.0
users.Id 40325.00000000001
comments.UserId 170648.0
key 153297.0
key 91976.0
key 259700.0
estimating query 882: predicted 0.0, true_card 152546, qerror 0.0, latency 0.0024895668029785156
0.33289232520036854
users.Id 40325.00000000001
comments.UserId 170648.0
votes.UserId 29638.0
key 153297.0
key 91976.0
estimating query 883: predicted 216701589.7163988, true_card 328480, qerror 659.7101489174343

key 170648.0
key 40325.00000000001
key 72862.0
estimating query 959: predicted 660160953.0376434, true_card 70129559, qerror 9.413447944790919, latency 0.0013017654418945312
0.9987380511119781
posts.Id 91976.0
votes.PostId 328064.0
comments.PostId 153297.0
key 170648.0
key 40325.00000000001
key 72862.0
estimating query 960: predicted 9040551712.345188, true_card 81746778, qerror 110.59214728126885, latency 0.001001119613647461
0.9987380511119781
posts.Id 91976.0
votes.PostId 328064.0
postLinks.RelatedPostId 10807.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 961: predicted 11261412.851995625, true_card 10257634, qerror 1.0978567622899809, latency 0.0008273124694824219
1
badges.UserId 72862.0
users.Id 40325.00000000001
comments.UserId 170648.0
key 91976.0
key 259700.0
key 10807.0
key 153297.0
estimating query 962: predicted 319663053423123.9, true_card 26312006, qerror 12148942.707869703, latency 0.006453990936279297
0.9987380511119781
badges.UserId 72862.0
use

postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 1029: predicted 207829.78091765757, true_card 227730, qerror 0.9126148549495349, latency 0.0026519298553466797
0.0
posts.Id 91976.0
votes.PostId 328064.0
postLinks.RelatedPostId 10807.0
comments.PostId 153297.0
estimating query 1030: predicted 0.0, true_card 664648, qerror 0.0, latency 0.0035064220428466797
0.9692962514905114
posts.Id 91976.0
postLinks.RelatedPostId 10807.0
comments.PostId 153297.0
key 170648.0
key 40325.00000000001
key 72862.0
estimating query 1031: predicted 2516190.253917886, true_card 3747965, qerror 0.6713483861022944, latency 0.0012431144714355469
0.0
posts.Id 91976.0
votes.PostId 328064.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 1032: predicted 0.0, true_card 3091669, qerror 0.0, latency 0.0031201839447021484
0.9692962514905114
comments.PostId 153297.0
posts.Id 91976.0
postHistory.PostId 259700.0
key 170648.0
key 40325.00000000001
key 72862.0
estimating query 1033:

0.9166618464782254
posts.Id 91976.0
postLinks.RelatedPostId 10807.0
comments.PostId 153297.0
estimating query 1109: predicted 40005.48919622854, true_card 19618, qerror 2.039223631166711, latency 0.0005197525024414062
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 1110: predicted 0.0, true_card 2666, qerror 0.0, latency 0.0008881092071533203
0.9166618464782254
posts.Id 91976.0
votes.PostId 328064.0
comments.PostId 153297.0
estimating query 1111: predicted 606650.2542810695, true_card 562905, qerror 1.0777133873052638, latency 0.003301858901977539
0.9166618464782251
comments.PostId 153297.0
posts.Id 91976.0
key 170648.0
key 40325.00000000001
key 72862.0
estimating query 1112: predicted 10461148.714564571, true_card 9928635, qerror 1.0536341314354463, latency 0.002240896224975586
0.0
posts.Id 91976.0
postLinks.RelatedPostId 10807.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 1113: predicted 0.0, true_card 1218, qerror 

comments.PostId 153297.0
key 72862.0
key 40325.00000000001
key 170648.0
estimating query 1179: predicted 0.0, true_card 2734756, qerror 0.0, latency 0.005812406539916992
0.0
badges.UserId 72862.0
users.Id 40325.00000000001
comments.UserId 170648.0
key 91976.0
key 259700.0
key 328064.0
key 10807.0
key 153297.0
estimating query 1180: predicted 0.0, true_card 2619523, qerror 0.0, latency 0.002997875213623047
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
votes.PostId 328064.0
postLinks.RelatedPostId 10807.0
comments.PostId 153297.0
key 72862.0
key 40325.00000000001
key 170648.0
estimating query 1181: predicted 0.0, true_card 1639421, qerror 0.0, latency 0.005685329437255859
0.005952987266614248
posts.Id 91976.0
comments.PostId 153297.0
estimating query 1182: predicted 425.6047597779973, true_card 86053, qerror 0.004945844535088809, latency 0.0012574195861816406
1
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 1183: predicted 276501.8619857278, tru

posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 328064.0
key 153297.0
estimating query 1242: predicted 575.9468931711338, true_card 115152, qerror 0.005001623012810318, latency 0.004614591598510742
0.010769549785213148
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
badges.UserId 72862.0
key 91976.0
key 153297.0
estimating query 1243: predicted 722778.6741660277, true_card 184501, qerror 3.917478356030741, latency 0.002668619155883789
0.011805709120954678
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserId 72862.0
key 91976.0
key 328064.0
key 259700.0
estimating query 1244: predicted 64526494.537086554, true_card 10322152, qerror 6.251263742007147, latency 0.0019385814666748047
0.011740486430118068
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 328064.0
key 259700.0
estimating query 1245: predicted 498.1244369083357, true_card 419910, qerror 0.0011862647636596787, latency 0.0020647048950195312
0.011740486430118068
users.Id 

estimating query 1312: predicted 0.05304232167731096, true_card 175892, qerror 3.0156187704563574e-07, latency 0.0027959346771240234
0.0
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserId 72862.0
key 91976.0
key 328064.0
key 153297.0
estimating query 1313: predicted 0.0, true_card 1440551, qerror 0.0, latency 0.010126113891601562
0.0
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 328064.0
key 153297.0
estimating query 1314: predicted 0.0, true_card 55797, qerror 0.0, latency 0.006962299346923828
1.8445030172896784e-06
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
badges.UserId 72862.0
key 91976.0
key 153297.0
estimating query 1315: predicted 123.79045288990098, true_card 342858, qerror 0.0003610545849590821, latency 0.0041162967681884766
0.0
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserId 72862.0
key 91976.0
key 328064.0
key 259700.0
estimating query 1316: predicted 0.0, true_card 3387979, qerror 0.0, latency 0.00761413574

postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 1375: predicted 2851764.261375317, true_card 2792760, qerror 1.0211275803775894, latency 0.0012359619140625
0.008916313922465183
posts.Id 91976.0
votes.PostId 328064.0
postHistory.PostId 259700.0
estimating query 1376: predicted 7930.850040093967, true_card 435449, qerror 0.01821303996586045, latency 0.001277923583984375
0.008927580072210032
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
badges.UserId 72862.0
key 91976.0
key 259700.0
estimating query 1377: predicted 431660.341614723, true_card 1007297, qerror 0.42853333387741943, latency 0.003621816635131836
0.008889500871406063
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
key 91976.0
key 259700.0
estimating query 1378: predicted 168.50894060095374, true_card 42007, qerror 0.004011449058512956, latency 0.0023870468139648438
0.008916313922465181
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
badges.UserId 72862.0
key 91976.0
key 328064.0
estimating

0.9605951642901426
votes.UserId 29638.0
users.Id 40325.00000000001
estimating query 1458: predicted 27995.896743049187, true_card 27929, qerror 1.002395243046625, latency 0.0008721351623535156
0.960595164290143
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
estimating query 1459: predicted 69196.03352523729, true_card 72152, qerror 0.9590313993407984, latency 0.002561807632446289
0.960595164290143
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
votes.UserId 29638.0
estimating query 1460: predicted 2754436.3839432118, true_card 1100297, qerror 2.503357169876144, latency 0.0008027553558349609
1
comments.PostId 153297.0
posts.Id 91976.0
postHistory.PostId 259700.0
estimating query 1461: predicted 276501.8619857278, true_card 704085, qerror 0.3927109113043564, latency 0.002486705780029297
1.0
comments.PostId 153297.0
posts.Id 91976.0
votes.PostId 328064.0
estimating query 1462: predicted 661803.7574180634, true_card 864796, qerror 0.7652715292601531, latency 0.001724720001220703

1
users.Id 40325.00000000001
votes.UserId 29638.0
badges.UserId 72862.0
estimating query 1541: predicted 993606.9525606718, true_card 1056687, qerror 0.940303942946844, latency 0.0005636215209960938
0.9987352758834467
users.Id 40325.00000000001
votes.UserId 29638.0
estimating query 1542: predicted 29107.464514393912, true_card 23113, qerror 1.259354671154498, latency 0.0007762908935546875
0.4652793845417312
users.Id 40325.00000000001
badges.UserId 72862.0
posts.OwnerUserId 90361.0
estimating query 1543: predicted 1692723.5136948992, true_card 450917, qerror 3.7539580758651794, latency 0.0017080307006835938
0.4646909344831662
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
estimating query 1544: predicted 33473.79903284504, true_card 37799, qerror 0.8855736668389386, latency 0.0020427703857421875
0.9987352758834467
users.Id 40325.00000000001
badges.UserId 72862.0
estimating query 1545: predicted 47673.38669120943, true_card 66192, qerror 0.7202288296351437, latency 0.00138664245605

posts.Id 91976.0
postLinks.PostId 9502.0
estimating query 1637: predicted 66151.63748117721, true_card 60440, qerror 1.094500951045288, latency 0.0015759468078613281
0.9547829219960368
postLinks.PostId 9502.0
posts.Id 91976.0
estimating query 1638: predicted 9072.347324806342, true_card 10423, qerror 0.8704161301742629, latency 0.0007855892181396484
1
votes.PostId 328064.0
posts.Id 91976.0
postHistory.PostId 259700.0
estimating query 1639: predicted 889476.313761421, true_card 1331347, qerror 0.6681025410816421, latency 0.0023345947265625
1
postHistory.PostId 259700.0
posts.Id 91976.0
estimating query 1640: predicted 87259.03998480811, true_card 303187, qerror 0.28780600746340745, latency 0.0007627010345458984
1
votes.PostId 328064.0
posts.Id 91976.0
estimating query 1641: predicted 196366.6666666667, true_card 328064, qerror 0.5985620691897516, latency 0.0002741813659667969
0.875822503071893
comments.PostId 153297.0
posts.Id 91976.0
postLinks.PostId 9502.0
postHistory.PostId 259700.0


posts.OwnerUserId 90361.0
estimating query 1757: predicted 929.6599618309599, true_card 31729, qerror 0.02930000825210249, latency 0.0014772415161132812
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
votes.PostId 328064.0
estimating query 1758: predicted 0.0, true_card 645020, qerror 0.0, latency 0.0010423660278320312
0.012702920610350896
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 259700.0
estimating query 1759: predicted 240.7959373144958, true_card 157452, qerror 0.001529329175332773, latency 0.003192901611328125
0.0
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
key 91976.0
key 328064.0
estimating query 1760: predicted 0.0, true_card 62169, qerror 0.0, latency 0.0018985271453857422
0.0
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 259700.0
key 328064.0
estimating query 1761: predicted 0.0, true_card 354388, qerror 0.0, latency 0.0026013851165771484
0.007249898645919332
posts.Id 91976.0
postHistory.PostId 259700.0
estimating qu

postHistory.PostId 259700.0
estimating query 1874: predicted 253.6489081169505, true_card 23304, qerror 0.01088435067443145, latency 0.0020492076873779297
0.0
posts.Id 91976.0
postLinks.PostId 9502.0
votes.PostId 328064.0
estimating query 1875: predicted 0.0, true_card 25523, qerror 0.0, latency 0.0010638236999511719
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
votes.PostId 328064.0
estimating query 1876: predicted 0.0, true_card 415486, qerror 0.0, latency 0.0014901161193847656
0.0
posts.Id 91976.0
postLinks.PostId 9502.0
postHistory.PostId 259700.0
votes.PostId 328064.0
estimating query 1877: predicted 0.0, true_card 243903, qerror 0.0, latency 0.0015735626220703125
0.006810075440189384
posts.Id 91976.0
postLinks.PostId 9502.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 1878: predicted 815.4396872912596, true_card 61110, qerror 0.01334380113387759, latency 0.0025475025177001953
0.0
posts.Id 91976.0
postLinks.PostId 9502.0
comments.PostId 153297.0
votes.P

users.Id 40325.00000000001
votes.UserId 29638.0
estimating query 1974: predicted 72143.08133915081, true_card 58846, qerror 1.2259640644929275, latency 0.0018413066864013672
0.0
postHistory.UserId 277348.0
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
comments.UserId 170648.0
votes.UserId 29638.0
estimating query 1975: predicted 0.0, true_card 1374616477, qerror 0.0, latency 0.003576517105102539
0.0
postHistory.UserId 277348.0
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
comments.UserId 170648.0
estimating query 1976: predicted 0.0, true_card 2477892, qerror 0.0, latency 0.001516580581665039
0.0
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
comments.UserId 170648.0
votes.UserId 29638.0
estimating query 1977: predicted 0.0, true_card 12940, qerror 0.0, latency 0.0029380321502685547
0.0
postHistory.UserId 277348.0
users.Id 40325.00000000001
comments.UserId 170648.0
votes.UserId 29638.0
estimating query 1978: predicted 0.0, true_card 212898, qerror 0.0, latency 0.002

comments.UserId 170648.0
badges.UserId 72862.0
estimating query 2074: predicted 14362954.238127144, true_card 10238947, qerror 1.4027765001740067, latency 0.0012407302856445312
0.0
users.Id 40325.00000000001
badges.UserId 72862.0
posts.OwnerUserId 90361.0
postHistory.UserId 277348.0
estimating query 2075: predicted 0.0, true_card 3360391, qerror 0.0, latency 0.0009489059448242188
0.0
users.Id 40325.00000000001
posts.OwnerUserId 90361.0
postHistory.UserId 277348.0
estimating query 2076: predicted 0.0, true_card 61634, qerror 0.0, latency 0.0014998912811279297
0.0
users.Id 40325.00000000001
badges.UserId 72862.0
posts.OwnerUserId 90361.0
estimating query 2077: predicted 0.0, true_card 8897, qerror 0.0, latency 0.0017514228820800781
1
users.Id 40325.00000000001
badges.UserId 72862.0
postHistory.UserId 277348.0
estimating query 2078: predicted 16228052.243840132, true_card 16322646, qerror 0.994204753557734, latency 0.0011434555053710938
0.0
users.Id 40325.00000000001
comments.UserId 17064

key 328064.0
estimating query 2174: predicted 114665.13069725414, true_card 55513, qerror 2.0655545673491638, latency 0.0019030570983886719
0.31072399211276563
users.Id 40325.00000000001
badges.UserId 72862.0
posts.OwnerUserId 90361.0
key 91976.0
key 10807.0
estimating query 2175: predicted 44714121.5428035, true_card 134801, qerror 331.70467238969667, latency 0.0014786720275878906
0.3353740774291196
users.Id 40325.00000000001
badges.UserId 72862.0
posts.OwnerUserId 90361.0
key 91976.0
key 328064.0
estimating query 2176: predicted 28887409.518255807, true_card 13909829, qerror 2.076762375601872, latency 0.0023546218872070312
0.31072399211276563
users.Id 40325.00000000001
badges.UserId 72862.0
posts.OwnerUserId 90361.0
key 91976.0
key 328064.0
key 153297.0
key 10807.0
estimating query 2177: predicted 1765615449689.7327, true_card 4231593, qerror 417246.04651008087, latency 0.0025730133056640625
0.31657571284461195
posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 1532

votes.PostId 328064.0
postHistory.PostId 259700.0
postLinks.RelatedPostId 10807.0
key 170648.0
key 40325.00000000001
key 72862.0
estimating query 2257: predicted 4198968429.8968177, true_card 113925678, qerror 36.85708528236117, latency 0.0042951107025146484
0.9747779631395466
posts.Id 91976.0
comments.PostId 153297.0
estimating query 2258: predicted 69691.0848718897, true_card 171647, qerror 0.40601399891573814, latency 0.001276254653930664
0.917492343721852
posts.Id 91976.0
comments.PostId 153297.0
postLinks.RelatedPostId 10807.0
estimating query 2259: predicted 40041.734239736186, true_card 30393, qerror 1.3174656743242255, latency 0.0010287761688232422
1
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 2260: predicted 276501.8619857278, true_card 704085, qerror 0.3927109113043564, latency 0.001123189926147461
1
posts.Id 91976.0
comments.PostId 153297.0
votes.PostId 328064.0
estimating query 2261: predicted 661803.7574180634, true_card 865125, q

posts.Id 91976.0
votes.PostId 328064.0
key 170648.0
key 40325.00000000001
estimating query 2359: predicted 8825956.230168656, true_card 574721, qerror 15.356940550577855, latency 0.0017337799072265625
0.9058802599319473
comments.UserId 170648.0
users.Id 40325.00000000001
badges.UserId 72862.0
estimating query 2360: predicted 14184143.471386453, true_card 10225026, qerror 1.3871987681387268, latency 0.0005719661712646484
0.0074376371368915445
posts.Id 91976.0
postHistory.PostId 259700.0
postLinks.RelatedPostId 10807.0
estimating query 2361: predicted 366.2095587987119, true_card 68389, qerror 0.00535480207048958, latency 0.0017008781433105469
0.0072947626086388535
posts.Id 91976.0
postLinks.RelatedPostId 10807.0
votes.PostId 328064.0
estimating query 2362: predicted 2305.052242231782, true_card 300737, qerror 0.007664677915360538, latency 0.0019371509552001953
0.007440188744245159
posts.Id 91976.0
postHistory.PostId 259700.0
votes.PostId 328064.0
estimating query 2363: predicted 6617.87

postLinks.RelatedPostId 10807.0
key 170648.0
key 40325.00000000001
estimating query 2443: predicted 357.50596964326394, true_card 16219, qerror 0.022042417512994877, latency 0.002310514450073242
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
votes.PostId 328064.0
estimating query 2444: predicted 0.0, true_card 1000676, qerror 0.0, latency 0.002518177032470703
0.00871314164961333
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
key 170648.0
key 40325.00000000001
key 72862.0
estimating query 2445: predicted 5752075.895360721, true_card 17589523, qerror 0.3270171621686797, latency 0.0024220943450927734
0.009019940025452445
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
key 170648.0
key 40325.00000000001
estimating query 2446: predicted 15640.697248989041, true_card 181816, qerror 0.08602486716784574, latency 0.0016238689422607422
0.0
badges.UserId 72862.0
users.Id 40325.00000000001
comments.UserId 170648.0
key 153297.0

posts.OwnerUserId 90361.0
users.Id 40325.00000000001
key 91976.0
key 153297.0
estimating query 2533: predicted 195.80569813940886, true_card 88722, qerror 0.002206957667088308, latency 0.0018699169158935547
0.0
posts.Id 91976.0
postLinks.PostId 9502.0
postHistory.PostId 259700.0
comments.PostId 153297.0
estimating query 2534: predicted 0.0, true_card 51426, qerror 0.0, latency 0.0007760524749755859
0.9616197785232076
posts.Id 91976.0
postLinks.PostId 9502.0
comments.PostId 153297.0
votes.PostId 328064.0
estimating query 2535: predicted 281882.9584972923, true_card 265135, qerror 1.0631676636328373, latency 0.0035431385040283203
0.0
posts.Id 91976.0
postHistory.PostId 259700.0
comments.PostId 153297.0
votes.PostId 328064.0
estimating query 2536: predicted 0.0, true_card 1411723, qerror 0.0, latency 0.0013718605041503906
0.0
posts.Id 91976.0
postLinks.PostId 9502.0
postHistory.PostId 259700.0
estimating query 2537: predicted 0.0, true_card 12249, qerror 0.0, latency 0.0019469261169433594

In [19]:
for i in [50, 90, 95, 99, 100]:
    print(f"q-error {i}% percentile is {np.percentile(qerror, i)}")
print(f"total inference time: {np.sum(latency)}")

q-error 50% percentile is 0.4861976823827677
q-error 90% percentile is 49.4112299823222
q-error 95% percentile is 798.469580730671
q-error 99% percentile is 360148.0710562391
q-error 100% percentile is 114172012572.37447
total inference time: 5.603515863418579


In [20]:
with open("stats_CEB_join_hist.txt", "w") as f:
    for p in pred:
        f.write(str(p)+"\n")