In [6]:
import itertools
import networkx as nx
import numpy as np
from tqdm import tqdm
from collections import defaultdict
from itertools import chain
import sys
sys.path.append('/home/ziniu.wzn/BayesCard')
from Pgmpy.factors import factor_product
from Pgmpy.models import BayesianModel, JunctionTree
from Pgmpy.inference.EliminationOrder import (
    WeightedMinFill,
    MinNeighbors,
    MinFill,
    MinWeight,
)
from Pgmpy.factors.discrete import TabularCPD

In [7]:
import numpy as np
import pandas as pd
import time
import sys
import copy
from Models.pgmpy_BN import Pgmpy_BN
from Testing.toy_dataset import *

In [32]:
class VariableElimination(object):
    def __init__(self, model, probs=None):
        model.check_model()
        self.model = model
        if probs is not None:
            self.probs = probs
        elif len(self.model.probs) != 0:
            self.probs = model.probs
        else:
            self.probs = dict()

        if isinstance(model, JunctionTree):
            self.variables = set(chain(*model.nodes()))
        else:
            self.variables = model.nodes()

        self.cardinality = {}
        self.factors = defaultdict(list)

        if isinstance(model, BayesianModel):
            self.state_names_map = {}
            for node in model.nodes():
                cpd = model.get_cpds(node)
                if isinstance(cpd, TabularCPD):
                    self.cardinality[node] = cpd.variable_card
                    cpd = cpd.to_factor()
                for var in cpd.scope():
                    self.factors[var].append(cpd)
                self.state_names_map.update(cpd.no_to_name)

        elif isinstance(model, JunctionTree):
            self.cardinality = model.get_cardinality()

            for factor in model.get_factors():
                for var in factor.variables:
                    self.factors[var].append(factor)
        self.root = self.get_root()

    def get_root(self):
        """Returns the network's root node."""

        def find_root(graph, node):
            predecessor = next(self.model.predecessors(node), None)
            if predecessor:
                root = find_root(graph, predecessor)
            else:
                root = node
            return root

        return find_root(self, list(self.model.nodes)[0])

    def steiner_tree(self, nodes):
        """Returns the minimal part of the tree that contains a set of nodes."""
        sub_nodes = set()

        def walk(node, path):
            if len(nodes) == 0:
                return

            if node in nodes:
                sub_nodes.update(path + [node])
                nodes.remove(node)

            for child in self.model.successors(node):
                walk(child, path + [node])

        walk(self.root, [])
        sub_graph = self.model.subgraph(sub_nodes)
        sub_graph.cardinalities = defaultdict(int)
        for node in sub_graph.nodes:
            sub_graph.cardinalities[node] = self.model.cardinalities[node]
        return sub_graph

    def get_probs(self, attribute, values):
        """
        Calculate Pr(attr in values) where values must be a list
        """
        factor = self.probs[attribute]
        values = [factor.get_state_no(attribute, no) for no in values]
        return np.sum(factor.values[values])


    def _get_working_factors(self, variables=[], evidence=None, return_probs=False, reduce=True):
        """
        Uses the evidence given to the query methods to modify the factors before running
        the variable elimination algorithm.
        Parameters
        ----------
        evidence: dict
            Dict of the form {variable: state}
        Returns
        -------
        dict: Modified working factors.
        """

        useful_var = copy.deepcopy(variables)
        if evidence:
            useful_var += list(evidence.keys())
        sub_graph_model = self.steiner_tree(useful_var)
        variables_sub_graph = set(sub_graph_model.nodes)

        working_factors = dict()
        for node in sub_graph_model.nodes:
            working_factors[node] = set()
            for factor in self.factors[node]:
                if set(factor.variables).issubset(variables_sub_graph):
                    working_factors[node].add((factor, None))

        if return_probs:
            probs = dict()
        # Dealing with evidence. Reducing factors over it before VE is run.
        if evidence and reduce:
            for evidence_var in evidence:
                for factor, origin in working_factors[evidence_var]:
                    factor_reduced = factor.reduce(
                        [(evidence_var, evidence[evidence_var])], inplace=False
                    )
                    if return_probs:
                        factor_reduced.normalize()
                        probs[evidence_var] = self.get_probs(evidence_var, evidence[evidence_var])
                    for var in factor_reduced.scope():
                        if var in working_factors:
                            working_factors[var].remove((factor, origin))
                            working_factors[var].add((factor_reduced, evidence_var))
                if type(evidence[evidence_var]) != list:
                    del working_factors[evidence_var]
        if return_probs:
            return working_factors, sub_graph_model, probs
        return working_factors, sub_graph_model

    def _get_elimination_order(
            self, variables=None, evidence=None, model=None, elimination_order="minfill", show_progress=False
    ):
        """
        Deals with all elimination order parameters given to _variable_elimination method
        and returns a list of variables that are to be eliminated
        Parameters
        ----------
        elimination_order: str or list
        Returns
        -------
        list: A list of variables names in the order they need to be eliminated.
        """
        if model is None:
            model = self.model
        if isinstance(model, JunctionTree):
            all_variables = set(chain(*model.nodes()))
        else:
            all_variables = model.nodes()

        if variables is None:
            to_eliminate = set(all_variables)
        else:
            not_evidence_eliminate = []
            if evidence is not None:
                for key in evidence:
                    if type(evidence[key]) != list:
                        not_evidence_eliminate.append(key)
            to_eliminate = (
                    set(all_variables)
                    - set(variables)
                    - set(not_evidence_eliminate)
            )

        # Step 1: If elimination_order is a list, verify it's correct and return.
        if hasattr(elimination_order, "__iter__") and (
                not isinstance(elimination_order, str)
        ):
            if any(
                    var in elimination_order
                    for var in set(variables).union(
                        set(evidence.keys() if evidence else [])
                    )
            ):
                raise ValueError(
                    "Elimination order contains variables which are in"
                    " variables or evidence args"
                )
            else:
                return elimination_order

        # Step 2: If elimination order is None or a Markov model, return a random order.
        elif (elimination_order is None) or (not isinstance(model, BayesianModel)):
            return to_eliminate

        # Step 3: If elimination order is a str, compute the order using the specified heuristic.
        elif isinstance(elimination_order, str) and isinstance(
                model, BayesianModel
        ):
            heuristic_dict = {
                "weightedminfill": WeightedMinFill,
                "minneighbors": MinNeighbors,
                "minweight": MinWeight,
                "minfill": MinFill,
            }
            elimination_order = heuristic_dict[elimination_order.lower()](
                model
            ).get_elimination_order(nodes=to_eliminate, show_progress=show_progress)
            return elimination_order

    def _variable_elimination(
            self,
            variables,
            operation,
            evidence=None,
            elimination_order="minfill",
            joint=True,
            show_progress=False,
    ):
        """
        Implementation of a generalized variable elimination.

        Parameters
        ----------
        variables: list, array-like
            variables that are not to be eliminated.

        operation: str ('marginalize' | 'maximize')
            The operation to do for eliminating the variable.

        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence

        elimination_order: str or list (array-like)
            If str: Heuristic to use to find the elimination order.
            If array-like: The elimination order to use.
            If None: A random elimination order is used.
        """
        # Step 1: Deal with the input arguments.
        if isinstance(variables, str):
            raise TypeError("variables must be a list of strings")
        if isinstance(evidence, str):
            raise TypeError("evidence must be a list of strings")

        # Dealing with the case when variables is not provided.
        if not variables:
            all_factors = []
            for factor_li in self.factors.values():
                all_factors.extend(factor_li)
            if joint:
                return factor_product(*set(all_factors))
            else:
                return set(all_factors)

        # Step 2: Prepare data structures to run the algorithm.
        eliminated_variables = set()
        # Get working factors and elimination order
        # tic = time.time()
        working_factors, sub_graph_model = self._get_working_factors(variables, evidence)
        # toc = time.time()
        # print(f"getting working factors takes {toc-tic} secs")
        elimination_order = self._get_elimination_order(
            variables, evidence, sub_graph_model, elimination_order, show_progress=show_progress
        )
        # print(f"getting elimination orders takes {time.time()-toc} secs")
        # Step 3: Run variable elimination
        if show_progress:
            pbar = tqdm(elimination_order)
        else:
            pbar = elimination_order

        for var in pbar:
            #tic = time.time()
            # print(var)
            if show_progress:
                pbar.set_description("Eliminating: {var}".format(var=var))
            # Removing all the factors containing the variables which are
            # eliminated (as all the factors should be considered only once)
            factors = [
                factor
                for factor, _ in working_factors[var]
                if not set(factor.variables).intersection(eliminated_variables)
            ]
            phi = factor_product(*factors)
            phi = getattr(phi, operation)([var], inplace=False)
            del working_factors[var]
            for variable in phi.variables:
                if variable in working_factors:
                    working_factors[variable].add((phi, var))
            eliminated_variables.add(var)
            # print(f"eliminating {var} takes {time.time()-tic} secs")

        # Step 4: Prepare variables to be returned.
        #tic = time.time()
        final_distribution = set()
        for node in working_factors:
            for factor, origin in working_factors[node]:
                if not set(factor.variables).intersection(eliminated_variables):
                    final_distribution.add((factor, origin))
        final_distribution = [factor for factor, _ in final_distribution]
        # print(final_distribution)
        # print(f"the rest takes {time.time()-tic} secs")
        if joint:
            if isinstance(self.model, BayesianModel):
                return factor_product(*final_distribution).normalize(inplace=False)
            else:
                return factor_product(*final_distribution)
        else:
            query_var_factor = {}
            for query_var in variables:
                phi = factor_product(*final_distribution)
                query_var_factor[query_var] = phi.marginalize(
                    list(set(variables) - set([query_var])), inplace=False
                ).normalize(inplace=False)
            return query_var_factor

    def conditional_query(
            self,
            variables,
            evidence=None,
            elimination_order="weightedminfill",
            joint=True,
            show_progress=False,
    ):
        """
        Parameters
        ----------
        variables: list
            list of variables for which you want to compute the probability

        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence

        elimination_order: list
            order of variable eliminations (if nothing is provided) order is
            computed automatically

        joint: boolean (default: True)
            If True, returns a Joint Distribution over `variables`.
            If False, returns a dict of distributions over each of the `variables`.
        """
        common_vars = set(evidence if evidence is not None else []).intersection(
            set(variables)
        )
        if common_vars:
            raise ValueError(
                f"Can't have the same variables in both `variables` and `evidence`. Found in both: {common_vars}"
            )

        return self._variable_elimination(
            variables=variables,
            operation="marginalize",
            evidence=evidence,
            elimination_order=elimination_order,
            joint=joint,
            show_progress=show_progress,
        )


    def query(
            self,
            query,
            elimination_order="minfill",
            show_progress=False,
    ):
        """
        An efficient implementation of probabilistic query

        Parameters
        ----------
        query: Q of form {"attr_name": attr_values}
               attr_values must be a list of values.
        elimination_order: str or list (array-like)
            If str: Heuristic to use to find the elimination order.
            If array-like: The elimination order to use.
            If None: A random elimination order is used.
        """
        tic = time.time()
        # Step 1: Prepare data structures to run the algorithm.
        eliminated_variables = set()
        # Get working factors and elimination order
        # tic = time.time()
        working_factors, sub_graph_model = self._get_working_factors(evidence=query, reduce=False)
        # toc = time.time()
        # print(f"getting working factors takes {toc-tic} secs")

        elimination_order_rest = self._get_elimination_order(variables=list(query.keys()),
                                                        model=sub_graph_model,
                                                        elimination_order=elimination_order,
                                                        show_progress=show_progress)

        elimination_order_variable = self._get_elimination_order(variables=list(elimination_order_rest),
                                                             model=sub_graph_model,
                                                             elimination_order=elimination_order,
                                                             show_progress=show_progress)

        elimination_order = elimination_order_rest + elimination_order_variable
        # print(f"getting elimination orders takes {time.time()-toc} secs")
        # Step 3: Run variable elimination
        if show_progress:
            pbar = tqdm(elimination_order)
        else:
            pbar = elimination_order
        print(pbar)
        result = None
        for var in pbar:
            #tic = time.time()
            print(var)
            if show_progress:
                pbar.set_description("Eliminating: {var}".format(var=var))
            # Removing all the factors containing the variables which are
            # eliminated (as all the factors should be considered only once)
            factors = [
                factor
                for factor, _ in working_factors[var]
                if not set(factor.variables).intersection(eliminated_variables)
            ]
            phi = factor_product(*factors)
            #If its a query variable, we record its probability
            if var in query:
                print(time.time()-tic)
                print(factors)
                if result is None:
                    print(f"variable {var} of prob {self.get_probs(var, query[var])}")
                    result = self.get_probs(var, query[var])
                else:
                    marg_var = [attr for attr in list(phi.variables) if attr != var]
                    phi_var = phi.marginalize(marg_var, inplace=False)
                    phi_var.normalize()
                    values = [phi_var.get_state_no(var, no) for no in query[var]]
                    print(f"variable {var} of prob {np.sum(phi_var.values[values])}")
                    result *= np.sum(phi_var.values[values])
                    
                for factor, origin in working_factors[var]:
                    factor_reduced = factor.reduce(
                        [(var, query[var])], inplace=False
                    )
                    for fact_var in factor_reduced.scope():
                        if fact_var in working_factors:
                            working_factors[fact_var].remove((factor, origin))
                            working_factors[fact_var].add((factor_reduced, var))
            phi = phi.marginalize([var], inplace=False)
            del working_factors[var]
            for variable in phi.variables:
                if variable in working_factors:
                    working_factors[variable].add((phi, var))

            eliminated_variables.add(var)
            # print(f"eliminating {var} takes {time.time()-tic} secs")

        return result

In [11]:
import pickle
# Load the pickled Chow-Liu network for the Census data into `BN`.
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# checkpoints from a trusted source.
# NOTE(review): the path is absolute and machine-specific, so this cell does
# not run elsewhere without editing.
with open('/home/ziniu.wzn/BN_checkpoints/check_points/Census_chow-liu.pkl', 'rb') as f:
    BN = pickle.load(f)
# Junction-tree checkpoint, currently disabled:
#with open('check_points/Census_junction.pkl', 'rb') as f:
    #BN_J = pickle.load(f)

In [33]:
# Build the variable-elimination engine on the loaded network's model and
# store it on `BN` as `infer_machine`.
ve = VariableElimination(BN.model)
BN.infer_machine = ve

In [27]:
# Time one conditional query for dAge; evidence values are either a single
# state or a list of states.
# perf_counter is the monotonic, high-resolution clock meant for intervals.
tic = time.perf_counter()
q = ve.conditional_query(["dAge"], {"iClass": 0, "iKorean": [0,1], "iMay75880": 0, "iRagechld": [0, 1], "iRrelchld": 0})
print(time.perf_counter()-tic)

0.049874305725097656


In [14]:
# Display the value table of `q`.
# NOTE(review): presumably `q` is the factor returned by ve.conditional_query;
# the execution counts in this notebook are not sequential, so confirm on a fresh run.
q.values

array([0.02012446, 0.0700407 , 0.05920385, 0.15525431, 0.50966802,
       0.06280474, 0.12157609, 0.00132783])

In [15]:
# Display the stored marginal table for dAge, for comparison with the query result.
ve.probs["dAge"].values

array([0.17949424, 0.16455985, 0.15089992, 0.13475167, 0.13130455,
       0.12725335, 0.09865048, 0.01308595])

In [15]:
# First entry of the value table of `q`.
# NOTE(review): the recorded output does not match the first element of the
# array shown for `q.values` earlier, so `q` was likely rebound between runs.
q.values[0]

0.9729037908982489

In [34]:
# Time the experimental `ve.query` on a multi-attribute range query.
# perf_counter is the monotonic, high-resolution clock meant for intervals.
tic = time.perf_counter()
q = ve.query({"iAvail": [0], "iClass": [0], "iKorean": [0,1], "iMay75880": [0], "iRagechld": [0, 1], "iRrelchld": [0]})
print(time.perf_counter()-tic)
# Scale the probability by BN.nrows (presumably the table's row count) to get a cardinality.
print(q*BN.nrows)

['iRspouse', 'iRelat1', 'iLooking', 'dOccup', 'dIndustry', 'iRlabor', 'iYearwrk', 'dAge', 'iFertil', 'iMilitary', 'iRvetserv', 'iRrelchld', 'iAvail', 'iKorean', 'iRagechld', 'iClass', 'iMay75880']
iRspouse
iRelat1
iLooking
dOccup
dIndustry
iRlabor
iYearwrk
dAge
iFertil
iMilitary
iRvetserv
iRrelchld
0.3726825714111328
[<DiscreteFactor representing phi(iKorean:2, iRagechld:5, iRrelchld:2, iClass:10, iAvail:5, iMay75880:2) at 0x7fb888ad6cd0>]
variable iRrelchld of prob 0.744034967467157
iAvail
0.939202070236206
[<DiscreteFactor representing phi(iKorean:2, iRagechld:5, iClass:10, iAvail:5, iMay75880:2) at 0x7fb8812fe0d0>]
variable iAvail of prob 0.9677559762191935
iKorean
1.2828075885772705
[<DiscreteFactor representing phi(iKorean:2, iRagechld:5, iClass:10, iMay75880:2) at 0x7fb900eeb150>]
variable iKorean of prob 1.0
iRagechld
1.3965649604797363
[<DiscreteFactor representing phi(iRagechld:5, iClass:10, iMay75880:2) at 0x7fb900ef34d0>]
variable iRagechld of prob 0.8713790305029725
iClass


In [23]:
# Time the reference implementation on a comparable query.
# The timer is stopped before printing so that only the query itself is measured.
tic = time.perf_counter()
estimate = BN.query_inefficient({"iMay75880": 0,"iClass": 0, "iRagechld": [0, 4], "iKorean": [0,1], "iAvail": 0, "iRrelchld": 0})
elapsed = time.perf_counter()-tic
print(estimate)
print(elapsed)

decoded query is {'iMay75880': 0, 'iClass': 0, 'iRagechld': [0, 1], 'iKorean': [0, 1], 'iAvail': 0, 'iRrelchld': 0}
querying iMay75880 with n_distinct [1]
conditioning on ['iClass', 'iRagechld', 'iKorean', 'iAvail', 'iRrelchld'] with probability 0.9893718709242222
querying iClass with n_distinct [1]
conditioning on ['iRagechld', 'iKorean', 'iAvail', 'iRrelchld'] with probability 0.24407894523123658
querying iRagechld with n_distinct [1, 1]
conditioning on ['iKorean', 'iAvail', 'iRrelchld'] with probability 0.8361923356819273
querying iKorean with n_distinct [1, 1]
conditioning on ['iAvail', 'iRrelchld'] with probability 1.0
querying iAvail with n_distinct [1]
conditioning on ['iRrelchld'] with probability 0.9609798873106272
querying iRrelchld with n_distinct [1]
conditioning on [] with probability 0.744034967467157
354924.0
0.19004273414611816


In [26]:
from Evaluation.cardinality_estimation import parse_query_single_table
from time import perf_counter
def evaluate_cardinality(BN, query_path):
    """Estimate the cardinality of every query in a file and score the estimates.

    Parameters
    ----------
    BN:
        Object exposing `query_inefficient(query)`; it is also passed to
        `parse_query_single_table`.
    query_path: str
        Text file with one query per line in the form
        `<query text>||<true cardinality>`. Blank lines are skipped.

    Returns
    -------
    (latencies, q_errors): two lists with one entry per successfully
    evaluated query. Latency is in milliseconds; the q-error is
    max(predicted / true, true / predicted), with a zero on either side
    replaced by 1 (so 0 vs 0 scores 1.0).
    """
    # read all queries
    with open(query_path) as f:
        queries = f.readlines()
    latencies = []
    q_errors = []
    for query_no, line in enumerate(queries):
        if not line.strip():
            continue
        parts = line.split("||")
        cardinality_true = int(parts[-1])
        query_str = parts[0]
        print(f"Predicting cardinality for query {query_no}: {query_str}")

        query = parse_query_single_table(query_str.strip(), BN)
        print(query)
        # Start the clock after printing so that only inference is timed.
        card_start_t = perf_counter()
        try:
            cardinality_predict = BN.query_inefficient(query)
        except Exception as exc:
            # Best effort: report the failure and go on with the next query,
            # without swallowing KeyboardInterrupt/SystemExit.
            print(f"query {query_no} failed: {exc!r}")
            continue
        card_end_t = perf_counter()
        if cardinality_predict is None:
            continue
        latency_ms = (card_end_t - card_start_t) * 1000

        # Clamp zeros to 1 on both sides: this avoids division by zero and
        # makes the 0-vs-0 case come out as a perfect score of 1.0.
        predicted = 1 if cardinality_predict == 0 else cardinality_predict
        actual = 1 if cardinality_true == 0 else cardinality_true
        q_error = max(predicted / actual, actual / predicted)

        print(f"latency: {latency_ms} and error: {q_error}")
        latencies.append(latency_ms)
        q_errors.append(q_error)
    return latencies, q_errors

In [27]:
# Workload file: evaluate_cardinality expects one "<query>||<true cardinality>" entry per line.
# NOTE(review): absolute, machine-specific path.
query_path = "/home/ziniu.wzn/Census/cardinality/query_one_side.sql"
# Returns per-query latencies (ms) and q-errors for the queries that could be evaluated.
latencies, q_errors = evaluate_cardinality(BN, query_path)

Predicting cardinality for query 0: SELECT COUNT(*) FROM climate WHERE iAvail = 0 AND iClass = 0 AND iKorean >= 0 AND iKorean <= 1 AND iMay75880 = 0 AND iRagechld >= 0 AND iRagechld <= 4 AND iRrelchld = 0
{'iAvail': [0], 'iClass': [0], 'iKorean': [0, 1], 'iMay75880': [0], 'iRagechld': [4, 2, 0, 3, 1], 'iRrelchld': [0]}
decoded query is {'iAvail': [0], 'iClass': [0], 'iKorean': [0, 1], 'iMay75880': [0], 'iRagechld': [1, 2, 0, 4, 3], 'iRrelchld': [0]}
querying iAvail with n_distinct [1]
conditioning on ['iClass', 'iKorean', 'iMay75880', 'iRagechld', 'iRrelchld'] with probability 0.9727116891644684
querying iClass with n_distinct [1]
conditioning on ['iKorean', 'iMay75880', 'iRagechld', 'iRrelchld'] with probability 0.23979674725158945
querying iKorean with n_distinct [1, 1]
conditioning on ['iMay75880', 'iRagechld', 'iRrelchld'] with probability 1.0
querying iMay75880 with n_distinct [1]
conditioning on ['iRagechld', 'iRrelchld'] with probability 0.9866439050576342
querying iRagechld wit

querying iImmigr with n_distinct [1]
conditioning on ['dIncome4', 'dIndustry', 'iMay75880', 'iRagechld', 'iRemplpar', 'iRiders', 'iRspouse', 'iSchool', 'iVietnam'] with probability 0.9176512532138563
querying dIncome4 with n_distinct [1, 1]
conditioning on ['dIndustry', 'iMay75880', 'iRagechld', 'iRemplpar', 'iRiders', 'iRspouse', 'iSchool', 'iVietnam'] with probability 1.0
querying dIndustry with n_distinct [1, 1, 1]
conditioning on ['iMay75880', 'iRagechld', 'iRemplpar', 'iRiders', 'iRspouse', 'iSchool', 'iVietnam'] with probability 0.20688132492351485
querying iMay75880 with n_distinct [1]
conditioning on ['iRagechld', 'iRemplpar', 'iRiders', 'iRspouse', 'iSchool', 'iVietnam'] with probability 0.9918647661171562
querying iRagechld with n_distinct [1]
conditioning on ['iRemplpar', 'iRiders', 'iRspouse', 'iSchool', 'iVietnam'] with probability 0.35155477695831555
querying iRemplpar with n_distinct [1]
conditioning on ['iRiders', 'iRspouse', 'iSchool', 'iVietnam'] with probability 0.86

querying dTravtime with n_distinct [1]
conditioning on [] with probability 0.5596328334590985
latency: 202.70968973636627 and error: 1.1316376644710844
Predicting cardinality for query 12: SELECT COUNT(*) FROM climate WHERE iCitizen >= 0 AND iCitizen <= 1 AND dHispanic = 0 AND iOthrserv = 0 AND iRelat2 = 0 AND iRemplpar >= 0 AND iRemplpar <= 111 AND iRrelchld = 0 AND iSchool = 1 AND iTmpabsnt = 0 AND dTravtime >= 2 AND dTravtime <= 3
{'iCitizen': [0, 1], 'dHispanic': [0], 'iOthrserv': [0], 'iRelat2': [0], 'iRemplpar': [0, 111], 'iRrelchld': [0], 'iSchool': [1], 'iTmpabsnt': [0], 'dTravtime': [2, 3]}
decoded query is {'iCitizen': [0, 4], 'dHispanic': [0], 'iOthrserv': [0], 'iRelat2': [0], 'iRemplpar': [0, 1], 'iRrelchld': [0], 'iSchool': [0], 'iTmpabsnt': [0], 'dTravtime': [5, 4]}
querying iCitizen with n_distinct [1, 1]
conditioning on ['dHispanic', 'iOthrserv', 'iRelat2', 'iRemplpar', 'iRrelchld', 'iSchool', 'iTmpabsnt', 'dTravtime'] with probability 0.9294565465269262
querying dHispa

querying dIncome1 with n_distinct [1, 1, 1, 1]
conditioning on ['dIncome2', 'dIncome4', 'iMeans', 'iSept80', 'iSubfam1', 'iSubfam2'] with probability 0.9837434450424288
querying dIncome2 with n_distinct [1, 1]
conditioning on ['dIncome4', 'iMeans', 'iSept80', 'iSubfam1', 'iSubfam2'] with probability 1.0
querying dIncome4 with n_distinct [1]
conditioning on ['iMeans', 'iSept80', 'iSubfam1', 'iSubfam2'] with probability 0.6986271627545829
querying iMeans with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['iSept80', 'iSubfam1', 'iSubfam2'] with probability 0.43820315758546674
querying iSept80 with n_distinct [1]
conditioning on ['iSubfam1', 'iSubfam2'] with probability 0.9837925236531742
querying iSubfam1 with n_distinct [1]
conditioning on ['iSubfam2'] with probability 1.0
querying iSubfam2 with n_distinct [1]
conditioning on [] with probability 0.9737300597774464
latency: 367.68049001693726 and error: 1.0285513258339514
Predicting cardinality for query 19: SELECT COUNT(*) F

querying iImmigr with n_distinct [1]
conditioning on ['dIncome2', 'dIncome6', 'iRagechld', 'dRearning', 'iRlabor'] with probability 0.9164588546624775
querying dIncome2 with n_distinct [1]
conditioning on ['dIncome6', 'iRagechld', 'dRearning', 'iRlabor'] with probability 0.9394030077317072
querying dIncome6 with n_distinct [1]
conditioning on ['iRagechld', 'dRearning', 'iRlabor'] with probability 0.9578996871822353
querying iRagechld with n_distinct [1, 1]
conditioning on ['dRearning', 'iRlabor'] with probability 0.4455178526304385
querying dRearning with n_distinct [1, 1, 1, 1]
conditioning on ['iRlabor'] with probability 0.8387471480558331
querying iRlabor with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on [] with probability 0.7663025239140296
latency: 757.0892609655857 and error: 1.091854947933017
Predicting cardinality for query 24: SELECT COUNT(*) FROM climate WHERE iImmigr >= 0 AND iImmigr <= 1 AND dIncome6 = 0 AND dIncome7 = 0 AND iMarital >= 0 AND iMarital <= 4 AND iRownchld =

querying dIndustry with n_distinct [1]
conditioning on ['iMarital', 'iMobillim', 'iSex', 'iWWII'] with probability 0.21727399976950443
querying iMarital with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iMobillim', 'iSex', 'iWWII'] with probability 1.0
querying iMobillim with n_distinct [1]
conditioning on ['iSex', 'iWWII'] with probability 0.8097436731053085
querying iSex with n_distinct [1]
conditioning on ['iWWII'] with probability 0.51299460802564
querying iWWII with n_distinct [1]
conditioning on [] with probability 0.9615549051472877
latency: 398.7235277891159 and error: 1.2986621400005753
Predicting cardinality for query 32: SELECT COUNT(*) FROM climate WHERE iDisabl2 >= 0 AND iDisabl2 <= 2 AND dIncome4 = 0 AND dRearning >= 2 AND dRearning <= 3 AND iVietnam = 0
{'iDisabl2': [2, 0, 1], 'dIncome4': [0], 'dRearning': [3, 2], 'iVietnam': [0]}
decoded query is {'iDisabl2': [0, 1, 2], 'dIncome4': [0], 'dRearning': [2, 1], 'iVietnam': [0]}
querying iDisabl2 with n_distinct [1, 1, 1]
co

querying dIndustry with n_distinct [1, 1]
conditioning on ['iRownchld', 'iWWII', 'iYearwrk'] with probability 0.15970233208599946
querying iRownchld with n_distinct [1, 1]
conditioning on ['iWWII', 'iYearwrk'] with probability 1.0
querying iWWII with n_distinct [1]
conditioning on ['iYearwrk'] with probability 0.9650873814502713
querying iYearwrk with n_distinct [1, 1]
conditioning on [] with probability 0.7219610419459095
latency: 168.8462756574154 and error: 1.3930551027062277
Predicting cardinality for query 41: SELECT COUNT(*) FROM climate WHERE dAncstry2 = 1 AND iCitizen = 0 AND iDisabl2 >= 1 AND iDisabl2 <= 2 AND iFeb55 = 0 AND dHispanic = 0 AND iImmigr = 0 AND dIncome8 = 0 AND iMilitary = 4 AND dPwgt1 >= 1 AND dPwgt1 <= 2 AND iSubfam2 = 0 AND iVietnam = 0
{'dAncstry2': [1], 'iCitizen': [0], 'iDisabl2': [2, 1], 'iFeb55': [0], 'dHispanic': [0], 'iImmigr': [0], 'dIncome8': [0], 'iMilitary': [4], 'dPwgt1': [1, 2], 'iSubfam2': [0], 'iVietnam': [0]}
decoded query is {'dAncstry2': [0],

querying iDisabl2 with n_distinct [1]
conditioning on ['iEnglish', 'dIndustry', 'iLooking', 'dRpincome', 'iWWII'] with probability 0.9944459736948481
querying iEnglish with n_distinct [1]
conditioning on ['dIndustry', 'iLooking', 'dRpincome', 'iWWII'] with probability 0.8803163511237079
querying dIndustry with n_distinct [1, 1, 1, 1]
conditioning on ['iLooking', 'dRpincome', 'iWWII'] with probability 0.5587154020880405
querying iLooking with n_distinct [1, 1]
conditioning on ['dRpincome', 'iWWII'] with probability 0.8542958562962027
querying dRpincome with n_distinct [1, 1]
conditioning on ['iWWII'] with probability 0.30418116695175695
querying iWWII with n_distinct [1, 1]
conditioning on [] with probability 1.0
latency: 209.14602279663086 and error: 1.0759662800628116
Predicting cardinality for query 48: SELECT COUNT(*) FROM climate WHERE iEnglish = 0 AND dPOB = 0 AND iRlabor >= 1 AND iRlabor <= 6 AND dRpincome >= 2 AND dRpincome <= 4 AND iSex = 1 AND dYrsserv = 0
{'iEnglish': [0], 'd

querying iTmpabsnt with n_distinct [1]
conditioning on ['iWWII'] with probability 0.25271394178568823
querying iWWII with n_distinct [1]
conditioning on [] with probability 0.9615549051472877
latency: 687.4854564666748 and error: 1.1668527624487341
Predicting cardinality for query 52: SELECT COUNT(*) FROM climate WHERE dAncstry2 = 1 AND dIncome5 >= 0 AND dIncome5 <= 1 AND dTravtime >= 0 AND dTravtime <= 2
{'dAncstry2': [1], 'dIncome5': [0, 1], 'dTravtime': [1, 2, 0]}
decoded query is {'dAncstry2': [0], 'dIncome5': [0, 1], 'dTravtime': [3, 5, 0]}
querying dAncstry2 with n_distinct [1]
conditioning on ['dIncome5', 'dTravtime'] with probability 0.7020783488398528
querying dIncome5 with n_distinct [1, 1]
conditioning on ['dTravtime'] with probability 1.0
querying dTravtime with n_distinct [1, 1, 1]
conditioning on [] with probability 0.7043426616523308
latency: 70.59582695364952 and error: 1.0078672328797107
Predicting cardinality for query 53: SELECT COUNT(*) FROM climate WHERE dAncstry1 

querying dAge with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['dAncstry2', 'iAvail', 'dHours', 'dIncome2', 'iKorean', 'iLang1', 'dPwgt1', 'iSubfam1'] with probability 0.7493113217387449
querying dAncstry2 with n_distinct [1, 1]
conditioning on ['iAvail', 'dHours', 'dIncome2', 'iKorean', 'iLang1', 'dPwgt1', 'iSubfam1'] with probability 0.9437486447248165
querying iAvail with n_distinct [1]
conditioning on ['dHours', 'dIncome2', 'iKorean', 'iLang1', 'dPwgt1', 'iSubfam1'] with probability 0.9385718386121592
querying dHours with n_distinct [1]
conditioning on ['dIncome2', 'iKorean', 'iLang1', 'dPwgt1', 'iSubfam1'] with probability 0.5250274911810643
querying dIncome2 with n_distinct [1]
conditioning on ['iKorean', 'iLang1', 'dPwgt1', 'iSubfam1'] with probability 0.9466097199711624
querying iKorean with n_distinct [1]
conditioning on ['iLang1', 'dPwgt1', 'iSubfam1'] with probability 0.9778009614436882
querying iLang1 with n_distinct [1, 1]
conditioning on ['dPwgt1', 'iSubfam1'] with pro

querying dPoverty with n_distinct [1]
conditioning on ['dPwgt1', 'iRelat1', 'dRpincome', 'iSex'] with probability 0.9132925310960515
querying dPwgt1 with n_distinct [1]
conditioning on ['iRelat1', 'dRpincome', 'iSex'] with probability 0.5608126932289966
querying iRelat1 with n_distinct [1, 1]
conditioning on ['dRpincome', 'iSex'] with probability 0.7597460029049283
querying dRpincome with n_distinct [1, 1, 1]
conditioning on ['iSex'] with probability 0.6888783306662549
querying iSex with n_distinct [1]
conditioning on [] with probability 0.5152714188956936
latency: 446.6104470193386 and error: 1.057414883303671
Predicting cardinality for query 65: SELECT COUNT(*) FROM climate WHERE iFertil >= 0 AND iFertil <= 3 AND dIncome1 >= 0 AND dIncome1 <= 1 AND iLang1 = 2 AND iLooking = 0 AND iRelat2 = 0 AND iRemplpar = 0 AND iSept80 = 0
{'iFertil': [1, 3, 0, 2], 'dIncome1': [1, 0], 'iLang1': [2], 'iLooking': [0], 'iRelat2': [0], 'iRemplpar': [0], 'iSept80': [0]}
decoded query is {'iFertil': [1, 

querying dDepart with n_distinct [1, 1]
conditioning on ['iFeb55', 'iLang1', 'iPerscare', 'iSept80', 'iWorklwk', 'iWWII', 'iYearsch'] with probability 0.32694851089737176
querying iFeb55 with n_distinct [1]
conditioning on ['iLang1', 'iPerscare', 'iSept80', 'iWorklwk', 'iWWII', 'iYearsch'] with probability 0.9762770656957075
querying iLang1 with n_distinct [1]
conditioning on ['iPerscare', 'iSept80', 'iWorklwk', 'iWWII', 'iYearsch'] with probability 0.8984673499279011
querying iPerscare with n_distinct [1, 1, 1]
conditioning on ['iSept80', 'iWorklwk', 'iWWII', 'iYearsch'] with probability 1.0
querying iSept80 with n_distinct [1]
conditioning on ['iWorklwk', 'iWWII', 'iYearsch'] with probability 0.9776550005111155
querying iWorklwk with n_distinct [1, 1]
conditioning on ['iWWII', 'iYearsch'] with probability 0.9447156455338577
querying iWWII with n_distinct [1]
conditioning on ['iYearsch'] with probability 0.9504320243775898
querying iYearsch with n_distinct [1, 1, 1, 1, 1]
conditioning

querying dIncome1 with n_distinct [1]
conditioning on ['dIndustry', 'iLang1', 'iMay75880', 'iMeans', 'iMilitary', 'iSex', 'dYrsserv'] with probability 0.09648983272886812
querying dIndustry with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iLang1', 'iMay75880', 'iMeans', 'iMilitary', 'iSex', 'dYrsserv'] with probability 0.8370855419003017
querying iLang1 with n_distinct [1]
conditioning on ['iMay75880', 'iMeans', 'iMilitary', 'iSex', 'dYrsserv'] with probability 0.8755674773709671
querying iMay75880 with n_distinct [1]
conditioning on ['iMeans', 'iMilitary', 'iSex', 'dYrsserv'] with probability 1.0
querying iMeans with n_distinct [1]
conditioning on ['iMilitary', 'iSex', 'dYrsserv'] with probability 0.5096857389525886
querying iMilitary with n_distinct [1]
conditioning on ['iSex', 'dYrsserv'] with probability 0.8118379631714149
querying iSex with n_distinct [1]
conditioning on ['dYrsserv'] with probability 0.5076561973917575
querying dYrsserv with n_distinct [1]
conditioning on [] w

querying iMay75880 with n_distinct [1]
conditioning on ['iSubfam2', 'iWork89'] with probability 0.9863616402810886
querying iSubfam2 with n_distinct [1]
conditioning on ['iWork89'] with probability 0.9853459922681487
querying iWork89 with n_distinct [1, 1]
conditioning on [] with probability 0.7663025239140295
latency: 157.5554758310318 and error: 1.0172552800275674
Predicting cardinality for query 84: SELECT COUNT(*) FROM climate WHERE iMay75880 = 0 AND iSex = 0
{'iMay75880': [0], 'iSex': [0]}
decoded query is {'iMay75880': [0], 'iSex': [1]}
querying iMay75880 with n_distinct [1]
conditioning on ['iSex'] with probability 0.9907827152269888
querying iSex with n_distinct [1]
conditioning on [] with probability 0.4847285811043064
latency: 45.46205326914787 and error: 1.0107009737029993
Predicting cardinality for query 85: SELECT COUNT(*) FROM climate WHERE dAge >= 5 AND dAge <= 7 AND iFertil >= 0 AND iFertil <= 3 AND dHour89 >= 0 AND dHour89 <= 3 AND dIncome5 >= 0 AND dIncome5 <= 1 AND i

querying iImmigr with n_distinct [1, 1]
conditioning on ['dIncome6', 'iMay75880', 'iMeans', 'dOccup', 'iRemplpar', 'iWWII'] with probability 0.9281586394852503
querying dIncome6 with n_distinct [1]
conditioning on ['iMay75880', 'iMeans', 'dOccup', 'iRemplpar', 'iWWII'] with probability 0.9702664757142062
querying iMay75880 with n_distinct [1]
conditioning on ['iMeans', 'dOccup', 'iRemplpar', 'iWWII'] with probability 0.9875483750399366
querying iMeans with n_distinct [1, 1]
conditioning on ['dOccup', 'iRemplpar', 'iWWII'] with probability 0.9554098380094826
querying dOccup with n_distinct [1, 1]
conditioning on ['iRemplpar', 'iWWII'] with probability 0.4235026108860678
querying iRemplpar with n_distinct [1]
conditioning on ['iWWII'] with probability 0.7416486000954928
querying iWWII with n_distinct [1]
conditioning on [] with probability 0.9615549051472877
latency: 543.6714328825474 and error: 1.4703877049411898
Predicting cardinality for query 91: SELECT COUNT(*) FROM climate WHERE iC

querying iMobility with n_distinct [1, 1]
conditioning on ['iPerscare', 'dRearning', 'iRelat2', 'iSubfam2'] with probability 0.9016447639895686
querying iPerscare with n_distinct [1, 1, 1]
conditioning on ['dRearning', 'iRelat2', 'iSubfam2'] with probability 1.0000000000000002
querying dRearning with n_distinct [1, 1, 1]
conditioning on ['iRelat2', 'iSubfam2'] with probability 0.7127652605049623
querying iRelat2 with n_distinct [1]
conditioning on ['iSubfam2'] with probability 0.9899381962530068
querying iSubfam2 with n_distinct [1]
conditioning on [] with probability 0.9737300597774464
latency: 436.6367310285568 and error: 1.0417450707341949
Predicting cardinality for query 97: SELECT COUNT(*) FROM climate WHERE dAncstry2 = 2 AND iDisabl2 = 2 AND dHispanic = 0 AND dHour89 >= 0 AND dHour89 <= 3 AND iRelat2 = 0 AND dWeek89 >= 0 AND dWeek89 <= 1
{'dAncstry2': [2], 'iDisabl2': [2], 'dHispanic': [0], 'dHour89': [1, 3, 0, 2], 'iRelat2': [0], 'dWeek89': [1, 0]}
decoded query is {'dAncstry2':

querying iImmigr with n_distinct [1]
conditioning on ['dIncome3', 'iLang1', 'iOthrserv', 'dRearning', 'dRpincome', 'iSchool'] with probability 0.9483420761960664
querying dIncome3 with n_distinct [1]
conditioning on ['iLang1', 'iOthrserv', 'dRearning', 'dRpincome', 'iSchool'] with probability 0.9969620998628894
querying iLang1 with n_distinct [1]
conditioning on ['iOthrserv', 'dRearning', 'dRpincome', 'iSchool'] with probability 0.7606340168259075
querying iOthrserv with n_distinct [1]
conditioning on ['dRearning', 'dRpincome', 'iSchool'] with probability 0.9989094368494829
querying dRearning with n_distinct [1]
conditioning on ['dRpincome', 'iSchool'] with probability 0.43760917427383095
querying dRpincome with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iSchool'] with probability 1.0
querying iSchool with n_distinct [1, 1]
conditioning on [] with probability 0.9172427118906066
latency: 450.4452534019947 and error: 1.0829249734740605
Predicting cardinality for query 102: SELECT CO

querying dAge with n_distinct [1, 1]
conditioning on ['dAncstry2', 'iClass', 'dPOB', 'iSex'] with probability 0.4070633506214706
querying dAncstry2 with n_distinct [1, 1]
conditioning on ['iClass', 'dPOB', 'iSex'] with probability 0.9423556854368993
querying iClass with n_distinct [1]
conditioning on ['dPOB', 'iSex'] with probability 0.44619695750544003
querying dPOB with n_distinct [1]
conditioning on ['iSex'] with probability 0.9145636432291252
querying iSex with n_distinct [1]
conditioning on [] with probability 0.5152714188956936
latency: 242.42760613560677 and error: 1.5532359899105421
Predicting cardinality for query 108: SELECT COUNT(*) FROM climate WHERE dDepart >= 3 AND dDepart <= 5 AND dIncome6 = 0 AND iRelat2 = 0 AND iSex >= 0 AND iSex <= 1 AND iVietnam = 0 AND iWorklwk >= 0 AND iWorklwk <= 2
{'dDepart': [3, 5, 4], 'dIncome6': [0], 'iRelat2': [0], 'iSex': [1, 0], 'iVietnam': [0], 'iWorklwk': [1, 2, 0]}
decoded query is {'dDepart': [1, 4, 2], 'dIncome6': [0], 'iRelat2': [0], 

querying dPoverty with n_distinct [1]
conditioning on ['iRPOB', 'iWork89'] with probability 0.8487181245511386
querying iRPOB with n_distinct [1]
conditioning on ['iWork89'] with probability 0.6311779881207438
querying iWork89 with n_distinct [1, 1]
conditioning on [] with probability 0.7655849504837722
latency: 231.38004913926125 and error: 1.0650163468401164
Predicting cardinality for query 114: SELECT COUNT(*) FROM climate WHERE dAncstry2 >= 1 AND dAncstry2 <= 3 AND iDisabl1 = 2 AND dHours >= 3 AND dHours <= 5 AND iTmpabsnt >= 0 AND iTmpabsnt <= 3
{'dAncstry2': [1, 2, 3], 'iDisabl1': [2], 'dHours': [3, 4, 5], 'iTmpabsnt': [0, 3, 1, 2]}
decoded query is {'dAncstry2': [0, 1, 2], 'iDisabl1': [0], 'dHours': [1, 4, 2], 'iTmpabsnt': [0, 1, 2, 3]}
querying dAncstry2 with n_distinct [1, 1, 1]
conditioning on ['iDisabl1', 'dHours', 'iTmpabsnt'] with probability 0.9697444184396476
querying iDisabl1 with n_distinct [1]
conditioning on ['dHours', 'iTmpabsnt'] with probability 0.9511489939582642

querying iMobility with n_distinct [1, 1]
conditioning on ['iRiders', 'iSchool'] with probability 0.5647861787482997
querying iRiders with n_distinct [1]
conditioning on ['iSchool'] with probability 0.5116922934347699
querying iSchool with n_distinct [1]
conditioning on [] with probability 0.6960246676036342
latency: 204.47911322116852 and error: 1.139086214400022
Predicting cardinality for query 120: SELECT COUNT(*) FROM climate WHERE iClass >= 0 AND iClass <= 6 AND dDepart >= 0 AND dDepart <= 2 AND iDisabl2 = 2 AND iFeb55 = 0 AND iFertil >= 0 AND iFertil <= 4 AND dHispanic = 0 AND iRelat2 = 0 AND iRiders >= 0 AND iRiders <= 2 AND iRvetserv = 0
{'iClass': [5, 1, 0, 6, 3, 4, 2], 'dDepart': [0, 2, 1], 'iDisabl2': [2], 'iFeb55': [0], 'iFertil': [1, 3, 0, 4, 2], 'dHispanic': [0], 'iRelat2': [0], 'iRiders': [1, 0, 2], 'iRvetserv': [0]}
decoded query is {'iClass': [6, 1, 0, 2, 3, 5, 4], 'dDepart': [0, 3, 5], 'iDisabl2': [0], 'iFeb55': [0], 'iFertil': [1, 2, 0, 4, 3], 'dHispanic': [0], 'iRel

querying iKorean with n_distinct [1]
conditioning on ['iLang1', 'dPwgt1', 'iRelat2'] with probability 0.9777363427423915
querying iLang1 with n_distinct [1]
conditioning on ['dPwgt1', 'iRelat2'] with probability 0.79803512132865
querying dPwgt1 with n_distinct [1, 1]
conditioning on ['iRelat2'] with probability 0.8007263445743955
querying iRelat2 with n_distinct [1]
conditioning on [] with probability 0.9861049471481135
latency: 280.8108478784561 and error: 1.0652605256720393
Predicting cardinality for query 126: SELECT COUNT(*) FROM climate WHERE iDisabl1 = 2 AND iDisabl2 = 2 AND iEnglish = 0 AND iMarital >= 1 AND iMarital <= 4 AND iMeans >= 0 AND iMeans <= 1 AND iMilitary = 4 AND iOthrserv = 0 AND iWork89 >= 1 AND iWork89 <= 2
{'iDisabl1': [2], 'iDisabl2': [2], 'iEnglish': [0], 'iMarital': [1, 4, 2, 3], 'iMeans': [1, 0], 'iMilitary': [4], 'iOthrserv': [0], 'iWork89': [1, 2]}
decoded query is {'iDisabl1': [0], 'iDisabl2': [0], 'iEnglish': [0], 'iMarital': [3, 1, 2, 4], 'iMeans': [1, 0

querying dHour89 with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iMeans', 'iMobillim', 'iRiders', 'iRspouse', 'iSchool', 'iSept80', 'dWeek89', 'iYearwrk'] with probability 0.9391384103471798
querying iMeans with n_distinct [1, 1]
conditioning on ['iMobillim', 'iRiders', 'iRspouse', 'iSchool', 'iSept80', 'dWeek89', 'iYearwrk'] with probability 0.8931878439602848
querying iMobillim with n_distinct [1, 1, 1]
conditioning on ['iRiders', 'iRspouse', 'iSchool', 'iSept80', 'dWeek89', 'iYearwrk'] with probability 1.0
querying iRiders with n_distinct [1, 1, 1]
conditioning on ['iRspouse', 'iSchool', 'iSept80', 'dWeek89', 'iYearwrk'] with probability 0.9757637157096997
querying iRspouse with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iSchool', 'iSept80', 'dWeek89', 'iYearwrk'] with probability 0.9815608844539045
querying iSchool with n_distinct [1, 1]
conditioning on ['iSept80', 'dWeek89', 'iYearwrk'] with probability 0.9743668602856296
querying iSept80 with n_distinct [1]
conditioning on

querying iMilitary with n_distinct [1, 1, 1, 1, 1]
conditioning on ['dOccup', 'iRelat2', 'iRlabor', 'iRPOB', 'iSex', 'iSubfam1', 'iYearwrk'] with probability 1.0
querying dOccup with n_distinct [1, 1, 1, 1]
conditioning on ['iRelat2', 'iRlabor', 'iRPOB', 'iSex', 'iSubfam1', 'iYearwrk'] with probability 0.8389555367022419
querying iRelat2 with n_distinct [1]
conditioning on ['iRlabor', 'iRPOB', 'iSex', 'iSubfam1', 'iYearwrk'] with probability 0.9913397137137527
querying iRlabor with n_distinct [1, 1]
conditioning on ['iRPOB', 'iSex', 'iSubfam1', 'iYearwrk'] with probability 0.9192687439962477
querying iRPOB with n_distinct [1]
conditioning on ['iSex', 'iSubfam1', 'iYearwrk'] with probability 0.6347113500492328
querying iSex with n_distinct [1]
conditioning on ['iSubfam1', 'iYearwrk'] with probability 0.5774173890864669
querying iSubfam1 with n_distinct [1]
conditioning on ['iYearwrk'] with probability 0.9858158441706126
querying iYearwrk with n_distinct [1]
conditioning on [] with proba

querying dPoverty with n_distinct [1, 1, 1]
conditioning on ['dRpincome', 'iSept80', 'iSex'] with probability 0.9999999999999998
querying dRpincome with n_distinct [1]
conditioning on ['iSept80', 'iSex'] with probability 0.17189181461299868
querying iSept80 with n_distinct [1]
conditioning on ['iSex'] with probability 0.9858984479412229
querying iSex with n_distinct [1]
conditioning on [] with probability 0.4847285811043064
latency: 419.03818771243095 and error: 1.4209991747603632
Predicting cardinality for query 147: SELECT COUNT(*) FROM climate WHERE iRelat2 = 0 AND dTravtime >= 0 AND dTravtime <= 3 AND iYearsch >= 1 AND iYearsch <= 11
{'iRelat2': [0], 'dTravtime': [1, 2, 0, 3], 'iYearsch': [11, 5, 10, 4, 8, 1, 7, 6, 2, 9, 3]}
decoded query is {'iRelat2': [0], 'dTravtime': [3, 5, 0, 4], 'iYearsch': [1, 2, 0, 4, 7, 8, 6, 9, 15, 11, 14]}
querying iRelat2 with n_distinct [1]
conditioning on ['dTravtime', 'iYearsch'] with probability 0.9856233911760707
querying dTravtime with n_distinct 

querying dIndustry with n_distinct [1, 1]
conditioning on ['iLooking', 'iSubfam1', 'iTmpabsnt', 'iYearsch', 'dYrsserv'] with probability 0.41720899954103774
querying iLooking with n_distinct [1]
conditioning on ['iSubfam1', 'iTmpabsnt', 'iYearsch', 'dYrsserv'] with probability 0.7086752675376571
querying iSubfam1 with n_distinct [1]
conditioning on ['iTmpabsnt', 'iYearsch', 'dYrsserv'] with probability 0.9714092003400445
querying iTmpabsnt with n_distinct [1, 1, 1, 1]
conditioning on ['iYearsch', 'dYrsserv'] with probability 0.9999999999999999
querying iYearsch with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['dYrsserv'] with probability 0.7813737701163525
querying dYrsserv with n_distinct [1]
conditioning on [] with probability 0.8823212117390783
latency: 274.7841775417328 and error: 1.0334985732393527
Predicting cardinality for query 154: SELECT COUNT(*) FROM climate WHERE dAncstry2 >= 1 AND dAncstry2 <= 2 AND dIncome6 = 0 AND dWeek89 >= 0 AND dWeek89 <= 1 AND iWork

querying dIncome2 with n_distinct [1]
conditioning on ['dIncome8', 'iRemplpar', 'iRlabor', 'iSept80', 'dWeek89', 'dYrsserv'] with probability 0.9419272032127994
querying dIncome8 with n_distinct [1]
conditioning on ['iRemplpar', 'iRlabor', 'iSept80', 'dWeek89', 'dYrsserv'] with probability 0.9535393416584814
querying iRemplpar with n_distinct [1, 1, 1, 1]
conditioning on ['iRlabor', 'iSept80', 'dWeek89', 'dYrsserv'] with probability 0.022000808281323676
querying iRlabor with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iSept80', 'dWeek89', 'dYrsserv'] with probability 0.7584431163824598
querying iSept80 with n_distinct [1]
conditioning on ['dWeek89', 'dYrsserv'] with probability 0.9887341363751911
querying dWeek89 with n_distinct [1, 1, 1]
conditioning on ['dYrsserv'] with probability 1.0
querying dYrsserv with n_distinct [1, 1]
conditioning on [] with probability 0.9784870346603425
latency: 296.1599715054035 and error: 1.379924908511953
Predicting cardinality for query 160: SELECT 

querying iAvail with n_distinct [1]
conditioning on ['dIncome4', 'iMilitary', 'iMobillim', 'dOccup', 'iSubfam1'] with probability 0.9588874038501678
querying dIncome4 with n_distinct [1, 1]
conditioning on ['iMilitary', 'iMobillim', 'dOccup', 'iSubfam1'] with probability 1.0
querying iMilitary with n_distinct [1, 1, 1]
conditioning on ['iMobillim', 'dOccup', 'iSubfam1'] with probability 0.9742174490768424
querying iMobillim with n_distinct [1]
conditioning on ['dOccup', 'iSubfam1'] with probability 0.7482802719468602
querying dOccup with n_distinct [1, 1, 1, 1, 1, 1, 1]
conditioning on ['iSubfam1'] with probability 0.9965033376211772
querying iSubfam1 with n_distinct [1]
conditioning on [] with probability 0.9737300597774464
latency: 174.06785115599632 and error: 1.001243604832705
Predicting cardinality for query 166: SELECT COUNT(*) FROM climate WHERE dAncstry1 >= 1 AND dAncstry1 <= 11 AND iAvail = 0 AND dIncome2 = 0 AND dIncome4 = 0 AND iKorean = 0 AND iRPOB >= 10 AND iRPOB <= 22 AND

querying iClass with n_distinct [1]
conditioning on ['dIncome5', 'iPerscare', 'dPwgt1', 'iRagechld', 'iSept80', 'iSubfam2'] with probability 0.565119504135207
querying dIncome5 with n_distinct [1]
conditioning on ['iPerscare', 'dPwgt1', 'iRagechld', 'iSept80', 'iSubfam2'] with probability 0.820612964258717
querying iPerscare with n_distinct [1, 1]
conditioning on ['dPwgt1', 'iRagechld', 'iSept80', 'iSubfam2'] with probability 0.7848428908657854
querying dPwgt1 with n_distinct [1]
conditioning on ['iRagechld', 'iSept80', 'iSubfam2'] with probability 0.5611734366673196
querying iRagechld with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iSept80', 'iSubfam2'] with probability 1.0000000000000002
querying iSept80 with n_distinct [1]
conditioning on ['iSubfam2'] with probability 0.9837925236531742
querying iSubfam2 with n_distinct [1]
conditioning on [] with probability 0.9737300597774464
latency: 531.6165089607239 and error: 1.0103055388222797
Predicting cardinality for query 173: SELECT CO

querying iKorean with n_distinct [1]
conditioning on ['iLang1', 'iMay75880', 'iMobility', 'iMobillim', 'iPerscare', 'dPwgt1', 'iRagechld', 'iYearsch'] with probability 0.9736459150838865
querying iLang1 with n_distinct [1]
conditioning on ['iMay75880', 'iMobility', 'iMobillim', 'iPerscare', 'dPwgt1', 'iRagechld', 'iYearsch'] with probability 0.8510815723494343
querying iMay75880 with n_distinct [1]
conditioning on ['iMobility', 'iMobillim', 'iPerscare', 'dPwgt1', 'iRagechld', 'iYearsch'] with probability 0.9864903297778806
querying iMobility with n_distinct [1]
conditioning on ['iMobillim', 'iPerscare', 'dPwgt1', 'iRagechld', 'iYearsch'] with probability 0.45094192268246625
querying iMobillim with n_distinct [1]
conditioning on ['iPerscare', 'dPwgt1', 'iRagechld', 'iYearsch'] with probability 0.8387150107917645
querying iPerscare with n_distinct [1, 1, 1]
conditioning on ['dPwgt1', 'iRagechld', 'iYearsch'] with probability 1.0
querying dPwgt1 with n_distinct [1]
conditioning on ['iRage

querying iLang1 with n_distinct [1, 1]
conditioning on ['dPoverty', 'iRelat1', 'iSept80'] with probability 0.8797462701378597
querying dPoverty with n_distinct [1]
conditioning on ['iRelat1', 'iSept80'] with probability 0.8876459508667092
querying iRelat1 with n_distinct [1, 1, 1]
conditioning on ['iSept80'] with probability 0.5287391746696944
querying iSept80 with n_distinct [1]
conditioning on [] with probability 0.9839863970206871
latency: 205.546323210001 and error: 1.010392396358299
Predicting cardinality for query 186: SELECT COUNT(*) FROM climate WHERE dAge >= 1 AND dAge <= 5 AND iFertil = 0 AND dHispanic = 0 AND dHour89 >= 0 AND dHour89 <= 3 AND iMobillim >= 0 AND iMobillim <= 2 AND iRemplpar >= 0 AND iRemplpar <= 111 AND iRvetserv = 0 AND iSept80 = 0
{'dAge': [5, 3, 4, 1, 2], 'iFertil': [0], 'dHispanic': [0], 'dHour89': [1, 3, 0, 2], 'iMobillim': [2, 0, 1], 'iRemplpar': [0, 111], 'iRvetserv': [0], 'iSept80': [0]}
decoded query is {'dAge': [5, 2, 1, 0, 6], 'iFertil': [0], 'dHis

querying dAncstry1 with n_distinct [1]
conditioning on ['dIncome2', 'dIncome8', 'iMobility', 'iOthrserv', 'iRemplpar', 'iRownchld', 'iRspouse', 'iSex', 'iTmpabsnt', 'dTravtime', 'dWeek89', 'iYearsch'] with probability 0.5323424838265771
querying dIncome2 with n_distinct [1]
conditioning on ['dIncome8', 'iMobility', 'iOthrserv', 'iRemplpar', 'iRownchld', 'iRspouse', 'iSex', 'iTmpabsnt', 'dTravtime', 'dWeek89', 'iYearsch'] with probability 0.9167019348786227
querying dIncome8 with n_distinct [1]
conditioning on ['iMobility', 'iOthrserv', 'iRemplpar', 'iRownchld', 'iRspouse', 'iSex', 'iTmpabsnt', 'dTravtime', 'dWeek89', 'iYearsch'] with probability 0.9411980619395671
querying iMobility with n_distinct [1]
conditioning on ['iOthrserv', 'iRemplpar', 'iRownchld', 'iRspouse', 'iSex', 'iTmpabsnt', 'dTravtime', 'dWeek89', 'iYearsch'] with probability 0.5437587983904564
querying iOthrserv with n_distinct [1]
conditioning on ['iRemplpar', 'iRownchld', 'iRspouse', 'iSex', 'iTmpabsnt', 'dTravtime',

querying dIncome5 with n_distinct [1, 1]
conditioning on ['dIndustry', 'iMay75880', 'iRelat2', 'iRemplpar'] with probability 1.0
querying dIndustry with n_distinct [1, 1]
conditioning on ['iMay75880', 'iRelat2', 'iRemplpar'] with probability 0.24229493511041789
querying iMay75880 with n_distinct [1]
conditioning on ['iRelat2', 'iRemplpar'] with probability 0.9867273522858223
querying iRelat2 with n_distinct [1]
conditioning on ['iRemplpar'] with probability 0.9861180762574587
querying iRemplpar with n_distinct [1]
conditioning on [] with probability 0.7497007059799821
latency: 288.00829127430916 and error: 1.1184528605962933
Predicting cardinality for query 201: SELECT COUNT(*) FROM climate WHERE iDisabl1 = 2 AND dIndustry >= 0 AND dIndustry <= 8 AND dPOB = 0 AND iRrelchld = 1 AND iRvetserv >= 0 AND iRvetserv <= 1
{'iDisabl1': [2], 'dIndustry': [4, 1, 0, 7, 6, 3, 5, 8, 2], 'dPOB': [0], 'iRrelchld': [1], 'iRvetserv': [0, 1]}
decoded query is {'iDisabl1': [0], 'dIndustry': [3, 9, 0, 2, 8

querying iEnglish with n_distinct [1]
conditioning on ['iFeb55', 'iFertil', 'dIncome4', 'iMilitary', 'dRearning', 'iRelat1', 'iVietnam'] with probability 0.8739846819504282
querying iFeb55 with n_distinct [1]
conditioning on ['iFertil', 'dIncome4', 'iMilitary', 'dRearning', 'iRelat1', 'iVietnam'] with probability 1.0
querying iFertil with n_distinct [1, 1]
conditioning on ['dIncome4', 'iMilitary', 'dRearning', 'iRelat1', 'iVietnam'] with probability 0.6219917542322013
querying dIncome4 with n_distinct [1, 1]
conditioning on ['iMilitary', 'dRearning', 'iRelat1', 'iVietnam'] with probability 1.0
querying iMilitary with n_distinct [1]
conditioning on ['dRearning', 'iRelat1', 'iVietnam'] with probability 0.5881833244153976
querying dRearning with n_distinct [1, 1, 1]
conditioning on ['iRelat1', 'iVietnam'] with probability 0.7171894749398848
querying iRelat1 with n_distinct [1, 1, 1]
conditioning on ['iVietnam'] with probability 0.8781819641557265
querying iVietnam with n_distinct [1]
cond

querying iEnglish with n_distinct [1]
conditioning on ['dHour89', 'iKorean', 'iMilitary', 'iMobility', 'iYearwrk'] with probability 0.8878935544280127
querying dHour89 with n_distinct [1]
conditioning on ['iKorean', 'iMilitary', 'iMobility', 'iYearwrk'] with probability 0.46940072548949124
querying iKorean with n_distinct [1]
conditioning on ['iMilitary', 'iMobility', 'iYearwrk'] with probability 0.9750506651207327
querying iMilitary with n_distinct [1, 1, 1]
conditioning on ['iMobility', 'iYearwrk'] with probability 1.0
querying iMobility with n_distinct [1]
conditioning on ['iYearwrk'] with probability 0.5458235928746464
querying iYearwrk with n_distinct [1]
conditioning on [] with probability 0.05926733474759843
latency: 191.2788785994053 and error: 1.3130053223906066
Predicting cardinality for query 218: SELECT COUNT(*) FROM climate WHERE dAncstry1 = 1 AND dIncome1 >= 1 AND dIncome1 <= 3 AND iLang1 = 2 AND iOthrserv = 0 AND iSubfam2 = 0
{'dAncstry1': [1], 'dIncome1': [2, 1, 3], 'iL

querying dIncome4 with n_distinct [1]
conditioning on ['dIncome5', 'iWork89'] with probability 0.76751099600893
querying dIncome5 with n_distinct [1]
conditioning on ['iWork89'] with probability 0.943464514835208
querying iWork89 with n_distinct [1, 1]
conditioning on [] with probability 0.7655849504837722
latency: 105.38922622799873 and error: 1.1240732788640408
Predicting cardinality for query 227: SELECT COUNT(*) FROM climate WHERE iClass = 0 AND dIncome3 = 0 AND dIncome8 = 0 AND dPoverty = 2 AND iRagechld >= 0 AND iRagechld <= 4
{'iClass': [0], 'dIncome3': [0], 'dIncome8': [0], 'dPoverty': [2], 'iRagechld': [4, 2, 0, 3, 1]}
decoded query is {'iClass': [0], 'dIncome3': [0], 'dIncome8': [0], 'dPoverty': [0], 'iRagechld': [1, 2, 0, 4, 3]}
querying iClass with n_distinct [1]
conditioning on ['dIncome3', 'dIncome8', 'dPoverty', 'iRagechld'] with probability 0.4153564339992007
querying dIncome3 with n_distinct [1]
conditioning on ['dIncome8', 'dPoverty', 'iRagechld'] with probability 0.9

querying dIncome4 with n_distinct [1]
conditioning on ['dIncome5', 'dPwgt1', 'iRelat2', 'iRPOB', 'iRrelchld', 'iSept80', 'iVietnam'] with probability 0.7471602506373153
querying dIncome5 with n_distinct [1]
conditioning on ['dPwgt1', 'iRelat2', 'iRPOB', 'iRrelchld', 'iSept80', 'iVietnam'] with probability 0.8143557046980255
querying dPwgt1 with n_distinct [1]
conditioning on ['iRelat2', 'iRPOB', 'iRrelchld', 'iSept80', 'iVietnam'] with probability 0.5835615600662185
querying iRelat2 with n_distinct [1]
conditioning on ['iRPOB', 'iRrelchld', 'iSept80', 'iVietnam'] with probability 0.9861103275012992
querying iRPOB with n_distinct [1]
conditioning on ['iRrelchld', 'iSept80', 'iVietnam'] with probability 0.09349798038587405
querying iRrelchld with n_distinct [1]
conditioning on ['iSept80', 'iVietnam'] with probability 0.7336800106054856
querying iSept80 with n_distinct [1]
conditioning on ['iVietnam'] with probability 0.9859789627590808
querying iVietnam with n_distinct [1]
conditioning o

querying iEnglish with n_distinct [1]
conditioning on ['dIncome4', 'dIncome8', 'iRagechld'] with probability 0.8756499796472788
querying dIncome4 with n_distinct [1]
conditioning on ['dIncome8', 'iRagechld'] with probability 0.8053853633494751
querying dIncome8 with n_distinct [1]
conditioning on ['iRagechld'] with probability 0.9584653528781244
querying iRagechld with n_distinct [1, 1, 1, 1, 1]
conditioning on [] with probability 1.0
latency: 198.87376576662064 and error: 1.0447751453586192
Predicting cardinality for query 242: SELECT COUNT(*) FROM climate WHERE dAncstry1 >= 1 AND dAncstry1 <= 7 AND iDisabl1 = 2 AND iKorean = 0 AND iMay75880 = 0 AND iRagechld >= 0 AND iRagechld <= 4 AND iRPOB >= 10 AND iRPOB <= 22 AND iRvetserv = 0 AND iYearwrk >= 0 AND iYearwrk <= 1
{'dAncstry1': [1, 3, 2, 4, 6, 7, 5], 'iDisabl1': [2], 'iKorean': [0], 'iMay75880': [0], 'iRagechld': [4, 2, 0, 3, 1], 'iRPOB': [22, 10, 21], 'iRvetserv': [0], 'iYearwrk': [1, 0]}
decoded query is {'dAncstry1': [0, 3, 4, 7

querying iOthrserv with n_distinct [1]
conditioning on ['dPwgt1', 'dWeek89', 'iWorklwk', 'iYearsch', 'iYearwrk'] with probability 0.9979588088608697
querying dPwgt1 with n_distinct [1, 1]
conditioning on ['dWeek89', 'iWorklwk', 'iYearsch', 'iYearwrk'] with probability 0.8003311409122389
querying dWeek89 with n_distinct [1, 1]
conditioning on ['iWorklwk', 'iYearsch', 'iYearwrk'] with probability 0.4682526236098556
querying iWorklwk with n_distinct [1, 1]
conditioning on ['iYearsch', 'iYearwrk'] with probability 0.6300361045480352
querying iYearsch with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['iYearwrk'] with probability 0.7649770442978365
querying iYearwrk with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on [] with probability 0.7188474892048725
latency: 808.2402348518372 and error: 1.2738830366872502
Predicting cardinality for query 246: SELECT COUNT(*) FROM climate WHERE dAncstry2 = 1 AND iMobillim = 2 AND dRpincome >= 0 AND dRpincome <= 2 AND iVietnam = 0 AND 

querying dIncome2 with n_distinct [1]
conditioning on ['dIndustry', 'iKorean', 'iMarital', 'iMay75880', 'iMeans', 'iRrelchld'] with probability 0.958252969618576
querying dIndustry with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['iKorean', 'iMarital', 'iMay75880', 'iMeans', 'iRrelchld'] with probability 0.8353780561009065
querying iKorean with n_distinct [1]
conditioning on ['iMarital', 'iMay75880', 'iMeans', 'iRrelchld'] with probability 0.980157187575726
querying iMarital with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iMay75880', 'iMeans', 'iRrelchld'] with probability 0.9999999999999999
querying iMay75880 with n_distinct [1]
conditioning on ['iMeans', 'iRrelchld'] with probability 0.9898175795685387
querying iMeans with n_distinct [1, 1]
conditioning on ['iRrelchld'] with probability 0.9413827119312854
querying iRrelchld with n_distinct [1, 1]
conditioning on [] with probability 1.0
latency: 460.8088470995426 and error: 1.0026567097215553
Predicting cardinality for q

querying iMilitary with n_distinct [1, 1, 1, 1, 1]
conditioning on ['dRearning', 'iRPOB', 'iSubfam2', 'iYearsch'] with probability 1.0
querying dRearning with n_distinct [1, 1, 1]
conditioning on ['iRPOB', 'iSubfam2', 'iYearsch'] with probability 0.5961578598732448
querying iRPOB with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['iSubfam2', 'iYearsch'] with probability 1.0000000000000002
querying iSubfam2 with n_distinct [1]
conditioning on ['iYearsch'] with probability 0.9881967943412391
querying iYearsch with n_distinct [1, 1]
conditioning on [] with probability 0.11651578234419524
latency: 417.59467124938965 and error: 1.9096093211153808
Predicting cardinality for query 259: SELECT COUNT(*) FROM climate WHERE iClass >= 0 AND iClass <= 1 AND dIncome5 = 1 AND iSept80 = 0
{'iClass': [1, 0], 'dIncome5': [1], 'iSept80': [0]}
decoded query is {'iClass': [1, 0], 'dIncome5': [1], 'iSept80': [0]}
querying iClass with n_distinct [1, 1]
conditioning on ['dIncome5', 'i

querying iMilitary with n_distinct [1]
conditioning on ['iRelat1', 'iRPOB'] with probability 0.514555816856693
querying iRelat1 with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iRPOB'] with probability 0.558989770774186
querying iRPOB with n_distinct [1]
conditioning on [] with probability 0.63150448381697
latency: 239.5724467933178 and error: 1.2406979618627092
Predicting cardinality for query 266: SELECT COUNT(*) FROM climate WHERE dAncstry2 = 2 AND iFeb55 = 0 AND dIncome1 = 0 AND dIncome3 = 0 AND dPoverty >= 1 AND dPoverty <= 2 AND iRelat1 = 0 AND iRspouse >= 1 AND iRspouse <= 6 AND iSex = 0 AND iSubfam2 = 0
{'dAncstry2': [2], 'iFeb55': [0], 'dIncome1': [0], 'dIncome3': [0], 'dPoverty': [2, 1], 'iRelat1': [0], 'iRspouse': [3, 1, 6, 4, 5, 2], 'iSex': [0], 'iSubfam2': [0]}
decoded query is {'dAncstry2': [1], 'iFeb55': [0], 'dIncome1': [0], 'dIncome3': [0], 'dPoverty': [0, 1], 'iRelat1': [0], 'iRspouse': [4, 0, 2, 3, 5, 6], 'iSex': [1], 'iSubfam2': [0]}
Predicting cardinality for q

querying iAvail with n_distinct [1]
conditioning on ['dHour89', 'dHours', 'dPwgt1', 'iRownchld', 'dTravtime', 'dWeek89', 'iWork89', 'iWWII', 'dYrsserv'] with probability 0.913703358176223
querying dHour89 with n_distinct [1, 1]
conditioning on ['dHours', 'dPwgt1', 'iRownchld', 'dTravtime', 'dWeek89', 'iWork89', 'iWWII', 'dYrsserv'] with probability 0.23432427322642208
querying dHours with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['dPwgt1', 'iRownchld', 'dTravtime', 'dWeek89', 'iWork89', 'iWWII', 'dYrsserv'] with probability 1.0
querying dPwgt1 with n_distinct [1]
conditioning on ['iRownchld', 'dTravtime', 'dWeek89', 'iWork89', 'iWWII', 'dYrsserv'] with probability 0.5608418356300552
querying iRownchld with n_distinct [1, 1]
conditioning on ['dTravtime', 'dWeek89', 'iWork89', 'iWWII', 'dYrsserv'] with probability 1.0
querying dTravtime with n_distinct [1, 1, 1]
conditioning on ['dWeek89', 'iWork89', 'iWWII', 'dYrsserv'] with probability 0.46489965095998986
querying dWeek89 with n_d

querying iTmpabsnt with n_distinct [1, 1, 1, 1]
conditioning on ['dTravtime', 'dWeek89', 'iWorklwk'] with probability 1.0
querying dTravtime with n_distinct [1, 1, 1, 1, 1]
conditioning on ['dWeek89', 'iWorklwk'] with probability 0.3783702562759581
querying dWeek89 with n_distinct [1, 1]
conditioning on ['iWorklwk'] with probability 0.6262398783439649
querying iWorklwk with n_distinct [1, 1]
conditioning on [] with probability 0.7664550692861081
latency: 228.57607156038284 and error: 1.2216972222550142
Predicting cardinality for query 277: SELECT COUNT(*) FROM climate WHERE dAncstry2 >= 1 AND dAncstry2 <= 2 AND iFertil >= 0 AND iFertil <= 3 AND dIncome2 = 0 AND iMay75880 = 0 AND dPOB = 0
{'dAncstry2': [1, 2], 'iFertil': [1, 3, 0, 2], 'dIncome2': [0], 'iMay75880': [0], 'dPOB': [0]}
decoded query is {'dAncstry2': [0, 1], 'iFertil': [1, 2, 0, 3], 'dIncome2': [0], 'iMay75880': [0], 'dPOB': [0]}
querying dAncstry2 with n_distinct [1, 1]
conditioning on ['iFertil', 'dIncome2', 'iMay75880', '

querying iDisabl2 with n_distinct [1, 1, 1]
conditioning on ['dHispanic', 'dIncome3', 'dPOB', 'iRPOB', 'iWork89'] with probability 1.0
querying dHispanic with n_distinct [1]
conditioning on ['dIncome3', 'dPOB', 'iRPOB', 'iWork89'] with probability 0.977183018015696
querying dIncome3 with n_distinct [1]
conditioning on ['dPOB', 'iRPOB', 'iWork89'] with probability 0.9816156907486234
querying dPOB with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iRPOB', 'iWork89'] with probability 0.999349240672479
querying iRPOB with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['iWork89'] with probability 0.9288667733307581
querying iWork89 with n_distinct [1]
conditioning on [] with probability 0.5318874743978017
latency: 366.71239510178566 and error: 1.0099184594031922
Predicting cardinality for query 284: SELECT COUNT(*) FROM climate WHERE iAvail = 0 AND iDisabl2 >= 0 AND iDisabl2 <= 1 AND dIncome2 = 0 AND dIncome4 = 0 AND iLooking = 0 AND dPoverty = 2 AND dPwgt1 = 1 AND iRlabor 

querying iFertil with n_distinct [1]
conditioning on ['dHours', 'dIncome3', 'iLang1', 'iPerscare', 'iRemplpar', 'iRrelchld', 'dWeek89'] with probability 0.5184785083727494
querying dHours with n_distinct [1, 1, 1, 1]
conditioning on ['dIncome3', 'iLang1', 'iPerscare', 'iRemplpar', 'iRrelchld', 'dWeek89'] with probability 0.8205562138341828
querying dIncome3 with n_distinct [1]
conditioning on ['iLang1', 'iPerscare', 'iRemplpar', 'iRrelchld', 'dWeek89'] with probability 0.9861387937909041
querying iLang1 with n_distinct [1]
conditioning on ['iPerscare', 'iRemplpar', 'iRrelchld', 'dWeek89'] with probability 0.8676346967448678
querying iPerscare with n_distinct [1, 1, 1]
conditioning on ['iRemplpar', 'iRrelchld', 'dWeek89'] with probability 1.0
querying iRemplpar with n_distinct [1]
conditioning on ['iRrelchld', 'dWeek89'] with probability 0.7497007059799821
querying iRrelchld with n_distinct [1, 1]
conditioning on ['dWeek89'] with probability 1.0
querying dWeek89 with n_distinct [1, 1, 1

querying dAge with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iDisabl2', 'dHispanic', 'dHours', 'dIncome1', 'iRelat1', 'iRrelchld', 'iRvetserv', 'iYearwrk'] with probability 0.960577882341917
querying iDisabl2 with n_distinct [1, 1]
conditioning on ['dHispanic', 'dHours', 'dIncome1', 'iRelat1', 'iRrelchld', 'iRvetserv', 'iYearwrk'] with probability 0.2116506779381443
querying dHispanic with n_distinct [1]
conditioning on ['dHours', 'dIncome1', 'iRelat1', 'iRrelchld', 'iRvetserv', 'iYearwrk'] with probability 0.9660392785409897
querying dHours with n_distinct [1]
conditioning on ['dIncome1', 'iRelat1', 'iRrelchld', 'iRvetserv', 'iYearwrk'] with probability 0.26338874801315404
querying dIncome1 with n_distinct [1]
conditioning on ['iRelat1', 'iRrelchld', 'iRvetserv', 'iYearwrk'] with probability 0.1188040212915533
querying iRelat1 with n_distinct [1, 1]
conditioning on ['iRrelchld', 'iRvetserv', 'iYearwrk'] with probability 0.48438018465054783
querying iRrelchld with n_distinct [1, 1]


querying dAge with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['dIncome1', 'dIncome3', 'dIncome7', 'iMilitary', 'dRearning', 'iSubfam2', 'iYearwrk'] with probability 0.9999999999999999
querying dIncome1 with n_distinct [1, 1]
conditioning on ['dIncome3', 'dIncome7', 'iMilitary', 'dRearning', 'iSubfam2', 'iYearwrk'] with probability 0.9993207276770721
querying dIncome3 with n_distinct [1]
conditioning on ['dIncome7', 'iMilitary', 'dRearning', 'iSubfam2', 'iYearwrk'] with probability 0.9823342343862388
querying dIncome7 with n_distinct [1]
conditioning on ['iMilitary', 'dRearning', 'iSubfam2', 'iYearwrk'] with probability 0.9536181076699883
querying iMilitary with n_distinct [1, 1]
conditioning on ['dRearning', 'iSubfam2', 'iYearwrk'] with probability 0.8376413799814818
querying dRearning with n_distinct [1, 1, 1]
conditioning on ['iSubfam2', 'iYearwrk'] with probability 0.4836876643447441
querying iSubfam2 with n_distinct [1]
conditioning on ['iYearwrk'] with probability 0.9858158441

querying dRpincome with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iSubfam1', 'iWWII'] with probability 1.0
querying iSubfam1 with n_distinct [1]
conditioning on ['iWWII'] with probability 0.9732643772056382
querying iWWII with n_distinct [1]
conditioning on [] with probability 0.9615549051472877
latency: 178.34478989243507 and error: 1.0138309532888896
Predicting cardinality for query 309: SELECT COUNT(*) FROM climate WHERE iAvail = 0 AND dHispanic = 0 AND dIndustry >= 8 AND dIndustry <= 11 AND iMilitary = 4 AND iRagechld >= 0 AND iRagechld <= 2 AND iRiders >= 1 AND iRiders <= 2 AND iTmpabsnt >= 0 AND iTmpabsnt <= 3 AND dTravtime >= 0 AND dTravtime <= 5 AND iVietnam = 0 AND dWeek89 >= 0 AND dWeek89 <= 2
{'iAvail': [0], 'dHispanic': [0], 'dIndustry': [10, 9, 8, 11], 'iMilitary': [4], 'iRagechld': [2, 0, 1], 'iRiders': [1, 2], 'iTmpabsnt': [0, 3, 1, 2], 'dTravtime': [5, 1, 2, 0, 4, 3], 'iVietnam': [0], 'dWeek89': [2, 1, 0]}
decoded query is {'iAvail': [0], 'dHispanic': [0], 'dIndus

querying iPerscare with n_distinct [1]
conditioning on ['dPwgt1', 'iRrelchld'] with probability 0.9262774051755842
querying dPwgt1 with n_distinct [1, 1, 1]
conditioning on ['iRrelchld'] with probability 0.9730481586331505
querying iRrelchld with n_distinct [1]
conditioning on [] with probability 0.744034967467157
latency: 112.39887773990631 and error: 1.0151179882042547
Predicting cardinality for query 317: SELECT COUNT(*) FROM climate WHERE iDisabl1 >= 0 AND iDisabl1 <= 2 AND iFeb55 = 0 AND iImmigr = 0 AND dIncome6 = 0 AND iLooking = 0 AND iMeans = 0 AND dOccup >= 0 AND dOccup <= 5 AND iRownchld >= 0 AND iRownchld <= 1 AND iRspouse = 1
{'iDisabl1': [2, 0, 1], 'iFeb55': [0], 'iImmigr': [0], 'dIncome6': [0], 'iLooking': [0], 'iMeans': [0], 'dOccup': [3, 2, 4, 0, 1, 5], 'iRownchld': [0, 1], 'iRspouse': [1]}
decoded query is {'iDisabl1': [0, 1, 2], 'iFeb55': [0], 'iImmigr': [0], 'dIncome6': [0], 'iLooking': [0], 'iMeans': [0], 'dOccup': [4, 1, 6, 0, 2, 5], 'iRownchld': [0, 1], 'iRspouse'

querying iLooking with n_distinct [1]
conditioning on ['iOthrserv', 'dPOB'] with probability 0.7170993209079164
querying iOthrserv with n_distinct [1]
conditioning on ['dPOB'] with probability 0.9984902088288353
querying dPOB with n_distinct [1, 1, 1, 1, 1]
conditioning on [] with probability 0.994977799563517
latency: 198.4882690012455 and error: 1.0166491991925104
Predicting cardinality for query 325: SELECT COUNT(*) FROM climate WHERE iAvail >= 0 AND iAvail <= 4 AND dHours >= 0 AND dHours <= 3 AND dIncome5 = 0 AND iLooking >= 0 AND iLooking <= 2 AND dPwgt1 >= 1 AND dPwgt1 <= 2 AND iRelat1 >= 0 AND iRelat1 <= 2 AND iSchool = 1 AND dTravtime >= 0 AND dTravtime <= 2
{'iAvail': [0, 3, 4, 2, 1], 'dHours': [3, 1, 0, 2], 'dIncome5': [0], 'iLooking': [0, 2, 1], 'dPwgt1': [1, 2], 'iRelat1': [0, 1, 2], 'iSchool': [1], 'dTravtime': [1, 2, 0]}
decoded query is {'iAvail': [0, 2, 1, 3, 4], 'dHours': [1, 3, 0, 5], 'dIncome5': [0], 'iLooking': [0, 1, 2], 'dPwgt1': [0, 1], 'iRelat1': [0, 2, 1], 'iSc

querying iClass with n_distinct [1]
conditioning on ['iDisabl1', 'iKorean', 'iPerscare', 'dPoverty', 'dPwgt1', 'iRownchld'] with probability 0.5303621976455427
querying iDisabl1 with n_distinct [1, 1]
conditioning on ['iKorean', 'iPerscare', 'dPoverty', 'dPwgt1', 'iRownchld'] with probability 0.9973599367599987
querying iKorean with n_distinct [1]
conditioning on ['iPerscare', 'dPoverty', 'dPwgt1', 'iRownchld'] with probability 0.973271958387106
querying iPerscare with n_distinct [1]
conditioning on ['dPoverty', 'dPwgt1', 'iRownchld'] with probability 0.909700916543088
querying dPoverty with n_distinct [1]
conditioning on ['dPwgt1', 'iRownchld'] with probability 0.8585083292706851
querying dPwgt1 with n_distinct [1]
conditioning on ['iRownchld'] with probability 0.5608997185183673
querying iRownchld with n_distinct [1]
conditioning on [] with probability 0.7630758842038251
latency: 439.43068012595177 and error: 1.0680613173539457
Predicting cardinality for query 331: SELECT COUNT(*) FR

querying iRelat2 with n_distinct [1]
conditioning on ['iRrelchld', 'iRvetserv', 'iSept80', 'iSubfam2'] with probability 0.9900534612086188
querying iRrelchld with n_distinct [1]
conditioning on ['iRvetserv', 'iSept80', 'iSubfam2'] with probability 0.7266588392188185
querying iRvetserv with n_distinct [1]
conditioning on ['iSept80', 'iSubfam2'] with probability 0.8954233802620123
querying iSept80 with n_distinct [1]
conditioning on ['iSubfam2'] with probability 0.9837925236531742
querying iSubfam2 with n_distinct [1]
conditioning on [] with probability 0.9737300597774464
latency: 535.5027578771114 and error: 1.0851061262413573
Predicting cardinality for query 336: SELECT COUNT(*) FROM climate WHERE iDisabl1 >= 1 AND iDisabl1 <= 2 AND dIncome8 = 0 AND iMilitary = 4 AND dOccup = 0 AND iRemplpar >= 0 AND iRemplpar <= 222
{'iDisabl1': [2, 1], 'dIncome8': [0], 'iMilitary': [4], 'dOccup': [0], 'iRemplpar': [0, 121, 112, 211, 111, 122, 222, 221, 113, 141, 114, 213, 134, 133, 212]}
decoded quer

querying dIncome4 with n_distinct [1]
conditioning on ['iLang1', 'iLooking', 'dOccup', 'iRemplpar', 'iRspouse', 'iYearsch', 'dYrsserv'] with probability 0.8910479591678466
querying iLang1 with n_distinct [1]
conditioning on ['iLooking', 'dOccup', 'iRemplpar', 'iRspouse', 'iYearsch', 'dYrsserv'] with probability 0.8335167665452032
querying iLooking with n_distinct [1, 1, 1]
conditioning on ['dOccup', 'iRemplpar', 'iRspouse', 'iYearsch', 'dYrsserv'] with probability 1.0
querying dOccup with n_distinct [1, 1, 1]
conditioning on ['iRemplpar', 'iRspouse', 'iYearsch', 'dYrsserv'] with probability 0.8729040408116161
querying iRemplpar with n_distinct [1]
conditioning on ['iRspouse', 'iYearsch', 'dYrsserv'] with probability 0.10604503145799811
querying iRspouse with n_distinct [1]
conditioning on ['iYearsch', 'dYrsserv'] with probability 0.17854727340380583
querying iYearsch with n_distinct [1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['dYrsserv'] with probability 0.7027170963915998
querying dYrss

querying dHour89 with n_distinct [1, 1, 1]
conditioning on ['iMay75880', 'iMobility', 'iRelat1', 'iRlabor'] with probability 0.5280891427254437
querying iMay75880 with n_distinct [1]
conditioning on ['iMobility', 'iRelat1', 'iRlabor'] with probability 0.9866854678770072
querying iMobility with n_distinct [1, 1]
conditioning on ['iRelat1', 'iRlabor'] with probability 0.5525868665564208
querying iRelat1 with n_distinct [1]
conditioning on ['iRlabor'] with probability 0.367390274113864
querying iRlabor with n_distinct [1, 1, 1, 1, 1, 1, 1]
conditioning on [] with probability 1.0
latency: 125.4231408238411 and error: 1.0998587343613864
Predicting cardinality for query 347: SELECT COUNT(*) FROM climate WHERE iFertil = 0 AND iMilitary = 4 AND iMobility = 2 AND dOccup >= 2 AND dOccup <= 6 AND iSchool >= 1 AND iSchool <= 2 AND iVietnam = 0 AND iYearwrk = 1
{'iFertil': [0], 'iMilitary': [4], 'iMobility': [2], 'dOccup': [3, 2, 4, 6, 5], 'iSchool': [1, 2], 'iVietnam': [0], 'iYearwrk': [1]}
decode

querying dIncome7 with n_distinct [1]
conditioning on ['dIncome8', 'iMilitary', 'dPoverty', 'dRearning', 'iSept80', 'dTravtime', 'iWorklwk'] with probability 0.9488636732963392
querying dIncome8 with n_distinct [1]
conditioning on ['iMilitary', 'dPoverty', 'dRearning', 'iSept80', 'dTravtime', 'iWorklwk'] with probability 0.9365143478732646
querying iMilitary with n_distinct [1]
conditioning on ['dPoverty', 'dRearning', 'iSept80', 'dTravtime', 'iWorklwk'] with probability 0.8402843297719542
querying dPoverty with n_distinct [1]
conditioning on ['dRearning', 'iSept80', 'dTravtime', 'iWorklwk'] with probability 0.8569982643463583
querying dRearning with n_distinct [1]
conditioning on ['iSept80', 'dTravtime', 'iWorklwk'] with probability 0.19646219034845716
querying iSept80 with n_distinct [1]
conditioning on ['dTravtime', 'iWorklwk'] with probability 0.9802913654140151
querying dTravtime with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iWorklwk'] with probability 0.9667414318509915
qu

querying dIncome7 with n_distinct [1]
conditioning on ['iMilitary', 'dOccup', 'iRagechld', 'iRownchld'] with probability 0.9454676257391706
querying iMilitary with n_distinct [1, 1]
conditioning on ['dOccup', 'iRagechld', 'iRownchld'] with probability 0.15727750932863674
querying dOccup with n_distinct [1]
conditioning on ['iRagechld', 'iRownchld'] with probability 0.11653352212671357
querying iRagechld with n_distinct [1]
conditioning on ['iRownchld'] with probability 0.42853515882355214
querying iRownchld with n_distinct [1]
conditioning on [] with probability 0.7630758842038251
latency: 692.0105628669262 and error: 2.565715207224641
Predicting cardinality for query 358: SELECT COUNT(*) FROM climate WHERE dDepart >= 0 AND dDepart <= 4 AND iDisabl2 = 2 AND dHours >= 1 AND dHours <= 2 AND iImmigr >= 0 AND iImmigr <= 2 AND dIncome8 = 0 AND iPerscare = 2 AND iRemplpar = 0 AND iRPOB = 10 AND iRspouse = 1 AND iSex >= 0 AND iSex <= 1 AND iVietnam = 0
{'dDepart': [3, 4, 0, 2, 1], 'iDisabl2':

querying iDisabl2 with n_distinct [1]
conditioning on ['iPerscare', 'iRiders', 'dWeek89'] with probability 0.5018727554006458
querying iPerscare with n_distinct [1, 1, 1]
conditioning on ['iRiders', 'dWeek89'] with probability 1.0
querying iRiders with n_distinct [1]
conditioning on ['dWeek89'] with probability 0.6035459680224222
querying dWeek89 with n_distinct [1, 1, 1]
conditioning on [] with probability 1.0
latency: 195.75512781739235 and error: 1.0122430760728633
Predicting cardinality for query 363: SELECT COUNT(*) FROM climate WHERE dAncstry1 >= 0 AND dAncstry1 <= 11 AND iAvail = 0 AND dIndustry >= 9 AND dIndustry <= 10 AND iMay75880 = 0 AND iMobility >= 1 AND iMobility <= 2 AND iMobillim >= 0 AND iMobillim <= 1 AND iRagechld >= 0 AND iRagechld <= 4 AND iRelat2 = 0 AND iSubfam1 = 0 AND iVietnam = 0
{'dAncstry1': [0, 1, 11, 3, 2, 9, 8, 10, 4, 6, 7, 5], 'iAvail': [0], 'dIndustry': [10, 9], 'iMay75880': [0], 'iMobility': [2, 1], 'iMobillim': [0, 1], 'iRagechld': [4, 2, 0, 3, 1], 'i

querying iRPOB with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['iRrelchld', 'iSubfam2', 'iWWII'] with probability 0.215373399027002
querying iRrelchld with n_distinct [1]
conditioning on ['iSubfam2', 'iWWII'] with probability 0.7454825384725023
querying iSubfam2 with n_distinct [1]
conditioning on ['iWWII'] with probability 0.9732643772056381
querying iWWII with n_distinct [1]
conditioning on [] with probability 0.9615549051472877
latency: 877.4432614445686 and error: 1.534075350028709
Predicting cardinality for query 368: SELECT COUNT(*) FROM climate WHERE dAge >= 5 AND dAge <= 7 AND dIncome5 = 0 AND dPOB = 4 AND iSubfam1 = 0
{'dAge': [5, 6, 7], 'dIncome5': [0], 'dPOB': [4], 'iSubfam1': [0]}
decoded query is {'dAge': [5, 3, 4], 'dIncome5': [0], 'dPOB': [1], 'iSubfam1': [0]}
querying dAge with n_distinct [1, 1, 1]
conditioning on ['dIncome5', 'dPOB', 'iSubfam1'] with probability 0.2882354031436064
querying dIncome5 with n_distinct [1]
conditioning on ['dPOB', 'iSubfa

querying dDepart with n_distinct [1, 1, 1]
conditioning on ['iEnglish', 'dIncome7', 'iMilitary', 'iRPOB', 'dYrsserv'] with probability 0.6855127067370023
querying iEnglish with n_distinct [1, 1, 1, 1]
conditioning on ['dIncome7', 'iMilitary', 'iRPOB', 'dYrsserv'] with probability 0.9944076553690772
querying dIncome7 with n_distinct [1]
conditioning on ['iMilitary', 'iRPOB', 'dYrsserv'] with probability 0.933922903228563
querying iMilitary with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iRPOB', 'dYrsserv'] with probability 0.9999999999999999
querying iRPOB with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['dYrsserv'] with probability 0.9171819886372007
querying dYrsserv with n_distinct [1, 1]
conditioning on [] with probability 0.9784870346603425
latency: 317.4968808889389 and error: 1.0046136134347283
Predicting cardinality for query 375: SELECT COUNT(*) FROM climate WHERE dAncstry2 >= 1 AND dAncstry2 <= 2 AND iEnglish >= 0 AND iEnglish <= 2 AND iPerscare = 2 AND dPOB = 0 AND dPove

querying iRrelchld with n_distinct [1, 1]
conditioning on ['iRvetserv', 'dYrsserv'] with probability 1.0
querying iRvetserv with n_distinct [1]
conditioning on ['dYrsserv'] with probability 1.0
querying dYrsserv with n_distinct [1]
conditioning on [] with probability 0.8823212117390783
latency: 182.36791342496872 and error: 1.0144333363691773
Predicting cardinality for query 380: SELECT COUNT(*) FROM climate WHERE dDepart = 0 AND iImmigr = 0 AND dIncome5 >= 0 AND dIncome5 <= 1 AND iMobillim >= 1 AND iMobillim <= 2 AND iRelat1 = 2 AND iRemplpar >= 0 AND iRemplpar <= 221 AND iTmpabsnt = 0 AND dWeek89 = 0 AND iWorklwk >= 0 AND iWorklwk <= 2 AND iYearsch >= 11 AND iYearsch <= 14
{'dDepart': [0], 'iImmigr': [0], 'dIncome5': [0, 1], 'iMobillim': [2, 1], 'iRelat1': [2], 'iRemplpar': [0, 121, 112, 211, 111, 122, 221, 113, 141, 114, 213, 134, 133, 212], 'iTmpabsnt': [0], 'dWeek89': [0], 'iWorklwk': [1, 2, 0], 'iYearsch': [11, 12, 14, 13]}
decoded query is {'dDepart': [0], 'iImmigr': [0], 'dInco

querying iKorean with n_distinct [1]
conditioning on ['iLooking', 'iMay75880', 'iRemplpar', 'iRvetserv'] with probability 1.0
querying iLooking with n_distinct [1, 1, 1]
conditioning on ['iMay75880', 'iRemplpar', 'iRvetserv'] with probability 1.0
querying iMay75880 with n_distinct [1]
conditioning on ['iRemplpar', 'iRvetserv'] with probability 1.0
querying iRemplpar with n_distinct [1]
conditioning on ['iRvetserv'] with probability 0.722767043412253
querying iRvetserv with n_distinct [1]
conditioning on [] with probability 0.8823212117390782
latency: 212.27290853857994 and error: 1.010087445460686
Predicting cardinality for query 385: SELECT COUNT(*) FROM climate WHERE iCitizen >= 0 AND iCitizen <= 1 AND iFeb55 = 0 AND iMay75880 = 0 AND iRemplpar = 0 AND iRspouse = 1 AND iRvetserv = 0 AND iVietnam = 0 AND dYrsserv = 0
{'iCitizen': [0, 1], 'iFeb55': [0], 'iMay75880': [0], 'iRemplpar': [0], 'iRspouse': [1], 'iRvetserv': [0], 'iVietnam': [0], 'dYrsserv': [0]}
decoded query is {'iCitizen':

querying iKorean with n_distinct [1]
conditioning on ['iMobillim', 'iRPOB', 'iRspouse', 'iRvetserv', 'iSchool'] with probability 1.0
querying iMobillim with n_distinct [1]
conditioning on ['iRPOB', 'iRspouse', 'iRvetserv', 'iSchool'] with probability 0.9101617717448288
querying iRPOB with n_distinct [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
conditioning on ['iRspouse', 'iRvetserv', 'iSchool'] with probability 0.17330639030494538
querying iRspouse with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iRvetserv', 'iSchool'] with probability 0.8085126150002305
querying iRvetserv with n_distinct [1]
conditioning on ['iSchool'] with probability 0.8746544253856358
querying iSchool with n_distinct [1, 1]
conditioning on [] with probability 0.9172427118906066
latency: 271.50700241327286 and error: 1.5388837748457265
Predicting cardinality for query 390: SELECT COUNT(*) FROM climate WHERE dPoverty >= 1 AND dPoverty <= 2 AND dRpincome = 4 AND iSubfam1 = 0 AND iWWII = 0
{'dPoverty': [2, 1], 'dRpincome': [4],

querying dIncome2 with n_distinct [1]
conditioning on ['dWeek89', 'iWork89'] with probability 0.9580586311916665
querying dWeek89 with n_distinct [1, 1]
conditioning on ['iWork89'] with probability 0.609874493188789
querying iWork89 with n_distinct [1, 1]
conditioning on [] with probability 0.7655849504837722
latency: 78.62459123134613 and error: 1.0327290814346837
Predicting cardinality for query 396: SELECT COUNT(*) FROM climate WHERE iAvail = 0 AND iClass >= 0 AND iClass <= 1 AND iDisabl1 >= 0 AND iDisabl1 <= 2 AND iEnglish = 0 AND iRrelchld = 0 AND iWork89 = 1
{'iAvail': [0], 'iClass': [1, 0], 'iDisabl1': [2, 0, 1], 'iEnglish': [0], 'iRrelchld': [0], 'iWork89': [1]}
decoded query is {'iAvail': [0], 'iClass': [1, 0], 'iDisabl1': [0, 1, 2], 'iEnglish': [0], 'iRrelchld': [0], 'iWork89': [0]}
querying iAvail with n_distinct [1]
conditioning on ['iClass', 'iDisabl1', 'iEnglish', 'iRrelchld', 'iWork89'] with probability 0.9591968442678613
querying iClass with n_distinct [1, 1]
conditioni

querying dPOB with n_distinct [1]
conditioning on ['iRrelchld', 'iRspouse', 'iSubfam2', 'iTmpabsnt', 'dWeek89'] with probability 0.9057016603735414
querying iRrelchld with n_distinct [1]
conditioning on ['iRspouse', 'iSubfam2', 'iTmpabsnt', 'dWeek89'] with probability 0.047678755010340725
querying iRspouse with n_distinct [1]
conditioning on ['iSubfam2', 'iTmpabsnt', 'dWeek89'] with probability 0.30428633401312827
querying iSubfam2 with n_distinct [1, 1]
conditioning on ['iTmpabsnt', 'dWeek89'] with probability 0.9988138237310668
querying iTmpabsnt with n_distinct [1, 1, 1, 1]
conditioning on ['dWeek89'] with probability 1.0
querying dWeek89 with n_distinct [1, 1]
conditioning on [] with probability 0.7008666611072354
latency: 554.1645511984825 and error: 2.6493318887685087
Predicting cardinality for query 403: SELECT COUNT(*) FROM climate WHERE dHispanic = 0 AND dIndustry >= 0 AND dIndustry <= 8 AND iPerscare >= 0 AND iPerscare <= 2 AND dRpincome >= 2 AND dRpincome <= 5 AND iRPOB >= 2

querying iDisabl2 with n_distinct [1]
conditioning on ['dIncome1', 'iMarital', 'iOthrserv', 'iRagechld', 'iSubfam1', 'iYearsch'] with probability 0.7525579640449069
querying dIncome1 with n_distinct [1]
conditioning on ['iMarital', 'iOthrserv', 'iRagechld', 'iSubfam1', 'iYearsch'] with probability 0.38027535197533885
querying iMarital with n_distinct [1, 1, 1, 1]
conditioning on ['iOthrserv', 'iRagechld', 'iSubfam1', 'iYearsch'] with probability 0.60617360462022
querying iOthrserv with n_distinct [1]
conditioning on ['iRagechld', 'iSubfam1', 'iYearsch'] with probability 0.9981697561481121
querying iRagechld with n_distinct [1]
conditioning on ['iSubfam1', 'iYearsch'] with probability 0.4374741991028892
querying iSubfam1 with n_distinct [1, 1]
conditioning on ['iYearsch'] with probability 0.9883525837773057
querying iYearsch with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on [] with probability 0.5269254785348323
latency: 398.0198875069618 and error: 1.1778544275356158
Predicting cardin

querying dAncstry1 with n_distinct [1, 1]
conditioning on ['iMay75880', 'iRownchld', 'iRPOB'] with probability 0.10393431444887355
querying iMay75880 with n_distinct [1]
conditioning on ['iRownchld', 'iRPOB'] with probability 0.9869174442399016
querying iRownchld with n_distinct [1]
conditioning on ['iRPOB'] with probability 0.765819980650593
querying iRPOB with n_distinct [1]
conditioning on [] with probability 0.63150448381697
latency: 128.87561321258545 and error: 1.1011702665607166
Predicting cardinality for query 417: SELECT COUNT(*) FROM climate WHERE dIncome6 = 0 AND dPoverty = 2 AND dPwgt1 = 0 AND iSchool >= 1 AND iSchool <= 2
{'dIncome6': [0], 'dPoverty': [2], 'dPwgt1': [0], 'iSchool': [1, 2]}
decoded query is {'dIncome6': [0], 'dPoverty': [0], 'dPwgt1': [2], 'iSchool': [0, 1]}
querying dIncome6 with n_distinct [1]
conditioning on ['dPoverty', 'dPwgt1', 'iSchool'] with probability 0.9661344805323396
querying dPoverty with n_distinct [1]
conditioning on ['dPwgt1', 'iSchool'] wi

querying iEnglish with n_distinct [1]
conditioning on ['iMilitary', 'iPerscare', 'iRagechld', 'iYearsch'] with probability 0.8937238632797845
querying iMilitary with n_distinct [1]
conditioning on ['iPerscare', 'iRagechld', 'iYearsch'] with probability 0.8157537312306634
querying iPerscare with n_distinct [1, 1, 1]
conditioning on ['iRagechld', 'iYearsch'] with probability 1.0
querying iRagechld with n_distinct [1, 1, 1, 1, 1]
conditioning on ['iYearsch'] with probability 1.0
querying iYearsch with n_distinct [1, 1, 1, 1]
conditioning on [] with probability 0.2795863783084549
latency: 252.4927742779255 and error: 1.0216623855913298
Predicting cardinality for query 423: SELECT COUNT(*) FROM climate WHERE iCitizen = 0 AND dIncome2 = 0 AND dIncome4 = 0 AND dWeek89 = 0 AND dYrsserv = 0
{'iCitizen': [0], 'dIncome2': [0], 'dIncome4': [0], 'dWeek89': [0], 'dYrsserv': [0]}
decoded query is {'iCitizen': [0], 'dIncome2': [0], 'dIncome4': [0], 'dWeek89': [0], 'dYrsserv': [0]}
querying iCitizen wi

querying dAncstry2 with n_distinct [1]
conditioning on ['iFertil', 'dHispanic', 'iMeans', 'dRearning', 'iRelat2', 'iSept80', 'iWork89'] with probability 0.6912736480501405
querying iFertil with n_distinct [1, 1, 1, 1]
conditioning on ['dHispanic', 'iMeans', 'dRearning', 'iRelat2', 'iSept80', 'iWork89'] with probability 0.8392976446704132
querying dHispanic with n_distinct [1, 1, 1]
conditioning on ['iMeans', 'dRearning', 'iRelat2', 'iSept80', 'iWork89'] with probability 0.981154861057689
querying iMeans with n_distinct [1, 1]
conditioning on ['dRearning', 'iRelat2', 'iSept80', 'iWork89'] with probability 0.923769829306875
querying dRearning with n_distinct [1, 1, 1, 1, 1, 1]
conditioning on ['iRelat2', 'iSept80', 'iWork89'] with probability 1.0
querying iRelat2 with n_distinct [1]
conditioning on ['iSept80', 'iWork89'] with probability 0.98596189093195
querying iSept80 with n_distinct [1]
conditioning on ['iWork89'] with probability 0.9791027662319047
querying iWork89 with n_distinct [

querying iSept80 with n_distinct [1]
conditioning on ['iSex', 'iSubfam1'] with probability 0.9820514840977368
querying iSex with n_distinct [1]
conditioning on ['iSubfam1'] with probability 0.5195020282148075
querying iSubfam1 with n_distinct [1]
conditioning on [] with probability 0.9737300597774464
latency: 263.046782463789 and error: 1.3053879319445794
Predicting cardinality for query 436: SELECT COUNT(*) FROM climate WHERE dIncome1 >= 0 AND dIncome1 <= 1 AND dIncome3 = 0 AND dIncome7 = 0 AND iKorean = 0 AND iMarital >= 0 AND iMarital <= 4 AND dOccup >= 0 AND dOccup <= 1 AND iPerscare = 2 AND dRpincome >= 0 AND dRpincome <= 2 AND iRrelchld = 0 AND iYearsch >= 5 AND iYearsch <= 10
{'dIncome1': [1, 0], 'dIncome3': [0], 'dIncome7': [0], 'iKorean': [0], 'iMarital': [1, 0, 4, 2, 3], 'dOccup': [0, 1], 'iPerscare': [2], 'dRpincome': [2, 0, 1], 'iRrelchld': [0], 'iYearsch': [5, 10, 8, 7, 6, 9]}
decoded query is {'dIncome1': [1, 0], 'dIncome3': [0], 'dIncome7': [0], 'iKorean': [0], 'iMarital

querying iSept80 with n_distinct [1, 1]
conditioning on ['iVietnam'] with probability 1.0
querying iVietnam with n_distinct [1]
conditioning on [] with probability 0.9659766869992699
latency: 114.89835381507874 and error: 1.007857830965227
Predicting cardinality for query 441: SELECT COUNT(*) FROM climate WHERE dAge >= 3 AND dAge <= 7 AND dIncome3 = 0 AND iMeans = 0 AND iMobility = 1 AND iOthrserv = 0 AND iSex = 1 AND iWork89 >= 0 AND iWork89 <= 1
{'dAge': [5, 6, 3, 4, 7], 'dIncome3': [0], 'iMeans': [0], 'iMobility': [1], 'iOthrserv': [0], 'iSex': [1], 'iWork89': [1, 0]}
decoded query is {'dAge': [5, 3, 2, 1, 4], 'dIncome3': [0], 'iMeans': [0], 'iMobility': [0], 'iOthrserv': [0], 'iSex': [0], 'iWork89': [0, 2]}
querying dAge with n_distinct [1, 1, 1, 1, 1]
conditioning on ['dIncome3', 'iMeans', 'iMobility', 'iOthrserv', 'iSex', 'iWork89'] with probability 0.4315019274054154
querying dIncome3 with n_distinct [1]
conditioning on ['iMeans', 'iMobility', 'iOthrserv', 'iSex', 'iWork89'] wit

querying iCitizen with n_distinct [1]
conditioning on ['iEnglish', 'dHispanic', 'iImmigr', 'dIncome2', 'dIncome4', 'dIncome8', 'iKorean', 'iMarital', 'iPerscare', 'iRvetserv'] with probability 1.0
querying iEnglish with n_distinct [1, 1]
conditioning on ['dHispanic', 'iImmigr', 'dIncome2', 'dIncome4', 'dIncome8', 'iKorean', 'iMarital', 'iPerscare', 'iRvetserv'] with probability 0.9648670321188874
querying dHispanic with n_distinct [1]
conditioning on ['iImmigr', 'dIncome2', 'dIncome4', 'dIncome8', 'iKorean', 'iMarital', 'iPerscare', 'iRvetserv'] with probability 0.9786887608904611
querying iImmigr with n_distinct [1]
conditioning on ['dIncome2', 'dIncome4', 'dIncome8', 'iKorean', 'iMarital', 'iPerscare', 'iRvetserv'] with probability 0.9174539000239977
querying dIncome2 with n_distinct [1, 1]
conditioning on ['dIncome4', 'dIncome8', 'iKorean', 'iMarital', 'iPerscare', 'iRvetserv'] with probability 1.0
querying dIncome4 with n_distinct [1]
conditioning on ['dIncome8', 'iKorean', 'iMarit

querying iCitizen with n_distinct [1]
conditioning on ['iFertil', 'dIncome2', 'dIncome7', 'iSept80'] with probability 0.9145619288192053
querying iFertil with n_distinct [1, 1, 1]
conditioning on ['dIncome2', 'dIncome7', 'iSept80'] with probability 0.27664652491461017
querying dIncome2 with n_distinct [1]
conditioning on ['dIncome7', 'iSept80'] with probability 0.950264458932765
querying dIncome7 with n_distinct [1]
conditioning on ['iSept80'] with probability 0.9337696304981261
querying iSept80 with n_distinct [1]
conditioning on [] with probability 0.9839863970206871
latency: 204.9524448812008 and error: 1.0310852491920708
Predicting cardinality for query 453: SELECT COUNT(*) FROM climate WHERE iFertil = 0 AND dHours >= 0 AND dHours <= 1 AND dIncome2 = 0 AND iLooking = 0 AND dPwgt1 >= 0 AND dPwgt1 <= 1 AND dRpincome >= 2 AND dRpincome <= 4 AND iRPOB = 10 AND iRrelchld = 0 AND iSex >= 0 AND iSex <= 1
{'iFertil': [0], 'dHours': [1, 0], 'dIncome2': [0], 'iLooking': [0], 'dPwgt1': [1, 0]

querying iLang1 with n_distinct [1]
conditioning on ['iRemplpar', 'iSchool', 'dWeek89'] with probability 0.8647300029002336
querying iRemplpar with n_distinct [1]
conditioning on ['iSchool', 'dWeek89'] with probability 0.7249650756244973
querying iSchool with n_distinct [1, 1]
conditioning on ['dWeek89'] with probability 0.8930686707597584
querying dWeek89 with n_distinct [1, 1]
conditioning on [] with probability 0.7008666611072354
latency: 177.03517526388168 and error: 1.084871309977884
Predicting cardinality for query 458: SELECT COUNT(*) FROM climate WHERE iFertil >= 0 AND iFertil <= 4 AND dHour89 = 3 AND iImmigr = 0 AND iLang1 = 2 AND dOccup >= 0 AND dOccup <= 2 AND iRemplpar = 0 AND dTravtime >= 0 AND dTravtime <= 5
{'iFertil': [1, 3, 0, 4, 2], 'dHour89': [3], 'iImmigr': [0], 'iLang1': [2], 'dOccup': [2, 0, 1], 'iRemplpar': [0], 'dTravtime': [5, 1, 2, 0, 4, 3]}
decoded query is {'iFertil': [1, 2, 0, 4, 3], 'dHour89': [1], 'iImmigr': [0], 'iLang1': [0], 'dOccup': [1, 0, 2], 'iRemp

querying iDisabl2 with n_distinct [1]
conditioning on ['iEnglish', 'iRagechld', 'iRelat1', 'iRemplpar', 'iSept80'] with probability 0.8967800103758186
querying iEnglish with n_distinct [1]
conditioning on ['iRagechld', 'iRelat1', 'iRemplpar', 'iSept80'] with probability 0.8777540524902159
querying iRagechld with n_distinct [1]
conditioning on ['iRelat1', 'iRemplpar', 'iSept80'] with probability 0.3732463243576128
querying iRelat1 with n_distinct [1, 1]
conditioning on ['iRemplpar', 'iSept80'] with probability 0.7575181809855289
querying iRemplpar with n_distinct [1]
conditioning on ['iSept80'] with probability 0.7463681602633431
querying iSept80 with n_distinct [1]
conditioning on [] with probability 0.9839863970206871
latency: 199.97680187225342 and error: 1.273479322367979
Predicting cardinality for query 464: SELECT COUNT(*) FROM climate WHERE iCitizen = 0 AND iDisabl1 = 0 AND iFeb55 = 0 AND iImmigr = 0 AND iMobillim >= 0 AND iMobillim <= 2 AND iRownchld = 0 AND iRspouse >= 1 AND iR

In [28]:
# Print the 50th/90th/95th/99th/100th percentiles of the collected q_errors,
# one value per line (the 100th percentile is the maximum).
for pct in (50, 90, 95, 99, 100):
    print(np.percentile(q_errors, pct))

1.0615151087683672
1.4680862092599711
2.052275981274598
20.62931050656592
235.0


In [None]:
from numba import cuda, jit, guvectorize
from numba import int32, float32    # import the types
from numba.experimental import jitclass

# Field layout as (field name, numba type) pairs: one int32 scalar and one
# 1-D float32 array.
# NOTE(review): none of the visible cells passes `spec` to `jitclass` -- confirm
# whether it is still needed.
spec = [('value', int32), ('array', float32[:])]

@guvectorize(["void(float64[:,:], float64[:,:], float64[:,:])"],
             "(m,n),(n,p)->(m,p)")
def matmul(A, B, result):
    """
    Generalized-ufunc matrix product: writes ``A @ B`` into ``result``.

    Parameters
    ----------
    A: 2-D float64 array of shape (m, n)
    B: 2-D float64 array of shape (n, p)
    result: 2-D float64 array of shape (m, p)
        Output buffer supplied by the gufunc machinery; it must be filled
        element by element (or via slice assignment).

    Notes
    -----
    A gufunc kernel does not return its result. Assigning ``result = ...``
    only rebinds the local name and leaves the real output buffer untouched,
    so the product is written into ``result`` explicitly. Plain loops are used
    so the kernel does not depend on BLAS support or on the inputs being
    contiguous.
    """
    rows, inner = A.shape
    cols = B.shape[1]
    for i in range(rows):
        for j in range(cols):
            acc = 0.0
            for k in range(inner):
                acc += A[i, k] * B[k, j]
            result[i, j] = acc

In [None]:
# Two random 10x10 integer operands (entries in [0, 10)) and a float
# 10x10 array of ones that the timing cells pass as the output buffer.
a = np.random.randint(0, 10, size=(10, 10))
b = np.random.randint(0, 10, size=(10, 10))
c = np.ones(shape=(10, 10))

In [None]:
# Baseline: time one NumPy matrix product of `a` and `b`.
# perf_counter() is the monotonic, high-resolution clock intended for
# measuring short durations; time.time() can be too coarse for a call this small.
tic = time.perf_counter()
a.dot(b)
print(time.perf_counter() - tic)

In [None]:
# Time the numba gufunc on the same operands, writing into the buffer `c`.
# perf_counter() is used because it is the high-resolution clock meant for
# short benchmarks.
tic = time.perf_counter()
matmul(a, b, c)
print(time.perf_counter() - tic)

In [None]:
@jit
def func2(a, n_iter=10000000):
    """
    JIT-compiled counting loop used as a micro-benchmark against `func3`.

    Parameters
    ----------
    a: number
        Start value; incremented once per iteration.
    n_iter: int
        Number of iterations (defaults to the original 10,000,000).

    Returns
    -------
    None

    Notes
    -----
    The former ``target='cpu'`` keyword was dropped: CPU is already the
    default target and numba has deprecated that keyword for ``@jit``.
    NOTE(review): the incremented value is never returned, so the compiler is
    free to drop the loop -- confirm the timing measures what is intended.
    """
    for _ in range(n_iter):
        a += 1

In [None]:
# Time one call of the jitted loop using the high-resolution benchmark clock.
# NOTE(review): if this is the first call of `func2` in the session, the
# measurement presumably includes numba's compilation time -- confirm.
tic = time.perf_counter()
func2(1)
print(time.perf_counter() - tic)

In [None]:
def func3(a, n_iter=10000000):
    """
    Pure-Python counting loop: the uncompiled baseline for `func2`.

    Parameters
    ----------
    a: number
        Start value; incremented once per iteration.
    n_iter: int
        Number of iterations (defaults to the original 10,000,000), exposed
        so the benchmark can be scaled down.

    Returns
    -------
    None
    """
    for _ in range(n_iter):
        a += 1

In [None]:
# Time one call of the pure-Python loop using the high-resolution benchmark clock.
tic = time.perf_counter()
func3(1)
print(time.perf_counter() - tic)

In [None]:
def reduce2(phi_o, values, inplace=True):
    """
    Reduces the factor to the context of given variable values.

    Parameters
    ----------
    phi_o: factor object
        The factor to reduce. The code uses its `variables`, `cardinality`,
        `state_names` and `values` attributes and its `copy`, `get_state_no`
        and `del_state_names` methods (presumably a `DiscreteFactor` -- confirm).

    values: list, array-like
        A list of tuples of the form (variable_name, variable_state).
        A single state removes the variable from the factor; a list of states
        keeps the variable and restricts it to those states. The list passed
        in is left unmodified.

    inplace: boolean
        If inplace=True it will modify the factor itself, else would return
        a new factor.

    Returns
    -------
    DiscreteFactor or None: if inplace=True (default) returns None
                    if inplace=False returns a new `DiscreteFactor` instance.

    Raises
    ------
    TypeError
        If `values` is a string or contains an element that is not a tuple.
    ValueError
        If a variable in `values` is not part of the factor.

    Notes
    -----
    The function is intentionally not wrapped in numba's `@jit`: the body only
    manipulates Python objects (the factor, lists, dicts), which numba cannot
    compile to native code.
    """
    # Check if values is an array
    if isinstance(values, str):
        raise TypeError("values: Expected type list or array-like, got type str")

    # Report the first offending element, with the message actually formatted.
    for state_tuple in values:
        if not isinstance(state_tuple, tuple):
            raise TypeError(
                f"values: Expected type list of tuples, got type {type(state_tuple)}"
            )

    # Check if all variables in values are in the factor. Doing this up front
    # means an in-place reduce is never left half-applied.
    for var, _ in values:
        if var not in phi_o.variables:
            raise ValueError(f"The variable: {var} is not in the factor")

    phi = phi_o if inplace else phi_o.copy()

    # Convert the state names to state number. If state name not found treat them as
    # state numbers. A new list is built so the caller's `values` is untouched.
    resolved = []
    for var, state_name in values:
        if isinstance(state_name, list):
            state = [phi.get_state_no(var, no) for no in state_name]
        else:
            state = phi.get_state_no(var, state_name)
        resolved.append((var, state))

    var_index_to_del = []
    # Basic index (ints and full slices only) that drops the point-queried axes.
    slice_ = [slice(None)] * len(phi.variables)
    # (axis, states) pairs for the variables restricted to a list of states.
    list_states = []
    point_query = True
    cardinality = phi.cardinality
    state_names = phi.state_names
    del_state_names = []
    for var, state in resolved:
        var_index = phi.variables.index(var)
        if isinstance(state, list):
            point_query = False
            list_states.append((var_index, state))
            cardinality[var_index] = len(state)
            state_names[var] = state
        else:
            slice_[var_index] = state
            var_index_to_del.append(var_index)
            del_state_names.append(var)

    var_index_to_keep = sorted(
        set(range(len(phi.variables))) - set(var_index_to_del)
    )
    # set difference is not guaranteed to maintain ordering
    phi.variables = [phi.variables[index] for index in var_index_to_keep]
    phi.cardinality = cardinality[var_index_to_keep]
    phi.del_state_names(del_state_names)

    # Apply the list restrictions one axis at a time. Putting several lists
    # (or a list plus scalars) into a single index tuple triggers NumPy
    # advanced indexing, which pairs the indices element-wise and can move the
    # resulting axis to the front instead of selecting per axis.
    reduced = phi.values
    for var_index, state in list_states:
        axis_index = [slice(None)] * reduced.ndim
        axis_index[var_index] = state
        reduced = reduced[tuple(axis_index)]
    phi.values = reduced[tuple(slice_)]

    if not point_query:
        # NOTE(review): `DiscreteFactor` is not imported in the visible part of
        # the notebook -- confirm it is in scope before using list states.
        super(DiscreteFactor, phi).store_state_names(
            phi.variables, cardinality, state_names
        )

    if not inplace:
        return phi

In [None]:
# Pull a single object out of `f`: take the entry stored under 'iRelat1',
# materialise it as a list and keep element [0][0].
# NOTE(review): `f` is rebound from itself, so re-running this cell applies the
# indexing to the already-extracted object; presumably `f` must first be the
# result of `ve._get_working_factors(...)` from a later cell -- confirm.
f = list(f['iRelat1'])[0][0]

In [None]:
# Display the current value of `f`.
f

In [None]:
# Evidence passed to the reduce calls below as (variable, states) tuples.
# The state is given as a list, which `reduce2` treats as a restriction that
# keeps the variable in the factor rather than removing it.
s = [("iRspouse", [1])]

In [None]:
# Print the `.values` of `a[0][0]`, `a[1][0]` and `b`.
# NOTE(review): an earlier cell binds `a` and `b` to NumPy integer arrays,
# whose elements have no `.values`; this cell appears to depend on `a`/`b`
# having been rebound by cells run in a different order -- confirm.
print(a[0][0].values)
print(a[1][0].values)
print(b.values)

In [None]:
# Time the factor's own `reduce` on evidence `s`, keeping the reduced copy in `a`.
# perf_counter() is the high-resolution clock intended for short benchmarks.
tic = time.perf_counter()
a = f.reduce(s, inplace=False)
print(time.perf_counter() - tic)

In [None]:
# Only the last expression of a cell is rendered, so show the value-table
# shape and the cardinality together instead of discarding the first one.
a.values.shape, a.cardinality

In [None]:
# Sum of all entries in the value table of `a`
# (presumably the reduced factor produced by the `f.reduce` cell -- confirm).
np.sum(a.values)

In [None]:
# Time the stand-alone `reduce2` on the same factor and evidence for comparison.
# perf_counter() is the high-resolution clock intended for short benchmarks.
tic = time.perf_counter()
reduce2(f, s, inplace=False)
print(time.perf_counter() - tic)

In [None]:
# Time the construction of the working factors for querying "iAvail" under
# mixed point (scalar) and list-valued evidence; the two results are kept in
# `f` and `s`.
# perf_counter() is the high-resolution clock intended for short benchmarks.
tic = time.perf_counter()
f, s = ve._get_working_factors(
    ["iAvail"],
    {"iClass": 0, "iKorean": [0, 1], "iMay75880": 0, "iRagechld": [0, 4], "iRrelchld": 0},
)
print(time.perf_counter() - tic)

In [None]:
def get_probs(ve, attribute, values):
    """
    Sum the entries of `ve.probs[attribute]` at the given states.

    Parameters
    ----------
    ve: object exposing a `probs` mapping from attribute name to factor
    attribute: key into `ve.probs`; also passed to the factor's `get_state_no`
    values: iterable of state names (or numbers) to accumulate

    Returns
    -------
    The `np.sum` of the factor's `values` at the resolved state indices.
    """
    marginal = ve.probs[attribute]
    state_indices = []
    for state_name in values:
        # Translate each requested state into its position in the value table.
        state_indices.append(marginal.get_state_no(attribute, state_name))
    selected = marginal.values[state_indices]
    return np.sum(selected)

In [None]:
# Example: sum the stored `iClass` entries over the states 1, 2 and 3
# (presumably marginal probabilities held in `ve.probs` -- confirm).
get_probs(ve, "iClass", [1,2,3])