#### Test simple cost model

In [29]:
# auto reload all modules
%load_ext autoreload
%autoreload 2

from simple_cost_model import *
from ssb_qgen_class import *


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [30]:
# Build the query generator and draw one query from template 2.
qgen = QGEN()
example_query = qgen.generate_query(2)

# Show the query object followed by its predicate dictionary, one per line.
print(example_query, example_query.predicate_dict, sep="\n")

template id: 2, query: 
                SELECT SUM(lo_extendedprice * lo_discount) AS revenue
                FROM lineorder, dwdate
                WHERE lo_orderdate = d_datekey
                AND d_yearmonthnum = 199609
                AND lo_discount BETWEEN 0  AND 2 
                AND lo_quantity BETWEEN 6 AND 15;
            , payload: {'lineorder': ['lo_extendedprice', 'lo_discount']}, predicates: {'lineorder': ['lo_orderdate', 'lo_discount', 'lo_quantity'], 'dwdate': ['d_datekey', 'd_yearmonthnum']}, order by: {}, group by: {}
{'lineorder': [{'column': 'lo_orderdate', 'operator': 'eq', 'value': 'd_datekey', 'join': True}, {'column': 'lo_discount', 'operator': 'range', 'value': (0, 2), 'join': False}, {'column': 'lo_quantity', 'operator': 'range', 'value': (6, 15), 'join': False}], 'dwdate': [{'column': 'd_yearmonthnum', 'operator': 'eq', 'value': 199609, 'join': False}]}


In [31]:
def actual_cost(query_object, indexes):
    """Print the estimated query cost with and without hypothetical indexes.

    Opens a connection, drops the existing secondary indexes, prints the cost
    estimate for the bare query, then creates one hypothetical index per
    candidate and prints the new estimate, the speedup, and the indexes that
    appear in the resulting plan.

    Args:
        query_object: Query wrapper exposing the SQL text as ``query_string``.
        indexes: Mapping whose values are the candidate index objects to
            evaluate (the keys are not used here).

    Returns:
        None. All results are reported on stdout.

    Note:
        Cleanup is guaranteed: the hypothetical indexes are dropped and the
        connection is closed even when one of the estimation steps raises.
    """
    candidate_indexes = list(indexes.values())
    conn = create_connection()
    try:
        # drop all existing secondary indexes
        drop_all_indexes(conn)

        cost_wo_indexes, scan_costs = get_query_cost_estimate(conn, query_object.query_string, show_plan=False)
        print(f"Estimated cost without hypothetical indexes: {cost_wo_indexes}")
        print(f"Scan costs: {scan_costs}")

        try:
            # create hypothetical indexes for candidate indexes
            created_indexes = bulk_create_hypothetical_indexes(conn, candidate_indexes, return_size=True)
            # oid -> (candidate index, size); assumes created_indexes is returned
            # in the same order as candidate_indexes -- TODO confirm in the helper.
            hypothetical_indexes = {
                index_oid: (candidate, index_size_mb)
                for candidate, (index_oid, index_size_mb) in zip(candidate_indexes, created_indexes)
            }

            # get the cost of the query with the hypothetical indexes
            print()
            cost_w_indexes, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query_object.query_string, show_plan=False)
            print(f"\nEstimated cost with hypothetical indexes: {cost_w_indexes}")
            # A zero estimate would otherwise abort the report with ZeroDivisionError.
            speedup = cost_wo_indexes / cost_w_indexes if cost_w_indexes else float("inf")
            print(f"Speedup: {speedup:.4f}")
            print("\nIndexes used in the query plan:")
            for oid, scan_type, scan_cost in indexes_used:
                # The plan may reference an oid that was not created above;
                # report it instead of failing with KeyError.
                entry = hypothetical_indexes.get(oid)
                index_label = entry[0] if entry is not None else f"<unknown index, oid {oid}>"
                print(index_label, ", Scan type: ", scan_type, ", Scan cost: ", scan_cost)
        finally:
            bulk_drop_hypothetical_indexes(conn)
    finally:
        close_connection(conn)

In [32]:
# Derive the candidate indexes for the example query, then key them by index_id.
candidate_indexes = extract_query_indexes(
    example_query,
    max_key_columns=3,
    include_cols=True,
    exclude_pk_indexes_ssb=True,
)
indexes = dict((ix.index_id, ix) for ix in candidate_indexes)

In [33]:
# Create the simple cost model, configured with index_scan_cost_multiplier=1.5.
model = SimpleCost(index_scan_cost_multiplier=1.5)

# Run the model on the example query against its candidate indexes (verbose off).
model.predict(
    example_query,
    indexes,
    verbose=False,
)


Cheapest access paths: 
Table: lineorder, Cheapest path: {'scan_type': 'Index Only Scan', 'index_id': 'ixn_lineorder_lo_quantity_lo_orderdate_lo_discount_lo_e'}, Cost: 73870.5
Table: dwdate, Cheapest path: {'scan_type': 'Index Scan', 'index_id': 'ix_dwdate_d_yearmonthnum'}, Cost: 0.0


In [34]:
# Report the cost estimates for the same query and candidate indexes.
actual_cost(query_object=example_query, indexes=indexes)

Estimated cost without hypothetical indexes: 2105042.35
Scan costs: {'Seq Scan': 2096399.27}


Estimated cost with hypothetical indexes: 6815.61
Speedup: 308.8560

Indexes used in the query plan:
Index name: ixn_lineorder_lo_orderdate_lo_quantity_lo_discount_lo_e, Key cols: ('lo_orderdate', 'lo_quantity', 'lo_discount'), Include cols: ('lo_extendedprice',), Current OID: None, Size: None MB , Scan type:  Index Only Scan , Scan cost:  207.38
Index name: ix_dwdate_d_yearmonthnum_d_datekey, Key cols: ('d_yearmonthnum', 'd_datekey'), Include cols: (), Current OID: None, Size: None MB , Scan type:  Index Only Scan , Scan cost:  4.55


#### Compare predicted and actual access paths for all query templates

In [36]:
# For query templates 1 through 14: print the generated query, the model's
# prediction, and then the cost report produced by actual_cost.
for i in range(1, 15):
    query_object = qgen.generate_query(i)
    print(query_object)

    candidate_indexes = extract_query_indexes(
        query_object,
        max_key_columns=3,
        include_cols=True,
        exclude_pk_indexes_ssb=True,
    )
    indexes = dict((ix.index_id, ix) for ix in candidate_indexes)

    model.predict(query_object, indexes, verbose=False)
    print("\n\n")
    actual_cost(query_object, indexes)
    print("\n")


template id: 1, query: 
                SELECT SUM(lo_extendedprice * lo_discount) AS revenue
                FROM lineorder, dwdate
                WHERE lo_orderdate = d_datekey
                AND d_year = 1993
                AND lo_discount BETWEEN 5 AND 7 
                AND lo_quantity < 25;
            , payload: {'lineorder': ['lo_extendedprice', 'lo_discount']}, predicates: {'lineorder': ['lo_orderdate', 'lo_discount', 'lo_quantity'], 'dwdate': ['d_datekey', 'd_year']}, order by: {}, group by: {}

Cheapest access paths: 
Table: lineorder, Cheapest path: {'scan_type': 'Index Only Scan', 'index_id': 'ixn_lineorder_lo_discount_lo_orderdate_lo_quantity_lo_e'}, Cost: 102438.0
Table: dwdate, Cheapest path: {'scan_type': 'Index Scan', 'index_id': 'ix_dwdate_d_year'}, Cost: 7.5



Estimated cost without hypothetical indexes: 1972536.97
Scan costs: {'Seq Scan': 1946433.73}


Estimated cost with hypothetical indexes: 119540.67
Speedup: 16.5010

Indexes used in the query plan:
Index na