#### What-if Query Execution Cost estimation for Hypothetical indexes

In [2]:
%load_ext autoreload
%autoreload 2

from ssb_qgen_class import *
from pg_utils import *

import time


#### Generate an SSB query

In [3]:
# create the query generator object; later cells call qg.generate_query(template_id) on it
qg = QGEN()

In [12]:
# generate a query from template 9 and print it (query text plus payload/predicate metadata)
query = qg.generate_query(9)

print(query)

template id: 9, query: 
                SELECT c_city, s_city, d_year, SUM(lo_revenue) AS revenue
                FROM customer, lineorder, supplier, dwdate
                WHERE lo_custkey = c_custkey
                AND lo_suppkey = s_suppkey
                AND lo_orderdate = d_datekey
                AND (c_city = 'BRAZIL   9' OR c_city = 'ALGERIA  1')
                AND (s_city = 'BRAZIL   9' OR s_city = 'ALGERIA  1')
                AND d_year >= 1992 AND d_year <= 1997
                GROUP BY c_city, s_city, d_year
                ORDER BY d_year ASC, revenue DESC;
            , payload: {'lineorder': ['lo_revenue'], 'dwdate': ['d_year'], 'customer': ['c_city'], 'supplier': ['s_city']}, predicates: {'lineorder': ['lo_custkey', 'lo_suppkey', 'lo_orderdate'], 'dwdate': ['d_year', 'd_datekey'], 'customer': ['c_custkey', 'c_city'], 'supplier': ['s_suppkey', 's_city']}, order by: {'lineorder': ['lo_revenue'], 'dwdate': ['d_year']}, group by: {'customer': ['c_city'], 'supplier': ['s

#### Generate candidate indexes (with include columns) for this query

In [11]:
# Build the candidate index list for `query` and print each candidate.
# NOTE(review): flag semantics (max_key_columns / include_cols /
# exclude_pk_indexes_ssb) are defined in pg_utils — confirm there.
candidate_indexes = extract_query_indexes(
    query,
    max_key_columns=3,
    include_cols=True,
    exclude_pk_indexes_ssb=True,
)

for candidate in candidate_indexes:
    print(candidate)

print(f"Total number of candidate indexes: {len(candidate_indexes)}")

Index name: ix_lineorder_lo_custkey, Key cols: ('lo_custkey',), Include cols: (), Current OID: None
Index name: ixn_lineorder_lo_custkey_lo_r, Key cols: ('lo_custkey',), Include cols: ('lo_revenue',), Current OID: None
Index name: ix_lineorder_lo_suppkey, Key cols: ('lo_suppkey',), Include cols: (), Current OID: None
Index name: ixn_lineorder_lo_suppkey_lo_r, Key cols: ('lo_suppkey',), Include cols: ('lo_revenue',), Current OID: None
Index name: ix_lineorder_lo_orderdate, Key cols: ('lo_orderdate',), Include cols: (), Current OID: None
Index name: ixn_lineorder_lo_orderdate_lo_r, Key cols: ('lo_orderdate',), Include cols: ('lo_revenue',), Current OID: None
Index name: ix_lineorder_lo_custkey_lo_suppkey, Key cols: ('lo_custkey', 'lo_suppkey'), Include cols: (), Current OID: None
Index name: ixn_lineorder_lo_custkey_lo_suppkey_lo_r, Key cols: ('lo_custkey', 'lo_suppkey'), Include cols: ('lo_revenue',), Current OID: None
Index name: ix_lineorder_lo_custkey_lo_orderdate, Key cols: ('lo_cus

#### Estimate cost with and without hypothetical indexes to get the estimated speedup

In [8]:
# Estimate the planner cost of `query` without indexes and with all candidate
# indexes created hypothetically, then report the estimated speedup and which
# hypothetical indexes the plan uses.
conn = create_connection()
try:
    # drop all existing secondary indexes
    drop_all_indexes(conn)

    cost_wo_indexes, scan_costs = get_query_cost_estimate(conn, query.query_string, show_plan=True)
    print(f"Estimated cost without hypothetical indexes: {cost_wo_indexes}")
    print(f"Scan costs: {scan_costs}")

    # create hypothetical indexes for candidate indexes
    indexes = bulk_create_hypothetical_indexes(conn, candidate_indexes, return_size=True)
    try:
        # Map hypothetical index oid -> (candidate index object, size).
        # The returned (oid, size) pairs are matched positionally with candidate_indexes.
        hypothetical_indexes = {
            index_oid: (candidate, index_size_mb)
            for candidate, (index_oid, index_size_mb) in zip(candidate_indexes, indexes)
        }

        # get the cost of the query with the hypothetical indexes
        print()
        cost_w_indexes, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query.query_string, show_plan=True)
        print(f"\nEstimated cost with hypothetical indexes: {cost_w_indexes}")
        print(f"Speedup: {cost_wo_indexes/cost_w_indexes:.4f}")

        print("\nIndexes used in the query plan:")
        for oid, scan_type, scan_cost in indexes_used:
            print(hypothetical_indexes[oid][0], ", Scan type: ", scan_type, ", Scan cost: ", scan_cost)
    finally:
        # always remove the hypothetical indexes, even if costing failed
        bulk_drop_hypothetical_indexes(conn)
finally:
    # always release the connection, even if a step above raised
    close_connection(conn)

[
  {
    "Plan": {
      "Node Type": "Incremental Sort",
      "Parallel Aware": false,
      "Async Capable": false,
      "Startup Cost": 1682178.72,
      "Total Cost": 1682409.94,
      "Plan Rows": 3217,
      "Plan Width": 58,
      "Sort Key": [
        "dwdate.d_year",
        "(sum(lineorder.lo_revenue)) DESC"
      ],
      "Presorted Key": [
        "dwdate.d_year"
      ],
      "Plans": [
        {
          "Node Type": "Aggregate",
          "Strategy": "Sorted",
          "Partial Mode": "Simple",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Async Capable": false,
          "Startup Cost": 1682146.9,
          "Total Cost": 1682227.33,
          "Plan Rows": 3217,
          "Plan Width": 58,
          "Group Key": [
            "dwdate.d_year",
            "customer.c_city",
            "supplier.s_city"
          ],
          "Plans": [
            {
              "Node Type": "Sort",
              "Parent Relationship": "Ou

In [20]:
# get used index objects
# indexes_used may contain duplicates, so de-duplicate by oid (a dict keeps
# first-seen order) to avoid trying to materialize the same index twice later.
# NOTE: this rebinds indexes_used from (oid, scan_type, scan_cost) tuples to index
# objects, so re-run the cost-estimation cell before re-running this cell.
indexes_used = list({oid: hypothetical_indexes[oid][0] for oid, _, _ in indexes_used}.values())
print("\nIndexes used in the query plan:")
for index in indexes_used:
    print(index)


Indexes used in the query plan:
Index name: ixn_lineorder_lo_orderdate_lo_discount_lo_quantity_lo_e, Key cols: ('lo_orderdate', 'lo_discount', 'lo_quantity'), Include cols: ('lo_extendedprice',), Current OID: None
Index name: ix_dwdate_d_year_d_datekey, Key cols: ('d_year', 'd_datekey'), Include cols: (), Current OID: None


In [7]:
# Look up size / row-count info for the lineorder and dwdate tables.
conn = create_connection()
try:
    lineorder_table_info = get_table_size_and_row_count(conn, 'lineorder')
    dwdate_table_info = get_table_size_and_row_count(conn, 'dwdate')
finally:
    # release the connection even if a lookup fails
    close_connection(conn)


In [8]:
# show the collected table statistics, lineorder first, then dwdate
for table_info in (lineorder_table_info, dwdate_table_info):
    print(table_info)

{'size': 16586, 'row_count': 119994746}
{'size': 0, 'row_count': 2556}


In [22]:
# get actual query cost without indexes
# NOTE(review): this cell does not drop indexes itself; it assumes none are
# materialized at this point — confirm before trusting the baseline.
conn = create_connection()
try:
    (
        total_execution_time,
        rows,
        table_access_info,
        index_access_info,
        bitmap_heapscan_info,
    ) = execute_query(conn, query.query_string, with_explain=True, return_access_info=True, print_results=True)
finally:
    # release the connection even if the query fails
    close_connection(conn)


[
  {
    "Plan": {
      "Node Type": "Aggregate",
      "Strategy": "Plain",
      "Partial Mode": "Simple",
      "Parallel Aware": false,
      "Async Capable": false,
      "Startup Cost": 1972407.13,
      "Total Cost": 1972407.14,
      "Plan Rows": 1,
      "Plan Width": 32,
      "Actual Startup Time": 15853.896,
      "Actual Total Time": 15853.911,
      "Actual Rows": 1,
      "Actual Loops": 1,
      "Shared Hit Blocks": 0,
      "Shared Read Blocks": 896643,
      "Shared Dirtied Blocks": 0,
      "Shared Written Blocks": 0,
      "Local Hit Blocks": 0,
      "Local Read Blocks": 0,
      "Local Dirtied Blocks": 0,
      "Local Written Blocks": 0,
      "Temp Read Blocks": 0,
      "Temp Written Blocks": 0,
      "Plans": [
        {
          "Node Type": "Hash Join",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Async Capable": false,
          "Join Type": "Inner",
          "Startup Cost": 79.5,
          "Total Cost": 1966873

In [23]:
# materialize the indexes that the hypothetical plan used
conn = create_connection()
try:
    bulk_create_indexes(conn, indexes_used)
finally:
    # release the connection even if index creation fails
    close_connection(conn)

Successfully created index: 'ixn_lineorder_lo_orderdate_lo_discount_lo_quantity_lo_e', size: 2323.2265625000000000 MB, creation time: 50746.57 ms
Successfully created index: 'ix_dwdate_d_year_d_datekey', size: 0.07031250000000000000 MB, creation time: 3.40 ms


In [14]:
# restart PostgreSQL with the cache cleared before the next query execution
restart_postgresql(clear_cache=True, delay=2)

Cache cleared successfully.
PostgreSQL restarted successfully.


In [15]:
# get actual query cost
conn = create_connection()
try:
    (
        total_execution_time,
        rows,
        table_access_info,
        index_access_info,
        bitmap_heapscan_info,
    ) = execute_query(conn, query.query_string, with_explain=True, return_access_info=True, print_results=True)
finally:
    # release the connection even if the query fails
    close_connection(conn)


[
  {
    "Plan": {
      "Node Type": "Aggregate",
      "Strategy": "Plain",
      "Partial Mode": "Simple",
      "Parallel Aware": false,
      "Async Capable": false,
      "Startup Cost": 117114.2,
      "Total Cost": 117114.21,
      "Plan Rows": 1,
      "Plan Width": 32,
      "Actual Startup Time": 1016.957,
      "Actual Total Time": 1016.969,
      "Actual Rows": 1,
      "Actual Loops": 1,
      "Shared Hit Blocks": 611291,
      "Shared Read Blocks": 10874,
      "Shared Dirtied Blocks": 0,
      "Shared Written Blocks": 0,
      "Local Hit Blocks": 0,
      "Local Read Blocks": 0,
      "Local Dirtied Blocks": 0,
      "Local Written Blocks": 0,
      "Temp Read Blocks": 0,
      "Temp Written Blocks": 0,
      "Plans": [
        {
          "Node Type": "Nested Loop",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Async Capable": false,
          "Join Type": "Inner",
          "Startup Cost": 0.84,
          "Total Cost": 111224

In [10]:
# List any hypothetical indexes visible on a fresh connection.
conn = create_connection()
try:
    list_hypothetical_indexes(conn)
finally:
    # release the connection even if the listing fails
    close_connection(conn)

No hypothetical indexes found.


#### Generate a query from each of the 13 templates and estimate speedup for each query with hypothetical candidate indexes, also determine which indexes get used in query plan 

In [11]:
# For each template: generate a query, build candidate indexes, and estimate the
# speedup from hypothetical indexes, timing the whole estimation.
# NOTE(review): the section heading mentions all 13 templates, but this loop only
# covers template 1; widen to range(1, 14) to run every template.
for t in range(1, 2):
    print("\n------------------------------------------------------------------------")
    # generate query
    query = qg.generate_query(t)
    print(query)
    # generate candidate indexes
    candidate_indexes = extract_query_indexes(query, include_cols=True)
    print(f"\nTotal number of candidate indexes generated: {len(candidate_indexes)}")

    # measure time taken to obtain estimated speedup with hypothetical indexes
    start_time = time.perf_counter()

    conn = create_connection()
    try:
        cost_wo_indexes, scan_costs = get_query_cost_estimate(conn, query.query_string)
        print(f"Estimated cost without hypothetical indexes: {cost_wo_indexes}")
        print(f"Scan costs: {scan_costs}")

        # create hypothetical indexes for candidate indexes
        indexes = bulk_create_hypothetical_indexes(conn, candidate_indexes, return_size=True)
        try:
            # Map hypothetical index oid -> (candidate index object, size).
            # The returned (oid, size) pairs are matched positionally with candidate_indexes.
            hypothetical_indexes = {
                index_oid: (candidate, index_size_mb)
                for candidate, (index_oid, index_size_mb) in zip(candidate_indexes, indexes)
            }

            # get the cost of the query with the hypothetical indexes
            print()
            cost_w_indexes, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query.query_string, show_plan=False)
            print(f"Estimated cost with hypothetical indexes: {cost_w_indexes}")
            print(f"Speedup: {cost_wo_indexes/cost_w_indexes:.4f}")

            print("\nIndexes used in the query plan:")
            for oid, scan_type, scan_cost in indexes_used:
                print(hypothetical_indexes[oid][0], ", Index Size (Mb): ", hypothetical_indexes[oid][1],", Scan type: ", scan_type, ", Scan cost: ", scan_cost)
        finally:
            # always remove the hypothetical indexes, even if costing failed
            bulk_drop_hypothetical_indexes(conn)

        execution_time = time.perf_counter() - start_time
        print(f"\nTotal time to obtain estimated speedup: {execution_time:.4f} seconds")
    finally:
        # always release the connection, even if a step above raised
        close_connection(conn)


------------------------------------------------------------------------
template id: 1, query: 
                SELECT SUM(lo_extendedprice * lo_discount) AS revenue
                FROM lineorder, dwdate
                WHERE lo_orderdate = d_datekey
                AND d_year = 1995
                AND lo_discount BETWEEN 7 AND 9 
                AND lo_quantity < 25;
            , payload: {'lineorder': ['lo_extendedprice', 'lo_discount']}, predicates: {'lineorder': ['lo_orderdate', 'lo_discount', 'lo_quantity'], 'dwdate': ['d_datekey', 'd_year']}, order by: {}, group by: {}

Total number of candidate indexes generated: 42
Estimated cost without hypothetical indexes: 1972426.67
Scan costs: {'Index Scan': 127.04, 'Seq Scan': 1946358.78}

Estimated cost with hypothetical indexes: 117730.04
Speedup: 16.7538

Indexes used in the query plan:
Index name: ixn_lineorder_lo_orderdate_lo_discount_lo_quantity_lo_e, Key cols: ('lo_orderdate', 'lo_discount', 'lo_quantity'), Include cols: ('lo_

#### Now for each query, materialize the hypothetical indexes that get used in query plan, and compare hypothetical speedup with actual speedup

In [23]:
# For each template: estimate the speedup with hypothetical indexes, then
# materialize the indexes the plan used and measure the actual speedup.

# first, drop all existing secondary indexes
conn = create_connection()
try:
    drop_all_indexes(conn)
finally:
    close_connection(conn)

for t in range(11, 14):
    print("\n------------------------------------------------------------------------")
    # generate query
    query = qg.generate_query(t)
    print(query)
    # generate candidate indexes
    candidate_indexes = extract_query_indexes(query, max_key_columns=4, include_cols=True)
    print(f"\nTotal number of candidate indexes generated: {len(candidate_indexes)}")

    # measure time taken to obtain estimated speedup with hypothetical indexes
    start_time = time.perf_counter()

    conn = create_connection()
    try:
        cost_wo_indexes, scan_costs = get_query_cost_estimate(conn, query.query_string)
        print(f"Estimated cost without hypothetical indexes: {cost_wo_indexes}")
        print(f"Scan costs: {scan_costs}")

        # create hypothetical indexes for candidate indexes
        indexes = bulk_create_hypothetical_indexes(conn, candidate_indexes, return_size=True)
        try:
            # Map hypothetical index oid -> (candidate index object, size).
            # The returned (oid, size) pairs are matched positionally with candidate_indexes.
            hypothetical_indexes = {
                index_oid: (candidate, index_size_mb)
                for candidate, (index_oid, index_size_mb) in zip(candidate_indexes, indexes)
            }

            # get the cost of the query with the hypothetical indexes
            print()
            cost_w_indexes, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query.query_string, show_plan=False)
            print(f"Estimated cost with hypothetical indexes: {cost_w_indexes}")
            print(f"Hypothetical Speedup: {cost_wo_indexes/cost_w_indexes:.4f}")

            print("\nIndexes used in the query plan:")
            for oid, scan_type, scan_cost in indexes_used:
                print(hypothetical_indexes[oid][0], ", Index Size (Mb): ", hypothetical_indexes[oid][1],", Scan type: ", scan_type, ", Scan cost: ", scan_cost)
        finally:
            # always remove the hypothetical indexes, even if costing failed
            bulk_drop_hypothetical_indexes(conn)
    finally:
        close_connection(conn)

    # indexes_used may contain duplicates: de-duplicate by oid, keeping plan order
    # so the materialization order is deterministic
    indexes_to_materialize = list({oid: hypothetical_indexes[oid][0] for oid, _, _ in indexes_used}.values())

    end_time = time.perf_counter()
    execution_time = end_time - start_time
    print(f"\nTotal time to obtain estimated speedup: {execution_time:.4f} seconds")

    try:
        # get query execution time without indexes
        restart_postgresql(clear_cache=True, delay=2)
        conn = create_connection()
        try:
            (
                query_execution_time_wo_indexes,
                rows,
                table_access_info,
                index_access_info,
                bitmap_heapscan_info,
            ) = execute_query(conn, query.query_string, with_explain=True, return_access_info=True, print_results=False)
        finally:
            close_connection(conn)
        # the /1000 below treats the reported time as milliseconds — TODO confirm against execute_query
        print(f"\nTotal time to execute query without indexes: {query_execution_time_wo_indexes/1000:.4f} seconds")
        print(f"Indexes used in the query plan: {list(index_access_info.keys())}")

        # materialize the indexes
        start_time = time.perf_counter()
        print(f"\nMaterializing {len(indexes_to_materialize)} indexes...")
        conn = create_connection()
        try:
            bulk_create_indexes(conn, indexes_to_materialize)
        finally:
            close_connection(conn)
        end_time = time.perf_counter()
        execution_time = end_time - start_time
        print(f"\nTotal time to materialize indexes: {execution_time:.4f} seconds")

        # get query execution time with indexes
        restart_postgresql(clear_cache=True, delay=2)
        conn = create_connection()
        try:
            (
                query_execution_time_w_indexes,
                rows,
                table_access_info,
                index_access_info,
                bitmap_heapscan_info,
            ) = execute_query(conn, query.query_string, with_explain=True, return_access_info=True, print_results=False)
        finally:
            close_connection(conn)
        print(f"\nTotal time to execute query with indexes: {query_execution_time_w_indexes/1000:.4f} seconds")
        print(f"Indexes used in the query plan: {list(index_access_info.keys())}")
        print(f"\nActual Speedup with materialized indexes: {query_execution_time_wo_indexes/query_execution_time_w_indexes:.4f}\n")
    finally:
        # drop all indexes, even after a failure, so the next template starts
        # from a clean, index-free baseline
        conn = create_connection()
        try:
            drop_all_indexes(conn)
        finally:
            close_connection(conn)



------------------------------------------------------------------------
template id: 11, query: 
                SELECT d_year, c_nation, SUM(lo_revenue - lo_supplycost) AS profit
                FROM dwdate, customer, supplier, part, lineorder
                WHERE lo_custkey = c_custkey
                AND lo_suppkey = s_suppkey
                AND lo_partkey = p_partkey
                AND lo_orderdate = d_datekey
                AND c_region = 'AMERICA'
                AND s_region = 'AMERICA'
                AND (p_mfgr = 'MFGR#4' OR p_mfgr = 'MFGR#5')
                GROUP BY d_year, c_nation
                ORDER BY d_year, c_nation;
            , payload: {'lineorder': ['lo_revenue', 'lo_supplycost'], 'dwdate': ['d_year'], 'customer': ['c_nation']}, predicates: {'lineorder': ['lo_custkey', 'lo_suppkey', 'lo_orderdate', 'lo_partkey'], 'dwdate': ['d_year', 'd_datekey'], 'customer': ['c_custkey', 'c_region', 'c_nation'], 'part': ['p_partkey', 'p_mfgr'], 'supplier': ['s_suppkey',

In [17]:
# Report the total database size and the size of every table.
conn = create_connection()
try:
    # get the size of the database
    database_size = get_database_size(conn)
    print(f"\nDatabase size: {database_size} MB")

    # get sizes of all tables
    tables_sizes = get_all_table_sizes(conn)
    print(f"\nTable sizes in MB: {tables_sizes}")
finally:
    # release the connection even if a size lookup fails
    close_connection(conn)



Database size: 9140 MB

Table sizes in MB: {'lineorder': 8969, 'part': 111, 'customer': 46, 'supplier': 3, 'dwdate': 0}


#### Test Hypothetical Hiding and Unhiding of Indexes

In [34]:
# pick a test query (template 14) and show it followed by a blank line
query = qg.generate_query(14)
print(query, "\n")

# enumerate its candidate indexes (with include-column variants) and print each one
candidate_indexes = extract_query_indexes(query, include_cols=True)
for candidate in candidate_indexes:
    print(candidate)

print(f"Total number of candidate indexes: {len(candidate_indexes)}")

template id: 14, query: 
                SELECT lo_linenumber, lo_quantity, lo_orderdate  
                FROM lineorder
                WHERE lo_linenumber >= 3 AND lo_linenumber <= 4
                AND lo_quantity = 12;
            , payload: {'lineorder': ['lo_linenumber', 'lo_quantity', 'lo_orderdate']}, predicates: {'lineorder': ['lo_linenumber', 'lo_quantity']}, order by: {}, group by: {} 

Index name: ix_lineorder_lo_linenumber, Key cols: ('lo_linenumber',), Include cols: (), Current OID: None
Index name: ixn_lineorder_lo_linenumber_lo_o, Key cols: ('lo_linenumber',), Include cols: ('lo_orderdate',), Current OID: None
Index name: ixn_lineorder_lo_linenumber_lo_q, Key cols: ('lo_linenumber',), Include cols: ('lo_quantity',), Current OID: None
Index name: ixn_lineorder_lo_linenumber_lo_o_lo_q, Key cols: ('lo_linenumber',), Include cols: ('lo_orderdate', 'lo_quantity'), Current OID: None
Index name: ix_lineorder_lo_quantity, Key cols: ('lo_quantity',), Include cols: (), Current O

In [36]:
# check which indexes will be used in the query plan, and collect them
# (de-duplicated) in indexes_to_materialize
conn = create_connection()
try:
    # drop all existing secondary indexes
    drop_all_indexes(conn)

    cost_wo_indexes, scan_costs = get_query_cost_estimate(conn, query.query_string, show_plan=False)
    print(f"Estimated cost without hypothetical indexes: {cost_wo_indexes}")
    print(f"Scan costs: {scan_costs}")

    # create hypothetical indexes for candidate indexes
    indexes = bulk_create_hypothetical_indexes(conn, candidate_indexes, return_size=True)
    try:
        # Map hypothetical index oid -> (candidate index object, size).
        # The returned (oid, size) pairs are matched positionally with candidate_indexes.
        hypothetical_indexes = {
            index_oid: (candidate, index_size_mb)
            for candidate, (index_oid, index_size_mb) in zip(candidate_indexes, indexes)
        }

        # get the cost of the query with the hypothetical indexes
        print()
        cost_w_indexes, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query.query_string, show_plan=False)
        print(f"\nEstimated cost with hypothetical indexes: {cost_w_indexes}")
        print(f"Speedup: {cost_wo_indexes/cost_w_indexes:.4f}")

        print("\nIndexes used in the query plan:")
        used_by_oid = {}  # indexes_used may contain duplicates; key by oid to de-duplicate
        for oid, scan_type, scan_cost in indexes_used:
            print(hypothetical_indexes[oid][0], ", Index Size (Mb): ", hypothetical_indexes[oid][1],", Scan type: ", scan_type, ", Scan cost: ", scan_cost)
            used_by_oid[oid] = hypothetical_indexes[oid][0]
        indexes_to_materialize = list(used_by_oid.values())
    finally:
        # always remove the hypothetical indexes, even if costing failed
        bulk_drop_hypothetical_indexes(conn)
finally:
    # always release the connection, even if a step above raised
    close_connection(conn)

Estimated cost without hypothetical indexes: 1460325.89
Scan costs: {'Seq Scan': 1420791.49}


Estimated cost with hypothetical indexes: 15242.3
Speedup: 95.8074

Indexes used in the query plan:
Index name: ixn_lineorder_lo_quantity_lo_linenumber_lo_o, Key cols: ('lo_quantity', 'lo_linenumber'), Include cols: ('lo_orderdate',), Current OID: None , Index Size (Mb):  1992.9140625 , Scan type:  Index Only Scan , Scan cost:  15242.3


In [37]:
# show the first index selected for materialization
print(indexes_to_materialize[0])

Index name: ixn_lineorder_lo_quantity_lo_linenumber_lo_o, Key cols: ('lo_quantity', 'lo_linenumber'), Include cols: ('lo_orderdate',), Current OID: None


In [38]:
# materialize the 'IXN_lineorder_lo_quantity_lo_linenumber_lo_o' index
conn = create_connection()
try:
    bulk_create_indexes(conn, indexes_to_materialize)
finally:
    # release the connection even if index creation fails
    close_connection(conn)

Successfully created index 'ixn_lineorder_lo_quantity_lo_linenumber_lo_o': 1804.4843750000000000, creation time: 25.04 seconds


In [62]:
# Hide the materialized indexes, then check which of the remaining candidate
# indexes the planner would use when they are created hypothetically.
conn = create_connection()
try:
    # hide the materialized indexes
    bulk_hide_indexes(conn, indexes_to_materialize)
    try:
        # only the candidates that are not already materialized are created hypothetically
        candidate_indexes_wo_materialized = [index for index in candidate_indexes if index not in indexes_to_materialize]

        cost_wo_indexes, scan_costs = get_query_cost_estimate(conn, query.query_string, show_plan=True)
        print(f"Estimated cost without hypothetical indexes: {cost_wo_indexes}")
        print(f"Scan costs: {scan_costs}")

        # create hypothetical indexes for candidate indexes
        indexes = bulk_create_hypothetical_indexes(conn, candidate_indexes_wo_materialized, return_size=True)
        try:
            # Map hypothetical index oid -> (candidate index object, size).
            # The returned (oid, size) pairs are matched positionally with the candidate list.
            hypothetical_indexes = {
                index_oid: (candidate, index_size_mb)
                for candidate, (index_oid, index_size_mb) in zip(candidate_indexes_wo_materialized, indexes)
            }

            # get the cost of the query with the hypothetical indexes
            print()
            cost_w_indexes, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query.query_string, show_plan=False)
            print(f"\nEstimated cost with hypothetical indexes: {cost_w_indexes}")
            print(f"Speedup: {cost_wo_indexes/cost_w_indexes:.4f}")

            print("\nIndexes used in the query plan:")
            for oid, scan_type, scan_cost in indexes_used:
                print(hypothetical_indexes[oid][0], ", Index Size (Mb): ", hypothetical_indexes[oid][1],", Scan type: ", scan_type, ", Scan cost: ", scan_cost)
        finally:
            # always remove the hypothetical indexes, even if costing failed
            bulk_drop_hypothetical_indexes(conn)
    finally:
        # unhide the materialized indexes, even if a step above raised
        bulk_unhide_indexes(conn, indexes_to_materialize)
finally:
    # always release the connection
    close_connection(conn)

Index with OID 16493 hidden: True
[
  {
    "Plan": {
      "Node Type": "Gather",
      "Parallel Aware": false,
      "Async Capable": false,
      "Startup Cost": 1000.0,
      "Total Cost": 1460325.89,
      "Plan Rows": 385344,
      "Plan Width": 12,
      "Workers Planned": 2,
      "Single Copy": false,
      "Plans": [
        {
          "Node Type": "Seq Scan",
          "Parent Relationship": "Outer",
          "Parallel Aware": true,
          "Async Capable": false,
          "Relation Name": "lineorder",
          "Alias": "lineorder",
          "Startup Cost": 0.0,
          "Total Cost": 1420791.49,
          "Plan Rows": 160560,
          "Plan Width": 12,
          "Filter": "((lo_linenumber >= 3) AND (lo_linenumber <= 4) AND (lo_quantity = 12))"
        }
      ]
    }
  }
]
Estimated cost without hypothetical indexes: 1460325.89
Scan costs: {'Seq Scan': 1420791.49}


Estimated cost with hypothetical indexes: 47757.79
Speedup: 30.5778

Indexes used in the query plan

#### Hiding primary key indexes

In [16]:
# fetch the SSB primary-key index objects and print one per line
pk_indexes = ssb_pk_index_objects()
for pk_index in pk_indexes:
    print(pk_index)

Index name: pk_lineorder, Key cols: ['lo_orderkey', 'lo_linenumber'], Include cols: (), Current OID: 59848, Size: 1285.0 MB
Index name: pk_customer, Key cols: ['c_custkey'], Include cols: (), Current OID: 59854, Size: 6.4453125 MB
Index name: pk_supplier, Key cols: ['s_suppkey'], Include cols: (), Current OID: 59852, Size: 0.4453125 MB
Index name: pk_part, Key cols: ['p_partkey'], Include cols: (), Current OID: 59850, Size: 17.15625 MB
Index name: pk_dwdate, Key cols: ['d_datekey'], Include cols: (), Current OID: 59856, Size: 0.0703125 MB


In [5]:
# generate a query from template 9 for the primary-key hiding test below
query = qg.generate_query(9)

In [10]:
# Compare the query plan/cost with the primary-key indexes visible and hidden.
conn = create_connection()
try:
    # drop all existing secondary indexes
    drop_all_indexes(conn)

    # get query plan
    cost, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query.query_string, show_plan=True)
    print(f"\nHypo cost: {cost}")
    print(f"Indexes used in the query plan: {indexes_used}")

    # hide the primary indexes
    bulk_hide_indexes(conn, pk_indexes)
    try:
        # get query plan
        cost, indexes_used = get_query_cost_estimate_hypo_indexes(conn, query.query_string, show_plan=True)
        print(f"\nHypo cost: {cost}")
        print(f"Indexes used in the query plan: {indexes_used}")
    finally:
        # unhide the primary indexes, even if costing failed
        bulk_unhide_indexes(conn, pk_indexes)
finally:
    # always release the connection
    close_connection(conn)

[
  {
    "Plan": {
      "Node Type": "Incremental Sort",
      "Parallel Aware": false,
      "Async Capable": false,
      "Startup Cost": 1682178.72,
      "Total Cost": 1682409.94,
      "Plan Rows": 3217,
      "Plan Width": 58,
      "Sort Key": [
        "dwdate.d_year",
        "(sum(lineorder.lo_revenue)) DESC"
      ],
      "Presorted Key": [
        "dwdate.d_year"
      ],
      "Plans": [
        {
          "Node Type": "Aggregate",
          "Strategy": "Sorted",
          "Partial Mode": "Simple",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Async Capable": false,
          "Startup Cost": 1682146.9,
          "Total Cost": 1682227.33,
          "Plan Rows": 3217,
          "Plan Width": 58,
          "Group Key": [
            "dwdate.d_year",
            "customer.c_city",
            "supplier.s_city"
          ],
          "Plans": [
            {
              "Node Type": "Sort",
              "Parent Relationship": "Ou