### Parsing predicates from TPC-H queries

In [131]:
# Load the autoreload extension
%load_ext autoreload
# Set autoreload mode
%autoreload 2

from tpch_query_predicates_parser import *
import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [132]:
# define a query object class
class Query:
    """Container for one workload query and the metadata extracted from it.

    The constructor stores every argument unchanged on the instance under the
    same name, so the attribute set is exactly the parameter list below.

    Args:
        template_id: Identifier of the query template this query was
            generated from.
        query_string: Text of the query.
        payload: Payload information for the query (stored as given).
        predicates: Predicate information for the query (stored as given).
        order_bys: ORDER BY information for the query (stored as given).
        group_bys: GROUP BY information for the query (stored as given).
        predicate_dict: Optional parsed-predicate mapping; defaults to None.
        benchmark: Name of the benchmark the query belongs to; defaults to
            "TPCH".
    """

    def __init__(self, template_id, query_string, payload, predicates, order_bys, group_bys, predicate_dict=None, benchmark="TPCH"):
        self.template_id = template_id
        self.query_string = query_string
        self.payload = payload
        self.predicates = predicates
        self.order_bys = order_bys
        self.group_bys = group_bys
        self.benchmark = benchmark
        self.predicate_dict = predicate_dict

    def __str__(self):
        """Return a multi-line, human-readable summary of the query."""
        # The template is stored as `template_id`; there is no `template`
        # attribute, so reading it would raise AttributeError.
        return (
            f"template: {self.template_id}\n"
            f"query string: {self.query_string}\n"
            f"payload: {self.payload}\n"
            f"predicates: {self.predicates}\n"
            f"order_bys: {self.order_bys}"
        )

In [133]:
 # load tpch static workload from a file
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open workload files that come from a trusted source.
with open('./TPCH_workloads/tpch_static_workload_100_rounds.pkl', 'rb') as f:
    workload_dict = pickle.load(f)

# The loaded object is indexed by two keys: the workload's metadata and the
# sequence of queries itself.
workload_metadata, workload = workload_dict['metadata'], workload_dict['workload']

# Quick sanity report on what was loaded.
print(f"Loaded static workload from file with {len(workload)} queries.")
print(f"Num rounds: {workload_metadata['num_rounds']}, Num queries per round: {workload_metadata['num_queries_per_round']}")

Loaded static workload from file with 2100 queries.
Num rounds: 100, Num queries per round: 21


In [137]:
n = 21

# Visit workload positions n..n (1-based, so a single entry) and print the
# query text followed by the predicates the parser returns for it.
rule = "-" * 80
for i in range(n, n+1):
    query = workload[i-1]
    query_string = query.query_string
    template_num = query.template_id

    print(f"Query template: {template_num}")
    print(rule)
    print(query_string)
    print("\nParsed predicates:")
    print(rule)

    # The parser result is iterated as a mapping of table name -> predicates.
    predicate_dict = parse_tpch_query(query_string, template_num)
    for table_name, predicates_list in predicate_dict.items():
        print(f"{table_name}:")
        for predicate in predicates_list:
            print("\t", predicate)
    print("\n\n")



Query template: 22
--------------------------------------------------------------------------------
select
	cntrycode,
	count(*) as numcust,
	sum(c_acctbal) as totacctbal
from
	(
		select
			substring(c_phone, 1, 2) as cntrycode,
			c_acctbal
		from
			customer
		where
			substring(c_phone, 1, 2) in
				('40', '36', '34', '42', '23', '31', '33')
			and c_acctbal > (
				select
					avg(c_acctbal)
				from
					customer
				where
					c_acctbal > 0.00
					and substring(c_phone, 1, 2) in
						('40', '36', '34', '42', '23', '31', '33')
			)
			and not exists (
				select
					*
				from
					orders
				where
					o_custkey = c_custkey
			)
	) as custsale
group by
	cntrycode
order by
	cntrycode
;

Parsed predicates:
--------------------------------------------------------------------------------
customer:
	 {'column': 'c_acctbal', 'operator': '>', 'value': '0.00', 'join': False}
orders:
	 {'column': 'o_custkey', 'operator': '=', 'value': 'c_custkey', 'join': True}



