In [60]:
from sqlanalyzer import column_parser, unbundle
import re
import json

In [65]:
# SQL fixture for the CTE-parsing experiments below: three CTEs
# (opp_product_share, arr_product_exp, arr_product_prev) followed by a final SELECT.
# NOTE(review): the final SELECT reads from arr_full_status, which none of the CTEs
# in this string defines — presumably the query was trimmed from a larger one; confirm
# that is intentional before reusing it as anything other than parser input.
query = """WITH opp_product_share AS
  (SELECT DATE_FORMAT(service_start_day, 'yyyy-MM-01') AS service_start_month,
          DATE_FORMAT(service_end_day_r, 'yyyy-MM-01') AS service_end_month,
          a.*,
          arr * SHARE AS arr_p
   FROM
     (SELECT *,
             CASE
                 WHEN mapped_product = 'maps'
                      AND product_name = 'unknown'
                      AND num_items <= 1 THEN 1
                 WHEN (total_value > 0
                       AND total_product_value = 0) THEN list_price_value / total_value
                 WHEN total_product_value > 0 THEN product_value / total_product_value
                 ELSE 0
             END AS SHARE,
             service_end_day AS service_end_day_r
      FROM opportunity_product) a), 
      
arr_product_exp AS
  (SELECT md.account_id,
          md.account_name,
          md.product_name,
          CASE
              WHEN md.mapped_product IN ('bundled') THEN 'maps'
              ELSE md.mapped_product
          END AS mapped_product, service_month,
                                 COLLECT_SET(opportunity_id) AS opportunity_id_s, COLLECT_SET(CASE
                                                                                                  WHEN stage_name NOT IN ('Won', '7 - ICR', 'Won - Pending') THEN opportunity_id
                                                                                                  ELSE NULL
                                                                                              END) AS opportunity_id_nw, SUM(arr_p) AS arr_p
   FROM arr_by_month_dummy md
   LEFT JOIN opp_product_share ps ON md.account_id = ps.account_id
   AND md.product_name = ps.product_name
   AND md.service_month BETWEEN ps.service_start_month AND ps.service_end_month
   GROUP BY md.account_id,
            md.account_name,
            md.product_name,
            md.mapped_product,
            service_month),
                                    
arr_product_prev AS
  (SELECT account_id,
          account_name,
          mapped_product,
          product_name,
          service_month,
          COALESCE(ROUND(arr_p, 2), 0) AS arr_p,
          COALESCE(LAG(ROUND(arr_p, 2)) OVER (PARTITION BY account_id, product_name
                                              ORDER BY service_month ASC), 0) AS prev_arr_p,
          opportunity_id_s AS opportunity_id,
          CONCAT_WS(',', opportunity_id_s) AS opportunity_id_p,
          LAG(CONCAT_WS(',', opportunity_id_s)) OVER (PARTITION BY account_id,
                                                                   product_name
                                                      ORDER BY service_month ASC) AS prev_opportunity_id_p,
                                                     CONCAT_WS(',', opportunity_id_nw) AS opportunity_id_nw
   FROM arr_product_exp)

SELECT service_month,
       account_id,
       account_name,
       mapped_product,
       mid_product,
       product_name,
       account_status,
       mapped_status,
       mid_status,
       product_status,
       arr_p,
       cum_arr_p,
       prev_cum_arr_p,
       opportunity_id_p,
       prev_opportunity_id_p,
       opportunity_id_nw
FROM arr_full_status
"""

In [66]:
# Build the parser from the raw SQL, then ask it for the formatted text.
# NOTE(review): the same query string is passed both to Parser(...) and to
# format_query(...); presumably only one of them is strictly needed — confirm
# against the sqlanalyzer API.
formatter = column_parser.Parser(query)
formatted_query = formatter.format_query(query)
# Line-by-line view of the formatted SQL (split on '\n' only).
query_list = formatted_query.split('\n')


In [67]:
# print() so the newlines render as a readable layout instead of '\n' escapes.
print(formatted_query)

WITH opp_product_share AS
  (SELECT DATE_FORMAT(service_start_day, 'yyyy-MM-01') AS service_start_month,
          DATE_FORMAT(service_end_day_r, 'yyyy-MM-01') AS service_end_month,
          a.*,
          arr * SHARE AS arr_p
   FROM
     (SELECT *,
             CASE
                 WHEN mapped_product = 'maps'
                      AND product_name = 'unknown'
                      AND num_items <= 1 THEN 1
                 WHEN (total_value > 0
                       AND total_product_value = 0) THEN list_price_value / total_value
                 WHEN total_product_value > 0 THEN product_value / total_product_value
                 ELSE 0
             END AS SHARE,
             service_end_day AS service_end_day_r
      FROM opportunity_product) a),
     arr_product_exp AS
  (SELECT md.account_id,
          md.account_name,
          md.product_name,
          CASE
              WHEN md.mapped_product IN ('bundled') THEN 'maps'
              ELSE md.mapped_product
          END A

In [68]:
# Offsets at which a CTE header begins: an optional WITH, anything up to "AS",
# optional whitespace, then the opening "(SELECT" of the CTE body.
cte = re.compile(r"(WITH)*(.*AS\s*\(SELECT)")
pos_list = [match.start() for match in cte.finditer(formatted_query)]

# Offsets of every SELECT keyword in the text (CTE bodies and nested queries included).
cte_main = re.compile(r"(SELECT)")
pos_list_main = [match.start() for match in cte_main.finditer(formatted_query)]


In [69]:
# Show the start offset of each CTE header found in formatted_query.
pos_list

[0, 766, 1711]

In [70]:
# Re-run the CTE-header scan: collect the start offset of every
# "<name> AS (SELECT" occurrence in the formatted query.
cte = re.compile(r"(WITH)*(.*AS\s*\(SELECT)")
pos_list = [match.start() for match in cte.finditer(formatted_query)]

In [71]:
# Inspect the first 962 characters of the formatted query as a raw string.
# NOTE(review): 962 is an unexplained cutoff; in the output below it ends partway
# through the second CTE.
formatted_query[:962]

"WITH opp_product_share AS\n  (SELECT DATE_FORMAT(service_start_day, 'yyyy-MM-01') AS service_start_month,\n          DATE_FORMAT(service_end_day_r, 'yyyy-MM-01') AS service_end_month,\n          a.*,\n          arr * SHARE AS arr_p\n   FROM\n     (SELECT *,\n             CASE\n                 WHEN mapped_product = 'maps'\n                      AND product_name = 'unknown'\n                      AND num_items <= 1 THEN 1\n                 WHEN (total_value > 0\n                       AND total_product_value = 0) THEN list_price_value / total_value\n                 WHEN total_product_value > 0 THEN product_value / total_product_value\n                 ELSE 0\n             END AS SHARE,\n             service_end_day AS service_end_day_r\n      FROM opportunity_product) a),\n     arr_product_exp AS\n  (SELECT md.account_id,\n          md.account_name,\n          md.product_name,\n          CASE\n              WHEN md.mapped_product IN ('bundled') THEN 'maps'\n              "

In [72]:
# Check where the first CTE header matches inside the 962-character prefix.
cte = re.compile(r"(WITH)*(.*AS\s*\(SELECT)")
x = cte.search(formatted_query[:962])
# span() is the (start, end) offset pair of that first match.
print(x.span())

(0, 35)


In [73]:
# Peek at the first 27 characters: the "WITH <name> AS" header of the first CTE.
formatted_query[:27]

'WITH opp_product_share AS\n '

In [74]:
# List each CTE start offset next to its position in pos_list.
for index, pos in enumerate(pos_list):
    print(index, pos)

0 0
1 766
2 1711


In [75]:
# Pin the loop variables to the first CTE so the slicing step can be tried by hand.
index, pos = 0, 0

In [76]:
# From here on `query` refers to the formatted text, not the raw SQL literal.
query = formatted_query
# Text of the current CTE: from its start offset up to the start of the next CTE.
query[pos:pos_list[index+1]]

"WITH opp_product_share AS\n  (SELECT DATE_FORMAT(service_start_day, 'yyyy-MM-01') AS service_start_month,\n          DATE_FORMAT(service_end_day_r, 'yyyy-MM-01') AS service_end_month,\n          a.*,\n          arr * SHARE AS arr_p\n   FROM\n     (SELECT *,\n             CASE\n                 WHEN mapped_product = 'maps'\n                      AND product_name = 'unknown'\n                      AND num_items <= 1 THEN 1\n                 WHEN (total_value > 0\n                       AND total_product_value = 0) THEN list_price_value / total_value\n                 WHEN total_product_value > 0 THEN product_value / total_product_value\n                 ELSE 0\n             END AS SHARE,\n             service_end_day AS service_end_day_r\n      FROM opportunity_product) a),\n"

In [77]:
# Split the formatted query into {cte_name: cte_body_sql} plus a 'main' entry
# holding the final SELECT. Relies on pos_list (CTE header offsets) and
# pos_list_main (offsets of every SELECT keyword) computed earlier.
# When pos_list is empty the loop simply does not run and only 'main' is stored.
query = formatted_query
cte_dict = {}

# Compiled once, outside the loop: the CTE header and the "(SELECT" opening its body.
cte_header = re.compile(r"(WITH)*(.*)AS")
cte_removed = re.compile(r"\(SELECT")

# The last SELECT keyword in the text is taken as the start of the main query.
# NOTE(review): if the main query itself contains a nested SELECT, this offset
# would land inside it — confirm the inputs never do.
main_start = pos_list_main[-1]

# Each CTE spans from its own start to the next CTE's start; the last one ends
# where the main query begins.
cte_ends = pos_list[1:] + [main_start]
for cte_start, cte_end in zip(pos_list, cte_ends):
    # Drop the separator (newlines / comma) and the parenthesis closing the CTE.
    cte_query = query[cte_start:cte_end].rstrip('\n,')
    cte_query = re.sub(r"\)$", "", cte_query)

    # Group 2 is whatever precedes "AS" on the header line, i.e. the CTE name.
    cte_name = cte_header.search(cte_query).group(2).strip(' ')

    # Keep everything after the first "(" that opens the CTE body.
    body_start = cte_removed.search(cte_query).start() + 1
    cte_dict[cte_name] = cte_query[body_start:]

cte_dict['main'] = query[main_start:]


In [78]:
# Replace the hand-built cte_dict with the result of the library's own
# parse_cte on the same formatted text.
cte_dict = formatter.parse_cte(formatted_query)
# Display the resulting mapping.
cte_dict

{'opp_product_share': "SELECT DATE_FORMAT(service_start_day, 'yyyy-MM-01') AS service_start_month,\n          DATE_FORMAT(service_end_day_r, 'yyyy-MM-01') AS service_end_month,\n          a.*,\n          arr * SHARE AS arr_p\n   FROM\n     (SELECT *,\n             CASE\n                 WHEN mapped_product = 'maps'\n                      AND product_name = 'unknown'\n                      AND num_items <= 1 THEN 1\n                 WHEN (total_value > 0\n                       AND total_product_value = 0) THEN list_price_value / total_value\n                 WHEN total_product_value > 0 THEN product_value / total_product_value\n                 ELSE 0\n             END AS SHARE,\n             service_end_day AS service_end_day_r\n      FROM opportunity_product) a",
 'arr_product_exp': "SELECT md.account_id,\n          md.account_name,\n          md.product_name,\n          CASE\n              WHEN md.mapped_product IN ('bundled') THEN 'maps'\n              ELSE md.mapped_product\n     

In [79]:
# Keys only: the names under which the query parts were stored.
cte_dict.keys()

dict_keys(['opp_product_share', 'arr_product_exp', 'arr_product_prev', 'main'])