In [124]:
from sqlanalyzer import column_parser
import pandas as pd
import re

In [109]:
# Sample SQL used as the input for the column-parser calls in the cells below.
# It joins sfdc.opportunity_product (alias `op`) to sfdc.products (alias `p`).
# The `{run_date}` placeholders are left as-is: this is a plain string literal
# (not an f-string) and nothing in this cell substitutes them.
query = """SELECT opportunity_id,
          op.product_2_id, p.name AS product_name,
                           COALESCE(CAST(quantity AS DOUBLE) * CAST(list_price AS DOUBLE), 0) AS total_price,
                           netsuite_conn_netsuite_item_key_id_c AS netsuite_conn_net_suite_item_key_id_c,
                           COUNT(netsuite_conn_netsuite_item_key_id_c) OVER (PARTITION BY opportunity_id) AS num_netsuite_items
   FROM sfdc.opportunity_product op
   LEFT JOIN sfdc.products p ON op.product_2_id = p.id
   AND p.dt = '{run_date}'
   WHERE op.dt = '{run_date}'
     AND opportunity_id IS NOT NULL
     AND product_name_c IS NOT NULL
     AND op.is_deleted = FALSE 
"""

In [110]:
# Build a Parser around the raw query, then ask it for a formatted version.
# NOTE(review): the query is passed both to the constructor and to
# format_query(); whether both are required is not visible here — confirm
# against the sqlanalyzer API.
formatter = column_parser.Parser(query)
formatted = formatter.format_query(query)
# print() so the multi-line SQL is shown with real line breaks instead of a repr.
print(formatted)

SELECT opportunity_id,
       op.product_2_id,
       p.name AS product_name,
       COALESCE(CAST(quantity AS DOUBLE) * CAST(list_price AS DOUBLE), 0) AS total_price,
       netsuite_conn_netsuite_item_key_id_c AS netsuite_conn_net_suite_item_key_id_c,
       COUNT(netsuite_conn_netsuite_item_key_id_c) OVER (PARTITION BY opportunity_id) AS num_netsuite_items
FROM sfdc.opportunity_product op
LEFT JOIN sfdc.products p ON op.product_2_id = p.id
AND p.dt = '{run_date}'
WHERE op.dt = '{run_date}'
  AND opportunity_id IS NOT NULL
  AND product_name_c IS NOT NULL
  AND op.is_deleted = FALSE


In [112]:
# Column inventory for sfdc.opportunity_product: one row per column name.
# The scalar 'db_table' value is broadcast by pandas to every row.
db_fields_1 = pd.DataFrame(
    {
        'db_table': 'sfdc.opportunity_product',
        'all_columns': [
            'actual_quantity_c',
            'annual_list_price_value_c',
            'annual_product_value_c',
            'annual_recurring_revenue_c',
            'contract_is_12_months_or_more_c',
            'created_by_id',
            'created_date',
            'description',
            'discount_c',
            'end_date_c',
            'final_year_of_contract_c',
            'id',
            'invoice_schedule_c',
            'is_deleted',
            'last_modified_by_id',
            'last_modified_date',
            'line_family_c',
            'list_price',
            'list_price_value_c',
            'monthly_recurring_revenue_c',
            'name',
            'netsuite_conn_netsuite_item_id_import_c',
            'netsuite_conn_netsuite_item_key_id_c',
            'netsuite_conn_pushed_from_netsuite_c',
            'netsuite_conn_start_date_c',
            'opp_end_date_lineitem_end_date_c',
            'opportunity_id',
            'opportunity_product_line_types_c',
            'opportunity_service_days_c',
            'overage_price_c',
            'pricebook_entry_id',
            'product_2_id',
            'product_code',
            'product_family_c',
            'product_name_c',
            'product_value_c',
            'quantity',
            'roll_up_summary_years_c',
            'service_date',
            'service_days_c',
            'service_year_c',
            'service_year_to_text_c',
            'system_modstamp',
            'time_fetched_from_salesforce',
            'total_price',
            'unit_price',
            'update_everything_c',
            'x18_digit_opportunity_id_c',
            'dt',
        ],
    }
)


In [113]:
# Column inventory for sfdc.products: one row per column name.
# The scalar 'db_table' value is broadcast by pandas to every row.
db_fields_2 = pd.DataFrame(
    {
        'db_table': 'sfdc.products',
        'all_columns': [
            'availability_c',
            'billing_type_c',
            'cpm_product_c',
            'created_date',
            'exempt_api_calls_c',
            'family',
            'id',
            'implementing_sdks_c',
            'is_active',
            'is_deleted',
            'launch_date_c',
            'name',
            'netsuite_conn_celigo_update_c',
            'netsuite_conn_item_category_c',
            'netsuite_conn_netsuite_id_c',
            'netsuite_conn_sub_type_c',
            'pql_usage_tier_c',
            'product_code',
            'product_id_c',
            'service_organization_c',
            'sku_id_c',
            'volume_discount_c',
            'dt',
        ],
    }
)


In [115]:
# Stack the two per-table column inventories into one lookup frame with a
# fresh 0..n-1 index. pd.concat is used because DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
db_fields = pd.concat([db_fields_1, db_fields_2], ignore_index=True)
# `df` is kept as an alias of the same object so any cell that still refers to
# it keeps working.
df = db_fields

In [117]:
# Ask the parser which (database, table, column) entries from db_fields are
# referenced by the formatted query.
# NOTE(review): the recorded output for this cell does not list product_2_id,
# quantity or list_price even though the query references all three and they
# appear in the sfdc.opportunity_product inventory — confirm whether that is
# expected parser behaviour or a stale output.
fields = formatter.match_queried_fields(formatted, db_fields)
# Bare last expression so the notebook displays the result.
fields

[{'database_name': 'sfdc', 'table_name': 'products', 'column_name': 'name'},
 {'database_name': 'sfdc', 'table_name': 'products', 'column_name': 'dt'},
 {'database_name': 'sfdc',
  'table_name': 'opportunity_product',
  'column_name': 'dt'},
 {'database_name': 'sfdc',
  'table_name': 'opportunity_product',
  'column_name': 'opportunity_id'},
 {'database_name': 'sfdc',
  'table_name': 'opportunity_product',
  'column_name': 'product_name_c'},
 {'database_name': 'sfdc',
  'table_name': 'opportunity_product',
  'column_name': 'netsuite_conn_netsuite_item_key_id_c'},
 {'database_name': 'sfdc', 'table_name': 'products', 'column_name': 'id'},
 {'database_name': 'sfdc',
  'table_name': 'opportunity_product',
  'column_name': 'is_deleted'}]