In [1]:
%pip install sqlparse

Collecting sqlparse
  Using cached sqlparse-0.4.2-py3-none-any.whl (42 kB)
Installing collected packages: sqlparse
Successfully installed sqlparse-0.4.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
import snowflake.connector
import getpass
import sys
import pandas as pd
import sqlparse

In [3]:
# Configure Snowflake Connector
# The password is requested interactively so it is never stored in the notebook.
SNOWFLAKE_CREDS_DICT = dict(
    SNOWFLAKE_USER="YWEI",
    SNOWFLAKE_PASSWORD=getpass.getpass(),
)

# Account-level settings; database, warehouse and role are applied to the
# session by the USE statements issued after the connection is opened.
SNOWFLAKE_CONFIG_DICT = dict(
    account='komodohealth',
    database='SANDBOX_KOMODO',
    warehouse='XLARGE_WH',
    role='ANALYST',
)

# Flattened view of the two dictionaries above, keyed with "sf*" names.
SF_DICT = dict(
    sfURL=f"{SNOWFLAKE_CONFIG_DICT['account']}.snowflakecomputing.com",
    sfUser=SNOWFLAKE_CREDS_DICT['SNOWFLAKE_USER'],
    sfPassword=SNOWFLAKE_CREDS_DICT['SNOWFLAKE_PASSWORD'],
    sfDatabase=SNOWFLAKE_CONFIG_DICT['database'],
    sfWarehouse=SNOWFLAKE_CONFIG_DICT['warehouse'],
    sfRole=SNOWFLAKE_CONFIG_DICT['role'],
    tracing='All',
)

# Open the session; only user, password and account are passed here.
ctx = snowflake.connector.connect(
    account=SNOWFLAKE_CONFIG_DICT['account'],
    user=SF_DICT['sfUser'],
    password=SF_DICT['sfPassword'],
)

········


In [4]:
# Switch database, warehouse and role
# Order matters: the role is set first, then the warehouse and database.
for use_statement in (
    f"USE ROLE {SF_DICT['sfRole']}",
    f"USE WAREHOUSE {SF_DICT['sfWarehouse']}",
    f"USE {SF_DICT['sfDatabase']}",
):
    ctx.cursor().execute(use_statement)
# Working schema for the tables created below; left as the final expression
# so the cell still displays the returned cursor.
ctx.cursor().execute('USE SCHEMA AYWEI')

<snowflake.connector.cursor.SnowflakeCursor at 0x7f8ea8669df0>

In [5]:
def send_query(connection = None, sql = "", execute = False, verbose = True):
    """Optionally echo and/or execute a SQL statement.

    The statement is pretty-printed with ``sqlparse`` whenever ``verbose`` is
    true or the call is a dry run (``execute`` is false). It is executed on a
    fresh cursor only when ``execute`` is true and a connection was supplied.

    Args:
        connection: Object exposing ``cursor()``; may be None for a dry run.
        sql: SQL text to print and/or run.
        execute: When true, run ``sql`` on ``connection``.
        verbose: When true, print the formatted SQL even if it is executed.

    Returns:
        None.
    """
    echo_sql = verbose or not execute
    if echo_sql:
        formatted = sqlparse.format(sql, reindent = True)
        print(formatted)

    # Nothing to run on a dry run or without a connection.
    if connection is None or not execute:
        return

    cursor = connection.cursor()
    cursor.execute(sql)
    return

def load_data(connection = None, table = "", limit = None):
    """Read rows of ``table`` into a pandas DataFrame.

    Builds ``select * from {table}``, optionally followed by a ``limit``
    clause, terminates it with ``;`` and hands it to ``pd.read_sql``.

    Args:
        connection: Connection object passed straight to ``pd.read_sql``.
        table: Table name, interpolated verbatim into the query text.
        limit: When not None, appended to the query as ``limit {limit}``.

    Returns:
        The DataFrame returned by ``pd.read_sql``.
    """
    select_clause = f"""
        select * from {table}
    """
    limit_clause = "" if limit is None else f"""
            limit {limit}
        """
    query = select_clause + limit_clause + ";"
    return pd.read_sql(query, connection)

# RX Cost Verification

In [7]:
def verify_rx_cost(
    connection = None,
    prefix = 'RX_Cost',
    rx_version = '20220511', 
    dm_version = '20220513',
    sample = 0.01,
    execute = False,
    verbose = True,
    refresh_sample = False):
    """Build (and optionally run) the NDC coverage check for the RX cost table.

    Creates ``{prefix}_CHECK_NDC``, a one-row summary of how many rows and how
    many distinct NDCs of the RX cost table are found in the drug master, the
    ASP crosswalk (``ndc2`` and ``NDC_OR_ALTERNATE_ID`` columns) and the NADAC
    table.

    Args:
        connection: Connection forwarded to ``send_query``.
        prefix: Prefix for the tables this function reads and writes
            (``{prefix}_SAMPLE``, ``{prefix}_CHECK_NDC``).
        rx_version: Version suffix of the RX encounters paid-cost table.
        dm_version: Version suffix of the RxNorm schema holding DRUG_MASTER.
        sample: Value interpolated into ``sample ({sample})``. When it is
            below 1 the check reads from ``{prefix}_SAMPLE`` instead of the
            full table.
            NOTE(review): Snowflake's SAMPLE argument is a percentage, so the
            default 0.01 means 0.01%, not 1% -- confirm that is intended.
        execute: Forwarded to ``send_query``; when true the SQL is run.
        verbose: Forwarded to ``send_query``; when true the SQL is printed.
        refresh_sample: When true (and ``sample < 1``), (re)create
            ``{prefix}_SAMPLE`` before the check. Defaults to False, in which
            case ``{prefix}_SAMPLE`` must already exist.

    Returns:
        None.
    """
    rx_cost = f"SANDBOX_KOMODO.PROJECT_CURRENCY.RX_ENCOUNTERS_{rx_version}_PAID_COST"
    dm_def = f"MAP_VOCABULARY.RXNORM_{dm_version}.DRUG_MASTER"

    if sample < 1:
        if refresh_sample:
            sql_sample = f"""
        create or replace table {prefix}_SAMPLE as
        select *
        from {rx_cost} sample ({sample})
        ;
    """
            send_query(connection, sql_sample, execute, verbose)
        # From here on the check runs against the sampled copy.
        rx_cost = f"{prefix}_SAMPLE"

    # check ndc valid in other tables
    # Fixes relative to the earlier version of this query:
    #   * length(ndc) < 11 -- the parenthesis previously wrapped the comparison.
    #   * count_if(in_*)   -- count(<boolean>) counts every non-NULL flag,
    #                         TRUE and FALSE alike, not the matches.
    #   * no trailing comma before FROM in the CTE select list.
    sql_ndc = f"""
        create or replace table {prefix}_CHECK_NDC as
        with ndc as(
        select encounter_key, claim_date, ndc,
            ndc in (select distinct ndc from {dm_def}) as in_dm,
            ndc in (select distinct ndc2 from ASP_NDC_HCPCS_CROSSWALK_2022JAN) as in_asp,
            ndc in (select distinct NDC_OR_ALTERNATE_ID from ASP_NDC_HCPCS_CROSSWALK_2022JAN) as in_asp_noc,
            ndc in (select distinct ndc from NADAC_20220322) as in_nadac
        from {rx_cost}
        ) 
        select 
            count(*) as n, 
            count_if(ndc is null or length(ndc) < 11) as ndc_null, 
            count(distinct ndc) as n_ndc, 
            count_if(in_dm) as n_in_dm,
            count(distinct (case when in_dm then ndc end)) as n_ndc_in_dm,
            count_if(in_asp) as n_in_asp,
            count(distinct (case when in_asp then ndc end)) as n_ndc_in_asp,
            count_if(in_asp_noc) as n_in_asp_noc,
            count(distinct (case when in_asp_noc then ndc end)) as n_ndc_in_asp_noc,
            count_if(in_nadac) as n_in_nadac,
            count(distinct (case when in_nadac then ndc end)) as n_ndc_in_nadac
        from ndc
        ;
    """

    send_query(connection, sql_ndc, execute, verbose)
    return

In [None]:
# Execute the NDC check on the live connection without echoing the SQL.
verify_rx_cost(
    connection = ctx,
    execute = True,
    verbose = False,
)