### Reproduce Results in Kim 2019 paper

In [10]:
%pip install sqlparse

Note: you may need to restart the kernel to use updated packages.


In [11]:
import snowflake.connector
import getpass
import sys
import pandas as pd
import sqlparse
pd.set_option('display.max_rows', 200)
pd.set_option('display.width', 1000)

In [12]:
# Configure Snowflake Connector
# The password is requested interactively through getpass, so it is not
# stored in the notebook source; the user name is hard-coded.
SNOWFLAKE_CREDS_DICT = {
    "SNOWFLAKE_USER": "YWEI",
    "SNOWFLAKE_PASSWORD": getpass.getpass()
}
# Session settings. Only 'account' is passed to connect() below; database,
# warehouse and role are read back later through SF_DICT.
SNOWFLAKE_CONFIG_DICT = {
    'account':'komodohealth',
    'database':'SANDBOX_KOMODO',
    'warehouse': 'XLARGE_WH',
    'role': 'ANALYST'
}
# Flattened view of the two dicts above using sf*-prefixed keys.
# NOTE(review): 'sfURL' and 'tracing' are not referenced anywhere else in the
# visible part of this notebook - confirm whether they are still needed.
SF_DICT = {
  'sfURL':SNOWFLAKE_CONFIG_DICT['account'] + '.snowflakecomputing.com',
  'sfUser':SNOWFLAKE_CREDS_DICT['SNOWFLAKE_USER'],
  'sfPassword':SNOWFLAKE_CREDS_DICT['SNOWFLAKE_PASSWORD'],
  'sfDatabase':SNOWFLAKE_CONFIG_DICT['database'],
  'sfWarehouse':SNOWFLAKE_CONFIG_DICT['warehouse'],
  'sfRole': SNOWFLAKE_CONFIG_DICT['role'],
  'tracing':'All',
}   
# Open the connection with user, password and account only; `ctx` is the
# connection object every later cell passes to the helper functions.
ctx = snowflake.connector.connect(
  user=SF_DICT['sfUser'],
  password=SF_DICT['sfPassword'],
  account=SNOWFLAKE_CONFIG_DICT['account'],
  )

········


In [13]:
# Switch database, warehouse and role
# connect() was given only user/password/account, so the session context is
# selected here with explicit USE statements, each on its own cursor.
ctx.cursor().execute('USE ROLE ' + SF_DICT['sfRole'])
ctx.cursor().execute('USE WAREHOUSE ' + SF_DICT['sfWarehouse'])
ctx.cursor().execute('USE ' + SF_DICT['sfDatabase'])
# The working schema name is hard-coded; the unqualified {prefix}_* tables
# created by later cells are resolved against it.
ctx.cursor().execute('USE SCHEMA ' + 'AYWEI')

<snowflake.connector.cursor.SnowflakeCursor at 0x7f8db5002b20>

In [14]:
def send_query(connection = None, sql = "", execute = False, verbose = True):
    """Optionally print and optionally execute one SQL statement.

    Parameters
    ----------
    connection : object exposing ``cursor()``, or None
        Connection used to run the statement. When None nothing is executed.
    sql : str
        The statement text.
    execute : bool
        When True (and ``connection`` is not None) the statement is run.
    verbose : bool
        When True the statement is pretty-printed with ``sqlparse``.

    Returns
    -------
    None

    Notes
    -----
    A dry run (``execute=False``) always prints the statement, regardless of
    ``verbose``, so the caller can inspect what would have been sent.
    """
    if not execute or verbose:
        print(sqlparse.format(sql, reindent = True))

    if execute and connection is not None:
        cursor = connection.cursor()
        try:
            cursor.execute(sql)
        finally:
            # Release the cursor even when the statement fails, instead of
            # leaving one open cursor behind per call.
            cursor.close()
    return

def load_data(connection = None, table = "", limit = None):
    """Read a table into a pandas DataFrame.

    Parameters
    ----------
    connection : DB connection accepted by ``pd.read_sql``
    table : str
        Table name, interpolated verbatim into the query.
    limit : int or None
        When not None, a ``limit`` clause with this value is appended.

    Returns
    -------
    pandas.DataFrame with the result of ``select * from <table>``.
    """
    limit_clause = "" if limit is None else f"""
            limit {limit}
        """
    query = f"""
        select * from {table}
    """ + limit_clause + ";"
    return pd.read_sql(query, connection)

### Data Asset

LBP, red-flag diagnosis, and opioid code lists:
`SANDBOX_AA.HEOR_FEASIBILITY_COMPARE_TRUVEN_LBP_SPINE_SURGERY.T0_ALL_CODES`

Surgical codes
https://docs.google.com/spreadsheets/d/1PnYunOisk7FDR6v0o4asAFBD6vbi3ChtTliVl-sYmU8/edit#gid=983689721

In [13]:
def get_encounters(
    connection = None,
    prefix = 'LBP',
    query_start_date = '2013-01-01',
    query_end_date = '2022-12-31',
    mx_version = '20220509',
    rx_version = '20220511',
    dm_version = '20220513',
    execute = False,
    verbose = True,
    steps = ('rx', 'bene')
    ):
    '''
    Build the encounter-derived working tables used by the rest of the notebook.

    Four statements are available, always issued in dependency order:

    * ``'mx'``   -> ``{prefix}_ALL_MX``: medical claims inside the query window
      whose diagnosis array overlaps the code table or whose procedure array
      overlaps the spinal-surgery table, with boolean flags ``lbp``, ``rfd``
      and ``ss``.
    * ``'ndc'``  -> ``{prefix}_DEF_NDC``: NDCs whose ``cui_l1_name`` matches one
      of the opioid name patterns.
    * ``'rx'``   -> ``{prefix}_ALL_RX``: pharmacy claims inside the query window
      for those NDCs, restricted to patients with an ``lbp`` claim.
    * ``'bene'`` -> ``{prefix}_ALL_BENE``: beneficiary rows for patients with an
      ``lbp`` claim.

    Parameters
    ----------
    connection : Snowflake connection (or None for a dry run)
    prefix : str
        Prefix of every table that is created or read.
    query_start_date, query_end_date : str
        Inclusive bounds applied to ``claim_date`` in the MX and RX queries.
    mx_version, rx_version, dm_version : str
        Version suffixes of the encounter / drug-master source schemas.
    execute, verbose : bool
        Forwarded to ``send_query``.
    steps : iterable of str, or a single str
        Which statements to send. The default ``('rx', 'bene')`` matches what
        this function previously hard-coded (the MX and NDC statements were
        commented out), so ``'rx'`` and ``'bene'`` expect ``{prefix}_ALL_MX``
        and ``{prefix}_DEF_NDC`` to exist already. Pass
        ``('mx', 'ndc', 'rx', 'bene')`` to rebuild everything from scratch.

    Raises
    ------
    ValueError
        If ``steps`` contains a name other than the four listed above.
    '''
    step_order = ('mx', 'ndc', 'rx', 'bene')
    if isinstance(steps, str):
        steps = (steps,)
    requested = set(steps)
    unknown = requested - set(step_order)
    if unknown:
        raise ValueError(
            f"unknown step(s) {sorted(unknown)}; expected a subset of {step_order}"
        )

    rx_enc = f"MAP_ENCOUNTERS.RX_ENCOUNTERS_{rx_version}.RX_ENCOUNTER_LS_GA"
    dm_def = f"MAP_VOCABULARY.RXNORM_{dm_version}.DRUG_MASTER_ACTIVE_AND_HISTORICAL"

    mx_lite_enc = f"MAP_ENCOUNTERS.MX_ENCOUNTERS_{mx_version}.ENCOUNTERSMX_LITE_LS_GA"

    mx_def = 'SANDBOX_AA.HEOR_FEASIBILITY_COMPARE_TRUVEN_LBP_SPINE_SURGERY.T0_ALL_CODES'
    mx_surg_def = 'LBP_DEF_SPINAL_SURGERY'

    bene = f"MAP_ENCOUNTERS.MX_ENCOUNTERS_{mx_version}.BENEFICIARY_LS_GA"

    # Name patterns matched (case-insensitively) against cui_l1_name.
    opioid_list = """
        '%hydrocodone%', '%oxycodone%', '%fentanyl%', '%morphine%',
        '%buprenorphine%', '%meperidine%', '%hydromorphone%', '%oxymorphone%',
        '%methadone%', '%tapentadol%', '%tramadol%', '%codeine%'
    """

    # create all Mx tables
    sql_mx = f"""
        create or replace table {prefix}_ALL_MX as
        select upk_key2, claim_date, 
            patient_dob, patient_gender, patient_state, patient_zip,
            diagnosis_array, procedure_array,
            arrays_overlap(
                diagnosis_array,
                (select array_agg(code) from {mx_def} where ta_category = 'LBP')
            ) as lbp,
            arrays_overlap(
                diagnosis_array,
                (select array_agg(code) from {mx_def} where ta_category = 'Red Flag Diagnosis')
            ) as rfd,
            arrays_overlap(
                procedure_array,
                (select array_agg(code) from {mx_surg_def})
            ) as ss
        from {mx_lite_enc}
        where claim_date >= '{query_start_date}'
            and claim_date <= '{query_end_date}'
            and (
            arrays_overlap(
                diagnosis_array,
                (select array_agg(code) from {mx_def} where code is not null)
            ) or
            arrays_overlap(
                procedure_array,
                (select array_agg(code) from {mx_surg_def})
            )
        )
        ;
    """

    # create ndc table
    sql_ndc = f"""
        create or replace table {prefix}_DEF_NDC as
        select ndc, cui_l1_name as name
        from {dm_def}
        where cui_l1_name ilike any ({opioid_list})
        ;
    """

    # create all Rx tables, subset to patients with LBP
    sql_rx = f"""
        create or replace table {prefix}_ALL_RX as
        select upk_key2, claim_date, 
            patient_dob, patient_gender, patient_state, patient_zip,
            ndc, days_supply, quantity_dispensed
        from {rx_enc}
        where claim_date >= '{query_start_date}'
            and claim_date <= '{query_end_date}'
            and ndc in (select ndc from {prefix}_DEF_NDC)
            and upk_key2 in (
            select distinct upk_key2 
            from {prefix}_ALL_MX
            where lbp
        )
        ;
    """

    # create all bene tables, subset to patients with LBP
    sql_bene = f"""
        create or replace table {prefix}_ALL_BENE as
        select upk_key2, patient_dob, patient_gender, patient_state, patient_zip,
            eligibility_start_date, eligibility_end_date, closed_start_date, closed_end_date,
            medical_coverage_indicator, pharmacy_coverage_indicator, closed_indicator
        from {bene}
        where upk_key2 in (
            select distinct upk_key2 
            from {prefix}_ALL_MX
            where lbp
        )
        ;
    """

    statements = {'mx': sql_mx, 'ndc': sql_ndc, 'rx': sql_rx, 'bene': sql_bene}
    # Iterate in the fixed order so RX/BENE are always built after the tables
    # they select from, whatever order the caller listed the steps in.
    for step in step_order:
        if step in requested:
            send_query(connection, statements[step], execute, verbose)

    return

In [14]:
# Run the encounter extraction against Snowflake (executed, SQL not printed).
get_encounters(connection = ctx, execute = True, verbose = False)

In [15]:
def get_sql_split_fun(grace_period = 45):
    """Return the DDL for the ``SPLIT_RANGES`` JavaScript UDF.

    The UDF sorts the given ranges by ``start`` (then ``end``) and folds them
    left to right: a range whose ``start`` falls between the previous range's
    ``start`` and its ``end`` plus ``grace_period`` days (expressed in seconds)
    is merged into the previous range; otherwise it opens a new range.

    Parameters
    ----------
    grace_period : number
        Allowed gap, in days, written into the UDF body.

    Returns
    -------
    str : the ``create or replace function`` statement.
    """
    # Plain (non-f) template so the JavaScript braces need no escaping; the
    # single placeholder is substituted below.
    template = """
create or replace function SPLIT_RANGES(dates variant)
   returns variant
   language javascript
as '
return DATES
            .sort(function (a, b) { return a.start - b.start || a.end - b.end; })
            .reduce(function (r, a) {
                  var last = r[r.length - 1] || [];
                  if (last.start <= a.start && a.start <= last.end + __GRACE_DAYS__*24*60*60) {
                        if (last.end < a.end) {
                              last.end = a.end;
                        }
                        return r;
                  }
                  return r.concat(a);
            }, []);
'
;"""
    return template.replace("__GRACE_DAYS__", format(grace_period, ""))

def get_sql_ce_by_type_kh(type, prefix = 'METDPP4_V4', grace_period = 45):
    """Return a SELECT yielding merged closed-enrollment spans for one coverage type.

    The query reads ``{prefix}_ALL_BENE``, keeps closed rows that carry the
    requested coverage indicator, converts their closed start/end dates to
    epoch seconds, aggregates them per ``upk_key2`` and passes the array to the
    ``SPLIT_RANGES`` UDF, then flattens the result back to one row per span.

    Parameters
    ----------
    type : str
        ``'mx'`` (filters on MEDICAL_COVERAGE_INDICATOR) or ``'rx'``
        (filters on PHARMACY_COVERAGE_INDICATOR).
    prefix : str
        Prefix of the ``_ALL_BENE`` table to read.
    grace_period : number
        Accepted for signature compatibility but not used here: the grace
        period is baked into ``SPLIT_RANGES`` when that UDF is created.

    Returns
    -------
    str : SQL text without a trailing semicolon, suitable for use in a CTE.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'mx'`` nor ``'rx'`` (previously this surfaced
        as an UnboundLocalError while formatting the query).
    """
    coverage_columns = {
        'mx': 'MEDICAL_COVERAGE_INDICATOR',
        'rx': 'PHARMACY_COVERAGE_INDICATOR',
    }
    if type not in coverage_columns:
        raise ValueError(
            f"type must be one of {sorted(coverage_columns)}, got {type!r}"
        )
    cov_ind = coverage_columns[type]

    # NOTE(review): both output dates are shifted by one day after the epoch
    # round trip; the reason is not visible in this notebook - confirm.
    sql_ce = f"""    
        with b as (
            SELECT distinct upk_key2, 
            closed_start_date AS start_date_date, 
            date_part('EPOCH_SECOND', to_timestamp(start_date_date)) as start_date, 
            least(closed_end_date, current_date) as end_date_date,
            date_part('EPOCH_SECOND', to_timestamp(end_date_date)) as end_date, 
            max(end_date) over (partition by upk_key2, start_date) as max_end_date, 
            min(start_date) over (partition by upk_key2, end_date) as min_start_date
        FROM {prefix}_ALL_BENE
        WHERE {cov_ind} 
            and closed_start_date IS NOT NULL 
            AND closed_end_date IS NOT NULL 
            AND start_date <= end_date 
            AND start_date <= date_part('EPOCH_SECOND', to_timestamp(current_date)) 
            and closed_indicator 
            qualify max_end_date=end_date and min_start_date=start_date
        ),
        g as (
            select upk_key2, 
                arrayagg(object_construct('start', start_date, 'end', end_date)) as ranges 
                from b
                group by upk_key2
        ), 
        non_overlap as (
            select upk_key2, ranges, SPLIT_RANGES(ranges) as clean_ranges from g
        )
        select upk_key2, 
            dateadd(day, 1, to_date(to_timestamp(clean_range.value:start))) as start_date,
            dateadd(day, 1, to_date(to_timestamp(clean_range.value:end))) as end_date
        from non_overlap, lateral flatten(input => clean_ranges) as clean_range
    """
    return sql_ce

def get_ce_kh(
    connection = None,
    prefix = 'LBP',
    grace_period = 45,
    execute = False,
    verbose = True):
    """Create the SPLIT_RANGES UDF and the ``{prefix}_ELIG_KH`` table.

    ``{prefix}_ELIG_KH`` holds, per ``upk_key2``, the intersection of every
    overlapping pair of medical and pharmacy enrollment spans: the later of
    the two start dates and the earlier of the two end dates.

    Parameters
    ----------
    connection : Snowflake connection (or None for a dry run)
    prefix : str
        Prefix of the tables read and created.
    grace_period : number
        Gap in days forwarded to the UDF and span-query builders.
    execute, verbose : bool
        Forwarded to ``send_query``.
    """
    udf_ddl = get_sql_split_fun(grace_period = grace_period)
    mx_spans, rx_spans = (
        get_sql_ce_by_type_kh(type = coverage, prefix = prefix, grace_period = grace_period)
        for coverage in ("mx", "rx")
    )

    elig_ddl = f"""  
        create or replace table {prefix}_ELIG_KH as
        with mx as ({mx_spans}), rx as ({rx_spans})
        select mx.upk_key2, 
            greatest(mx.start_date, rx.start_date) as start_date,
            least(mx.end_date, rx.end_date) as end_date
            from mx inner join rx
            on mx.upk_key2 = rx.upk_key2
            where mx.start_date <= rx.end_date and mx.end_date >= rx.start_date
        ;
    """

    # The UDF must exist before the table statement that calls it.
    for statement in (udf_ddl, elig_ddl):
        send_query(connection, statement, execute, verbose)

In [16]:
# Create the SPLIT_RANGES UDF and the combined-eligibility table (executed, SQL not printed).
get_ce_kh(connection = ctx, execute = True, verbose = False)

In [57]:
def get_all_potential(
    connection = None,
    prefix = 'LBP',
    cohort_start_date = '2014-01-01',
    cohort_end_date = '2020-12-31',
    execute = False,
    verbose = True):
    """Create ``{prefix}_ALL_POTENTIAL``: one row per patient with an LBP claim in the cohort window.

    The statement is assembled from these CTEs:

    * ``first_lbp``  - earliest ``lbp`` claim date per ``upk_key2`` between
      ``cohort_start_date`` and ``cohort_end_date`` (inclusive); this is the
      ``index_date``.
    * ``demo``       - dob / gender / state / zip taken from ``{prefix}_ALL_BENE``
      rows whose closed start/end range contains the index date (first
      non-null value per patient), plus ``age`` as the difference of calendar
      years between index date and dob.
    * ``elig``       - number of ``{prefix}_ELIG_KH`` spans covering
      index date - 365 through index date + 365.
    * ``rfd_cond``   - ``n_rfd``: red-flag-diagnosis claims within 365 days
      either side of the index date.
    * ``opioid_cond``- ``n_opioid``: ``{prefix}_ALL_RX`` claims in the 183 days
      up to and including the index date.
    * ``surgery_cohort`` - ``n_surgery``: ``ss`` claims from the index date
      through index date + 365.

    ``opioid_cond`` and ``surgery_cohort`` use inner joins, so patients with no
    matching claim have no row there and end up with NULL (not 0) in
    ``n_opioid`` / ``n_surgery`` after the final left joins. ``cohort`` is
    ``'surg'`` when ``n_surgery > 0`` and ``'non_surg'`` otherwise.

    Parameters
    ----------
    connection : Snowflake connection (or None for a dry run)
    prefix : str
        Prefix of the tables read and created.
    cohort_start_date, cohort_end_date : str
        Inclusive window for the index LBP claim.
    execute, verbose : bool
        Forwarded to ``send_query``.
    """
    
    sql_all_potential = f"""  
        create or replace table {prefix}_ALL_POTENTIAL as
        with first_lbp as (
            select upk_key2, min(claim_date) as index_date
            from {prefix}_ALL_MX
            where lbp
                and claim_date >= '{cohort_start_date}' and claim_date <= '{cohort_end_date}'
            group by upk_key2
        ),
        demo as (
            select distinct *, 
                year(index_date) - year(patient_dob) as age
            from (
                select distinct first_lbp.upk_key2, index_date,
                first_value(patient_dob) over (partition by first_lbp.upk_key2 order by patient_dob nulls last) as patient_dob,
                first_value(patient_gender) over (partition by first_lbp.upk_key2 order by patient_gender nulls last) as patient_gender,
                first_value(patient_state) over (partition by first_lbp.upk_key2 order by patient_state nulls last) as patient_state,
                first_value(patient_zip) over (partition by first_lbp.upk_key2 order by patient_zip nulls last) as patient_zip
            from first_lbp
            left join {prefix}_ALL_BENE b
            on first_lbp.upk_key2 = b.upk_key2 
                and first_lbp.index_date <= b.closed_end_date 
                and first_lbp.index_date >= b.closed_start_date
            )
        ),
        elig as (
            select first_lbp.upk_key2,
                count_if(index_date - 365 >= start_date and index_date + 365 <= end_date) as elig
            from first_lbp
            left join {prefix}_ELIG_KH elig
            on first_lbp.upk_key2 = elig.upk_key2
            group by first_lbp.upk_key2
        ),
        rfd as (
            select upk_key2, claim_date from {prefix}_ALL_MX
            where rfd
        ),
        rfd_cond as (
            select first_lbp.upk_key2,
                count_if(rfd.claim_date >= first_lbp.index_date - 365 and rfd.claim_date <= first_lbp.index_date + 365) as n_rfd
            from first_lbp
            left join rfd
            on first_lbp.upk_key2 = rfd.upk_key2
                and rfd.claim_date >= first_lbp.index_date - 365
                and rfd.claim_date <= first_lbp.index_date + 365
            group by first_lbp.upk_key2
        ),
        opioid as (
            select upk_key2, claim_date from {prefix}_ALL_RX
        ),
        opioid_cond as (
            select first_lbp.upk_key2,
                count_if(opioid.claim_date >= first_lbp.index_date - 183 and opioid.claim_date <= first_lbp.index_date) as n_opioid
            from first_lbp
            inner join opioid
            on first_lbp.upk_key2 = opioid.upk_key2
                and opioid.claim_date >= first_lbp.index_date - 183
                and opioid.claim_date <= first_lbp.index_date
            group by first_lbp.upk_key2
        ),
        surgery as (
            select upk_key2, claim_date from {prefix}_ALL_MX
            where ss
        ),
        surgery_cohort as (
            select first_lbp.upk_key2,
                count_if(surgery.claim_date >= first_lbp.index_date and surgery.claim_date <= first_lbp.index_date + 365) as n_surgery
            from first_lbp
            inner join surgery
            on first_lbp.upk_key2 = surgery.upk_key2
                and surgery.claim_date >= first_lbp.index_date
                and surgery.claim_date <= first_lbp.index_date + 365
            group by first_lbp.upk_key2
        )
        select f.upk_key2, f.index_date,
            patient_dob, patient_gender, patient_state, patient_zip, age,
            elig, n_rfd, n_opioid, n_surgery,
            case when n_surgery > 0 then 'surg'
            else 'non_surg' end as cohort 
        from first_lbp f
        left join demo d
        on f.upk_key2 = d.upk_key2
        left join elig e
        on f.upk_key2 = e.upk_key2
        left join rfd_cond r
        on f.upk_key2 = r.upk_key2
        left join opioid_cond o
        on f.upk_key2 = o.upk_key2
        left join surgery_cohort s
        on f.upk_key2 = s.upk_key2
        ;
    """
    # Print and/or run the statement depending on execute / verbose.
    send_query(connection, sql_all_potential, execute, verbose)  
    return

In [58]:
# Build the per-patient candidate table with the default cohort window (executed, SQL not printed).
get_all_potential(connection = ctx, execute = True, verbose = False)

In [61]:
def get_first_potential(
    connection = None,
    prefix = 'LBP',
    execute = False,
    verbose = True):
    """Create ``{prefix}_FIRST_POTENTIAL``, the final study cohort.

    Keeps the rows of ``{prefix}_ALL_POTENTIAL`` that have at least one
    qualifying eligibility span, no red-flag diagnosis count, no opioid count
    (zero or NULL both qualify) and ``age >= 18``.

    Parameters
    ----------
    connection : Snowflake connection (or None for a dry run)
    prefix : str
        Prefix of the table read and the table created.
    execute, verbose : bool
        Forwarded to ``send_query``.
    """
    cohort_ddl = f"""  
        create or replace table {prefix}_FIRST_POTENTIAL as
        select *
        from {prefix}_ALL_POTENTIAL
        where elig > 0
            and (n_rfd = 0 or n_rfd is null)
            and (n_opioid = 0 or n_opioid is null)
            and age >= 18
        ;
    """
    send_query(connection, cohort_ddl, execute, verbose)

In [62]:
# Apply the inclusion/exclusion criteria to obtain the final cohort table (executed, SQL not printed).
get_first_potential(connection = ctx, execute = True, verbose = False)

In [80]:
def get_cohort_funnel(
    connection = None,
    prefix = 'LBP',
    by_year = True,
    execute = False,
    verbose = True):
    """Create the attrition table ``{prefix}_COHORT_FUNNEL`` (or ``..._by_year``).

    Five cumulative criteria (a-e) are applied to ``{prefix}_ALL_POTENTIAL``
    and the results are stacked with ``union all``. Each slice reports, per
    ``cohort`` (and per index year when ``by_year`` is true), the number of
    distinct ``upk_key2`` values as ``n_bene`` and the row count as ``n_rx``.

    Parameters
    ----------
    connection : Snowflake connection (or None for a dry run)
    prefix : str
        Prefix of the table read and the table created.
    by_year : bool
        When true, add ``year(index_date)`` to the select list and grouping
        and append ``_by_year`` to the output table name.
    execute, verbose : bool
        Forwarded to ``send_query``.
    """
    # (extra select column, extra group-by term, output table suffix)
    year_col, year_group, table_suffix = (
        (', year(index_date) as year', ', year', '_by_year') if by_year else ('', '', '')
    )

    counts = f"""
        cohort{year_col}, count(distinct upk_key2) as n_bene, count(*) as n_rx
    """

    funnel_ddl = f"""  
        
        create or replace table {prefix}_COHORT_FUNNEL{table_suffix} as
        
        select 'a. all patients in study window' as criteria, {counts}
        from {prefix}_ALL_POTENTIAL
        group by cohort{year_group}

        union all
        select 'b. continuously enrolled 365 days before and after index date' as criteria, {counts}
        from {prefix}_ALL_POTENTIAL
        where elig > 0
        group by cohort{year_group}

        union all
        select 'c. opioid naive 183 days before index date' as criteria, {counts}
        from {prefix}_ALL_POTENTIAL
        where elig > 0
            and (n_opioid = 0 or n_opioid is null)
        group by cohort{year_group}

        union all
        select 'd. no red-flag diagnosis 365 days before and after index date' as criteria, {counts}
        from {prefix}_ALL_POTENTIAL
        where elig > 0
            and (n_opioid = 0 or n_opioid is null)
            and (n_rfd = 0 or n_rfd is null)
        group by cohort{year_group}
        
        union all
        select 'e. 18 years or plus on index date' as criteria, {counts}
        from {prefix}_ALL_POTENTIAL
        where elig > 0
            and (n_opioid = 0 or n_opioid is null)
            and (n_rfd = 0 or n_rfd is null)
            and age >= 18
        group by cohort{year_group}
        ;
    """
    send_query(connection, funnel_ddl, execute, verbose)

In [81]:
# Build both attrition tables: one split by index year, one overall (executed, SQL not printed).
get_cohort_funnel(connection = ctx, by_year = True, execute = True, verbose = False)
get_cohort_funnel(connection = ctx, by_year = False, execute = True, verbose = False)

In [17]:
def get_cost(
    connection = None,
    prefix = 'LBP',
    mx_version = '20220509',
    rx_version = '20220511',    
    query_start_date = '2013-01-01',
    query_end_date = '2022-12-31',
    execute = False,
    verbose = True,
    steps = ('mx_cost_fac',)):
    """Build the cost tables for the final cohort's first post-index year.

    The cost sources are keyed by ``encounter_key``, so encounters are
    collected first and costs joined on afterwards. Five statements are
    available, always issued in dependency order:

    * ``'rx_enc'``       -> ``{prefix}_COST_RX_ENC``: pharmacy encounters of
      cohort patients from index date through index date + 365.
    * ``'rx_cost'``      -> ``{prefix}_COST_RX``: imputed Rx cost rows for those
      encounters with ``cost_final >= 0``.
    * ``'mx_enc'``       -> ``{prefix}_COST_MX_ENC``: medical encounters of
      cohort patients from index date through index date + 365.
    * ``'mx_cost_prof'`` -> ``{prefix}_COST_MX_PROF``: professional allowed
      amounts (``>= 0``) for those encounters.
    * ``'mx_cost_fac'``  -> ``{prefix}_COST_MX_FAC``: facility allowed amounts
      (``>= 0``) for those encounters.

    Parameters
    ----------
    connection : Snowflake connection (or None for a dry run)
    prefix : str
        Prefix of the tables read and created.
    mx_version, rx_version : str
        Version suffixes of the encounter source schemas (``rx_version`` also
        selects the imputed Rx cost table).
    query_start_date, query_end_date : str
        Inclusive bounds applied to ``claim_date`` when scanning the raw
        encounter tables.
    execute, verbose : bool
        Forwarded to ``send_query``.
    steps : iterable of str, or a single str
        Which statements to send. The default ``('mx_cost_fac',)`` matches
        what this function previously hard-coded (the other four calls were
        commented out), so it expects ``{prefix}_COST_MX_ENC`` to exist
        already. Pass all five names to rebuild everything from scratch.

    Raises
    ------
    ValueError
        If ``steps`` contains a name other than the five listed above.
    """
    step_order = ('rx_enc', 'rx_cost', 'mx_enc', 'mx_cost_prof', 'mx_cost_fac')
    if isinstance(steps, str):
        steps = (steps,)
    requested = set(steps)
    unknown = requested - set(step_order)
    if unknown:
        raise ValueError(
            f"unknown step(s) {sorted(unknown)}; expected a subset of {step_order}"
        )

    # current cost table use encounter_key as key
    # need to get all encounters first

    rx_enc = f"MAP_ENCOUNTERS.RX_ENCOUNTERS_{rx_version}.RX_ENCOUNTER_LS_GA"
    mx_enc = f"MAP_ENCOUNTERS.MX_ENCOUNTERS_{mx_version}.ENCOUNTERSMX_LS_GA"
    rx_cost = f"SANDBOX_KOMODO.PROJECT_CURRENCY.RX_ENCOUNTERS_{rx_version}_IMPUTED"
    mx_cost_prof = 'SANDBOX_KOMODO.PROJECT_CURRENCY.LBP_PROFESSIONAL'
    # mx_cost_ip = 'SANDBOX_KOMODO.PROJECT_CURRENCY.LBP_INPATIENT_DRG'
    mx_cost_fac = 'SANDBOX_KOMODO.PROJECT_CURRENCY.LBP_FACILITY_ALLOWED'

    sql_rx = f"""  
        create or replace table {prefix}_COST_RX_ENC as
        select rx.upk_key2, rx.claim_date, rx.encounter_key
        from {prefix}_FIRST_POTENTIAL f
        inner join
        (
        select upk_key2, claim_date, encounter_key
        from {rx_enc}
        where upk_key2 in (select upk_key2 from {prefix}_FIRST_POTENTIAL)
            and claim_date >= '{query_start_date}'
            and claim_date <= '{query_end_date}'
        ) rx
        on f.upk_key2 = rx.upk_key2 and rx.claim_date between f.index_date and f.index_date + 365
        ;
    """

    sql_rx_cost = f"""  
        create or replace table {prefix}_COST_RX as
        select e.upk_key2, c.encounter_key, c.claim_date, NDC, cost_final, cost_impute_adj
        from {rx_cost} c
        inner join
        {prefix}_COST_RX_ENC e
        on c.encounter_key = e.encounter_key
        where cost_final >= 0
        ;
    """

    sql_mx = f"""  
        create or replace table {prefix}_COST_MX_ENC as
        select mx.upk_key2, mx.claim_date, mx.encounter_key
        from {prefix}_FIRST_POTENTIAL f
        inner join
        (
        select upk_key2, claim_date, encounter_key
        from {mx_enc}
        where upk_key2 in (select upk_key2 from {prefix}_FIRST_POTENTIAL)
            and claim_date >= '{query_start_date}'
            and claim_date <= '{query_end_date}'
        ) mx
        on f.upk_key2 = mx.upk_key2 and mx.claim_date between f.index_date and f.index_date + 365
        ;
    """

    # NOTE(review): this statement takes claim_date from the cost table (c)
    # while the facility statement takes it from the encounter table (e) -
    # confirm the difference is intentional.
    sql_mx_cost_prof = f"""  
        create or replace table {prefix}_COST_MX_PROF as
        select e.upk_key2, c.encounter_key, c.claim_date, c.PROCEDURE, c.ALLOWED_AMOUNT
        from {mx_cost_prof} c
        inner join
        {prefix}_COST_MX_ENC e
        on c.encounter_key = e.encounter_key
        where ALLOWED_AMOUNT >= 0
        ;
    """

    sql_mx_cost_fac = f"""  
        create or replace table {prefix}_COST_MX_FAC as
        select e.upk_key2, c.encounter_key, e.claim_date, c.ALLOWED_AMOUNT
        from {mx_cost_fac} c
        inner join
        {prefix}_COST_MX_ENC e
        on c.encounter_key = e.encounter_key
        where ALLOWED_AMOUNT >= 0
        ;
    """

    statements = {
        'rx_enc': sql_rx,
        'rx_cost': sql_rx_cost,
        'mx_enc': sql_mx,
        'mx_cost_prof': sql_mx_cost_prof,
        'mx_cost_fac': sql_mx_cost_fac,
    }
    # Fixed order: each cost table is built after the encounter table it joins.
    for step in step_order:
        if step in requested:
            send_query(connection, statements[step], execute, verbose)
    return

In [18]:
# Run the cost extraction with its default settings (executed, SQL not printed).
get_cost(connection = ctx, execute = True, verbose = False)

In [22]:
def summarize_cost(
    connection = None,
    prefix = 'LBP',
    execute = False,
    verbose = True):
    """Create ``{prefix}_COST_SUMM``: per-patient cost totals for the final cohort.

    Every row of ``{prefix}_FIRST_POTENTIAL`` is kept and left-joined to three
    per-patient sums: professional allowed amount (``mx_cost_pb``), facility
    allowed amount (``mx_cost_fac``) and Rx ``cost_final`` (``rx_cost``).
    A patient with no rows in a cost table gets 0 for that component.

    Parameters
    ----------
    connection : Snowflake connection (or None for a dry run)
    prefix : str
        Prefix of the tables read and created.
    execute, verbose : bool
        Forwarded to ``send_query``.
    """
    summary_ddl = f"""  
        create or replace table {prefix}_COST_SUMM as
        select f.upk_key2, f.index_date, f.cohort, 
            coalesce(mx_cost_pb, 0) as mx_cost_pb,
            coalesce(mx_cost_fac, 0) as mx_cost_fac,
            coalesce(rx_cost, 0) as rx_cost
        from {prefix}_FIRST_POTENTIAL f
        left join (select upk_key2, sum(ALLOWED_AMOUNT) as mx_cost_pb from {prefix}_COST_MX_PROF group by upk_key2) prof
        on f.upk_key2 = prof.upk_key2
        left join (select upk_key2, sum(ALLOWED_AMOUNT) as mx_cost_fac from {prefix}_COST_MX_FAC group by upk_key2) fac
        on f.upk_key2 = fac.upk_key2
        left join (select upk_key2, sum(cost_final) as rx_cost from {prefix}_COST_RX group by upk_key2) rx
        on f.upk_key2 = rx.upk_key2
        ;
    """
    send_query(connection, summary_ddl, execute, verbose)

In [23]:
%%time
# Build the per-patient cost summary table (executed, SQL not printed); timed by the cell magic.
summarize_cost(connection = ctx, execute = True, verbose = False)

CPU times: user 5.92 ms, sys: 0 ns, total: 5.92 ms
Wall time: 12.2 s


In [7]:
def read_sql(sql, connection = None):
    """Run a query and return the result as a pandas DataFrame.

    Parameters
    ----------
    sql : str
        Query text passed to ``pd.read_sql``.
    connection : DB connection, optional
        Connection to use. When omitted, the notebook-level ``ctx`` is looked
        up at call time, so a reconnect that rebinds ``ctx`` is picked up and
        this cell can be defined before ``ctx`` exists.

    Returns
    -------
    pandas.DataFrame
    """
    if connection is None:
        # Resolved per call rather than frozen into the default at def time.
        connection = ctx
    return pd.read_sql(sql, connection)

In [29]:
# Spot check: one patient's rows in the certified-claims Rx cost table for a one-year window.
sql = """
select * 
from SANDBOX_KOMODO.PROJECT_CURRENCY.RX_CERTIFIED_CLAIMS_IMPUTED
where upk_key2 ='lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g='
and date_of_service between '2018-06-29' and '2019-06-28'
;
"""
read_sql(sql)



Unnamed: 0,UPK_KEY2,CLAIM_ID,DATE_OF_SERVICE,NDC11,NDC9,PLAN_PAY_ACTUAL,PLAN_PAY_CLEAN,TOTAL_COST_ACTUAL,TOTAL_COST_CLEAN,COST_IMPUTE,...,UNIT_PRICE_FINAL,UNIT_PRICE_SOURCE,MEDIAN_PER_UNIT,NADAC_PER_UNIT,ASP_PER_UNIT,UNIT_OF_MEASURE,PAYER_TYPE_CODE,PATIENT_STATE,SOURCE,DFG_NAME_ARRAY
0,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,b90b2e9de845c944443bcc8bcb3072b23777abd3,2018-06-29,29300012510,293000125,,,5.0,5.0,3.0,...,0.1,median_by_year,0.1,0.02348,,,,WY,stanger,"[\n ""Oral Product"",\n ""Pill""\n]"
1,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,6a3784e10cc1b7b8c15109190cf568d505252c4d,2018-11-01,781185220,7811852,0.0,0.0,9.0,9.0,7.0,...,0.35,median_by_year,0.35,0.27265,,EA,,WY,fleming-crick,"[\n ""Oral Product"",\n ""Pill""\n]"
2,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,b12b04e0b708793acec0110595123591d85f7676,2018-08-07,29300012510,293000125,0.0,0.0,5.0,5.0,3.0,...,0.1,median_by_year,0.1,0.02348,,EA,,WY,fleming-crick,"[\n ""Oral Product"",\n ""Pill""\n]"
3,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,e1d7acc2368690fae15785513845fa1ae674737b,2018-09-04,172392680,1723926,,,,,1.666667,...,0.083333,median_by_year,0.083333,0.02411,,,,,blue,"[\n ""Oral Product"",\n ""Pill""\n]"
4,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,a1d6d2a2b8aeeedb5e08f33da67fc3eeda791eea,2018-08-07,29300012510,293000125,,,,,3.0,...,0.1,median_by_year,0.1,0.02348,,,,,blue,"[\n ""Oral Product"",\n ""Pill""\n]"
5,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,d4409590efa95197ee804f4fe6089d25ba0df7bc,2018-11-01,781185220,7811852,,,,,7.0,...,0.35,median_by_year,0.35,0.27265,,,,,blue,"[\n ""Oral Product"",\n ""Pill""\n]"
6,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,b66c33220535978b3120ed1df3f2fcc6b2f58c3c,2018-06-29,29300012510,293000125,0.0,0.0,5.0,5.0,3.0,...,0.1,median_by_year,0.1,0.02348,,EA,,WY,fleming-crick,"[\n ""Oral Product"",\n ""Pill""\n]"
7,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,db1005c244873fb46644859b6e4a311599cb2bef,2018-08-07,29300012510,293000125,,,5.0,5.0,3.0,...,0.1,median_by_year,0.1,0.02348,,,,WY,stanger,"[\n ""Oral Product"",\n ""Pill""\n]"
8,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,f98e61757562836fb1833a9a6e83b0d043a99553,2018-11-01,781185220,7811852,,,9.0,9.0,7.0,...,0.35,median_by_year,0.35,0.27265,,,,WY,stanger,"[\n ""Oral Product"",\n ""Pill""\n]"
9,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,247d2e1d7570db75c59bf197a2332316e1e43f29,2018-06-29,29300012510,293000125,,,,,3.0,...,0.1,median_by_year,0.1,0.02348,,,,,blue,"[\n ""Oral Product"",\n ""Pill""\n]"


In [30]:
# Spot check: the same patient and window in the encounter-level imputed Rx cost table.
# The query text previously ended with a duplicated ";" terminator (an empty
# second statement); a single terminator is kept.
sql = """
select * 
from SANDBOX_KOMODO.PROJECT_CURRENCY.RX_ENCOUNTERS_20220511_IMPUTED
where upk_key2 ='lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g='
and claim_date between '2018-06-29' and '2019-06-28'
;
"""
read_sql(sql)



Unnamed: 0,UPK_KEY2,ENCOUNTER_KEY,CLAIM_DATE,NDC,NDC9,PLAN_PAY_ACTUAL,PLAN_PAY_CLEAN,TOTAL_COST_ACTUAL,TOTAL_COST_CLEAN,COST_IMPUTE,...,UNIT_PRICE_SOURCE,MEDIAN_PER_UNIT,NADAC_PER_UNIT,ASP_PER_UNIT,UNIT_OF_MEASURE,PAYER_TYPE_CODE,PATIENT_STATE,SOURCES,DFG_NAME_ARRAY,CLAIM_ID_ARRAY
0,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,bd91f0e7aefa57e28b4351ad6d3469af,2018-09-04,172392680,1723926,,,,,1.666667,...,median_by_year,0.083333,0.02411,,,,,"[\n ""blue""\n]","[\n ""Oral Product"",\n ""Pill""\n]","[\n ""e1d7acc2368690fae15785513845fa1ae674737b..."
1,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,ff339ece436c65a6e5a5aac0d7e35846,2018-11-01,781185220,7811852,0.0,0.0,9.0,9.0,7.0,...,median_by_year,0.35,0.27265,,EA,,WY,"[\n ""blue"",\n ""fleming-crick"",\n ""stanger""\n]","[\n ""Oral Product"",\n ""Pill""\n]","[\n ""6a3784e10cc1b7b8c15109190cf568d505252c4d..."
2,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,6528fc55a8b6d1634e803cfb46c85702,2018-06-29,29300012510,293000125,0.0,0.0,5.0,5.0,3.0,...,median_by_year,0.1,0.02348,,EA,,WY,"[\n ""blue"",\n ""fleming-crick"",\n ""stanger""\n]","[\n ""Oral Product"",\n ""Pill""\n]","[\n ""247d2e1d7570db75c59bf197a2332316e1e43f29..."
3,lC+jZkIa/itgFIGGeMaZ+NkiX1GcseIlpqPmzA0oH/g=,88c49f5f951c2618ae234756930b8085,2018-08-07,29300012510,293000125,0.0,0.0,5.0,5.0,3.0,...,median_by_year,0.1,0.02348,,EA,,WY,"[\n ""blue"",\n ""fleming-crick"",\n ""stanger""\n]","[\n ""Oral Product"",\n ""Pill""\n]","[\n ""a1d6d2a2b8aeeedb5e08f33da67fc3eeda791eea..."


In [8]:
# Look up a single claim_id in the certified-claims Rx cost table.
sql = """
select * 
from SANDBOX_KOMODO.PROJECT_CURRENCY.RX_CERTIFIED_CLAIMS_IMPUTED
where claim_id ='23812b8a28bcd1b173d5eb97e6285718'
;
"""
read_sql(sql)



Unnamed: 0,UPK_KEY2,CLAIM_ID,DATE_OF_SERVICE,NDC11,NDC9,PLAN_PAY_ACTUAL,PLAN_PAY_CLEAN,TOTAL_COST_ACTUAL,TOTAL_COST_CLEAN,COST_IMPUTE,...,UNIT_PRICE_FINAL,UNIT_PRICE_SOURCE,MEDIAN_PER_UNIT,NADAC_PER_UNIT,ASP_PER_UNIT,UNIT_OF_MEASURE,PAYER_TYPE_CODE,PATIENT_STATE,SOURCE,DFG_NAME_ARRAY


In [9]:
# Look up the same claim_id in the claim-level imputed Rx encounter cost table.
sql = """
select * 
from SANDBOX_KOMODO.PROJECT_CURRENCY.RX_ENCOUNTERS_20220511_CLAIM_IMPUTED
where claim_id ='23812b8a28bcd1b173d5eb97e6285718'
;
"""
read_sql(sql)



Unnamed: 0,CLAIM_ID,UPK_KEY2,ENCOUNTER_KEY,CLAIM_DATE,NDC,NDC9,PLAN_PAY_ACTUAL,PLAN_PAY_CLEAN,TOTAL_COST_ACTUAL,TOTAL_COST_CLEAN,...,UNIT_PRICE_SOURCE,MEDIAN_PER_UNIT,NADAC_PER_UNIT,ASP_PER_UNIT,UNIT_OF_MEASURE,PAYER_TYPE_CODE,PATIENT_STATE,SOURCES,DFG_NAME_ARRAY,CLAIM_ID_ARRAY
