# Create Covid19 cohorts

In [1]:
import snowflake.connector
import getpass
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

pd.set_option('display.max_rows', 200)

import sys
sys.path.append('/home/jovyan/credentials')
import jupytertoolz_credentials as jt

sys.path.append('/home/jovyan/komodo_research/library/Python')
from utils import *

In [2]:
# Reload imported modules automatically before each cell runs, so edits to
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [28]:
## Snowflake session settings
ROLE = 'ANALYST'
WAREHOUSE = 'XLARGE_WH'
DATABASE = 'SANDBOX_KOMODO'
SCHEMA = 'AYWEI'

# The password is prompted for interactively so it is never stored in the notebook.
connection = snowflake.connector.connect(
    user='YWEI',
    password=getpass.getpass(),
    account='komodohealth',
)
# Select the role and warehouse for this session before any query runs.
connection.cursor().execute(f'USE ROLE {ROLE}')
connection.cursor().execute(f'USE WAREHOUSE {WAREHOUSE}')

def read_sql(sql, connection = connection):
    """Run a query and return its result set as a pandas DataFrame.

    Args:
        sql: SQL text to execute.
        connection: Connection handed to ``pd.read_sql``; defaults to the
            notebook-level Snowflake connection.

    Returns:
        The DataFrame produced by ``pd.read_sql``.
    """
    return pd.read_sql(sql, connection)

def execute_sql(sql, connection = connection):
    """Execute one SQL statement for its side effects and discard any result.

    Args:
        sql: SQL text to execute.
        connection: Object exposing ``cursor()``; defaults to the
            notebook-level Snowflake connection.

    Returns:
        None.

    Raises:
        Whatever the cursor's ``execute`` raises; the cursor is closed first.
    """
    cursor = connection.cursor()
    try:
        cursor.execute(sql)
    finally:
        # Release the cursor even when the statement fails; previously every
        # call left its cursor open.
        cursor.close()
    
# Pin the session to the configured role, database and schema so the
# unqualified table names used in later cells resolve inside the sandbox schema.
execute_sql("USE ROLE " + ROLE)
execute_sql("USE database " + DATABASE)
execute_sql("USE SCHEMA " + SCHEMA)

········


1. Query COVID-19 diagnoses and vaccines
2. Apply CE
3. Create flags for CE before/after initial diagnosis
4. Summarize

In [66]:
## Source tables
# The *_version strings are spliced into the schema names below, so changing
# one of them repoints every table built from it.
covid19_diag = 'COVID19_DIAG'
covid19_vac = 'COVID19_VAC'

rx_version = '20220613'  # earlier value tried: '20220511'
rx_enc = "MAP_ENCOUNTERS.RX_ENCOUNTERS_" + rx_version + ".RX_ENCOUNTER_LS_GA"

mx_version = '20220605'
mx_enc = "MAP_ENCOUNTERS.MX_ENCOUNTERS_" + mx_version + ".ENCOUNTERSMX_LITE_LS_GA"
mx_bene = "MAP_ENCOUNTERS.MX_ENCOUNTERS_" + mx_version + ".BENEFICIARY_LS_GA"

dm_version = '20220606'
dm_file = "MAP_VOCABULARY.RXNORM_" + dm_version + ".DRUG_MASTER_ACTIVE_AND_HISTORICAL"
proc_file = "MAP_VOCABULARY.RXNORM_" + dm_version + ".V_PROCEDURE"

rx_claim = "MAP_CERTIFIED_CLAIMS.PUBLIC.VIEW_CERTIFIED_CLAIMS_PHARMACY_WITH_JENNER"

## Output tables (all share one prefix)
prefix = 'COVID19'
covid19_diag_enc = prefix + "_diag_encounter"
covid19_vac_enc = prefix + "_vac_encounter"
covid19_bene = prefix + "_bene"
covid19_ce = prefix + "_ce"
covid19_diag_enc_closed = prefix + "_diag_encounter_closed"
covid19_vac_enc_closed = prefix + "_vac_encounter_closed"
covid19_diag_summ = prefix + "_diag_summary"
covid19_vac_summ = prefix + "_vac_summary"

In [31]:
%%time
# Build the CE table for all patients; the table name carries the mx version
# string (ce_<mx_version>).
# The SELECT body is generated by get_ce(), which is not defined in this
# notebook (presumably it comes from `from utils import *` -- TODO confirm).
# NOTE(review): "CE" presumably means continuous enrollment, and the unit of
# grace_period = 45 is not visible here -- confirm against get_ce.
sql = f"""  
    create or replace table ce_{mx_version} as
    {get_ce(mx_version = mx_version, grace_period = 45)}
    ;
"""
execute_sql(sql)

CPU times: user 34.6 ms, sys: 7.28 ms, total: 41.9 ms
Wall time: 5min 59s


In [6]:
%%time
# Collect rows from the mx encounter table dated 2020-01-01 or later whose
# diagnosis_array shares at least one code with the COVID-19 diagnosis code
# list (all `code` values of the covid19_diag table, aggregated into an array).
sql = f"""  
    create or replace table {covid19_diag_enc} as
    select upk_key2, claim_date, diagnosis_array
    from {mx_enc}
    where claim_date >= '2020-01-01' and
        arrays_overlap(
            diagnosis_array,
            (select array_agg(code) from {covid19_diag})
        )
    ;
"""
execute_sql(sql)

CPU times: user 15.4 ms, sys: 2.09 ms, total: 17.5 ms
Wall time: 2min 6s


In [7]:
%%time
# Gather COVID-19 vaccine events from two sources into one table:
#   1. mx encounter rows whose procedure_array overlaps the CPT / HCPCS /
#      ICD10PCS codes in the vaccine code list
#   2. rx encounter rows whose ndc matches an NDC-type code in the same list
# Each branch fills the column it lacks with NULL so the UNION ALL lines up.
# Both branches keep only claims dated 2020-01-01 or later.
sql = f"""  
    create or replace table {covid19_vac_enc} as
    select upk_key2, claim_date, NULL as ndc, procedure_array
    from {mx_enc}
    where claim_date >= '2020-01-01' and
        arrays_overlap(
            procedure_array,
            (select array_agg(code) from {covid19_vac} where codetype in ('CPT', 'HCPCS', 'ICD10PCS'))
        )
    union all
    select upk_key2, claim_date, ndc, NULL as procedure_array
    from {rx_enc}
    where claim_date >= '2020-01-01' and
        ndc in (select code from {covid19_vac} where codetype = 'NDC')
    ;
"""
execute_sql(sql)

CPU times: user 20.1 ms, sys: 1.5 ms, total: 21.6 ms
Wall time: 2min 32s


In [9]:
%%time
# Copy the beneficiary rows for every patient (upk_key2) that appears in
# either the diagnosis or the vaccine encounter table. UNION (not UNION ALL)
# already de-duplicates the combined patient list.
sql = f"""  
    create or replace table {covid19_bene} as
    select *
    from {mx_bene}
    where upk_key2 in (
        select distinct upk_key2 from {covid19_diag_enc} 
        union 
        select distinct upk_key2 from {covid19_vac_enc}
        )
    ;
"""
execute_sql(sql)

CPU times: user 11.3 ms, sys: 654 µs, total: 12 ms
Wall time: 1min 17s


In [7]:
%%time
# Build the CE table restricted to the COVID-19 cohort by handing the cohort's
# beneficiary extract to get_ce() as bene_input.
# NOTE(review): unlike the other get_ce() calls in this notebook, mx_version is
# not passed here, so get_ce's own default applies -- confirm it matches the
# mx_version used to build the beneficiary extract.
sql = f"""  
    create or replace table {covid19_ce} as
    {get_ce(bene_input = covid19_bene, grace_period = 45)}
    ;
"""
execute_sql(sql)

CPU times: user 18.9 ms, sys: 2.25 ms, total: 21.1 ms
Wall time: 2min 25s


In [14]:
%%time
# Keep only COVID-19 diagnosis encounters that fall inside one of the
# patient's CE spans (claim_date between start_date and end_date, inclusive).
# NOTE(review): an encounter matching more than one CE row would be emitted
# once per match -- presumably CE spans never overlap per patient; verify.
sql = f"""  
    create or replace table {covid19_diag_enc_closed} as
    select e.*
    from {covid19_diag_enc} e 
    inner join {covid19_ce} c 
    on e.upk_key2 = c.upk_key2 and e.claim_date between c.start_date and c.end_date
    ;
"""
execute_sql(sql)

CPU times: user 7.53 ms, sys: 578 µs, total: 8.11 ms
Wall time: 18.5 s


In [15]:
%%time
# Keep only COVID-19 vaccine encounters that fall inside one of the patient's
# CE spans (claim_date between start_date and end_date, inclusive).
# NOTE(review): an encounter matching more than one CE row would be emitted
# once per match -- presumably CE spans never overlap per patient; verify.
sql = f"""  
    create or replace table {covid19_vac_enc_closed} as
    select e.*
    from {covid19_vac_enc} e 
    inner join {covid19_ce} c 
    on e.upk_key2 = c.upk_key2 and e.claim_date between c.start_date and c.end_date
    ;
"""
execute_sql(sql)

CPU times: user 4.42 ms, sys: 1.32 ms, total: 5.75 ms
Wall time: 19.3 s


In [22]:
%%time
# Summarize enrolled COVID-19 diagnosis encounters.
# The first SELECT yields one overall row (year and month are NULL); the second
# yields one row per calendar month. n_event counts rows, n_patient counts
# distinct upk_key2. The combined result is ordered by year, month.
# NOTE(review): YEAR/MONTH render as floats in the displayed frame, likely
# because the NULL totals row forces a float column in pandas -- confirm.
sql = f"""  
    select NULL as year, NULL as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from {covid19_diag_enc_closed} e 
    union all 
    select year(claim_date) as year, month(claim_date) as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from {covid19_diag_enc_closed} e 
    group by year, month
    order by year, month
    ;
"""
df = read_sql(sql)
display(df)



Unnamed: 0,YEAR,MONTH,N_EVENT,N_PATIENT
0,2020.0,1.0,12417,4688
1,2020.0,2.0,10707,4032
2,2020.0,3.0,129956,55624
3,2020.0,4.0,979374,259409
4,2020.0,5.0,1026666,319920
5,2020.0,6.0,944192,378840
6,2020.0,7.0,1554445,589550
7,2020.0,8.0,1197207,464437
8,2020.0,9.0,930514,406115
9,2020.0,10.0,1273475,570753


CPU times: user 17.2 ms, sys: 208 µs, total: 17.4 ms
Wall time: 3.98 s


In [24]:
%%time
# Summarize enrolled COVID-19 vaccine encounters.
# The first SELECT yields one overall row (year and month are NULL); the second
# yields one row per calendar month. n_event counts rows, n_patient counts
# distinct upk_key2. The combined result is ordered by year, month.
# NOTE(review): the displayed output lists vaccine-coded events starting in
# 2020-01 -- confirm the vaccine code list and the 2020-01-01 date floor are
# what is intended.
sql = f"""  
    select NULL as year, NULL as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from {covid19_vac_enc_closed} e 
    union all 
    select year(claim_date) as year, month(claim_date) as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from {covid19_vac_enc_closed} e 
    group by year, month
    order by year, month
    ;
"""
df = read_sql(sql)
display(df)



Unnamed: 0,YEAR,MONTH,N_EVENT,N_PATIENT
0,2020.0,1.0,50,50
1,2020.0,2.0,40,38
2,2020.0,3.0,54,52
3,2020.0,4.0,70,69
4,2020.0,5.0,24,24
5,2020.0,6.0,16,16
6,2020.0,7.0,7,7
7,2020.0,8.0,28,25
8,2020.0,9.0,21,21
9,2020.0,10.0,21,21


CPU times: user 17.6 ms, sys: 435 µs, total: 18 ms
Wall time: 9.54 s


In [17]:
# Oxbryta: NDC filter (pre-formatted as a SQL IN-list) and output table names
oxbryta_ndc = "('72786011102', '72786011103', '72786010101')"
oxbryta_enc = "oxbryta_encounter"
oxbryta_ce = "oxbryta_ce"
oxbryta_enc_closed = "oxbryta_encounter_closed"

In [10]:
%%time
# Pull every rx encounter row whose ndc is one of the Oxbryta NDCs.
# No claim_date floor is applied here, unlike the COVID-19 encounter queries.
sql = f"""  
    create or replace table {oxbryta_enc} as
    select upk_key2, claim_date, ndc
    from {rx_enc}
    where ndc in {oxbryta_ndc}
    ;
"""
execute_sql(sql)

CPU times: user 236 ms, sys: 0 ns, total: 236 ms
Wall time: 1min 29s


In [8]:
%%time
# Build the CE table for the Oxbryta cohort via get_ce().
# NOTE(review): bene_input here is the Oxbryta encounter table, whereas the
# COVID-19 CE build passes a beneficiary extract -- confirm get_ce accepts
# either kind of input.
sql = f"""  
    create or replace table {oxbryta_ce} as
    {get_ce(bene_input = oxbryta_enc, mx_version = mx_version, grace_period = 45)}
    ;
"""
execute_sql(sql)

CPU times: user 5.15 ms, sys: 480 µs, total: 5.63 ms
Wall time: 11.8 s


In [18]:
%%time
# Keep only Oxbryta encounters that fall inside one of the patient's CE spans
# (claim_date between start_date and end_date, inclusive).
sql = f"""  
    create or replace table {oxbryta_enc_closed} as
    select e.*
    from {oxbryta_enc} e 
    inner join {oxbryta_ce} c 
    on e.upk_key2 = c.upk_key2 and e.claim_date between c.start_date and c.end_date
    ;
"""
execute_sql(sql)

CPU times: user 4.83 ms, sys: 304 µs, total: 5.14 ms
Wall time: 2.19 s


In [25]:
%%time
# Summarize enrolled Oxbryta encounters.
# The first SELECT yields one overall row (year and month are NULL); the second
# yields one row per calendar month. n_event counts rows, n_patient counts
# distinct upk_key2. The combined result is ordered by year, month.
sql = f"""  
    select NULL as year, NULL as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from {oxbryta_enc_closed} e 
    union all 
    select year(claim_date) as year, month(claim_date) as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from {oxbryta_enc_closed} e 
    group by year, month
    order by year, month
    ;
"""
df = read_sql(sql)
display(df)



Unnamed: 0,YEAR,MONTH,N_EVENT,N_PATIENT
0,2019.0,12.0,23,18
1,2020.0,1.0,94,68
2,2020.0,2.0,193,141
3,2020.0,3.0,302,219
4,2020.0,4.0,360,275
5,2020.0,5.0,379,297
6,2020.0,6.0,390,320
7,2020.0,7.0,413,343
8,2020.0,8.0,426,334
9,2020.0,9.0,454,375


CPU times: user 233 ms, sys: 0 ns, total: 233 ms
Wall time: 1.24 s


### Selinexor and Darzalex

In [62]:
%%time
# Look up every drug-master row whose CUI_L1_NAME mentions selinexor
# (case-insensitive) and display it for review.
sql_ndc = f"select * from {dm_file} where CUI_L1_NAME ilike any('%selinexor%')"
df = read_sql(sql_ndc)
display(df)

# Build selinexor_enc from two sources, tagged by the `type` column:
#   'rx' - rx encounter rows whose ndc is among the looked-up NDCs
#   'mx' - mx encounter rows whose NDC_ARRAY overlaps the looked-up NDCs
# The lookup query text is embedded as a subquery, so the drug-master filter
# runs again inside the database rather than reusing the DataFrame above.
# NULL placeholders keep both branches of the UNION ALL column-compatible.
sql = f"""  
    create or replace table selinexor_enc as
    select upk_key2, claim_date, ndc, NULL as NDC_ARRAY, 'rx' as type
    from {rx_enc}
    where ndc in (select ndc from ({sql_ndc}))
    union all
    select upk_key2, claim_date, NULL as ndc, NDC_ARRAY, 'mx' as type
    from {mx_enc}
    where arrays_overlap(
            NDC_ARRAY,
            (select array_agg(ndc) from ({sql_ndc}))
        )
    ;
"""
execute_sql(sql)



Unnamed: 0,NDC,ACTIVE,START_DATE,END_DATE,CUI_L1,CUI_L1_NAME,BRAND_GENERIC,CUI_L2_ARRAY,CUI_L2_NAME_ARRAY,INGREDIENT_CUI_ARRAY,...,DF_CUI_ARRAY,DF_NAME_ARRAY,DFG_CUI_ARRAY,DFG_NAME_ARRAY,MIN_CUI,MIN_NAME,MULTI_INGREDIENT,PIN_ARRAY,PIN_NAME_ARRAY,ALL_INGREDIENTS
0,72237010101,True,190001,,2178403,{12 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
1,72237010102,True,190001,,2178405,{16 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
2,72237010103,True,190001,,2382990,{24 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
3,72237010104,True,190001,,2178407,{32 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
4,72237010105,True,190001,,2178409,{20 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
5,72237010106,True,190001,,2382991,{16 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
6,72237010107,True,190001,,2382993,{8 (selinexor 20 mg oral tablet [xpovio]) } pa...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
7,72237010111,True,190001,,2178403,{12 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
8,72237010112,True,190001,,2178405,{16 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio
9,72237010113,True,190001,,2382990,{24 (selinexor 20 mg oral tablet [xpovio]) } p...,brand,"[\n ""2178398""\n]","[\n ""selinexor 20 mg [xpovio]""\n]","[\n ""2178390""\n]",...,,,,,,,False,,,selinexor || Xpovio


CPU times: user 54.1 ms, sys: 1.69 ms, total: 55.8 ms
Wall time: 3min 6s


In [67]:
%%time
# Look up drug-master rows whose CUI_L1_NAME mentions darzalex or daratumumab
# (case-insensitive) and display them for review.
sql_ndc = f"select * from {dm_file} where CUI_L1_NAME ilike any('%darzalex%', '%daratumumab%')"
df = read_sql(sql_ndc)
display(df)
# Same search against procedure code descriptions; `df` is reused, so after
# this point it holds the procedure lookup, not the NDC lookup.
sql_proc = f"select * from {proc_file} where CODE_DESCRIPTION ilike any('%darzalex%', '%daratumumab%')"
df = read_sql(sql_proc)
display(df)

# Build darzalex_enc from two sources, tagged by the `type` column:
#   'rx' - rx encounter rows whose ndc is among the looked-up NDCs
#   'mx' - mx encounter rows whose NDC_ARRAY overlaps the looked-up NDCs OR
#          whose PROCEDURE_ARRAY overlaps the looked-up procedure codes
# Both lookup queries are embedded as subqueries, so they run again inside the
# database. NULL placeholders keep the UNION ALL branches column-compatible.
sql = f"""  
    create or replace table darzalex_enc as
    select upk_key2, claim_date, ndc, NULL as NDC_ARRAY, NULL as PROCEDURE_ARRAY, 'rx' as type
    from {rx_enc}
    where ndc in (select ndc from ({sql_ndc}))
    union all
    select upk_key2, claim_date, NULL as ndc, NDC_ARRAY, PROCEDURE_ARRAY, 'mx' as type
    from {mx_enc}
    where arrays_overlap(
            NDC_ARRAY,
            (select array_agg(ndc) from ({sql_ndc}))
        ) or arrays_overlap(
            PROCEDURE_ARRAY,
            (select array_agg(code) from ({sql_proc}))
        )
    ;
"""
execute_sql(sql)



Unnamed: 0,NDC,ACTIVE,START_DATE,END_DATE,CUI_L1,CUI_L1_NAME,BRAND_GENERIC,CUI_L2_ARRAY,CUI_L2_NAME_ARRAY,INGREDIENT_CUI_ARRAY,...,DF_CUI_ARRAY,DF_NAME_ARRAY,DFG_CUI_ARRAY,DFG_NAME_ARRAY,MIN_CUI,MIN_NAME,MULTI_INGREDIENT,PIN_ARRAY,PIN_NAME_ARRAY,ALL_INGREDIENTS
0,57894050205,True,190001,,1721956,5 ml daratumumab 20 mg/ml injection [darzalex],brand,"[\n ""1721953""\n]","[\n ""daratumumab 20 mg/ml [darzalex]""\n]","[\n ""1721947""\n]",...,"[\n ""1721955""\n]","[\n ""Darzalex Injectable Product""\n]","[\n ""1151126""\n]","[\n ""Injectable Product""\n]",,,False,"[\n ""2375130""\n]","[\n ""daratumumab-fihj""\n]",daratumumab || Darzalex
1,57894050220,True,190001,,1726440,20 ml daratumumab 20 mg/ml injection [darzalex],brand,"[\n ""1721953""\n]","[\n ""daratumumab 20 mg/ml [darzalex]""\n]","[\n ""1721947""\n]",...,"[\n ""1721955""\n]","[\n ""Darzalex Injectable Product""\n]","[\n ""1151126""\n]","[\n ""Injectable Product""\n]",,,False,"[\n ""2375130""\n]","[\n ""daratumumab-fihj""\n]",daratumumab || Darzalex
2,57894050301,True,190001,,2375141,15 ml daratumumab-fihj 120 mg/ml / hyaluronida...,brand,"[\n ""2375138"",\n ""2375138""\n]","[\n ""daratumumab-fihj 120 mg/ml / hyaluronida...","[\n ""5464"",\n ""1721947""\n]",...,"[\n ""2375140""\n]","[\n ""Darzalex Faspro Injectable Product""\n]","[\n ""1151126""\n]","[\n ""Injectable Product""\n]",0.0,hyaluronidase / daratumumab,True,"[\n ""1300478"",\n ""2119708"",\n ""2375132"",\n ...","[\n ""hyaluronidase, human recombinant"",\n ""h...",hyaluronidase || daratumumab || Darzalex Faspro
3,57894050505,True,190001,,1721956,5 ml daratumumab 20 mg/ml injection [darzalex],brand,"[\n ""1721953""\n]","[\n ""daratumumab 20 mg/ml [darzalex]""\n]","[\n ""1721947""\n]",...,"[\n ""1721955""\n]","[\n ""Darzalex Injectable Product""\n]","[\n ""1151126""\n]","[\n ""Injectable Product""\n]",,,False,"[\n ""2375130""\n]","[\n ""daratumumab-fihj""\n]",daratumumab || Darzalex
4,57894050520,True,190001,,1726440,20 ml daratumumab 20 mg/ml injection [darzalex],brand,"[\n ""1721953""\n]","[\n ""daratumumab 20 mg/ml [darzalex]""\n]","[\n ""1721947""\n]",...,"[\n ""1721955""\n]","[\n ""Darzalex Injectable Product""\n]","[\n ""1151126""\n]","[\n ""Injectable Product""\n]",,,False,"[\n ""2375130""\n]","[\n ""daratumumab-fihj""\n]",daratumumab || Darzalex




Unnamed: 0,CODE,CODE_ORIG,CODE_DESCRIPTION,CODE_SOURCE
0,J9144,J9144,"Injection, daratumumab, 10 mg and hyaluronidas...",HCPCS
1,J9145,J9145,"Injection, daratumumab, 10 mg",HCPCS
2,C9062,C9062,"Injection, daratumumab 10 mg and hyaluronidase...",HCPCS
3,C9476,C9476,"Injection, daratumumab, 10 mg",HCPCS


CPU times: user 312 ms, sys: 0 ns, total: 312 ms
Wall time: 7min 59s


In [68]:
%%time
# Keep only Darzalex encounters that fall inside one of the patient's CE spans
# in the all-patient CE table (claim_date between start_date and end_date).
# NOTE(review): this query de-duplicates with SELECT DISTINCT, while the
# equivalent selinexor query does not -- confirm which behavior is intended.
sql = f"""  
    create or replace table darzalex_closed as
    select distinct e.*
    from darzalex_enc e 
    inner join ce_{mx_version} c 
    on e.upk_key2 = c.upk_key2 and e.claim_date between c.start_date and c.end_date
    ;
"""
execute_sql(sql)

CPU times: user 5.32 ms, sys: 0 ns, total: 5.32 ms
Wall time: 5.79 s


In [69]:
%%time
# Count enrolled Darzalex claims and distinct patients per NDC.
# Rows that came from the mx branch of darzalex_enc carry a NULL ndc, so they
# all land in a single NULL group.
sql = f"""  
    select ndc, count(*) as n_claim, count(distinct upk_key2) as n_patient
    from darzalex_closed
    group by ndc
    ;
"""
read_sql(sql)



CPU times: user 7.63 ms, sys: 0 ns, total: 7.63 ms
Wall time: 2.19 s


Unnamed: 0,NDC,N_CLAIM,N_PATIENT
0,57894050220.0,4412,475
1,,293747,20052
2,57894050301.0,1257,213
3,57894050205.0,2684,320


In [70]:
%%time
# Summarize enrolled Darzalex encounters.
# The first SELECT yields one overall row (year and month are NULL); the second
# yields one row per calendar month. n_event counts rows, n_patient counts
# distinct upk_key2. The combined result is ordered by year, month.
sql = f"""  
    select NULL as year, NULL as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from darzalex_closed e 
    union all 
    select year(claim_date) as year, month(claim_date) as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from darzalex_closed e 
    group by year, month
    order by year, month
    ;
"""
df = read_sql(sql)
display(df)



Unnamed: 0,YEAR,MONTH,N_EVENT,N_PATIENT
0,2015.0,8.0,2,1
1,2015.0,9.0,7,2
2,2015.0,10.0,8,2
3,2015.0,11.0,15,7
4,2015.0,12.0,98,48
5,2016.0,1.0,158,80
6,2016.0,2.0,255,119
7,2016.0,3.0,302,142
8,2016.0,4.0,328,159
9,2016.0,5.0,348,165


CPU times: user 25.1 ms, sys: 0 ns, total: 25.1 ms
Wall time: 753 ms


In [63]:
%%time
# Keep only selinexor encounters that fall inside one of the patient's CE
# spans in the all-patient CE table (claim_date between start_date and end_date).
# NOTE(review): no DISTINCT here, unlike the equivalent Darzalex query --
# confirm which behavior is intended.
sql = f"""  
    create or replace table selinexor_closed as
    select e.*
    from selinexor_enc e 
    inner join ce_{mx_version} c 
    on e.upk_key2 = c.upk_key2 and e.claim_date between c.start_date and c.end_date
    ;
"""
execute_sql(sql)

CPU times: user 4.98 ms, sys: 168 µs, total: 5.15 ms
Wall time: 5.26 s


In [64]:
%%time
# Count enrolled selinexor claims and distinct patients per NDC.
# Rows that came from the mx branch of selinexor_enc carry a NULL ndc, so any
# such rows land in a single NULL group.
sql = f"""  
    select ndc, count(*) as n_claim, count(distinct upk_key2) as n_patient
    from selinexor_closed
    group by ndc
    ;
"""
read_sql(sql)



CPU times: user 6.87 ms, sys: 695 µs, total: 7.56 ms
Wall time: 813 ms


Unnamed: 0,NDC,N_CLAIM,N_PATIENT
0,72237010101,399,133
1,72237010202,426,139
2,72237010117,6,2
3,72237010111,1,1
4,72237010113,2,2
5,72237010115,4,3
6,72237010112,6,6
7,72237010105,420,173
8,72237010106,19,14
9,72237010207,137,50


In [65]:
%%time
# Summarize enrolled selinexor encounters.
# The first SELECT yields one overall row (year and month are NULL); the second
# yields one row per calendar month. n_event counts rows, n_patient counts
# distinct upk_key2. The combined result is ordered by year, month.
sql = f"""  
    select NULL as year, NULL as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from selinexor_closed e 
    union all 
    select year(claim_date) as year, month(claim_date) as month, count(*) as n_event, count(distinct upk_key2) as n_patient
    from selinexor_closed e 
    group by year, month
    order by year, month
    ;
"""
df = read_sql(sql)
display(df)



Unnamed: 0,YEAR,MONTH,N_EVENT,N_PATIENT
0,2019.0,7.0,40,21
1,2019.0,8.0,56,41
2,2019.0,9.0,74,52
3,2019.0,10.0,81,54
4,2019.0,11.0,74,56
5,2019.0,12.0,75,60
6,2020.0,1.0,60,46
7,2020.0,2.0,57,44
8,2020.0,3.0,58,38
9,2020.0,4.0,94,70


CPU times: user 17.8 ms, sys: 91 µs, total: 17.9 ms
Wall time: 768 ms
