In [5]:
import re
import pandas as pd
from omop_etl.load import Loader
from omop_etl.utils import search, find

In [6]:
# Pipeline configuration consumed by the ETL loader; `omop.engine` provides
# the database connections used by the queries in this notebook.
CONFIG_PATH = 'z:/Covid19/Covid19_OMOP/new_pipeline/config.yml'
omop = Loader(CONFIG_PATH)

In [3]:
def domain_counts(table):
    """Count the rows of a ``dbo`` table per concept domain.

    The table's ``<prefix>_concept_id`` column, where ``<prefix>`` is the part
    of ``table`` before its first underscore, is joined to ``xref.concept``
    and the joined rows are counted per ``domain_id``.

    Parameters
    ----------
    table : str
        Table name inside the ``dbo`` schema, e.g. ``'condition_occurrence'``.
        The name is interpolated into the SQL text as-is, so only pass
        trusted table names.

    Returns
    -------
    pandas.DataFrame
        The query result: one row per ``domain_id`` with its count in ``N``.
    """
    concept_prefix, _, _ = table.partition('_')
    query = '''
        select distinct b.domain_id, count(*) N
        from dbo.{0} a
        join xref.concept b
        on a.{1}_concept_id = b.concept_id
        group by b.domain_id
        '''.format(table, concept_prefix)

    with omop.engine.connect() as connection:
        return pd.read_sql(query, connection)

In [4]:
# Concept-domain breakdown of the rows in dbo.condition_occurrence.
domain_counts('condition_occurrence')

Unnamed: 0,domain_id,N
0,Metadata,363949
1,Condition,74748009


In [5]:
# Concept-domain breakdown of the rows in dbo.observation.
domain_counts('observation')

Unnamed: 0,domain_id,N
0,Observation,136112769


In [6]:
# Concept-domain breakdown of the rows in dbo.measurement.
domain_counts('measurement')

Unnamed: 0,domain_id,N
0,Measurement,520105066
1,Metadata,31671828


In [7]:
# Concept-domain breakdown of the rows in dbo.procedure_occurrence.
domain_counts('procedure_occurrence')

Unnamed: 0,domain_id,N
0,Metadata,1716043
1,Procedure,14527678


In [8]:
# Concept-domain breakdown of the rows in dbo.drug_exposure.
domain_counts('drug_exposure')

Unnamed: 0,domain_id,N
0,Drug,96169798
1,Metadata,354337


# Procedure -> Measurement

In [23]:
# CPT source codes from preload.procedure_occurrence whose mapped concept
# falls in the 'Measurement' domain, together with the level-1 / level-2
# descriptions from the CPT reference table.
cpt_to_meas_sql = '''
    select distinct procedure_source_value
        ,vocabulary_id, domain_id
        ,CPT_CD_TYPE, CPT_CD_LVL1_DESC
        ,CPT_CD_LVL2_DESC, count(distinct CPT_CD) N 
    from preload.procedure_occurrence a
    join xref.concept b
    on a.procedure_concept_id = b.concept_id
    join dws_prod.dbo.ALL_CPT_PROCEDURE_CODES c
    on a.procedure_source_value = c.CPT_CD
    where domain_id = 'Measurement' -- @domain
    and CPT_CD_TYPE = 'CPT'
    group by procedure_source_value, vocabulary_id, domain_id, CPT_CD_TYPE, CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC
    order by procedure_source_value
    '''

with omop.engine.connect() as con:
    cpt_to_meas = pd.read_sql(cpt_to_meas_sql, con)

In [21]:
# cpt_to_meas.to_csv('cpt_codes.csv', index=False)

In [31]:
# Show the rows whose level-2 CPT description matches 'Chemistry'.
# NOTE(review): `find` comes from omop_etl.utils; presumably it returns a
# boolean mask over the given series -- confirm against its definition.
cpt_to_meas[find('Chemistry', cpt_to_meas.CPT_CD_LVL2_DESC)]

Unnamed: 0,procedure_source_value,vocabulary_id,domain_id,CPT_CD_TYPE,CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC,N
112,82003,CPT4,Measurement,CPT,80000-89398 Pathology and Laboratory Tests,82000-84999 Chemistry Pathology and Laboratory...,1
113,82009,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
114,82010,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
115,82017,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
116,82024,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
...,...,...,...,...,...,...,...
368,84630,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
369,84681,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
370,84702,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
371,84703,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1


# Condition -> Observation

In [36]:
with omop.engine.connect() as con:
    icd_to_obs = pd.read_sql('''
    select distinct 
         ICD_TYPE
        ,DIAG_CD_LVL2_DESC as [DESC]
        ,count(distinct condition_source_value) N
    from preload.condition_occurrence a
    join xref.concept b
    on a.condition_concept_id = b.concept_id
    join dws_prod.dbo.ALL_ICD_DIAGNOSIS_CODES c
    on a.condition_source_value = c.DIAG_CD_DECML
    where domain_id = 'Observation'
    group by ICD_TYPE, DIAG_CD_LVL2_DESC
    ''', con)

In [47]:
# Split each level-2 description into a code range and a cleaned-up label.
# NOTE(review): `search` comes from omop_etl.utils and is only used for its
# truthiness here; presumably a thin wrapper around re.search -- confirm.
# Caution: DESC is overwritten below, so re-running this cell recomputes
# 'Range' from the already-cleaned text. Reload icd_to_obs before re-running.

# Leading token (e.g. 'Y90-Y99') when the text matches '-[0-9A-Z]', else ''.
icd_to_obs['Range'] = icd_to_obs.DESC.apply(lambda s: s.split(' ', 1)[0] if search('-[0-9A-Z]', s) else '')
# Text after that leading token; descriptions without a match are kept whole.
icd_to_obs['DESC'] = icd_to_obs.DESC.apply(lambda s: s.split(' ', 1)[1] if search('-[0-9A-Z]', s) else s)
# Drop a trailing parenthesised suffix, then normalise case and whitespace.
# The pattern is a raw string: '\(' in a plain literal is an invalid escape
# sequence and triggers a warning on current Python versions.
icd_to_obs['DESC'] = icd_to_obs.DESC.apply(lambda s: re.sub(r'\(.*\)$', '', s).lower().capitalize().strip())

In [61]:
# icd_to_obs

In [65]:
# Show the rows whose cleaned description matches 'Supple'.
# NOTE(review): `find` comes from omop_etl.utils; presumably it returns a
# boolean mask over the given series -- confirm against its definition.
icd_to_obs[find('Supple', icd_to_obs.DESC)]

Unnamed: 0,ICD_TYPE,DESC,N,Range
4,ICD10,Supplementary factors related to causes of mor...,1,
116,ICD10,Supplementary factors related to causes of mor...,269,Y90-Y99


In [85]:
# Collapse rows that share (ICD_TYPE, DESC) after the description clean-up:
# N is summed and one code range is kept per group.
def _last_code_range(codes):
    """Return the last non-empty code range in ``codes``, or '' if there is none."""
    non_empty = codes[codes != '']
    return non_empty.iloc[-1] if len(non_empty) else ''


icd_groups = icd_to_obs.groupby(['ICD_TYPE', 'DESC'])
# Sum only N; a blanket .sum() would also try to aggregate the string
# 'Range' column and leave a meaningless column in the result.
icd_grouped = icd_groups[['N']].sum()
# Prefer a real range over '' so a group's range does not depend on row order.
icd_grouped['Code range'] = icd_groups['Range'].agg(_last_code_range)
icd_grouped = icd_grouped.reset_index()
# Append the range only when there is one, so labels never end in " ()".
range_suffix = icd_grouped['Code range'].map(lambda code: f" ({code})" if code else '')
icd_grouped['DESC'] = icd_grouped['DESC'] + range_suffix

In [87]:
# icd_grouped.to_csv('icd_to_observation.csv', index=False)

# Procedures

In [88]:
# Distinct CPT source codes from preload.procedure_occurrence whose mapped
# concept falls in the 'Procedure' domain, with their CPT descriptions.
cpt_sql = '''
    select distinct procedure_source_value, vocabulary_id, domain_id, CPT_CD_TYPE, CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC 
    from preload.procedure_occurrence a
    join xref.concept b
    on a.procedure_concept_id = b.concept_id
    join dws_prod.dbo.ALL_CPT_PROCEDURE_CODES c
    on a.procedure_source_value = c.CPT_CD
    where domain_id = 'Procedure'
    and CPT_CD_TYPE = 'CPT'
    order by procedure_source_value
    '''

with omop.engine.connect() as con:
    cpt = pd.read_sql(cpt_sql, con)

In [89]:
# Preview the 'Procedure'-domain CPT codes returned by the query.
cpt

Unnamed: 0,procedure_source_value,vocabulary_id,domain_id,CPT_CD_TYPE,CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC
0,00100,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
1,00102,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
2,00103,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
3,00104,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
4,00120,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
...,...,...,...,...,...,...
6987,99495,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021131 Transitional Care Evaluation and Manag...
6988,99496,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021131 Transitional Care Evaluation and Manag...
6989,99497,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021844 Advance Care Planning Evaluation and M...
6990,99498,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021844 Advance Care Planning Evaluation and M...


In [93]:
# Level-1 description with its leading token (the code / code range) removed;
# '' when `search('[0-9]', ...)` is falsy for the text.
cpt['Desc'] = cpt['CPT_CD_LVL1_DESC'].apply(
    lambda label: label.split(' ', 1)[1] if search('[0-9]', label) else ''
)

In [97]:
# Level-2 description with its leading token (the code / code range) removed;
# '' when `search('[0-9]', ...)` is falsy for the text.
cpt['Desc2'] = cpt['CPT_CD_LVL2_DESC'].apply(
    lambda label: label.split(' ', 1)[1] if search('[0-9]', label) else ''
)

In [99]:
# Per (Desc, Desc2) pair, count the non-null entries of every other column.
cpt_sum = cpt.groupby(by=['Desc', 'Desc2']).count()

In [1]:
# cpt_sum.to_csv('cpt_proc.csv')

# Drug classes

In [7]:
import requests

In [15]:
# Distinct drug source values from preload.drug_exposure whose mapped
# concept falls in the 'Drug' domain.
rxcui_sql = '''
        select distinct drug_source_value
        from preload.drug_exposure a
        join xref.concept b
        on a.drug_concept_id = b.concept_id
        where b.domain_id = 'Drug'
    '''

with omop.engine.connect() as con:
    rxcui = pd.read_sql(rxcui_sql, con)

In [16]:
# Preview the distinct drug_source_value entries returned by the query.
rxcui

Unnamed: 0,drug_source_value
0,1234512
1,198228
2,313009
3,5464
4,861493
...,...
8171,244250
8172,198057
8173,310798
8174,562600


In [49]:
# Look up a drug class for each source code through the RxNav RxClass REST
# endpoint, passing the code as `rxcui` and SNOMEDCT as the relation source.
RXCLASS_URL = 'https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json'
RXCLASS_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks the loop forever

rxclass = []  # (source code, className) pairs, taking the first class entry returned
failed = {}   # source code -> response payload (or error description) when no class was obtained
for rx in rxcui.drug_source_value[:1000]:  # only the first 1000 codes are looked up
    try:
        reply = requests.get(
            RXCLASS_URL,
            params={'rxcui': rx, 'relaSource': 'SNOMEDCT'},
            timeout=RXCLASS_TIMEOUT,
        )
        reply.raise_for_status()
        resp = reply.json()
    except (requests.RequestException, ValueError) as err:
        # Network failure, HTTP error status or a non-JSON body: record it and
        # keep going instead of losing the results collected so far.
        failed[rx] = {'error': repr(err)}
        continue
    try:
        info = resp['rxclassDrugInfoList']['rxclassDrugInfo'][0]['rxclassMinConceptItem']
        rxclass.append((rx, info['className']))
    except (KeyError, IndexError, TypeError):
        # Missing key, empty class list or a null payload: no usable class.
        failed[rx] = resp