In [1]:
import re
import pandas as pd
from omop_etl.load import Loader
from omop_etl.utils import search, find

In [2]:
# Location of the pipeline's YAML configuration; the cells below open their
# database connections through omop.engine.
CONFIG_PATH = 'z:/Covid19/Covid19_OMOP/new_pipeline/config.yml'
omop = Loader(CONFIG_PATH)

In [3]:
def domain_counts(table):
    """Count the rows of ``dbo.<table>`` per concept domain.

    The concept column is derived from the table name: the text before the
    first underscore followed by ``_concept_id`` (``drug_exposure`` ->
    ``drug_concept_id``).  Rows are joined to ``xref.concept`` on that column
    and tallied by ``domain_id``.

    Parameters
    ----------
    table : str
        Name of a table in the ``dbo`` schema.  It is interpolated into the
        SQL text verbatim, so only pass trusted table names.

    Returns
    -------
    Whatever ``pd.read_sql`` returns for the query (columns ``domain_id``
    and ``N``).
    """
    # Build the statement up front so the connection is only held for the read.
    query = '''
        select distinct b.domain_id, count(*) N
        from dbo.{0} a
        join xref.concept b
        on a.{1}_concept_id = b.concept_id
        group by b.domain_id
        '''.format(table, table.split('_')[0])

    with omop.engine.connect() as con:
        return pd.read_sql(query, con)

In [4]:
# Domain breakdown of the concepts referenced by dbo.condition_occurrence.
domain_counts('condition_occurrence')

Unnamed: 0,domain_id,N
0,Metadata,363949
1,Condition,74748009


In [5]:
# Domain breakdown of the concepts referenced by dbo.observation.
domain_counts('observation')

Unnamed: 0,domain_id,N
0,Observation,136112769


In [6]:
# Domain breakdown of the concepts referenced by dbo.measurement.
domain_counts('measurement')

Unnamed: 0,domain_id,N
0,Measurement,520105066
1,Metadata,31671828


In [7]:
# Domain breakdown of the concepts referenced by dbo.procedure_occurrence.
domain_counts('procedure_occurrence')

Unnamed: 0,domain_id,N
0,Metadata,1716043
1,Procedure,14527678


In [8]:
# Domain breakdown of the concepts referenced by dbo.drug_exposure.
domain_counts('drug_exposure')

Unnamed: 0,domain_id,N
0,Drug,96169798
1,Metadata,354337


# Procedure -> Measurement

In [23]:
# CPT-typed procedure source codes whose mapped concept sits in the
# Measurement domain, together with their level-1 / level-2 CPT groupings.
with omop.engine.connect() as con:
    cpt_to_meas = pd.read_sql(
        sql='''
    select distinct procedure_source_value
        ,vocabulary_id, domain_id
        ,CPT_CD_TYPE, CPT_CD_LVL1_DESC
        ,CPT_CD_LVL2_DESC, count(distinct CPT_CD) N 
    from preload.procedure_occurrence a
    join xref.concept b
    on a.procedure_concept_id = b.concept_id
    join dws_prod.dbo.ALL_CPT_PROCEDURE_CODES c
    on a.procedure_source_value = c.CPT_CD
    where domain_id = 'Measurement' -- @domain
    and CPT_CD_TYPE = 'CPT'
    group by procedure_source_value, vocabulary_id, domain_id, CPT_CD_TYPE, CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC
    order by procedure_source_value
    ''',
        con=con,
    )

In [21]:
# cpt_to_meas.to_csv('cpt_codes.csv', index=False)

In [31]:
# Rows of cpt_to_meas selected by find('Chemistry', ...) on the level-2 CPT description.
# NOTE(review): find comes from omop_etl.utils; presumably it returns a boolean mask of matching rows — confirm.
cpt_to_meas[find('Chemistry', cpt_to_meas.CPT_CD_LVL2_DESC)]

Unnamed: 0,procedure_source_value,vocabulary_id,domain_id,CPT_CD_TYPE,CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC,N
112,82003,CPT4,Measurement,CPT,80000-89398 Pathology and Laboratory Tests,82000-84999 Chemistry Pathology and Laboratory...,1
113,82009,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
114,82010,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
115,82017,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
116,82024,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
...,...,...,...,...,...,...,...
368,84630,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
369,84681,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
370,84702,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1
371,84703,CPT4,Measurement,CPT,1011136 Pathology and Laboratory Procedures,1011237 Chemistry Procedures,1


# Condition -> Observation

In [36]:
# Condition source codes whose mapped concept sits in the Observation domain,
# counted per ICD type and level-2 diagnosis grouping.
with omop.engine.connect() as con:
    icd_to_obs = pd.read_sql(
        sql='''
    select distinct 
         ICD_TYPE
        ,DIAG_CD_LVL2_DESC as [DESC]
        ,count(distinct condition_source_value) N
    from preload.condition_occurrence a
    join xref.concept b
    on a.condition_concept_id = b.concept_id
    join dws_prod.dbo.ALL_ICD_DIAGNOSIS_CODES c
    on a.condition_source_value = c.DIAG_CD_DECML
    where domain_id = 'Observation'
    group by ICD_TYPE, DIAG_CD_LVL2_DESC
    ''',
        con=con,
    )

In [47]:
# Split "<code range> <description>" in DESC into a Range column plus a cleaned
# description (trailing parenthetical removed, trimmed, sentence-cased).
# The guard keeps the cell idempotent: once DESC has been rewritten the range
# prefix is gone, so a second run would otherwise blank out Range.
if 'Range' not in icd_to_obs.columns:
    # A hyphen followed by a digit/upper-case letter marks a leading code range.
    icd_to_obs['Range'] = icd_to_obs.DESC.apply(lambda s: s.split(' ', 1)[0] if search('-[0-9A-Z]', s) else '')
    icd_to_obs['DESC'] = icd_to_obs.DESC.apply(lambda s: s.split(' ', 1)[1] if search('-[0-9A-Z]', s) else s)
    # Raw string avoids the invalid "\(" escape; strip before capitalize so a
    # leading space cannot leave the first letter in lower case.
    icd_to_obs['DESC'] = icd_to_obs.DESC.apply(lambda s: re.sub(r'\(.*\)$', '', s).strip().lower().capitalize())

In [61]:
# icd_to_obs

In [65]:
# Rows of icd_to_obs selected by find('Supple', ...) on the cleaned description.
# NOTE(review): find comes from omop_etl.utils; presumably it returns a boolean mask of matching rows — confirm.
icd_to_obs[find('Supple', icd_to_obs.DESC)]

Unnamed: 0,ICD_TYPE,DESC,N,Range
4,ICD10,Supplementary factors related to causes of mor...,1,
116,ICD10,Supplementary factors related to causes of mor...,269,Y90-Y99


In [85]:
# Collapse rows that share ICD type and cleaned description, summing their
# code counts and labelling each group with its code range.
icd_groups = icd_to_obs.groupby(['ICD_TYPE', 'DESC'])

# Sum only the numeric count; summing the whole frame would also "add up"
# (concatenate) the string Range column.
icd_grouped = icd_groups[['N']].sum()

# Take the last non-empty range in the group, so a row without a range cannot
# hide the range carried by a sibling row with the same description.
icd_grouped['Code range'] = icd_groups['Range'].agg(
    lambda ranges: next((r for r in reversed(ranges.tolist()) if r), '')
)
icd_grouped = icd_grouped.reset_index()

# Append "(range)" only when there is one, avoiding a dangling " ()".
icd_grouped['DESC'] = [
    f"{desc} ({code_range})" if code_range else desc
    for desc, code_range in zip(icd_grouped['DESC'], icd_grouped['Code range'])
]

In [87]:
# icd_grouped.to_csv('icd_to_observation.csv', index=False)

# Procedures

In [88]:
# CPT-typed procedure source codes whose mapped concept stays in the
# Procedure domain, with their level-1 / level-2 CPT groupings.
with omop.engine.connect() as con:
    cpt = pd.read_sql(
        sql='''
    select distinct procedure_source_value, vocabulary_id, domain_id, CPT_CD_TYPE, CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC 
    from preload.procedure_occurrence a
    join xref.concept b
    on a.procedure_concept_id = b.concept_id
    join dws_prod.dbo.ALL_CPT_PROCEDURE_CODES c
    on a.procedure_source_value = c.CPT_CD
    where domain_id = 'Procedure'
    and CPT_CD_TYPE = 'CPT'
    order by procedure_source_value
    ''',
        con=con,
    )

In [89]:
# Display the Procedure-domain CPT codes loaded into cpt.
cpt

Unnamed: 0,procedure_source_value,vocabulary_id,domain_id,CPT_CD_TYPE,CPT_CD_LVL1_DESC,CPT_CD_LVL2_DESC
0,00100,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
1,00102,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
2,00103,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
3,00104,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
4,00120,CPT4,Procedure,CPT,1002796 Anesthesia,1002797 Anesthesia for Procedures on the Head
...,...,...,...,...,...,...
6987,99495,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021131 Transitional Care Evaluation and Manag...
6988,99496,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021131 Transitional Care Evaluation and Manag...
6989,99497,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021844 Advance Care Planning Evaluation and M...
6990,99498,CPT4,Procedure,CPT,1013625 Evaluation and Management Services,1021844 Advance Care Planning Evaluation and M...


In [93]:
# Level-1 grouping label: the text after the first space when the description
# contains a digit, otherwise an empty string.
cpt['Desc'] = cpt.CPT_CD_LVL1_DESC.apply(
    lambda text: '' if not search('[0-9]', text) else text.split(' ', 1)[1]
)

In [97]:
# Level-2 grouping label: the text after the first space when the description
# contains a digit, otherwise an empty string.
cpt['Desc2'] = cpt.CPT_CD_LVL2_DESC.apply(
    lambda text: '' if not search('[0-9]', text) else text.split(' ', 1)[1]
)

In [99]:
# For each (Desc, Desc2) pair, count the non-null entries of every other column.
cpt_sum = cpt.groupby(by=['Desc', 'Desc2']).count()

In [1]:
# cpt_sum.to_csv('cpt_proc.csv')

# Drug classes

In [7]:
import requests

In [4]:
# Distinct drug source values whose mapped concept is in the Drug domain.
with omop.engine.connect() as con:
    rxcui = pd.read_sql(
        sql='''
        select distinct drug_source_value
        from preload.drug_exposure a
        join xref.concept b
        on a.drug_concept_id = b.concept_id
        where b.domain_id = 'Drug'
    ''',
        con=con,
    )

# Preview the first rows only.
rxcui.head()

Unnamed: 0,drug_source_value
0,2052814
1,197407
2,142442
3,1928686
4,8214


In [5]:
from multiprocessing import Pool

In [53]:
# Accumulators filled by the lookup loop further down in this cell.
rxclasses = []  # (rx, class name) tuples for codes that returned a class list
failed = {}  # rx -> response of the unfiltered retry, for codes without one
results = {}  # rx -> response of the VA-filtered lookup that succeeded

def pull_request(rxcui, relaSource = None, rela = None):
    """Fetch the RxClass classes of one RxNorm concept from the RxNav REST API.

    Parameters
    ----------
    rxcui : str or int
        RxNorm concept identifier to look up.
    relaSource : str, optional
        Restrict the lookup to one class source (e.g. ``'VA'``).  When omitted
        no source filter is sent.
    rela : str, optional
        Restrict the lookup to one relationship (e.g. ``'has_VAClass'``).  It
        is only sent together with ``relaSource``; without a source it is
        dropped, as it always was.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    params = {'rxcui': rxcui}
    if relaSource:
        params['relaSource'] = relaSource
        if rela:
            # The service names this filter ``relas`` (it echoes
            # ``relaSource``/``relas`` back in ``userInput``); the former
            # ``rela=`` spelling was not a recognised parameter, so the
            # relationship filter never took effect.
            params['relas'] = rela

    # Let requests build and encode the query string, and bound the wait so a
    # stalled connection cannot hang a long batch of lookups.
    response = requests.get(
        'https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json',
        params=params,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

# Ask for the VA class of every drug source value; codes without one are
# looked up again without filters and parked in `failed` for later inspection.
for rx in rxcui.drug_source_value:
    resp = pull_request(rx, relaSource='VA', rela='has_VAClass')
    info = resp.get('rxclassDrugInfoList')
    if not info:
        resp = pull_request(rx)
        failed[rx] = resp
        continue
    results[rx] = resp
    # Only the first class entry of the response is recorded.
    rxclass = info['rxclassDrugInfo'][0]['rxclassMinConceptItem']['className']
    rxclasses.append((rx, rxclass))
        
#     infoList = resp.get('rxclassDrugInfoList')
    
#     if infoList:
#         
#     else:
#         url = f'https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json?rxcui={rx}&relaSource=FDASPL'
#         resp = requests.get(url).json()
#         failed[rx] = resp

In [45]:
# Display the (rx, class name) pairs collected so far.
rxclasses

[('197407', 'ANTI-INFLAMMATORY,TOPICAL'),
 ('142442', 'NONSALICYLATE NSAIs,ANTIRHEUMATIC'),
 ('1928686', 'IMMUNE SUPPRESSANTS'),
 ('8214', 'LIPID SUPPLEMENTS'),
 ('905455', 'THYROID SUPPLEMENTS'),
 ('636522', 'NON-OPIOID-CONTAINING ANTITUSSIVES/EXPECTORANTS'),
 ('142432', 'ANTIHYPERTENSIVES,OTHER'),
 ('197668', 'ESTROGENS')]

In [59]:
# failed.get('9641').get('rxclassDrugInfoList')

In [54]:
# Number of distinct class names among the collected pairs (0 when none were
# collected, instead of an IndexError from indexing an empty transpose).
len({rx_class for _, rx_class in rxclasses})

302

In [57]:
# Number of distinct codes among the collected pairs (0 when none were
# collected, instead of an IndexError from indexing an empty transpose).
len({rx_code for rx_code, _ in rxclasses})

7178

In [121]:
# Inspect the first 100 codes without a VA class: print their ingredient /
# chemical-structure classes when present, otherwise every
# (class, source, relation) triple the unfiltered lookup returned.
for rx in list(failed)[:100]:
    _rxcui = failed[rx]
    infoList = _rxcui.get('rxclassDrugInfoList')
    if not infoList:
        # The service returned no class list at all for this code.
        print('Failed: ', _rxcui)
        continue

    infoClass = infoList.get('rxclassDrugInfo')
    className = {
        s['rxclassMinConceptItem']['className']
        for s in infoClass
        if s['rela'] in ('has_ingredient', 'has_chemical_structure')
    }
    if not className:
        className = {
            (s['rxclassMinConceptItem']['className'], s['relaSource'], s['rela'])
            for s in infoClass
        }
        print('Empty:', className)
    else:
        print(className)
#     if classinfo:
#         print(classinfo['rela'])
#     else:
#         print('Failed: ', _rxcui)

{'Phytosterols'}
{'Selenium'}
{'Benzethonium', 'Lidocaine', 'Amides'}
{'Plant Extracts'}
Empty: {('Unknown Cellular or Molecular Interaction', 'MEDRT', 'has_moa'), ('Drug Hypersensitivity', 'MEDRT', 'ci_with'), ('Abnormalities, Drug-Induced', 'MEDRT', 'may_treat'), ('Unknown Physiological Effect', 'MEDRT', 'has_pe'), ('Milk Hypersensitivity', 'MEDRT', 'ci_with')}
Failed:  {'userInput': {'relaSource': 'ALL', 'relas': 'ALL', 'rxcui': '2103236'}}
{'Measles Vaccine', 'Rubella Vaccine', 'Mumps Vaccine', 'Vaccines, Attenuated'}
Failed:  {'userInput': {'relaSource': 'ALL', 'relas': 'ALL', 'rxcui': '1306885'}}
Failed:  {'userInput': {'relaSource': 'ALL', 'relas': 'ALL', 'rxcui': '891766'}}
{'Antibodies, Monoclonal', 'Antibodies, Monoclonal, Humanized'}
{'Cellulose'}
{'Ranitidine'}
Failed:  {'userInput': {'relaSource': 'ALL', 'relas': 'ALL', 'rxcui': '2109751'}}
Failed:  {'userInput': {'relaSource': 'ALL', 'relas': 'ALL', 'rxcui': '259283'}}
{'Allergens', 'Benzocaine'}
Failed:  {'userInput': {'

In [111]:
# Class names of the MEDRT 'has_ingredient' entries in infoClass.
# NOTE(review): infoClass is whatever the loop over `failed` assigned last, so this result depends on execution order.
[s['rxclassMinConceptItem']['className'] for s in infoClass if (s['relaSource'] == 'MEDRT' and s['rela'] == 'has_ingredient')] #'minConcept', 'rxclassMinConceptItem', 'rela', 'relaSource'

['Antibodies, Monoclonal, Humanized', 'Antibodies, Monoclonal, Humanized']

In [88]:
# NOTE(review): `_rxcuid` is not assigned in any visible cell (the loop over `failed` binds `_rxcui`);
# on a fresh kernel this presumably raises NameError — confirm and rename.
_rxcuid

{'userInput': {'relaSource': 'ALL', 'relas': 'ALL', 'rxcui': '2045627'},
 'rxclassDrugInfoList': {'rxclassDrugInfo': [{'minConcept': {'rxcui': '2045613',
     'name': 'erenumab',
     'tty': 'IN'},
    'rxclassMinConceptItem': {'classId': 'D000077221',
     'className': 'Calcitonin Gene-Related Peptide Receptor Antagonists',
     'classType': 'MESHPA'},
    'rela': '',
    'relaSource': 'MESH'},
   {'minConcept': {'rxcui': '2045613', 'name': 'erenumab', 'tty': 'IN'},
    'rxclassMinConceptItem': {'classId': 'N02CD',
     'className': 'Calcitonin gene-related peptide (CGRP) antagonists',
     'classType': 'ATC1-4'},
    'rela': '',
    'relaSource': 'ATC'},
   {'minConcept': {'rxcui': '2045613', 'name': 'erenumab', 'tty': 'IN'},
    'rxclassMinConceptItem': {'classId': 'D008881',
     'className': 'Migraine Disorders',
     'classType': 'DISEASE'},
    'rela': 'may_prevent',
    'relaSource': 'MEDRT'},
   {'minConcept': {'rxcui': '2045613', 'name': 'erenumab', 'tty': 'IN'},
    'rxclass

In [61]:
# Tabulate the collected (code, class name) pairs.
rxclassified = pd.DataFrame(data=rxclasses, columns=['RxNorm', 'Class'])

In [64]:
# Number of non-null RxNorm entries recorded under each class.
rxsummary = rxclassified.groupby(by='Class').count()

In [65]:
# rxsummary.to_csv('rxclassification.csv')