In [158]:
import requests
import math

In [2]:
version = 'current'
identifier = 'C0042210' #Vaccine Identifier in UMLS Add 'C5399710'
apikey = '0c4d7175-6688-4c45-9a13-4c64d7354673'

In [3]:
"""
Retrieve All Atoms for Vaccine Identifier (Synonyms)
https://documentation.uts.nlm.nih.gov/rest/atoms/index.html
"""
uri = 'https://uts-ws.nlm.nih.gov'
page = 0
pagesize = 100
content_endpoint_a = '/rest/content/'+str(version)+'/CUI/'+str(identifier)+'/atoms/' #a - Atoms
query = {'apiKey':apikey, 'pageSize':pagesize}
r = requests.get(uri+content_endpoint_a, params=query)
r.encoding = 'utf-8'

In [19]:
items  = r.json()
jsonData = items['result']
Atoms = {x['ui']:{'source':x['rootSource'],'name':x['name'], 'code':x['code']} for x in jsonData if (x['classType']=='Atom' and x['obsolete']=='false' and x['language']=='ENG')}
# Atoms.keys() contain all atoms of Vaccines belonging to external vocabularies

In [66]:
len(Atoms)

33

In [124]:
import requests

def retrieve_entities(version, identifiers, task, apikey):
    uri = 'https://uts-ws.nlm.nih.gov'
    pagesize = 100
    all_entities = {}
    for identifier in identifiers:
        page = 1
        entity_query = {'apiKey': apikey, 'pageNumber': page, 'language':"ENG"}
        if task == 'atoms':
            content_endpoint = f'/rest/content/{version}/CUI/{identifier}/atoms/'
        elif task == 'desc':
            content_endpoint = f'/rest/content/{version}/AUI/{identifier}/descendants/'
        response = requests.get(uri + content_endpoint, params=entity_query)
        response.encoding = 'utf-8'

        # print(response.url)
        
        if response.status_code != 200:
            print(f'Identifier: {identifier} - Error: {response.status_code}')
        else:
            while response.status_code == 200:
                # print(f'Fetching page {page} for identifier: {identifier}')
                data = response.json()
                entities = data['result']
                for entity in entities:
                    all_entities[entity['ui']] = {
                        'concept': entity['concept'],
                        'ui': entity['ui'],
                        'source': entity['rootSource'],
                        'sourceConcept': entity['sourceConcept'],
                        'code': entity['code'],
                        'obsolete': entity['obsolete'],
                        'name': entity['name'],
                        'termType': entity['termType'],
                        'language':entity['language']
                    }
                page += 1
                entity_query = {'apiKey': apikey, 'pageNumber': page}
                response = requests.get(uri + content_endpoint, params=entity_query)
                response.encoding = 'utf-8'
                
            print(f'Fetched {page-1} pages for identifier: {identifier}')
    return all_entities

### Extracting Synonymous Atoms from Vaccine Superconcepts [Vaccine [APC] (C5399710), Vaccines (C0042210)]

In [105]:
version = 'current'
identifiers = ['C0042210', 'C5399710']  # List of identifiers
content_endpoint = 'atoms'  # 'atoms' or 'descendants'
apikey = '0c4d7175-6688-4c45-9a13-4c64d7354673'

entities = retrieve_entities(version, identifiers, 'atoms', apikey)

Fetched 3 pages for identifier: C0042210
Fetched 1 pages for identifier: C5399710


In [106]:
len(entities)

55

In [107]:
vac_df = pd.DataFrame.from_records(list(entities.values()))
vac_df = vac_df[vac_df['language']=='ENG'].reset_index()
vac_atoms = vac_df['ui'].to_list()
vac_df

Unnamed: 0,index,concept,ui,source,sourceConcept,obsolete,name,termType,language
0,0,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0131113,LCH,NONE,False,Vaccines,PT,ENG
1,1,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A18650458,CHV,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,False,vaccines,SY,ENG
2,2,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A33486017,MEDLINEPLUS,NONE,False,Vaccines,PT,ENG
3,3,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0320232,CSP,NONE,False,vaccine,PT,ENG
4,4,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0815124,SNMI,NONE,False,VACCINES,HT,ENG
5,5,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A12101066,VANDF,NONE,False,VACCINES,PT,ENG
6,6,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A10133377,HL7V3.0,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,False,Vaccine,PT,ENG
7,7,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A18136219,LNC,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,False,Vaccine,LPN,ENG
8,8,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A23518358,LNC,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,False,Vaccine,LS,ENG
9,9,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A32797963,LNC,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,False,Vaccine,LPDN,ENG


### Extracting Atom descendants from Vaccine Atoms Identified above

In [108]:
vac_desc = retrieve_entities(version, vac_atoms, 'desc', apikey)

Identifier: A0131113 - Error: 404
Identifier: A18650458 - Error: 404
Identifier: A33486017 - Error: 404
Fetched 2 pages for identifier: A0320232
Fetched 2 pages for identifier: A0815124
Identifier: A12101066 - Error: 404
Identifier: A10133377 - Error: 404
Identifier: A18136219 - Error: 404
Identifier: A23518358 - Error: 404
Identifier: A32797963 - Error: 404
Identifier: A18618187 - Error: 404
Fetched 13 pages for identifier: A0131115
Fetched 57 pages for identifier: A7589791
Identifier: A4367028 - Error: 404
Fetched 2 pages for identifier: A24665667
Identifier: A18575988 - Error: 404
Identifier: A0815691 - Error: 404
Fetched 1 pages for identifier: A1397511
Identifier: A7751560 - Error: 404
Identifier: A0806393 - Error: 404
Identifier: A23870698 - Error: 404
Fetched 26 pages for identifier: A14066811
Identifier: A32282011 - Error: 404
Fetched 5 pages for identifier: A0131114
Identifier: A24099624 - Error: 404
Fetched 6 pages for identifier: A22722839
Identifier: A18581050 - Error: 404


In [109]:
print(f'Vaccine Concepts Identified by the ancestor method: {len(vac_desc)}')

Vaccine Concepts Identified by the ancestor method: 2463


In [113]:
vac_desc_df = pd.DataFrame.from_records(list(vac_desc.values()))
# vac_df = vac_df[vac_df['language']=='ENG'].reset_index()
# vac_atoms = vac_df['ui'].to_list()
vac_desc_df['concept_code'] = vac_desc_df['concept'].str.extract(r'/CUI/([A-Z0-9]+)$')
vac_desc_df

Unnamed: 0,concept,ui,source,sourceConcept,obsolete,name,termType,language,concept_code
0,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0318991,CSP,NONE,false,fungal vaccine,PT,ENG,C0016831
1,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0319385,CSP,NONE,false,live vaccine,PT,ENG,C0042211
2,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0319836,CSP,NONE,false,protozoal vaccine,PT,ENG,C0080067
3,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A1198670,CSP,NONE,false,malaria vaccine,PT,ENG,C0206255
4,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0320096,CSP,NONE,false,synthetic vaccine,PT,ENG,C0042213
...,...,...,...,...,...,...,...,...,...
2458,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28941343,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Inactivated Japanese Encephalitis Virus Vaccine,PT,ENG,C3537138
2459,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28941361,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Live Attenuated Salmonella Typhi Vaccine,PT,ENG,C3257579
2460,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28941362,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Inactivated Human Papillomavirus Vaccine,PT,ENG,C3257549
2461,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28941373,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Live Attenuated Varicella Zoster Virus Vaccine,PT,ENG,C3257586


In [111]:
vac_desc_df['source'].value_counts()

source
NCI            1020
MEDCIN          627
RCD             270
SNOMEDCT_US     200
ATC             126
MSH              94
CSP              34
SNMI             31
USPMG            29
MED-RT           29
AOD               3
Name: count, dtype: int64

In [115]:
vaccine_concepts = vac_desc_df['concept_code'].to_list()

In [125]:
all_vaccine_atoms = retrieve_entities(version, vaccine_concepts, 'atoms', apikey)

Fetched 1 pages for identifier: C0016831
Fetched 2 pages for identifier: C0042211
Fetched 2 pages for identifier: C0080067
Fetched 2 pages for identifier: C0206255
Fetched 3 pages for identifier: C0042213
Fetched 2 pages for identifier: C0042742
Fetched 2 pages for identifier: C0085432
Fetched 2 pages for identifier: C0042723
Fetched 3 pages for identifier: C0032374
Fetched 5 pages for identifier: C0021403
Fetched 3 pages for identifier: C0025010
Fetched 3 pages for identifier: C0034496
Fetched 1 pages for identifier: C0596696
Fetched 1 pages for identifier: C0597172
Fetched 1 pages for identifier: C0597260
Fetched 2 pages for identifier: C0597418
Fetched 3 pages for identifier: C0037355
Fetched 2 pages for identifier: C0004632
Fetched 2 pages for identifier: C0041305
Fetched 4 pages for identifier: C0004886
Fetched 3 pages for identifier: C0008359
Fetched 3 pages for identifier: C0031237
Fetched 1 pages for identifier: C0596409
Fetched 1 pages for identifier: C0596627
Fetched 2 pages 

In [131]:
len(all_vaccine_atoms)

11092

In [126]:
all_vaccine_atoms_df = pd.DataFrame.from_records(list(all_vaccine_atoms.values()))

In [127]:
pd.set_option('display.max_rows', 20)
all_vaccine_atoms_df['source'].value_counts()

source
NCI              2616
MEDCIN           1389
PDQ              1378
SNOMEDCT_US      1019
MSH               775
                 ... 
DXP                 1
CCSR_ICD10PCS       1
PSY                 1
MTHICD9             1
MSHDUT              1
Name: count, Length: 87, dtype: int64

In [122]:
all_vaccine_atoms_df.to_csv('./ALL_UMLS_CONCEPTS.csv')

In [130]:
all_vaccine_atoms_df['source_code'] = all_vaccine_atoms_df['code'].str.extract(r'/([^/]+)$')
all_vaccine_atoms_df

Unnamed: 0,concept,ui,source,sourceConcept,code,obsolete,name,termType,language,source_code
0,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0318991,CSP,NONE,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,fungal vaccine,PT,ENG,3057-9055
1,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0131119,MSH,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,"Vaccines, Fungal",PM,ENG,D005657
2,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0061303,MSH,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Fungal Vaccines,MH,ENG,D005657
3,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A32288783,MSH,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,"Vaccine, Attenuated",PM,ENG,D014613
4,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A0319385,CSP,NONE,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,live vaccine,PT,ENG,3058-0460
...,...,...,...,...,...,...,...,...,...,...
11087,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28937026,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Live Attenuated Varicella Zoster Virus Vaccine...,FN,ENG,N0000183911
11088,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28941373,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Live Attenuated Varicella Zoster Virus Vaccine,PT,ENG,N0000183911
11089,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28940372,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Live Attenuated Chickenpox Virus Vaccine,SY,ENG,N0000183911
11090,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,A28941400,MED-RT,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,https://uts-ws.nlm.nih.gov/rest/content/2023AB...,false,Inactivated Corynebacterium Diphtheriae Vaccine,PT,ENG,N0000183887


In [132]:
test = all_vaccine_atoms_df[all_vaccine_atoms_df['source']=='RXNORM']['source_code'].to_list()

In [134]:
len(test)

51

In [154]:
xx = requests.get('https://uts-ws.nlm.nih.gov/rest/search/current?string=vaccine&apiKey=0c4d7175-6688-4c45-9a13-4c64d7354673&sabs=RXNORM&pageSize=50&returnIdType=code&pageNumber=19')
xx.encoding = 'utf-8'
print(xx.status_code)
test_dir = xx.json()

200


In [177]:
requests.get('https://uts-ws.nlm.nih.gov/rest/search/current?string=vaccine&apiKey=0c4d7175-6688-4c45-9a13-4c64d7354673&sabs=RXNORM&pageSize=50&returnIdType=code&pageNumber=1').json()['result']['results']

[{'ui': '798306',
  'rootSource': 'RXNORM',
  'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/798306',
  'name': 'tetanus toxoid vaccine, inactivated'},
 {'ui': '8080',
  'rootSource': 'RXNORM',
  'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/8080',
  'name': 'pertussis vaccine'},
 {'ui': '797752',
  'rootSource': 'RXNORM',
  'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/797752',
  'name': 'hepatitis B surface antigen vaccine'},
 {'ui': '798304',
  'rootSource': 'RXNORM',
  'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/798304',
  'name': 'diphtheria toxoid vaccine, inactivated'},
 {'ui': '807219',
  'rootSource': 'RXNORM',
  'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/807219',
  'name': 'typhoid Vi polysaccharide vaccine, S typhi Ty2 strain'},
 {'ui': '2468230',
  'rootSource': 'RXNORM',
  'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/2468230',
  'na

In [165]:
math.ceil(879/50)+1

19

In [146]:
test_dir.keys()

dict_keys(['pageSize', 'pageNumber', 'result', 'partialSearch'])

In [150]:
test_dir['result']

{'classType': 'searchResults',
 'results': [{'ui': '798306',
   'rootSource': 'RXNORM',
   'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/798306',
   'name': 'tetanus toxoid vaccine, inactivated'},
  {'ui': '8080',
   'rootSource': 'RXNORM',
   'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/8080',
   'name': 'pertussis vaccine'},
  {'ui': '797752',
   'rootSource': 'RXNORM',
   'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/797752',
   'name': 'hepatitis B surface antigen vaccine'},
  {'ui': '798304',
   'rootSource': 'RXNORM',
   'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/798304',
   'name': 'diphtheria toxoid vaccine, inactivated'},
  {'ui': '807219',
   'rootSource': 'RXNORM',
   'uri': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/RXNORM/807219',
   'name': 'typhoid Vi polysaccharide vaccine, S typhi Ty2 strain'},
  {'ui': '2468230',
   'rootSource': 'RXNORM',
   'uri': 'https://uts

In [151]:
test_dir['result']['recCount']

879

### Extracting All Concepts using Keyword Search

In [161]:
def get_data(uri, content_endpoint, query_params):
    # Perform GET request
    response = requests.get(uri + content_endpoint, params=query_params)
    response.encoding = 'utf-8'

    # Check for successful response
    if response.status_code == 200:
        # Parse JSON data
        data = response.json()
        return data
    else:
        # Print error message if request fails
        print(f"Error: Failed to fetch data. Status code: {response.status_code}")
        return None

In [178]:
def search_entities(version, keywords, apikey, query_params):
    uri = 'https://uts-ws.nlm.nih.gov'
    pagesize = query_params['pageSize']
    all_entities = {}
    for keyword in keywords:
        content_endpoint = f'/rest/search/{version}/'
        query_params['string']=keyword
        pages = math.ceil(get_data(uri,content_endpoint,query_params)['result']['recCount']/pagesize)+1
        for page in range(1,pages):
            query_params['pageNumber']=page
            entities = get_data(uri,content_endpoint,query_params)['result']['results']
            for entity in entities:
                all_entities[entity['ui']] = entity['name']
    return all_entities

In [198]:
rx_vac_params = {'apiKey': apikey, 'language':'ENG', 'sabs':'RXNORM', 'returnIdType':'code', 'pageSize':50}
rx_vac = search_entities(version, ['vaccine','immunization'], apikey, rx_vac_params)

In [199]:
len(rx_vac)

1183

In [197]:
len(rx_vac)

879

In [182]:
rx_columns = {'RXN':'Int64', 'RX_STR':'str','TTY':'category'}
RX_DF_ALL = pd.read_csv('RXN_Concepts.csv',header=0, names=rx_columns.keys(), dtype=rx_columns)
RXN_LIST = list(RX_DF_ALL['RXN'])
len(RXN_LIST)

2306

In [200]:
missing_umls = [rx for rx in RXN_LIST if str(rx) not in list(rx_vac.keys())]
len(missing)

1666

In [201]:
missing_rx = [rx for rx in list(rx_vac.keys()) if int(rx) not in RXN_LIST]
len(missing_rx)

543

### Evaluation: 

1. RxNorm Concepts missing from UMLS Keywords - Output of name / obsolete / TTY
2. RxNorm concepts missing from RxNorm Relationships - Output of name / obsolete / TTY [Check how to extract new concepts using the API method]

#### RxNorm Concepts missing from UMLS Keyword Search ['vaccine', 'immunization']

In [207]:
missing_umls

[7288,
 29501,
 29503,
 50937,
 76469,
 114883,
 139056,
 200259,
 200260,
 200288,
 203439,
 204459,
 204525,
 204527,
 205255,
 205256,
 205257,
 205262,
 207269,
 219579,
 221050,
 221105,
 224903,
 242233,
 242857,
 247765,
 247766,
 250071,
 251653,
 251817,
 251908,
 260122,
 260123,
 260208,
 260209,
 260299,
 260301,
 287617,
 314513,
 316781,
 316782,
 318241,
 318242,
 318243,
 328467,
 331446,
 335677,
 336250,
 336710,
 336990,
 342864,
 343255,
 343382,
 347145,
 347699,
 350758,
 350965,
 350966,
 350967,
 352572,
 352877,
 358497,
 359119,
 359455,
 363454,
 363455,
 363592,
 365627,
 365683,
 371915,
 372385,
 374054,
 374403,
 374444,
 374588,
 375536,
 375806,
 378439,
 378654,
 378898,
 379116,
 379163,
 380956,
 380957,
 384561,
 402455,
 402456,
 402457,
 404894,
 412908,
 412992,
 413786,
 415373,
 415374,
 450546,
 450547,
 452027,
 452809,
 452810,
 496892,
 545640,
 545641,
 545642,
 547243,
 562127,
 568054,
 574101,
 574102,
 583409,
 583410,
 583411,
 605718

In [226]:
res_missing_umls = {}
rx_uri = 'https://rxnav.nlm.nih.gov'
for rx_cui in missing_umls:
    rx_content_endpoint = f'/REST/rxcui/{rx_cui}/properties.json'
    res = get_data(rx_uri,rx_content_endpoint,None)
    if len(res) !=0:
        res_missing_umls[res['properties']['rxcui']] = {'name':res['properties']['name'], 
                                                       'tty':res['properties']['tty'],
                                                       'suppress':res['properties']['suppress']}
    else:
        res_missing_umls[rx_cui] = {'name':'N/A',
                                    'tty':'N/A',
                                   'suppress': 'N/A'}

In [227]:
res_missing_umls

{'7288': {'name': 'Neisseria meningitidis', 'tty': 'IN', 'suppress': 'N'},
 '29501': {'name': 'meningococcal group A polysaccharide',
  'tty': 'IN',
  'suppress': 'N'},
 '29503': {'name': 'meningococcal group C polysaccharide',
  'tty': 'IN',
  'suppress': 'N'},
 '50937': {'name': 'Haemophilus influenzae type b',
  'tty': 'IN',
  'suppress': 'N'},
 '76469': {'name': 'BCG, live, Connaught strain',
  'tty': 'IN',
  'suppress': 'N'},
 '114883': {'name': 'Typhim VI', 'tty': 'BN', 'suppress': 'N'},
 139056: {'name': 'N/A', 'tty': 'N/A', 'suppress': 'N/A'},
 200259: {'name': 'N/A', 'tty': 'N/A', 'suppress': 'N/A'},
 200260: {'name': 'N/A', 'tty': 'N/A', 'suppress': 'N/A'},
 200288: {'name': 'N/A', 'tty': 'N/A', 'suppress': 'N/A'},
 '203439': {'name': 'Havrix', 'tty': 'BN', 'suppress': 'N'},
 204459: {'name': 'N/A', 'tty': 'N/A', 'suppress': 'N/A'},
 204525: {'name': 'N/A', 'tty': 'N/A', 'suppress': 'N/A'},
 204527: {'name': 'N/A', 'tty': 'N/A', 'suppress': 'N/A'},
 205255: {'name': 'N/A', 't

In [228]:
missing_umls_df = pd.DataFrame.from_records(list(res_missing_umls.values()))

In [231]:
missing_umls_df['suppress'].value_counts()

suppress
N/A    1309
N       357
Name: count, dtype: int64