In [1]:
import xml.etree.ElementTree as ET
import pandas as pd

def getCUIfromXML(patient_name):
    """Collect the distinct UMLS concepts listed in a patient's cTAKES XML output.

    Parameters
    ----------
    patient_name : str
        Identifier used to build the input path
        ``ctakes_results/<patient_name>_out`` (relative to the working directory).

    Returns
    -------
    pandas.DataFrame
        Columns ``Text`` (the element's ``preferredText`` attribute) and ``CUI``
        (its ``cui`` attribute), one row per distinct (Text, CUI) pair. Index
        labels are the positions of the first occurrence of each pair among
        the matched elements, so they may have gaps.

    Raises
    ------
    KeyError
        If a matched element lacks the ``preferredText`` or ``cui`` attribute.
    """
    tree = ET.parse('ctakes_results/{}_out'.format(patient_name))
    root = tree.getroot()

    # Only direct children of the root are matched (no '//' in the path).
    # Gather plain records first and build the frame once: appending to a
    # DataFrame inside the loop copies it on every iteration, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    records = [
        {'Text': umlsConcept.attrib['preferredText'], 'CUI': umlsConcept.attrib['cui']}
        for umlsConcept in root.findall('org.apache.ctakes.typesystem.type.refsem.UmlsConcept')
    ]

    # Passing `columns` keeps the two expected columns even when nothing matched.
    pd_CUI = pd.DataFrame(records, columns=['Text', 'CUI'])
    return pd_CUI.drop_duplicates()
        

In [2]:
# Show the distinct concepts parsed from ctakes_results/test2_out.
getCUIfromXML(patient_name='test2')

Unnamed: 0,Text,CUI
0,Brain,C0006104
1,Obliteration procedure,C1293136
2,Edema,C0013604
6,Brain Edema,C1527311
7,Hemorrhage,C0019080
11,Hernia,C0019270
13,Uncinate herniation,C0393985
17,Contusions,C0009938
20,Subarachnoid Hemorrhage,C0038525


In [3]:
import pandas as pd
# Load the predication table from CSV (path is relative to the working directory).
predications = pd.read_csv(filepath_or_buffer='semmed_csv/PREDICATIONS_OCCURS.csv')

In [4]:
# Preview the first rows only instead of rendering the whole table.
predications.head(10)

Unnamed: 0,PREDICATE,SUBJECT_CUI,SUBJECT_NAME,SUBJECT_SEMTYPE,SUBJECT_NOVELTY,SUBJECT_SCORE,OBJECT_CUI,OBJECT_NAME,OBJECT_SEMTYPE,OBJECT_NOVELTY,OBJECT_SCORE,OCCURS
0,ADMINISTERED_TO,C0000248,2-Acetylaminofluorene,hops,1.0,1000,C0018561,Mesocricetus auratus,mamm,1.0,1000,2
1,ADMINISTERED_TO,C0000248,2-Acetylaminofluorene,hops,1.0,1000,C0034693,Rattus norvegicus,mamm,1.0,861,31
2,ADMINISTERED_TO,C0000248,2-Acetylaminofluorene,hops,1.0,1000,C0034693,Rattus norvegicus,mamm,1.0,1000,31
3,ADMINISTERED_TO,C0000275,2-Chloroadenosine,phsu,1.0,1000,C0034693,Rattus norvegicus,mamm,1.0,1000,2
4,ADMINISTERED_TO,C0000407,3-Hydroxyanthranilic Acid,bacs,1.0,1000,C0029974,Ovum,cell,1.0,888,1
5,ADMINISTERED_TO,C0000431,3MC,hops,1.0,1000,C0034693,Rattus norvegicus,mamm,1.0,888,31
6,ADMINISTERED_TO,C0000431,3MC,hops,1.0,1000,C0034693,Rattus norvegicus,mamm,1.0,1000,31
7,ADMINISTERED_TO,C0000431,3MC,orch,1.0,1000,C0034693,Rattus norvegicus,mamm,1.0,1000,31
8,ADMINISTERED_TO,C0000608,6-Aminocaproic Acid,phsu,1.0,1000,C0043210,Woman,humn,1.0,861,2
9,ADMINISTERED_TO,C0000618,6-Mercaptopurine,phsu,1.0,901,C0025926,"Mice, Inbred NZB",mamm,1.0,1000,1


In [8]:
# Table of distinct (CUI, NAME) pairs for every concept that appears as a
# subject or as an object in `predications`.
# The subject and object column pairs are renamed to a common schema and
# stacked with pd.concat; Series.append, used previously, was deprecated in
# pandas 1.4 and removed in 2.0.
# reset_index() is called without drop=True on purpose: the original row
# label of each pair is kept in an 'index' column.
predications_name = (
    pd.concat([
        predications[['SUBJECT_CUI', 'SUBJECT_NAME']].rename(
            columns={'SUBJECT_CUI': 'CUI', 'SUBJECT_NAME': 'NAME'}),
        predications[['OBJECT_CUI', 'OBJECT_NAME']].rename(
            columns={'OBJECT_CUI': 'CUI', 'OBJECT_NAME': 'NAME'}),
    ])
    .drop_duplicates()
    .reset_index()
)

In [9]:
import networkx as nx

# Directed graph with an edge SUBJECT_CUI -> OBJECT_CUI for each predication
# row, carrying PREDICATE and OCCURS as edge attributes. A DiGraph holds at
# most one edge per ordered node pair, so rows sharing the same
# (subject, object) collapse into a single edge.
# from_pandas_edgelist is the current name of from_pandas_dataframe, which
# was removed in networkx 2.1; the arguments are unchanged.
G = nx.from_pandas_edgelist(predications, 'SUBJECT_CUI', 'OBJECT_CUI', ['PREDICATE', 'OCCURS'], create_using=nx.DiGraph())
# Attach each concept's name as the 'NAME' node attribute. If a CUI is listed
# with several names, the last one in predications_name wins.
nx.set_node_attributes(G, dict(zip(predications_name.CUI, predications_name.NAME)), 'NAME')

In [11]:
# Outgoing neighbours of C0038525 together with their edge attributes.
G.adj['C0038525']

AtlasView({'C0581310': {'PREDICATE': 'MANIFESTATION_OF', 'OCCURS': 2}, 'C0012984': {'PREDICATE': 'PROCESS_OF', 'OCCURS': 132}, 'C0027859': {'PREDICATE': 'COEXISTS_WITH', 'OCCURS': 2}, 'C0429306': {'PREDICATE': 'COEXISTS_WITH', 'OCCURS': 1}, 'C0751523': {'PREDICATE': 'CAUSES', 'OCCURS': 1}, 'C0234533': {'PREDICATE': 'CAUSES', 'OCCURS': 1}, 'C0403082': {'PREDICATE': 'PROCESS_OF', 'OCCURS': 2}, 'C0033860': {'PREDICATE': 'AFFECTS', 'OCCURS': 1}, 'C0078988': {'PREDICATE': 'PROCESS_OF', 'OCCURS': 1}, 'C1512692': {'PREDICATE': 'PROCESS_OF', 'OCCURS': 2}, 'C0030362': {'PREDICATE': 'PROCESS_OF', 'OCCURS': 10}, 'C0599779': {'PREDICATE': 'PROCESS_OF', 'OCCURS': 36}, 'C0560651': {'PREDICATE': 'COEXISTS_WITH', 'OCCURS': 1}, 'C0948017': {'PREDICATE': 'COEXISTS_WITH', 'OCCURS': 2}, 'C0597304': {'PREDICATE': 'ASSOCIATED_WITH', 'OCCURS': 2}, 'C0264733': {'PREDICATE': 'PRECEDES', 'OCCURS': 7}, 'C0340613': {'PREDICATE': 'COEXISTS_WITH', 'OCCURS': 1}, 'C0748609': {'PREDICATE': 'COEXISTS_WITH', 'OCCURS': 1

In [None]:
# Concepts whose name contains 'death' or 'Death'.
# The vectorised .str.contains with na=False treats a missing NAME as a
# non-match; the element-wise `'death' in n` raises TypeError when n is NaN.
predications_name[predications_name['NAME'].str.contains('death|Death', na=False)]

In [12]:
# Look up the row(s) recorded for CUI C0581310.
predications_name.loc[predications_name['CUI'].eq('C0581310')]

Unnamed: 0,index,CUI,NAME
128271,13223863,C0581310,Scalp injury
