## Merging data retrieved from Wikidata and MIMIC

In [None]:
!pip install neo4j



In [None]:
from neo4j import GraphDatabase
from neo4j import Query

In [None]:
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created in ``__init__``. If creation fails, the error is
    printed and the instance is left without a driver; ``query`` then raises
    ``AssertionError`` and ``close`` is a no-op.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for *uri*, authenticating with ``(user, pwd)``.

        Failures while creating the driver are printed, not raised.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: keep the object usable for close() and report why
            # no driver is available.
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None, timeout=1000.0):
        """Run *query* in a fresh session and return its records as a list.

        Args:
            query: Query text, wrapped in ``neo4j.Query`` before execution.
            parameters: Optional query parameters forwarded to ``session.run``.
            db: Optional database name; when ``None`` the driver's default
                session is used.
            timeout: Timeout handed to ``neo4j.Query`` (defaults to the
                previously hard-coded ``1000.0``).

        Returns:
            A list of the records produced by the query, or ``None`` if
            opening the session or running the query failed (the error is
            printed).

        Raises:
            AssertionError: If the driver was never initialised.
        """
        # Explicit raise instead of ``assert`` so the guard also holds when
        # Python runs with -O; the exception type is unchanged for callers.
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")

        session_kwargs = {} if db is None else {"database": db}
        response = None
        try:
            # The context manager closes the session on success and on error.
            with self.__driver.session(**session_kwargs) as session:
                response = list(session.run(Query(query, timeout=timeout), parameters))
        except Exception as e:
            # Deliberate best effort: report the failure and return None.
            print("Query failed:", e)
        return response


import os

# Connection settings can be overridden through NEO4J_URI / NEO4J_USER /
# NEO4J_PASSWORD so credentials do not have to be edited into the notebook.
# The previous literals stay as fallbacks so existing runs behave the same.
# NOTE(review): the fallback password is still stored in plain text here;
# rotate it and drop the default once the environment variables are in use.
conn = Neo4jConnection(uri=os.environ.get("NEO4J_URI", "bolt://100.26.248.51:7687"),
                       user=os.environ.get("NEO4J_USER", "neo4j"),
                       pwd=os.environ.get("NEO4J_PASSWORD", "pier-adaption-petroleum"))

In [None]:
import pandas as pd

# Load the extracted relations; the first CSV column is used as the index.
data = pd.read_csv('final_relations.csv', index_col=0)
data['confidence'] = data['confidence'].astype(float)
# Drop rows whose relation label is 'o' and rows with any missing value.
# Filtering and dropna() are chained so a fresh frame is produced; calling
# dropna(inplace=True) on a filtered frame can raise SettingWithCopyWarning.
data = data[data['relation'] != 'o'].dropna()
data.head(20)

Unnamed: 0,Unnamed: 0.1,relation,entity1,entity1_begin,entity1_end,chunk1,entity2,entity2_begin,entity2_end,chunk2,confidence
0,0,CAUSED BY,PROBLEM,3816,3827,pneumothorax,TREATMENT,3839,3856,chest tube removal,0.906085
1,1,HAS TREATMENT,PROBLEM,4939,4949,hypotensive,TREATMENT,4955,4966,the epidural,0.905324
2,2,HAS TREATMENT,PROBLEM,4939,4949,hypotensive,TREATMENT,5003,5015,a fluid bolus,0.901337
3,3,DETECTS,TEST,5193,5204,pan cultured,PROBLEM,5220,5227,organism,0.962884
4,4,TREATS PROBLEM,TREATMENT,5345,5357,Beta-blockers,PROBLEM,5376,5386,tachycardia,0.96487
5,5,DETECTS,TEST,5506,5519,the chest film,PROBLEM,5528,5566,a right lower lobe effusion/atelectasis,0.999408
6,6,DETECTS,TEST,6181,6203,A grape juice challenge,PROBLEM,6223,6246,obvious anastomoses leak,0.960455
7,7,TREATS PROBLEM,TREATMENT,8082,8094,Acetaminophen,PROBLEM,8193,8196,pain,0.997368
8,8,HAS TREATMENT,PROBLEM,794,820,inferior lead ST-elevations,TREATMENT,833,835,ASA,0.993174
9,9,HAS TREATMENT,PROBLEM,794,820,inferior lead ST-elevations,TREATMENT,838,844,heparin,0.995132


In [None]:
# Number of relation rows left after filtering.
print(data.shape[0])

601610


In [None]:
# Keep only the relation label and the two (entity type, text chunk) pairs;
# offsets and confidence are not carried forward.
data = data.loc[:, ['relation', 'entity1', 'chunk1', 'entity2', 'chunk2']]
data.head()

Unnamed: 0,relation,entity1,chunk1,entity2,chunk2
0,CAUSED BY,PROBLEM,pneumothorax,TREATMENT,chest tube removal
1,HAS TREATMENT,PROBLEM,hypotensive,TREATMENT,the epidural
2,HAS TREATMENT,PROBLEM,hypotensive,TREATMENT,a fluid bolus
3,DETECTS,TEST,pan cultured,PROBLEM,organism
4,TREATS PROBLEM,TREATMENT,Beta-blockers,PROBLEM,tachycardia


In [None]:
# Second relation table, read with default CSV settings.
data2 = pd.read_csv(filepath_or_buffer='health_problem_infectious_disease')
data2.head(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,relation,entity1,chunk1,entity2,chunk2
0,0,1284,HAS TREATMENT,PROBLEM,erysipelas,TREATMENT,cephalexin
1,1,377,DIAGNOSED WITH,PROBLEM,coronavirus disease,TEST,ELISA
2,2,2240,HAS SYMPTOMS,PROBLEM,Rotaviral gastroenteritis,PROBLEM,vomiting
3,3,2601,HAS SYMPTOMS,PROBLEM,Crimean-Congo hemorrhagic fever,PROBLEM,tachycardia
4,4,2816,HAS SYMPTOMS,PROBLEM,scrub typhus,PROBLEM,insomnia


In [None]:
# Reduce data2 to the same five columns kept for `data`, which also drops
# the leftover 'Unnamed: *' index columns from earlier CSV round trips.
data2 = data2.loc[:, ['relation', 'entity1', 'chunk1', 'entity2', 'chunk2']]
data2.head()

Unnamed: 0,relation,entity1,chunk1,entity2,chunk2
0,HAS TREATMENT,PROBLEM,erysipelas,TREATMENT,cephalexin
1,DIAGNOSED WITH,PROBLEM,coronavirus disease,TEST,ELISA
2,HAS SYMPTOMS,PROBLEM,Rotaviral gastroenteritis,PROBLEM,vomiting
3,HAS SYMPTOMS,PROBLEM,Crimean-Congo hemorrhagic fever,PROBLEM,tachycardia
4,HAS SYMPTOMS,PROBLEM,scrub typhus,PROBLEM,insomnia


In [None]:
# Stack both relation tables row-wise and renumber the index from 0.
finaldata = pd.concat(
    objs=[data, data2],
    axis=0,
    ignore_index=True,
    sort=False,
)
finaldata.head()

Unnamed: 0,relation,entity1,chunk1,entity2,chunk2
0,CAUSED BY,PROBLEM,pneumothorax,TREATMENT,chest tube removal
1,HAS TREATMENT,PROBLEM,hypotensive,TREATMENT,the epidural
2,HAS TREATMENT,PROBLEM,hypotensive,TREATMENT,a fluid bolus
3,DETECTS,TEST,pan cultured,PROBLEM,organism
4,TREATS PROBLEM,TREATMENT,Beta-blockers,PROBLEM,tachycardia


In [None]:
# Total number of rows after merging both sources.
print(finaldata.shape[0])

608328


In [None]:
# Shuffle all rows (frac=1 samples the whole frame), then renumber the index.
# NOTE(review): no random_state is passed, so the row order presumably
# differs from run to run — confirm whether reproducibility is needed.
finaldata = finaldata.sample(frac=1)
finaldata = finaldata.reset_index(drop=True)
finaldata.head(n=10)

Unnamed: 0,relation,entity1,chunk1,entity2,chunk2
0,DETECTS,TEST,Right upper quadrant ultrasound,PROBLEM,sludge in the gallbladder
1,CAUSED BY,PROBLEM,low calcium,TREATMENT,thyroid surgery
2,DETECTS,TEST,MRI,PROBLEM,interval change
3,TREATS PROBLEM,TREATMENT,urgent HD,PROBLEM,your high potassium
4,HAS TREATMENT,PROBLEM,severe OSA,TREATMENT,tracheostomy
5,TREATS PROBLEM,TREATMENT,deep suctioning,PROBLEM,large amount of secretions
6,DETECTS,TEST,Evaluation,PROBLEM,associated spinal cord injury
7,DETECTS,TEST,CXR PA-L,PROBLEM,pneumonia
8,DETECTED BY,PROBLEM,diabetic ketoacidosis,TEST,laboratory values
9,DETECTED BY,PROBLEM,empyema,TEST,cytologic exam


In [None]:
# Persist the merged, shuffled table. index=False keeps the positional index
# out of the file; writing it is what produces an extra 'Unnamed: 0' column
# every time the CSV is read back.
finaldata.to_csv('medical_knowledge_base', sep=',', index=False)

In [None]:
import pandas as pd

# Reload the saved knowledge base and discard rows with any missing value.
data = pd.read_csv('medical_knowledge_base').dropna()
data.head()

Unnamed: 0.1,Unnamed: 0,relation,entity1,chunk1,entity2,chunk2
0,0,DETECTS,TEST,Right upper quadrant ultrasound,PROBLEM,sludge in the gallbladder
1,1,CAUSED BY,PROBLEM,low calcium,TREATMENT,thyroid surgery
2,2,DETECTS,TEST,MRI,PROBLEM,interval change
3,3,TREATS PROBLEM,TREATMENT,urgent HD,PROBLEM,your high potassium
4,4,HAS TREATMENT,PROBLEM,severe OSA,TREATMENT,tracheostomy


In [None]:
# Row count of the reloaded knowledge base.
print(data.shape[0])

608328


In [None]:
# Keep only the five relation columns, dropping any index column that was
# written to the CSV.
data = data.loc[:, ['relation', 'entity1', 'chunk1', 'entity2', 'chunk2']]
data.head()

Unnamed: 0,relation,entity1,chunk1,entity2,chunk2
0,DETECTS,TEST,Right upper quadrant ultrasound,PROBLEM,sludge in the gallbladder
1,CAUSED BY,PROBLEM,low calcium,TREATMENT,thyroid surgery
2,DETECTS,TEST,MRI,PROBLEM,interval change
3,TREATS PROBLEM,TREATMENT,urgent HD,PROBLEM,your high potassium
4,HAS TREATMENT,PROBLEM,severe OSA,TREATMENT,tracheostomy
