<div class="alert alert-block alert-success">
    <h1>
        Example notebook - Healthcare
    </h1>
    <p>
        Link to the dataset: <a href="https://www.kaggle.com/datasets/prasad22/healthcare-dataset">Kaggle link</a>
    </p>
</div>

# Import modules and functions

In [1]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd

import sys
sys.path.insert(1, f"{os.getenv('HOME')}/turingdb-examples/examples/functions")

from functions import *

# Set paths

In [2]:
# Resolve the home directory with expanduser rather than os.getenv('HOME'):
# getenv returns None when HOME is unset (e.g. on Windows), which would
# silently produce the bogus path "None/turingdb-examples/...".
home_dir = os.path.expanduser("~")

# Root directory holding the example datasets (kept as a plain string because
# later cells interpolate it into f-string paths).
path_data = f"{home_dir}/turingdb-examples/examples/data"

# Sub-folder of `path_data` containing this notebook's dataset.
folder = "clinical_healthcare"

# Import and format data

In [9]:
def format_person_name(raw_name):
    """Normalise a person name to ``"Firstname LASTNAME"``.

    Only the first two whitespace-separated tokens are used: the first is
    capitalised and the second is upper-cased, matching the formatting this
    notebook has always applied to two-token names.

    Parameters
    ----------
    raw_name : object
        Cell value from a name column; normally a string.

    Returns
    -------
    object
        The formatted name. Values that are not strings (e.g. NaN) and
        strings without any token are returned unchanged, and a single-token
        name is returned capitalised, instead of raising ``IndexError`` /
        ``AttributeError``.
    """
    if not isinstance(raw_name, str):
        return raw_name
    # split() without an argument ignores leading/trailing/repeated spaces,
    # so stray whitespace cannot yield empty tokens.
    tokens = raw_name.split()
    if not tokens:
        return raw_name
    first_name = tokens[0].capitalize()
    if len(tokens) == 1:
        return first_name
    return f"{first_name} {tokens[1].upper()}"


# Number of rows kept for this small demo graph.
n_rows_kept = 10

df = pd.read_csv(f"{path_data}/{folder}/healthcare_dataset.csv")

# Patient and doctor names get the same normalisation.
for name_col in ('Name', 'Doctor'):
    df[name_col] = df[name_col].apply(format_person_name)

df = create_ID_column(df)

# Slice only after the ID column has been created, preserving the original
# statement order; copy so later cells work on an independent frame rather
# than a slice of the full dataset.
df = df.iloc[:n_rows_kept, :].copy()

df

Unnamed: 0,Patient ID,Name,Age,Gender,Blood Type,Medical Condition,Date of Admission,Doctor,Hospital,Insurance Provider,Billing Amount,Room Number,Admission Type,Discharge Date,Medication,Test Results
0,0,Bobby JACKSON,30,Male,B-,Cancer,2024-01-31,Matthew SMITH,Sons and Miller,Blue Cross,18856.281306,328,Urgent,2024-02-02,Paracetamol,Normal
1,1,Leslie TERRY,62,Male,A+,Obesity,2019-08-20,Samantha DAVIES,Kim Inc,Medicare,33643.327287,265,Emergency,2019-08-26,Ibuprofen,Inconclusive
2,2,Danny SMITH,76,Female,A-,Obesity,2022-09-22,Tiffany MITCHELL,Cook PLC,Aetna,27955.096079,205,Emergency,2022-10-07,Aspirin,Normal
3,3,Andrew WATTS,28,Female,O+,Diabetes,2020-11-18,Kevin WELLS,"Hernandez Rogers and Vang,",Medicare,37909.78241,450,Elective,2020-12-18,Ibuprofen,Abnormal
4,4,Adrienne BELL,43,Female,AB+,Cancer,2022-09-19,Kathleen HANNA,White-White,Aetna,14238.317814,458,Urgent,2022-10-09,Penicillin,Abnormal
5,5,Emily JOHNSON,36,Male,A+,Asthma,2023-12-20,Taylor NEWTON,Nunez-Humphrey,UnitedHealthcare,48145.110951,389,Urgent,2023-12-24,Ibuprofen,Normal
6,6,Edward EDWARDS,21,Female,AB-,Diabetes,2020-11-03,Kelly OLSON,Group Middleton,Medicare,19580.872345,389,Emergency,2020-11-15,Paracetamol,Inconclusive
7,7,Christina MARTINEZ,20,Female,A+,Cancer,2021-12-28,Suzanne THOMAS,"Powell Robinson and Valdez,",Cigna,45820.462722,277,Emergency,2022-01-07,Paracetamol,Inconclusive
8,8,Jasmine AGUILAR,82,Male,AB+,Asthma,2020-07-01,Daniel FERGUSON,Sons Rich and,Cigna,50119.222792,316,Elective,2020-07-14,Aspirin,Abnormal
9,9,Christopher BERG,58,Female,AB-,Cancer,2021-05-23,Heather DAY,Padilla-Walker,UnitedHealthcare,19784.631062,249,Elective,2021-06-22,Paracetamol,Inconclusive


# Create graph from dataframe

In [10]:
# Attribute key under which each node's human-readable label is stored.
label_str = 'displayName'

# Columns turned into their own nodes, mapped to the extra columns stored as
# attributes on those nodes (empty list = no extra attributes).
# NOTE(review): 'Room Number' and 'Billing Amount' vary per admission, but the
# nodes they are attached to are shared between patients; in the recorded
# output the 'Medicare' node carries a single 'Billing Amount' although several
# patients use Medicare. Confirm this is the intended modelling.
linked_columns = {
    'Gender': [],
    'Blood Type': [],
    'Medical Condition': [],
    'Doctor': [],
    'Hospital': ['Room Number'],
    'Insurance Provider': ['Billing Amount'],
    'Admission Type': [],
    'Medication': [],
    'Test Results': [],
}

# Expand the compact table above into the per-column spec dictionaries;
# every column is linked to the source (patient) node.
optional_nodes = {}
for column, extra_attributes in linked_columns.items():
    node_spec = {'id': column}
    if extra_attributes:
        node_spec['attributes'] = extra_attributes
    node_spec['link_to_source'] = True
    optional_nodes[column] = node_spec

# Description of the source node: one patient per row.
patient_node = {
    'id': 'Patient ID',
    label_str: 'Name',
    'type': 'Patient',
}

G = create_graph_from_df(
    df,
    directed=True,
    source_node_col=patient_node,
    attributes_source_node_cols=['Age', 'Date of Admission', 'Discharge Date'],
    optional_nodes_cols=optional_nodes,
)
print(f"Resulting graph : {G}")

Resulting graph : DiGraph with 57 nodes and 90 edges


In [14]:
# Preview the first 20 nodes as (node_id, attributes) tuples.
node_preview = list(G.nodes(data=True))[:20]
for node_id, node_attrs in node_preview:
    print((node_id, node_attrs))

('00000', {'displayName': 'Bobby JACKSON', 'type': 'Patient', 'Age': 30, 'Date of Admission': '2024-01-31', 'Discharge Date': '2024-02-02'})
('Male', {'displayName': 'Male'})
('B-', {'displayName': 'B-'})
('Cancer', {'displayName': 'Cancer'})
('Matthew SMITH', {'displayName': 'Matthew SMITH'})
('Sons and Miller', {'displayName': 'Sons and Miller', 'Room Number': 328})
('Blue Cross', {'displayName': 'Blue Cross', 'Billing Amount': 18856.281305978155})
('Urgent', {'displayName': 'Urgent'})
('Paracetamol', {'displayName': 'Paracetamol'})
('Normal', {'displayName': 'Normal'})
('00001', {'displayName': 'Leslie TERRY', 'type': 'Patient', 'Age': 62, 'Date of Admission': '2019-08-20', 'Discharge Date': '2019-08-26'})
('A+', {'displayName': 'A+'})
('Obesity', {'displayName': 'Obesity'})
('Samantha DAVIES', {'displayName': 'Samantha DAVIES'})
('Kim Inc', {'displayName': 'Kim Inc', 'Room Number': 265})
('Medicare', {'displayName': 'Medicare', 'Billing Amount': 33643.327286577885})
('Emergency', {

In [15]:
# Preview the first 20 edges as (source, target, attributes) tuples.
edge_preview = list(G.edges(data=True))[:20]
for source_id, target_id, edge_attrs in edge_preview:
    print((source_id, target_id, edge_attrs))

('00000', 'Male', {})
('00000', 'B-', {})
('00000', 'Cancer', {})
('00000', 'Matthew SMITH', {})
('00000', 'Sons and Miller', {})
('00000', 'Blue Cross', {})
('00000', 'Urgent', {})
('00000', 'Paracetamol', {})
('00000', 'Normal', {})
('00001', 'Male', {})
('00001', 'A+', {})
('00001', 'Obesity', {})
('00001', 'Samantha DAVIES', {})
('00001', 'Kim Inc', {})
('00001', 'Medicare', {})
('00001', 'Emergency', {})
('00001', 'Ibuprofen', {})
('00001', 'Inconclusive', {})
('00002', 'Female', {})
('00002', 'A-', {})


# Create graph using the `turingdb` Python package

In [16]:
# Name of the TuringDB graph; the next cell interpolates it into the
# CREATE GRAPH query and passes it to client.set_graph().
graph_name = "patient_network"

In [17]:
from turingdb import TuringDB

# Address of the TuringDB server this notebook connects to.
turingdb_host = "http://localhost:6666"
client = TuringDB(host=turingdb_host)

# Create the graph on the server, then make it the client's active graph.
# NOTE(review): presumably CREATE GRAPH fails if the graph already exists,
# which would break re-running this cell — confirm against the server.
client.query(f"CREATE GRAPH {graph_name}")
client.set_graph(graph_name)

# Open a new change and read its identifier from the query result.
new_change_result = client.query("CHANGE NEW")
change = new_change_result["Change ID"][0]

# Switch the client onto that change so the following writes target it.
client.checkout(change=change)

In [25]:
# Translate the networkx graph into a Cypher CREATE statement.
create_command = build_create_command_from_networkx(G)

# Display the statement framed by two rules of 100 asterisks.
rule = 100 * '*'
print(f"Cypher CREATE command :\n\n{rule}\n{create_command}\n{rule}")


Cypher CREATE command :

****************************************************************************************************
CREATE (n0:Patient {id:"00000", "displayName":"Bobby JACKSON", "type":"Patient", "Age":"30", "Date of Admission":"2024-01-31", "Discharge Date":"2024-02-02"}),
(n1:Node {id:"Male", "displayName":"Male"}),
(n2:Node {id:"B-", "displayName":"B-"}),
(n3:Node {id:"Cancer", "displayName":"Cancer"}),
(n4:Node {id:"Matthew SMITH", "displayName":"Matthew SMITH"}),
(n5:Node {id:"Sons and Miller", "displayName":"Sons and Miller", "Room Number":"328"}),
(n6:Node {id:"Blue Cross", "displayName":"Blue Cross", "Billing Amount":"18856.281305978155"}),
(n7:Node {id:"Urgent", "displayName":"Urgent"}),
(n8:Node {id:"Paracetamol", "displayName":"Paracetamol"}),
(n9:Node {id:"Normal", "displayName":"Normal"}),
(n10:Patient {id:"00001", "displayName":"Leslie TERRY", "type":"Patient", "Age":"62", "Date of Admission":"2019-08-20", "Discharge Date":"2019-08-26"}),
(n11:Node {id:"A+", "d

In [26]:
# Send the CREATE statement, then commit and submit the change; the order of
# these three queries is significant.
for statement in (create_command, "COMMIT", "CHANGE SUBMIT"):
    client.query(statement)

# Check out with no change specified, i.e. go back to main.
client.checkout()

# Show link to open visualiser

In [29]:
# Address of the graph visualiser to link to.
# NOTE(review): this URL looks specific to one user's workspace — other users
# will presumably need to substitute their own; confirm.
visualiser_url = "https://8080--main--newmain--maximevincent.work.turing.bio/"
show_link(link_str=visualiser_url)