In [1]:
# !pip install py2neo==4.3.0

In [40]:
# !pip install neo4j

In [4]:
import pandas as pd
import sqlalchemy as sql
# from py2neo import Node, Relationship, Graph, Subgraph
import numpy as np
# from neo4j import GraphDatabase

## Concepts

In [None]:
# Delete everything under data/databases/ (recursive, forced, no confirmation prompt).
rm -rf data/databases/*

In [None]:
# Start the cypher-shell client; no connection arguments are supplied here.
cypher-shell

In [None]:
# Bulk-import the five node extracts and seven relationship extracts with neo4j-admin.
# Every line but the last ends in a backslash so the shell reads the whole
# invocation as ONE command when it is pasted into a terminal.
# NOTE(review): the two --relationships entries without a type suffix
# (CUI-CUIs, CODE-SUIs) presumably take each row's type from the file's TYPE
# column -- confirm against the CSV headers.
bin/neo4j-admin import \
  --nodes:Semantic /import/TUIs.csv \
  --nodes:Concept /import/CUIs.csv \
  --nodes:Code /import/CODEs.csv \
  --nodes:Term /import/SUIs.csv \
  --nodes:Definition /import/DEFs.csv \
  --relationships:ISA_STY /import/TUIrel.csv \
  --relationships:STY /import/CUI-TUIs.csv \
  --relationships /import/CUI-CUIs.csv \
  --relationships:CODE /import/CUI-CODEs.csv \
  --relationships /import/CODE-SUIs.csv \
  --relationships:PREF_TERM /import/CUI-SUIs.csv \
  --relationships:DEF /import/DEFrel.csv \
  --skip-duplicate-nodes \
  --skip-bad-relationships

In [None]:
// Full-text index named "Term_name" over the name property of Term nodes.
CALL db.index.fulltext.createNodeIndex(
  "Term_name",
  ["Term"],
  ["name"]
);

## Semantic

In [13]:
# Preview the semantic-type extract; it is parsed with pandas' default comma separator.
tuis_path = 'data/UMLS-Graph-Extracts/TUIs.csv'
con = pd.read_csv(tuis_path)
con.head()

Unnamed: 0,TUI,name,STN,DEF
0,T001,Organism,A1.1,"Generally, a living individual, including all ..."
1,T002,Plant,A1.1.3.3,"An organism having cellulose cell walls, growi..."
2,T004,Fungus,A1.1.3.2,A eukaryotic organism characterized by the abs...
3,T005,Virus,A1.1.4,An organism consisting of a core of a single n...
4,T007,Bacterium,A1.1.2,"A small, typically one-celled, prokaryotic mic..."


In [None]:
// Semantic nodes must be unique on each of their four properties.
CREATE CONSTRAINT ON (sty:Semantic) ASSERT sty.TUI IS UNIQUE;
CREATE CONSTRAINT ON (sty:Semantic) ASSERT sty.STN IS UNIQUE;
CREATE CONSTRAINT ON (sty:Semantic) ASSERT sty.DEF IS UNIQUE;
CREATE CONSTRAINT ON (sty:Semantic) ASSERT sty.name IS UNIQUE;

In [None]:
// Load Semantic nodes from TUIs.csv, committing every 500 rows.
// The node is merged on its key (TUI) only and the other columns are SET afterwards:
// merging on the full property map errors out on any null column and, on a re-run
// with an edited value, tries to create a second node with the same TUI.
// NOTE(review): the pandas preview of TUIs.csv reads it with the default comma
// separator, while this loader expects tabs -- confirm the file's delimiter.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///TUIs.csv" AS line
FIELDTERMINATOR '\t'
MERGE (s:Semantic { TUI: line.TUI })
SET s.name = line.name,
    s.STN = line.STN,
    s.DEF = line.DEF;


## Concept

In [12]:
# Preview the concept extract; it is parsed with pandas' default comma separator.
cuis_path = 'data/UMLS-Graph-Extracts/CUIs.csv'
con = pd.read_csv(cuis_path)
con.head()

Unnamed: 0,CUI
0,C0000005
1,C0000039
2,C0000052
3,C0000074
4,C0000084


In [None]:
// Concept nodes are keyed by CUI, which must be unique.
CREATE CONSTRAINT ON (concept:Concept) ASSERT concept.CUI IS UNIQUE;

In [None]:
// Load one Concept node per distinct CUI in CUIs.csv, committing every 500 rows.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///CUIs.csv" AS row
MERGE (:Concept { CUI: row.CUI });


## Code

In [None]:
// Code nodes are unique on CodeID; SAB and CODE get ordinary lookup indexes.
CREATE CONSTRAINT ON (code:Code) ASSERT code.CodeID IS UNIQUE;
CREATE INDEX ON :Code(SAB);
CREATE INDEX ON :Code(CODE);

In [4]:
# Preview the tab-separated Code extract.
codes_path = 'data/UMLS-Graph-Extracts/CODEs.csv'
con = pd.read_csv(codes_path, sep='\t')
con.head()

Unnamed: 0,SAB,CODE,CodeID
0,MSH,D012711,MSH D012711
1,MSH,D015060,MSH D015060
2,LNC,LP15542-1,LNC LP15542-1
3,LNC,MTHU010538,LNC MTHU010538
4,RXNORM,1926948,RXNORM 1926948


In [None]:
// Load Code nodes from CODEs.csv, committing every 500 rows.
// MERGE on the constrained key (CodeID) instead of a plain CREATE: with the
// uniqueness constraint on CodeID in place, CREATE aborts the load on any
// duplicate row or on a second run, whereas MERGE reuses the existing node.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///CODEs.csv" AS line
FIELDTERMINATOR '\t'
MERGE (c:Code { CodeID: line.CodeID })
SET c.SAB = line.SAB,
    c.CODE = line.CODE;

In [None]:
--nodes:Code /import/CODEs.csv

## Term

In [5]:
# Load the tab-separated Term extract; later cells clean this frame and write it back.
suis_path = 'data/UMLS-Graph-Extracts/SUIs.csv'
con = pd.read_csv(suis_path, sep='\t')
con.head()

Unnamed: 0,SUI,name
0,S0000001,Mild mental retardation
1,S0000002,Moderate mental retardation
2,S0000003,Severe mental retardation
3,S0000004,Profound mental retardation
4,S0000005,Unspecified mental retardation


In [7]:
# Keep only rows that have a name. .copy() yields an independent frame so that
# later column assignments on `con` do not trigger SettingWithCopyWarning.
con = con[con['name'].notna()].copy()

In [18]:
# Remove every double-quote character from the term names.
# assign() builds a new frame rather than writing into what may be a filtered
# slice, and regex=False pins literal matching regardless of the installed
# pandas version's default for `regex`.
con = con.assign(name=con['name'].str.replace('"', '', regex=False))

In [8]:
# Write the Term table back over the file it was read from: tab-separated,
# header row kept (the default), index column omitted.
con.to_csv('data/UMLS-Graph-Extracts/SUIs.csv', sep='\t', index=False)

In [None]:
// Term nodes are unique on SUI; name gets an ordinary lookup index.
CREATE CONSTRAINT ON (term:Term) ASSERT term.SUI IS UNIQUE;
CREATE INDEX ON :Term(name);

In [None]:
# MATCH (n:Term)
# DELETE n;

In [None]:
// Load Term nodes from SUIs.csv, committing every 500 rows.
// The node is merged on its key (SUI) only and name is SET afterwards: merging
// on both properties errors out when name is null and, on a re-run with an
// edited name, tries to create a second node with the same SUI.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///SUIs.csv" AS line
FIELDTERMINATOR '\t'
MERGE (t:Term { SUI: line.SUI })
SET t.name = line.name;

In [None]:
--nodes:Term /import/SUIs.csv

## Definition

In [22]:
# Load the tab-separated Definition extract; later cells clean this frame and write it back.
defs_path = 'data/UMLS-Graph-Extracts/DEFs.csv'
con = pd.read_csv(defs_path, sep='\t')
con.head()

Unnamed: 0,ATUI,SAB,DEF
0,AT100258389,MSH,Areas set apart as burial grounds.
1,AT100258390,MSH,A non-metal element that has the atomic symbol...
2,AT100258391,MSH,A genus of ectomycorrhizae basidiomycetous fun...
3,AT100258392,MSH,"A republic in southern Africa, south of TANZAN..."
4,AT100258393,MSH,The flow of ions into or out of cells that cau...


In [23]:
# Remove every double-quote character from the definition text.
# regex=False pins literal matching regardless of the installed pandas
# version's default for `regex` (and avoids the single-character-pattern warning).
con['DEF'] = con['DEF'].str.replace('"', '', regex=False)

In [24]:
# Write the Definition table back over the file it was read from: tab-separated,
# header row kept (the default), index column omitted.
con.to_csv('data/UMLS-Graph-Extracts/DEFs.csv', sep='\t', index=False)

In [2]:
// Rebuild the index on Definition.DEF: drop it first, then create it again.
// NOTE(review): the DROP presumably errors when the index does not exist yet
// (e.g. on a fresh database) -- confirm before running this cell first.
DROP INDEX ON :Definition(DEF); 
CREATE INDEX ON :Definition(DEF); 

In [None]:
// Definition nodes are unique on ATUI; SAB gets an ordinary lookup index.
CREATE CONSTRAINT ON (def:Definition) ASSERT def.ATUI IS UNIQUE;
CREATE INDEX ON :Definition(SAB);


In [None]:
// Load Definition nodes from DEFs.csv, committing every 500 rows.
// The node is merged on its key (ATUI) only and the other columns are SET
// afterwards: merging on the full property map errors out on any null column
// and, on a re-run with edited text, tries to create a second node with the same ATUI.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///DEFs.csv" AS line
FIELDTERMINATOR '\t'
MERGE (d:Definition { ATUI: line.ATUI })
SET d.SAB = line.SAB,
    d.DEF = line.DEF;

## ISA_STY

In [3]:
# Preview the tab-separated semantic-type relationship extract.
tuirel_path = 'data/UMLS-Graph-Extracts/TUIrel.csv'
con = pd.read_csv(tuirel_path, sep='\t')
con.head()

Unnamed: 0,END_ID,START_ID
0,T071,T001
1,T072,T001
2,T001,T002
3,T071,T002
4,T072,T002


In [None]:
--relationships:ISA_STY /import/TUIrel.csv

In [None]:
// One ISA_STY edge per TUIrel.csv row, from the START_ID Semantic node to the
// END_ID Semantic node. Rows whose endpoints are not both matched create nothing.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///TUIrel.csv" AS row
FIELDTERMINATOR '\t'
MATCH (target:Semantic {TUI: row.END_ID})
MATCH (source:Semantic {TUI: row.START_ID})
CREATE (source)-[:ISA_STY]->(target);

## STY

In [4]:
# Preview the tab-separated concept-to-semantic-type extract.
cui_tuis_path = 'data/UMLS-Graph-Extracts/CUI-TUIs.csv'
con = pd.read_csv(cui_tuis_path, sep='\t')
con.head()

Unnamed: 0,START_ID,END_ID
0,C0000005,T116
1,C0000005,T121
2,C0000005,T130
3,C0000039,T109
4,C0000039,T121


In [None]:
// One STY edge per CUI-TUIs.csv row, from the START_ID Concept to the END_ID
// Semantic node. Rows whose endpoints are not both matched create nothing.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///CUI-TUIs.csv" AS row
FIELDTERMINATOR '\t'
MATCH (target:Semantic {TUI: row.END_ID})
MATCH (source:Concept {CUI: row.START_ID})
CREATE (source)-[:STY]->(target);

In [None]:
--relationships:STY /import/CUI-TUIs.csv

## cui_2_cui 

In [8]:
# Preview the tab-separated concept-to-concept extract.
cui_cuis_path = 'data/UMLS-Graph-Extracts/CUI-CUIs.csv'
con = pd.read_csv(cui_cuis_path, sep='\t')
con.head()

Unnamed: 0,START_ID,END_ID,TYPE,SAB
0,C0005778,C0005790,measured_by,CPM
1,C3537249,C1255279,measured_by,CPM
2,C0002520,C1255446,measured_by,CPM
3,C0596019,C1255552,measured_by,CPM
4,C0004611,C1254417,measured_by,CPM


In [None]:
// Merge one CUI_REL edge per CUI-CUIs.csv row between two Concept nodes
// (START_ID -> END_ID), carrying the row's TYPE and SAB as edge properties.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///CUI-CUIs.csv" AS row
FIELDTERMINATOR '\t'
MATCH (target:Concept {CUI: row.END_ID})
MATCH (source:Concept {CUI: row.START_ID})
MERGE (source)-[:CUI_REL{TYPE: row.TYPE, SAB: row.SAB}]->(target);

## CODE

In [11]:
# Preview the tab-separated concept-to-code extract.
cui_codes_path = 'data/UMLS-Graph-Extracts/CUI-CODEs.csv'
con = pd.read_csv(cui_codes_path, sep='\t')
con.head()

Unnamed: 0,START_ID,END_ID
0,C0026106,ICD10 F70
1,C0026106,ICD10AM F70
2,C0026351,ICD10 F71
3,C0026351,ICD10AM F71
4,C0036857,ICD10 F72


In [None]:
// One CUI_CODE edge per CUI-CODEs.csv row, from the START_ID Concept to the
// END_ID Code node. Rows whose endpoints are not both matched create nothing.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///CUI-CODEs.csv" AS row
FIELDTERMINATOR '\t'
MATCH (target:Code {CodeID: row.END_ID})
MATCH (source:Concept {CUI: row.START_ID})
CREATE (source)-[:CUI_CODE]->(target);

In [None]:
--relationships:CODE /import/CUI-CODEs.csv

## CODE_TERM

In [10]:
# Preview the tab-separated code-to-term extract.
code_suis_path = 'data/UMLS-Graph-Extracts/CODE-SUIs.csv'
con = pd.read_csv(code_suis_path, sep='\t')
con.head()

Unnamed: 0,END_ID,TYPE,CUI,START_ID
0,S0000001,HT,C0026106,ICD10 F70
1,S0000001,HT,C0026106,ICD10AM F70
2,S0000002,HT,C0026351,ICD10 F71
3,S0000002,HT,C0026351,ICD10AM F71
4,S0000003,HT,C0036857,ICD10 F72


In [None]:
// One CODE_TERM edge per CODE-SUIs.csv row, from the START_ID Code node to the
// END_ID Term node, carrying the row's TYPE and CUI as edge properties.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///CODE-SUIs.csv" AS row
FIELDTERMINATOR '\t'
MATCH (target:Term {SUI: row.END_ID})
MATCH (source:Code {CodeID: row.START_ID})
CREATE (source)-[:CODE_TERM{TYPE: row.TYPE, CUI: row.CUI}]->(target);

In [None]:
--relationships /import/CODE-SUIs.csv

## PREF_TERM

In [None]:
--relationships:PREF_TERM /import/CUI-SUIs.csv

In [14]:
# Preview the tab-separated concept-to-term extract.
cui_suis_path = 'data/UMLS-Graph-Extracts/CUI-SUIs.csv'
con = pd.read_csv(cui_suis_path, sep='\t')
con.head()

Unnamed: 0,START_ID,END_ID
0,C0011251,S0000118
1,C0030540,S0000220
2,C0042721,S0000337
3,C0040255,S0000353
4,C0342122,S0000439


In [None]:
// One PREF_TERM edge per CUI-SUIs.csv row, from the START_ID Concept to the
// END_ID Term node. Rows whose endpoints are not both matched create nothing.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///CUI-SUIs.csv" AS row
FIELDTERMINATOR '\t'
MATCH (target:Term {SUI: row.END_ID})
MATCH (source:Concept {CUI: row.START_ID})
CREATE (source)-[:PREF_TERM]->(target);

## DEFrel

In [25]:
# Preview the tab-separated concept-to-definition extract.
defrel_path = 'data/UMLS-Graph-Extracts/DEFrel.csv'
con = pd.read_csv(defrel_path, sep='\t')
con.head()

Unnamed: 0,END_ID,START_ID
0,AT100258389,C0007662
1,AT100258390,C0031705
2,AT100258391,C0319858
3,AT100258392,C0026655
4,AT100258393,C2350764


In [None]:
// One DEF edge per DEFrel.csv row, from the START_ID Concept to the END_ID
// Definition node. Rows whose endpoints are not both matched create nothing.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///DEFrel.csv" AS row
FIELDTERMINATOR '\t'
MATCH (target:Definition {ATUI: row.END_ID})
MATCH (source:Concept {CUI: row.START_ID})
CREATE (source)-[:DEF]->(target);

In [None]:
--relationships:DEF /import/DEFrel.csv

In [None]:
# Bulk-import the five node extracts and seven relationship extracts with neo4j-admin.
# Every line but the last ends in a backslash so the shell reads the whole
# invocation as ONE command when it is pasted into a terminal.
# NOTE(review): the two --relationships entries without a type suffix
# (CUI-CUIs, CODE-SUIs) presumably take each row's type from the file's TYPE
# column -- confirm against the CSV headers.
bin/neo4j-admin import \
  --nodes:Semantic /import/TUIs.csv \
  --nodes:Concept /import/CUIs.csv \
  --nodes:Code /import/CODEs.csv \
  --nodes:Term /import/SUIs.csv \
  --nodes:Definition /import/DEFs.csv \
  --relationships:ISA_STY /import/TUIrel.csv \
  --relationships:STY /import/CUI-TUIs.csv \
  --relationships /import/CUI-CUIs.csv \
  --relationships:CODE /import/CUI-CODEs.csv \
  --relationships /import/CODE-SUIs.csv \
  --relationships:PREF_TERM /import/CUI-SUIs.csv \
  --relationships:DEF /import/DEFrel.csv \
  --skip-duplicate-nodes \
  --skip-bad-relationships

In [50]:
// Concepts
// Uniqueness constraint on id, a lookup index on cui, then a batched node load
// (commit every 500 rows). Both id and cui are filled from the CSV's CUI column.
// The heading uses `//` because `#` is not a Cypher comment marker.
CREATE CONSTRAINT ON (c:Concepts) ASSERT c.id IS UNIQUE;
CREATE INDEX ON :Concepts(cui); // 2 ms

USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///home/ubuntu/snomed/data/concepts.csv" AS line
WITH line
CREATE (:Concepts
        { id: line.CUI, cui: line.CUI, name: line.STR} );

In [None]:
// Reset helper: remove every Concepts node together with any relationships attached to it.
MATCH (concept:Concepts)
DETACH DELETE concept;

In [None]:
// SubConcepts
// Uniqueness constraint on id, lookup indexes on scui and cui, then a batched
// node load (commit every 500 rows). Both id and scui are filled from the CSV's
// SCUI column.
// The heading uses `//` because `#` is not a Cypher comment marker.
CREATE CONSTRAINT ON (c:SubConcepts) ASSERT c.id IS UNIQUE;
CREATE INDEX ON :SubConcepts(scui); // 2 ms
CREATE INDEX ON :SubConcepts(cui); // 2 ms

USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///home/ubuntu/snomed/data/subconcepts.csv" AS line
WITH line
CREATE (:SubConcepts
        {id: line.SCUI, scui: line.SCUI, cui: line.CUI, name: line.STR, vocabulary: line.SAB});

In [None]:
// Reset helper: remove every SubConcepts node together with any relationships attached to it.
MATCH (subconcept:SubConcepts)
DETACH DELETE subconcept;

In [8]:
# CALL apoc.periodic.iterate(
# "MATCH (con:Concepts)
# WITH con
# MATCH (subc:SubConcepts)
# WHERE subc.CUI = con.CUI
# RETURN con, subc",
# "MERGE(subc)-[r:scui_2_cui]-(con)
# DELETE r", {batchSize:1000, parallel:true});

In [None]:
// scui_2_cui
// One edge per subconcepts.csv row, from the SubConcepts node (id = SCUI) to the
// Concepts node (id = CUI). Rows whose endpoints are not both matched create nothing.
// The heading uses `//` because `#` is not a Cypher comment marker.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///home/ubuntu/snomed/data/subconcepts.csv" AS csvLine
MATCH (con:Concepts {id: csvLine.CUI}),(subc:SubConcepts {id: csvLine.SCUI})
CREATE (subc)-[:scui_2_cui]->(con);

In [None]:
// cui_2_cui
// One edge per relations.csv row, from the Concepts node with id = CUI1 to the one
// with id = CUI2, storing the row's RELA value in the edge's relation property.
// The heading uses `//` because `#` is not a Cypher comment marker.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///home/ubuntu/snomed/data/relations.csv" AS csvLine
MATCH (con1:Concepts {id: csvLine.CUI1}),(con2:Concepts {id: csvLine.CUI2})
CREATE (con1)-[:cui_2_cui {relation: csvLine.RELA}]->(con2);

In [None]:
// Semantic_type
// Uniqueness constraint on id, a lookup index on tui, then a batched node load
// (commit every 500 rows). Both id and tui are filled from the CSV's TUI column.
// The heading uses `//` because `#` is not a Cypher comment marker.
CREATE CONSTRAINT ON (c:SemanticType) ASSERT c.id IS UNIQUE;
CREATE INDEX ON :SemanticType(tui); // 2 ms

USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///home/ubuntu/snomed/data/stypes.csv" AS line
WITH line
CREATE (:SemanticType
        { id: line.TUI, tui: line.TUI, name: line.STY, chapter: line.STN} );

In [None]:
// One cui_2_tui edge per semantype.csv row, from the Concepts node (id = CUI) to
// the SemanticType node (id = TUI). Rows whose endpoints are not both matched create nothing.
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM "file:///home/ubuntu/snomed/data/semantype.csv" AS row
MATCH (concept:Concepts {id: row.CUI})
MATCH (stype:SemanticType {id: row.TUI})
CREATE (concept)-[:cui_2_tui]->(stype);