In [14]:
import pandas as pd
from py2neo import Graph
from py2neo.bulk import create_nodes, create_relationships
import json
import pprint
import db_credential as db_cre

In [15]:
data = pd.read_csv('data/dgedges.csv')

In [16]:
data

Unnamed: 0,Source,AName,AType,ASource,Target,BName,BType,BSource
0,FD00000,11-beta-hydroxylase deficiency,disease,faers,FG0736,CYP11B1,gene,MalaCards
1,FD00001,17-alpha-hydroxylase deficiency,disease,faers,FG0738,CYP17A1,gene,MalaCards
2,FD00003,21-hydroxylase deficiency,disease,faers,FG0743,CYP21A2,gene,MalaCards
3,FD00016,aids dementia complex,disease,faers,FG1584,ITIH4,gene,MalaCards
4,FD00016,aids dementia complex,disease,faers,FG0728,CXCR4,gene,MalaCards
...,...,...,...,...,...,...,...,...
12719,FD15348,xanthomatosis,disease,faers,FG2427,NR1H4,gene,MalaCards
12720,FD15348,xanthomatosis,disease,faers,FG0753,CYP3A4,gene,MalaCards
12721,FD15352,xerophthalmia,disease,faers,FG2836,RHO,gene,MalaCards
12722,FD15357,yellow fever,disease,faers,FG2921,SCP2,gene,MalaCards


### Extract from the dataframe all fields related to diseases.

In [17]:
json_disease = data.filter(['AName','Source', 'AType', 'ASource']).\
    drop_duplicates('AName', keep='last').\
    to_json(orient='records')
dict_disease = json.loads(json_disease)

### Extract from the dataframe all fields related to genes.

In [18]:
json_gene = data.filter([ 'BName','Target','BType', 'BSource']).\
    drop_duplicates('BName', keep='last').\
    to_json(orient='records')
dict_gene = json.loads(json_gene)

### Create a relationship between diseases and genes by relating theirs names.

In [19]:
json_relationship = data.filter(['AName', 'BName']).\
    to_json(orient='records')
dict_relationship = json.loads(json_relationship)

### Etablish a connection to the Graph database

In [20]:
graph = Graph(db_cre.Connection_URI, auth=(db_cre.USER, db_cre.PASSWORD))

### Create in the DB to diseases nodes

In [21]:
create_nodes(graph.auto(), dict_disease, labels={'Disease'})
print(graph.nodes.match('Disease').count())

988


### Create in the DB to genes nodes

In [22]:
create_nodes(graph.auto(), dict_gene, labels={'Gene'})
print(graph.nodes.match('Gene').count())

3556


### Create relationship between disease and gene

In [23]:
dis_gen = []
for p in dict_relationship:
    disease = p['AName']
    gene = p['BName']
    p.pop('AName')
    p.pop('BName')
    dis_gen.append((disease,p, gene))

In [25]:
print(dis_gen)

[('11-beta-hydroxylase deficiency', {}, 'CYP11B1'), ('17-alpha-hydroxylase deficiency', {}, 'CYP17A1'), ('21-hydroxylase deficiency', {}, 'CYP21A2'), ('aids dementia complex', {}, 'ITIH4'), ('aids dementia complex', {}, 'CXCR4'), ('aids dementia complex', {}, 'CCR3'), ('aids dementia complex', {}, 'TNF'), ('aids dementia complex', {}, 'B2M'), ('aids dementia complex', {}, 'CXCL12'), ('abscess', {}, 'LACTB'), ('abscess', {}, 'IGES'), ('acanthosis', {}, 'INSR'), ('acanthosis', {}, 'FGFR3'), ('acanthosis', {}, 'INS'), ('acanthosis', {}, 'FGFR2'), ('acanthosis', {}, 'LEP'), ('acanthosis', {}, 'ADIPOQ'), ('acanthosis', {}, 'IGF1R'), ('acanthosis', {}, 'LMNA'), ('acanthosis', {}, 'SHBG'), ('acanthosis', {}, 'IGF1'), ('acanthosis', {}, 'BAIAP2L1'), ('acanthosis nigricans', {}, 'INSR'), ('acanthosis nigricans', {}, 'FGFR3'), ('acanthosis nigricans', {}, 'INS'), ('acanthosis nigricans', {}, 'FGFR2'), ('acanthosis nigricans', {}, 'LEP'), ('acanthosis nigricans', {}, 'ADIPOQ'), ('acanthosis nigri

### Create the relationship between the entities. i.e. : Disease1 'ASSOCIATED_WITH' Gene1

In [24]:
rel_pb = create_relationships(graph.auto(),dis_gen,'ASSOCIATED_WITH',start_node_key=("Disease","AName"),end_node_key=("Gene","BName"))
print(rel_pb)

None


### Next step: Query the DB in Cypher Query Language using Python