# Create summary files for diseases, compounds, and indications

In [1]:
import json

import py2neo
import pandas

In [2]:
# Load the list of Neo4j server records; each record is read below for its
# 'name' and 'port' keys.
with open('../all-features/servers.json') as read_file:
    instances = json.load(read_file)

# Connect to the first server record named 'rephetio-v2.0'.
for instance in instances:
    if instance['name'] == 'rephetio-v2.0':
        bolt_port = instance['port']

        # The HTTP port is derived as the Bolt port + 1.
        neo = py2neo.Graph(
            host="localhost", http_port=bolt_port + 1,
            bolt_port=bolt_port, bolt=True,
        )

        break
else:
    # Without this guard a missing entry would only surface as a NameError on
    # `neo` below, which hides the real cause.
    raise LookupError(
        "No server named 'rephetio-v2.0' found in ../all-features/servers.json"
    )
# Display the connection as the cell output.
neo

<Graph uri='http://localhost:7501/db/data/'>

In [3]:
def to_df(record_list):
    """Build a pandas DataFrame from a py2neo query result.

    ``record_list`` must provide a ``data()`` method; whatever that method
    returns is handed unchanged to the ``pandas.DataFrame`` constructor.
    """
    rows = record_list.data()
    frame = pandas.DataFrame(rows)
    return frame

## Retrieve compounds and diseases that are connected

In [4]:
# Cypher: for every Disease node that takes part in at least one relationship,
# return its identifier and name together with the number of TREATS_CtD,
# PALLIATES_CpD, and total relationships (patterns are undirected), sorted by
# disease name.
query = '''
MATCH (disease:Disease)
WHERE exists((disease)-[]-())
RETURN
  disease.identifier AS disease_id,
  disease.name AS disease_name,
  size((disease)-[:TREATS_CtD]-()) AS treats,
  size((disease)-[:PALLIATES_CpD]-()) AS palliates,
  size((disease)-[]-()) AS total_edges
ORDER BY disease_name
'''

# Run the query and load the result into a DataFrame (one row per disease).
disease_df = to_df(neo.run(query))
# Alternative call kept for reference (presumably the older py2neo API -- confirm):
# disease_df = to_df(neo.cypher.execute(query))
# Preview the first two rows.
disease_df.head(2)

Unnamed: 0,disease_id,disease_name,palliates,total_edges,treats
0,DOID:10652,Alzheimer's disease,0,765,2
1,DOID:9206,Barrett's esophagus,0,541,2


In [5]:
# Dimensions of the disease table as (rows, columns).
disease_df.shape

(136, 5)

In [6]:
# Number of distinct disease identifiers; compare with the row count to spot
# duplicated IDs.
disease_df["disease_id"].nunique()

136

In [7]:
# Total of the per-disease TREATS_CtD relationship counts.
disease_df["treats"].sum()

592

In [8]:
# Shape of the subset of diseases that have at least one TREATS_CtD
# relationship; the first element is the number of such diseases.
disease_df.query("treats > 0").shape

(73, 5)

---

In [9]:
# Cypher: for every Compound node that takes part in at least one relationship,
# return its identifier and name (aliased as chemical_id / chemical_name)
# together with the number of TREATS_CtD, PALLIATES_CpD, and total
# relationships (patterns are undirected), sorted by compound name.
query = '''
MATCH (compound:Compound)
WHERE exists((compound)-[]-())
RETURN
  compound.identifier AS chemical_id,
  compound.name AS chemical_name,
  size((compound)-[:TREATS_CtD]-()) AS treats,
  size((compound)-[:PALLIATES_CpD]-()) AS palliates,
  size((compound)-[]-()) AS total_edges
ORDER BY chemical_name
'''

# Run the query and load the result into a DataFrame (one row per compound).
compound_df = to_df(neo.run(query))
# Preview the first two rows.
compound_df.head(2)

Unnamed: 0,chemical_id,chemical_name,palliates,total_edges,treats
0,DB01048,Abacavir,0,144,1
1,DB05812,Abiraterone,0,81,1


In [10]:
# Dimensions of the compound table as (rows, columns).
compound_df.shape

(1538, 5)

In [11]:
# Number of distinct compound identifiers; compare with the row count to spot
# duplicated IDs.
compound_df["chemical_id"].nunique()

1538

In [12]:
# Total of the per-compound TREATS_CtD relationship counts.
compound_df["treats"].sum()

592

In [13]:
# Number of distinct compounds that have at least one TREATS_CtD relationship.
compound_df.query("treats > 0")["chemical_id"].nunique()

323

In [14]:
# Write both node summary tables to the working directory as tab-separated
# files; index=False leaves the DataFrame index out of the output.
compound_df.to_csv('compounds.tsv', sep='\t', index=False)
disease_df.to_csv('diseases.tsv', sep='\t', index=False)

## Retrieve indications from hetnet

In [15]:
# Cypher: every relationship directed from a Compound node to a Disease node.
# Returns the identifier and name of both endpoints plus the relationship type,
# sorted by compound name, then relationship type (descending), then disease
# name. The relationship type is not filtered in the query.
indication_query = '''
MATCH (compound:Compound)-[rel]->(disease:Disease)
RETURN
  compound.identifier AS chemical_id,
  compound.name AS chemical_name,
  disease.identifier AS disease_id,
  disease.name AS disease_name,
  type(rel) AS rel_type
ORDER BY
  chemical_name, rel_type DESC, disease_name
'''

# Run the query and load the result into a DataFrame (one row per relationship).
indication_df = to_df(neo.run(indication_query))
# Alternative call kept for reference (presumably the older py2neo API -- confirm):
# indication_df = to_df(neo.cypher.execute(indication_query))
# Preview the first two rows.
indication_df.head(2)

Unnamed: 0,chemical_id,chemical_name,disease_id,disease_name,rel_type
0,DB01048,Abacavir,DOID:635,acquired immunodeficiency syndrome,TREATS_CtD
1,DB05812,Abiraterone,DOID:10283,prostate cancer,TREATS_CtD


In [16]:
# Dimensions of the indication table as (rows, columns); one row per
# compound-disease relationship returned by the query.
indication_df.shape

(592, 5)

In [17]:
# Write the indication table to the working directory as a tab-separated file;
# index=False leaves the DataFrame index out of the output.
indication_df.to_csv('indications.tsv', sep='\t', index=False)