# Generate adjacency lists

2018-07-31

In [1]:
import pandas as pd

from collections import defaultdict

---

## Read the master DeepWalk ID conversion list

In [2]:
# Load the node table that pairs each node_id with its integer deepwalk_id.
# node_id mixes alphanumeric and purely numeric identifiers (e.g. DB00795 and
# 100996420), so it is pinned to str instead of being left to dtype inference.
dw_ids = pd.read_csv(
    "data/hetionet/deepwalk_ids.tsv",
    sep='\t',
    dtype={"node_id": str},
)

In [3]:
# Preview the first rows to check the columns that were read.
dw_ids.head()

Unnamed: 0,deepwalk_id,node_id,name,het_type
0,0,DB00795,Sulfasalazine,Compound
1,1,N0000000151,Histamine H2 Receptor Antagonists,Pharmacologic Class
2,2,100996420,DNM1P50,Gene
3,3,DB04898,Ximelagatran,Compound
4,4,C0278151,Facial spasm,Side Effect


In [4]:
# Lookup table: node_id -> deepwalk_id, built by pairing the two columns
# row by row. If a node_id repeats, the last row wins.
id_map = dict(zip(dw_ids["node_id"], dw_ids["deepwalk_id"]))

---

## Read edges

In [5]:
# Load the edge table (start_id, end_id, edge type, endpoint node types).
# Both ID columns are read as str: the end_id values previewed below are purely
# numeric, and if pandas inferred them as integers they would not match the
# textual node identifiers used as keys when the IDs are looked up in id_map.
edges = pd.read_csv(
    "data/min_hetionet/minhet_edges.tsv",
    sep='\t',
    dtype={"start_id": str, "end_id": str},
)

In [6]:
# Preview the first rows to check the columns that were read.
edges.head()

Unnamed: 0,start_id,end_id,het_etype,start_htype,end_htype
0,DB00643,51547,UPREGULATES_CuG,Compound,Gene
1,DB08881,10450,UPREGULATES_CuG,Compound,Gene
2,DB01211,10450,DOWNREGULATES_CdG,Compound,Gene
3,DB00374,10450,DOWNREGULATES_CdG,Compound,Gene
4,DB00398,10450,UPREGULATES_CuG,Compound,Gene


In [7]:
# Show the (rows, columns) dimensions of the edge table.
edges.shape

(62060, 5)

## Build the adjacency list

In [8]:
# Adjacency list keyed by deepwalk id; each value is the set of neighbouring
# deepwalk ids. Using sets collapses repeated edges between the same pair.
adjlist = defaultdict(set)

for start, end in zip(edges["start_id"], edges["end_id"]):
    # Translate both endpoints to deepwalk ids; an ID missing from id_map
    # raises KeyError.
    u, v = id_map[start], id_map[end]

    # Register the edge in both directions so the adjacency is symmetric.
    adjlist[u].add(v)
    adjlist[v].add(u)

## Save to file

In [9]:
fname = "data/min_hetionet/minhet_adj.txt"

# One line per node: the node's id followed by its neighbours' ids in
# ascending order, all separated by single spaces.
with open(fname, "w") as fout:
    for node, nbrs in adjlist.items():
        joined = " ".join(map(str, sorted(nbrs)))
        fout.write(str(node) + " " + joined + "\n")