# Build network based on which training piece is being used

In [1]:
import os

import pandas as pd

## Read nodes

In [2]:
# Load the node table and swap the ':ID' / ':LABEL' headers for plain
# column names that are easier to reference in later cells.
nodes = pd.read_csv(
    "data/nodes_7_metanode_slim.csv",
    sep=',',
).rename(columns={":ID": "node_id", ":LABEL": "ntype"})

In [3]:
# Size check: (number of rows, number of columns) of the node table.
nodes.shape

(214590, 3)

In [4]:
# Preview the first rows to confirm the renamed columns are in place.
nodes.head()

Unnamed: 0,node_id,name,ntype
0,C0016192,Flagella,Anatomy
1,C0230349,Cubital fossa,Anatomy
2,C0447417,Entire retromolar area of mouth,Anatomy
3,C0033151,Primitive Gut,Anatomy
4,C0225861,Left auricular appendage,Anatomy


### Assign deepwalk ids for nodes

In [5]:
# Give every node a consecutive integer uid (0..n-1) following the sort order
# of node_id. After sorting, the row index is rebuilt from zero, named
# "node_uid", and then promoted to the first column.
node_map = (
    nodes
    .sort_values("node_id")
    .reset_index(drop=True)
    .rename_axis("node_uid")
    .reset_index()
)

In [6]:
# Preview: node_uid should count up from 0 in node_id order.
node_map.head()

Unnamed: 0,node_uid,node_id,name,ntype
0,0,C0000039,"1,2-Dipalmitoylphosphatidylcholine",Chemicals & Drugs
1,1,C0000052,"1,4-alpha-Glucan Branching Enzyme",Chemicals & Drugs
2,2,C0000084,1-Carboxyglutamic Acid,Chemicals & Drugs
3,3,C0000096,1-Methyl-3-isobutylxanthine,Chemicals & Drugs
4,4,C0000097,"1-Methyl-4-phenyl-1,2,3,6-tetrahydropyridine",Chemicals & Drugs


In [7]:
# Save the node_uid <-> node_id mapping as a tab-separated file; the pandas
# row index is left out of the file.
node_map.to_csv(
    "data/node_map.tsv",
    sep='\t',
    index=False,
)

---

## Incorporate gold standard edges into network

In [8]:
# Base edge list (tab-separated). Judging by the file name it presumably
# excludes the treats edges -- confirm against the step that produced it.
edges = pd.read_csv(
    "data/edges_no_treats.tsv",
    sep='\t',
)

In [9]:
# Number of folds; one training piece file is read and one network file is
# written per fold.
K = 5

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before the first fold is written (e.g. on a fresh
# checkout where data/network/ has not been created yet).
os.makedirs("data/network", exist_ok=True)

for i in range(K):
    # Training split belonging to fold i.
    train = pd.read_csv(
        "data/training/training_piece{}.tsv".format(i),
        sep='\t'
    )

    # Keep only the rows whose etype is TREATS_CDtDO and rename the endpoint
    # columns to source_id / target_id.
    # NOTE(review): presumably these names match the columns of `edges` so the
    # concat below lines up -- confirm against data/edges_no_treats.tsv.
    good = (train
        .query("etype == 'TREATS_CDtDO'")
        [["chemical_id", "disease_id", "etype"]]
        .rename(columns={
            "chemical_id": "source_id",
            "disease_id": "target_id"
        })
    )

    # Network for this fold = base edges + this fold's treats edges.
    res = pd.concat([edges, good])

    # index=False: the concatenated row index is not written to the file.
    res.to_csv(
        "data/network/network_fold{}.tsv".format(i),
        sep='\t', index=False
    )