In [15]:
import numpy as np
import pandas as pd
import pickle as pkl
import json
import scipy.sparse as sp

import networkx as nx
from networkx.readwrite import json_graph

from sc_dw.utils import *
from sklearn.manifold import spectral_embedding
from node2vec import Node2Vec

import torch
from torch import optim
import torch.nn.functional as F

from datetime import datetime, timedelta
import time
import random
from collections import OrderedDict
import warnings; warnings.filterwarnings('ignore')

In [16]:
graphs = ['original', 'train', 'test']
obj = []
for graph in graphs:
    with open(graph +'.graph', 'r') as f:
        data = json.load(f)
    obj.append(data)
orig_g = json_graph.node_link_graph(obj[0])
train_g = json_graph.node_link_graph(obj[1])
test_g = json_graph.node_link_graph(obj[2])

In [17]:
train_adj = nx.adjacency_matrix(train_g, nodelist=train_g.nodes())

In [18]:
orig_adj = nx.adjacency_matrix(orig_g, nodelist=train_g.nodes())

In [19]:
idx2nodes = {}
nodes2idx = {}
for idx, node in enumerate(train_g.nodes()):
    idx2nodes[idx] = node
    nodes2idx[node] = idx

In [20]:
# test_edges_name = []
# for i, j in test_g.edges():
#     if all(x in train_g.nodes() for x in [i, j]):
#         test_edges_name.append((i,j))

In [21]:
test_edges_name = []
for i, j in test_g.edges():
    if all(x in train_g.nodes() for x in [i, j]) and (i, j) not in train_g.edges() and (j, i) not in train_g.edges():
        test_edges_name.append((i,j))

In [22]:
print(len(test_edges_name))
print(test_edges_name[:5])

177
[('a61b', 'a61h'), ('a61p', 'c07k'), ('a62b', 'd01d'), ('a62b', 'd01f'), ('a62b', 'd03d')]


In [23]:
test_edges = []
for i, j in test_edges_name:
    test_edges.append((nodes2idx[i], nodes2idx[j]))

In [24]:
def ismember(idx_i, idx_j, edgelist):
    if ((idx_i, idx_j) in edgelist) or ((idx_j, idx_i) in edgelist):
        return True
    else:
        return False

In [25]:
test_edges_false_name = []
while len(test_edges_false_name) < len(test_edges):
    idx_i, idx_j = random.sample(train_g.nodes(), 2)
    if ismember(idx_i, idx_j,  orig_g.edges()):
        continue
    if ismember(idx_i, idx_j, test_edges_false_name):
        continue
    test_edges_false_name.append((idx_i, idx_j))
len(test_edges_false_name)

177

In [26]:
print(len(test_edges_false_name))
print(test_edges_false_name[:5])

177
[('c12r', 'h03k'), ('e04g', 'b03d'), ('c03b', 'h02j'), ('f03h', 'b08b'), ('g03h', 'b05b')]


In [27]:
test_edges_false = []
for i, j in test_edges_false_name:
    test_edges_false.append((nodes2idx[i], nodes2idx[j]))

In [28]:
test_edges_false[:5]

[(140, 304), (161, 27), (114, 293), (182, 33), (247, 29)]

### Model

In [43]:
n_iter = 10
dim = 128

In [44]:
sc_AUC_scores = []
sc_AP_scores = []
for i in range(n_iter):
    
    random_state = random.sample(range(0, 50), 1)[0]
    spectral_emb = spectral_embedding(train_adj, n_components=dim, random_state=random_state)
    sc_score_matrix = np.dot(spectral_emb, spectral_emb.T)
    sc_test_roc, sc_test_ap = get_roc_score(test_edges, test_edges_false, sc_score_matrix, apply_sigmoid=True)
    
    sc_AUC_scores.append(sc_test_roc)
    sc_AP_scores.append(sc_test_ap)
    
    print('Experiment {} result - ROC(AUC) score: {}, AP score: {}'.format(i+1, np.round(sc_test_roc, 4), np.round(sc_test_ap, 4)), end='\n')

Experiment 1 result - ROC(AUC) score: 0.3711, AP score: 0.4102
Experiment 2 result - ROC(AUC) score: 0.3998, AP score: 0.4282
Experiment 3 result - ROC(AUC) score: 0.3999, AP score: 0.4295
Experiment 4 result - ROC(AUC) score: 0.378, AP score: 0.4148
Experiment 5 result - ROC(AUC) score: 0.3603, AP score: 0.404
Experiment 6 result - ROC(AUC) score: 0.39, AP score: 0.4265
Experiment 7 result - ROC(AUC) score: 0.378, AP score: 0.4148
Experiment 8 result - ROC(AUC) score: 0.378, AP score: 0.4148
Experiment 9 result - ROC(AUC) score: 0.3949, AP score: 0.4257
Experiment 10 result - ROC(AUC) score: 0.3849, AP score: 0.4171


In [42]:
print('SC_AUC mean:', '{:.2f} ± {:.2f}'.format( np.round(np.mean(sc_AUC_scores)*100, 2), np.round(np.std(sc_AP_scores)*100, 2) ))
print('SC_AP mean:', '{:.2f} ± {:.2f}'.format( np.round(np.mean(sc_AP_scores)*100, 2), np.round(np.std(sc_AP_scores)*100, 2) ))

SC_AUC mean: 48.31 ± 0.74
SC_AP mean: 50.05 ± 0.74


In [52]:
dim = 128
walk_len = 16
num_walk = 16
window = 8

In [53]:
dw_AUC_scores = []
dw_AP_scores = []
for i in range(n_iter):
    
    G_train = nx.from_scipy_sparse_matrix(train_adj)
    
    model_train = Node2Vec(G_train, dimensions=dim, walk_length=walk_len, num_walks=num_walk)
    n2v_train = model_train.fit(window=window, min_count=1)
    edge_emb = n2v_train.wv
    
    emb_list = []
    for node_index in range(0, train_adj.shape[0]):
        node_emb = edge_emb[str(node_index)]
        emb_list.append(node_emb)
    emb_matrix = np.vstack(emb_list)

    n2v_score_matrix = np.dot(emb_matrix, emb_matrix.T)
    n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, n2v_score_matrix, apply_sigmoid=True)
    
    dw_AUC_scores.append(n2v_test_roc)
    dw_AP_scores.append(n2v_test_ap)
    
    print('Experiment {} result - ROC(AUC) score: {}, AP score: {}'.format(i+1, np.round(n2v_test_roc, 4), np.round(n2v_test_ap, 4)), end='\n')

Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.16it/s]


Experiment 1 result - ROC(AUC) score: 0.6642, AP score: 0.5996


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.04it/s]


Experiment 2 result - ROC(AUC) score: 0.6455, AP score: 0.5914


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:03<00:00,  4.98it/s]


Experiment 3 result - ROC(AUC) score: 0.6317, AP score: 0.5758


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  5.62it/s]


Experiment 4 result - ROC(AUC) score: 0.6587, AP score: 0.5997


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.10it/s]


Experiment 5 result - ROC(AUC) score: 0.6512, AP score: 0.5822


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.09it/s]


Experiment 6 result - ROC(AUC) score: 0.6547, AP score: 0.5962


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.34it/s]


Experiment 7 result - ROC(AUC) score: 0.6476, AP score: 0.5889


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.36it/s]


Experiment 8 result - ROC(AUC) score: 0.6485, AP score: 0.5935


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.42it/s]


Experiment 9 result - ROC(AUC) score: 0.6505, AP score: 0.5857


Computing transition probabilities:   0%|          | 0/322 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|███████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.17it/s]


Experiment 10 result - ROC(AUC) score: 0.6606, AP score: 0.6003


In [54]:
print('DW_AUC mean:', '{:.2f} ± {:.2f}'.format( np.round(np.mean(dw_AUC_scores)*100, 2), np.round(np.std(dw_AUC_scores)*100, 2) ))
print('DW_AP mean:', '{:.2f} ± {:.2f}'.format( np.round(np.mean(dw_AP_scores)*100, 2), np.round(np.std(dw_AP_scores)*100, 2) ))

DW_AUC mean: 65.13 ± 0.87
DW_AP mean: 59.13 ± 0.78
