In [None]:
# Attach Google Drive to this Colab runtime; the next cell changes into it.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive

/content/drive/MyDrive


In [None]:
pip install node2vec

Collecting node2vec
  Downloading node2vec-0.4.3.tar.gz (4.6 kB)
Building wheels for collected packages: node2vec
  Building wheel for node2vec (setup.py) ... [?25l[?25hdone
  Created wheel for node2vec: filename=node2vec-0.4.3-py3-none-any.whl size=5978 sha256=cd5b223836088fe22f90e406ff739752a3a45c008b8b20ac49184dda16bfc6fe
  Stored in directory: /root/.cache/pip/wheels/07/62/78/5202cb8c03cbf1593b48a8a442fca8ceec2a8c80e22318bae9
Successfully built node2vec
Installing collected packages: node2vec
Successfully installed node2vec-0.4.3


In [None]:
# Wildcard project imports stay first so the explicit imports below still take
# precedence on any name clash, exactly as before.
from etc_emb.model import *
from etc_emb.utils import *

import json
import warnings
from collections import OrderedDict
from datetime import datetime, timedelta, timezone

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

In [None]:
# --- Experiment configuration ---
# What to run.
task = 'link_prediction'        # 'classification', 'link_prediction'
dataset = 'pubmed'              # 'citeseer', 'cora', 'pubmed', 'arx'
model_name = 'deepwalk'         # later cells branch on 'spectral_clustering' / 'deepwalk'

# How to run it.
n_iter = 10                     # number of split/evaluate rounds
feat_norm = False               # forwarded to load_data
prevent_disconnect_dw = False   # used as prevent_disconnect when model_name == 'deepwalk'

In [None]:
# Embedding dimensionality.
dim = 128
# Hold-out fractions as [test_ratio, val_ratio]:
# len_test = len_total * test_ratio, len_val = len_total * val_ratio
test_val_ratio = [0.1, 0.05]
test, val = test_val_ratio

In [None]:
# Per-model hyperparameters. Spectral clustering gets None placeholders for the
# walk settings so that all four names are always defined.
if model_name == 'spectral_clustering':
    walk_len = None
    num_walk = None
    window = None
    prevent_disconnect = False
elif model_name == 'deepwalk':
    walk_len = 80
    num_walk = 10
    window = 10
    prevent_disconnect = prevent_disconnect_dw
else:
    # Fail here rather than with a NameError (or stale values from an earlier
    # run of this cell) further down the notebook.
    raise ValueError(
        "Unsupported model_name {!r}; expected 'spectral_clustering' or 'deepwalk'".format(model_name)
    )

In [None]:
# Settings bundle stored in the run log; the order must match
# log_dict['setting_order'].
hparams = (
    dim,
    test_val_ratio,
    prevent_disconnect,
    walk_len,
    num_walk,
    window,
)

In [None]:
# Load the dataset for the selected task. load_data returns an extra `labels`
# value for classification.
if task == 'link_prediction':
    adj, features = load_data(dataset, task, feat_norm)
elif task == 'classification':
    adj, features, labels = load_data(dataset, task, feat_norm)
else:
    # Without this guard an unknown task leaves adj/features undefined (or
    # stale from a previous run) and the failure surfaces much later.
    raise ValueError(
        "Unsupported task {!r}; expected 'link_prediction' or 'classification'".format(task)
    )

In [None]:
# Run n_iter rounds of split -> embed -> evaluate and collect the test metrics.
AUC_scores = []
AP_scores = []
for i in range(n_iter):
    # edge_split is called inside the loop, so every round gets its own split.
    train_test_split = edge_split(adj, test, val, prevent_disconnect=prevent_disconnect)

    if model_name == 'spectral_clustering':
        scores = spectral_clustering_scores(train_test_split, random_state=3, dim=dim)
    elif model_name == 'deepwalk':
        scores = deepwalk_scores(train_test_split, dim=dim, walk_len=walk_len, num_walk=num_walk, window=window)
    else:
        # Otherwise `scores` would be undefined on the first round, or silently
        # reused from a previous run of this cell.
        raise ValueError(
            "Unsupported model_name {!r}; expected 'spectral_clustering' or 'deepwalk'".format(model_name)
        )

    test_roc = scores['test_roc']
    test_ap = scores['test_ap']
    AUC_scores.append(test_roc)
    AP_scores.append(test_ap)

    print('Experiment {} result - ROC(AUC) score: {}, AP score: {}'.format(i + 1, np.round(test_roc, 5), np.round(test_ap, 5)))

Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:15<00:00, 67.52s/it]


Experiment 1 result - ROC(AUC) score: 0.70303, AP score: 0.76751


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:27<00:00, 68.75s/it]


Experiment 2 result - ROC(AUC) score: 0.7016, AP score: 0.76591


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:16<00:00, 67.63s/it]


Experiment 3 result - ROC(AUC) score: 0.69844, AP score: 0.76412


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:16<00:00, 67.65s/it]


Experiment 4 result - ROC(AUC) score: 0.70432, AP score: 0.77182


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:16<00:00, 67.65s/it]


Experiment 5 result - ROC(AUC) score: 0.70068, AP score: 0.76437


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:13<00:00, 67.40s/it]


Experiment 6 result - ROC(AUC) score: 0.6968, AP score: 0.76431


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:19<00:00, 67.93s/it]


Experiment 7 result - ROC(AUC) score: 0.70072, AP score: 0.76867


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:23<00:00, 68.36s/it]


Experiment 8 result - ROC(AUC) score: 0.69306, AP score: 0.75852


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:22<00:00, 68.29s/it]


Experiment 9 result - ROC(AUC) score: 0.69858, AP score: 0.76995


Computing transition probabilities:   0%|          | 0/19717 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [11:25<00:00, 68.54s/it]


Experiment 10 result - ROC(AUC) score: 0.70314, AP score: 0.77314


In [None]:
# Report mean ± standard deviation of each metric, as percentages with two decimals.
for _metric_label, _metric_values in (('AUC mean:', AUC_scores), ('AP mean:', AP_scores)):
    _mean_pct = np.round(np.mean(_metric_values) * 100, 2)
    _std_pct = np.round(np.std(_metric_values) * 100, 2)
    print(_metric_label, '{:.2f} ± {:.2f}'.format(_mean_pct, _std_pct))

AUC mean: 70.00 ± 0.32
AP mean: 76.68 ± 0.41


In [None]:
# Pair of per-round metric lists: (ROC AUC values, AP values).
results = AUC_scores, AP_scores
# Timestamp in UTC+9 from a timezone-aware clock, so the value no longer depends
# on the host's local timezone (a naive now() + 9h is only right on a UTC host).
date = datetime.now(timezone(timedelta(hours=9))).strftime("%Y-%m-%d %H:%M:%S")

In [None]:
# Empty, insertion-ordered record for this run's log entry.
log_dict = OrderedDict()

In [None]:
# Fill the run record in one pass; the pair order below is the key order of the
# stored record. 'setting_order' names the positions of the hparams tuple.
log_dict.update([
    ('model', model_name),
    ('dataset', dataset),
    ('datetime', date),
    ('setting_order', ['emb_dim', 'test_val_ratio', 'prevent_disconnect', 'walk_len', 'num_walk', 'window_size']),
    ('setting_value', hparams),
    ('iteration', n_iter),
    ('roc', results[0]),
    ('roc_mean', np.mean(results[0])),
    ('roc_std', np.std(results[0])),
    ('ap', results[1]),
    ('ap_mean', np.mean(results[1])),
    ('ap_std', np.std(results[1])),
])
# Last expression of the cell: display the record as a key/value table.
pd.DataFrame(log_dict.items(), columns=['key', 'value'])

Unnamed: 0,key,value
0,model,deepwalk
1,dataset,pubmed
2,datetime,2021-09-10 14:05:22
3,setting_order,"[emb_dim, test_val_ratio, prevent_disconnect, ..."
4,setting_value,"(128, [0.1, 0.05], False, 80, 10, 10)"
5,iteration,10
6,roc,"[0.7030309188833427, 0.7015974036870023, 0.698..."
7,roc_mean,0.700038
8,roc_std,0.0032267
9,ap,"[0.7675081559605261, 0.7659105173083716, 0.764..."


In [None]:
# Append this run's record to the JSON log, which holds a list of records.
# NOTE: re-running this cell appends the same record again.
results_path = 'results_link_prediction(etc_emb).json'
try:
    # Context manager closes the handle; the old json.load(open(...)) leaked it.
    with open(results_path) as f:
        data = json.load(f)
except FileNotFoundError:
    # First run: start a new log instead of requiring a manual init cell.
    data = []
data.append(log_dict)
# Serialize before opening for write: if the record is not JSON-serializable,
# the error is raised here and the existing file is not truncated.
payload = json.dumps(data)
with open(results_path, 'w') as f:
    f.write(payload)
# Timezone-aware UTC+9 timestamp, independent of the host's local timezone.
print("Last data saved at: {}".format(datetime.now(timezone(timedelta(hours=9))).strftime("%Y-%m-%d %H:%M:%S")))
print("Total data num: {}".format(len(data)))

Last data saved at: 2021-09-10 14:05:23
Total data num: 9


In [None]:
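# One-time reset: uncomment to overwrite the results file with an empty list.
# WARNING: this discards every record saved so far.
# empty = []
# with open('results_link_prediction(etc_emb).json', 'w') as f:
#     json.dump(empty, f)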

In [None]:
# data = json.load(open('results_link_prediction(etc_emb).json'))
# print("Total data num: {}".format(len(data)))

Total data num: 2
