In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
cd /content/drive/MyDrive

/content/drive/MyDrive


In [None]:
pip install node2vec

In [7]:
# The star imports stay first so the explicit imports below win on any name clash.
from sc_dw.model import *
from sc_dw.utils import *
import numpy as np
from datetime import datetime, timedelta, timezone
import pandas as pd
import json
from collections import OrderedDict
import warnings; warnings.filterwarnings('ignore')

In [8]:
# --- Experiment configuration -------------------------------------------------
# Embedding model; the cells below branch on 'spectral_clustering' and 'deepwalk'.
model_name = 'deepwalk'

# Dataset key handed to load_data: 'citeseer', 'cora', 'pubmed', 'arx'
dataset = 'pubmed'

# Task handed to load_data: 'classification', 'link_prediction'
task = 'link_prediction'

# Forwarded to load_data as its third argument.
feat_norm = False

# Number of repeated split/train/evaluate runs.
n_iter = 10

# Value used for prevent_disconnect when model_name is 'deepwalk'.
prevent_disconnect_dw = False

In [9]:
# Embedding dimensionality passed to the scoring functions.
dim = 128

# [test_ratio, val_ratio]:
# len_test = len_total * test_ratio, len_val = len_total * val_ratio
test_val_ratio = [0.1, 0.05]
test, val = test_val_ratio

In [10]:
# Select the walk hyperparameters for the chosen model.
if model_name == 'spectral_clustering':
    # No walk settings are used on this branch, so they are recorded as None.
    walk_len = None
    num_walk = None
    window = None
    prevent_disconnect = False
elif model_name == 'deepwalk':
    walk_len = 80
    num_walk = 10
    window = 10
    prevent_disconnect = prevent_disconnect_dw
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError(
        "Unsupported model_name {!r}; expected 'spectral_clustering' or 'deepwalk'".format(model_name)
    )

In [11]:
# Settings tuple stored in the results log; the element order has to match the
# 'setting_order' list written next to it in the log record.
hparams = (dim, test_val_ratio, prevent_disconnect, walk_len, num_walk, window)

In [12]:
# Load the graph for the selected task; load_data yields an extra `labels`
# value when the task is 'classification'.
if task == 'link_prediction':
    adj, features = load_data(dataset, task, feat_norm)
elif task == 'classification':
    adj, features, labels = load_data(dataset, task, feat_norm)
else:
    # Without this guard an unknown task leaves adj/features undefined and the
    # failure only shows up as a NameError in a later cell.
    raise ValueError(
        "Unsupported task {!r}; expected 'link_prediction' or 'classification'".format(task)
    )

In [None]:
# Run the split -> embed -> score cycle n_iter times, keeping each run's test metrics.
AUC_scores, AP_scores = [], []
for i in range(n_iter):
    # Re-split the edges for this run.
    train_test_split = edge_split(adj, test, val, prevent_disconnect=prevent_disconnect)

    if model_name == 'spectral_clustering':
        scores = spectral_clustering_scores(train_test_split, random_state=3, dim=dim)
    elif model_name == 'deepwalk':
        scores = deepwalk_scores(
            train_test_split,
            dim=dim,
            walk_len=walk_len,
            num_walk=num_walk,
            window=window,
        )

    roc, ap = scores['test_roc'], scores['test_ap']
    AUC_scores.append(roc)
    AP_scores.append(ap)

    print('Experiment {} result - ROC(AUC) score: {}, AP score: {}'.format(i + 1, np.round(roc, 5), np.round(ap, 5)))

In [None]:
# Report mean ± standard deviation of each metric over all runs, in percent.
for label, values in (('AUC mean:', AUC_scores), ('AP mean:', AP_scores)):
    mean_pct = np.round(np.mean(values) * 100, 2)
    std_pct = np.round(np.std(values) * 100, 2)
    print(label, '{:.2f} ± {:.2f}'.format(mean_pct, std_pct))

AUC mean: 70.00 ± 0.32
AP mean: 76.68 ± 0.41


In [None]:
# Bundle the per-run metric lists as (ROC-AUC values, AP values).
results = AUC_scores, AP_scores
# Timestamp at a fixed UTC+9 offset. Asking for an aware "now" in that zone gives
# the same wall-clock string on any host, whereas adding 9 hours to the naive
# local time is only right when the host clock happens to run on UTC.
date = datetime.now(timezone(timedelta(hours=9))).strftime("%Y-%m-%d %H:%M:%S")

In [None]:
# Empty, insertion-ordered record that the next cell fills in.
log_dict = OrderedDict()

In [None]:
# Fill the log record: run configuration first, then raw and aggregated metrics.
roc_values, ap_values = results
log_dict.update([
    ('model', model_name),
    ('dataset', dataset),
    ('datetime', date),
    # Names for the positions of the 'setting_value' tuple below.
    ('setting_order', ['emb_dim', 'test_val_ratio', 'prevent_disconnect', 'walk_len', 'num_walk', 'window_size']),
    ('setting_value', hparams),
    ('iteration', n_iter),
    ('roc', roc_values),
    ('roc_mean', np.mean(roc_values)),
    ('roc_std', np.std(roc_values)),
    ('ap', ap_values),
    ('ap_mean', np.mean(ap_values)),
    ('ap_std', np.std(ap_values)),
])
# Last expression of the cell: show the record as a key/value table.
pd.DataFrame(log_dict.items(), columns=['key', 'value'])

Unnamed: 0,key,value
0,model,deepwalk
1,dataset,pubmed
2,datetime,2021-09-10 14:05:22
3,setting_order,"[emb_dim, test_val_ratio, prevent_disconnect, ..."
4,setting_value,"(128, [0.1, 0.05], False, 80, 10, 10)"
5,iteration,10
6,roc,"[0.7030309188833427, 0.7015974036870023, 0.698..."
7,roc_mean,0.700038
8,roc_std,0.0032267
9,ap,"[0.7675081559605261, 0.7659105173083716, 0.764..."


In [None]:
# Append this run's record to the JSON log in the current working directory.
results_path = 'results_link_prediction(sc_dw).json'

# Read the existing log through a context manager so the handle is closed
# right away; a missing file just means this is the first record.
try:
    with open(results_path) as f:
        data = json.load(f)
except FileNotFoundError:
    data = []

data.append(log_dict)
with open(results_path, 'w') as f:
    json.dump(data, f)

# Save time at a fixed UTC+9 offset, taken from an aware clock so the printed
# value does not depend on the host's local timezone.
print("Last data saved at: {}".format(datetime.now(timezone(timedelta(hours=9))).strftime("%Y-%m-%d %H:%M:%S")))
print("Total data num: {}".format(len(data)))

Last data saved at: 2021-09-10 14:05:23
Total data num: 9


In [None]:
# One-time bootstrap, kept disabled: uncommenting it overwrites
# 'results_link_prediction(etc_emb).json' with an empty JSON list.
# empty = []
# with open('results_link_prediction(etc_emb).json', 'w') as f:
#     json.dump(empty, f)

In [None]:
# Disabled check: reads 'results_link_prediction(etc_emb).json' and prints how
# many records it holds.
# data = json.load(open('results_link_prediction(etc_emb).json'))
# print("Total data num: {}".format(len(data)))

Total data num: 2
