In [1]:
import numpy as np
import pandas as pd
from scipy import io as sio
import time

import dgl

import torch
import torch_geometric as tg

Using backend: pytorch


In [2]:
# Top-level experiment settings. The next cell copies each of these into
# `model_config` under a key of the same name.
data_name, num_train = 'ACM', 80
adam_lr, hid_dim, drop_ratio = 0.01, 256, 0.5

In [3]:
# Remaining run options that are folded into `model_config` below.
batch_size = None
dropout = True  # not necessary
relu = True  # optional for shallow layers
early_stop = 100

#################Parameters for model#################
# Local wall-clock time; its first five fields (year, month, day, hour,
# minute) are embedded in the run alias so each run gets a distinct name.
loc_time = time.localtime()
model_config = {
    'data_name': data_name,
    'num_train': num_train,
    'num_epoch': 1001,  # alternative tried: 2001
    'batch_size': batch_size,
    'adam_lr': adam_lr,  # alternatives tried: 1e-2, 5e-3
    'l2_regularization': 5e-4,  # alternatives tried: 5e-4, 7e-4
    'dropout': dropout,
    'drop_ratio': drop_ratio,
    'relu': relu,
    'hid_dim': hid_dim,
    'verbose': 1,
    'early_stop': early_stop,
    # Checkpoint path template; the placeholders are filled in elsewhere.
    'model_dir': 'checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model',
    'alias': 'test_model_{}_{}_{}_{}_{}'.format(*loc_time[:5]),
}

# Record whether a CUDA device is visible to PyTorch.
model_config['use_cuda'] = torch.cuda.is_available()

# Echo the configuration. Named loop variables are used instead of `_` so that
# IPython's `_` (last cell output) shortcut is not shadowed in the notebook
# namespace.
print('Parameters of Model are:')
for cfg_key, cfg_value in model_config.items():
    print(cfg_key, cfg_value)

# set up device
# Added after the printout on purpose, so 'device' is not part of the echo.
device = torch.device('cuda:0' if model_config['use_cuda'] else 'cpu')
model_config['device'] = device

Parameters of Model are:
data_name ACM
num_train 80
num_epoch 1001
batch_size None
adam_lr 0.01
l2_regularization 0.0005
dropout True
drop_ratio 0.5
relu True
hid_dim 256
verbose 1
early_stop 100
model_dir checkpoints/{}_Epoch{}_HR{:.4f}_NDCG{:.4f}.model
alias test_model_2020_7_7_21_3
use_cuda True


In [4]:
# Load the ACM relation matrices (sparse) from the MATLAB file.
data = sio.loadmat('../data/acm/ACM.mat')
p_vs_l = data['PvsL']       # paper-field?
p_vs_a = data['PvsA']       # paper-author
p_vs_p = data['PvsP']       # paper-paper
# features
p_vs_t = data['PvsT']       # paper-term, bag of words
# labels
p_vs_c = data['PvsC']       # paper-conference, labels come from that

# We assign
# (1) KDD papers as class 0 (data mining),
# (2) SIGMOD and VLDB papers as class 1 (database),
# (3) SIGCOMM and MOBICOMM papers as class 2 (communication)
# conf_ids[i] is a column of PvsC; label_ids[i] is the class given to it.
conf_ids = [0, 1, 9, 10, 13]
label_ids = [0, 1, 2, 2, 1]

# Keep only the papers whose row sum over the chosen conference columns is
# non-zero; p_selected holds their row indices in the original matrices.
p_vs_c_filter = p_vs_c[:, conf_ids]
p_selected = (p_vs_c_filter.sum(1) != 0).A1.nonzero()[0]

# across levels
# Restrict to the selected papers, then drop field / author columns whose
# column sum is zero after that restriction.
p_vs_l = p_vs_l[p_selected]
p_vs_l = p_vs_l[:, np.where(p_vs_l.sum(0))[1]]
#
p_vs_a = p_vs_a[p_selected]
p_vs_a = p_vs_a[:, np.where(p_vs_a.sum(0))[1]]
# author-field relation obtained through shared papers (PvsA^T . PvsL).
# `@` is used so this is a matrix product whatever sparse container is in use.
a_vs_l = p_vs_a.transpose() @ p_vs_l

# within level
p_vs_p = p_vs_p[p_selected, :][:, p_selected]
a_vs_a = p_vs_a.transpose() @ p_vs_a
l_vs_l = p_vs_l.transpose() @ p_vs_l

# others
p_vs_t = p_vs_t[p_selected]
p_vs_c = p_vs_c[p_selected]

# Paper features are the dense paper-term rows.
features = torch.FloatTensor(p_vs_t.toarray())
# Authors and fields get identity (one-hot) features. torch.eye builds the
# float32 matrix directly, avoiding the dense float64 NumPy identity that
# np.identity would allocate first.
features_author = torch.eye(a_vs_a.shape[0], dtype=torch.float32)
features_field = torch.eye(l_vs_l.shape[0], dtype=torch.float32)

# pc_p / pc_c are the row (paper) and column (conference) indices of the
# non-zero entries of the filtered paper-conference matrix.
pc_p, pc_c = p_vs_c.nonzero()
labels = np.zeros(len(p_selected), dtype=np.int64)
for conf_id, label_id in zip(conf_ids, label_ids):
    # Every paper with an entry in this conference column gets its class id.
    labels[pc_p[pc_c == conf_id]] = label_id
labels = torch.LongTensor(labels)

In [22]:
# NOTE(review): this cell reloads the file and rebinds p_vs_l, p_vs_a, p_vs_t,
# p_vs_c, features and labels that cell In [4] also defines. Unlike In [4],
# p_vs_l / p_vs_a keep all of their columns here (no empty-column filtering).
data = sio.loadmat('../data/acm/ACM.mat')
p_vs_l = data['PvsL']       # paper-field?
p_vs_a = data['PvsA']       # paper-author
p_vs_t = data['PvsT']       # paper-term, bag of words
p_vs_c = data['PvsC']       # paper-conference, labels come from that

# We assign
# (1) KDD papers as class 0 (data mining),
# (2) SIGMOD and VLDB papers as class 1 (database),
# (3) SIGCOMM and MOBICOMM papers as class 2 (communication)
# conf_ids[i] is a column of PvsC; label_ids[i] is the class given to it.
conf_ids = [0, 1, 9, 10, 13]
label_ids = [0, 1, 2, 2, 1]

# Keep only the papers whose row sum over the chosen conference columns is
# non-zero, and restrict every paper-indexed matrix to those rows.
p_vs_c_filter = p_vs_c[:, conf_ids]
p_selected = (p_vs_c_filter.sum(1) != 0).A1.nonzero()[0]
p_vs_l = p_vs_l[p_selected]
p_vs_a = p_vs_a[p_selected]
p_vs_t = p_vs_t[p_selected]
p_vs_c = p_vs_c[p_selected]

# One bipartite relation per direction, merged into a single heterograph with
# node types 'paper', 'author' and 'field'.
pa = dgl.bipartite(p_vs_a, 'paper', 'pa', 'author')
ap = dgl.bipartite(p_vs_a.transpose(), 'author', 'ap', 'paper')
pl = dgl.bipartite(p_vs_l, 'paper', 'pf', 'field')
lp = dgl.bipartite(p_vs_l.transpose(), 'field', 'fp', 'paper')
hg = dgl.hetero_from_relations([pa, ap, pl, lp])

# Paper features are the dense paper-term rows.
features = torch.FloatTensor(p_vs_t.toarray())

# pc_p / pc_c are the row (paper) and column (conference) indices of the
# non-zero entries of the filtered paper-conference matrix.
pc_p, pc_c = p_vs_c.nonzero()
labels = np.zeros(len(p_selected), dtype=np.int64)
for conf_id, label_id in zip(conf_ids, label_ids):
    labels[pc_p[pc_c == conf_id]] = label_id
labels = torch.LongTensor(labels)

num_classes = 3

# Per-conference random split: within each conference the papers receive a
# shuffled, evenly spaced value in [0, 1]; thresholds 0.2 / 0.3 then give
# roughly 20% train, 10% validation and 70% test per conference.
# The mask is indexed by paper row (via pc_p), not by the position of the
# entry in nonzero()'s output: that order need not follow paper order, and the
# *_idx arrays below are positions in this mask.
# The shuffle draws from the global NumPy RNG, so the split differs between
# runs unless np.random.seed(...) has been called beforehand.
float_mask = np.zeros(len(p_selected))
for conf_id in conf_ids:
    pc_c_mask = (pc_c == conf_id)
    float_mask[pc_p[pc_c_mask]] = np.random.permutation(np.linspace(0, 1, pc_c_mask.sum()))
train_idx = np.where(float_mask <= 0.2)[0]
val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0]
test_idx = np.where(float_mask > 0.3)[0]

num_nodes = hg.number_of_nodes('paper')