In [5]:
import dgl
import numpy as np
import torch
import torch.nn as nn
import dgl.data
import networkx as nx
import scipy.sparse as sp
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sigvaemodel import SIGVAE_GIN
from loss import loss
import time

# Load the Cora citation dataset through DGL; it contains a single graph.
CoraDataset = dgl.data.CoraGraphDataset()
graph = CoraDataset[0]

# Per-node feature tensor stored on the graph under the 'feat' key.
featureMatrix = graph.ndata['feat']

# Show the feature tensor's size followed by the tensor itself, one per line.
print(featureMatrix.size(), featureMatrix, sep="\n")

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
torch.Size([2708, 1433])
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0526, 0.0000]])


In [38]:
def edges_to_lists(headMatrix, tailMatrix):
    """Zip two equally long endpoint sequences into a list of ``[head, tail]`` pairs.

    Parameters
    ----------
    headMatrix : indexable sequence
        Source endpoint of every edge.
    tailMatrix : indexable sequence
        Destination endpoint of every edge; must have the same length as
        ``headMatrix``.

    Returns
    -------
    list
        One two-element list ``[headMatrix[i], tailMatrix[i]]`` per position,
        in the original order.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    edge_count = len(headMatrix)
    assert edge_count == len(tailMatrix)
    # Index explicitly (rather than iterating) so elements are fetched exactly
    # the way positional lookup on the inputs returns them.
    return [[headMatrix[pos], tailMatrix[pos]] for pos in range(edge_count)]

In [39]:
def _dense_adjacency(src, dst, num_nodes, drop_self_loops=False):
    """Build a dense ``num_nodes x num_nodes`` adjacency matrix from edge endpoints.

    The shape is passed explicitly: without it ``coo_matrix`` infers the size
    from the largest index present, so a subset of edges could yield a smaller
    (or even non-square) matrix.
    """
    src = np.asarray(src)
    dst = np.asarray(dst)
    adjacency = sp.coo_matrix((np.ones(len(src)), (src, dst)),
                              shape=(num_nodes, num_nodes))
    if drop_self_loops:
        # Subtract the diagonal so self-loops do not appear in the result.
        adjacency = adjacency - sp.dia_matrix(
            (adjacency.diagonal()[np.newaxis, :], [0]), shape=adjacency.shape)
    return adjacency.todense()


def preprocess(graph):
    """Split a DGL graph's edges into train/test sets for link prediction.

    One tenth of the (directed) edge IDs, chosen at random, is held out as
    positive test edges; the rest are positive training edges. An equal number
    of node pairs that are not connected (and are not self-pairs) is drawn as
    negatives and split with the same proportions. Randomness comes from the
    global NumPy RNG.

    NOTE(review): the split is over directed edge IDs. If the graph stores an
    undirected edge as two directed edges, the reverse of a held-out edge can
    remain in the training graph -- confirm whether that is acceptable.

    Parameters
    ----------
    graph : DGL graph
        Must provide ``edges()``, ``number_of_edges()``, ``number_of_nodes()``
        and node features under ``ndata['feat']``.

    Returns
    -------
    tuple
        ``(adj, adj_train, adj_test, featureMatrix, train_graph, train_pos,
        test_pos, train_neg, test_neg)`` where the three adjacency entries are
        dense ``N x N`` matrices (train/test with the diagonal removed),
        ``train_graph`` is ``graph`` with the test edges removed, and the four
        edge collections are lists of ``[head, tail]`` pairs.
    """
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    u, v = graph.edges()

    # Random 90/10 split of the edge IDs.
    shuffled_eids = np.random.permutation(num_edges)
    testSize = num_edges // 10
    test_eids, train_eids = shuffled_eids[:testSize], shuffled_eids[testSize:]
    test_pos_u, test_pos_v = u[test_eids], v[test_eids]
    train_pos_u, train_pos_v = u[train_eids], v[train_eids]

    adj = _dense_adjacency(u.numpy(), v.numpy(), num_nodes)

    # Candidate negatives: every ordered pair with no edge, excluding the
    # diagonal. A boolean mask (instead of `1 - adj - I != 0`) keeps self-loops
    # and repeated edges from being mistaken for negatives.
    non_edge_mask = np.asarray(adj) == 0
    np.fill_diagonal(non_edge_mask, False)
    neg_u, neg_v = np.nonzero(non_edge_mask)

    # Sample without replacement so the train and test negatives are disjoint;
    # fall back to replacement only when there are too few candidates.
    neg_testidx = np.random.choice(len(neg_u), num_edges,
                                   replace=len(neg_u) < num_edges)
    test_neg_u, test_neg_v = neg_u[neg_testidx[:testSize]], neg_v[neg_testidx[:testSize]]
    train_neg_u, train_neg_v = neg_u[neg_testidx[testSize:]], neg_v[neg_testidx[testSize:]]

    adj_train = _dense_adjacency(train_pos_u.numpy(), train_pos_v.numpy(),
                                 num_nodes, drop_self_loops=True)
    adj_test = _dense_adjacency(test_pos_u.numpy(), test_pos_v.numpy(),
                                num_nodes, drop_self_loops=True)

    # Training graph: the input graph minus the held-out positive edges.
    train_graph = dgl.remove_edges(graph, test_eids)

    train_pos = edges_to_lists(train_pos_u, train_pos_v)
    test_pos = edges_to_lists(test_pos_u, test_pos_v)
    test_neg = edges_to_lists(test_neg_u, test_neg_v)
    train_neg = edges_to_lists(train_neg_u, train_neg_v)

    featureMatrix = graph.ndata['feat']

    return adj, adj_train, adj_test, featureMatrix, train_graph, train_pos, test_pos, train_neg, test_neg


In [None]:
def step(model, adj, feat, epoch, optimizer, device='cpu'):
    """Run one forward pass and, when an optimizer is given, one update.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(adj, feat)``; must return the six-tuple
        ``(generated_prob, mu, sigma, latent_representation, Z, epsilon)``.
    adj : torch.Tensor
        Adjacency tensor; ``adj.size(0)`` is used to scale the regulariser.
    feat : torch.Tensor
        Node feature tensor.
    epoch : int
        Current epoch index, used for the warm-up factor ``min(epoch/300, 1)``.
    optimizer : torch.optim.Optimizer or None
        ``None`` selects evaluation mode (no gradients, no update).
    device : str or torch.device
        Device the inputs are moved to before the forward pass.

    Returns
    -------
    tuple
        ``(latent_representation, generated_prob, loss_train)``.
    """
    training = optimizer is not None
    if training:
        model.train()
    else:
        model.eval()

    # Move the inputs once so the model and the loss see tensors on the same device.
    adj = adj.to(device)
    feat = feat.to(device)

    # Skip autograd bookkeeping entirely when only evaluating.
    with torch.set_grad_enabled(training):
        generated_prob, mu, sigma, latent_representation, Z, epsilon = model(adj, feat)
        # Do not bind the result to a local called `loss`: that would make
        # `loss` local to this function and the call itself would raise
        # UnboundLocalError.
        loss_rec, loss_prior, loss_post = loss(adj, mu, sigma, Z, latent_representation)

        # Plain Python float so the tensor arithmetic below stays in torch.
        WarmingUP = float(min(epoch / 300, 1))
        reg = (loss_post - loss_prior) * WarmingUP / (adj.size(0) ** 2)
        # NOTE(review): the warm-up factor is applied both in `reg` and here,
        # as in the original code -- confirm this is intended.
        loss_train = loss_rec + WarmingUP * reg

    if training:
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
    return latent_representation, generated_prob, loss_train

In [None]:
def _edge_endpoints(edges):
    """Return ``(heads, tails)`` index arrays for an edge collection.

    Accepts a sequence of ``[head, tail]`` pairs (shape ``(E, 2)``) and, when
    the second dimension is not 2, a ``(2, E)`` array of stacked endpoints.
    """
    edge_array = np.asarray(edges, dtype=np.int64)
    if edge_array.ndim != 2:
        raise ValueError("edges must be a 2-D collection of node index pairs")
    if edge_array.shape[1] == 2:
        return edge_array[:, 0], edge_array[:, 1]
    if edge_array.shape[0] == 2:
        return edge_array[0], edge_array[1]
    raise ValueError("edges must have shape (E, 2) or (2, E)")


def get_auc(embedding, pos_edges, neg_edges):
    """Mean ROC-AUC of inner-product link scores over embedding samples.

    Parameters
    ----------
    embedding : numpy.ndarray or torch.Tensor
        Node embeddings indexed as ``[sample, node, dim]``. A 2-D
        ``[node, dim]`` input is treated as a single sample.
    pos_edges, neg_edges : sequence
        Positive / negative edges as ``[head, tail]`` pairs.

    Returns
    -------
    torch.Tensor
        Zero-dimensional tensor holding the AUC averaged over samples.

    Raises
    ------
    ValueError
        If an edge collection is not 2-D with a dimension of size 2.
    """
    if isinstance(embedding, torch.Tensor):
        embedding = embedding.detach().cpu().numpy()
    embedding = np.asarray(embedding)
    if embedding.ndim == 2:
        embedding = embedding[np.newaxis]

    pos_head, pos_tail = _edge_endpoints(pos_edges)
    neg_head, neg_tail = _edge_endpoints(neg_edges)

    # Inner product of the two endpoint embeddings -> (samples, edges).
    preds_pos = np.einsum('abc,abc->ab',
                          embedding[:, pos_head, :], embedding[:, pos_tail, :])
    preds_neg = np.einsum('abc,abc->ab',
                          embedding[:, neg_head, :], embedding[:, neg_tail, :])

    # Join along the edge axis so every row lines up with `labels`.
    preds = np.concatenate([preds_pos, preds_neg], axis=1)
    labels = np.concatenate([np.ones(preds_pos.shape[1]),
                             np.zeros(preds_neg.shape[1])])

    # One AUC per embedding sample, then average.
    roc = torch.Tensor([roc_auc_score(labels, sample_scores)
                        for sample_scores in preds]).mean()

    return roc

In [1]:
adj, adj_train, adj_test, featMatrix, train_graph, train_pos, test_pos, train_neg, test_neg=preprocess(graph)

# preprocess returns dense matrices; the model and `step` (which calls
# `adj.to(device)`) need torch tensors.
adj_train_tensor = torch.FloatTensor(np.asarray(adj_train))
# Training adjacency with self-loops added on the diagonal.
adj_label = torch.FloatTensor(np.asarray(adj_train) + np.eye(adj_train.shape[0]))

# NOTE(review): input_dim is set to the number of nodes while the model is fed
# `featMatrix`; confirm against SIGVAE_GIN whether it should be the feature
# dimension (featMatrix.shape[1]) instead.
model=SIGVAE_GIN(Lu=1, 
                Lmu=1, 
                Lsigma=1, 
                input_dim=adj_train.shape[0], 
                output_dim_u=[32], 
                output_dim_mu=[16], 
                output_dim_sigma=[16],
                K=15,
                J=20,
                noise_dim=64,
                activation=nn.ReLU,
                dropout=0)
optimizer=torch.optim.Adam(model.parameters(), lr=0.0005)
epoch=100

for epc in range(epoch):
    t = time.time()
    model.train()

    # Forward pass used only to read out embeddings for the training AUC, so
    # no autograd graph is needed (and .numpy() would fail on a tensor that
    # requires grad).
    with torch.no_grad():
        generated_prob, mu, sigma, latent_representation, z_proc, epsilon = model(adj_train_tensor, featMatrix)

    # `step` zeroes the gradients, back-propagates and updates the parameters.
    latent_representation, generated_prob, loss_train = step(model, adj_train_tensor, featMatrix, epc, optimizer, device='cpu')

    z_proc = z_proc.cpu().numpy()
    roc = get_auc(z_proc, train_pos, train_neg)

    # Report the loop counter `epc`, not the total `epoch`, and use
    # %-formatting so the values are actually substituted.
    print("Epoch : ", '%d' % (epc + 1), "loss=", "%f" % loss_train.item(),
          "time=", "%f" % (time.time() - t))

    # Evaluate on the held-out edges every 10 epochs.
    if (epc + 1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            generated_prob, mu, sigma, latent_representation, z_proc, epsilon = model(adj_train_tensor, featMatrix)
        z_proc = z_proc.cpu().numpy()
        roc_score = get_auc(z_proc, test_pos, test_neg)
        rslt = "Test ROC score: %f" % float(roc_score)
        print("\n", rslt, "\n")