In [1]:
import pickle
import numpy as np
import itertools
import torch

# Load the three pickled inputs. Context managers guarantee each file handle
# is closed as soon as its payload has been read (the bare open() calls
# previously left all three handles open for the life of the kernel).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this cell on pickles from a trusted source.
with open("features.pickle", 'rb') as file:
    features = pickle.load(file)

with open("hypergraph.pickle", 'rb') as file:
    hypergraph = pickle.load(file)

with open("labels.pickle", 'rb') as file:
    labels = pickle.load(file)

In [2]:
# Expand every hyperedge into all 2-element combinations of its members,
# keeping the same iteration order as a nested for-loop would produce.
pairwise_edges = (
    pair
    for members in hypergraph.values()
    for pair in itertools.combinations(members, 2)
)
edges = list(pairwise_edges)
print('# of edges in graph: %d' % len(edges))

# One row per edge, one column per endpoint.
edges = np.array(edges)

# of edges in graph: 500240


In [3]:
import scipy.sparse as sp

def normalize(mx):
    """Row-normalize a sparse matrix.

    Every row of ``mx`` is multiplied by the reciprocal of its row sum, so
    rows with a non-zero sum end up summing to 1. Rows whose sum is zero get
    a scale factor of 0 and therefore stay all-zero.

    Parameters
    ----------
    mx : scipy sparse matrix
        Matrix to normalize. Integer dtypes are accepted.

    Returns
    -------
    The result of ``sp.diags(1 / rowsum).dot(mx)``.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    # np.power(int_array, -1) raises ValueError, so promote non-float sums.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)

    # Invert only the non-zero sums; this avoids the divide-by-zero
    # RuntimeWarning while still mapping empty rows to a factor of 0.
    r_inv = np.zeros_like(rowsum)
    nonzero = rowsum != 0
    r_inv[nonzero] = np.power(rowsum[nonzero], -1)
    # np.power can still yield inf (e.g. subnormal sums); keep those at 0.
    r_inv[np.isinf(r_inv)] = 0.

    return sp.diags(r_inv).dot(mx)

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor.

    Parameters
    ----------
    sparse_mx : scipy sparse matrix
        Any matrix that supports ``.tocoo()``.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor with float32 values, int64 indices and the same
        shape as ``sparse_mx``.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # Stack row and column coordinates into the 2 x nnz index layout.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse.FloatTensor is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is the supported equivalent.
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))


# Square adjacency matrix with one row/column per entry of `labels`;
# each row of `edges` contributes a weight of 1 at (edges[i, 0], edges[i, 1]).
num_nodes = len(labels)
edge_rows, edge_cols = edges[:, 0], edges[:, 1]
adj = sp.coo_matrix(
    (np.ones(len(edges)), (edge_rows, edge_cols)),
    shape=(num_nodes, num_nodes),
    dtype=np.float32,
)

# Symmetrize: wherever the transposed entry is larger than the original one,
# add the transposed entry and remove the original.
adj_t = adj.T
transpose_is_larger = adj_t > adj
adj = adj + adj_t.multiply(transpose_is_larger) - adj.multiply(transpose_is_larger)

# Add the identity (self-loops), row-normalize, then convert to a torch sparse tensor.
adj_with_self_loops = adj + sp.eye(adj.shape[0])
adj = sparse_mx_to_torch_sparse_tensor(normalize(adj_with_self_loops))

adj

tensor(indices=tensor([[    0,   283,  5027,  ..., 10896, 14475, 41301],
                       [    0,     0,     0,  ..., 41301, 41301, 41301]]),
       values=tensor([0.0244, 0.0345, 0.0870,  ..., 0.3333, 0.2000, 0.2500]),
       size=(41302, 41302), nnz=906564, layout=torch.sparse_coo)

In [4]:
# Row-normalize the feature matrix, then densify it into a torch FloatTensor.
features_row_normalized = normalize(features)
features = torch.FloatTensor(features_row_normalized.toarray())
features

tensor([[0.0000, 0.0000, 0.0204,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0250,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]])

In [5]:
# Convert the unpickled labels into a torch LongTensor.
# NOTE(review): this rebinds `labels`, so re-running the cell passes the
# already-converted tensor back into the constructor — confirm that is acceptable.
labels = torch.LongTensor(labels)
# Bare expression so the notebook displays the resulting tensor.
labels

tensor([3, 3, 3,  ..., 3, 3, 3])

In [6]:
# Name of the output directory that receives the three tensors.
filename = 'dblp'
import os
# exist_ok=True keeps the cell re-runnable: os.mkdir raised FileExistsError
# once the directory had been created by an earlier run.
os.makedirs(filename, exist_ok=True)
torch.save(adj, os.path.join(filename, 'adj.pt'))
torch.save(features, os.path.join(filename, 'features.pt'))
torch.save(labels, os.path.join(filename, 'labels.pt'))