In [1]:
import pickle
import numpy as np
import itertools
import torch

# Load the three dataset artefacts. Each file is opened in a `with` block so
# the handle is closed as soon as the object has been deserialised.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only run this on pickles that come from a trusted source.
with open("features.pickle", 'rb') as file:
    features = pickle.load(file)

with open("hypergraph.pickle", 'rb') as file:
    hypergraph = pickle.load(file)

with open("labels.pickle", 'rb') as file:
    labels = pickle.load(file)

In [4]:
# Quick sanity report: hyperedge count, node count (one label per node is
# assumed here -- the node count is taken from len(labels)), feature shape.
print('# of hyperedges in graph: %d' % (len(hypergraph),))
print('# of nodes in graph: %d' % (len(labels),))
print('feature shape', features.shape)

# of hyperedges in graph: 1079
# of nodes in graph: 1458
feature shape (1458, 3703)


In [11]:
# Number of members in every hyperedge, in the dict's iteration order.
len_list = [len(list(members)) for members in hypergraph.values()]
print(
    'Avg length of hyperedges', np.mean(len_list),
    'and the std', np.std(len_list),
)

Avg length of hyperedges 3.200185356811863 and the std 2.02321099430073


In [4]:
# Expand each hyperedge into all of its unordered member pairs; pairs that
# occur in several hyperedges are kept once per occurrence.
edges = [
    pair
    for hyperedge in hypergraph.values()
    for pair in itertools.combinations(hyperedge, 2)
]
print('# of edges in graph: %d' % len(edges))

# One row per pair, as built by np.array from the list of 2-tuples.
edges = np.array(edges)

# of edges in graph: 6007


In [5]:
import scipy.sparse as sp

def normalize(mx):
    """Row-normalize a sparse matrix.

    Every row is multiplied by the reciprocal of its row sum, so rows with a
    non-zero sum add up to 1 afterwards. Rows whose sum is exactly zero are
    multiplied by 0 instead of by infinity.

    Parameters
    ----------
    mx : scipy sparse matrix
        Matrix to scale. Integer-valued matrices are accepted; their row sums
        are promoted to float before inversion.

    Returns
    -------
    scipy sparse matrix
        ``diag(1 / rowsum) . mx`` with the zero-sum convention above.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    # Integer sums cannot hold reciprocals (and np.power(int, -1) raises), so
    # promote them; floating sums keep their own precision.
    if not np.issubdtype(rowsum.dtype, np.floating):
        rowsum = rowsum.astype(np.float64)
    # Invert only the non-zero sums: no division by zero, no inf to patch up.
    r_inv = np.zeros_like(rowsum)
    nonzero = rowsum != 0
    r_inv[nonzero] = 1.0 / rowsum[nonzero]
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor.

    Parameters
    ----------
    sparse_mx : scipy sparse matrix
        Any format that supports ``.tocoo()``.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor with float32 values, int64 indices and the same
        shape as ``sparse_mx``.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    # Stack the coordinates into a 2 x nnz array: row indices, then columns.
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is its supported replacement.
    return torch.sparse_coo_tensor(indices, values, shape)


# Square node-by-node matrix with a 1.0 recorded for every generated pair.
num_nodes = len(labels)
pair_rows, pair_cols = edges[:, 0], edges[:, 1]
adj = sp.coo_matrix(
    (np.ones(len(edges)), (pair_rows, pair_cols)),
    shape=(num_nodes, num_nodes),
    dtype=np.float32,
)

# Symmetrize: wherever the transposed entry is larger, swap it in, so each
# (i, j) ends up holding the larger of adj[i, j] and adj[j, i].
transpose_is_larger = adj.T > adj
adj = adj + adj.T.multiply(transpose_is_larger) - adj.multiply(transpose_is_larger)

# Add self-loops, row-normalize, and hand the result to torch.
adj = sparse_mx_to_torch_sparse_tensor(normalize(adj + sp.eye(adj.shape[0])))

adj

tensor(indices=tensor([[   0,   59,   90,  ..., 1257, 1355, 1457],
                       [   0,    0,    0,  ..., 1457, 1457, 1457]]),
       values=tensor([0.0909, 0.0476, 0.1111,  ..., 0.2857, 0.2857, 0.1429]),
       size=(1458, 1458), nnz=10072, layout=torch.sparse_coo)

In [6]:
# Row-normalize the feature matrix, densify it, and wrap it as a float32
# torch tensor.
features = torch.FloatTensor(np.array(normalize(features).todense()))
features

tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0345, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]])

In [7]:
# Convert the unpickled labels into a torch LongTensor (int64).
# NOTE(review): this rebinds `labels`, so re-running the cell feeds the tensor
# back into the constructor -- confirm that is acceptable, or run from a fresh
# kernel.
labels = torch.LongTensor(labels)
labels

tensor([1, 4, 1,  ..., 0, 4, 0])

In [8]:
# Persist the processed tensors under a directory named after the dataset.
filename = 'citeseer'
import os
# exist_ok=True keeps the cell re-runnable: os.mkdir raised FileExistsError
# whenever the directory was already there from a previous run.
os.makedirs(filename, exist_ok=True)
torch.save(adj, os.path.join(filename, 'adj.pt'))
torch.save(features, os.path.join(filename, 'features.pt'))
torch.save(labels, os.path.join(filename, 'labels.pt'))