In [18]:
import numpy as np
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from scipy.sparse import coo_matrix, eye, diags, csr_matrix


In [19]:
# Load the Cora dataset. NormalizeFeatures is applied as a transform; the last
# print below shows the per-node feature-row sums after normalization.
dataset = Planetoid("/tmp/Cora", name="Cora", transform=T.NormalizeFeatures())

# Fetch the graph once through indexing: this applies the transform a single
# time and avoids the internal, discouraged `dataset.data` attribute.
data = dataset[0]

num_nodes = data.num_nodes
# NOTE(review): halving assumes every undirected edge is stored in both
# directions in `edge_index` -- confirm for other datasets.
num_edges = data.num_edges // 2

# Mask sums are 0-dim tensors; they format like plain integers in f-strings.
train_len = data.train_mask.sum()
val_len = data.val_mask.sum()
test_len = data.test_mask.sum()
# Nodes that belong to none of the three splits.
other_len = num_nodes - train_len - val_len - test_len

print(f"Dataset: {dataset.name}")
print(f"Num. nodes: {num_nodes} (train={train_len}, val={val_len}, test={test_len}, other={other_len})")
print(f"Num. edges: {num_edges}")
print(f"Num. node features: {dataset.num_node_features}")
print(f"Num. classes: {dataset.num_classes}")
print(f"Dataset len.: {len(dataset)}")
print(f"Sum of row values with normalization: {data.x.sum(dim=-1)}")

Dataset: Cora
Num. nodes: 2708 (train=140, val=500, test=1000, other=1068)
Num. edges: 5278
Num. node features: 1433
Num. classes: 7
Dataset len.: 1
Sum of row values with normalization: tensor([1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000])


In [20]:
# Display the first graph of the dataset as a dictionary of its attributes.
dataset[0].to_dict()

{'x': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 'edge_index': tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
         [ 633, 1862, 2582,  ...,  598, 1473, 2706]]),
 'y': tensor([3, 4, 4,  ..., 3, 3, 3]),
 'train_mask': tensor([ True,  True,  True,  ..., False, False, False]),
 'val_mask': tensor([False, False, False,  ..., False, False, False]),
 'test_mask': tensor([False, False, False,  ...,  True,  True,  True])}

In [21]:
# Function to create an adjacency matrix from edge index and edge attributes

def adjacency_matrix(edge_index, edge_attr=None, num_nodes=None):
    """Build a dense adjacency matrix from a COO edge list.

    Args:
        edge_index: Integer tensor of shape (2, num_edges); row 0 holds the
            source node of each edge and row 1 the target node.
        edge_attr: Optional tensor with one weight per edge. When ``None``,
            every edge gets weight 1.0.
        num_nodes: Number of nodes, i.e. the side length of the result. When
            ``None`` it is inferred as ``edge_index.max() + 1`` (0 for an
            empty edge list), so trailing isolated nodes are only counted if
            this argument is given explicitly.

    Returns:
        A dense ``(num_nodes, num_nodes)`` tensor whose entry ``[i, j]`` is the
        weight of edge ``i -> j`` and 0 where there is no edge.
    """
    # Previously a missing num_nodes produced the size (None, None) and crashed.
    if num_nodes is None:
        num_nodes = int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0

    # If edge_attr does not exist, assign weight=1 to all edges
    # (created on the same device as edge_index so both tensors match).
    if edge_attr is None:
        edge_attr = torch.ones(
            edge_index.shape[1], dtype=torch.float, device=edge_index.device
        )

    # Define the sparse adjacency matrix
    adj_matrix_sparse = torch.sparse_coo_tensor(
        edge_index, edge_attr, (num_nodes, num_nodes)
    )

    # Convert to a dense matrix
    return adj_matrix_sparse.to_dense()


In [22]:
# Build the dense adjacency matrix of the graph. The graph is read through
# `dataset[0]` rather than the internal, discouraged `dataset.data` attribute.
data = dataset[0]
adj = adjacency_matrix(
    edge_index=data.edge_index,
    edge_attr=data.edge_attr,
    num_nodes=data.num_nodes,
)
adj

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 1., 0.]])

In [23]:
# Compute the symmetrically normalized adjacency matrix D^{-1/2} A D^{-1/2}.
# With add_self_loops=True this becomes the form written with tildes in the
# paper: P~ = D~^{-1/2} A~ D~^{-1/2}, where A~ = A + I.
def normalize_adjacency_matrix(A, add_self_loops=False):
    """Symmetrically normalize an adjacency matrix by its row degrees.

    Args:
        A: Square adjacency matrix. Anything that is not already a
            ``csr_matrix`` is converted with ``csr_matrix(A)``.
        add_self_loops: When True, the identity is added to ``A`` before the
            degrees are computed (A~ = A + I). Defaults to False, which keeps
            the previous behaviour of normalizing ``A`` as given.

    Returns:
        A SciPy sparse matrix equal to ``D^{-1/2} @ A @ D^{-1/2}``, where ``D``
        is the diagonal matrix of row sums of (the possibly self-looped) ``A``.
        Rows with zero degree contribute a scaling factor of 0 instead of inf.
    """
    # Ensure A is a sparse matrix
    if not isinstance(A, csr_matrix):
        A = csr_matrix(A)

    if add_self_loops:
        A = A + eye(A.shape[0], dtype=A.dtype, format="csr")

    degrees = np.array(A.sum(axis=1)).flatten()

    # 1/sqrt(0) is inf for isolated nodes; silence the warning and replace the
    # infinite entries with 0 so they cannot leak into the product.
    with np.errstate(divide="ignore"):
        inv_sqrt_degrees = 1.0 / np.sqrt(degrees)
    inv_sqrt_degrees[np.isinf(inv_sqrt_degrees)] = 0.0

    D_inv_sqrt = diags(inv_sqrt_degrees)
    normalized_A = D_inv_sqrt @ A @ D_inv_sqrt

    return normalized_A

In [24]:
# Convert a SciPy sparse matrix to a torch sparse COO tensor
def sparse_matrix_to_torch_sparse_tensor(sparse_matrix):
    """Convert a SciPy sparse matrix into a float32 torch sparse COO tensor.

    Args:
        sparse_matrix: Any SciPy sparse matrix exposing ``.tocoo()``.

    Returns:
        A ``torch.sparse_coo`` tensor of the same shape, with int64 indices and
        float32 values taken from the matrix's COO representation.
    """
    coo = sparse_matrix.tocoo()

    # Stack row/column indices into the (2, nnz) layout torch expects.
    # astype() returns fresh writable copies, which torch.from_numpy requires.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data.astype(np.float32))
    shape = torch.Size(coo.shape)

    # torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor
    # legacy constructor.
    return torch.sparse_coo_tensor(indices, values, shape)

In [25]:
# Normalize the dense adjacency matrix, then convert the SciPy result into a
# torch sparse tensor; the name is bound once, directly to the final tensor.
pre_processed_adj = sparse_matrix_to_torch_sparse_tensor(
    normalize_adjacency_matrix(adj)
)
pre_processed_adj

tensor(indices=tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
                       [ 633, 1862, 2582,  ...,  598, 1473, 2706]]),
       values=tensor([0.3333, 0.2887, 0.3333,  ..., 0.0870, 0.2500, 0.2500]),
       size=(2708, 2708), nnz=10556, layout=torch.sparse_coo)