In [None]:
from torch_geometric.datasets import Planetoid
import os.path as osp
import torch_geometric.transforms as T
import torch
from torch_geometric import utils
from torch_geometric.data import Data

import os
import sys
# Put the parent directory on the import path (only once) so that the
# project-local package imported right below can be resolved from there.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from utils.eval_metrics import *

# Load the Cora graph from ../data/Cora with the NormalizeFeatures transform
# attached, and keep its first (index 0) graph as `data`.
dataset_name = 'Cora'
path = osp.join('..', 'data', dataset_name)
feature_transform = T.NormalizeFeatures()
dataset = Planetoid(path, dataset_name, transform=feature_transform)
data = dataset[0]

# data.edge_index, data.edge_weight = gcn_norm(  
#                 data.edge_index, data.edge_weight, data.num_nodes,
#                 add_self_loops=False, dtype=data.x.dtype)
from sklearn.decomposition import PCA

def reduce_dimensions(features, new_dim=64, random_state=0):
    """Project a feature matrix onto its leading principal components.

    Args:
        features: 2-D array-like of shape (n_samples, n_features). A
            ``torch.Tensor`` on any device is accepted as well; it is detached
            and copied to host memory before being handed to scikit-learn.
        new_dim: Number of principal components to keep.
        random_state: Seed forwarded to ``PCA``. For large inputs with few
            components scikit-learn may pick its randomized SVD solver, so a
            fixed seed is needed for repeatable output. Pass ``None`` to get
            the unseeded behaviour.

    Returns:
        A CPU tensor of dtype ``torch.float`` with shape (n_samples, new_dim).
    """
    if isinstance(features, torch.Tensor):
        # scikit-learn works on NumPy arrays, which must live in host memory.
        features = features.detach().cpu().numpy()
    pca = PCA(n_components=new_dim, random_state=random_state)
    reduced_features = pca.fit_transform(features)
    return torch.tensor(reduced_features, dtype=torch.float)

# Replace the graph's edges by the non-zero entries of A = I - delta * L_sym,
# where L_sym is the symmetrically normalized Laplacian of the original graph.
delta = 0.85
edge_index, edge_weight = utils.get_laplacian(data.edge_index, data.edge_weight, normalization='sym')
# NOTE(review): to_dense_adj is documented to return a batched tensor, so L
# (and A, via broadcasting) presumably carries a leading batch dimension of 1.
L = utils.to_dense_adj(edge_index, edge_attr=edge_weight)
A = torch.eye(data.num_nodes) - delta*L
data.edge_index, data.edge_weight = utils.dense_to_sparse(A)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
data = data.to(device)

# `data.x` may now live on the GPU, where `.numpy()` raises; copy it to host
# memory for PCA, then move the reduced features back so that `x` and
# `edge_index` of `original_data` sit on the same device.
reduced_x = reduce_dimensions(data.x.cpu().numpy()).to(device)
original_data = Data(x=reduced_x, edge_index=data.edge_index)

In [None]:
import torch
# Dense 0/1 adjacency matrix built from the dataset's own edge list
# (a fresh dataset[0], not the modified `data` object).
cora_graph = dataset[0]
edge_index, num_nodes = cora_graph.edge_index, cora_graph.num_nodes
adjacency_matrix = torch.zeros(num_nodes, num_nodes, dtype=torch.float)
source_nodes, target_nodes = edge_index[0], edge_index[1]
adjacency_matrix[source_nodes, target_nodes] = 1


In [None]:
from sklearn.cluster import SpectralClustering
# Spectral clustering directly on the adjacency matrix (used as a precomputed
# affinity), with one cluster per dataset class.
adjacency_matrix_np = adjacency_matrix.numpy()
clustering = SpectralClustering(
    n_clusters=dataset.num_classes,
    affinity='precomputed',
    assign_labels='discretize',
    # Fixed seed so the partition (and the metrics computed from it) can be
    # reproduced on a re-run; matches the seed used for KMeans in this notebook.
    random_state=0,
).fit(adjacency_matrix_np)
labels = clustering.labels_


In [None]:
from sklearn.metrics import normalized_mutual_info_score
# Compare the spectral-clustering assignment against the ground-truth classes.
labels_true = dataset[0].y
# `labels_true` is already a tensor: wrapping it in torch.tensor() triggers a
# copy-construct UserWarning, so take the copy with clone().detach() instead.
# `labels` is a NumPy array and still needs the conversion.
acc, nmi, ari = eval_metrics(labels_true.clone().detach(), torch.tensor(labels))
print(f'NMI: {nmi:.3f}, ACC: {acc:.3f}, ARI: {ari:.3f}')

In [None]:
from sklearn.cluster import KMeans
import numpy as np  # explicit: previously only available if a star import leaked it
import torch_geometric

# Build a coarse graph with one node per spectral cluster: two clusters are
# connected when at least one edge of `data` joins a node of one to a node of
# the other.

kmeans = KMeans(n_clusters=dataset.num_classes, n_init=10, random_state=0)
# KMeans expects a 2-D (n_samples, n_features) array; the 1-D label vector
# raised a ValueError, so it is passed as a single-feature column.
# NOTE(review): clustering the label ids themselves presumably just recovers
# the spectral partition, and the rows of `cluster_centers_` follow KMeans'
# cluster numbering rather than the ids in `labels`. If the cluster features
# were meant to come from node features, fit on those instead -- confirm intent.
predicted_clusters = kmeans.fit_predict(labels.reshape(-1, 1))

num_clusters = dataset.num_classes
cluster_adj_matrix = np.zeros((num_clusters, num_clusters))

# Map both endpoints of every edge to their cluster id in one shot. The edge
# list may live on the GPU, so bring it to host memory before indexing NumPy.
edge_endpoints = data.edge_index.cpu().numpy()
src_clusters = labels[edge_endpoints[0]]
dest_clusters = labels[edge_endpoints[1]]

# Only edges crossing a cluster boundary create a link; mark it symmetrically.
crossing = src_clusters != dest_clusters
cluster_adj_matrix[src_clusters[crossing], dest_clusters[crossing]] = 1
cluster_adj_matrix[dest_clusters[crossing], src_clusters[crossing]] = 1

cluster_edge_index = torch.tensor(np.array(np.nonzero(cluster_adj_matrix)), dtype=torch.long)
cluster_centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float)

clustered_data = torch_geometric.data.Data(x=cluster_centers, edge_index=cluster_edge_index)