In [7]:
import torch
import torch_geometric
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec
import torch_geometric.transforms as T
from sklearn.cluster import KMeans
from math import ceil

import os
import os.path as osp
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.eval_metrics import *

torch.manual_seed(0)

<torch._C.Generator at 0x10c26d510>

In [8]:
dataset = 'Cora'
path = osp.join('..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [16]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Node2Vec(data.edge_index, embedding_dim=64, walk_length=20, context_size=10, walks_per_node=10,
                 num_negative_samples=1, p=1.0, q=1.0, sparse=True).to(device)
batch_size = 128
loader = model.loader(batch_size=batch_size, shuffle=True, num_workers=4)
optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.001)


In [12]:
def train():
    model.train()
    total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

for epoch in range(1, 51):
    loss = train()
    model.eval()
    z = model().cpu().detach().numpy()
    kmeans = KMeans(n_clusters=len(set(data.y.numpy())), n_init=10)
    predicted_clusters = kmeans.fit_predict(z)
    predicted_clusters_tensor = torch.tensor(predicted_clusters, dtype=torch.long)
    acc, nmi = eval_metrics(data.y, predicted_clusters_tensor)
    if (epoch % 5 == 0):
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, NMI: {nmi}, ACC: {acc}')

TypeError: cannot pickle 'PyCapsule' object

In [21]:
model.eval()
z = model().cpu().detach().numpy()

kmeans = KMeans(n_clusters=dataset.num_classes, n_init=10)
predicted_clusters = kmeans.fit_predict(z)

cluster_centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float)
num_clusters = cluster_centers.size(0)
cluster_adj_matrix = np.zeros((num_clusters, num_clusters))

for i in range(data.edge_index.size(1)):
    src, dest = data.edge_index[:, i]
    src_cluster = predicted_clusters[src.item()]
    dest_cluster = predicted_clusters[dest.item()]
    if src_cluster != dest_cluster:
        cluster_adj_matrix[src_cluster, dest_cluster] = 1
        cluster_adj_matrix[dest_cluster, src_cluster] = 1

cluster_edge_index = torch.tensor(np.array(np.nonzero(cluster_adj_matrix)), dtype=torch.long)
clustered_data = torch_geometric.data.Data(x=cluster_centers, edge_index=cluster_edge_index)

In [22]:
clustered_data

Data(x=[7, 64], edge_index=[2, 42])