In [None]:
import os
import sys

import numpy as np
import torch
from torch_geometric.data import Data

# Put the parent directory and its `gmn_config` sub-directory on the import
# path (once each) so the `gmn_config.*` imports that follow can be resolved.
# NOTE(review): the second entry presumably lets modules inside gmn_config
# import their siblings by bare name - confirm it is still needed.
for _relative_dir in ('..', '../gmn_config/'):
    module_path = os.path.abspath(os.path.join(_relative_dir))
    if module_path not in sys.path:
        sys.path.append(module_path)

from gmn_config.graph_utils import *

from gmn_config.evaluation import compute_similarity, auc
from gmn_config.loss import pairwise_loss, triplet_loss
from gmn_config.gmn_utils import *
from gmn_config.configure_cosine import *

# Expose only the GPU with PCI bus index 1 to this process. Because it is the
# single visible device, PyTorch addresses it as "cuda:0" below.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

config = get_default_config()

# torch.manual_seed(seed + 2)
# cuDNN autotuning is enabled and determinism is not enforced.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
# NOTE(review): anomaly detection is a global debugging switch that adds extra
# checks to every backward pass run later in this notebook - confirm it is
# still wanted outside of debugging sessions.
torch.autograd.set_detect_anomaly(True)

# Build the graph matching network and restore its pretrained weights.
# NOTE(review): 64 and 4 are presumably the node / edge feature dimensions
# expected by build_model - confirm against gmn_config.gmn_utils.
gmn, optimizer = build_model(config, 64, 4)
# map_location remaps tensors saved from a GPU onto whatever device is
# available here, so the checkpoint also loads on a CPU-only machine.
gmn.load_state_dict(torch.load("../gmn_config/model64_5.pth", map_location=device))
gmn.to(device)
gmn.eval()

In [None]:
import torch
import torch_geometric
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec
import torch_geometric.transforms as T
from sklearn.cluster import KMeans
from math import ceil
from torch_geometric import utils
from torch_geometric.data import Data

import os
import os.path as osp
import sys
# Ensure the parent directory is importable before the `utils` import below;
# the membership check keeps re-runs of this cell from adding duplicates.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.eval_metrics import *

# Seed every RNG the notebook draws from. torch drives Node2Vec's parameter
# initialisation; the KMeans calls further down pass no `random_state`, and
# scikit-learn estimators then fall back to NumPy's global RNG, so it has to
# be seeded as well for the clustering metrics to be repeatable.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
# Load the Cora citation graph with row-normalised node features.
# The dataset name gets its own variable so that `dataset` only ever refers
# to the Planetoid object.
dataset_name = 'Cora'
path = osp.join('..', 'data', dataset_name)
dataset = Planetoid(path, dataset_name, transform=T.NormalizeFeatures())
data = dataset[0]

# Replace the raw adjacency by A = I - delta * L, where L is the symmetric
# normalised Laplacian, i.e. A = (1 - delta) * I + delta * D^-1/2 Adj D^-1/2.
# The node count is passed explicitly so L always has num_nodes rows/columns
# and matches torch.eye(num_nodes) even if the highest-numbered nodes have no
# edges.
delta = 0.85
edge_index, edge_weight = utils.get_laplacian(
    data.edge_index, data.edge_weight, normalization='sym', num_nodes=data.num_nodes)
# to_dense_adj returns a batched tensor of shape (1, N, N); the identity
# broadcasts against it below.
L = utils.to_dense_adj(edge_index, edge_attr=edge_weight, max_num_nodes=data.num_nodes)
A = torch.eye(data.num_nodes) - delta*L
# Every non-zero entry of A (including the diagonal) becomes an edge of `data`.
data.edge_index, data.edge_weight = utils.dense_to_sparse(A)
# Graph handed to the similarity model later on; only the connectivity is
# kept, not the edge weights.
# NOTE(review): reduce_dimensions presumably projects the features down to the
# 64-d input the GMN was built with - confirm.
original_data = Data(x=reduce_dimensions(data.x.numpy()), edge_index=data.edge_index)

In [None]:
# Node2Vec embedding model trained on the diffused edge set. Only the
# connectivity in `data.edge_index` is passed in; `data.edge_weight` is unused.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Node2Vec(data.edge_index, embedding_dim=64, walk_length=20, context_size=10, walks_per_node=5,
                 num_negative_samples=1, p=1.0, q=1.0, sparse=True).to(device)
batch_size = 64
# The loader yields (positive, negative) random-walk batches for the loss.
loader = model.loader(batch_size=batch_size, shuffle=True, num_workers=4)
# sparse=True makes the embedding produce sparse gradients, hence SparseAdam.
# This rebinds `optimizer`, replacing the one returned by build_model earlier.
optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.001)
# `original_data` is already built in the data-loading cell from the same
# `data.x` / `data.edge_index`; it is not recomputed here to avoid a second,
# redundant reduce_dimensions pass.

In [None]:
def train():
    """Run one pass over `loader` and return the mean batch loss.

    Uses the notebook-level `model`, `loader`, `optimizer` and `device`.
    Each batch is a (positive walks, negative walks) pair that is moved to
    `device` before the loss is computed and one optimiser step is taken.
    """
    model.train()
    batch_losses = []
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        batch_loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    # Average over the number of batches the loader reports.
    return sum(batch_losses) / len(loader)

# Train Node2Vec and periodically evaluate the embedding: cluster it with
# k-means, collapse the graph to one node per cluster, score that coarse graph
# against `original_data` with the GMN, and report clustering metrics.
NUM_EPOCHS = 50
LOG_EVERY = 5

for epoch in range(1, NUM_EPOCHS + 1):
    loss = train()
    if epoch % LOG_EVERY != 0:
        # Metrics are only reported on logging epochs, so the k-means / GMN
        # evaluation is skipped on all others instead of being computed and
        # thrown away. The last epoch is a logging epoch, so the variables
        # left behind after the loop still describe the final model.
        continue

    model.eval()
    z = model().cpu().detach().numpy()
    # Same cluster count as the final clustering cell uses.
    kmeans = KMeans(n_clusters=dataset.num_classes, n_init=10)
    predicted_clusters = kmeans.fit_predict(z)
    predicted_clusters_tensor = torch.tensor(predicted_clusters, dtype=torch.long)
    cluster_centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float)
    num_clusters = cluster_centers.size(0)

    # Cluster-level adjacency: two clusters are linked when at least one edge
    # joins a node of one to a node of the other. Intra-cluster edges are
    # dropped. All edges are handled at once rather than one by one in Python.
    src_cluster = predicted_clusters[data.edge_index[0].cpu().numpy()]
    dest_cluster = predicted_clusters[data.edge_index[1].cpu().numpy()]
    crosses_clusters = src_cluster != dest_cluster
    cluster_adj_matrix = np.zeros((num_clusters, num_clusters))
    cluster_adj_matrix[src_cluster[crosses_clusters], dest_cluster[crosses_clusters]] = 1
    cluster_adj_matrix[dest_cluster[crosses_clusters], src_cluster[crosses_clusters]] = 1

    # Coarse graph: centroids as node features, cluster links as edges.
    cluster_edge_index = torch.tensor(np.array(np.nonzero(cluster_adj_matrix)), dtype=torch.long)
    clustered_data = Data(x=cluster_centers, edge_index=cluster_edge_index)
    sim = similarity(gmn, config, original_data, clustered_data)
    acc, nmi, ari = eval_metrics(data.y, predicted_clusters_tensor)
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, NMI: {nmi}, ACC: {acc}, ARI: {ari:.3f}, SIM: {sim:.3f}')

In [None]:
# Final clustering of the trained embedding and construction of the
# cluster-level ("coarse") graph.
model.eval()
z = model().cpu().detach().numpy()

kmeans = KMeans(n_clusters=dataset.num_classes, n_init=10)
predicted_clusters = kmeans.fit_predict(z)

cluster_centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float)
num_clusters = cluster_centers.size(0)

# Map both endpoints of every edge to their cluster label in one shot; only
# edges whose endpoints fall in different clusters create a (symmetric) link.
src_cluster = predicted_clusters[data.edge_index[0].cpu().numpy()]
dest_cluster = predicted_clusters[data.edge_index[1].cpu().numpy()]
crosses_clusters = src_cluster != dest_cluster
cluster_adj_matrix = np.zeros((num_clusters, num_clusters))
cluster_adj_matrix[src_cluster[crosses_clusters], dest_cluster[crosses_clusters]] = 1
cluster_adj_matrix[dest_cluster[crosses_clusters], src_cluster[crosses_clusters]] = 1

# Centroids become node features; non-zero adjacency entries become edges.
cluster_edge_index = torch.tensor(np.array(np.nonzero(cluster_adj_matrix)), dtype=torch.long)
clustered_data = Data(x=cluster_centers, edge_index=cluster_edge_index)

In [None]:
# Display the cluster-level graph: k-means centroids as node features and
# inter-cluster links as edges.
clustered_data