In [None]:
import os
import sys

import numpy as np
import torch
from torch_geometric.data import Data

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

module_path = os.path.abspath(os.path.join('../gmn_config/'))
if module_path not in sys.path:
    sys.path.append(module_path)

from gmn_config.graph_utils import *

from gmn_config.evaluation import compute_similarity, auc
from gmn_config.loss import pairwise_loss, triplet_loss
from gmn_config.gmn_utils import *
from gmn_config.configure_cosine import *

# --- GMN setup: device selection, config, and pretrained weights -------------
# Restrict this process to the GPU with index 1 in PCI bus order. With a single
# visible device it is addressed as "cuda:0" below.
# NOTE(review): these variables only take effect if CUDA has not been
# initialised yet in this kernel — confirm nothing above touches the GPU.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

config = get_default_config()

# NOTE: no RNG seed is set in this cell; the original call is left disabled
# because `seed` is not defined anywhere in this notebook:
# torch.manual_seed(seed + 2)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
# NOTE(review): anomaly detection is a global debugging switch and slows down
# every backward pass that runs later in this kernel. Kept as in the original;
# consider turning it off once debugging is done.
torch.autograd.set_detect_anomaly(True)

# `optimizer` is returned by build_model but the name is re-bound by the VGAE
# training cell later on; only `gmn` is used afterwards (in eval mode).
gmn, optimizer = build_model(config, 64, 4)

# map_location makes the checkpoint loadable regardless of the device it was
# saved from (e.g. a GPU checkpoint on a CPU-only run).
# torch.load unpickles the file: only load checkpoints from a trusted source.
gmn_state = torch.load("../gmn_config/model64v2.pth", map_location=device)
gmn.load_state_dict(gmn_state)
gmn.to(device)
gmn.eval()

In [None]:
import torch
import torch.nn as nn
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import VGAE, GCNConv
from torch_geometric.utils import train_test_split_edges
import torch.nn.functional as F
from torch_geometric.nn.models import InnerProductDecoder, VGAE
from torch_geometric.nn.conv import GCNConv
from torch_geometric.utils import negative_sampling, remove_self_loops, add_self_loops
import os.path as osp
from torch.optim import Adam
from torch_geometric.transforms import RandomLinkSplit
from sklearn.cluster import KMeans
import torch_geometric
from torch_geometric import utils

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.eval_metrics import *

# Re-bind `device` for the VGAE cells: the default CUDA device when one is
# usable, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# --- Load Cora and replace its edges with a damped, normalised graph ---------
# Separate names for the dataset identifier and the dataset object, so that
# `dataset` always refers to the Planetoid instance used by later cells.
dataset_name = 'Cora'
path = osp.join('..', 'data', dataset_name)
dataset = Planetoid(path, dataset_name, transform=T.NormalizeFeatures())
data = dataset[0]

# Build A = I - delta * L, where L is the symmetric-normalised Laplacian.
delta = 0.85
# The node count is passed explicitly so that the Laplacian, the dense matrix
# and the identity below are guaranteed to have the same N x N shape.
edge_index, edge_weight = utils.get_laplacian(
    data.edge_index,
    data.edge_weight,
    normalization='sym',
    num_nodes=data.num_nodes,
)
# to_dense_adj returns a batch of shape [1, N, N]; take the single graph so
# that L and A are plain 2-D matrices.
L = utils.to_dense_adj(
    edge_index,
    edge_attr=edge_weight,
    max_num_nodes=data.num_nodes,
)[0]
A = torch.eye(data.num_nodes) - delta * L

# Overwrite the graph stored on `data` with the non-zero entries of A.
# Note: later cells pass only `data.edge_index` to the encoder, so the weights
# stored in `data.edge_weight` are not consumed by the model in this notebook.
data.edge_index, data.edge_weight = utils.dense_to_sparse(A)

# Reference graph for the GMN similarity score. `reduce_dimensions` comes from
# the gmn_config star imports; its output is used as the node features here.
original_data = Data(x=reduce_dimensions(data.x.numpy()), edge_index=data.edge_index)

In [None]:
class GCNEncoder(nn.Module):
    """GCN encoder with one shared layer and two parallel output heads.

    The shared ``GCNConv`` layer (followed by ReLU) maps the input features to
    ``hidden_channels``; two further ``GCNConv`` layers map that hidden
    representation to ``out_channels`` each and are returned as a pair.

    NOTE(review): the second head is called ``logvar`` here; how it is
    interpreted depends on the model that consumes this encoder — confirm
    against the VGAE implementation in use.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Construction order is kept (shared, mu, logvar) so that parameter
        # initialisation consumes the RNG in the same sequence.
        self.gcn_shared = GCNConv(in_channels, hidden_channels)
        self.gcn_mu = GCNConv(hidden_channels, out_channels)
        self.gcn_logvar = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return ``(mu, logvar)`` computed from node features and edges."""
        hidden = self.gcn_shared(x, edge_index).relu()
        return self.gcn_mu(hidden, edge_index), self.gcn_logvar(hidden, edge_index)


class DeepVGAE(VGAE):
    """VGAE built from a ``GCNEncoder`` and an ``InnerProductDecoder``.

    ``encode``, ``kl_loss``, ``test`` and ``decoder`` are provided by the
    ``VGAE`` base class; this subclass only wires up the components and adds
    convenience methods on top of them.
    """

    def __init__(self, enc_in_channels, enc_hidden_channels, enc_out_channels):
        gcn_encoder = GCNEncoder(enc_in_channels, enc_hidden_channels, enc_out_channels)
        super().__init__(encoder=gcn_encoder, decoder=InnerProductDecoder())

    def forward(self, x, edge_index):
        """Encode the graph and decode a prediction for every node pair."""
        latent = self.encode(x, edge_index)
        return self.decoder.forward_all(latent)

    def loss(self, x, pos_edge_index, all_edge_index):
        """Return reconstruction loss on positive and sampled negative edges plus KL.

        Negative edges are sampled so that they avoid ``all_edge_index`` and
        self loops; as many negatives are drawn as there are positive edges.
        The KL term is scaled by ``1 / x.size(0)``.
        """
        eps = 1e-15  # keeps log() finite when a probability is exactly 0
        z = self.encode(x, pos_edge_index)

        pos_prob = self.decoder(z, pos_edge_index, sigmoid=True)
        pos_loss = -torch.log(pos_prob + eps).mean()

        # Normalise self loops (drop any existing ones, then add one per node)
        # so that the sampler never proposes a self loop as a negative edge.
        known_edges, _ = remove_self_loops(all_edge_index)
        known_edges, _ = add_self_loops(known_edges)
        neg_edge_index = negative_sampling(known_edges, z.size(0), pos_edge_index.size(1))

        neg_prob = self.decoder(z, neg_edge_index, sigmoid=True)
        neg_loss = -torch.log(1 - neg_prob + eps).mean()

        kl_term = 1 / x.size(0) * self.kl_loss()

        return pos_loss + neg_loss + kl_term

    def single_test(self, x, train_pos_edge_index, test_pos_edge_index, test_neg_edge_index):
        """Encode on the training edges (no gradients) and score the test edges.

        Returns the two values produced by the inherited ``test`` method,
        in the same order.
        """
        with torch.no_grad():
            z = self.encode(x, train_pos_edge_index)
        auc_score, ap_score = self.test(z, test_pos_edge_index, test_neg_edge_index)
        return auc_score, ap_score

In [None]:
# --- Train the VGAE and periodically evaluate the clustering -----------------
SEED = 0            # seeds torch's RNG (weight init, reparametrisation, negative sampling)
NUM_EPOCHS = 751    # epochs 0..750 inclusive
EVAL_EVERY = 50     # the plotting cell assumes range(0, 751, 50)

# Make the run repeatable. KMeans is already pinned through random_state=0.
# NOTE(review): on GPU, cudnn.benchmark=True may still cause small run-to-run
# differences.
torch.manual_seed(SEED)

model = DeepVGAE(enc_in_channels=data.num_features, enc_hidden_channels=128, enc_out_channels=64).to(device)
optimizer = Adam(model.parameters(), lr=0.01)

# The model lives on `device`, so its inputs must be there as well; `data`
# itself is left untouched on the CPU for the clustering and metric code.
train_x = data.x.to(device)
train_edge_index = data.edge_index.to(device)

# Edge endpoints as NumPy arrays, used to build the cluster-level graph.
edge_src, edge_dst = data.edge_index.cpu().numpy()
num_clusters = dataset.num_classes

losses = []
nmis = []
accs = []
aris = []
sims = []
for epoch in range(NUM_EPOCHS):
    model.train()
    optimizer.zero_grad()
    z = model.encode(train_x, train_edge_index)
    loss = model.recon_loss(z, train_edge_index) + (1 / data.num_nodes) * model.kl_loss()
    loss.backward()
    optimizer.step()

    if epoch % EVAL_EVERY != 0:
        continue

    # Evaluation: embed without tracking gradients, then cluster the embeddings.
    model.eval()
    with torch.no_grad():
        z = model.encode(train_x, train_edge_index)
    kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=0).fit(z.cpu().numpy())
    # Kept on the CPU, like data.y, for the metric computation below.
    predicted_labels = torch.tensor(kmeans.labels_)

    # Cluster-level graph: two clusters are connected (symmetrically) when at
    # least one edge joins a node of one to a node of the other.
    src_clusters = kmeans.labels_[edge_src]
    dst_clusters = kmeans.labels_[edge_dst]
    crossing = src_clusters != dst_clusters
    cluster_adj_matrix = np.zeros((num_clusters, num_clusters))
    cluster_adj_matrix[src_clusters[crossing], dst_clusters[crossing]] = 1
    cluster_adj_matrix[dst_clusters[crossing], src_clusters[crossing]] = 1

    cluster_edge_index = torch.tensor(np.array(np.nonzero(cluster_adj_matrix)), dtype=torch.long)
    cluster_centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float)

    clustered_data = torch_geometric.data.Data(x=cluster_centers, edge_index=cluster_edge_index)
    # Stored as a plain float so the history can be formatted and plotted
    # regardless of the scalar type `similarity` returns.
    sim = float(similarity(gmn, config, original_data, clustered_data))

    acc, nmi, ari = eval_metrics(data.y, predicted_labels)
    losses.append(loss.item())
    accs.append(acc)
    nmis.append(nmi)
    aris.append(ari)
    sims.append(sim)
    print(f'Epoch: {epoch:03d}, Loss: {loss.item():.3f}, NMI: {nmi:.3f}, ACC: {acc:.3f}, ARI: {ari:.3f}, SIM: {sim:.3f}')

In [None]:
# --- Cluster the final embeddings and build the cluster-level graph ----------
model.eval()
# Inputs are moved to the model's device for encoding; no gradients are needed.
with torch.no_grad():
    z = model.encode(data.x.to(device), data.edge_index.to(device)).cpu().numpy()

kmeans = KMeans(n_clusters=dataset.num_classes, n_init=10, random_state=0)
predicted_clusters = kmeans.fit_predict(z)

num_clusters = dataset.num_classes
cluster_adj_matrix = np.zeros((num_clusters, num_clusters))

# Two clusters are connected (symmetrically) when at least one edge joins a
# node of one to a node of the other; edges inside a cluster are ignored.
edge_src, edge_dst = data.edge_index.cpu().numpy()
src_clusters = predicted_clusters[edge_src]
dst_clusters = predicted_clusters[edge_dst]
crossing = src_clusters != dst_clusters
cluster_adj_matrix[src_clusters[crossing], dst_clusters[crossing]] = 1
cluster_adj_matrix[dst_clusters[crossing], src_clusters[crossing]] = 1

cluster_edge_index = torch.tensor(np.array(np.nonzero(cluster_adj_matrix)), dtype=torch.long)
cluster_centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float)

# One node per cluster, with the KMeans centroid as its feature vector.
clustered_data = torch_geometric.data.Data(x=cluster_centers, edge_index=cluster_edge_index)
print(clustered_data)

In [None]:
# Score the cluster-level graph against the reference graph with the GMN.
sim = similarity(
    gmn,
    config,
    original_data,
    clustered_data,
)
print(sim)

In [None]:
import matplotlib.pyplot as plt

# Epochs at which the training cell recorded metrics (every 50th, 0..750).
epochs = range(0, 751, 50)

fig, ax = plt.subplots(figsize=(10, 6))

# Plot each recorded series; values are coerced to plain floats so that
# scalar tensors / NumPy scalars in the histories are handled uniformly.
for label, series in (
    ('Loss', losses),
    ('NMI', nmis),
    ('ACC', accs),
    ('ARI', aris),
    ('SIM', sims),
):
    ax.plot(epochs, [float(value) for value in series], label=label)

ax.set_xlabel('Epochs')
ax.set_ylabel('Metrics')
ax.set_title('VGAE')
# Ticks follow the evaluated epochs, so the axis covers the whole run
# (the previous fixed range stopped at 450 while data extends to 750).
ax.set_xticks(list(epochs))
ax.set_ylim(top=2)
ax.legend()
plt.show()
