In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, VGAE
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import train_test_split_edges, negative_sampling, to_networkx
import matplotlib.pyplot as plt
import networkx as nx

# Load dataset (using Cora as an example)
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]
data = train_test_split_edges(data)  # Splitting the edges for link prediction



In [2]:
data.x

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [3]:
data

Data(x=[2708, 1433], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], val_pos_edge_index=[2, 263], test_pos_edge_index=[2, 527], train_pos_edge_index=[2, 8976], train_neg_adj_mask=[2708, 2708], val_neg_edge_index=[2, 263], test_neg_edge_index=[2, 527])

In [4]:
data.train_pos_edge_index.shape

torch.Size([2, 8976])

In [6]:

# Define Encoder using GCN layers
class Encoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Encoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True)
        self.conv_mu = GCNConv(2 * out_channels, out_channels, cached=True)  # Mean layer
        self.conv_logstd = GCNConv(2 * out_channels, out_channels, cached=True)  # Log std layer

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Compute the mean and log standard deviation embeddings
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)


# Initialize model, optimizer, and loss function
out_channels = 16  # Latent space dimension
model = VGAE(GCNEncoder(dataset.num_features, out_channels))
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training function
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(data.x, data.train_pos_edge_index)
    loss = model.recon_loss(z, data.train_pos_edge_index)  # Reconstruction loss
    loss += (1 / data.num_nodes) * model.kl_loss()  # KL divergence
    loss.backward()
    optimizer.step()
    return loss.item()

# Link prediction test function
@torch.no_grad()
def test():
    model.eval()
    z = model.encode(data.x, data.train_pos_edge_index)
    pos_edge_index = data.test_pos_edge_index
    neg_edge_index = negative_sampling(
        edge_index=data.train_pos_edge_index, num_nodes=data.num_nodes,
        num_neg_samples=pos_edge_index.size(1), method='sparse')

    pos_pred = model.decode(z, pos_edge_index).view(-1)
    neg_pred = model.decode(z, neg_edge_index).view(-1)

    # Calculate the link prediction accuracy
    correct = ((pos_pred > 0).sum() + (neg_pred < 0).sum()).item()
    return correct / (pos_pred.size(0) + neg_pred.size(0))

# Training loop
epochs = 200
for epoch in range(epochs):
    loss = train()
    if epoch % 10 == 0 or epoch == epochs - 1:
        test_acc = test()
        print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, Test Accuracy: {test_acc:.4f}')

# Plotting the node embeddings
@torch.no_grad()
def plot_embeddings(z, color):
    plt.figure(figsize=(8, 8))
    z = z.cpu().numpy()
    plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")
    plt.title("Node Embeddings in Latent Space")
    plt.xlabel("Dimension 1")
    plt.ylabel("Dimension 2")
    plt.colorbar()
    plt.show()

# Plotting results
z = model.encode(data.x, data.train_pos_edge_index)
plot_embeddings(z, color=data.y)

ValueError: too many values to unpack (expected 2)