In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import networkx as nx
from sklearn.preprocessing import LabelBinarizer
from torch_geometric.nn import GCNConv
import scipy.sparse as sp
import import_ipynb
from util import utils

# 图数据加载

In [2]:
# 节点个数
num_nodes = 20
# 特征个数
num_features = 16
# 邻接矩阵
adjacency_matrix = torch.tensor(np.random.randint(2, size=(num_nodes, num_nodes)), dtype=torch.float32)
# 特征矩阵
node_features = torch.randn(num_nodes, num_features) 

In [3]:
node_features.size()

torch.Size([20, 16])

In [4]:
# 邻接矩阵转edge_index（PyG框架需要的coo形式）
utils.adjacency_matrix_2_edge_index(adjacency_matrix)

tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
          3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,
          5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,
          7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,
          9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10,
         10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
         14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
         15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
         17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
         19, 19, 19, 19],
        [ 2,  5,  7, 10, 11, 13, 14, 16, 17, 18,  0,  1,  3,  5,  6,  7, 10, 11,
  

# 定义图神经网络模型

## GAE

In [46]:
class GAE(nn. Module):
    def __init__(self, num_features, hidden_dim):
        super(GAE, self).__init__()
        self.encoder = GCNConv(num_features, hidden_dim)
        self.decoder = GCNConv(hidden_dim, num_features)
    def forward(self, node_features, edge_index):
        z = self.encoder(node_features, edge_index)
        z = F.relu(node_features)
        generated_node_features = self.decoder(z, edge_index)
        generated_node_features = F.sigmoid(generated_node_features)
        kld = None
        return z, generated_node_features, kld

## VGAE

In [47]:
class VGAE(nn. Module):
    def __init__(self, num_features, hidden_dim):
        super(VGAE, self).__init__()
        self.encoder = GCNConv(num_features, 2 * hidden_dim)
        self.decoder = GCNConv(hidden_dim, num_features)
        
    def forward(self, node_features, edge_index):
        z = self.encoder(node_features, edge_index)
        z = F.relu(z)
        mu, sigma = z.chunk(2, dim=1)
        # 从正态分布中采样
        epsilon = torch.randn_like(sigma)
        z = mu + torch.exp(0.5 * sigma) * epsilon
        generated_node_features = self.decoder(z, edge_index)
        kld = 0.5 / node_features.size(0) / node_features.size(1) * torch.sum(torch.pow(mu, 2) + torch.pow(sigma, 2) - torch.log(1e-8 + torch.pow(sigma, 2)) - 1)
        return z, generated_node_features, kld

# 初始化

In [48]:
hidden_dim = 16

In [49]:
model = GAE(num_features, hidden_dim)
# model = VGAE(num_features, hidden_dim)

In [50]:
# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# 模型训练

In [51]:
num_epochs = 100
clip_value = 0.000000001  

In [52]:
def train(model, node_features, edge_index, adjacency_matrix, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        z, generated_node_features, kld = model(node_features, edge_index)
        # 计算损失
        loss = criterion(generated_node_features, node_features)
        if kld != None:
            # 损失加上KL散度
            loss += kld
            # 梯度进行裁剪
            nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

In [53]:
train(model, node_features, edge_index, adjacency_matrix, criterion, optimizer, num_epochs)

Epoch [10/100], Loss: 0.9477
Epoch [20/100], Loss: 0.8882
Epoch [30/100], Loss: 0.8614
Epoch [40/100], Loss: 0.8479
Epoch [50/100], Loss: 0.8395
Epoch [60/100], Loss: 0.8335
Epoch [70/100], Loss: 0.8288
Epoch [80/100], Loss: 0.8249
Epoch [90/100], Loss: 0.8215
Epoch [100/100], Loss: 0.8186


# 生成新的图数据

In [55]:
z, generated_node_features, _ = model(node_features, edge_index)
print("新的特征矩阵：")
print(generated_node_features)
print("新的邻接矩阵：")
generated_adjacency_matrix = torch.mm(z, z.t())
generated_adjacency_matrix = torch.sigmoid(generated_adjacency_matrix)
print(generated_adjacency_matrix)

新的特征矩阵：
tensor([[0.3756, 0.4103, 0.2546, 0.0565, 0.0594, 0.0323, 0.1939, 0.0783, 0.0534,
         0.0635, 0.0502, 0.2345, 0.0358, 0.3558, 0.4614, 0.4205],
        [0.4848, 0.2706, 0.2271, 0.0577, 0.0519, 0.0214, 0.2231, 0.0518, 0.0433,
         0.0556, 0.0450, 0.4938, 0.0365, 0.4628, 0.4594, 0.4614],
        [0.3937, 0.3401, 0.2335, 0.0501, 0.0464, 0.0143, 0.2069, 0.0465, 0.0337,
         0.0580, 0.0414, 0.4206, 0.0266, 0.3985, 0.2311, 0.2666],
        [0.4988, 0.1516, 0.1644, 0.0361, 0.0336, 0.0147, 0.1724, 0.0326, 0.0326,
         0.0401, 0.0261, 0.6640, 0.0268, 0.4475, 0.5517, 0.6090],
        [0.3024, 0.3822, 0.2906, 0.0680, 0.0766, 0.0350, 0.2267, 0.0899, 0.0574,
         0.0706, 0.0685, 0.3356, 0.0442, 0.4570, 0.3789, 0.3230],
        [0.3605, 0.2398, 0.2846, 0.0486, 0.0472, 0.0257, 0.1933, 0.0538, 0.0406,
         0.0551, 0.0427, 0.5201, 0.0333, 0.4401, 0.5968, 0.6601],
        [0.4093, 0.2957, 0.2326, 0.0471, 0.0482, 0.0267, 0.1848, 0.0560, 0.0463,
         0.0581, 0.0400, 0.39