In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
import torch.nn as nn
import torch.optim as optim
from sklearn.decomposition import PCA
from torch_geometric.utils import remove_self_loops, degree

# Load the Cora dataset and take its first element as the working graph
dataset = Planetoid(root='/tmp/cora', name='cora')
data = dataset[0]
# Target dimensionality after the PCA reduction
pca_dim = 32  # adjust as needed to change the PCA output dimension

# Reduce the node features with PCA
def apply_pca(features, pca_dim):
    """Fit a PCA on ``features`` and return the transformed data as a tensor.

    Args:
        features: Feature matrix handed to ``PCA.fit_transform`` (the caller
            in this file passes a NumPy array).
        pca_dim: Forwarded to ``PCA`` as ``n_components``.

    Returns:
        A new ``torch.float`` tensor containing the PCA-transformed features.
    """
    reduced = PCA(n_components=pca_dim).fit_transform(features)
    return torch.tensor(reduced, dtype=torch.float)

# Replace the node features in `data` with their PCA-reduced version
data.x = apply_pca(data.x.numpy(), pca_dim)

def gcn_conv(h, edge_index):
    """Propagate node features once over the degree-normalised graph.

    Self-loops are stripped from ``edge_index`` first. Every remaining edge
    ``(src, dst)`` gets the weight ``deg[src] ** -0.5 * deg[dst] ** -0.5``,
    where ``deg`` counts how often a node occurs as ``dst``. Factors that
    come out as ``inf`` (nodes with degree 0) are replaced by 0.

    Args:
        h: Node feature tensor; its first dimension is the number of nodes.
        edge_index: Edge list that unpacks into a ``(src, dst)`` pair of
            index tensors.

    Returns:
        The transposed sparse weight matrix multiplied with ``h``.
    """
    num_nodes = h.size(0)
    edges, _ = remove_self_loops(edge_index)
    senders, receivers = edges

    # Compute deg ** -0.5 once per node, zero the infinities produced by
    # degree-0 nodes, then gather the per-edge factors.
    inv_sqrt_deg = degree(receivers, num_nodes=num_nodes).pow(-0.5)
    inv_sqrt_deg = inv_sqrt_deg.masked_fill(inv_sqrt_deg == float('inf'), 0)
    weights = inv_sqrt_deg[senders] * inv_sqrt_deg[receivers]

    # Entry (src, dst) holds the edge weight; transposing makes row ``dst``
    # aggregate the features of its source nodes.
    adjacency = torch.sparse_coo_tensor(
        edges, weights, torch.Size([num_nodes, num_nodes])
    ).t()
    return adjacency @ h

# 2. Bias-free linear model used for estimating W
class SimpleNN(nn.Module):
    """A single linear layer without bias, i.e. ``x @ W.T``."""

    def __init__(self, in_features, out_features):
        """Create the ``in_features`` -> ``out_features`` linear map."""
        super().__init__()
        self.fc = nn.Linear(
            in_features=in_features,
            out_features=out_features,
            bias=False,
        )

    def forward(self, x):
        """Return the linear projection of ``x``."""
        projected = self.fc(x)
        return projected

x = data.x
edge_index = data.edge_index

# Regression target: the node features after one propagation step
x_prime = gcn_conv(x, edge_index)

# 5. Prepare data for training
input_features = x.size(1)
output_features = x_prime.size(1)
model = SimpleNN(input_features, output_features)

# Use Mean Squared Error as the loss function
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

# 6. Train the model with original x'
num_epochs = 100
model.train()  # the mode never changes inside the loop, so set it once
for epoch in range(num_epochs):
    optimizer.zero_grad()
    x_prime_pred = model(x)
    loss = criterion(x_prime_pred, x_prime)
    loss.backward()
    optimizer.step()
    # Report the loss every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

# Replace the node features with the learned linear projection. No gradients
# are needed for this pass, so skip building the autograd graph entirely
# instead of building it and detaching afterwards.
with torch.no_grad():
    data.x = model(data.x)

class MLP(nn.Module):
    """Two hidden-layer perceptron with batch norm, ReLU and dropout.

    Each hidden stage is ``Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.5)``;
    a final linear layer followed by ``log_softmax`` produces the output.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the layers.

        Args:
            input_dim: Width of the input features.
            hidden_dim: Width of both hidden layers.
            output_dim: Width of the output layer.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        # One dropout layer, applied after each hidden stage
        self.dropout = nn.Dropout(p=0.5)
        self.out = nn.Linear(hidden_dim, output_dim)

    def forward(self, data):
        """Run the network on ``data.x``.

        Args:
            data: Object exposing the input feature tensor as ``data.x``.

        Returns:
            A tuple ``(log_probs, hidden_1, hidden_2)``: the log-softmax
            output over dimension 1 and the two hidden activations, each
            taken after dropout.
        """
        features = data.x
        first_hidden = self.dropout(F.relu(self.bn1(self.fc1(features))))
        second_hidden = self.dropout(F.relu(self.bn2(self.fc2(first_hidden))))
        log_probs = F.log_softmax(self.out(second_hidden), dim=1)
        return log_probs, first_hidden, second_hidden

# Model hyperparameters
input_dim = data.x.shape[1]  # feature width of data.x as it stands at this point
hidden_dim = 32                        # width of the hidden layers
output_dim = dataset.num_classes        # number of classes

# Initialise the model and the optimizer (this rebinds `optimizer`)
model2 = MLP(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model2.parameters(), lr=0.1, weight_decay=0.001)

# Training step
def train():
    """Perform one optimisation step on the training nodes.

    Operates on the module-level ``model2``, ``optimizer`` and ``data``.

    Returns:
        The negative log-likelihood loss over ``data.train_mask`` as a
        Python float.
    """
    model2.train()
    optimizer.zero_grad()  # clear gradients from the previous step
    log_probs = model2(data)[0]  # forward pass; hidden activations unused
    train_nodes = data.train_mask
    step_loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])
    step_loss.backward()  # back-propagate
    optimizer.step()  # update the parameters
    return step_loss.item()

# Evaluation
def test():
    """Compute the accuracy of ``model2`` on the train, val and test masks.

    Operates on the module-level ``model2`` and ``data``. The model is put
    into eval mode and the forward pass runs without gradient tracking,
    since evaluation never back-propagates.

    Returns:
        A list ``[train_acc, val_acc, test_acc]`` where each entry is the
        fraction of nodes in the corresponding mask whose predicted class
        equals ``data.y``.
    """
    model2.eval()
    with torch.no_grad():
        logits = model2(data)[0]

    accs = []
    for mask in (data.train_mask, data.val_mask, data.test_mask):
        # Predicted class = index of the largest log-probability per node
        pred = logits[mask].argmax(dim=1)
        correct = pred.eq(data.y[mask]).sum().item()
        accs.append(correct / mask.sum().item())
    return accs

# Train for 100 epochs, evaluating and printing metrics after every epoch
for epoch in range(1, 101):
    loss = train()
    train_acc, val_acc, test_acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, '
          f'Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch [10/100], Loss: 0.04696326330304146
Epoch [20/100], Loss: 0.04212697595357895
Epoch [30/100], Loss: 0.040298350155353546
Epoch [40/100], Loss: 0.039844438433647156
Epoch [50/100], Loss: 0.0396617129445076
Epoch [60/100], Loss: 0.03959028050303459
Epoch [70/100], Loss: 0.039566151797771454
Epoch [80/100], Loss: 0.039556071162223816
Epoch [90/100], Loss: 0.03955245018005371
Epoch [100/100], Loss: 0.0395512618124485
Epoch: 001, Loss: 2.0926, Train Acc: 0.2429, Val Acc: 0.1780, Test Acc: 0.1520
Epoch: 002, Loss: 1.8350, Train Acc: 0.3643, Val Acc: 0.2640, Test Acc: 0.2610
Epoch: 003, Loss: 1.6551, Train Acc: 0.5500, Val Acc: 0.3440, Test Acc: 0.3450
Epoch: 004, Loss: 1.3951, Train Acc: 0.6143, Val Acc: 0.4500, Test Acc: 0.4570
Epoch: 005, Loss: 1.4508, Train Acc: 0.6786, Val Acc: 0.5320, Test Acc: 0.5560
Epoch: 006, Loss: 1.3336, Train Acc: 0.6929, Val Acc: 0.5540, Test Acc: 0.5950
Epoch: 007, Loss: 1.2995, Train Acc: 0.6857, Val Acc: 0.5560, Test Acc: 0.5970
Epoch: 008, Loss: 1.1134