# 图神经网络
[图神经网络（GNN）最简单全面原理与代码实现](https://zhuanlan.zhihu.com/p/603486955)
- 图数据是由节点（Node）和边（Edge）组成的数据
- 图数据的信息包含3个层面，分别是**节点信息（V）、边信息（E）、图整体信息（U）**，对应着各自的任务
- GNN是对图上的所有属性（V、E、U）进行的一个可以优化的变换
- GNN对属性向量优化的方法叫做消息传递机制
  - 比如最原始的GNN是SUM求和传递机制；
  - 图卷积网络（GCN）考虑到了节点的度，度越大，权重越小，使用了加权的SUM；
  - 图注意力网络GAT，在消息传递过程中引入了注意力机制；
  - 不同GNN的本质差别就在于它们如何进行节点之间的信息传递和计算，即消息传递机制不同；
- GNN对应任务的不同，在于output层的输入不同
  - 对于节点层面的任务而言，可以直接让最后一层图卷积输出每个节点的类别得分，例如 `self.conv2 = GCNConv(16, dataset.num_classes)`
  - 对于边层面的任务而言，通过GNN提取出节点信息，输入Output层之前需要进行边特征的融合
  - 对于图层面的任务而言，通过GNN提取出节点信息，输入Output层之前需要进行图特征的融合

In [10]:
# Download the raw Cora (Planetoid format) files from a Gitee mirror.
import os

import requests

base_url = 'https://gitee.com/jiajiewu/planetoid/raw/master/data/ind.cora.'
raw_dir = '../data/Cora/raw'
names = ['x', 'tx', 'allx', 'y', 'ty', 'ally', 'graph', 'test.index']

# The target directory may not exist yet on a fresh checkout.
os.makedirs(raw_dir, exist_ok=True)
for name in names:
    file_url = base_url + name
    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(file_url, timeout=30)
    # Fail loudly instead of saving an HTTP error page as a dataset file.
    r.raise_for_status()
    with open(os.path.join(raw_dir, 'ind.cora.' + name), 'wb') as f:
        f.write(r.content)

## 节点分类任务代码实现
学术论文的相关性分类问题（7个类别）

In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
# Load the Cora dataset; it holds a single graph, taken as `data`.
dataset = Planetoid(root='../data', name='Cora')
data = dataset[0] # x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708]
# Network architecture
class Net(torch.nn.Module):
    """Two-layer GCN that outputs per-node log-probabilities over the classes.

    Layer sizes are read from the module-level ``dataset``
    (``num_features`` -> 16 -> ``num_classes``).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)  # num_features=1433 for Cora
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, x, edge_index):
        """Score every node of the graph.

        Args:
            x: node feature matrix ([2708, 1433] for Cora).
            edge_index: edge list ([2, 10556] for Cora).

        Returns:
            Log-softmax class scores, one row per node ([2708, 7] for Cora).
        """
        hidden = F.relu(self.conv1(x, edge_index))  # [2708, 16]
        logits = self.conv2(hidden, edge_index)     # [2708, 7]
        # log_softmax = softmax followed by log; pairs with F.nll_loss.
        return F.log_softmax(logits, dim=1)
# Run on the GPU when one is available; model and data must share a device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Training: 200 full-batch gradient steps.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    # The forward pass sees the features and edges of ALL nodes ...
    log_probs = model(data.x, data.edge_index)
    # ... but only the training nodes contribute to the loss.
    loss = F.nll_loss(log_probs[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
# Evaluation on the test nodes.
model.eval()
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    test_predict = model(data.x, data.edge_index)[data.test_mask]  # test-node predictions
max_index = torch.argmax(test_predict, dim=1)
test_true = data.y[data.test_mask]
# Count the matches in one tensor operation rather than comparing element by
# element in a Python loop.
correct = int((max_index == test_true).sum())
print('测试集准确率为：{}%'.format(correct*100/len(test_true)))

测试集准确率为：80.1%


## 边分类任务代码实现
二分类问题：判断一对节点之间是否存在边（图中已有的边为正样本，负采样得到的节点对为负样本）

In [11]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling

# Edge classification model
class EdgeClassifier(torch.nn.Module):
    """One GCN layer followed by a linear classifier over node-pair features.

    Args:
        in_channels: size of the input node features.
        out_channels: size of the node embedding produced by the GCN layer.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = GCNConv(in_channels, out_channels)
        # An edge is represented by its two endpoint embeddings concatenated;
        # the classifier emits two logits per edge.
        self.classifier = torch.nn.Linear(2 * out_channels, 2)

    def forward(self, x, edge_index):
        """Return 2-class logits for the given edges plus sampled negatives.

        The first ``edge_index.size(1)`` rows of the result score the edges in
        ``edge_index`` (positives); the remaining rows score node pairs drawn
        by ``negative_sampling`` on every call.
        """
        # x: [2708, 1433], edge_index: [2, 10556] for Cora
        node_emb = F.relu(self.conv(x, edge_index))  # [2708, out_channels]
        neg_edge_index = negative_sampling(
            edge_index, num_neg_samples=edge_index.size(1))
        # Positives first, negatives second: [2, 21112] for Cora.
        src, dst = torch.cat([edge_index, neg_edge_index], dim=1)
        pair_features = torch.cat([node_emb[src], node_emb[dst]], dim=1)  # [21112, 2*out_channels]
        return self.classifier(pair_features)

# Load the dataset
dataset = Planetoid(root='../data', name='Cora')
data = dataset[0]

# Build edge-level train/test masks: a random 80% / 20% split over the edges.
# NOTE(review): if edge_index stores both directions of each undirected edge,
# (u, v) can land in train while (v, u) lands in test — confirm whether that
# leakage matters for this demo.
edges = data.edge_index.t().cpu().numpy()  # one (source, target) row per edge, [10556, 2]
num_edges = len(edges)                     # 10556
train_size = int(0.8 * num_edges)
train_indices = torch.randperm(num_edges)[:train_size]  # random 80% of the edge ids
train_mask = torch.zeros(num_edges, dtype=torch.bool)   # [10556]
train_mask[train_indices] = True
test_mask = ~train_mask  # every edge not used for training, [10556]

# Build the model (64-dimensional node embeddings) and its optimizer;
# the train/test routines follow.
model = EdgeClassifier(dataset.num_features, 64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

def train():
    """Run one optimisation step on the training edges and return the loss.

    Uses the module-level ``model``, ``optimizer``, ``data`` and
    ``train_mask``.
    """
    model.train()
    optimizer.zero_grad()
    logits = model(data.x, data.edge_index)  # [21112, 2] for Cora

    # The model scores the real edges first and the sampled negatives second,
    # so the labels are num_pos ones followed by num_pos zeros.
    num_pos = data.edge_index.size(1)
    labels = torch.cat([
        torch.ones(num_pos, dtype=torch.long),
        torch.zeros(num_pos, dtype=torch.long),
    ], dim=0).to(logits.device)

    # The same per-edge split is applied to the positive and negative halves.
    selected = train_mask.repeat(2)
    loss = F.cross_entropy(logits[selected], labels[selected])
    loss.backward()
    optimizer.step()
    return loss.item()

def test():
    """Return the accuracy on the held-out edges (positives and negatives).

    Uses the module-level ``model``, ``data`` and ``test_mask``.
    """
    model.eval()
    with torch.no_grad():
        logits = model(data.x, data.edge_index)

        # Positives come first in the model output, negatives second.
        num_pos = data.edge_index.size(1)
        labels = torch.cat([
            torch.ones(num_pos, dtype=torch.long),
            torch.zeros(num_pos, dtype=torch.long),
        ], dim=0).to(logits.device)

        # The same per-edge split is applied to both halves.
        held_out = test_mask.repeat(2)

        # Predicted class = index of the larger logit.
        _, predictions = torch.max(logits[held_out], 1)
        hits = (predictions == labels[held_out]).sum().item()
        return hits / len(predictions)

# 50 epochs: one training step, then one evaluation pass, then a progress line.
for epoch in range(50):
    loss = train()
    acc = test()
    print(f"Epoch: {epoch+1:03d}, Loss: {loss:.4f}, Acc: {acc:.4f}")

Epoch: 001, Loss: 0.6939, Acc: 0.5000
Epoch: 002, Loss: 0.6877, Acc: 0.5149
Epoch: 003, Loss: 0.6744, Acc: 0.6044
Epoch: 004, Loss: 0.6700, Acc: 0.5322
Epoch: 005, Loss: 0.6595, Acc: 0.5405
Epoch: 006, Loss: 0.6547, Acc: 0.5987
Epoch: 007, Loss: 0.6442, Acc: 0.6366
Epoch: 008, Loss: 0.6392, Acc: 0.6089
Epoch: 009, Loss: 0.6289, Acc: 0.6158
Epoch: 010, Loss: 0.6267, Acc: 0.6697
Epoch: 011, Loss: 0.6181, Acc: 0.6603
Epoch: 012, Loss: 0.6092, Acc: 0.6411
Epoch: 013, Loss: 0.6055, Acc: 0.6764
Epoch: 014, Loss: 0.5984, Acc: 0.6714
Epoch: 015, Loss: 0.5950, Acc: 0.6768
Epoch: 016, Loss: 0.5886, Acc: 0.6842
Epoch: 017, Loss: 0.5843, Acc: 0.6951
Epoch: 018, Loss: 0.5847, Acc: 0.7083
Epoch: 019, Loss: 0.5798, Acc: 0.6870
Epoch: 020, Loss: 0.5724, Acc: 0.7114
Epoch: 021, Loss: 0.5719, Acc: 0.7015
Epoch: 022, Loss: 0.5702, Acc: 0.7098
Epoch: 023, Loss: 0.5654, Acc: 0.6918
Epoch: 024, Loss: 0.5653, Acc: 0.7093
Epoch: 025, Loss: 0.5637, Acc: 0.7029
Epoch: 026, Loss: 0.5600, Acc: 0.7008
Epoch: 027, 

## 图分类任务代码实现
酶的6分类任务：ENZYMES数据集由600个图组成，每个图表示一种酶的蛋白质结构

In [8]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# Load ENZYMES and shuffle the graph order before splitting.
# Whole dataset: x=[19580, 3], edge_index=[2, 74564], y=[600]
dataset = TUDataset(root='../data/ENZYMES', name='ENZYMES').shuffle()

# First 540 graphs for training, the remaining ones (60 of 600) for testing.
train_dataset, test_dataset = dataset[:540], dataset[540:]

# Mini-batches of 64 graphs; only the training order is reshuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Graph convolutional network for graph-level classification
class GCN(torch.nn.Module):
    """Three GCN layers, mean pooling per graph, dropout and a linear head.

    Input and output sizes are read from the module-level ``dataset``.

    Args:
        hidden_channels: width of every GCN layer.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return one row of class logits per graph in the mini-batch."""
        # ReLU follows the first two convolutions only.
        for conv in (self.conv1, self.conv2):
            x = F.relu(conv(x, edge_index))
        x = self.conv3(x, edge_index)
        # `batch` maps each node to its graph; average the node features of
        # every graph to get one vector per graph.
        pooled = global_mean_pool(x, batch)
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)

# Model with 64 hidden channels, Adam optimizer and cross-entropy loss
# (the model outputs raw logits).
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

def train(train_loader):
    """Run one epoch of mini-batch updates on the module-level ``model``.

    Args:
        train_loader: yields mini-batches of graphs, e.g.
            edge_index=[2, 8116], x=[2117, 3], y=[64], batch=[2117], ptr=[65].
    """
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        logits = model(batch.x, batch.edge_index, batch.batch)
        criterion(logits, batch.y).backward()
        optimizer.step()

def test(loader):
    """Return the classification accuracy of the module-level ``model``.

    Args:
        loader: yields mini-batches of graphs; ``loader.dataset`` must support
            ``len()`` so the hit count can be normalised.

    Returns:
        Fraction of graphs in ``loader.dataset`` whose predicted class
        matches the label.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients: disabling autograd avoids building a
    # graph for every batch.
    with torch.no_grad():
        for batch in loader:
            out = model(batch.x, batch.edge_index, batch.batch)
            pred = out.argmax(dim=1)
            correct += int((pred == batch.y).sum())
    return correct / len(loader.dataset)

# 50 epochs: train once, then report accuracy on both splits.
for epoch in range(50):
    train(train_loader)
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch+1:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.2130, Test Acc: 0.2667
Epoch: 002, Train Acc: 0.2426, Test Acc: 0.1500
Epoch: 003, Train Acc: 0.2407, Test Acc: 0.1833
Epoch: 004, Train Acc: 0.2241, Test Acc: 0.1833
Epoch: 005, Train Acc: 0.2315, Test Acc: 0.2167
Epoch: 006, Train Acc: 0.2611, Test Acc: 0.1500
Epoch: 007, Train Acc: 0.2630, Test Acc: 0.2500
Epoch: 008, Train Acc: 0.2944, Test Acc: 0.2833
Epoch: 009, Train Acc: 0.3019, Test Acc: 0.2500
Epoch: 010, Train Acc: 0.2722, Test Acc: 0.2333
Epoch: 011, Train Acc: 0.2889, Test Acc: 0.2167
Epoch: 012, Train Acc: 0.2685, Test Acc: 0.2167
Epoch: 013, Train Acc: 0.2630, Test Acc: 0.2000
Epoch: 014, Train Acc: 0.2907, Test Acc: 0.2167
Epoch: 015, Train Acc: 0.2981, Test Acc: 0.2000
Epoch: 016, Train Acc: 0.2944, Test Acc: 0.1833
Epoch: 017, Train Acc: 0.2870, Test Acc: 0.2333
Epoch: 018, Train Acc: 0.2833, Test Acc: 0.1833
Epoch: 019, Train Acc: 0.2852, Test Acc: 0.2500
Epoch: 020, Train Acc: 0.2852, Test Acc: 0.2000
Epoch: 021, Train Acc: 0.2889, Test Acc:

# GAT网络
[Graph Attention Networks (GAT)](https://nn.labml.ai/graphs/gat/index.html)  
[图注意网络GAT理解及Pytorch代码实现【PyGAT代码详细注释】](https://blog.csdn.net/weixin_43629813/article/details/129278266)

In [1]:
import torch
from torch import nn
from labml_helpers.module import Module


class GraphAttentionLayer(Module):
    """A single multi-head graph attention layer.

    Every node attends over the nodes allowed by the adjacency matrix; the
    per-head results are either concatenated or averaged.

    Args:
        in_features: number of input features per node.
        out_features: number of output features per node.
        n_heads: number of attention heads.
        is_concat: concatenate the heads (``out_features`` must then be
            divisible by ``n_heads``) instead of averaging them.
        dropout: dropout probability applied to the attention weights.
        leaky_relu_negative_slope: negative slope of the LeakyReLU applied to
            the raw attention scores.
    """

    def __init__(self, in_features: int, out_features: int, n_heads: int,
                 is_concat: bool = True,
                 dropout: float = 0.6,
                 leaky_relu_negative_slope: float = 0.2):
        super().__init__()

        self.is_concat = is_concat
        self.n_heads = n_heads

        # Features per head: concatenated heads share `out_features` between
        # them, averaged heads each produce the full `out_features`.
        if is_concat:
            assert out_features % n_heads == 0
        self.n_hidden = out_features // n_heads if is_concat else out_features

        # Initial transformation of the node embeddings, all heads at once.
        self.linear = nn.Linear(in_features, self.n_hidden * n_heads, bias=False)
        # Scores a concatenated node pair: the attention score $e_{ij}$.
        self.attn = nn.Linear(self.n_hidden * 2, 1, bias=False)
        self.activation = nn.LeakyReLU(negative_slope=leaky_relu_negative_slope)
        # Normalises $e_{ij}$ over $j$ to give the attention $\alpha_{ij}$.
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, h: torch.Tensor, adj_mat: torch.Tensor):
        """Apply attention over the graph.

        Args:
            h: node embeddings of shape ``[n_nodes, in_features]``.
            adj_mat: adjacency of shape ``[n_nodes, n_nodes, n_heads]`` or
                ``[n_nodes, n_nodes, 1]`` (size-1 axes are allowed); a zero
                entry removes the pair ``(i, j)`` from attention.

        Returns:
            ``[n_nodes, n_heads * n_hidden]`` when concatenating heads,
            otherwise ``[n_nodes, n_hidden]`` (mean over heads).
        """
        n_nodes = h.shape[0]

        # Per-head transformed embeddings: [n_nodes, n_heads, n_hidden].
        g = self.linear(h).view(n_nodes, self.n_heads, self.n_hidden)

        # Build every ordered pair by broadcasting so that
        # `g_concat[i, j]` is $\overrightarrow{g_i} \Vert \overrightarrow{g_j}$.
        pair_shape = (n_nodes, n_nodes, self.n_heads, self.n_hidden)
        g_i = g.unsqueeze(1).expand(pair_shape)
        g_j = g.unsqueeze(0).expand(pair_shape)
        g_concat = torch.cat([g_i, g_j], dim=-1)

        # Raw scores; drop the trailing size-1 axis -> [n_nodes, n_nodes, n_heads].
        e = self.activation(self.attn(g_concat)).squeeze(-1)

        # Each adjacency axis must match the score tensor or be broadcastable.
        for axis, full_size in enumerate((n_nodes, n_nodes, self.n_heads)):
            assert adj_mat.shape[axis] in (1, full_size)
        # $e_{ij}$ becomes $- \infty$ where the adjacency matrix has no edge
        # from $i$ to $j$, so the softmax gives that pair zero weight.
        e = e.masked_fill(adj_mat == 0, float('-inf'))

        # Normalise over $j$, then regularise the attention weights.
        a = self.dropout(self.softmax(e))

        # Attention-weighted sum of neighbour embeddings for every head.
        attn_res = torch.einsum('ijh,jhf->ihf', a, g)

        if not self.is_concat:
            # $$\overrightarrow{h'_i} = \frac{1}{K} \sum_{k=1}^{K} \overrightarrow{h'^k_i}$$
            return attn_res.mean(dim=1)
        # $$\overrightarrow{h'_i} = \Bigg\Vert_{k=1}^{K} \overrightarrow{h'^k_i}$$
        return attn_res.reshape(n_nodes, self.n_heads * self.n_hidden)
        

# Three independent single-head GAT layers (512 -> 256 features), one per
# graph: "default", "time" and "star".
default_gat, time_gat, star_gat = (
    GraphAttentionLayer(in_features=512, out_features=256, n_heads=1)
    for _ in range(3)
)

# Sample inputs: the three graphs have 5, 7 and 3 nodes respectively.
# Random node embeddings ...
default_h, time_h, star_h = (torch.randn(n, 512) for n in (5, 7, 3))
# ... and fully connected adjacency matrices shared by all heads.
default_adj, time_adj, star_adj = (torch.ones(n, n, 1) for n in (5, 7, 3))

# Target node of each graph: the middle index.
default_index: int = default_adj.shape[0] // 2
time_index: int = time_adj.shape[0] // 2
star_index: int = star_adj.shape[0] // 2

# Forward pass through each layer, keeping only the target node's embedding.
node_default = default_gat(default_h, default_adj)[default_index]
node_time = time_gat(time_h, time_adj)[time_index]
node_star = star_gat(star_h, star_adj)[star_index]

# Fixed-weight blend of the three target embeddings.
node_all = 0.5 * node_default + 0.25 * node_time + 0.25 * node_star

# Print the shape of the blended embedding
print(node_all.shape)