## GraphSAGE: Inductive Representation Learning on Large Graphs

**GraphSAGE**是一个用于大型图上归纳表示学习的框架。

**GraphSAGE**用于为节点生成低维向量表示, 对于具有丰富节点属性信息的图尤其有用。

### Import packages

In [100]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Print the versions of the core libraries this notebook relies on.
print('pytorch version:',torch.__version__,
      '\nnumpy version:' ,np.__version__,
      '\nmatplotlib version:' ,matplotlib.__version__)

# (Optional) Adapt the project path: prepend the project checkout to the
# module search path. Presumably this is what lets the
# `graph_neural_networks` imports in later cells resolve -- adjust the path
# to your own checkout location.
import sys
sys.path.insert(0, "/root/workshop/Deep-Learning-in-Action")

%matplotlib inline

pytorch version: 1.7.1+cu101 
numpy version: 1.18.2 
matplotlib version: 3.2.1


### Defining hyperparameters

In [101]:
INPUT_DIM = 1433  # Input feature dimension
# NOTE: the number of sampled neighbor hops (len(NUM_NEIGHBORS_LIST)) must
# match the number of GraphSAGE layers (len(HIDDEN_DIM)).
HIDDEN_DIM = [128, 7]  # Number of hidden units of each layer
NUM_NEIGHBORS_LIST = [10, 10]  # Number of neighbors sampled at each hop
# Raise explicitly instead of using `assert`: assert statements are removed
# when Python runs with -O, which would silently skip this sanity check.
if len(HIDDEN_DIM) != len(NUM_NEIGHBORS_LIST):
    raise ValueError(
        "HIDDEN_DIM and NUM_NEIGHBORS_LIST must have the same length, got "
        "{} and {}".format(len(HIDDEN_DIM), len(NUM_NEIGHBORS_LIST))
    )

BATCH_SIZE = 16  # Number of source nodes per mini-batch
EPOCHS = 20  # Number of training epochs
NUM_BATCH_PER_EPOCH = 20  # Number of mini-batches run in each epoch
LEARNING_RATE = 0.01  # Learning rate passed to the optimizer
# Use the first CUDA device when one is available, otherwise the CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

### Load Dataset: Cora

In [102]:
from graph_neural_networks.GraphSage.cora import CoraData
from collections import namedtuple

# Record type with the fields of a graph dataset: node features, labels,
# adjacency dictionary and the train/val/test masks.
Data = namedtuple('Data', ['x', 'y', 'adjacency_dict',
                           'train_mask', 'val_mask', 'test_mask'])

# Load the Cora data. The arrays are kept as NumPy arrays here; conversion to
# torch tensors happens per batch in the training/test functions.
dataset = CoraData(data_root='/root/data/cora/raw', rebuild=True).data
# Row-normalize the features so that every row sums to 1. A row whose sum is
# zero is divided by 1 instead, so it stays all-zero rather than turning into
# NaN/inf and poisoning the loss.
row_sums = dataset.x.sum(1, keepdims=True)
x = dataset.x / np.where(row_sums == 0, 1, row_sums)
# Indices of the nodes selected by the training / test masks.
train_idx = np.where(dataset.train_mask)[0]
# Labels of ALL nodes; indexed with training node ids when batches are built.
train_label = dataset.y
test_idx = np.where(dataset.test_mask)[0]

Process data ...
Node's feature shape:  (2708, 1433)
Node's label shape:  (2708,)
Adjacency's shape:  2708
Number of training nodes:  140
Number of validation nodes:  500
Number of test nodes:  1000
Cached file: /root/data/cora/raw/ch7_cached.pkl


### Build GraphSage, define optimizer and loss function

In [103]:
from graph_neural_networks.GraphSage.model import GraphSage

# Build the GraphSage network from the hyperparameters defined above and move
# it to the selected device.
model = GraphSage(
    input_dim=INPUT_DIM,
    hidden_dim=HIDDEN_DIM,
    num_neighbors_list=NUM_NEIGHBORS_LIST,
).to(DEVICE)

# Cross-entropy loss over the model's output logits.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(DEVICE)

# Adam optimizer over all model parameters, with L2 weight decay.
optimizer = optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=5e-4,
)

### Define training and test functions

In [104]:
from graph_neural_networks.GraphSage.sampling import multihop_sampling

def train():
    """Train the global ``model`` and evaluate it after every epoch.

    Runs ``EPOCHS`` epochs of ``NUM_BATCH_PER_EPOCH`` mini-batches each. For
    every batch, ``BATCH_SIZE`` source nodes are drawn from ``train_idx``
    (``np.random.choice`` samples with replacement by default), their
    neighborhoods are sampled with ``multihop_sampling``, and one optimizer
    step is taken on the cross-entropy loss of the source nodes. The loss of
    every batch is printed, and ``test()`` is called at the end of each epoch.

    Uses the module-level ``model``, ``criterion``, ``optimizer``, ``dataset``,
    ``x``, ``train_idx`` and ``train_label``; returns nothing.
    """
    for e in range(EPOCHS):
        # test() puts the model into eval mode at the end of every epoch, so
        # training mode has to be re-enabled per epoch, not just once up front.
        model.train()
        for batch in range(NUM_BATCH_PER_EPOCH):
            # Source nodes of this batch and their labels.
            bth_src_idx = np.random.choice(train_idx, size=(BATCH_SIZE,))
            bth_src_label = torch.from_numpy(train_label[bth_src_idx]).long().to(DEVICE)
            # Sampled node ids; one entry per element returned by the sampler
            # (presumably the source nodes followed by one entry per hop --
            # confirm against multihop_sampling).
            bth_sampling_res = multihop_sampling(bth_src_idx, NUM_NEIGHBORS_LIST, dataset.adjacency_dict)
            # Look up the features of every sampled node set as float tensors.
            bth_sampling_x = [torch.from_numpy(x[idx]).float().to(DEVICE) for idx in bth_sampling_res]

            bth_train_logits = model(bth_sampling_x)
            loss = criterion(bth_train_logits, bth_src_label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(e, batch, loss.item()))
        test()


@torch.no_grad()
def test():
    """Evaluate the global ``model`` on the test nodes and print the accuracy.

    Switches the model to eval mode, samples the neighborhoods of all nodes in
    ``test_idx`` with ``multihop_sampling``, runs one forward pass and prints
    the fraction of test nodes whose highest-scoring class equals the label.
    Gradient tracking is disabled by the decorator. Note that the model is
    left in eval mode when this function returns.
    """
    model.eval()

    sampled_ids = multihop_sampling(test_idx, NUM_NEIGHBORS_LIST, dataset.adjacency_dict)
    # Feature tensors for every sampled node set, moved to the target device.
    features = []
    for node_ids in sampled_ids:
        features.append(torch.from_numpy(x[node_ids]).float().to(DEVICE))
    labels = torch.from_numpy(dataset.y[test_idx]).long().to(DEVICE)

    logits = model(features)
    # max over dim 1 returns (values, indices); the indices are the predictions.
    _, predicted = logits.max(1)
    accuracy = (predicted == labels).float().mean().item()

    divider = "--------------------------------"
    print(divider)
    print("Test Accuracy: ", accuracy)
    print(divider)

### Train the model

In [105]:
train()

Epoch 000 Batch 000 Loss: 1.9836
Epoch 000 Batch 001 Loss: 1.9199
Epoch 000 Batch 002 Loss: 1.8195
Epoch 000 Batch 003 Loss: 1.6954
Epoch 000 Batch 004 Loss: 1.6667
Epoch 000 Batch 005 Loss: 1.4803
Epoch 000 Batch 006 Loss: 1.4893
Epoch 000 Batch 007 Loss: 1.4632
Epoch 000 Batch 008 Loss: 0.9855
Epoch 000 Batch 009 Loss: 1.1157
Epoch 000 Batch 010 Loss: 0.9806
Epoch 000 Batch 011 Loss: 0.7594
Epoch 000 Batch 012 Loss: 0.6640
Epoch 000 Batch 013 Loss: 0.6778
Epoch 000 Batch 014 Loss: 0.7728
Epoch 000 Batch 015 Loss: 0.4867
Epoch 000 Batch 016 Loss: 0.5257
Epoch 000 Batch 017 Loss: 0.3501
Epoch 000 Batch 018 Loss: 0.5792
Epoch 000 Batch 019 Loss: 0.4938
Test Accuracy:  0.7660000324249268
Epoch 001 Batch 000 Loss: 0.3622
Epoch 001 Batch 001 Loss: 0.3547
Epoch 001 Batch 002 Loss: 0.2089
Epoch 001 Batch 003 Loss: 0.2336
Epoch 001 Batch 004 Loss: 0.1097
Epoch 001 Batch 005 Loss: 0.1586
Epoch 001 Batch 006 Loss: 0.3172
Epoch 001 Batch 007 Loss: 0.0886
Epoch 001 Batch 008 Loss: 0.1341
Epoch 00