## GraphSAGE: Inductive Representation Learning on Large Graphs

**GraphSAGE**是一个用于大型图上归纳表示学习的框架。

**GraphSAGE**用于为节点生成低维向量表示, 对于具有丰富节点属性信息的图尤其有用。

### Import packages

In [75]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

print('pytorch version:',torch.__version__,
      '\nnumpy version:' ,np.__version__,
      '\nmatplotlib version:' ,matplotlib.__version__)

import sys, os
print(os.path.dirname(os.getcwd()))
sys.path.insert(0, "/root/workshop/Deep-Learning-in-Action")
print(sys.path)

%matplotlib inline

pytorch version: 1.7.1+cu101 
numpy version: 1.18.2 
matplotlib version: 3.2.1
/
['/root/workshop/Deep-Learning-in-Action', '/root/workshop/Deep-Learning-in-Action', '/root/workshop/Deep-Learning-in-Action', '/', '/', '/', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '', '/usr/local/lib/python3.6/dist-packages', '/usr/local/lib/python3.6/dist-packages/pygpu-0.7.6-py3.6-linux-x86_64.egg', '/usr/local/lib/python3.6/dist-packages/Mako-1.1.2-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/MarkupSafe-1.1.1-py3.6-linux-x86_64.egg', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.6/dist-packages/IPython/extensions', '/root/.ipython', '..', '..', '..', '..', '..', '..', '..', '..']


### Defining hyperparameters

In [76]:
INPUT_DIM = 1433 # 输入维度
# Note: 采样的邻居阶数需要与GCN的层数保持一致
HIDDEN_DIM = [128, 7]   # 隐藏单元节点数
NUM_NEIGHBORS_LIST = [10, 10]   # 每阶采样邻居的节点数
assert len(HIDDEN_DIM) == len(NUM_NEIGHBORS_LIST)

BATCH_SIZE = 16 # 批处理大小
EPOCHS = 20
NUM_BATCH_PER_EPOCH = 20 # 每个epoch循环的批次数
LEARNING_RATE = 0.01 # 学习率
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

### Load Dataset: Cora

In [89]:
from graph_neural_networks.data import CoraData
from collections import namedtuple

Data = namedtuple('Data', ['x', 'y', 'adjacency', 'train_mask', 'val_mask', 'test_mask'])

# 加载数据，并转换为torch.Tensor
dataset = CoraData(data_root='/root/data/cora', adj_dict=True).data
x = dataset.x / dataset.x.sum(1, keepdims=True)  # 归一化数据，使得每一行和为1
train_idx = np.where(dataset.train_mask)[0]
train_label = dataset.y
test_idx = np.where(dataset.test_mask)[0]

TypeError: __init__() got an unexpected keyword argument 'adj_dict'

### Build GraphSage, define optimizer and loss function

In [None]:
from graph_neural_networks.GraphSage.model import GraphSage

model = GraphSage(input_dim=INPUT_DIM, hidden_dim=HIDDEN_DIM,
                  num_neighbors_list=NUM_NEIGHBORS_LIST).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=5e-4)

### Define training function

In [None]:
from graph_neural_networks.GraphSage.sampling import multihop_sampling

def train():
    model.train()
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            bth_src_idx = np.random.choice(train_idx, size=(BATCH_SIZE,))
            bth_src_label = torch.from_numpy(train_label[bth_src_idx]).long().to(DEVICE)
            bth_sampling_res = multihop_sampling(bth_src_idx, NUM_NEIGHBORS_LIST, dataset.adjacency)
            bth_sampling_x = [torch.from_numpy(x[idx]).float().to(DEVICE) for idx in bth_sampling_res]

            bth_train_logits = model(bth_sampling_x)
            loss = criterion(bth_train_logits, bth_src_label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(e, batch, loss.item()))
        test()


@torch.no_grad()
def test():
    model.eval()
    test_sampling_result = multihop_sampling(test_idx, NUM_NEIGHBORS_LIST, dataset.adjacency)
    test_x = [torch.from_numpy(x[idx]).float().to(DEVICE) for idx in test_sampling_result]
    test_label = torch.from_numpy(dataset.y[test_idx]).long().to(DEVICE)

    test_logits = model(test_x)
    predict_y = test_logits.max(1)[1]
    accuarcy = torch.eq(predict_y, test_label).float().mean().item()
    print("Test Accuracy: ", accuarcy)

### Training model

In [None]:
train()