In [44]:
import os.path as osp
import argparse

import torch
import torch.nn.functional as F

import torch_geometric
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, ChebConv  # noqa
from torch_geometric.data import NeighborSampler

In [125]:
dataset = 'Cora'
path = '/home/ygx/data'
dataset = Planetoid(path, dataset, T.NormalizeFeatures())
data = dataset[0]

In [126]:
# Node features
data

Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])

In [28]:
data.edge_index.shape

torch.Size([2, 10556])

In [33]:
data.train_mask

tensor([ True,  True,  True,  ..., False, False, False])

In [35]:
data.test_mask

tensor([False, False, False,  ...,  True,  True,  True])

In [97]:
sampler = NeighborSampler(data, 10, 5, 10, bipartite=False)

In [98]:
from torch.utils.data import DataLoader

In [54]:
loader = DataLoader(sampler)

In [6]:
xclass Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 16, cached=True)
        self.conv2 = GCNConv(16, dataset.num_classes, cached=True)
        # self.conv1 = ChebConv(data.num_features, 16, K=2)
        # self.conv2 = ChebConv(16, data.num_features, K=2)

        self.reg_params = self.conv1.parameters()
        self.non_reg_params = self.conv2.parameters()

    def forward(self):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr
        x = F.relu(self.conv1(x, edge_index, edge_weight))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_weight)
        return F.log_softmax(x, dim=1)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam([
    dict(params=model.reg_params, weight_decay=5e-4),
    dict(params=model.non_reg_params, weight_decay=0)
], lr=0.01)


def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()


def test():
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs


best_val_acc = test_acc = 0
for epoch in range(1, 201):
    train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    print(log.format(epoch, train_acc, best_val_acc, test_acc))

Epoch: 001, Train: 0.3357, Val: 0.3440, Test: 0.3610
Epoch: 002, Train: 0.4929, Val: 0.5160, Test: 0.5090
Epoch: 003, Train: 0.6429, Val: 0.5160, Test: 0.5090
Epoch: 004, Train: 0.6643, Val: 0.5160, Test: 0.5090
Epoch: 005, Train: 0.7500, Val: 0.5300, Test: 0.5440
Epoch: 006, Train: 0.8000, Val: 0.5780, Test: 0.6020
Epoch: 007, Train: 0.8500, Val: 0.6280, Test: 0.6550
Epoch: 008, Train: 0.8714, Val: 0.6560, Test: 0.6730
Epoch: 009, Train: 0.8857, Val: 0.6800, Test: 0.6860
Epoch: 010, Train: 0.9214, Val: 0.6880, Test: 0.7090
Epoch: 011, Train: 0.9357, Val: 0.7120, Test: 0.7260
Epoch: 012, Train: 0.9357, Val: 0.7160, Test: 0.7180
Epoch: 013, Train: 0.9357, Val: 0.7160, Test: 0.7180
Epoch: 014, Train: 0.9286, Val: 0.7160, Test: 0.7180
Epoch: 015, Train: 0.9286, Val: 0.7160, Test: 0.7180
Epoch: 016, Train: 0.9214, Val: 0.7160, Test: 0.7180
Epoch: 017, Train: 0.9214, Val: 0.7160, Test: 0.7180
Epoch: 018, Train: 0.9143, Val: 0.7160, Test: 0.7180
Epoch: 019, Train: 0.9214, Val: 0.7160, Test: 

Epoch: 173, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 174, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 175, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 176, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 177, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 178, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 179, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 180, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 181, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 182, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 183, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 184, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 185, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 186, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 187, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 188, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 189, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 190, Train: 1.0000, Val: 0.7920, Test: 0.8180
Epoch: 191, Train: 1.0000, Val: 0.7920, Test: 

In [43]:
sum(p.numel() for p in model.parameters())

23063

In [121]:
class GCN(torch.nn.Module):
    def __init__(self, dataset, num_layers, hidden):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, hidden)
        self.convs = torch.nn.ModuleList()
        for i in range(num_layers - 1):
            self.convs.append(GCNConv(hidden, hidden))
        self.lin1 = torch.nn.Linear(hidden, hidden)
        self.lin2 = torch.nn.Linear(hidden, dataset.num_classes)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        for conv in self.convs:
            conv.reset_parameters()
        self.lin1.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        for conv in self.convs:
            x = F.relu(conv(x, edge_index))
        x = global_mean_pool(x, batch)
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)

    def __repr__(self):
        return self.__class__.__name__

In [122]:
model = GCN(dataset, 2, 100)

In [68]:
next(sampler())

DataFlow(1<-3<-7)

In [66]:
data

Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])

In [128]:
sampler = NeighborSampler(
    data, size=10, num_hops=5, batch_size=10, bipartite=False
)

loader = sampler()

In [124]:
for idx, batch in enumerate(loader):
    print(f'id: {idx}, batch: {batch}')
    model(batch)

id: 0, batch: Data(b_id=[10], e_id=[2980], edge_index=[2, 2980], n_id=[1188], sub_b_id=[10])


TypeError: matmul(): argument 'input' (position 1) must be Tensor, not NoneType

In [127]:
batch

Data(b_id=[10], e_id=[2980], edge_index=[2, 2980], n_id=[1188], sub_b_id=[10])