In [1]:
import os.path as osp

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_cluster import random_walk
from sklearn.linear_model import LogisticRegression

import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import NeighborSampler as RawNeighborSampler
import pytorch_lightning as pl

In [2]:
# Small numerical constant; not referenced by the cells visible in this notebook.
EPS = 1e-15
SEED = 42

# Training is stochastic (batch shuffling, random walks, negative sampling,
# weight initialisation). Seeding the Python, NumPy and PyTorch global RNGs
# makes a "Restart Kernel -> Run All" far more repeatable.
pl.seed_everything(SEED)

dataset_name = 'Cora'
# `__file__` is not defined inside a notebook kernel, so the *quoted* literal
# is intentional: realpath('__file__') resolves against the current working
# directory and dirname() then yields that directory.
path = osp.join(osp.dirname(osp.realpath('__file__')), '..', 'data', dataset_name)
# Features are row-normalised on access via the transform.
dataset = Planetoid(path, dataset_name, transform=T.NormalizeFeatures())
# The dataset's first (index 0) graph is the one used throughout the notebook.
data = dataset[0]

In [3]:
class NeighborSampler(RawNeighborSampler):
    """Neighbor sampler that augments every batch with positive/negative nodes.

    The node list handed to the parent sampler is laid out as three
    equally sized consecutive chunks: ``[anchors, positives, negatives]``.
    """

    def sample(self, batch):
        """Extend ``batch`` with one positive and one negative node per anchor.

        Args:
            batch: Node indices of the anchor nodes (converted with
                ``torch.tensor``).

        Returns:
            Whatever the parent class' ``sample`` returns for the extended
            node list.
        """
        anchors = torch.tensor(batch)
        num_anchors = anchors.numel()
        src, dst, _ = self.adj_t.coo()

        # Positive example: the node reached by a single random-walk step
        # from each anchor (column 1 of the walk; column 0 is the start).
        walks = random_walk(src, dst, anchors, walk_length=1, coalesced=False)
        positives = walks[:, 1]

        # Negative example: a node index drawn uniformly at random.
        negatives = torch.randint(0, self.adj_t.size(1), (num_anchors, ),
                                  dtype=torch.long)

        extended = torch.cat([anchors, positives, negatives], dim=0)
        return super().sample(extended)


# Training loader: mini-batches of 256 anchor nodes, sampling up to 10
# neighbors per node in each of the two hops.
train_loader = NeighborSampler(data.edge_index, sizes=[10, 10], batch_size=256,
                               shuffle=True, num_nodes=data.num_nodes)

# Evaluation loaders: a single batch containing every node. Their batches only
# trigger the evaluation hooks, so shuffling would add cost without any
# benefit (and evaluation loaders should be deterministic anyway).
val_loader = NeighborSampler(data.edge_index, sizes=[10, 10],
                             batch_size=data.x.size(0), shuffle=False,
                             num_nodes=data.num_nodes)
test_loader = NeighborSampler(data.edge_index, sizes=[10, 10],
                              batch_size=data.x.size(0), shuffle=False,
                              num_nodes=data.num_nodes)

In [8]:
class SAGE(pl.LightningModule):
    """GraphSAGE encoder trained without labels and scored by a linear probe.

    Training pulls the embedding of each anchor node towards its positive
    example and pushes it away from its negative example. Validation and
    testing embed the full graph and report the accuracy of a logistic
    regression fitted on the training-mask embeddings.

    Args:
        in_channels: Size of the input node features.
        hidden_channels: Output size of every ``SAGEConv`` layer.
        num_layers: Number of stacked ``SAGEConv`` layers.
        data: Graph object providing ``x``, ``edge_index``, ``y`` and the
            ``train_mask`` / ``val_mask`` / ``test_mask`` attributes.
    """

    def __init__(self, in_channels, hidden_channels, num_layers, data):
        super().__init__()
        self.num_layers = num_layers
        self.convs = nn.ModuleList()
        for i in range(num_layers):
            # Only the first layer consumes the raw feature dimension.
            in_channels = in_channels if i == 0 else hidden_channels
            self.convs.append(SAGEConv(in_channels, hidden_channels))
        # Kept as a plain attribute: Lightning does not move it between
        # devices, so tensors are moved explicitly where they are used.
        self.data = data

    def forward(self, x, adjs):
        """Embed a sampled sub-graph.

        Args:
            x: Features of all sampled nodes.
            adjs: Iterable of ``(edge_index, _, size)`` triples, one per layer.

        Returns:
            Embeddings of the target nodes of the last layer.
        """
        for i, (edge_index, _, size) in enumerate(adjs):
            x_target = x[:size[1]]  # Target nodes are always placed first.
            x = self.convs[i]((x, x_target), edge_index)
            if i != self.num_layers - 1:
                x = x.relu()
                x = F.dropout(x, p=0.5, training=self.training)
        return x

    def full_forward(self, x, edge_index):
        """Embed every node using the complete ``edge_index`` (no sampling)."""
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i != self.num_layers - 1:
                x = x.relu()
                x = F.dropout(x, p=0.5, training=self.training)
        return x

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=1e-3)."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    def training_step(self, batch, batch_idx):
        """Compute the positive/negative log-sigmoid loss for one batch.

        ``batch`` is unpacked as ``(_, n_id, adjs)``; the embeddings produced
        for it are split into three equal chunks treated as anchors,
        positives and negatives, in that order.
        """
        _, n_id, adjs = batch
        features = self.data.x
        # Index on the feature tensor's device, then move to the module's
        # device (both are no-ops on a CPU-only run).
        x = features[n_id.to(features.device)].to(self.device)
        # Call *this* module rather than a notebook-global `model`, so the
        # step does not depend on what the instance happens to be called.
        out = self(x, adjs)
        out, pos_out, neg_out = out.split(out.size(0) // 3, dim=0)

        pos_loss = F.logsigmoid((out * pos_out).sum(-1)).mean()
        neg_loss = F.logsigmoid(-(out * neg_out).sum(-1)).mean()
        loss = -pos_loss - neg_loss
        self.log('train_loss', loss)
        return loss

    def _linear_probe_accuracy(self, eval_mask):
        """Fit a logistic regression on train-mask embeddings and score it.

        Args:
            eval_mask: Boolean node mask selecting the nodes to score on.

        Returns:
            Mean accuracy of the classifier on the ``eval_mask`` nodes.
        """
        data = self.data
        out = self.full_forward(data.x.to(self.device),
                                data.edge_index.to(self.device))

        # scikit-learn works on NumPy arrays: drop autograd history and make
        # sure everything lives on the CPU before converting.
        embeddings = out.detach().cpu().numpy()
        labels = data.y.cpu().numpy()
        train_mask = data.train_mask.cpu().numpy()
        eval_mask = eval_mask.cpu().numpy()

        clf = LogisticRegression()
        clf.fit(embeddings[train_mask], labels[train_mask])
        return clf.score(embeddings[eval_mask], labels[eval_mask])

    def validation_step(self, val_batch, batch_idx):
        """Log linear-probe accuracy on the validation mask.

        ``val_batch`` is ignored; the full graph is embedded instead.
        """
        val_acc = self._linear_probe_accuracy(self.data.val_mask)
        self.log('val_acc', val_acc, prog_bar=True)

    def test_step(self, test_batch, batch_idx):
        """Log linear-probe accuracy on the test mask.

        ``test_batch`` is ignored; the full graph is embedded instead.
        """
        test_acc = self._linear_probe_accuracy(self.data.test_mask)
        self.log('test_acc', test_acc, prog_bar=True)

In [11]:
# Two-layer GraphSAGE encoder with 64-dimensional embeddings.
model = SAGE(data.num_node_features, hidden_channels=64, num_layers=2, data=data)
trainer = pl.Trainer(max_epochs=50)
trainer.fit(model, train_loader, val_loader)
# `test_dataloaders=` was deprecated in Lightning v1.4 (removed in v1.6);
# `dataloaders=` is the supported keyword.
trainer.test(model, dataloaders=test_loader)

Testing:   0%|          | 0/1 [05:37<?, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name  | Type       | Params
-------------------------------------
0 | convs | ModuleList | 191 K 
-------------------------------------
191 K     Trainable params
0         Non-trainable params
191 K     Total params
0.767     Total estimated model params size (MB)



Validation sanity check:   0%|          | 0/1 [00:00<?, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


                                                                      

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Epoch 0:  92%|█████████▏| 11/12 [00:01<00:00,  6.43it/s, loss=1.41, v_num=4]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 12/12 [00:01<00:00,  6.09it/s, loss=1.41, v_num=4, val_acc=0.326]
Epoch 1:  92%|█████████▏| 11/12 [00:01<00:00,  6.21it/s, loss=1.4, v_num=4, val_acc=0.326] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 1: 100%|██████████| 12/12 [00:02<00:00,  5.87it/s, loss=1.4, v_num=4, val_acc=0.324]
Epoch 2:  92%|█████████▏| 11/12 [00:01<00:00,  6.23it/s, loss=1.39, v_num=4, val_acc=0.324]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 2: 100%|██████████| 12/12 [00:02<00:00,  5.90it/s, loss=1.39, v_num=4, val_acc=0.306]
Epoch 3:  92%|█████████▏| 11/12 [00:01<00:00,  6.15it/s, loss=1.38, v_num=4, val_acc=0.306]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 3: 100%|██████████

  "`trainer.test(test_dataloaders)` is deprecated in v1.4 and will be removed in v1.6."
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."



Testing: 100%|██████████| 1/1 [00:00<00:00,  3.92it/s]--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.7020000219345093}
--------------------------------------------------------------------------------
Testing: 100%|██████████| 1/1 [00:00<00:00,  3.74it/s]


[{'test_acc': 0.7020000219345093}]