In [1]:
# Install of pytorch geometric https://github.com/rusty1s/pytorch_geometric

from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_dense_adj, add_self_loops

import torch
from torch import nn
import torch.nn.functional as F

import numpy as np

In [2]:
cora = Planetoid(root='./data', name='Cora')

In [3]:
print(cora)
print(f'Number of nodes: {cora.data.num_nodes}')
print(f'Number of edges: {cora.data.num_edges}')
print(f'Number of training nodes: {cora.data.train_mask.sum()}')
print(f'Training node label rate: {int(cora.data.train_mask.sum()) / cora.data.num_nodes:.2f}')
print(f'Is undirected: {cora.data.is_undirected()}')

Cora()
Number of nodes: 2708
Number of edges: 10556
Number of training nodes: 140
Training node label rate: 0.05
Is undirected: True


In [5]:
cora.data.val_mask.sum()

tensor(500)

In [6]:
# Node features
X = cora.data.x # document bag of words

# Adjacency matrix + add self-loop
A = to_dense_adj(cora.data.edge_index).squeeze()
A = A + torch.eye(A.shape[0])

# Target
y = cora.data.y # topic

In [10]:
A.shape

torch.Size([2708, 2708])

In [11]:
y

tensor([3, 4, 4,  ..., 3, 3, 3])

In [12]:
@torch.no_grad()
def masked_acc(logits, y, mask):
    
    a_max = torch.argmax(logits, dim=-1)
    
    eq = (a_max == y).float() * mask.float()
            
    return eq.sum() / mask.float().sum()

In [13]:
def masked_cross_entropy(logits, y, mask):
    
    masked_y = y.masked_fill(mask == False, value=-1)
    
    loss = F.cross_entropy(logits, masked_y, ignore_index=-1)
    
    return loss

## Training loop

In [15]:
def train_model(model, A, n_epochs, opt):
    
    best_acc = 0.

    for ep in range(n_epochs):

        out = model(X, A) # [N, num_classes]

        loss =  masked_cross_entropy(out, y, cora.data.train_mask)

        loss.backward()

        opt.step()

        acc = masked_acc(out, y, cora.data.test_mask)

        if acc > best_acc:

            best_acc = acc

            print(f'epoch ={ep: .1f}\t acc ={best_acc: .1%}')
            
    return best_acc

## Graph Convolution

In [16]:
class GCN(nn.Module):
    
    def __init__(self, in_feat=1433, hid_feat=64, num_classes=7):
        
        super().__init__()
        
        self.fc_1 = nn.Linear(in_feat, hid_feat)
        
        self.fc_2 = nn.Linear(hid_feat, num_classes)
        
    def forward(self, X, A):
        """
        inputs:
            X: Node features of shape [N, in_feat]
            A: Adjacency matrix of shape [N, N]
        returns:
            O: Logits of shape [N, num_classes]
        """
        
        H = A @ self.fc_1(X) # [N, N] [N, hid_feat]
        
        H = torch.relu(H)
        
        O = A @ self.fc_2(H) # num_nodes, dim
        
        return O # [N, num_classes]

In [17]:
# degree matrix

D = A.sum(-1)

In [18]:
D

tensor([4., 4., 6.,  ..., 2., 5., 5.])

In [19]:
# row normalization

A_mean_pooling = torch.diag(1 / D) @ A

In [23]:
A_mean_pooling.sum(1)

tensor([1., 1., 1.,  ..., 1., 1., 1.])

In [24]:
# symmetric normalization (Kipf & Welling, 2017)

norm_D = 1 / torch.sqrt(D)

A_GCN = torch.diag(norm_D) @ A @ torch.diag(norm_D) # (kipf & Welling)

In [35]:
model = GCN(hid_feat=64)

opt = torch.optim.Adam(model.parameters())

In [36]:
train_model(model, torch.eye(cora.data.x.shape[0]), 100, opt)

epoch = 0.0	 acc = 10.4%
epoch = 3.0	 acc = 10.6%
epoch = 4.0	 acc = 11.0%
epoch = 5.0	 acc = 11.6%
epoch = 6.0	 acc = 13.0%
epoch = 7.0	 acc = 14.2%
epoch = 8.0	 acc = 16.5%
epoch = 9.0	 acc = 19.4%
epoch = 10.0	 acc = 22.9%
epoch = 11.0	 acc = 25.5%
epoch = 12.0	 acc = 28.5%
epoch = 13.0	 acc = 31.5%
epoch = 14.0	 acc = 33.6%
epoch = 15.0	 acc = 35.8%
epoch = 16.0	 acc = 38.0%
epoch = 17.0	 acc = 40.3%
epoch = 18.0	 acc = 41.0%
epoch = 19.0	 acc = 42.4%
epoch = 20.0	 acc = 43.4%
epoch = 21.0	 acc = 44.8%
epoch = 22.0	 acc = 45.6%
epoch = 23.0	 acc = 46.4%
epoch = 24.0	 acc = 46.6%
epoch = 25.0	 acc = 47.1%
epoch = 27.0	 acc = 47.6%
epoch = 30.0	 acc = 48.2%
epoch = 31.0	 acc = 48.4%
epoch = 33.0	 acc = 49.0%
epoch = 37.0	 acc = 49.1%
epoch = 40.0	 acc = 49.7%
epoch = 41.0	 acc = 49.9%
epoch = 45.0	 acc = 50.0%
epoch = 47.0	 acc = 50.4%
epoch = 48.0	 acc = 50.5%
epoch = 49.0	 acc = 50.8%
epoch = 50.0	 acc = 50.9%
epoch = 51.0	 acc = 51.4%
epoch = 54.0	 acc = 51.5%
epoch = 55.0	 acc = 

tensor(0.5460)

## GCN with Pytorch Geometric

In [38]:
from torch_geometric.nn import GCNConv

In [43]:
class GCNpyG(nn.Module):
    
    def __init__(self, in_feat=1433, hid_feat=64, num_classes=7):
        
        super().__init__()
        
        self.gcn_1 = GCNConv(in_channels=in_feat, out_channels=hid_feat)
        
        self.gcn_2 = GCNConv(in_channels=hid_feat, out_channels=num_classes)
        
    def forward(self, data):
        
        X, edge_index = data.x, data.edge_index
        # edge_index of shape [2, num_edges]
        # X of shape [N, in_feat]
        
        # no need to add self-loops 
        # edge_index, _ = add_self_loops(edge_index, num_nodes=data.num_nodes)
                
        # the gcn layer takes the node features and the edge_index tensor
        H = self.gcn_1(X, edge_index) # [N, hid_feat]
        
        H = torch.relu(H)
        
        O = self.gcn_2(H, edge_index) # [N, num_classes]
        
        return O

In [44]:
def train_model_pyg(model, n_epochs, opt):
    
    best_acc = 0.

    for ep in range(n_epochs):

        out = model(cora.data)

        loss =  masked_cross_entropy(out, y, cora.data.train_mask)

        loss.backward()

        opt.step()

        acc = masked_acc(out, y, cora.data.test_mask)

        if acc > best_acc:

            best_acc = acc

            print(f'epoch ={ep: .1f}\t acc ={best_acc: .1%}')
            
    return best_acc

In [45]:
pyg_gcn = GCNpyG()

In [46]:
opt_2 = torch.optim.Adam(pyg_gcn.parameters())

In [47]:
# torch.Size([2, 13264])

In [48]:
train_model_pyg(pyg_gcn, 100, opt_2)

epoch = 0.0	 acc = 16.2%
epoch = 1.0	 acc = 23.3%
epoch = 2.0	 acc = 30.1%
epoch = 3.0	 acc = 39.9%
epoch = 4.0	 acc = 47.8%
epoch = 5.0	 acc = 54.5%
epoch = 6.0	 acc = 59.4%
epoch = 7.0	 acc = 63.7%
epoch = 8.0	 acc = 66.1%
epoch = 9.0	 acc = 68.3%
epoch = 10.0	 acc = 70.3%
epoch = 11.0	 acc = 72.1%
epoch = 12.0	 acc = 73.2%
epoch = 13.0	 acc = 74.3%
epoch = 14.0	 acc = 75.1%
epoch = 15.0	 acc = 75.5%
epoch = 16.0	 acc = 75.7%
epoch = 17.0	 acc = 75.8%
epoch = 18.0	 acc = 76.3%
epoch = 19.0	 acc = 76.6%
epoch = 20.0	 acc = 77.0%
epoch = 21.0	 acc = 77.2%
epoch = 24.0	 acc = 77.6%
epoch = 26.0	 acc = 78.0%
epoch = 27.0	 acc = 78.3%
epoch = 31.0	 acc = 78.6%
epoch = 32.0	 acc = 78.7%
epoch = 35.0	 acc = 79.2%
epoch = 37.0	 acc = 79.7%
epoch = 38.0	 acc = 79.8%
epoch = 40.0	 acc = 79.9%
epoch = 41.0	 acc = 80.2%
epoch = 42.0	 acc = 80.4%
epoch = 45.0	 acc = 80.6%
epoch = 47.0	 acc = 80.7%
epoch = 50.0	 acc = 81.2%


tensor(0.8120)