In [1]:
# Install of pytorch geometric https://github.com/rusty1s/pytorch_geometric

from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_dense_adj, add_self_loops

import torch_geometric.transforms as T

import torch
from torch import nn
import torch.nn.functional as F

import numpy as np

In [2]:
cora = Planetoid(root='./data', name='Cora')

In [3]:
print(cora.data)
print(f'Number of nodes: {cora.data.num_nodes}')
print(f'Number of edges: {cora.data.num_edges}')
print(f'Number of training nodes: {cora.data.train_mask.sum()}')
print(f'Training node label rate: {int(cora.data.train_mask.sum()) / cora.data.num_nodes:.2f}')
print(f'Is undirected: {cora.data.is_undirected()}')

Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
Number of nodes: 2708
Number of edges: 10556
Number of training nodes: 140
Training node label rate: 0.05
Is undirected: True


In [4]:
# Node features
X = cora.data.x

# Adjacency matrix + add self-loop
A = to_dense_adj(cora.data.edge_index).squeeze()
A = A + torch.eye(A.shape[0])

# Target
y = cora.data.y

In [5]:
A

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 0., 1., 1.]])

In [6]:
@torch.no_grad()
def masked_acc(logits, y, mask):
    
    a_max = torch.argmax(logits, dim=-1)
    
    eq = (a_max == y).float() * mask.float()
            
    return eq.sum() / mask.float().sum()

In [7]:
def masked_cross_entropy(logits, y, mask):
    
    masked_y = y.masked_fill(mask == False, value=-1)
    
    loss = F.cross_entropy(logits, masked_y, ignore_index=-1)
    
    return loss

## Training loop

In [8]:
def train_model(model, A, n_epochs, opt):
    
    best_acc = 0.

    for ep in range(n_epochs):

        out = model(X, A)

        loss =  masked_cross_entropy(out, y, cora.data.train_mask)

        loss.backward()

        opt.step()

        acc = masked_acc(out, y, cora.data.test_mask)

        if acc > best_acc:

            best_acc = acc

            print(f'epoch ={ep: .1f}\t acc ={best_acc: .1%}')
            
    return best_acc

## Graph Attention

In [14]:
class GAT(nn.Module):
    
    def __init__(self, in_feat=1433, hid_feat=64, num_classes=7):
        
        super().__init__()
                        
        self.W_att_1 = nn.Linear(in_feat, 3 * hid_feat, bias=False)
        
        self.W_att_2 = nn.Linear(hid_feat, 3 * num_classes, bias=False)
        
    def compute_attention(self, H, A, W):
        """
        Dot-product attention
        
        H: Hidden features [N, D]
        A: Adjacency matrix [N, N]
        W: layer for attention [D, 3 * D]
        """
                
        H = W(H) # [N, 3 * dim]
                                
        query, key, value = torch.chunk(H, 3, dim=-1)
        
        d_model = query.size(-1)
    
        score = query @ key.transpose(-1, -2) / np.sqrt(d_model) # [N, N]
                
        score = score.masked_fill(A == 0, float('-inf')) # [N, N]
                        
        normalized_score = F.softmax(score, dim=1) # [N, N]
                        
        return normalized_score @ value # [N, D]
        
        
    def forward(self, X, A):
        """
        inputs:
            X: Node features of shape [N, in_feat]
            A: Adjacency matrix of shape [N, N]
        returns:
            O: Logits of shape [N, num_classes]
        """
        
        H = self.compute_attention(X, A, self.W_att_1)
        
        H = torch.relu(H)
        
        O = self.compute_attention(H, A, self.W_att_2)
        
        return O

In [15]:
model = GAT(hid_feat=128)

opt = torch.optim.Adam(model.parameters())

In [16]:
train_model(model, A, 100, opt)

epoch = 0.0	 acc = 27.5%
epoch = 1.0	 acc = 47.6%
epoch = 2.0	 acc = 62.3%
epoch = 3.0	 acc = 70.6%
epoch = 4.0	 acc = 74.3%
epoch = 5.0	 acc = 75.2%
epoch = 6.0	 acc = 76.3%
epoch = 7.0	 acc = 77.6%
epoch = 8.0	 acc = 78.5%
epoch = 9.0	 acc = 79.1%
epoch = 10.0	 acc = 79.2%


KeyboardInterrupt: 

## GAT with PyG

In [13]:
from torch_geometric.nn import GATConv, GATv2Conv

In [24]:
class GATpyG(nn.Module):
    
    def __init__(self, in_feat=1433, hid_feat=64, num_classes=7):
        
        super().__init__()
        
        self.gat_1 = GATv2Conv(in_channels=in_feat, out_channels=hid_feat)
        
        self.gat_2 = GATv2Conv(in_channels=hid_feat, out_channels=num_classes)
        
    def forward(self, data):
        
        X, edge_index = data.x, data.edge_index
        # edge_index of shape [2, num_edges]
        # X of shape [N, in_feat]
        
        
        # the gcn layer takes the node features and the edge_index tensor
        H = self.gat_1(X, edge_index) # [N, hid_feat]
        
        H = torch.relu(H)
        
        O = self.gat_2(H, edge_index) # [N, num_classes]
        
        return O

In [25]:
def train_model_pyg(model, n_epochs, opt):
    
    best_acc = 0.

    for ep in range(n_epochs):

        out = model(cora.data)

        loss =  masked_cross_entropy(out, y, cora.data.train_mask)

        loss.backward()

        opt.step()

        acc = masked_acc(out, y, cora.data.test_mask)

        if acc > best_acc:

            best_acc = acc

            print(f'epoch ={ep: .1f}\t acc ={best_acc: .1%}')
            
    return best_acc

In [26]:
pyg_gcn = GATpyG()

In [27]:
opt_2 = torch.optim.Adam(pyg_gcn.parameters(), lr=0.002)

In [28]:
train_model_pyg(pyg_gcn, 100, opt_2)

epoch = 0.0	 acc = 7.8%
epoch = 1.0	 acc = 23.1%
epoch = 2.0	 acc = 39.4%
epoch = 3.0	 acc = 52.4%
epoch = 4.0	 acc = 62.0%
epoch = 5.0	 acc = 67.4%
epoch = 6.0	 acc = 69.8%
epoch = 7.0	 acc = 71.6%
epoch = 8.0	 acc = 73.6%
epoch = 9.0	 acc = 74.1%
epoch = 10.0	 acc = 74.7%
epoch = 11.0	 acc = 75.1%
epoch = 12.0	 acc = 75.8%
epoch = 13.0	 acc = 76.3%


tensor(0.7630)