In [1]:
from typing import Callable, List, Optional, Tuple
import numpy as np
import torch
import torch.nn. functional as F
import torch_geometric.transforms as T
from torch import Tensor
from torch.optim import Optimizer
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import accuracy
from typing_extensions import Literal, TypedDict
from collections import defaultdict
from typing import List, Optional, Tuple, Union
import scipy.sparse
from torch import Tensor
from scipy.sparse import coo_matrix, eye, diags, csr_matrix
import torch.nn as nn
from torch.nn import init
from torch.nn import Parameter
import torch.nn.functional as F
from functools import cached_property
import scipy.sparse as sp

In [2]:
# Function to load the dataset 
def get_dataset(path, name, split, num_train_per_class, num_val, num_test, transform):
    """Load a Planetoid dataset and print a short summary of its single graph.

    Args:
        path: Root directory handed to ``Planetoid``.
        name: Dataset name handed to ``Planetoid`` (e.g. ``"Cora"``).
        split: Split type forwarded to ``Planetoid``.
        num_train_per_class: Forwarded to ``Planetoid``.
        num_val: Forwarded to ``Planetoid``.
        num_test: Forwarded to ``Planetoid``.
        transform: Transform forwarded to ``Planetoid``.

    Returns:
        The constructed ``Planetoid`` dataset object.
    """
    dataset = Planetoid(
        path,
        name=name,
        split=split,
        num_train_per_class=num_train_per_class,
        num_val=num_val,
        num_test=num_test,
        transform=transform,
    )

    # Index the dataset once instead of reading `dataset.data`: the indexed
    # graph is what the rest of the notebook trains on, and it avoids touching
    # the dataset's internal storage attribute.
    graph = dataset[0]
    num_nodes = graph.num_nodes
    # Halved for display -- presumably every undirected edge is stored in both
    # directions; verify before using this with directed graphs.
    num_edges = graph.num_edges // 2

    # Convert the mask counts to plain ints so the summary arithmetic and
    # formatting do not depend on 0-dim tensor behaviour.
    train_len = int(graph.train_mask.sum())
    val_len = int(graph.val_mask.sum())
    test_len = int(graph.test_mask.sum())
    other_len = num_nodes - train_len - val_len - test_len

    print(f"Dataset: {dataset.name}")
    print(f"Num. nodes: {num_nodes} (train={train_len}, val={val_len}, test={test_len}, other={other_len})")
    print(f"Num. edges: {num_edges}")
    print(f"Num. node features: {dataset.num_node_features}")
    print(f"Num. classes: {dataset.num_classes}")
    print(f"Dataset len.: {dataset.len()}")
    return dataset

In [3]:
# Function to create an adjacency matrix from edge index and edge attributes

def adjacency_matrix(edge_index, edge_attr=None, num_nodes=None):
    """Build a dense, symmetrised adjacency matrix from an edge list.

    Args:
        edge_index: Integer tensor of shape ``(2, num_edges)`` holding the
            row and column index of every edge.
        edge_attr: Optional tensor with one weight per edge. When ``None``
            every edge gets weight 1.
        num_nodes: Number of nodes. When ``None`` it is inferred as
            ``edge_index.max() + 1`` (0 for an empty edge list), which
            misses isolated nodes with the highest indices -- pass it
            explicitly when the node count is known.

    Returns:
        Dense ``(num_nodes, num_nodes)`` tensor equal to ``(A + A.T) / 2``.
    """
    if num_nodes is None:
        # An empty edge list has no maximum index; treat it as an empty graph
        # instead of letting `max()` raise.
        num_nodes = int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0
    if edge_attr is None:
        # Create the default weights on the same device as the indices so the
        # sparse constructor does not see tensors on different devices.
        edge_attr = torch.ones(edge_index.shape[1], dtype=torch.float, device=edge_index.device)
    adj_matrix_sparse = torch.sparse_coo_tensor(edge_index, edge_attr, (num_nodes, num_nodes))
    adj_matrix = adj_matrix_sparse.to_dense()
    # Average with the transpose so the result is symmetric.
    return (adj_matrix + adj_matrix.T) / 2


In [4]:
# Compute the normalized adjacency matrix from the paper: \tilde{P} = \tilde{D}^{-1/2} \tilde{A} \tilde{D}^{-1/2}, where \tilde{A} = A + I

def normalize_adjacency_matrix(A):
    """Return ``D^{-1/2} (A + I) D^{-1/2}`` as a SciPy sparse matrix.

    ``A`` is converted to CSR when it is not one already, self-loops are
    added, and ``D`` is the diagonal matrix of row sums of ``A + I`` (row
    sums equal to zero are replaced by one before taking the inverse square
    root).
    """
    adj = A if isinstance(A, csr_matrix) else csr_matrix(A)
    adj_with_loops = adj + sp.eye(adj.shape[0])

    row_sums = np.array(adj_with_loops.sum(axis=1)).flatten()
    # Guard against dividing by zero for rows that sum to nothing.
    safe_row_sums = np.where(row_sums == 0, 1, row_sums)

    scaling = diags(1.0 / np.sqrt(safe_row_sums))
    return scaling @ adj_with_loops @ scaling


# Convert the matrix to Tensor
def sparse_matrix_to_torch_sparse_tensor(sparse_matrix):
    """Convert a SciPy sparse matrix into a float32 torch sparse COO tensor.

    Args:
        sparse_matrix: Any SciPy sparse matrix exposing ``tocoo()``.

    Returns:
        A ``torch`` sparse COO tensor with int64 indices, float32 values and
        the same shape as ``sparse_matrix``.
    """
    coo = sparse_matrix.tocoo()
    # `torch.tensor` copies, so the result never aliases the SciPy buffers.
    indices = torch.tensor(np.vstack((coo.row, coo.col)), dtype=torch.long)
    values = torch.tensor(coo.data, dtype=torch.float32)
    # `torch.sparse_coo_tensor` replaces the legacy `torch.sparse.FloatTensor`
    # constructor.
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))

In [5]:
# Definition of the GCNII model

class GCNLayer(nn.Module):
    """Graph convolution with an initial-residual and an identity-mapping term.

    The forward pass computes::

        features = (1 - alpha) * (adj @ input) + alpha * h_0
        output   = features @ ((1 - beta) * I + beta * W)

    with ``beta = log(lamda / l + 1)`` and ``W`` the layer's weight matrix.
    ``I`` is sized from ``W``'s first dimension, so the sum only lines up
    when ``in_features == out_features``.
    """

    def __init__(self, in_features, out_features):
        """Create a Kaiming-uniform initialised ``(in_features, out_features)`` weight."""
        super(GCNLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = torch.nn.Parameter(
            init.kaiming_uniform_(
                torch.empty(self.in_features, self.out_features),
                mode='fan_in',
                nonlinearity='relu',
            )
        )

    def forward(self, input, adj , h_0 , lamda, alpha, l):
        """Apply the layer.

        Args:
            input: Dense node features fed to this layer.
            adj: Sparse adjacency matrix multiplied with ``input``.
            h_0: Initial representation mixed back in with weight ``alpha``.
            lamda: Numerator of the per-layer decay ``beta``.
            alpha: Weight of the initial residual ``h_0``.
            l: Layer index used in ``beta``; must be non-zero.

        Returns:
            Dense tensor ``features @ ((1 - beta) * I + beta * W)``.
        """
        h_l = torch.spmm(adj, input)
        features = (1 - alpha) * h_l + alpha * h_0

        # Keep beta a plain Python float so no NumPy scalar takes part in the
        # tensor arithmetic below.
        beta = float(np.log((lamda / l) + 1))

        # Build the identity next to the weight (same dtype and device);
        # a default `torch.eye` would live on CPU even after `.to(device)`.
        identity = torch.eye(
            self.weight.shape[0], dtype=self.weight.dtype, device=self.weight.device
        )
        weights = (1 - beta) * identity + beta * self.weight
        return torch.mm(features, weights)

class GCNII(nn.Module):
    """Stack of ``GCNLayer`` blocks between an input and an output linear layer.

    Args:
        nfeat: Number of input features per node.
        nlayers: Number of ``GCNLayer`` blocks.
        nhidden: Hidden width used by every block and by ``pre_fc``.
        nclass: Number of output classes.
        dropout: Dropout probability applied before every layer.
        lamda: Passed to every ``GCNLayer`` forward call.
        alpha: Passed to every ``GCNLayer`` forward call.
    """

    def __init__(self, nfeat, nlayers, nhidden, nclass, dropout, lamda, alpha):
        super(GCNII, self).__init__()
        # The convolution stack is created before the linear layers so that
        # parameter registration order (and therefore seeded initialisation)
        # stays the same.
        self.graph_convs = nn.ModuleList(
            [GCNLayer(nhidden, nhidden) for _ in range(nlayers)]
        )

        self.pre_fc = nn.Linear(nfeat, nhidden)
        self.post_fc = nn.Linear(nhidden, nclass)

        self.relu = nn.ReLU()
        self.dropout = dropout
        self.lamda = lamda
        self.alpha = alpha

    # Forward pass accepts edge_index and edge_attr
    def forward(self, x, edge_index, edge_attr):
        """Return per-node log-probabilities of shape ``(x.size(0), nclass)``.

        The normalised adjacency is rebuilt from ``edge_index`` / ``edge_attr``
        on every call, going through a dense matrix and SciPy.
        """
        # Pass the node count explicitly: inferring it from the largest edge
        # index drops trailing isolated nodes and makes `adj` smaller than `x`.
        dense_adj = adjacency_matrix(edge_index, edge_attr, num_nodes=x.size(0))
        # SciPy needs a host-side array that carries no autograd history.
        norm_adj = normalize_adjacency_matrix(dense_adj.detach().cpu().numpy())
        # Move the sparse operator next to the features it multiplies.
        adj = sparse_matrix_to_torch_sparse_tensor(norm_adj).to(x.device)

        x = F.dropout(x, self.dropout, training=self.training)
        h_0 = self.relu(self.pre_fc(x))
        h = h_0
        # Layers are numbered from 1 because the index divides `lamda`.
        for layer_idx, conv in enumerate(self.graph_convs, start=1):
            h = F.dropout(h, self.dropout, training=self.training)
            h = self.relu(conv(h, adj, h_0, self.lamda, self.alpha, layer_idx))
        h = F.dropout(h, self.dropout, training=self.training)
        h = self.post_fc(h)
        return F.log_softmax(h, dim=1)
    


# Placeholder script entry point: the guarded body is a no-op.
if __name__ == '__main__':
    pass




In [6]:
# A loss callable: takes two tensors and returns a tensor.
LossFunction = Callable[[Tensor, Tensor], Tensor]
# Name of a data split; evaluate_step uses it to look up the `<stage>_mask` attribute.
Stage = Literal["train", "val", "test"]

def train_step(
    model: torch.nn.Module, data: Data, optimizer: torch.optim.Optimizer, loss_function: LossFunction
) -> Tuple[float, float]:
    """Run one optimisation step on the nodes selected by ``data.train_mask``.

    The model is put in training mode, evaluated on the whole graph, and the
    loss/accuracy are computed on the masked rows only. Both metrics are
    computed before the parameter update.

    Returns:
        ``(loss, accuracy)`` for the training nodes.
    """
    model.train()
    optimizer.zero_grad()

    mask = data.train_mask
    masked_logits = model(data.x, data.edge_index, data.edge_attr)[mask]
    targets = data.y[mask]

    step_loss = loss_function(masked_logits, targets)
    step_acc = accuracy(masked_logits.argmax(dim=1), targets)

    step_loss.backward()
    optimizer.step()
    return step_loss.item(), step_acc

@torch.no_grad()
def evaluate_step(
    model: torch.nn.Module, data: Data, loss_function: LossFunction, stage: Stage
) -> Tuple[float, float]:
    """Evaluate the model on one split with gradient tracking disabled.

    The split is chosen by reading the ``<stage>_mask`` attribute of ``data``.
    The model is switched to evaluation mode and is left in that mode.

    Returns:
        ``(loss, accuracy)`` for the nodes of the requested split.
    """
    model.eval()

    mask = getattr(data, f"{stage}_mask")
    masked_logits = model(data.x, data.edge_index, data.edge_attr)[mask]
    targets = data.y[mask]

    stage_loss = loss_function(masked_logits, targets)
    stage_acc = accuracy(masked_logits.argmax(dim=1), targets)
    return stage_loss.item(), stage_acc


In [7]:
class HistoryDict(TypedDict):
    """Per-epoch metric lists built and returned by ``train``."""
    # Training loss, one entry per completed epoch
    loss: List[float]
    # Training accuracy, one entry per completed epoch
    acc: List[float]
    # Validation loss, one entry per completed epoch
    val_loss: List[float]
    # Validation accuracy, one entry per completed epoch
    val_acc: List[float]

def train(
    model: torch.nn.Module,
    data: Data,
    optimizer: torch.optim.Optimizer,
    loss_function: LossFunction = torch.nn.CrossEntropyLoss(),
    max_epochs: int = 1500,
    early_stopping: int = 100,
    print_interval: int = 20,
    verbose: bool = True,
) -> HistoryDict:
    """Train ``model`` with early stopping on the validation loss, then evaluate on test.

    Args:
        model: Model to optimise; called as ``model(x, edge_index, edge_attr)``
            by the step functions.
        data: Graph object providing features, labels and split masks.
        optimizer: Optimiser stepped once per epoch.
        loss_function: Loss applied to the masked model outputs and labels.
        max_epochs: Upper bound on the number of epochs.
        early_stopping: Window size. Training stops once ``epoch`` exceeds
            this value and the current validation loss is greater than the
            mean of the previous ``early_stopping`` validation losses.
        print_interval: Progress is printed every ``print_interval`` epochs;
            a value of 0 or less disables periodic printing.
        verbose: Whether to print progress and the final summary.

    Returns:
        History with per-epoch ``loss``, ``acc``, ``val_loss`` and ``val_acc``.
        Test metrics are only printed, not returned.
    """
    history: HistoryDict = {"loss": [], "val_loss": [], "acc": [], "val_acc": []}

    # Training loop
    for epoch in range(max_epochs):
        loss, acc = train_step(model, data, optimizer, loss_function)
        val_loss, val_acc = evaluate_step(model, data, loss_function, "val")

        # Update history
        history["loss"].append(loss)
        history["acc"].append(acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        # The slice excludes the value just appended, so the current loss is
        # compared against the mean of the preceding window.
        if epoch > early_stopping and val_loss > np.mean(history["val_loss"][-(early_stopping + 1):-1]):
            if verbose:
                print("\nEarly stopping...")
            break

        # Print training progress; the `> 0` guard avoids a modulo-by-zero
        # when periodic printing is switched off with print_interval=0.
        if verbose and print_interval > 0 and epoch % print_interval == 0:
            print(f"\nEpoch: {epoch}\n------")
            print(f"Train loss: {loss:.4f} | Train acc: {acc:.4f}")
            print(f"Val loss: {val_loss:.4f} | Val acc: {val_acc:.4f}")

    # Perform a final evaluation step
    test_loss, test_acc = evaluate_step(model, data, loss_function, "test")
    # Print final results
    if verbose:
        # With max_epochs <= 0 the loop never ran, so there is no epoch number
        # and there are no train/val metrics to report.
        if history["loss"]:
            print(f"\nEpoch: {epoch}\n------")
            print(f" Train loss: {loss:.4f} | Train acc: {acc:.4f}")
            print(f" Val loss: {val_loss:.4f} | Val acc: {val_acc:.4f}")
        print(f" Test loss: {test_loss:.4f} | Test acc: {test_acc:.4f}")
    return history

In [8]:
# Cora, semi-supervised task as in the paper:
# 20 training nodes per class, 500 validation nodes and 1000 test nodes.
dataset = get_dataset(
    path="/tmp/Cora",
    name="Cora",
    split="public",
    num_train_per_class=20,
    num_val=500,
    num_test=1000,
    transform=T.NormalizeFeatures(),
)
data = dataset[0]

# GCNII training configuration for the semi-supervised task (Table 6 of the paper)
SEED = 42

# Architecture
NLAYERS = 64
NHIDDEN = 64
ALPHA = 0.1
LAMBDA = 0.5
DROPOUT = 0.6

# Optimisation
LEARNING_RATE = 0.01
WEIGHT_DECAY_1 = 0.01
WEIGHT_DECAY_2 = 5e-4
MAX_EPOCHS = 1500
EARLY_STOPPING = 100

torch.manual_seed(SEED)

# Build the model
model = GCNII(
    nfeat=data.num_node_features,
    nlayers=NLAYERS,
    nhidden=NHIDDEN,
    nclass=dataset.num_classes,
    dropout=DROPOUT,
    lamda=LAMBDA,
    alpha=ALPHA,
)

# One optimizer group per parameter: parameters whose name starts with
# 'graph' use WEIGHT_DECAY_1, all others use WEIGHT_DECAY_2.
opt_order_dict = []
for name, param in model.named_parameters():
    opt_order_dict.append({
        'params': param,
        'weight_decay': WEIGHT_DECAY_1 if name.startswith('graph') else WEIGHT_DECAY_2,
    })

optimizer = torch.optim.Adam(opt_order_dict, lr=LEARNING_RATE)

# Run training
history = train(
    model,
    data,
    optimizer,
    max_epochs=MAX_EPOCHS,
    early_stopping=EARLY_STOPPING,
)


Dataset: Cora
Num. nodes: 2708 (train=140, val=500, test=1000, other=1068)
Num. edges: 5278
Num. node features: 1433
Num. classes: 7
Dataset len.: 1

Epoch: 0
------
Train loss: 1.9482 | Train acc: 0.1429
Val loss: 1.9327 | Val acc: 0.1620

Epoch: 20
------
Train loss: 1.8855 | Train acc: 0.4143
Val loss: 1.8920 | Val acc: 0.7440

Epoch: 40
------
Train loss: 1.7371 | Train acc: 0.4429
Val loss: 1.7675 | Val acc: 0.7280

Epoch: 60
------
Train loss: 1.6112 | Train acc: 0.5214
Val loss: 1.5907 | Val acc: 0.7840

Epoch: 80
------
Train loss: 1.3724 | Train acc: 0.6571
Val loss: 1.4288 | Val acc: 0.7940

Epoch: 100
------
Train loss: 1.2164 | Train acc: 0.6500
Val loss: 1.3062 | Val acc: 0.8000

Epoch: 120
------
Train loss: 1.1287 | Train acc: 0.7071
Val loss: 1.2148 | Val acc: 0.8120

Epoch: 140
------
Train loss: 1.0902 | Train acc: 0.7143
Val loss: 1.1625 | Val acc: 0.8040

Epoch: 160
------
Train loss: 1.1445 | Train acc: 0.6786
Val loss: 1.1021 | Val acc: 0.8140

Epoch: 180
------
T

In [9]:
# CiteSeer, semi-supervised task as in the paper:
# 20 training nodes per class, 500 validation nodes and 1000 test nodes.
dataset = get_dataset(
    path="/tmp/CiteSeer",
    name="CiteSeer",
    split="public",
    num_train_per_class=20,
    num_val=500,
    num_test=1000,
    transform=T.NormalizeFeatures(),
)
data = dataset[0]

# GCNII training configuration for the semi-supervised task (Table 6 of the paper)
SEED = 42

# Architecture
NLAYERS = 32
NHIDDEN = 256
ALPHA = 0.1
LAMBDA = 0.6
DROPOUT = 0.7

# Optimisation
LEARNING_RATE = 0.01
WEIGHT_DECAY_1 = 0.01
WEIGHT_DECAY_2 = 5e-4
MAX_EPOCHS = 1500
EARLY_STOPPING = 100

torch.manual_seed(SEED)

# Build the model
model = GCNII(
    nfeat=data.num_node_features,
    nlayers=NLAYERS,
    nhidden=NHIDDEN,
    nclass=dataset.num_classes,
    dropout=DROPOUT,
    lamda=LAMBDA,
    alpha=ALPHA,
)

# One optimizer group per parameter: parameters whose name starts with
# 'graph' use WEIGHT_DECAY_1, all others use WEIGHT_DECAY_2.
opt_order_dict = []
for name, param in model.named_parameters():
    opt_order_dict.append({
        'params': param,
        'weight_decay': WEIGHT_DECAY_1 if name.startswith('graph') else WEIGHT_DECAY_2,
    })

optimizer = torch.optim.Adam(opt_order_dict, lr=LEARNING_RATE)

# Run training
history = train(
    model,
    data,
    optimizer,
    max_epochs=MAX_EPOCHS,
    early_stopping=EARLY_STOPPING,
)



Dataset: CiteSeer
Num. nodes: 3327 (train=120, val=500, test=1000, other=1707)
Num. edges: 4552
Num. node features: 3703
Num. classes: 6
Dataset len.: 1

Epoch: 0
------
Train loss: 1.7907 | Train acc: 0.1667
Val loss: 1.8000 | Val acc: 0.1380

Epoch: 20
------
Train loss: 1.7766 | Train acc: 0.2667
Val loss: 1.7700 | Val acc: 0.4940

Epoch: 40
------
Train loss: 1.7258 | Train acc: 0.3833
Val loss: 1.7273 | Val acc: 0.6320

Epoch: 60
------
Train loss: 1.6503 | Train acc: 0.5167
Val loss: 1.6773 | Val acc: 0.6560

Epoch: 80
------
Train loss: 1.6013 | Train acc: 0.5250
Val loss: 1.6129 | Val acc: 0.6980

Epoch: 100
------
Train loss: 1.4856 | Train acc: 0.5417
Val loss: 1.5400 | Val acc: 0.7200

Epoch: 120
------
Train loss: 1.4208 | Train acc: 0.5667
Val loss: 1.4817 | Val acc: 0.7040

Epoch: 140
------
Train loss: 1.3819 | Train acc: 0.5500
Val loss: 1.4352 | Val acc: 0.7260

Epoch: 160
------
Train loss: 1.5765 | Train acc: 0.5667
Val loss: 1.4039 | Val acc: 0.7240

Epoch: 180
----

In [10]:
# PubMed, semi-supervised task as in the paper:
# 20 training nodes per class, 500 validation nodes and 1000 test nodes.
dataset = get_dataset(
    path="/tmp/PubMed",
    name="PubMed",
    split="public",
    num_train_per_class=20,
    num_val=500,
    num_test=1000,
    transform=T.NormalizeFeatures(),
)
data = dataset[0]

# GCNII training configuration for the semi-supervised task (Table 6 of the paper)
SEED = 42

# Architecture
NLAYERS = 16
NHIDDEN = 256
ALPHA = 0.1
LAMBDA = 0.4
DROPOUT = 0.5

# Optimisation
LEARNING_RATE = 0.01
WEIGHT_DECAY_1 = 5e-4
WEIGHT_DECAY_2 = 5e-4
MAX_EPOCHS = 1500
EARLY_STOPPING = 100

torch.manual_seed(SEED)

# Build the model
model = GCNII(
    nfeat=data.num_node_features,
    nlayers=NLAYERS,
    nhidden=NHIDDEN,
    nclass=dataset.num_classes,
    dropout=DROPOUT,
    lamda=LAMBDA,
    alpha=ALPHA,
)

# One optimizer group per parameter: parameters whose name starts with
# 'graph' use WEIGHT_DECAY_1, all others use WEIGHT_DECAY_2.
opt_order_dict = []
for name, param in model.named_parameters():
    opt_order_dict.append({
        'params': param,
        'weight_decay': WEIGHT_DECAY_1 if name.startswith('graph') else WEIGHT_DECAY_2,
    })

optimizer = torch.optim.Adam(opt_order_dict, lr=LEARNING_RATE)

# Run training
history = train(
    model,
    data,
    optimizer,
    max_epochs=MAX_EPOCHS,
    early_stopping=EARLY_STOPPING,
)



Dataset: PubMed
Num. nodes: 19717 (train=60, val=500, test=1000, other=18157)
Num. edges: 44324
Num. node features: 500
Num. classes: 3
Dataset len.: 1

Epoch: 0
------
Train loss: 1.1015 | Train acc: 0.3167
Val loss: 1.1014 | Val acc: 0.1960

Epoch: 20
------
Train loss: 0.6526 | Train acc: 0.7833
Val loss: 0.7568 | Val acc: 0.7380

Epoch: 40
------
Train loss: 0.2104 | Train acc: 0.9667
Val loss: 0.6174 | Val acc: 0.7600

Epoch: 60
------
Train loss: 0.2138 | Train acc: 0.9667
Val loss: 0.5089 | Val acc: 0.8220

Epoch: 80
------
Train loss: 0.4884 | Train acc: 0.8500
Val loss: 1.2037 | Val acc: 0.6540

Epoch: 100
------
Train loss: 0.1750 | Train acc: 0.9500
Val loss: 0.6008 | Val acc: 0.7960

Epoch: 120
------
Train loss: 0.0979 | Train acc: 0.9833
Val loss: 0.5167 | Val acc: 0.8120

Early stopping...

Epoch: 126
------
 Train loss: 0.1215 | Train acc: 0.9833
 Val loss: 0.6233 | Val acc: 0.7840
 Test loss: 0.6624 | Test acc: 0.7800
