In [18]:
from typing import Callable, List, Optional, Tuple
import numpy as np
import torch
import torch.nn. functional as F
import torch_geometric.transforms as T
from torch import Tensor
from torch.optim import Optimizer
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid, WebKB
from torch_geometric.utils import accuracy
from typing_extensions import Literal, TypedDict
from collections import defaultdict
from typing import List, Optional, Tuple, Union
import scipy.sparse
from torch import Tensor
from scipy.sparse import coo_matrix, eye, diags, csr_matrix
import torch.nn as nn
from torch.nn import init
from torch.nn import Parameter
import torch.nn.functional as F
from functools import cached_property
import scipy.sparse as sp
from torch_geometric.transforms import RandomNodeSplit
from torch_geometric.datasets.wikipedia_network import WikipediaNetwork

In [14]:
# Function to load the dataset 

def get_dataset(path, name, split, transform):
    if name in ('Cora', 'CiteSeer', 'PubMed'):
        dataset = Planetoid(path, name=name, split=split, transform=transform)
    elif name in ('Cornell', 'Texas', 'Wisconsin'):
        dataset = WebKB(path, name=name, transform=transform)
    elif name == 'chameleon':
        dataset = WikipediaNetwork(path, name=name, geom_gcn_preprocess= True, transform=transform)
    else:
        raise ValueError(f"Unknown dataset name: {name}")

    num_nodes = dataset.data.num_nodes
    num_edges = dataset.data.num_edges // 2
    print(f"Dataset: {dataset.name}")
    print(f"Num. nodes: {num_nodes}")
    print(f"Num. edges: {num_edges}")
    print(f"Num. node features: {dataset.num_node_features}")
    print(f"Num. classes: {dataset.num_classes}")
    print(f"Dataset len.: {dataset.len()}")
    # print(f"Sum of row values with normalization: {dataset[0].x.sum(dim=-1)}")
    return dataset


In [3]:
# Function to randomly split nodes of each class into 60%, 20%, and 20% for training, validation and testing based on the paper
def split_dataset(dataset, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2):
    num_nodes = dataset[0].num_nodes
    split = RandomNodeSplit(split="random", num_splits=1, num_train_per_class=int((num_nodes) / 7 * train_ratio), num_val=int(num_nodes * val_ratio), num_test=int(num_nodes * test_ratio))
    splitted_dataset = split(dataset[0])
    train_len = splitted_dataset.train_mask.sum()
    val_len = splitted_dataset.val_mask.sum()
    test_len = splitted_dataset.test_mask.sum()
    other_len = num_nodes - train_len-val_len - test_len
    print(f"Num. train={train_len}, val={val_len}, test={test_len}, other={other_len}")
    return splitted_dataset

In [4]:
# Function to create an adjacency matrix from edge index and edge attributes

def adjacency_matrix(edge_index, edge_attr=None, num_nodes=None):
    # Calculate the number of nodes if not provided
    if num_nodes is None:
        num_nodes = int(edge_index.max()) + 1
    # If edge_attr does not exist, assign weight=1 to all edges
    if edge_attr is None:
        edge_attr = torch.ones(edge_index.shape[1], dtype=torch.float)
    # Define the sparse adjacency matrix
    adj_matrix_sparse = torch.sparse_coo_tensor(edge_index, edge_attr, (num_nodes, num_nodes))
    # Convert to a dense matrix
    adj_matrix = adj_matrix_sparse.to_dense()
    return (adj_matrix + adj_matrix.T) / 2


In [5]:
# Compute the normalize adjancy matrix based on the paper P ̃ = (D ̃^ −1/2)A ̃(D ̃^ −1/2 )

def normalize_adjacency_matrix(A):
    # Ensure A is a sparse matrix
    if not isinstance(A, csr_matrix):
        A = csr_matrix(A)
    A = A + sp.eye(A.shape[0])
    degrees = np.array(A.sum(axis=1)).flatten()
    degrees[degrees == 0] = 1  # Replace 0s with 1s
    D_inv_sqrt = diags(1.0 / np.sqrt(degrees))
    normalized_A = D_inv_sqrt @ A @ D_inv_sqrt
    return normalized_A


# Convert the matrix to Tensor
def sparse_matrix_to_torch_sparse_tensor(sparse_matrix):
    sparse_matrix = sparse_matrix.tocoo()
    indices = torch.LongTensor(np.vstack((sparse_matrix.row, sparse_matrix.col)))
    values = torch.FloatTensor(sparse_matrix.data)
    shape = torch.Size(sparse_matrix.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

In [6]:
# Definition the CGNII model

class GCNLayer(nn.Module):

    def __init__(self, in_features, out_features):

        super(GCNLayer, self).__init__() 
        self.in_features = in_features
        self.out_features = out_features
        self.weight = torch.nn.Parameter(init.kaiming_uniform_(torch.empty(self.in_features, self.out_features), mode='fan_in', nonlinearity='relu'))

    def forward(self, input, adj , h_0 , lamda, alpha, l):

        h_l = torch.spmm(adj, input)
        features = (1 - alpha) * h_l + alpha * h_0
        n = self.weight.shape[0]
        I_n = torch.eye(n) 
        beta = np.log((lamda / l) + 1)
        term1 = (1 - beta) * I_n
        term2 = beta * self.weight
        weights = term1 + term2
        output = torch.mm(features, weights)
        return output

class GCNII(nn.Module):
    def __init__(self, nfeat, nlayers, nhidden, nclass, dropout, lamda, alpha):
        super(GCNII, self).__init__()
        self.graph_convs = nn.ModuleList()  
        for i in range(nlayers):
            conv_layer = GCNLayer(nhidden, nhidden)
            self.graph_convs.append(conv_layer)

        self.pre_fc = nn.Linear(nfeat, nhidden)
        self.post_fc = nn.Linear(nhidden, nclass)

        self.relu = nn.ReLU()
        self.dropout = dropout
        self.lamda = lamda
        self.alpha = alpha

    # Forward pass accepts edge_index and edge_attr
    def forward(self, x, edge_index, edge_attr):
        # Construct the adjacency matrix from edge_index and edge_attr
        adj = adjacency_matrix(edge_index, edge_attr)
        adj = normalize_adjacency_matrix(adj)
        adj = sparse_matrix_to_torch_sparse_tensor(adj)

        x = F.dropout(x, self.dropout, training=self.training)
        h_0 = self.relu(self.pre_fc(x))
        h = h_0
        for i, con in enumerate(self.graph_convs):
            h = F.dropout(h, self.dropout, training=self.training)
            h = self.relu(con(h, adj, h_0, self.lamda, self.alpha, i + 1))
        h = F.dropout(h, self.dropout, training=self.training)
        h = self.post_fc(h)
        return F.log_softmax(h, dim=1)
    


if __name__ == '__main__':
    pass




In [7]:
LossFunction = Callable[[Tensor, Tensor], Tensor]
Stage = Literal["train", "val", "test"]

def train_step(
    model: torch.nn.Module, data: Data, optimizer: torch.optim.Optimizer, loss_function: LossFunction
) -> Tuple[float, float]:
    # Set model to training mode
    model.train()
    optimizer.zero_grad()
    training_mask = data.train_mask
    logits = model(data.x, data.edge_index, data.edge_attr)[training_mask]
    predictions = torch.argmax(logits, dim=1)
    labels = data.y[training_mask]
    loss = loss_function(logits, labels)
    acc = accuracy(predictions, labels)
    loss.backward()
    optimizer.step()  
    return loss.item(), acc

@torch.no_grad()
def evaluate_step(
    model: torch.nn.Module, data: Data, loss_function: LossFunction, stage: Stage
) -> Tuple[float, float]:
    # Set model to evaluation mode
    model.eval()
    stage_mask = getattr(data, f"{stage}_mask")
    logits = model(data.x, data.edge_index, data.edge_attr)[stage_mask]
    predictions = torch.argmax(logits, dim=1)  
    labels = data.y[stage_mask]
    loss = loss_function(logits, labels)
    acc = accuracy(predictions, labels)  
    return loss.item(), acc


In [8]:
class HistoryDict(TypedDict):
    loss: List[float]
    acc: List[float]
    val_loss: List[float]
    val_acc: List[float]

def train(
    model: torch.nn.Module,
    data: Data,
    optimizer: torch.optim.Optimizer,
    loss_function: LossFunction = torch.nn.CrossEntropyLoss(),
    max_epochs: int = 1500,
    early_stopping: int = 100,
    print_interval: int = 100,
    verbose: bool = True,
) -> HistoryDict:
    history = {"loss": [], "val_loss": [], "acc": [], "val_acc": []}

    # Training loop
    for epoch in range(max_epochs):
        # Perform a training step
        loss, acc = train_step(model, data, optimizer, loss_function)
        # Perform an evaluation step
        val_loss, val_acc = evaluate_step(model, data, loss_function, "val")

        # Update history
        history["loss"].append(loss)
        history["acc"].append(acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        if epoch > early_stopping and val_loss > np.mean(history["val_loss"][-(early_stopping + 1):-1]):
            if verbose:
                print("\nEarly stopping...")
            break

        # Print training progress
        if verbose and epoch % print_interval == 0:
            print(f"\nEpoch: {epoch}\n------")
            print(f"Train loss: {loss:.4f} | Train acc: {acc:.4f}")
            print(f"Val loss: {val_loss:.4f} | Val acc: {val_acc:.4f}")

    # Perform a final evaluation step
    test_loss, test_acc = evaluate_step(model, data, loss_function, "test")
    # Print final results
    # if verbose:
    #     print(f"\nEpoch: {epoch}\n------")
    #     print(f" Train loss: {loss:.4f} | Train acc: {acc:.4f}")
    #     print(f" Val loss: {val_loss:.4f} | Val acc: {val_acc:.4f}")
    #     print(f" Test loss: {test_loss:.4f} | Test acc: {test_acc:.4f}")
    return history, test_loss, test_acc 

In [12]:
# Load the Cora dataset for full-supervised task: 
dataset = get_dataset(path = "/tmp/Cora", name="Cora",split = "full", transform=T.NormalizeFeatures())
data = dataset[0]


# Training configuration of GCNII for semi-supervised task based on Table 6 of the paper
SEED = 42
NLAYERS = 64
ALPHA = 0.2
LEARNING_RATE = 0.01
NHIDDEN = 64
LAMBDA = 0.5
DROPOUT = 0.5
MAX_EPOCHS = 1500
WEIGHT_DECAY = 0.0001
EARLY_STOPPING = 100


torch.manual_seed(SEED)



test_accuracies = []

# Run the training and evaluation process 10 times with random splits
for i in range(10):
    # Get a random split of the dataset
    split_data = split_dataset(dataset)
    
    
    # Initialize the model
    model = GCNII(nfeat=split_data.num_node_features,
                  nlayers=NLAYERS,
                  nhidden=NHIDDEN,
                  nclass=dataset.num_classes,
                  dropout=DROPOUT,
                  lamda=LAMBDA,
                  alpha=ALPHA)
    
    # Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train and evaluate the model on the current split
    history, test_loss, test_acc  = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING)
    
    # Store the test accuracy result
    test_accuracies.append(test_acc)

# Calculate the mean and standard deviation of test accuracies
mean_accuracy = np.mean(test_accuracies)
std_accuracy = np.std(test_accuracies)

print(f'Mean Test Accuracy: {mean_accuracy:.4f}')
print(f'Standard Deviation of Test Accuracies: {std_accuracy:.4f}')


Dataset: Cora
Num. nodes: 2708
Num. edges: 5278
Num. node features: 1433
Num. classes: 7
Dataset len.: 1
Num. train=1557, val=541, test=541, other=69

Epoch: 0
------
Train loss: 1.9467 | Train acc: 0.1821
Val loss: 1.9314 | Val acc: 0.3160

Epoch: 100
------
Train loss: 0.5565 | Train acc: 0.8369
Val loss: 0.5107 | Val acc: 0.8680

Epoch: 200
------
Train loss: 0.4231 | Train acc: 0.8783
Val loss: 0.4190 | Val acc: 0.8820

Epoch: 300
------
Train loss: 0.4396 | Train acc: 0.8932
Val loss: 0.3984 | Val acc: 0.8780

Early stopping...
Num. train=1557, val=541, test=541, other=69

Epoch: 0
------
Train loss: 1.9386 | Train acc: 0.1680
Val loss: 1.9222 | Val acc: 0.3180

Epoch: 100
------
Train loss: 0.5540 | Train acc: 0.8601
Val loss: 0.4915 | Val acc: 0.8740

Epoch: 200
------
Train loss: 0.4149 | Train acc: 0.8816
Val loss: 0.4075 | Val acc: 0.8800

Epoch: 300
------
Train loss: 0.3927 | Train acc: 0.8791
Val loss: 0.3875 | Val acc: 0.8780

Early stopping...
Num. train=1557, val=541, t

In [13]:
# Load the CiteSeer dataset for full-supervised task: 

dataset = get_dataset(path = "/tmp/CiteSeer", name="CiteSeer",split = "full", transform=T.NormalizeFeatures())
data = dataset[0]


# Training configuration of GCNII for semi-supervised task based on Table 6 of the paper
SEED = 42
NLAYERS = 64
ALPHA = 0.5
LEARNING_RATE = 0.01
NHIDDEN = 64
LAMBDA = 0.5
DROPOUT = 0.5
MAX_EPOCHS = 1500
WEIGHT_DECAY = 5e-6
EARLY_STOPPING = 100


torch.manual_seed(SEED)

test_accuracies = []

# Run the training and evaluation process 10 times with random splits
for i in range(10):
    # Get a random split of the dataset
    split_data = split_dataset(dataset)
    
    
    # Initialize the model
    model = GCNII(nfeat=split_data.num_node_features,
                  nlayers=NLAYERS,
                  nhidden=NHIDDEN,
                  nclass=dataset.num_classes,
                  dropout=DROPOUT,
                  lamda=LAMBDA,
                  alpha=ALPHA)
    
    # Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train and evaluate the model on the current split
    history, test_loss, test_acc  = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING)
    
    # Store the test accuracy result
    test_accuracies.append(test_acc)

# Calculate the mean and standard deviation of test accuracies
mean_accuracy = np.mean(test_accuracies)
std_accuracy = np.std(test_accuracies)

print(f'Mean Test Accuracy: {mean_accuracy:.4f}')
print(f'Standard Deviation of Test Accuracies: {std_accuracy:.4f}')


Dataset: CiteSeer
Num. nodes: 3327
Num. edges: 4552
Num. node features: 3703
Num. classes: 6
Dataset len.: 1
Num. train=1689, val=665, test=665, other=308

Epoch: 0
------
Train loss: 1.8080 | Train acc: 0.0865
Val loss: 1.8034 | Val acc: 0.0580

Epoch: 100
------
Train loss: 0.3530 | Train acc: 0.9015
Val loss: 0.6692 | Val acc: 0.7820

Early stopping...
Num. train=1689, val=665, test=665, other=308

Epoch: 0
------
Train loss: 1.7809 | Train acc: 0.2102
Val loss: 1.7668 | Val acc: 0.2600

Epoch: 100
------
Train loss: 0.3259 | Train acc: 0.9059
Val loss: 0.6661 | Val acc: 0.7720

Early stopping...
Num. train=1689, val=665, test=665, other=308

Epoch: 0
------
Train loss: 1.8004 | Train acc: 0.0865
Val loss: 1.7952 | Val acc: 0.0600

Epoch: 100
------
Train loss: 0.3509 | Train acc: 0.8955
Val loss: 0.6759 | Val acc: 0.7800

Early stopping...
Num. train=1689, val=665, test=665, other=308

Epoch: 0
------
Train loss: 1.7846 | Train acc: 0.1762
Val loss: 1.7735 | Val acc: 0.2320

Epoch:

In [16]:
# Load the PubMed dataset for full-supervised task: 
dataset = get_dataset(path = "/tmp/PubMed", name="PubMed",split = "full", transform=T.NormalizeFeatures())
data = dataset[0]


# Training configuration of GCNII for semi-supervised task based on Table 6 of the paper
SEED = 42
NLAYERS = 64
ALPHA = 0.1
LEARNING_RATE = 0.01
NHIDDEN = 64
LAMBDA = 0.5
DROPOUT = 0.5
MAX_EPOCHS = 1500
WEIGHT_DECAY = 5e-6
EARLY_STOPPING = 100


torch.manual_seed(SEED)

test_accuracies = []

# Run the training and evaluation process 10 times with random splits
for i in range(10):
    # Get a random split of the dataset
    split_data = split_dataset(dataset)
    
    
    # Initialize the model
    model = GCNII(nfeat=split_data.num_node_features,
                  nlayers=NLAYERS,
                  nhidden=NHIDDEN,
                  nclass=dataset.num_classes,
                  dropout=DROPOUT,
                  lamda=LAMBDA,
                  alpha=ALPHA)
    
    # Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train and evaluate the model on the current split
    history, test_loss, test_acc  = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING)
    
    # Store the test accuracy result
    test_accuracies.append(test_acc)

# Calculate the mean and standard deviation of test accuracies
mean_accuracy = np.mean(test_accuracies)
std_accuracy = np.std(test_accuracies)

print(f'Mean Test Accuracy: {mean_accuracy:.4f}')
print(f'Standard Deviation of Test Accuracies: {std_accuracy:.4f}')



Dataset: PubMed
Num. nodes: 19717
Num. edges: 44324
Num. node features: 500
Num. classes: 3
Dataset len.: 1
Num. train=5070, val=3943, test=3943, other=6761

Epoch: 0
------
Train loss: 1.1077 | Train acc: 0.2150
Val loss: 1.0984 | Val acc: 0.3880

Epoch: 100
------
Train loss: 0.4354 | Train acc: 0.8470
Val loss: 0.3315 | Val acc: 0.8900

Epoch: 200
------
Train loss: 0.3954 | Train acc: 0.8567
Val loss: 0.2995 | Val acc: 0.8980

Epoch: 300
------
Train loss: 0.3786 | Train acc: 0.8631
Val loss: 0.2839 | Val acc: 0.8980

Epoch: 400
------
Train loss: 0.3643 | Train acc: 0.8686
Val loss: 0.2710 | Val acc: 0.9020

Early stopping...
Num. train=5070, val=3943, test=3943, other=6761

Epoch: 0
------
Train loss: 1.0969 | Train acc: 0.3977
Val loss: 1.0850 | Val acc: 0.4240

Epoch: 100
------
Train loss: 0.4375 | Train acc: 0.8470
Val loss: 0.3323 | Val acc: 0.8900

Epoch: 200
------
Train loss: 0.3904 | Train acc: 0.8608
Val loss: 0.2967 | Val acc: 0.8980

Epoch: 300
------
Train loss: 0.38