In [1]:
from typing import Callable, List, Optional, Tuple
import numpy as np
import torch
import torch.nn. functional as F
import torch_geometric.transforms as T
from torch import Tensor
from torch.optim import Optimizer
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid, WebKB
from torch_geometric.utils import accuracy
from typing_extensions import Literal, TypedDict
from collections import defaultdict
from typing import List, Optional, Tuple, Union
import scipy.sparse
from torch import Tensor
from scipy.sparse import coo_matrix, eye, diags, csr_matrix
import torch.nn as nn
from torch.nn import init
from torch.nn import Parameter
import torch.nn.functional as F
from functools import cached_property
import scipy.sparse as sp
from torch_geometric.transforms import RandomNodeSplit
from torch_geometric.datasets.wikipedia_network import WikipediaNetwork

In [37]:
# Function to load the dataset 

def get_dataset(path, name, split, transform):
    """Load a node-classification dataset by name and print a short summary.

    Args:
        path: Root directory passed to the dataset constructor.
        name: 'Cora', 'CiteSeer' or 'PubMed' (Planetoid); 'Cornell', 'Texas'
            or 'Wisconsin' (WebKB); or 'chameleon' (WikipediaNetwork with
            ``geom_gcn_preprocess=True``).
        split: Forwarded to ``Planetoid`` only. The WebKB and
            WikipediaNetwork branches do not receive it.
        transform: Transform passed to the dataset constructor.

    Returns:
        The constructed dataset object.

    Raises:
        ValueError: If ``name`` is not one of the supported dataset names.
    """
    if name in ('Cora', 'CiteSeer', 'PubMed'):
        dataset = Planetoid(path, name=name, split=split, transform=transform)
    elif name in ('Cornell', 'Texas', 'Wisconsin'):
        dataset = WebKB(path, name=name, transform=transform)
    elif name == 'chameleon':
        dataset = WikipediaNetwork(path, name=name, geom_gcn_preprocess=True, transform=transform)
    else:
        raise ValueError(f"Unknown dataset name: {name}")

    # Read the graph through indexing rather than the internal `dataset.data`
    # storage attribute, which newer torch_geometric releases warn against.
    # The summary therefore describes the graph after `transform` is applied.
    graph = dataset[0]
    num_nodes = graph.num_nodes
    # Halving assumes every undirected edge is stored once per direction
    # (TODO confirm for directed graphs such as chameleon).
    num_edges = graph.num_edges // 2
    print(f"Dataset: {dataset.name}")
    print(f"Num. nodes: {num_nodes}")
    print(f"Num. edges: {num_edges}")
    print(f"Num. node features: {dataset.num_node_features}")
    print(f"Num. classes: {dataset.num_classes}")
    print(f"Dataset len.: {dataset.len()}")
    return dataset


In [3]:
# Function to randomly split nodes of each class into 60%, 20%, and 20% for training, validation and testing based on the paper
def split_dataset(dataset, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2):
    """Create one random train/val/test split for the first graph of ``dataset``.

    The per-class training budget is ``num_nodes / num_classes * train_ratio``.
    The number of classes is read from ``dataset.num_classes`` instead of being
    hard-coded to 7, so the ratios hold for datasets with any number of classes.

    Args:
        dataset: Dataset whose first graph (``dataset[0]``) is split.
        train_ratio: Fraction of all nodes used for training (spread evenly
            over the classes).
        val_ratio: Fraction of all nodes used for validation.
        test_ratio: Fraction of all nodes used for testing.

    Returns:
        The graph returned by ``RandomNodeSplit``, carrying ``train_mask``,
        ``val_mask`` and ``test_mask``.
    """
    graph = dataset[0]
    num_nodes = graph.num_nodes
    num_classes = dataset.num_classes
    splitter = RandomNodeSplit(
        split="random",
        num_splits=1,
        num_train_per_class=int(num_nodes / num_classes * train_ratio),
        num_val=int(num_nodes * val_ratio),
        num_test=int(num_nodes * test_ratio),
    )
    splitted_dataset = splitter(graph)
    # Cast the mask sums to plain ints so the summary prints numbers, not tensors.
    train_len = int(splitted_dataset.train_mask.sum())
    val_len = int(splitted_dataset.val_mask.sum())
    test_len = int(splitted_dataset.test_mask.sum())
    other_len = num_nodes - train_len - val_len - test_len
    print(f"Num. train={train_len}, val={val_len}, test={test_len}, other={other_len}")
    return splitted_dataset

In [4]:
# Function to create an adjacency matrix from edge index and edge attributes

def adjacency_matrix(edge_index, edge_attr=None, num_nodes=None):
    """Build a dense, symmetrised adjacency matrix from an edge list.

    Args:
        edge_index: Integer tensor of shape (2, num_edges) holding the row and
            column index of every edge.
        edge_attr: Optional 1-D tensor with one weight per edge. When omitted,
            every edge gets weight 1.
        num_nodes: Size of the (square) matrix. When omitted it is inferred as
            ``edge_index.max() + 1`` (0 for an empty edge list); pass it
            explicitly if trailing nodes may have no edges.

    Returns:
        Dense ``(num_nodes, num_nodes)`` tensor equal to ``(A + A.T) / 2``,
        where ``A`` accumulates the edge weights (duplicate edges are summed).
    """
    # Infer the number of nodes if not provided; `.max()` is undefined on an
    # empty tensor, so an empty edge list maps to an empty graph.
    if num_nodes is None:
        num_nodes = int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0
    # Default to unit weights, created on the same device as the indices so the
    # sparse constructor does not hit a device mismatch.
    if edge_attr is None:
        edge_attr = torch.ones(edge_index.shape[1], dtype=torch.float, device=edge_index.device)
    adj_matrix_sparse = torch.sparse_coo_tensor(edge_index, edge_attr, (num_nodes, num_nodes))
    adj_matrix = adj_matrix_sparse.to_dense()
    # Average with the transpose so the result is symmetric.
    return (adj_matrix + adj_matrix.T) / 2


In [5]:
# Compute the normalized adjacency matrix from the paper: P̃ = D̃^(-1/2) Ã D̃^(-1/2), where Ã = A + I and D̃ is the degree matrix of Ã

def normalize_adjacency_matrix(A):
    """Return the symmetrically normalised adjacency with self-loops added.

    Computes ``D^(-1/2) (A + I) D^(-1/2)``, where ``D`` is the diagonal matrix
    of row sums of ``A + I``.

    Args:
        A: Square adjacency matrix; anything ``csr_matrix`` accepts.

    Returns:
        The normalised matrix as a SciPy sparse matrix.
    """
    # Work in CSR form; convert only when the caller passed something else.
    adj = A if isinstance(A, csr_matrix) else csr_matrix(A)
    adj_with_loops = adj + sp.eye(adj.shape[0])

    row_sums = np.array(adj_with_loops.sum(axis=1)).flatten()
    # A zero row sum would divide by zero below; treat it as degree 1.
    row_sums[row_sums == 0] = 1

    scaling = diags(1.0 / np.sqrt(row_sums))
    return scaling @ adj_with_loops @ scaling


# Convert the matrix to Tensor
def sparse_matrix_to_torch_sparse_tensor(sparse_matrix):
    """Convert a SciPy sparse matrix into a float32 torch sparse COO tensor.

    Args:
        sparse_matrix: Any SciPy sparse matrix (it is converted with
            ``tocoo()``).

    Returns:
        A torch sparse COO tensor with int64 indices, float32 values and the
        same shape as the input.
    """
    coo = sparse_matrix.tocoo()
    # Stack row/column indices into the (2, nnz) layout the COO format uses.
    indices = torch.as_tensor(np.vstack((coo.row, coo.col)), dtype=torch.long)
    values = torch.as_tensor(coo.data, dtype=torch.float32)
    # torch.sparse_coo_tensor replaces the deprecated legacy constructor
    # torch.sparse.FloatTensor(indices, values, shape).
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))

In [6]:
# Definition of the GCNII model

class GCNLayer(nn.Module):
    """Single propagation layer with an initial residual and identity mapping.

    ``forward`` computes
    ``((1 - alpha) * adj @ input + alpha * h_0) @ ((1 - beta) * I + beta * W)``
    with ``beta = log(lamda / l + 1)``. It does not apply a non-linearity.

    Args:
        in_features: Number of input features per node.
        out_features: Number of output features per node. Must equal
            ``in_features`` because the weight is mixed with an identity
            matrix.

    Raises:
        ValueError: If ``in_features != out_features``.
    """

    def __init__(self, in_features, out_features):
        super(GCNLayer, self).__init__()
        # Without this check a mismatch would either fail later inside forward
        # or, for some shapes, silently broadcast against the identity matrix.
        if in_features != out_features:
            raise ValueError(
                "GCNLayer requires in_features == out_features, got "
                f"{in_features} and {out_features}"
            )
        self.in_features = in_features
        self.out_features = out_features
        self.weight = torch.nn.Parameter(
            init.kaiming_uniform_(
                torch.empty(self.in_features, self.out_features),
                mode='fan_in',
                nonlinearity='relu',
            )
        )

    def forward(self, input, adj, h_0, lamda, alpha, l):
        """Apply the layer.

        Args:
            input: Dense node features of the previous layer.
            adj: Sparse (normalised) adjacency matrix multiplied with ``input``.
            h_0: Initial node representation used for the residual connection.
            lamda: Hyper-parameter controlling how fast ``beta`` decays.
            alpha: Weight of the initial residual ``h_0``.
            l: 1-based index of this layer in the stack.

        Returns:
            Dense tensor with the same shape as ``input``.
        """
        h_l = torch.spmm(adj, input)
        features = (1 - alpha) * h_l + alpha * h_0
        # Plain Python float, so the scalar math below never involves NumPy types.
        beta = float(np.log((lamda / l) + 1))
        # Build the identity on the weight's device/dtype to avoid a device
        # mismatch when the module lives on an accelerator.
        identity = torch.eye(
            self.weight.shape[0], dtype=self.weight.dtype, device=self.weight.device
        )
        weights = (1 - beta) * identity + beta * self.weight
        return torch.mm(features, weights)

class GCNII(nn.Module):
    """Deep graph network: input projection, a stack of GCNLayer, output projection.

    Args:
        nfeat: Number of input features per node.
        nlayers: Number of ``GCNLayer`` blocks.
        nhidden: Hidden width used by every ``GCNLayer``.
        nclass: Number of output classes.
        dropout: Dropout probability applied before every layer.
        lamda: Passed to each ``GCNLayer`` as ``lamda``.
        alpha: Passed to each ``GCNLayer`` as ``alpha``.
    """

    def __init__(self, nfeat, nlayers, nhidden, nclass, dropout, lamda, alpha):
        super(GCNII, self).__init__()
        self.graph_convs = nn.ModuleList()
        for _ in range(nlayers):
            self.graph_convs.append(GCNLayer(nhidden, nhidden))

        self.pre_fc = nn.Linear(nfeat, nhidden)
        self.post_fc = nn.Linear(nhidden, nclass)

        self.relu = nn.ReLU()
        self.dropout = dropout
        self.lamda = lamda
        self.alpha = alpha

    def forward(self, x, edge_index, edge_attr):
        """Return per-node log-probabilities over the classes.

        Args:
            x: Node feature matrix with one row per node.
            edge_index: Edge list of shape (2, num_edges).
            edge_attr: Optional per-edge weights (``None`` for unit weights).

        Returns:
            Tensor of shape (num_nodes, nclass) holding ``log_softmax`` scores.
        """
        # The adjacency is normalised with SciPy, so build it on the CPU.
        edge_index_cpu = edge_index.detach().cpu()
        edge_attr_cpu = None if edge_attr is None else edge_attr.detach().cpu()
        # Pass the node count explicitly: inferring it from edge_index.max()
        # drops trailing isolated nodes and makes adj smaller than x.
        adj = adjacency_matrix(edge_index_cpu, edge_attr_cpu, num_nodes=x.size(0))
        adj = normalize_adjacency_matrix(adj.numpy())
        # Move the sparse operator next to the features it multiplies.
        adj = sparse_matrix_to_torch_sparse_tensor(adj).to(x.device)

        x = F.dropout(x, self.dropout, training=self.training)
        h_0 = self.relu(self.pre_fc(x))
        h = h_0
        # Layers are numbered from 1 because GCNLayer divides by the layer index.
        for layer_idx, conv in enumerate(self.graph_convs, start=1):
            h = F.dropout(h, self.dropout, training=self.training)
            h = self.relu(conv(h, adj, h_0, self.lamda, self.alpha, layer_idx))
        h = F.dropout(h, self.dropout, training=self.training)
        h = self.post_fc(h)
        return F.log_softmax(h, dim=1)
    


# Placeholder entry-point guard: the body is `pass`, so running this cell does nothing.
if __name__ == '__main__':
    pass




In [18]:
# A loss callable: takes two tensors (it is called as loss_function(logits, labels)
# in the step functions below) and returns a tensor.
LossFunction = Callable[[Tensor, Tensor], Tensor]
# Which mask to evaluate on; evaluate_step reads the attribute f"{stage}_mask" from the data object.
Stage = Literal["train", "val", "test"]

def train_step(
    model: torch.nn.Module, data: "Data", optimizer: torch.optim.Optimizer, loss_function: "LossFunction", split: int = 1
) -> Tuple[float, float]:
    """Run one optimisation step on the training nodes.

    Args:
        model: Network called as ``model(data.x, data.edge_index, data.edge_attr)``.
        data: Graph providing ``x``, ``edge_index``, ``edge_attr``, ``y`` and
            ``train_mask``.
        optimizer: Optimizer updating ``model``'s parameters.
        loss_function: Callable mapping ``(logits, labels)`` to a scalar loss.
        split: Column of ``train_mask`` to use when the mask is 2-D (one
            column per split). Ignored for a 1-D mask.

    Returns:
        ``(loss, accuracy)`` on the training nodes, measured before the
        parameter update, as Python floats.
    """
    # Set model to training mode
    model.train()
    optimizer.zero_grad()
    training_mask = data.train_mask
    # Multi-split datasets store one mask column per split; single-split
    # datasets store a plain 1-D mask that must be used as is.
    if training_mask.dim() > 1:
        training_mask = training_mask[:, split]
    logits = model(data.x, data.edge_index, data.edge_attr)[training_mask]
    predictions = torch.argmax(logits, dim=1)
    labels = data.y[training_mask]
    loss = loss_function(logits, labels)
    # Fraction of correct predictions, computed inline so the step does not
    # depend on the torch_geometric.utils.accuracy helper.
    acc = int((predictions == labels).sum()) / labels.numel()
    loss.backward()
    optimizer.step()
    return loss.item(), acc

@torch.no_grad()
def evaluate_step(
    model: torch.nn.Module, data: "Data", loss_function: "LossFunction", stage: "Stage", split: int = 1
) -> Tuple[float, float]:
    """Evaluate the model on the nodes selected by ``{stage}_mask``.

    Gradients are disabled and the model is switched to evaluation mode.

    Args:
        model: Network called as ``model(data.x, data.edge_index, data.edge_attr)``.
        data: Graph providing ``x``, ``edge_index``, ``edge_attr``, ``y`` and
            the mask attribute ``f"{stage}_mask"``.
        loss_function: Callable mapping ``(logits, labels)`` to a scalar loss.
        stage: ``"train"``, ``"val"`` or ``"test"``; selects the mask.
        split: Column of the mask to use when the mask is 2-D (one column per
            split). Ignored for a 1-D mask.

    Returns:
        ``(loss, accuracy)`` on the selected nodes as Python floats.
    """
    # Set model to evaluation mode
    model.eval()
    stage_mask = getattr(data, f"{stage}_mask")
    # Multi-split datasets store one mask column per split; single-split
    # datasets store a plain 1-D mask that must be used as is.
    if stage_mask.dim() > 1:
        stage_mask = stage_mask[:, split]
    logits = model(data.x, data.edge_index, data.edge_attr)[stage_mask]
    predictions = torch.argmax(logits, dim=1)
    labels = data.y[stage_mask]
    loss = loss_function(logits, labels)
    # Fraction of correct predictions, computed inline so the step does not
    # depend on the torch_geometric.utils.accuracy helper.
    acc = int((predictions == labels).sum()) / labels.numel()
    return loss.item(), acc


In [19]:
# Per-epoch training curves: one float is appended to each list every epoch.
HistoryDict = TypedDict(
    "HistoryDict",
    {
        "loss": List[float],      # training loss
        "acc": List[float],       # training accuracy
        "val_loss": List[float],  # validation loss
        "val_acc": List[float],   # validation accuracy
    },
)

def train(
    model: torch.nn.Module,
    data: Data,
    optimizer: torch.optim.Optimizer,
    loss_function: LossFunction = torch.nn.CrossEntropyLoss(),
    max_epochs: int = 1500,
    early_stopping: int = 100,
    print_interval: int = 100,
    verbose: bool = True,
    split: int = 1
) -> Tuple[HistoryDict, float, float]:
    """Train with early stopping, then evaluate once on the test nodes.

    Args:
        model: Network to optimise.
        data: Graph with features, labels and train/val/test masks.
        optimizer: Optimizer updating ``model``'s parameters.
        loss_function: Callable mapping ``(logits, labels)`` to a scalar loss.
        max_epochs: Upper bound on the number of epochs.
        early_stopping: Window size. After more than this many epochs,
            training stops as soon as the current validation loss exceeds the
            mean of the previous ``early_stopping`` validation losses.
        print_interval: Print progress every this many epochs when ``verbose``.
        verbose: Whether to print progress and the early-stopping notice.
        split: Index of the mask column used for training, validation and the
            final test evaluation.

    Returns:
        ``(history, test_loss, test_acc)``. ``history`` holds the per-epoch
        training/validation loss and accuracy; the test metrics are computed
        once after training ends.
    """
    history: HistoryDict = {"loss": [], "val_loss": [], "acc": [], "val_acc": []}

    # Training loop
    for epoch in range(max_epochs):
        # Perform a training step
        loss, acc = train_step(model, data, optimizer, loss_function, split)
        # Perform an evaluation step
        val_loss, val_acc = evaluate_step(model, data, loss_function, "val", split)

        # Update history
        history["loss"].append(loss)
        history["acc"].append(acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        # The slice [-(early_stopping + 1):-1] covers the `early_stopping`
        # epochs preceding the current one (the current value is excluded).
        if epoch > early_stopping and val_loss > np.mean(history["val_loss"][-(early_stopping + 1):-1]):
            if verbose:
                print("\nEarly stopping...")
            break

        # Print training progress
        if verbose and epoch % print_interval == 0:
            print(f"\nEpoch: {epoch}\n------")
            print(f"Train loss: {loss:.4f} | Train acc: {acc:.4f}")
            print(f"Val loss: {val_loss:.4f} | Val acc: {val_acc:.4f}")

    # Final evaluation on the test nodes of the SAME split that was trained on.
    # Omitting `split` here would silently fall back to split column 1.
    test_loss, test_acc = evaluate_step(model, data, loss_function, "test", split)
    return history, test_loss, test_acc

In [22]:
# Load the Cornell dataset for the full-supervised task.
# NOTE: get_dataset forwards `split` to Planetoid only, so split="full" has no effect for this WebKB dataset.
dataset = get_dataset(path = "/tmp/Cornell", name="Cornell",split = "full", transform=T.NormalizeFeatures())
data = dataset[0]


# Training configuration of GCNII for full-supervised task based on Table 7 of the paper
SEED = 42
NLAYERS = 16
ALPHA = 0.5
LEARNING_RATE = 0.01
NHIDDEN = 64
LAMBDA = 1
DROPOUT = 0.5
MAX_EPOCHS = 1500
WEIGHT_DECAY =  0.001
EARLY_STOPPING = 100


# Seed torch once up front; the runs below all draw from this single RNG stream.
torch.manual_seed(SEED)

# One test accuracy per run, in split order.
test_accuracies = []

# Run training and evaluation 10 times, once per mask column 0..9.
# The splits are not re-drawn here: run i uses column i of the dataset's masks
# (assumes the masks provide at least 10 columns -- TODO confirm).
for i in range(10):
    # Index of the mask column used by train_step / evaluate_step (mask[:, split])
    Split = i
    
    
    # Initialize a fresh model for every split
    model = GCNII(nfeat=data.num_node_features,
                  nlayers=NLAYERS,
                  nhidden=NHIDDEN,
                  nclass=dataset.num_classes,
                  dropout=DROPOUT,
                  lamda=LAMBDA,
                  alpha=ALPHA)
    
    # Define the optimizer (weight decay is applied to all parameters)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train and evaluate the model on the current split; train returns (history, test_loss, test_acc)
    history, test_loss, test_acc  = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING,split = Split)
    
    # Store the test accuracy result
    test_accuracies.append(test_acc)

# Calculate the mean and standard deviation of test accuracies
# (np.std uses its default ddof=0, i.e. the population standard deviation)
mean_accuracy = np.mean(test_accuracies)
std_accuracy = np.std(test_accuracies)

print(f'Mean Test Accuracy: {mean_accuracy:.4f}')
print(f'Standard Deviation of Test Accuracies: {std_accuracy:.4f}')




Dataset: cornell
Num. nodes: 183
Num. edges: 149
Num. node features: 1703
Num. classes: 5
Dataset len.: 1

Epoch: 0
------
Train loss: 1.6172 | Train acc: 0.1839
Val loss: 1.6039 | Val acc: 0.1864

Epoch: 100
------
Train loss: 0.9675 | Train acc: 0.7126
Val loss: 1.0223 | Val acc: 0.6949

Epoch: 200
------
Train loss: 0.6943 | Train acc: 0.7586
Val loss: 0.8906 | Val acc: 0.6780

Early stopping...

Epoch: 0
------
Train loss: 1.6252 | Train acc: 0.2299
Val loss: 1.6193 | Val acc: 0.2203

Epoch: 100
------
Train loss: 0.7917 | Train acc: 0.7586
Val loss: 0.9861 | Val acc: 0.7119

Epoch: 200
------
Train loss: 0.5220 | Train acc: 0.8391
Val loss: 0.8423 | Val acc: 0.7119

Early stopping...

Epoch: 0
------
Train loss: 1.6469 | Train acc: 0.1954
Val loss: 1.6275 | Val acc: 0.1695

Epoch: 100
------
Train loss: 0.8623 | Train acc: 0.7586
Val loss: 1.2637 | Val acc: 0.5763

Epoch: 200
------
Train loss: 0.6122 | Train acc: 0.8506
Val loss: 1.1606 | Val acc: 0.5424

Early stopping...

Epoch

In [28]:
# Load the Texas dataset for the full-supervised task.
# NOTE: get_dataset forwards `split` to Planetoid only, so split="full" has no effect for this WebKB dataset.
dataset = get_dataset(path = "/tmp/Texas", name="Texas",split = "full", transform=T.NormalizeFeatures())
data = dataset[0]


# Training configuration of GCNII for full-supervised task based on Table 7 of the paper
SEED = 42
NLAYERS = 32
ALPHA = 0.5
LEARNING_RATE = 0.01
NHIDDEN = 64
LAMBDA = 1.5
DROPOUT = 0.5
MAX_EPOCHS = 1500
WEIGHT_DECAY =  0.0001
EARLY_STOPPING = 100


# Seed torch once up front; the runs below all draw from this single RNG stream.
torch.manual_seed(SEED)

# One test accuracy per run, in split order.
test_accuracies = []

# Run training and evaluation 10 times, once per mask column 0..9.
# The splits are not re-drawn here: run i uses column i of the dataset's masks
# (assumes the masks provide at least 10 columns -- TODO confirm).
for i in range(10):
    # Index of the mask column used by train_step / evaluate_step (mask[:, split])
    Split = i
    
    
    # Initialize a fresh model for every split
    model = GCNII(nfeat=data.num_node_features,
                  nlayers=NLAYERS,
                  nhidden=NHIDDEN,
                  nclass=dataset.num_classes,
                  dropout=DROPOUT,
                  lamda=LAMBDA,
                  alpha=ALPHA)
    
    # Define the optimizer (weight decay is applied to all parameters)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train and evaluate the model on the current split; train returns (history, test_loss, test_acc)
    history, test_loss, test_acc  = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING,split = Split)
    
    # Store the test accuracy result
    test_accuracies.append(test_acc)

# Calculate the mean and standard deviation of test accuracies
# (np.std uses its default ddof=0, i.e. the population standard deviation)
mean_accuracy = np.mean(test_accuracies)
std_accuracy = np.std(test_accuracies)

print(f'Mean Test Accuracy: {mean_accuracy:.4f}')
print(f'Standard Deviation of Test Accuracies: {std_accuracy:.4f}')





Dataset: texas
Num. nodes: 183
Num. edges: 162
Num. node features: 1703
Num. classes: 5
Dataset len.: 1

Epoch: 0
------
Train loss: 1.6093 | Train acc: 0.0805
Val loss: 1.5846 | Val acc: 0.1186

Epoch: 100
------
Train loss: 0.2669 | Train acc: 0.9080
Val loss: 0.8776 | Val acc: 0.6610

Early stopping...

Epoch: 0
------
Train loss: 1.6237 | Train acc: 0.0000
Val loss: 1.6086 | Val acc: 0.0169

Epoch: 100
------
Train loss: 0.3348 | Train acc: 0.8621
Val loss: 0.8517 | Val acc: 0.6949

Early stopping...

Epoch: 0
------
Train loss: 1.6048 | Train acc: 0.0920
Val loss: 1.5935 | Val acc: 0.5424

Epoch: 100
------
Train loss: 0.3688 | Train acc: 0.9080
Val loss: 0.7344 | Val acc: 0.7797

Early stopping...

Epoch: 0
------
Train loss: 1.5998 | Train acc: 0.1264
Val loss: 1.5781 | Val acc: 0.5254

Epoch: 100
------
Train loss: 0.4210 | Train acc: 0.8391
Val loss: 0.7811 | Val acc: 0.7966

Epoch: 200
------
Train loss: 0.2282 | Train acc: 0.9425
Val loss: 0.6398 | Val acc: 0.7797

Early sto

In [29]:
# Load the Wisconsin dataset for the full-supervised task.
# NOTE: get_dataset forwards `split` to Planetoid only, so split="full" has no effect for this WebKB dataset.
dataset = get_dataset(path = "/tmp/Wisconsin", name="Wisconsin",split = "full", transform=T.NormalizeFeatures())
data = dataset[0]


# Training configuration of GCNII for full-supervised task based on Table 7 of the paper
SEED = 42
NLAYERS = 16
ALPHA = 0.5
LEARNING_RATE = 0.01
NHIDDEN = 64
LAMBDA = 1
DROPOUT = 0.5
MAX_EPOCHS = 1500
WEIGHT_DECAY = 0.0005
EARLY_STOPPING = 100


# Seed torch once up front; the runs below all draw from this single RNG stream.
torch.manual_seed(SEED)

# One test accuracy per run, in split order.
test_accuracies = []

# Run training and evaluation 10 times, once per mask column 0..9.
# The splits are not re-drawn here: run i uses column i of the dataset's masks
# (assumes the masks provide at least 10 columns -- TODO confirm).
for i in range(10):
    # Index of the mask column used by train_step / evaluate_step (mask[:, split])
    Split = i
    
    
    # Initialize a fresh model for every split
    model = GCNII(nfeat=data.num_node_features,
                  nlayers=NLAYERS,
                  nhidden=NHIDDEN,
                  nclass=dataset.num_classes,
                  dropout=DROPOUT,
                  lamda=LAMBDA,
                  alpha=ALPHA)
    
    # Define the optimizer (weight decay is applied to all parameters)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train and evaluate the model on the current split; train returns (history, test_loss, test_acc)
    history, test_loss, test_acc  = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING,split = Split)
    
    # Store the test accuracy result
    test_accuracies.append(test_acc)

# Calculate the mean and standard deviation of test accuracies
# (np.std uses its default ddof=0, i.e. the population standard deviation)
mean_accuracy = np.mean(test_accuracies)
std_accuracy = np.std(test_accuracies)

print(f'Mean Test Accuracy: {mean_accuracy:.4f}')
print(f'Standard Deviation of Test Accuracies: {std_accuracy:.4f}')






Dataset: wisconsin
Num. nodes: 251
Num. edges: 257
Num. node features: 1703
Num. classes: 5
Dataset len.: 1

Epoch: 0
------
Train loss: 1.6167 | Train acc: 0.0333
Val loss: 1.5958 | Val acc: 0.2500

Epoch: 100
------
Train loss: 0.5749 | Train acc: 0.7917
Val loss: 0.6921 | Val acc: 0.7250

Epoch: 200
------
Train loss: 0.3910 | Train acc: 0.9083
Val loss: 0.6132 | Val acc: 0.7625

Early stopping...

Epoch: 0
------
Train loss: 1.5727 | Train acc: 0.4583
Val loss: 1.5722 | Val acc: 0.3875

Epoch: 100
------
Train loss: 0.4798 | Train acc: 0.8333
Val loss: 1.0169 | Val acc: 0.7250

Epoch: 200
------
Train loss: 0.3258 | Train acc: 0.9250
Val loss: 0.9305 | Val acc: 0.7375

Early stopping...

Epoch: 0
------
Train loss: 1.6362 | Train acc: 0.0833
Val loss: 1.6202 | Val acc: 0.0875

Epoch: 100
------
Train loss: 0.5490 | Train acc: 0.8000
Val loss: 0.8532 | Val acc: 0.6625

Epoch: 200
------
Train loss: 0.4338 | Train acc: 0.8417
Val loss: 0.7861 | Val acc: 0.7375

Early stopping...

Epo

In [38]:
# Load the Chameleon dataset for the full-supervised task.
# NOTE: get_dataset forwards `split` to Planetoid only, so split="full" has no effect for this WikipediaNetwork dataset.
dataset = get_dataset(path = "/tmp/chameleon", name="chameleon",split = "full", transform=T.NormalizeFeatures())
data = dataset[0]


# Training configuration of GCNII for full-supervised task based on Table 7 of the paper
SEED = 42
NLAYERS = 8
ALPHA = 0.2
LEARNING_RATE = 0.01
NHIDDEN = 64
LAMBDA = 1.5
DROPOUT = 0.5
MAX_EPOCHS = 1500
WEIGHT_DECAY = 0.0005
EARLY_STOPPING = 100


# Seed torch once up front; the runs below all draw from this single RNG stream.
torch.manual_seed(SEED)

# One test accuracy per run, in split order.
test_accuracies = []

# Run training and evaluation 10 times, once per mask column 0..9.
# The splits are not re-drawn here: run i uses column i of the dataset's masks
# (assumes the masks provide at least 10 columns -- TODO confirm).
for i in range(10):
    # Index of the mask column used by train_step / evaluate_step (mask[:, split])
    Split = i
    
    
    # Initialize a fresh model for every split
    model = GCNII(nfeat=data.num_node_features,
                  nlayers=NLAYERS,
                  nhidden=NHIDDEN,
                  nclass=dataset.num_classes,
                  dropout=DROPOUT,
                  lamda=LAMBDA,
                  alpha=ALPHA)
    
    # Define the optimizer (weight decay is applied to all parameters)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Train and evaluate the model on the current split; train returns (history, test_loss, test_acc)
    history, test_loss, test_acc  = train(model, data, optimizer, max_epochs=MAX_EPOCHS, early_stopping=EARLY_STOPPING,split = Split)
    
    # Store the test accuracy result
    test_accuracies.append(test_acc)

# Calculate the mean and standard deviation of test accuracies
# (np.std uses its default ddof=0, i.e. the population standard deviation)
mean_accuracy = np.mean(test_accuracies)
std_accuracy = np.std(test_accuracies)

print(f'Mean Test Accuracy: {mean_accuracy:.4f}')
print(f'Standard Deviation of Test Accuracies: {std_accuracy:.4f}')







Dataset: chameleon
Num. nodes: 2277
Num. edges: 18050
Num. node features: 2325
Num. classes: 5
Dataset len.: 1

Epoch: 0
------
Train loss: 1.6068 | Train acc: 0.2289
Val loss: 1.6023 | Val acc: 0.2442

Epoch: 100
------
Train loss: 1.1158 | Train acc: 0.5211
Val loss: 1.3586 | Val acc: 0.4335

Early stopping...

Epoch: 0
------
Train loss: 1.6062 | Train acc: 0.2216
Val loss: 1.6091 | Val acc: 0.1756

Epoch: 100
------
Train loss: 1.1046 | Train acc: 0.5559
Val loss: 1.4613 | Val acc: 0.4198

Early stopping...

Epoch: 0
------
Train loss: 1.6140 | Train acc: 0.2015
Val loss: 1.6101 | Val acc: 0.1989

Epoch: 100
------
Train loss: 1.0780 | Train acc: 0.5778
Val loss: 1.3291 | Val acc: 0.4472

Early stopping...

Epoch: 0
------
Train loss: 1.6127 | Train acc: 0.1648
Val loss: 1.6105 | Val acc: 0.1646

Epoch: 100
------
Train loss: 1.1212 | Train acc: 0.5174
Val loss: 1.3822 | Val acc: 0.4321

Early stopping...

Epoch: 0
------
Train loss: 1.6168 | Train acc: 0.1914
Val loss: 1.6148 | Va