In [2]:
!pip install torch_geometric

Collecting torch_geometric
  Using cached torch_geometric-2.7.0-py3-none-any.whl.metadata (63 kB)
Collecting xxhash (from torch_geometric)
  Using cached xxhash-3.6.0-cp313-cp313-win_amd64.whl.metadata (13 kB)
Using cached torch_geometric-2.7.0-py3-none-any.whl (1.3 MB)
Using cached xxhash-3.6.0-cp313-cp313-win_amd64.whl (31 kB)
Installing collected packages: xxhash, torch_geometric

   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   -------------------- ------------------- 1/2 [torch_geometric]
   ----------------

In [6]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch.optim.optimizer import Optimizer, required
import time

# Custom Optimizer Implementation: VanillaSGD

class VanillaSGD(Optimizer):
    """
    Minimal Stochastic Gradient Descent (SGD) optimizer.

    Every parameter that currently has a gradient is updated with the plain
    rule ``theta = theta - lr * grad``. There is no momentum, weight decay
    or dampening.

    Args:
        params: Iterable of parameters (or parameter-group dicts) to optimize.
        lr: Learning rate, stored per parameter group under the ``'lr'`` key.
            Defaults to the ``required`` sentinel, i.e. no usable default.

    Raises:
        ValueError: If ``lr`` is given and is negative.
    """

    def __init__(self, params, lr=required):
        if lr is not required and lr < 0.0:
            raise ValueError(f"Invalid learning rate: {lr}")

        # Hyperparameters go into `defaults`; the base class copies them into
        # every parameter group that does not override them.
        defaults = dict(lr=lr)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """
        Perform a single optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and returns
                the loss. It is invoked with gradient tracking re-enabled.

        Returns:
            The value returned by ``closure``, or ``None`` if no closure was
            supplied.
        """
        loss = None
        if closure is not None:
            # The method itself runs under no_grad, so gradients must be
            # switched back on for the closure's forward/backward pass.
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']

            for p in group['params']:
                # Parameters that took no part in the backward pass are skipped.
                if p.grad is None:
                    continue

                # Core update: p <- p - lr * grad.
                # Updating `p` directly (instead of `p.data`) is safe under
                # no_grad and keeps the in-place write visible to autograd's
                # tensor version tracking.
                p.add_(p.grad, alpha=-lr)

        return loss

# GNN Model Architecture Specification: Two-Layer GCN

class GNNModel(torch.nn.Module):
    """
    Node-classification network made of two stacked GCNConv layers.

    Args:
        num_node_features: Size of each input node feature vector.
        hidden_channels: Width of the hidden representation between layers.
        num_classes: Number of output classes (size of the logit vector).
    """
    def __init__(self, num_node_features, hidden_channels, num_classes):
        super().__init__()

        # Layer 1: num_node_features -> hidden_channels
        self.conv1 = GCNConv(num_node_features, hidden_channels)

        # Layer 2: hidden_channels -> num_classes
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        """Return per-node raw logits for features `x` on the graph `edge_index`."""
        # Graph convolution followed by ReLU.
        hidden = F.relu(self.conv1(x, edge_index))

        # Dropout (p=0.5) is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)

        # The output layer has no activation: the loss consumes unnormalized scores.
        return self.conv2(hidden, edge_index)

# Data Loading and Feature Description

# Fix the random seed so weight initialisation and dropout masks are
# reproducible across "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Load the Citeseer dataset from the Planetoid collection
dataset = Planetoid(root='/tmp/Citeseer', name='Citeseer')
data = dataset[0]

# Define Model Parameters and Dimensions
INPUT_FEATS = dataset.num_node_features   # 3703 for Citeseer
OUTPUT_CLASSES = dataset.num_classes      # 6 for Citeseer
HIDDEN_CHANNELS = 16                      # Design parameter (common choice)
LEARNING_RATE = 0.015                     # For VanillaSGD
EPOCHS = 200                              # Training epochs

# Pick the device and move the graph data onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

# Build the model and move it to the device BEFORE creating the optimizer,
# so the optimizer is constructed from parameters that already live on the
# device they will be trained on.
model = GNNModel(INPUT_FEATS, HIDDEN_CHANNELS, OUTPUT_CLASSES).to(device)
criterion = torch.nn.CrossEntropyLoss()

# Use the custom optimizer!
optimizer = VanillaSGD(model.parameters(), lr=LEARNING_RATE)


# Training and Testing Functions

def train():
    """Run one full-graph training step and return the loss as a Python float."""
    model.train()
    optimizer.zero_grad()

    # Logits are computed for every node in the graph ...
    logits = model(data.x, data.edge_index)

    # ... but only the nodes selected by train_mask contribute to the loss.
    mask = data.train_mask
    step_loss = criterion(logits[mask], data.y[mask])

    # Backpropagate, then apply the parameter update.
    step_loss.backward()
    optimizer.step()

    return step_loss.item()

@torch.no_grad()
def test():
    """Return (train, validation, test) accuracy of the model in eval mode."""
    model.eval()

    # Predicted class per node = index of the largest logit.
    predictions = model(data.x, data.edge_index).argmax(dim=1)
    hits = predictions == data.y

    def calculate_accuracy(mask):
        # Fraction of the nodes selected by `mask` that were predicted correctly.
        return int(hits[mask].sum()) / int(mask.sum())

    return tuple(
        calculate_accuracy(split_mask)
        for split_mask in (data.train_mask, data.val_mask, data.test_mask)
    )


# Training Loop and Reporting
# How often (in epochs) accuracy is evaluated and reported during training.
EVAL_EVERY = 20

print("--- GNN Training on Citeseer with Custom VanillaSGD Optimizer ---")
# Report the dimensions actually used to build the model instead of
# hard-coded numbers, so the banner cannot drift from the configuration.
print(f"Model: 2-Layer GCN ({INPUT_FEATS} -> {HIDDEN_CHANNELS} -> {OUTPUT_CLASSES})")
print(f"Optimizer: Custom VanillaSGD (LR={LEARNING_RATE})")
print(f"Device: {device}")
print("-" * 50)

# perf_counter is monotonic, so the measured duration is not affected by
# system clock adjustments.
start_time = time.perf_counter()

for epoch in range(1, EPOCHS + 1):
    loss = train()

    # Evaluate every EVAL_EVERY epochs, and always after the final epoch
    if epoch % EVAL_EVERY == 0 or epoch == EPOCHS:
        train_acc, val_acc, test_acc = test()
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, '
              f'Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

end_time = time.perf_counter()

print("-" * 50)
print(f"Training finished in {end_time - start_time:.2f} seconds.")

# Final Report of Node Classification Accuracy
final_train_acc, final_val_acc, final_test_acc = test()

print(f"\nFINAL REPORT (After {EPOCHS} Epochs):")
print("--------------------------------------------------")
print(f"Training Accuracy: {final_train_acc * 100:.2f}%")
print(f"Validation Accuracy: {final_val_acc * 100:.2f}%")
print(f"Node Classification Accuracy (Test Set): {final_test_acc * 100:.2f}%")
print("--------------------------------------------------")

--- GNN Training on Citeseer with Custom VanillaSGD Optimizer ---
Model: 2-Layer GCN (3703 -> 16 -> 6)
Optimizer: Custom VanillaSGD (LR=0.015)
Device: cpu
--------------------------------------------------
Epoch: 020, Loss: 1.7753, Train Acc: 0.3167, Val Acc: 0.2060, Test Acc: 0.2160
Epoch: 040, Loss: 1.7497, Train Acc: 0.3750, Val Acc: 0.2560, Test Acc: 0.2780
Epoch: 060, Loss: 1.7228, Train Acc: 0.4750, Val Acc: 0.3100, Test Acc: 0.3450
Epoch: 080, Loss: 1.6960, Train Acc: 0.5833, Val Acc: 0.3720, Test Acc: 0.3980
Epoch: 100, Loss: 1.6609, Train Acc: 0.6333, Val Acc: 0.4340, Test Acc: 0.4360
Epoch: 120, Loss: 1.6425, Train Acc: 0.6750, Val Acc: 0.4760, Test Acc: 0.4680
Epoch: 140, Loss: 1.6171, Train Acc: 0.7417, Val Acc: 0.5160, Test Acc: 0.5000
Epoch: 160, Loss: 1.5780, Train Acc: 0.7417, Val Acc: 0.5440, Test Acc: 0.5240
Epoch: 180, Loss: 1.5525, Train Acc: 0.7667, Val Acc: 0.5620, Test Acc: 0.5540
Epoch: 200, Loss: 1.5174, Train Acc: 0.7833, Val Acc: 0.5840, Test Acc: 0.5680
----