<a href="https://colab.research.google.com/github/vent0906/ww/blob/main/GCN_GraphSAGE_Tutorial_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Code Directory
 1. Install required dependencies (PyG & DGL)
 2. Load and inspect MUTAG dataset using PyG
 3. Prepare DataLoader for training/testing
 4. Define GCNClassifier using GCNConv
 5. Train and evaluate GCN model
 6. Load and inspect Cora dataset using DGL
 7. Generate train/test splits for link prediction
 8. Define GraphSAGE model with DGL
 9. Define DotProductPredictor and MLPPredictor
 10. Train GraphSAGE model and evaluate AUC

In [None]:

# Step 1: Install required dependencies (PyG & DGL)
# This cell relies on notebook shell magics (`!pip ...`), so it only runs
# inside Jupyter/Colab, not as a plain Python script.

import torch
print(f"Using torch version: {torch.__version__}")

# Install PyTorch Geometric dependencies
# The `-f` wheel-index URL is built from the installed torch version with any
# "+<local>" suffix (the part after '+') stripped off.
# NOTE(review): PyG's wheel index pages are usually named
# torch-<version>+<cpu|cuXXX>.html -- confirm the suffix-less URL resolves;
# otherwise pip presumably falls back to a (slow) source build.
!pip install torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "import torch; print(torch.__version__.split('+')[0])").html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-$(python3 -c "import torch; print(torch.__version__.split('+')[0])").html
!pip install torch-geometric
!pip install dgl


In [None]:

# Step 2: Load and inspect MUTAG dataset using PyG

from torch_geometric.datasets import TUDataset

# Load MUTAG through PyG's TUDataset wrapper, rooted at data/TUDataset.
graph_dataset = TUDataset(root='data/TUDataset', name='MUTAG')

# Dataset-level summary, emitted as a single multi-line print.
print(
    f'Dataset: {graph_dataset}',
    f'Number of graphs: {len(graph_dataset)}',
    f'Number of node features: {graph_dataset.num_features}',
    f'Number of classes: {graph_dataset.num_classes}',
    sep='\n',
)

# Structural statistics for the first graph of the dataset.
sample_graph = graph_dataset[0]
print(
    sample_graph,
    f'Nodes: {sample_graph.num_nodes}, Edges: {sample_graph.num_edges}',
    f'Average node degree: {sample_graph.num_edges / sample_graph.num_nodes:.2f}',
    f'Isolated nodes: {sample_graph.has_isolated_nodes()}',
    f'Self-loops: {sample_graph.has_self_loops()}',
    f'Undirected: {sample_graph.is_undirected()}',
    sep='\n',
)


In [None]:

# Step 3: Prepare DataLoader for training/testing

import torch
from torch_geometric.loader import DataLoader

# Seed torch's RNG before shuffling so the split below is repeatable.
torch.manual_seed(12345)
graph_dataset = graph_dataset.shuffle()

# The first 150 shuffled graphs are used for training; the rest are held out.
train_subset, test_subset = graph_dataset[:150], graph_dataset[150:]

# Mini-batches of 64 graphs; shuffling is enabled for the training loader only.
train_loader = DataLoader(train_subset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_subset, shuffle=False, batch_size=64)

print(f'Training graphs: {len(train_subset)}, Test graphs: {len(test_subset)}')


In [None]:

# Step 4: Define GCNClassifier using GCNConv

import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GCNConv, global_mean_pool

class GCNClassifier(torch.nn.Module):
    """Graph-level classifier: three GCNConv layers, mean pooling, linear head.

    The input feature size and the number of output classes are read from the
    module-level ``graph_dataset``.

    Args:
        hidden_dim: Output width of every GCN layer (and input width of the
            final linear layer).
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # Re-seed before creating layers so the initial weights are the same
        # on every construction.
        torch.manual_seed(12345)
        in_dim = graph_dataset.num_node_features
        out_dim = graph_dataset.num_classes
        self.gcn1 = GCNConv(in_dim, hidden_dim)
        self.gcn2 = GCNConv(hidden_dim, hidden_dim)
        self.gcn3 = GCNConv(hidden_dim, hidden_dim)
        self.output_layer = Linear(hidden_dim, out_dim)

    def forward(self, x, edge_index, batch):
        """Return one row of class scores per graph in the batch.

        Args:
            x: Node feature matrix.
            edge_index: Edge connectivity passed to each GCN layer.
            batch: Node-to-graph assignment used by the mean pooling
                (as supplied by the PyG ``DataLoader`` batches).
        """
        # Two GCN layers with ReLU, a third without activation.
        hidden = self.gcn1(x, edge_index).relu()
        hidden = self.gcn2(hidden, edge_index).relu()
        node_embeddings = self.gcn3(hidden, edge_index)

        # Collapse node embeddings into one vector per graph.
        graph_embeddings = global_mean_pool(node_embeddings, batch)

        # Dropout is only active while the module is in training mode.
        graph_embeddings = F.dropout(
            graph_embeddings, p=0.5, training=self.training
        )
        return self.output_layer(graph_embeddings)


classifier_net = GCNClassifier(hidden_dim=64)
print(classifier_net)


In [None]:

# Step 5: Train and evaluate GCN model

# Cross-entropy on the classifier outputs; Adam (lr=0.01) updates every
# parameter of classifier_net.
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier_net.parameters(), lr=0.01)

def run_training():
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``classifier_net``, ``loss_function`` and
    ``optimizer``; returns nothing.
    """
    classifier_net.train()
    for graphs in train_loader:
        logits = classifier_net(graphs.x, graphs.edge_index, graphs.batch)
        loss_function(logits, graphs.y).backward()
        optimizer.step()
        # Clear gradients after the update so the next batch starts from zero.
        optimizer.zero_grad()

def evaluate(loader):
    """Return the classification accuracy of ``classifier_net`` on ``loader``.

    The model is switched to eval mode (the caller is responsible for
    switching back to train mode, as ``run_training`` does).

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; must also expose ``dataset`` so the total number of
            graphs can be read with ``len``.

    Returns:
        Fraction of graphs in ``loader.dataset`` whose arg-max prediction
        equals the label.
    """
    classifier_net.eval()
    correct = 0
    # Inference only: skip autograd bookkeeping so no graph is built or kept
    # alive for the evaluation batches.
    with torch.no_grad():
        for batch in loader:
            pred = classifier_net(batch.x, batch.edge_index, batch.batch)
            predicted = pred.argmax(dim=1)
            correct += int((predicted == batch.y).sum())
    return correct / len(loader.dataset)

# 50 epochs; accuracy is measured after every epoch but reported every fifth.
for epoch in range(1, 51):
    run_training()
    train_acc, test_acc = evaluate(train_loader), evaluate(test_loader)
    if epoch % 5 != 0:
        continue
    print(f'Epoch {epoch:03d}: Train Acc = {train_acc:.4f}, Test Acc = {test_acc:.4f}')


In [None]:

# Step 6: Load and inspect Cora dataset using DGL

import dgl
import numpy as np
import scipy.sparse as sp
import dgl.data

# Build DGL's Cora dataset object and take its element 0 as the graph that
# every later step (edge split, GraphSAGE training) operates on.
cora_dataset = dgl.data.CoraGraphDataset()
cora_graph = cora_dataset[0]
# Printing the graph shows DGL's summary of it.
print(cora_graph)


In [None]:

# Step 7: Generate train/test splits for link prediction

# Source/destination node IDs of every edge, plus a random ordering of the
# edge IDs. The first 10% of that ordering becomes the test set.
u, v = cora_graph.edges()
eids = np.random.permutation(np.arange(cora_graph.number_of_edges()))
test_size = int(len(eids) * 0.1)

# Positive examples: real edges, split into held-out test and training parts.
test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]]

# Adjacency matrix of the full graph. The shape is given explicitly: when it
# is inferred, coo_matrix sizes the matrix from the largest node ID that
# appears in an edge, and the subtraction of the N x N identity below would
# fail with a shape mismatch if the highest-numbered nodes had no edges.
adj = sp.coo_matrix(
    (np.ones(len(u)), (u.numpy(), v.numpy())),
    shape=(cora_graph.number_of_nodes(), cora_graph.number_of_nodes()),
)
# Non-zero entries of 1 - A - I are ordered node pairs that are neither an
# edge nor a self-pair (assuming the graph has no duplicate edges or
# self-loops -- TODO confirm for other datasets).
adj_neg = 1 - adj.todense() - np.eye(cora_graph.number_of_nodes())
neg_u, neg_v = np.where(adj_neg != 0)

# Negative examples: as many sampled non-edges as there are edges, split with
# the same test_size. np.random.choice samples with replacement by default,
# so the same pair can be drawn more than once.
neg_eids = np.random.choice(len(neg_u), cora_graph.number_of_edges())
test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]]
train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]]


In [None]:

# Step 8: Define GraphSAGE model with DGL

import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import SAGEConv

class GraphSAGE(nn.Module):
    """Two-layer GraphSAGE encoder using the 'mean' aggregator.

    Args:
        in_feats: Size of the input node features.
        hidden_feats: Output size of both SAGEConv layers.
    """

    def __init__(self, in_feats, hidden_feats):
        super().__init__()
        self.conv1 = SAGEConv(in_feats, hidden_feats, aggregator_type='mean')
        self.conv2 = SAGEConv(hidden_feats, hidden_feats, aggregator_type='mean')

    def forward(self, g, x):
        """Return node embeddings for graph ``g`` given node features ``x``."""
        # ReLU sits between the two layers only; the output is left linear.
        first_layer = F.relu(self.conv1(g, x))
        return self.conv2(g, first_layer)


In [None]:

# Step 9: Define DotProductPredictor and MLPPredictor

import dgl.function as fn

class DotProductPredictor(nn.Module):
    """Score every edge of a graph by the dot product of its endpoint embeddings."""

    def forward(self, g, h):
        """Return one score per edge of ``g`` from node embeddings ``h``."""
        edge_dot = fn.u_dot_v('h', 'h', 'score')
        # local_scope keeps the temporary 'h' / 'score' fields off the
        # caller's graph once the block exits.
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(edge_dot)
            edge_scores = g.edata['score']
            # The stored score is 2-D; keep only its first column.
            return edge_scores[:, 0]

class MLPPredictor(nn.Module):
    """Score edges with a two-layer MLP over concatenated endpoint embeddings.

    Args:
        h_feats: Size of each node embedding; the MLP input is twice this.
    """

    def __init__(self, h_feats):
        super().__init__()
        self.W1 = nn.Linear(2 * h_feats, h_feats)
        self.W2 = nn.Linear(h_feats, 1)

    def apply_edges(self, edges):
        """Edge UDF: map a batch of edges to ``{'score': tensor}``.

        ``edges.src['h']`` and ``edges.dst['h']`` are concatenated along
        dim 1, passed through W1 -> ReLU -> W2, and the trailing size-1 axis
        is squeezed away.
        """
        endpoint_pair = torch.cat((edges.src['h'], edges.dst['h']), dim=1)
        hidden = F.relu(self.W1(endpoint_pair))
        score = self.W2(hidden).squeeze(1)
        return {'score': score}

    def forward(self, g, h):
        """Return the per-edge scores of ``g`` computed from node embeddings ``h``."""
        # local_scope keeps the temporary 'h' / 'score' fields off the
        # caller's graph once the block exits.
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(self.apply_edges)
            return g.edata['score']


In [None]:

# Step 10: Train GraphSAGE model and evaluate AUC

from sklearn.metrics import roc_auc_score

# Message-passing graph: the full graph minus the held-out test edges.
# NOTE(review): if cora_graph stores each undirected link as two directed
# edges, the reverse of a held-out edge may still be present here -- confirm
# whether that leakage matters for the reported AUC.
train_graph = dgl.remove_edges(cora_graph, eids[:test_size])

# One edge-only graph per (split, label) combination. All are created with
# the full node count so node IDs line up with the embeddings from train_graph.
train_pos_g, train_neg_g, test_pos_g, test_neg_g = (
    dgl.graph(endpoints, num_nodes=cora_graph.number_of_nodes())
    for endpoints in (
        (train_pos_u, train_pos_v),
        (train_neg_u, train_neg_v),
        (test_pos_u, test_pos_v),
        (test_neg_u, test_neg_v),
    )
)

# Encoder with 16 hidden units, sized from the 'feat' node feature width, and
# a dot-product edge scorer.
model = GraphSAGE(
    in_feats=train_graph.ndata['feat'].shape[1],
    hidden_feats=16,
)
predictor = DotProductPredictor()

def compute_loss(pos_score, neg_score):
    """Return the binary cross-entropy loss for link prediction.

    Positive-edge scores are labelled 1 and negative-edge scores 0; the
    scores are treated as logits.

    Args:
        pos_score: Tensor of scores for real edges.
        neg_score: Tensor of scores for sampled non-edges.

    Returns:
        Scalar tensor: mean BCE-with-logits over all scores.
    """
    scores = torch.cat([pos_score, neg_score])
    # Build the labels from the score tensors themselves so they share the
    # scores' device, dtype and shape (fresh torch.ones/zeros would always be
    # CPU float32).
    labels = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
    return F.binary_cross_entropy_with_logits(scores, labels)

def compute_auc(pos_score, neg_score):
    """Return the ROC AUC of the scores against positive/negative labels.

    Args:
        pos_score: Tensor of scores for real edges (labelled 1).
        neg_score: Tensor of scores for sampled non-edges (labelled 0).

    Returns:
        The value of ``roc_auc_score(labels, scores)``.
    """
    # detach() drops autograd tracking; cpu() makes the NumPy conversion work
    # for tensors that live on another device (it is a no-op on CPU tensors).
    scores = torch.cat([pos_score, neg_score]).detach().cpu().numpy()
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    return roc_auc_score(labels, scores)

# A single Adam optimizer over the encoder's and the predictor's parameters.
optimizer = torch.optim.Adam(
    [*model.parameters(), *predictor.parameters()],
    lr=0.01,
)

# Full-batch training: every epoch re-encodes all nodes on train_graph and
# scores the positive and negative training edges.
for epoch in range(100):
    h = model(train_graph, train_graph.ndata['feat'])
    loss = compute_loss(predictor(train_pos_g, h), predictor(train_neg_g, h))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss every tenth epoch only.
    if epoch % 10 != 0:
        continue
    print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

# Final evaluation on the held-out edges, without autograd tracking.
with torch.no_grad():
    h = model(train_graph, train_graph.ndata['feat'])
    pos_score, neg_score = predictor(test_pos_g, h), predictor(test_neg_g, h)
print('AUC:', compute_auc(pos_score, neg_score))
