# GNN course assignment 3

### This assignment guides you to implement hand-crafted features for graph classification. Have fun!

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import DataLoader, Data, Dataset
from torch_geometric.datasets import TUDataset
from torch_geometric.utils import degree
from torch_geometric import utils
import networkx as nx
# Fix PyTorch's global random seed so repeated runs of the notebook are reproducible
torch.manual_seed(42)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x7f3067642350>

In [2]:
# Fetch the MUTAG benchmark from the TUDataset collection (cached under data/TUDataset)
dataset = TUDataset(name='MUTAG', root='data/TUDataset')
# Notebook display: the dataset summary, its first graph, and the number of classes
(dataset, dataset[0], dataset.num_classes)

(MUTAG(188), Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1]), 2)

In [3]:
### Graph-Level Features ###
def _largest_strongly_connected_subgraph(nx_graph):
    """Return nx_graph if it is strongly connected, else its largest strongly connected component."""
    if nx.is_strongly_connected(nx_graph):
        return nx_graph
    largest_nodes = max(nx.strongly_connected_components(nx_graph), key=len)
    return nx_graph.subgraph(largest_nodes)


def extract_graph_features(dataset: Dataset) -> torch.Tensor:
    """
    Extracts hand-crafted graph-level features for every graph in a PyTorch Geometric dataset.

    Feature columns, in order:
      0. average node degree (computed from ``edge_index``)
      1. average clustering coefficient
      2. density
      3. transitivity
      4. average shortest path length
      5. number of nodes
      6. number of edges, as counted in the converted NetworkX graph
         (NOTE(review): ``to_networkx`` is called with its defaults, which
         presumably yields a directed graph, so an undirected edge stored in
         both directions counts twice)
      7. diameter

    Columns 4 and 7 are only defined on a strongly connected graph. When a
    graph is not strongly connected they are computed on its largest strongly
    connected component instead of raising.

    Parameters:
    - dataset: A PyTorch Geometric dataset object

    Returns:
    - features: A torch.Tensor of shape (num_graphs, num_features)
    """
    n_features = 8  # Number of feature columns listed in the docstring

    features = torch.zeros(len(dataset), n_features).float()
    for i, data in enumerate(dataset):
        # Pass num_nodes explicitly: without it the node count is inferred from
        # the largest index in edge_index, so trailing isolated nodes would be
        # left out of the mean.
        node_degrees = degree(data.edge_index[0], num_nodes=data.num_nodes)
        features[i, 0] = torch.mean(node_degrees.float())

        nx_graph = utils.to_networkx(data)

        features[i, 1] = nx.average_clustering(nx_graph)
        features[i, 2] = nx.density(nx_graph)
        features[i, 3] = nx.transitivity(nx_graph)
        features[i, 5] = nx.number_of_nodes(nx_graph)
        features[i, 6] = nx.number_of_edges(nx_graph)

        # Path-based metrics raise on graphs that are not strongly connected,
        # so restrict them to the largest strongly connected component.
        core = _largest_strongly_connected_subgraph(nx_graph)
        features[i, 4] = nx.average_shortest_path_length(core)
        features[i, 7] = nx.diameter(core)

    return features

In [4]:
### A naive Linear Probing Model for Graph Classification ###
class LinearProbingModelGraph(nn.Module):
    """Single linear layer followed by log-softmax, applied to per-graph feature vectors."""

    # Marks this model as operating at the graph level.
    task = 'graph'

    def __init__(self, input_dim, output_dim=2):
        """Create the linear probe.

        Parameters:
        - input_dim: size of each input feature vector
        - output_dim: number of output classes (defaults to 2)
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, edge_attr):
        """Return log-probabilities over classes for each row of ``edge_attr``.

        Despite its name, ``edge_attr`` is simply the input matrix fed to the
        linear layer; log-softmax is taken over dimension 1.
        """
        return F.log_softmax(self.linear(edge_attr), dim=1)

In [5]:
# The first 120 graphs are used for training; the remainder is held out for testing.
train_dataset, test_dataset = dataset[:120], dataset[120:]

# Hand-crafted feature matrices, one row per graph.
train_features = extract_graph_features(train_dataset)
test_features = extract_graph_features(test_dataset)

# Class labels collected from each graph's `y` field.
train_labels = torch.LongTensor([graph.y for graph in train_dataset])
test_labels = torch.LongTensor([graph.y for graph in test_dataset])

print(train_features.shape, train_labels.shape, test_features.shape, test_labels.shape)

torch.Size([120, 8]) torch.Size([120]) torch.Size([68, 8]) torch.Size([68])


In [6]:
# Linear probe sized to the feature matrix width and the dataset's class count
model = LinearProbingModelGraph(
    input_dim=train_features.size(1),
    output_dim=dataset.num_classes,
)
# Adam over all probe parameters
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

# Training function
def train():
    """Run one full-batch optimisation step and return the loss as a Python float.

    Operates on the module-level ``model``, ``optimizer``, ``train_features``
    and ``train_labels``.
    """
    model.train()
    optimizer.zero_grad()
    # The model outputs log-probabilities, hence the NLL loss.
    batch_loss = F.nll_loss(model(train_features), train_labels)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()

# Test function
def test(features, labels):
    """Return the classification accuracy of the module-level ``model``.

    Parameters:
    - features: input matrix passed to the model, one row per graph
    - labels: ground-truth class indices, one per row of ``features``

    Returns:
    - acc: fraction of rows whose arg-max prediction equals the label
    """
    model.eval()
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        pred = model(features).argmax(dim=1)
    acc = pred.eq(labels).sum().item() / len(labels)
    return acc

# Training loop
for epoch in range(500):
    loss = train()
    # Only report progress every 20th epoch.
    if epoch % 20:
        continue
    train_acc = test(train_features, train_labels)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}')

# Final evaluation on the held-out graphs
test_acc = test(test_features, test_labels)
print(f'Test Acc: {test_acc:.4f}')


Epoch: 000, Loss: 2.6061, Train Acc: 0.7083
Epoch: 020, Loss: 0.5999, Train Acc: 0.7083
Epoch: 040, Loss: 0.4519, Train Acc: 0.7333
Epoch: 060, Loss: 0.4194, Train Acc: 0.7417
Epoch: 080, Loss: 0.4075, Train Acc: 0.7750
Epoch: 100, Loss: 0.3966, Train Acc: 0.7833
Epoch: 120, Loss: 0.3866, Train Acc: 0.7917
Epoch: 140, Loss: 0.3773, Train Acc: 0.7917
Epoch: 160, Loss: 0.3688, Train Acc: 0.7917
Epoch: 180, Loss: 0.3611, Train Acc: 0.7917
Epoch: 200, Loss: 0.3542, Train Acc: 0.8583
Epoch: 220, Loss: 0.3480, Train Acc: 0.8583
Epoch: 240, Loss: 0.3424, Train Acc: 0.8583
Epoch: 260, Loss: 0.3375, Train Acc: 0.8667
Epoch: 280, Loss: 0.3331, Train Acc: 0.8667
Epoch: 300, Loss: 0.3292, Train Acc: 0.8750
Epoch: 320, Loss: 0.3257, Train Acc: 0.8750
Epoch: 340, Loss: 0.3226, Train Acc: 0.8750
Epoch: 360, Loss: 0.3198, Train Acc: 0.8750
Epoch: 380, Loss: 0.3172, Train Acc: 0.8750
Epoch: 400, Loss: 0.3150, Train Acc: 0.8750
Epoch: 420, Loss: 0.3129, Train Acc: 0.8750
Epoch: 440, Loss: 0.3110, Train 

---
# Discussions

就我而言，图分类更理想的做法是先在节点特征 x 上做扩散等操作，然后再做一次 aggregation。但是在这里似乎不太容易实现这一点。为了方便一点，就选择了 assignment 1 里面的一些特征。性能确实有所提升，但还是存在很多不足。