# In this notebook we will have hands on code to get familiar with pytorch geometric

In [2]:
# !pip install torch-geometric
# !pip install torch_sparse
# !pip install torch_scatter

In [3]:
from torch_geometric.nn import GCNConv, GATConv, GINConv, global_add_pool, SAGEConv ,MessagePassing
    

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
 # load dataset from pytorch geometric
    
    
dataset = TUDataset(root="data/TUDataset", name="MUTAG")

print()
print(f"Dataset: {dataset}:")
print("====================")
print(f"Number of graphs: {len(dataset)}")
print(f"Number of features: {dataset.num_features}")
print(f"Number of classes: {dataset.num_classes}")

data = dataset[0]  # Get the first graph object.

print()
print(data)
print("=============================================================")

# Gather some statistics about the first graph.
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Average node degree: {data.num_edges / data.num_nodes:.2f}")
print(f"Has isolated nodes: {data.has_isolated_nodes()}")
print(f"Has self-loops: {data.has_self_loops()}")
print(f"Is undirected: {data.is_undirected()}")

In [6]:
# get training and validation sets
torch.manual_seed(12345)
dataset = dataset.shuffle()

train_dataset = dataset[:len(dataset)*9/10]
test_dataset = dataset[len(dataset)*9/10:]

print(f"Number of training graphs: {len(train_dataset)}")
print(f"Number of test graphs: {len(test_dataset)}")

Number of training graphs: 188
Number of test graphs: 0


In [8]:
from torch_geometric.loader import DataLoader
 
# build the dataloader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# show first batchs
for step, data in enumerate(train_loader):
    if step <3 :
        print(f"Step {step + 1}:")
        print("=======")
        print(f"Number of graphs in the current batch: {data.num_graphs}")
        print(data)
        print()

Step 1:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 1376], x=[615, 7], edge_attr=[1376, 4], y=[32], batch=[615], ptr=[33])

Step 2:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 1238], x=[560, 7], edge_attr=[1238, 4], y=[32], batch=[560], ptr=[33])

Step 3:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 1240], x=[565, 7], edge_attr=[1240, 4], y=[32], batch=[565], ptr=[33])

Step 4:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 1264], x=[570, 7], edge_attr=[1264, 4], y=[32], batch=[570], ptr=[33])

Step 5:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 1192], x=[543, 7], edge_attr=[1192, 4], y=[32], batch=[543], ptr=[33])

Step 6:
Number of graphs in the current batch: 28
DataBatch(edge_index=[2, 1132], x=[518, 7], edge_attr=[1132, 4], y=[28], batch=[518], ptr=[29])



In [9]:
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        self.dropout=0.1

        self.layer1 = SAGEConv(dataset.num_node_features, hidden_channels)
        self.layer2 = SAGEConv(hidden_channels, hidden_channels)
        self.layer3 = SAGEConv(hidden_channels, hidden_channels)

        self.decoder = nn.Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.layer1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.layer2(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.layer3(x, edge_index)
        x = F.relu(x)
        x = global_add_pool(x, batch)
        x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.decoder(x)

        return x
model = GCN(hidden_channels=64)
print(model)

GCN(
  (layer1): SAGEConv(7, 64, aggr=mean)
  (layer2): SAGEConv(64, 64, aggr=mean)
  (layer3): SAGEConv(64, 64, aggr=mean)
  (decoder): Linear(in_features=64, out_features=2, bias=True)
)


In [40]:
# model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = torch.nn.CrossEntropyLoss()


def train():
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
        out = model(
            data.x, data.edge_index, data.batch
        )  # Perform a single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.


def test(loader):
    model.eval()

    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.


for epoch in range(1, 171):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f"Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

Epoch: 001, Train Acc: 0.1800, Test Acc: 0.2200
Epoch: 002, Train Acc: 0.1218, Test Acc: 0.1400
Epoch: 003, Train Acc: 0.1691, Test Acc: 0.1400
Epoch: 004, Train Acc: 0.2055, Test Acc: 0.2400
Epoch: 005, Train Acc: 0.2145, Test Acc: 0.2200
Epoch: 006, Train Acc: 0.2255, Test Acc: 0.2600
Epoch: 007, Train Acc: 0.2218, Test Acc: 0.2200
Epoch: 008, Train Acc: 0.2600, Test Acc: 0.3000
Epoch: 009, Train Acc: 0.2745, Test Acc: 0.2200
Epoch: 010, Train Acc: 0.2709, Test Acc: 0.2400
Epoch: 011, Train Acc: 0.2055, Test Acc: 0.2000
Epoch: 012, Train Acc: 0.2800, Test Acc: 0.2200
Epoch: 013, Train Acc: 0.2164, Test Acc: 0.1800
Epoch: 014, Train Acc: 0.2909, Test Acc: 0.2800
Epoch: 015, Train Acc: 0.2673, Test Acc: 0.2200
Epoch: 016, Train Acc: 0.2764, Test Acc: 0.2200
Epoch: 017, Train Acc: 0.2582, Test Acc: 0.2000
Epoch: 018, Train Acc: 0.2618, Test Acc: 0.2800
Epoch: 019, Train Acc: 0.2800, Test Acc: 0.2800
Epoch: 020, Train Acc: 0.2764, Test Acc: 0.2800
Epoch: 021, Train Acc: 0.2800, Test Acc: