In [1]:
import torch
# Display the installed PyTorch build string (version + CUDA tag). The wheel index URLs
# used by the install cell embed this exact string, so the two have to match.
torch.__version__

'1.9.0+cu102'

In [2]:
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-geometric

Looking in links: https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
Looking in links: https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
Looking in links: https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
Looking in links: https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html


In [3]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Load the PubMed Planetoid dataset into data/Planetoid, applying NormalizeFeatures
# as the dataset transform.
dataset = Planetoid(
    root='data/Planetoid',
    name='PubMed',
    transform=NormalizeFeatures(),
)

# Work with the first entry of the dataset.
data = dataset[0]

In [4]:
# --- Dataset-level summary ---------------------------------------------------
print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# --- Graph-level summary -----------------------------------------------------
print()
print(data)
print('======================')

# Count the labelled training nodes once; int() turns the 0-dim tensor into a plain number.
num_train_nodes = int(data.train_mask.sum())

print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {num_train_nodes}')
# Four decimals on purpose: the rate is so small that two decimals displayed it as 0.00.
print(f'Training node label rate: {num_train_nodes / data.num_nodes:.4f}')
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


Dataset: PubMed():
Number of graphs: 1
Number of features: 500
Number of classes: 3

Data(edge_index=[2, 88648], test_mask=[19717], train_mask=[19717], val_mask=[19717], x=[19717, 500], y=[19717])
Number of nodes: 19717
Number of edges: 88648
Average node degree: 4.50
Number of training nodes: 60
Training node label rate: 0.00
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


In [5]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(out, color, random_state=None):
    """Embed model outputs in 2-D with t-SNE and show them as a scatter plot.

    Args:
        out: Tensor of per-node outputs; it is detached and copied to the CPU
            before being handed to t-SNE.
        color: Per-point colour values for the scatter plot. A tensor (on any
            device) is converted to a NumPy array first; anything else is
            passed to matplotlib unchanged.
        random_state: Optional seed forwarded to ``TSNE`` so the layout can be
            reproduced. ``None`` (default) keeps the unseeded behaviour.
    """
    z = TSNE(n_components=2, random_state=random_state).fit_transform(
        out.detach().cpu().numpy()
    )

    # `out` was already moved off the GPU above; give `color` the same treatment,
    # otherwise a CUDA tensor cannot be converted by matplotlib.
    if torch.is_tensor(color):
        color = color.detach().cpu().numpy()

    plt.figure(figsize=(10,10))
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")
    plt.show()

In [6]:
def train(model, useGNN=True, graph=None, opt=None, loss_fn=None):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Module to optimise. Called as ``model(x, edge_index)`` when
            ``useGNN`` is true, otherwise as ``model(x)``.
        useGNN: Whether the model expects the edge index as a second argument.
        graph: Object providing ``x``, ``edge_index``, ``y`` and ``train_mask``.
            Defaults to the notebook-level ``data``.
        opt: Optimizer to step. Defaults to the notebook-level ``optimizer``.
        loss_fn: Loss callable ``(predictions, targets)``. Defaults to the
            notebook-level ``criterion``.

    Returns:
        The scalar loss tensor over the training nodes, detached from the
        autograd graph.
    """
    # Fall back to the notebook globals only when nothing was passed explicitly,
    # so existing calls such as ``train(model)`` keep working unchanged.
    graph = data if graph is None else graph
    opt = optimizer if opt is None else opt
    loss_fn = criterion if loss_fn is None else loss_fn

    model.train()
    opt.zero_grad()
    if useGNN:
        out = model(graph.x, graph.edge_index)
    else:
        out = model(graph.x)
    # Only the nodes selected by train_mask contribute to the loss.
    loss = loss_fn(out[graph.train_mask], graph.y[graph.train_mask])
    loss.backward()
    opt.step()
    # The caller only logs the value; hand back a tensor with no autograd history.
    return loss.detach()

def test(model, useGNN=True):
    model.eval()
    if useGNN:
        out = model(data.x, data.edge_index)
    else:
        out = model(data.x)
    pred = out.argmax(dim=1) 
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc, out

In [7]:
import torch
from torch.nn import Linear
import torch.nn.functional as F

In [8]:
class MLP(torch.nn.Module):
    """Two-layer perceptron that looks at node features only.

    Layout: Linear -> ReLU -> dropout (p=0.5, active only in training mode) -> Linear.
    The output of the second linear layer is returned as-is.
    """

    def __init__(self, num_features, num_classes, hidden_channels):
        """Create the input->hidden and hidden->output linear layers."""
        super().__init__()
        self.lin1 = Linear(num_features, hidden_channels)
        self.lin2 = Linear(hidden_channels, num_classes)

    def forward(self, x):
        """Map a feature matrix ``x`` to one score per class for every row."""
        hidden = F.relu(self.lin1(x))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.lin2(hidden)
    
# Put the model and the graph on the same device, as the GCN and GAT cells do.
# A later cell moves `data` to the GPU; without this, re-running the MLP cells
# afterwards on a CUDA machine mixes a CPU model with CUDA inputs.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MLP(dataset.num_features, dataset.num_classes, hidden_channels=16).to(device)
data = data.to(device)
print(model)

MLP(
  (lin1): Linear(in_features=500, out_features=16, bias=True)
  (lin2): Linear(in_features=16, out_features=3, bias=True)
)


In [9]:
# Loss and optimiser for the model created in the previous cell.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# 200 full-batch steps; the MLP takes features only, hence useGNN=False.
for epoch in range(200):
    loss = train(model, useGNN=False)
    epochs_done = epoch + 1
    if epochs_done % 20:
        continue  # only report every 20th epoch
    print(f"Epoch {epochs_done} Loss: {loss:.4f}")

# Accuracy on the test mask after training.
test_acc, out = test(model, useGNN=False)
print(f"Test Accuracy: {test_acc:.4f}")

Epoch 20 Loss: 0.9345
Epoch 40 Loss: 0.5690
Epoch 60 Loss: 0.3334
Epoch 80 Loss: 0.2371
Epoch 100 Loss: 0.1919
Epoch 120 Loss: 0.1665
Epoch 140 Loss: 0.1206
Epoch 160 Loss: 0.1750
Epoch 180 Loss: 0.1485
Epoch 200 Loss: 0.1533
Test Accuracy: 0.7340


In [10]:
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two stacked ``GCNConv`` layers.

    Layout: GCNConv -> ReLU -> dropout (p=0.5, active only in training mode) -> GCNConv.
    The output of the second convolution is returned as-is.
    """

    def __init__(self, num_features, num_classes, hidden_channels):
        """Create the input->hidden and hidden->output graph convolutions."""
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        """Apply both convolutions to ``x`` using the connectivity in ``edge_index``."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)

# Use the first CUDA device when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the GCN and move both the model and the graph to that device.
model = GCN(
    num_features=dataset.num_features,
    num_classes=dataset.num_classes,
    hidden_channels=16,
).to(device)
data = data.to(device)
print(model)

GCN(
  (conv1): GCNConv(500, 16)
  (conv2): GCNConv(16, 3)
)


In [11]:
# Loss and optimiser for the GCN created in the previous cell.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# 200 full-batch steps; the model also receives the edge index (useGNN=True).
for epoch in range(200):
    loss = train(model, useGNN=True)
    epochs_done = epoch + 1
    if epochs_done % 20:
        continue  # only report every 20th epoch
    print(f"Epoch {epochs_done} Loss: {loss:.4f}")

# Accuracy on the test mask after training.
test_acc, out = test(model, useGNN=True)
print(f"Test Accuracy: {test_acc:.4f}")

Epoch 20 Loss: 0.8818
Epoch 40 Loss: 0.5760
Epoch 60 Loss: 0.4305
Epoch 80 Loss: 0.3209
Epoch 100 Loss: 0.2116
Epoch 120 Loss: 0.1811
Epoch 140 Loss: 0.2338
Epoch 160 Loss: 0.1510
Epoch 180 Loss: 0.1866
Epoch 200 Loss: 0.1628
Test Accuracy: 0.7870


In [12]:
from torch_geometric.nn import GATConv

class GAT(torch.nn.Module):
    """Two stacked ``GATConv`` layers.

    Layout: GATConv -> ReLU -> dropout (p=0.5, active only in training mode) -> GATConv.
    The output of the second convolution is returned as-is.
    """

    def __init__(self, num_features, num_classes, hidden_channels):
        """Create the input->hidden and hidden->output attention convolutions."""
        super().__init__()
        self.conv1 = GATConv(num_features, hidden_channels)
        self.conv2 = GATConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        """Apply both convolutions to ``x`` using the connectivity in ``edge_index``."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)

# Build the GAT and place it on the device selected earlier in the notebook.
model = GAT(
    num_features=dataset.num_features,
    num_classes=dataset.num_classes,
    hidden_channels=16,
).to(device)
print(model)

GAT(
  (conv1): GATConv(500, 16, heads=1)
  (conv2): GATConv(16, 3, heads=1)
)


In [13]:
# Loss and optimiser for the GAT created in the previous cell.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# 200 full-batch steps; the model also receives the edge index (useGNN=True).
for epoch in range(200):
    loss = train(model, useGNN=True)
    epochs_done = epoch + 1
    if epochs_done % 20:
        continue  # only report every 20th epoch
    print(f"Epoch {epochs_done} Loss: {loss:.4f}")

# Accuracy on the test mask after training.
test_acc, out = test(model, useGNN=True)
print(f"Test Accuracy: {test_acc:.4f}")

Epoch 20 Loss: 0.8703
Epoch 40 Loss: 0.5932
Epoch 60 Loss: 0.3063
Epoch 80 Loss: 0.1923
Epoch 100 Loss: 0.1818
Epoch 120 Loss: 0.1312
Epoch 140 Loss: 0.1033
Epoch 160 Loss: 0.0727
Epoch 180 Loss: 0.0982
Epoch 200 Loss: 0.0768
Test Accuracy: 0.7910
