In [None]:
import torch

In [None]:
%pip install torch_geometric



In [None]:
import torch_geometric

In [None]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# 1. Dataset

In [None]:
# Load the Cora Planetoid dataset (stored under ./cora) with NormalizeFeatures as its transform.
cora_dataset = Planetoid(
    root='./cora',
    name='cora',
    transform=NormalizeFeatures(),
)
cora_dataset

cora()

In [None]:
# Dataset-level summary: graph count, per-node feature width and number of label classes.
print(
    f"No. of graphs: {len(cora_dataset)}",
    f"No. of node features: {cora_dataset.num_node_features}",
    f"No. of classes: {cora_dataset.num_classes}",
    sep="\n",
)

No. of graphs: 1
No. of node features: 1433
No. of classes: 7


In [None]:
# Take the first graph object from the dataset and echo it to show its attribute shapes.
cora_data = cora_dataset[0]
cora_data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [None]:
# Size of each node split, obtained by summing the corresponding mask.
print(
    f"No. of training nodes: {cora_data.train_mask.sum().item()}",
    f"No. of validation nodes: {cora_data.val_mask.sum().item()}",
    f"No. of test nodes: {cora_data.test_mask.sum().item()}",
    sep="\n",
)

No. of training nodes: 140
No. of validation nodes: 500
No. of test nodes: 1000


In [None]:
# Structural statistics of the graph; the average degree is edges divided by nodes.
print(
    f"No. of nodes: {cora_data.num_nodes}",
    f"No. of edges: {cora_data.num_edges}",
    f"Average node degree: {(cora_data.num_edges/cora_data.num_nodes):.2f}",
    f"Has isolated nodes? {cora_data.has_isolated_nodes()}",
    f"Has self-loops? {cora_data.has_self_loops()}",
    f"Is undirected? {cora_data.is_undirected()}",
    sep="\n",
)

No. of nodes: 2708
No. of edges: 10556
Average node degree: 3.90
Has isolated nodes? False
Has self-loops? False
Is undirected? True


# 2. Building the GCN model

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
  """Two-layer graph convolutional network that returns per-node logits.

  Args:
    in_channels: Width of each input node feature vector.
    out_channels: Number of values produced per node by the second layer.
    hidden_channels: Width of the representation between the two GCNConv layers.
    dropout: Dropout probability applied to the hidden representation while the
      module is in training mode. Must lie in [0, 1].
    seed: Value handed to ``torch.manual_seed`` before the layers are built so
      that repeated constructions start from the same weights. Pass ``None`` to
      leave torch's global RNG untouched.

  Raises:
    ValueError: If ``dropout`` is outside [0, 1].
  """

  def __init__(self, in_channels, out_channels, hidden_channels=16, dropout=0.5, seed=123):
    if not 0.0 <= dropout <= 1.0:
      raise ValueError(f"dropout must be in [0, 1], got {dropout}")

    # Note: this reseeds torch's *global* RNG, so it also affects later random draws.
    if seed is not None:
      torch.manual_seed(seed)

    super().__init__()

    self.dropout = dropout
    self.conv1 = GCNConv(in_channels=in_channels, out_channels=hidden_channels)
    self.conv2 = GCNConv(in_channels=hidden_channels, out_channels=out_channels)

  def forward(self, data):
    """Run both graph convolutions on ``data``.

    Args:
      data: Object exposing ``x`` (node features) and ``edge_index``.

    Returns:
      The output of the second GCNConv layer; no softmax is applied.
    """
    x, edge_index = data.x, data.edge_index

    x = self.conv1(x, edge_index)
    x = F.relu(x)
    # Dropout is only active in training mode (self.training is toggled by train()/eval()).
    x = F.dropout(x, p=self.dropout, training=self.training)

    x = self.conv2(x, edge_index)

    return x

In [None]:
# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cpu'

In [None]:
# Keep the object returned by `.to()` rather than relying on it mutating `cora_data` in place.
cora_data = cora_data.to(device)

# Size the network from the data: input width = node feature count, output width = class count.
model = GCN(
    in_channels=cora_data.num_features,
    out_channels=cora_dataset.num_classes,
).to(device)

# Adam with weight decay; the cross-entropy criterion is fed the raw logits returned by the model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

In [None]:
# Total number of scalar entries across all parameter tensors of the model.
param_count = sum(tensor.numel() for tensor in model.parameters())
print(f"No. of parameters: {param_count}")

No. of parameters: 23063


In [None]:
epochs = 200

cora_data = cora_dataset[0].to(device)

for epoch in range(epochs):
  # Full-batch step: forward over the whole graph, loss restricted to the training nodes.
  model.train()
  optimizer.zero_grad()
  out = model(cora_data)
  train_mask = cora_data.train_mask
  train_targets = cora_data.y[train_mask]
  loss = criterion(out[train_mask], train_targets)
  loss.backward()
  optimizer.step()

  # Accuracy on the training nodes, taken from the same train-mode forward pass.
  pred_train = out.argmax(dim=1)
  correct_train = (pred_train[train_mask] == train_targets).sum()
  train_acc = int(correct_train) / int(train_mask.sum())

  # Report every tenth epoch.
  is_report_epoch = (epoch + 1) % 10 == 0
  if is_report_epoch:
    print(f"Epoch: {epoch+1}, train loss: {loss:.3f}, Train Acc: {train_acc:.3f}")

Epoch: 10, train loss: 1.861, Train Acc: 0.743
Epoch: 20, train loss: 1.714, Train Acc: 0.764
Epoch: 30, train loss: 1.523, Train Acc: 0.836
Epoch: 40, train loss: 1.335, Train Acc: 0.850
Epoch: 50, train loss: 1.125, Train Acc: 0.900
Epoch: 60, train loss: 0.873, Train Acc: 0.957
Epoch: 70, train loss: 0.796, Train Acc: 0.929
Epoch: 80, train loss: 0.680, Train Acc: 0.936
Epoch: 90, train loss: 0.621, Train Acc: 0.943
Epoch: 100, train loss: 0.553, Train Acc: 0.936
Epoch: 110, train loss: 0.502, Train Acc: 0.950
Epoch: 120, train loss: 0.494, Train Acc: 0.950
Epoch: 130, train loss: 0.455, Train Acc: 0.936
Epoch: 140, train loss: 0.404, Train Acc: 0.957
Epoch: 150, train loss: 0.411, Train Acc: 0.979
Epoch: 160, train loss: 0.383, Train Acc: 0.986
Epoch: 170, train loss: 0.342, Train Acc: 0.986
Epoch: 180, train loss: 0.371, Train Acc: 0.971
Epoch: 190, train loss: 0.332, Train Acc: 0.979
Epoch: 200, train loss: 0.325, Train Acc: 0.979


In [None]:
# Score the model on the validation nodes; eval() switches the dropout in forward() off.
model.eval()

with torch.inference_mode():
  val_preds = model(cora_data).argmax(dim=1)
  val_mask = cora_data.val_mask
  val_hits = val_preds[val_mask] == cora_data.y[val_mask]
  val_correct = val_hits.sum()
  val_acc = int(val_correct) / int(val_mask.sum())

print(f"Val Accuracy: {val_acc:.3f}")

Val Accuracy: 0.794


In [None]:
# Score the model on the held-out test nodes; eval() switches the dropout in forward() off.
model.eval()

with torch.inference_mode():
  test_preds = model(cora_data).argmax(dim=1)
  test_mask = cora_data.test_mask
  test_hits = test_preds[test_mask] == cora_data.y[test_mask]
  test_correct = test_hits.sum()
  test_acc = int(test_correct) / int(test_mask.sum())

print(f"Test Accuracy: {test_acc:.3f}")

Test Accuracy: 0.801


In [None]:
# Keep the object returned by `.to()` rather than relying on it mutating `cora_data` in place.
cora_data = cora_data.to(device)

# Build a fresh model and optimizer so the next training run does not continue from the
# weights and optimizer state of the previous one.
model = GCN(
    in_channels=cora_data.num_features,
    out_channels=cora_dataset.num_classes,
).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

In [None]:
%%time
import copy

epochs = 200

cora_data = cora_dataset[0].to(device)
best_val_acc, best_model_state = 0.0, None

for epoch in range(epochs):
  model.train()
  optimizer.zero_grad()
  out = model(cora_data)
  loss = criterion(out[cora_data.train_mask], cora_data.y[cora_data.train_mask])
  loss.backward()
  optimizer.step()

  pred_train = out.argmax(dim=1)
  correct_train = (pred_train[cora_data.train_mask] == cora_data.y[cora_data.train_mask]).sum()
  train_acc = int(correct_train) / int(cora_data.train_mask.sum())

  model.eval()

  with torch.inference_mode():
    val_preds = model(cora_data).argmax(dim=1)
    val_correct = (val_preds[cora_data.val_mask] == cora_data.y[cora_data.val_mask]).sum()
    val_acc = int(val_correct) / int(cora_data.val_mask.sum())

    if val_acc > best_val_acc:
      best_val_acc = val_acc
      best_model_state = copy.deepcopy(model.state_dict())

    if (epoch+1)%10 == 0:
      print(f"Epoch: {epoch+1}, train loss: {loss:.3f}, Train Acc: {train_acc:.3f}, Val Acc: {best_val_acc:.3f}")

Epoch: 10, train loss: 1.861, Train Acc: 0.743, Val Acc: 0.610
Epoch: 20, train loss: 1.714, Train Acc: 0.764, Val Acc: 0.680
Epoch: 30, train loss: 1.523, Train Acc: 0.836, Val Acc: 0.706
Epoch: 40, train loss: 1.335, Train Acc: 0.850, Val Acc: 0.744
Epoch: 50, train loss: 1.125, Train Acc: 0.900, Val Acc: 0.774
Epoch: 60, train loss: 0.873, Train Acc: 0.957, Val Acc: 0.788
Epoch: 70, train loss: 0.796, Train Acc: 0.929, Val Acc: 0.802
Epoch: 80, train loss: 0.680, Train Acc: 0.936, Val Acc: 0.802
Epoch: 90, train loss: 0.621, Train Acc: 0.943, Val Acc: 0.802
Epoch: 100, train loss: 0.553, Train Acc: 0.936, Val Acc: 0.802
Epoch: 110, train loss: 0.502, Train Acc: 0.950, Val Acc: 0.802
Epoch: 120, train loss: 0.494, Train Acc: 0.950, Val Acc: 0.802
Epoch: 130, train loss: 0.455, Train Acc: 0.936, Val Acc: 0.802
Epoch: 140, train loss: 0.404, Train Acc: 0.957, Val Acc: 0.802
Epoch: 150, train loss: 0.411, Train Acc: 0.979, Val Acc: 0.802
Epoch: 160, train loss: 0.383, Train Acc: 0.986, 

In [None]:
# Report the best validation score, then load the weights saved at that point into the model.
print(f"Best validation accuracy: {best_val_acc}")
model.load_state_dict(best_model_state)

Best validation accuracy: 0.802


<All keys matched successfully>

In [None]:
# Score the model currently held in `model` on the test nodes; eval() switches dropout off.
model.eval()

with torch.inference_mode():
  test_preds = model(cora_data).argmax(dim=1)
  test_mask = cora_data.test_mask
  test_hits = test_preds[test_mask] == cora_data.y[test_mask]
  test_correct = test_hits.sum()
  test_acc = int(test_correct) / int(test_mask.sum())

print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.8070
