# GNN for Molecular Property Prediction
This notebook demonstrates a basic Graph Neural Network (GNN) using PyTorch Geometric to predict molecular properties from the QM9 dataset.

In [3]:
!pip install torch torchvision torchaudio torch-geometric rdkit-pypi

Collecting torch
  Using cached torch-2.7.1-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Using cached torchvision-0.22.1-cp312-cp312-win_amd64.whl.metadata (6.1 kB)
Collecting torchaudio
  Using cached torchaudio-2.7.1-cp312-cp312-win_amd64.whl.metadata (6.6 kB)
Collecting torch-geometric
  Using cached torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)


ERROR: Could not find a version that satisfies the requirement rdkit-pypi (from versions: none)
ERROR: No matching distribution found for rdkit-pypi


In [12]:
import torch
import torch.nn.functional as F
from model import GCN
from torch_geometric.datasets import QM9
from torch_geometric.transforms import AddSelfLoops
from torch_geometric.data import DataLoader


In [13]:
dataset = QM9(root='data/QM9', transform=AddSelfLoops())
dataset = dataset.shuffle()
train_dataset = dataset[:10000]
test_dataset = dataset[10000:11000]
train_loader = DataLoader(train_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


Downloading https://data.pyg.org/datasets/qm9_v3.zip
Extracting data\QM9\raw\qm9_v3.zip
Processing...
Using a pre-processed version of the dataset. Please install 'rdkit' to alternatively process the raw data.
Done!


In [14]:
model = GCN(num_features=11, hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

for epoch in range(5):
    model.train()
    total_loss = 0
    for data in train_loader:
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data.y[:, 0:1])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 2.0906
Epoch 2, Loss: 1.9639
Epoch 3, Loss: 1.9181
Epoch 4, Loss: 1.8889
Epoch 5, Loss: 1.8582


In [15]:
torch.save(model.state_dict(), "gnn_model.pt")


In [11]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import QM9
from torch_geometric.transforms import Complete
from torch_geometric.data import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
import matplotlib.pyplot as plt

ImportError: cannot import name 'Complete' from 'torch_geometric.transforms' (c:\Users\jvpor\anaconda3\Lib\site-packages\torch_geometric\transforms\__init__.py)

In [9]:
import torch
import torch.nn.functional as F
from model import GCN
from torch_geometric.nn import GCNConv, global_mean_pool


In [5]:
!pip install torch torchvision torchaudio torch-geometric


Collecting torch
  Using cached torch-2.7.1-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Using cached torchvision-0.22.1-cp312-cp312-win_amd64.whl.metadata (6.1 kB)
Collecting torchaudio
  Using cached torchaudio-2.7.1-cp312-cp312-win_amd64.whl.metadata (6.6 kB)
Collecting torch-geometric
  Using cached torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Using cached torch-2.7.1-cp312-cp312-win_amd64.whl (216.1 MB)
Using cached torchvision-0.22.1-cp312-cp312-win_amd64.whl (1.7 MB)
Using cached torchaudio-2.7.1-cp312-cp312-win_amd64.whl (2.5 MB)
Using cached torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
Using cached sympy-1.14.0-py3-none-any.whl (6.3 MB)
Installing collected packages: sympy, torch, torchvision, torchaudio, torch-geometric
  Attempting uninstall: sympy
    Found existing installation: sympy 1.12
    Uninstalling sympy-1.12:
      Successfully u

In [6]:
import torch


In [7]:
# Load QM9 Dataset
dataset = QM9(root='data/QM9', transform=Complete())
dataset = dataset.shuffle()
train_dataset = dataset[:10000]
test_dataset = dataset[10000:11000]
train_loader = DataLoader(train_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

print(f'Train size: {len(train_dataset)}, Test size: {len(test_dataset)}')

NameError: name 'QM9' is not defined

In [None]:
# Define the GCN Model
class GCN(torch.nn.Module):
    def __init__(self, num_features, hidden_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels, 1)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = global_mean_pool(x, batch)
        return self.lin(x)

In [10]:
# Train the Model
model = GCN(num_features=11, hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

for epoch in range(5):
    model.train()
    total_loss = 0
    for data in train_loader:
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data.y[:, 0:1])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")

NameError: name 'train_loader' is not defined

In [8]:
torch.save(model.state_dict(), "gnn_model.pt")


NameError: name 'model' is not defined

In [None]:
# Evaluate on Test Data
model.eval()
total_loss = 0
with torch.no_grad():
    for data in test_loader:
        out = model(data)
        loss = criterion(out, data.y[:, 0:1])
        total_loss += loss.item()
print(f"Test Loss: {total_loss / len(test_loader):.4f}")