# Lecture : Graph Convolutional Networks

## Lab 05 : GatedGCNs for chemical regression -- Solution

### Xavier Bresson, Nian Liu

Bresson, Laurent, Residual Gated Graph ConvNets, 2017  
https://arxiv.org/pdf/1711.07553

Bresson, Laurent, A two-step graph convolutional decoder for molecule generation, 2019  
https://arxiv.org/pdf/1906.03412


In [1]:
# For Google Colaboratory
# When the notebook runs inside Colab (detected through the loaded modules),
# mount Google Drive, move into the lab folder and install the pinned DGL
# release. Outside Colab this cell only imports `sys` and `os`.
import sys, os
if 'google.colab' in sys.modules:
    # mount google drive
    from google.colab import drive
    drive.mount('/content/gdrive')
    path_to_file = '/content/gdrive/My Drive/CS5284_2024_codes/codes/08_Graph_Convnets'
    print(path_to_file)
    # make the lab folder the working directory, so the relative dataset
    # paths used later in the notebook resolve from it
    os.chdir(path_to_file)
    !pwd
    !pip install dgl==1.0.0 # Install DGL
    

In [2]:
# Libraries
import pickle
from lib.utils import Dictionary, MoleculeDataset, MoleculeDGL, Molecule
from torch.utils.data import DataLoader
import dgl
import torch
import torch.nn as nn
import time


# Load molecular datasets

In [3]:
# Select dataset

# Read the pickled training molecules
print('Loading data')
data_folder_pytorch = 'datasets/QM9_pytorch/'
with open(data_folder_pytorch + "train_pytorch.pkl", "rb") as handle:
    dataset = pickle.load(handle)

# The sizes of the atom and bond dictionaries give the number of atom and bond types
with open(data_folder_pytorch + "atom_dict.pkl", "rb") as handle:
    atom_dict = pickle.load(handle)
with open(data_folder_pytorch + "bond_dict.pkl", "rb") as handle:
    bond_dict = pickle.load(handle)
num_atom_type = len(atom_dict)
num_bond_type = len(bond_dict)
for vocabulary_size in (num_atom_type, num_bond_type):
    print(vocabulary_size)

# Load the DGL datasets and expose the three splits
data_folder_dgl = 'datasets/QM9_dgl/'
dataset_name = 'QM9'
datasets_dgl = MoleculeDataset(dataset_name, data_folder_dgl)
trainset = datasets_dgl.train
valset = datasets_dgl.val
testset = datasets_dgl.test
for split in (trainset, valset, testset):
    print(len(split))

# Show a few samples: the first two training items, then item `idx` of val and test
idx = 0
print(trainset[:2])
for split in (valset, testset):
    print(split[idx])


Loading data
13
4
Loading datasets QM9_dgl...
train, test, val sizes : 2000 200 200
Time: 0.7321s
2000
200
200
([Graph(num_nodes=9, num_edges=20,
      ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}), Graph(num_nodes=9, num_edges=18,
      ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)})], [tensor([-0.2623]), tensor([1.0908])])
(Graph(num_nodes=9, num_edges=18,
      ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}), tensor([0.5063]))
(Graph(num_nodes=9, num_edges=20,
      ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}), tensor([-4.4348]))


# Generate a batch of graphs and test it

In [4]:
def _size_normalization(sizes):
    """Build the per-element size normalisation column for a batch.

    For every entry `size` in `sizes`, the result contains `size` rows equal
    to sqrt(1/size), concatenated in order into a float32 tensor with one
    column. A size of 0 contributes no rows, so no division is attempted.
    """
    columns = [
        torch.full((size, 1), (1.0 / size) if size else 0.0, dtype=torch.float32)
        for size in sizes
    ]
    return torch.cat(columns).sqrt()


# collate function prepares a batch of graphs, labels and other graph features
def collate(samples):
    """Assemble a list of (graph, label) samples into one batch.

    Args:
        samples: list of pairs (graph, label).

    Returns:
        A tuple (batch_graphs, batch_labels, batch_norm_n, batch_norm_e):
        the graphs merged with `dgl.batch`, the labels stacked with
        `torch.stack`, and the node / edge normalisation columns holding
        1/sqrt(number of nodes) and 1/sqrt(number of edges) of the graph
        each node / edge belongs to.
    """
    # Input sample is a list of pairs (graph, label)
    graphs, labels = map(list, zip(*samples))
    batch_graphs = dgl.batch(graphs)    # batch of graphs
    batch_labels = torch.stack(labels)  # batch of labels (here chemical target)

    # Normalization w.r.t. graph sizes; graphs without edges are handled
    # without dividing by zero
    batch_norm_n = _size_normalization(graph.number_of_nodes() for graph in graphs)
    batch_norm_e = _size_normalization(graph.number_of_edges() for graph in graphs)

    return batch_graphs, batch_labels, batch_norm_n, batch_norm_e


# Generate a batch of graphs
batch_size = 10
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=collate)
# Pull only the first batch: materialising the loader with list(...) would
# collate every batch of the training set just to keep one of them.
batch_graphs, batch_labels, batch_norm_n, batch_norm_e = next(iter(train_loader))
print(batch_graphs)
print(batch_labels)
print('batch_norm_n:',batch_norm_n.size())
print('batch_norm_e:',batch_norm_e.size())
# integer node / edge features stored on the batched graph under 'feat'
batch_x = batch_graphs.ndata['feat']
print('batch_x:',batch_x.size())
batch_e = batch_graphs.edata['feat']
print('batch_e:',batch_e.size())


Graph(num_nodes=88, num_edges=186,
      ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)})
tensor([[ 0.8187],
        [ 0.8730],
        [ 4.0595],
        [-0.8083],
        [-0.5649],
        [ 0.1069],
        [ 3.0962],
        [-0.5172],
        [ 1.4457],
        [-3.9308]])
batch_norm_n: torch.Size([88, 1])
batch_norm_e: torch.Size([186, 1])
batch_x: torch.Size([88])
batch_e: torch.Size([186])


## Question 1: Implement a two-layer MLP for regression on top of a GatedGCN network

Node and edge update equations for GatedGCN layers:  
\begin{eqnarray}
h_i^{\ell+1} &=& h_i^{\ell} + \text{ReLU} \Big( \text{BN} \Big( A^\ell h_i^{\ell} +  \sum_{j\sim i} \eta(e_{ij}^{\ell}) \odot B^\ell h_j^{\ell} \Big) \Big), \quad \eta(e_{ij}^{\ell}) = \frac{\sigma(e_{ij}^{\ell})}{\sum_{j'\sim i} \sigma(e_{ij'}^{\ell}) + \varepsilon} \\
e_{ij}^{\ell+1} &=& e^\ell_{ij} + \text{ReLU} \Big( \text{BN}  \Big( C^\ell e_{ij}^{\ell} + D^\ell h^{\ell}_i + E^\ell h^{\ell}_j  \Big) \Big)
\end{eqnarray}

MLP block for scalar regression of a molecular property:
\begin{eqnarray}
y &=& \textrm{MLP}(\hat{h})\in\mathbb{R} \\
\hat{h} &=& \frac{1}{n} \sum_{i=1}^n h_i^{\ell=L}\in\mathbb{R}^d\\
\textrm{MLP}(h) &=& \textrm{LL}_2\big( \textrm{ReLU} \big( \textrm{LL}_1 \big(h\big) \big) \big), h\in\mathbb{R}^d
\end{eqnarray}
       

In [5]:
# Define a two-layer MLP for regression 
class MLP_layer(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear, producing one value per input row."""

    def __init__(self, input_dim, hidden_dim):
        """Create the two linear maps.

        Args:
            input_dim: size of the input feature vector.
            hidden_dim: size of the hidden representation between the two layers.
        """
        super().__init__()
        ########################################
        # YOUR CODE STARTS
        # Implement a two-layer predictor
        ########################################
        # first map: input_dim -> hidden_dim, second map: hidden_dim -> 1
        self.linear1 = nn.Linear(in_features=input_dim, out_features=hidden_dim, bias=True)
        self.linear2 = nn.Linear(in_features=hidden_dim, out_features=1, bias=True)
        ########################################
        # YOUR CODE ENDS
        ########################################

    def forward(self, x):
        """Return the prediction y = linear2(ReLU(linear1(x)))."""
        ########################################
        # YOUR CODE STARTS
        # Pass `x` through the two-layer MLP and get prediction y
        ########################################
        hidden = self.linear1(x).relu()
        y = self.linear2(hidden)
        ########################################
        # YOUR CODE ENDS
        ########################################
        return y


# class of GatedGCN layer  
class GatedGCN_layer(nn.Module):
    """One residual gated graph convolution layer acting on node and edge features.

    Nodes aggregate `B h_j` from their incoming edges, weighted by sigmoid
    gates computed from the edge features and normalised over the incoming
    edges; edges are updated from `C e_ij + D h_i + E h_j`. Both outputs go
    through size normalisation, batch normalisation, ReLU and a residual sum.

    Args:
        input_dim: feature size of the incoming node and edge features.
        output_dim: feature size produced by the five linear maps. The
            residual sums in `forward` add the inputs to the outputs, so the
            two sizes have to be compatible for that addition.
        eps: small constant added to the gate-normalisation denominator so
            the division stays finite when every gate of a node is zero.
    """

    def __init__(self, input_dim, output_dim, eps=1e-6):
        super(GatedGCN_layer, self).__init__()
        self.A = nn.Linear(input_dim, output_dim, bias=True)
        self.B = nn.Linear(input_dim, output_dim, bias=True)
        self.C = nn.Linear(input_dim, output_dim, bias=True)
        self.D = nn.Linear(input_dim, output_dim, bias=True)
        self.E = nn.Linear(input_dim, output_dim, bias=True)
        self.bn_node_h = nn.BatchNorm1d(output_dim)  # batch norm applied to node features
        self.bn_node_e = nn.BatchNorm1d(output_dim)  # batch norm applied to edge features
        self.eps = eps

    # Step 1 of message-passing with DGL: 
    #   Node feature and edge features are passed along edges (src/j => dst/i) 
    def message_func(self, edges):
        """Compute the new edge value and send {Bhj, eij} to the destination node."""
        Bhj = edges.src['Bh'] # Bhj with j/src
        eij = edges.data['Ce'] +  edges.dst['Dh'] + edges.src['Eh'] # Ceij + Dhi + Ehj with dst/i, src/j
        edges.data['e'] = eij # update edge feature value
        return {'Bhj' : Bhj, 'eij' : eij} # send message={Bhj, eij} to node dst/i

    # Step 2 of message-passing with DGL: 
    #   Reduce function collects all messages={Bhj, eij} sent to node dst/i with Step 1
    def reduce_func(self, nodes):
        """Combine the received messages into the updated node feature.

        Returns a dict with key 'h' holding
        `Ah_i + sum_j sigma_ij * Bh_j / (sum_j sigma_ij + eps)`, where the
        sums run over the mailbox dimension (dim=1).
        """
        Ahi = nodes.data['Ah']
        Bhj = nodes.mailbox['Bhj']
        e = nodes.mailbox['eij'] 
        sigmaij = torch.sigmoid(e) # sigma_ij = sigmoid(e_ij)
        # eps keeps the ratio finite when all gates underflow to zero
        gate_sum = torch.sum( sigmaij, dim=1 ) + self.eps
        h = Ahi + torch.sum( sigmaij * Bhj, dim=1 ) / gate_sum # hi = Ahi + sum_j eta_ij * Bhj    
        return {'h' : h} # return update node feature hi
    
    def forward(self, g, h, e, snorm_n, snorm_e):
        """Apply the layer to graph `g`.

        Args:
            g: DGL graph; its ndata/edata are used as scratch storage and are
                overwritten under the keys 'h', 'Ah', 'Bh', 'Dh', 'Eh', 'e', 'Ce'.
            h: node features.
            e: edge features.
            snorm_n: multiplicative node normalisation w.r.t. graph node size.
            snorm_e: multiplicative edge normalisation w.r.t. graph edge size.

        Returns:
            The pair (h, e) of updated node and edge features.
        """
        h_in = h # residual connection
        e_in = e # residual connection
        
        g.ndata['h']  = h 
        g.ndata['Ah'] = self.A(h) # linear transformation 
        g.ndata['Bh'] = self.B(h) # linear transformation 
        g.ndata['Dh'] = self.D(h) # linear transformation 
        g.ndata['Eh'] = self.E(h) # linear transformation 
        g.edata['e']  = e 
        g.edata['Ce'] = self.C(e) # linear transformation 
        
        g.update_all(self.message_func,self.reduce_func) # update the node and edge features with DGL
        
        h = g.ndata['h'] # collect the node output of graph convolution
        e = g.edata['e'] # collect the edge output of graph convolution
        
        h = h* snorm_n # normalize activation w.r.t. graph node size
        e = e* snorm_e # normalize activation w.r.t. graph edge size
        
        h = self.bn_node_h(h) # batch normalization  
        e = self.bn_node_e(e) # batch normalization  
        
        h = torch.relu(h) # non-linear activation
        e = torch.relu(e) # non-linear activation
        
        h = h_in + h # residual connection
        e = e_in + e # residual connection
        
        return h, e


## Question 2: Implement the Mean Absolute Error (MAE) loss

Hint: You may use [torch.nn.L1Loss()](https://pytorch.org/docs/stable/generated/torch.nn.L1Loss.html#torch.nn.L1Loss).
                                       

In [6]:
class GatedGCN_net(nn.Module):
    """Embeddings, a stack of GatedGCN layers and an MLP head giving one scalar per graph."""

    def __init__(self, net_parameters):
        """Build the network from a parameter dictionary.

        Args:
            net_parameters: dict with the keys 'input_dim', 'hidden_dim' and
                'L' (number of GatedGCN layers). The embedding table sizes come
                from the module-level `num_atom_type` and `num_bond_type`.
        """
        super().__init__()
        input_dim = net_parameters['input_dim']  # looked up but not used below
        hidden_dim = net_parameters['hidden_dim']
        num_layers = net_parameters['L']
        # integer atom / bond types -> hidden_dim feature vectors
        self.embedding_h = nn.Embedding(num_atom_type, hidden_dim)
        self.embedding_e = nn.Embedding(num_bond_type, hidden_dim)
        conv_stack = []
        for _ in range(num_layers):
            conv_stack.append(GatedGCN_layer(hidden_dim, hidden_dim))
        self.GatedGCN_layers = nn.ModuleList(conv_stack)
        self.MLP_layer = MLP_layer(hidden_dim, hidden_dim)

    def forward(self, g, h, e, snorm_n, snorm_e):
        """Return the regression output of the MLP head for the batched graph `g`.

        Args:
            g: batched DGL graph; its node data 'h' is overwritten.
            h: node inputs passed to the node embedding.
            e: edge inputs passed to the edge embedding.
            snorm_n: node size normalisation forwarded to every layer.
            snorm_e: edge size normalisation forwarded to every layer.
        """
        # input embedding
        node_feat = self.embedding_h(h)
        edge_feat = self.embedding_e(e)

        # graph convnet layers
        for conv in self.GatedGCN_layers:
            node_feat, edge_feat = conv(g, node_feat, edge_feat, snorm_n, snorm_e)

        # readout: DGL mean over the node features stored under 'h',
        # followed by the MLP regression head
        g.ndata['h'] = node_feat
        graph_feat = dgl.mean_nodes(g, 'h')
        return self.MLP_layer(graph_feat)

    def loss(self, y_scores, y_labels):
        """Return the mean absolute error between predictions and targets."""
        ########################################
        # YOUR CODE START
        # Define the Mean Absolute Error (MAE) as regression loss 
        ########################################
        criterion = nn.L1Loss()
        loss = criterion(y_scores, y_labels)
        ########################################
        # YOUR CODE END
        ########################################
        return loss

    def update(self, lr):
        """Return a new Adam optimizer over this network's parameters with learning rate `lr`."""
        return torch.optim.Adam(self.parameters(), lr=lr)


# Instantiate one network (testing)
# Build a 4-layer network with hidden size 128 and print its module tree
net_parameters = {
    'input_dim': 1,
    'hidden_dim': 128,
    'L': 4,
}
net = GatedGCN_net(net_parameters)
print(net)


GatedGCN_net(
  (embedding_h): Embedding(13, 128)
  (embedding_e): Embedding(4, 128)
  (GatedGCN_layers): ModuleList(
    (0-3): 4 x GatedGCN_layer(
      (A): Linear(in_features=128, out_features=128, bias=True)
      (B): Linear(in_features=128, out_features=128, bias=True)
      (C): Linear(in_features=128, out_features=128, bias=True)
      (D): Linear(in_features=128, out_features=128, bias=True)
      (E): Linear(in_features=128, out_features=128, bias=True)
      (bn_node_h): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn_node_e): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (MLP_layer): MLP_layer(
    (linear1): Linear(in_features=128, out_features=128, bias=True)
    (linear2): Linear(in_features=128, out_features=1, bias=True)
  )
)


# Train the network

In [7]:
def run_one_epoch(net, data_loader, train=True):
    """Run one pass over `data_loader` and return the mean of the batch losses.

    Args:
        net: model that provides `train()`, `eval()` and `loss(scores, labels)`
            and is callable as
            `net(batch_graphs, batch_x, batch_e, batch_snorm_n, batch_snorm_e)`.
        data_loader: iterable yielding tuples
            (batch_graphs, batch_labels, batch_snorm_n, batch_snorm_e), where
            `batch_graphs` exposes `ndata['feat']` and `edata['feat']`.
        train: if True, switch the model to training mode and take one step of
            the module-level `optimizer` per batch; if False, switch to
            evaluation mode and only measure the loss.

    Returns:
        The sum of the batch losses divided by the number of batches, or NaN
        when `data_loader` yields no batch.
    """
    if train:
        net.train() # during training
    else:
        net.eval()  # during inference/test
    epoch_loss = 0.0
    num_batches = 0
    for batch_graphs, batch_labels, batch_snorm_n, batch_snorm_e in data_loader:
        batch_x = batch_graphs.ndata['feat']
        batch_e = batch_graphs.edata['feat']
        # call the module itself rather than .forward() so registered hooks run
        batch_scores = net(batch_graphs, batch_x, batch_e, batch_snorm_n, batch_snorm_e)
        loss = net.loss(batch_scores, batch_labels)
        if train: # during training, run backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch_loss += loss.detach().item()
        num_batches += 1
    if num_batches == 0:
        # nothing was evaluated: there is no mean loss to report
        return float('nan')
    return epoch_loss / num_batches


# dataset loaders: the three splits share the same batching options
batch_size = 100
loader_options = dict(batch_size=batch_size, shuffle=True, collate_fn=datasets_dgl.collate)
train_loader = DataLoader(trainset, **loader_options)
test_loader = DataLoader(testset, **loader_options)
val_loader = DataLoader(valset, **loader_options)

# Instantiate one network
net_parameters = {
    'input_dim': 1,
    'hidden_dim': 100,
    'L': 4,
}
net = GatedGCN_net(net_parameters)

# optimizer (read as a module-level name by run_one_epoch)
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)

# training loop: one training pass, then loss measurement on test and val
# with gradient tracking disabled
log_template = 'Epoch {}, time {:.4f}, train_loss: {:.4f}, test_loss: {:.4f}, val_loss: {:.4f}'
for epoch in range(50):
    start = time.time()
    epoch_train_loss = run_one_epoch(net, train_loader, True)
    with torch.no_grad():
        epoch_test_loss = run_one_epoch(net, test_loader, False)
        epoch_val_loss = run_one_epoch(net, val_loader, False)
    print(log_template.format(epoch, time.time()-start, epoch_train_loss, epoch_test_loss, epoch_val_loss))
   

  assert input.numel() == input.storage().size(), (


Epoch 0, time 6.5603, train_loss: 1.2752, test_loss: 1.2946, val_loss: 1.1812
Epoch 1, time 2.5837, train_loss: 1.1820, test_loss: 1.1751, val_loss: 1.0746
Epoch 2, time 2.0501, train_loss: 1.1072, test_loss: 1.0627, val_loss: 0.9810
Epoch 3, time 2.0528, train_loss: 1.0613, test_loss: 0.9989, val_loss: 0.9349
Epoch 4, time 3.0131, train_loss: 1.0205, test_loss: 0.9602, val_loss: 0.8919
Epoch 5, time 4.2675, train_loss: 0.9793, test_loss: 0.9157, val_loss: 0.8583
Epoch 6, time 3.5839, train_loss: 0.9437, test_loss: 0.9194, val_loss: 0.8427
Epoch 7, time 4.6368, train_loss: 0.9036, test_loss: 0.8688, val_loss: 0.7862
Epoch 8, time 5.0549, train_loss: 0.8704, test_loss: 0.8137, val_loss: 0.7314
Epoch 9, time 2.4849, train_loss: 0.8398, test_loss: 0.7933, val_loss: 0.7093
Epoch 10, time 2.1983, train_loss: 0.8174, test_loss: 0.8576, val_loss: 0.8258
Epoch 11, time 2.2391, train_loss: 0.8060, test_loss: 0.7310, val_loss: 0.6537
Epoch 12, time 2.5428, train_loss: 0.7860, test_loss: 0.7464, 