# Lecture : Graph Convolutional Networks

## Lab 01 : ChebNets 

### Xavier Bresson

Defferrard, Bresson, Vandergheynst, Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering, 2016  
https://arxiv.org/pdf/1606.09375


In [1]:
# For Google Colaboratory
import sys, os
# Only runs when the notebook is executed inside Google Colab (the
# 'google.colab' module is then present in sys.modules); otherwise the
# whole block is skipped and the current working directory is untouched.
if 'google.colab' in sys.modules:
    # mount google drive under /content/gdrive
    from google.colab import drive
    drive.mount('/content/gdrive')
    # NOTE(review): hard-coded Drive location of the lab folder; adapt it if
    # the course material was copied somewhere else.
    path_to_file = '/content/gdrive/My Drive/GML_May23_codes/codes/08_Graph_Convnets'
    print(path_to_file)
    # make "path_to_file" the working directory so that the relative paths
    # used later in the notebook ('lib/', 'datasets/...') resolve from it
    os.chdir(path_to_file)
    # IPython shell escape: show the resulting working directory
    !pwd
    

In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import collections
import time
import numpy as np
import sys
sys.path.insert(0, 'lib/')
%load_ext autoreload
%autoreload 2


## MNIST


In [3]:
# Load the small MNIST subset: the file stores the four tensors in the order
# (train data, train labels, test data, test labels).
train_data, train_label, test_data, test_label = torch.load('datasets/MNIST_1k.pt')

# Report the size of every split
for split_name, split_tensor in (
    ('train_data', train_data),
    ('train_label', train_label),
    ('test_data', test_data),
    ('test_label', test_label),
):
    print(split_name, split_tensor.size())


train_data torch.Size([1000, 784])
train_label torch.Size([1000])
test_data torch.Size([100, 784])
test_label torch.Size([100])


## Compute coarsened graphs


In [4]:
from lib.grid_graph import grid_graph
from lib.coarsening import coarsen
from lib.coarsening import lmax_L
from lib.coarsening import perm_data
from lib.coarsening import rescale_L

# Start the timer for the whole graph construction / coarsening stage
t_start = time.time()

# Build the graph of the Euclidean grid
grid_side = 28
number_edges = 8
A = grid_graph(grid_side, number_edges, 'euclidean')

# Coarsen the grid graph: get the graph Laplacians and the node permutation
coarsening_levels = 4
L, perm = coarsen(A, coarsening_levels)

# Largest eigenvalue of the first `coarsening_levels` graph Laplacians
lmax = [lmax_L(L[level]) for level in range(coarsening_levels)]
print('lmax: ' + str(lmax))

# Reindex the nodes of every image so that they satisfy the binary tree
# structure of the coarsening, then convert the result to float tensors
train_data = torch.tensor(perm_data(train_data, perm)).float()
test_data = torch.tensor(perm_data(test_data, perm)).float()
for permuted in (train_data, test_data):
    print(permuted.size())

print('Execution time: {:.2f}s'.format(time.time() - t_start))
del perm


nb edges:  6396
Heavy Edge Matching coarsening with Xavier version
Layer 0: M_0 = |V| = 944 nodes (160 added), |E| = 3198 edges
Layer 1: M_1 = |V| = 472 nodes (67 added), |E| = 1619 edges
Layer 2: M_2 = |V| = 236 nodes (23 added), |E| = 784 edges
Layer 3: M_3 = |V| = 118 nodes (5 added), |E| = 387 edges
Layer 4: M_4 = |V| = 59 nodes (0 added), |E| = 190 edges
lmax: [1.3857534, 1.3440951, 1.2102374, 1.0000005]
torch.Size([1000, 944])
torch.Size([100, 944])
Execution time: 0.38s


# ChebNet with LeNet5 hyper-parameters

- First layer : CL with 32 features
- Second layer : MaxPooling to reduce graph size by a factor 4 (two coarsening levels)
- Third layer : CL with 64 features
- Fourth layer : MaxPooling to reduce graph size by a factor 4 (two coarsening levels)
- Fifth layer : Fully connected (or linear) layer with 512 features 
- Last layer : Fully connected (or linear) layer with 10 output values for 10 classes 


In [5]:
# class definition
class ChebNet_LeNet5(nn.Module):
    """ChebNet with a LeNet5-like layout.

    Two Chebyshev graph convolution layers, each followed by a ReLU and a max
    pooling of size 4, then two fully connected layers (with dropout after the
    first one).

    Args:
        net_parameters: sequence [D, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F]
            where D is the number of input vertices, CLx_F / CLx_K are the
            number of output features / Chebyshev order of graph conv layer x,
            and FC1_F / FC2_F are the output sizes of the two linear layers.
        dropout_value: dropout probability applied after FC1.
        Ls: sequence of scipy sparse graph Laplacians. Every entry is rescaled
            and converted to a torch sparse tensor; forward() uses Ls[0] for
            the first convolution and Ls[2] for the second one (i.e. the graph
            reached after one pooling of size 4).
        lmax: kept for interface compatibility but NOT used: the largest
            eigenvalue of each Laplacian is recomputed with lmax_L.
    """

    def __init__(self, net_parameters, dropout_value, Ls, lmax):
        super().__init__()
        # parameters
        D, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F = net_parameters
        # the two poolings of size 4 divide the number of vertices by 16
        FC1Fin = CL2_F*(D//16)
        # graph CL1 (one input feature per vertex, hence CL1_K inputs)
        self.cl1 = nn.Linear(CL1_K, CL1_F)
        self.CL1_K = CL1_K; self.CL1_F = CL1_F
        # graph CL2
        self.cl2 = nn.Linear(CL2_K*CL1_F, CL2_F)
        self.CL2_K = CL2_K; self.CL2_F = CL2_F
        # FC1
        self.fc1 = nn.Linear(FC1Fin, FC1_F)
        self.FC1Fin = FC1Fin
        self.dropout = nn.Dropout(dropout_value)
        # FC2
        self.fc2 = nn.Linear(FC1_F, FC2_F)
        # Compute pytorch Laplacians: iterate over the Laplacians that were
        # actually passed in instead of relying on a notebook-level global
        # for their number.
        self.L = []
        for L_scipy in Ls:
            # rescale Laplacian with its own largest eigenvalue
            L_rescaled = rescale_L(L_scipy, lmax_L(L_scipy))
            self.L.append(self._scipy_to_torch_sparse(L_rescaled))

    @staticmethod
    def _scipy_to_torch_sparse(L):
        """Convert a scipy sparse matrix to a float32 torch sparse COO tensor."""
        L = L.tocoo()
        indices = torch.from_numpy(np.vstack((L.row, L.col)).astype(np.int64))  # 2 x nnz
        values = torch.from_numpy(L.data.astype(np.float32))
        # torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor
        # constructor; the result does not require grad by default.
        return torch.sparse_coo_tensor(indices, values, torch.Size(L.shape))

    # Max pooling of size p (p must be a power of 2)
    def graph_max_pool(self, x, p):
        """Max-pool groups of p consecutive vertices.

        Args:
            x: tensor of size B x V x F.
            p: pooling size; x is returned unchanged when p <= 1.

        Returns:
            Tensor of size B x V/p x F (or x itself when p <= 1).
        """
        if p > 1:
            x = x.permute(0,2,1).contiguous()   # x = B x F x V
            x = nn.functional.max_pool1d(x, p)  # B x F x V/p
            x = x.permute(0,2,1).contiguous()   # x = B x V/p x F
        return x

    # Graph convolution layer
    def graph_conv_cheby(self, x, cl, L, Fout, K):
        """Chebyshev graph convolution.

        Builds the K terms of the Chebyshev recurrence
        T_0 = x, T_1 = L x, T_k = 2 L T_{k-1} - T_{k-2}
        and mixes them with the linear layer `cl`.

        Args:
            x: input tensor of size B x V x Fin
               (B = batch size, V = num vertices, Fin = num input features).
            cl: linear layer mapping Fin*K features to Fout features.
            L: torch sparse V x V matrix used in the recurrence.
            Fout: number of output features.
            K: Chebyshev order and support size (K >= 1).

        Returns:
            Tensor of size B x V x Fout.
        """
        B, V, Fin = (int(s) for s in x.size())
        # transform to Chebyshev basis
        x0 = x.permute(1,2,0).contiguous()  # V x Fin x B
        x0 = x0.view([V, Fin*B])            # V x Fin*B
        basis = [x0]
        if K > 1:
            x1 = torch.sparse.mm(L, x0)     # V x Fin*B
            basis.append(x1)
            for _ in range(2, K):
                x2 = 2 * torch.sparse.mm(L, x1) - x0  # V x Fin*B
                basis.append(x2)
                x0, x1 = x1, x2
        # stack once instead of growing the tensor with repeated torch.cat
        x = torch.stack(basis, 0)            # K x V x Fin*B
        x = x.view([K, V, Fin, B])           # K x V x Fin x B
        x = x.permute(3,1,2,0).contiguous()  # B x V x Fin x K
        x = x.view([B*V, Fin*K])             # B*V x Fin*K
        # Linear transformation of Fin features to get Fout features
        x = cl(x)                            # B*V x Fout
        x = x.view([B, V, Fout])             # B x V x Fout
        return x

    def forward(self, x):
        """Compute the class scores of a batch.

        Args:
            x: tensor of size B x V with one value per vertex.

        Returns:
            Output of the last linear layer (no softmax applied).
        """
        # graph CL1
        x = x.unsqueeze(2) # B x V x Fin=1
        x = self.graph_conv_cheby(x, self.cl1, self.L[0], self.CL1_F, self.CL1_K)
        x = torch.relu(x)
        x = self.graph_max_pool(x, 4)
        # graph CL2: pooling by 4 moved us two levels down, hence self.L[2]
        x = self.graph_conv_cheby(x, self.cl2, self.L[2], self.CL2_F, self.CL2_K)
        x = torch.relu(x)
        x = self.graph_max_pool(x, 4)
        # FC1
        x = x.view(-1, self.FC1Fin)
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.dropout(x)
        # FC2
        x = self.fc2(x)
        return x

    def update_learning_rate(self, optimizer, lr):
        """Set the learning rate of every parameter group and return the optimizer."""
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer
    
def loss_reg(lossCE, net, y, y_target, l2_regularization):
    """Return the data loss plus an L2 penalty on all parameters of `net`.

    The result is lossCE(y, y_target) + 0.5 * l2_regularization * sum(p**2),
    where the sum runs over every entry of every tensor in net.parameters().
    """
    # Sum of squared entries over all parameter tensors
    squared_norm = 0.0
    for weights in net.parameters():
        squared_norm += (weights * weights).sum()
    total = lossCE(y, y_target)
    total += 0.5 * l2_regularization * squared_norm
    return total


# Network hyper-parameters: D input vertices, (features, Chebyshev order) of
# the two graph convolution layers, and the sizes of the two linear layers
D = train_data.shape[1]
CL1_F, CL1_K = 32, 25
CL2_F, CL2_K = 64, 25
FC1_F, FC2_F = 512, 10
net_parameters = [D, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F]

# Build the ChebNet and show its layers
dropout_value = 0.5
net = ChebNet_LeNet5(net_parameters, dropout_value, L, lmax)
print(net)

def display_num_param(net):
    """Print the number of parameters of `net` and return it in millions."""
    total = sum(tensor.numel() for tensor in net.parameters())
    in_millions = total/1e6
    print('Number of parameters: {} ({:.2f} million)'.format(total, in_millions))
    return in_millions
display_num_param(net)

# extract one batch (random indices of the training set)
batch_size = 10
indices = torch.randperm(train_data.shape[0])
batch_idx = indices[:batch_size]
print(batch_idx)
train_x, train_y = train_data[batch_idx,:], train_label[batch_idx]

# Optimizer
learning_rate = 0.05
optimizer = torch.optim.SGD( net.parameters(), lr=learning_rate, momentum=0.9 )

# Forward
y = net(train_x)

# backward
# Gradients must be cleared BEFORE the backward pass: calling zero_grad()
# between backward() and step() discards the gradients that were just
# computed, so step() would not update anything.
lossCE = nn.CrossEntropyLoss()
l2_regularization = 5e-4
loss = loss_reg(lossCE, net, y, train_y, l2_regularization)
optimizer.zero_grad()
loss.backward()

# Update
optimizer.step()


  L = torch.sparse.FloatTensor(indices, L_data, torch.Size(L.shape))


ChebNet_LeNet5(
  (cl1): Linear(in_features=25, out_features=32, bias=True)
  (cl2): Linear(in_features=800, out_features=64, bias=True)
  (fc1): Linear(in_features=3776, out_features=512, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
)
Number of parameters: 1991050 (1.99 million)
tensor([612, 477, 568, 141, 311, 415, 668, 201, 389, 904])


## Training ChebNet

In [6]:
# Network hyper-parameters: D input vertices, (features, Chebyshev order) of
# the two graph convolution layers, and the sizes of the two linear layers
D = train_data.shape[1]
CL1_F, CL1_K = 32, 25
CL2_F, CL2_K = 64, 25
FC1_F, FC2_F = 512, 10
net_parameters = [D, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F]

# Fresh ChebNet for training
dropout_value = 0.1
net = ChebNet_LeNet5(net_parameters, dropout_value, L, lmax)
print(net)
display_num_param(net)

# Optimization hyper-parameters
init_lr = 0.05 # initial learning rate
lr = init_lr   # current learning rate, decayed during training
l2_regularization = 5e-4
batch_size = 100
num_epochs = 20
num_train_data = train_data.shape[0]
nb_iter = int(num_epochs * num_train_data) // batch_size
print('num_epochs=',num_epochs,', num_train_data=',num_train_data,', nb_iter=',nb_iter)

# Loss and optimizer
lossCE = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD( net.parameters(), lr=lr, momentum=0.9 )

def evaluation(y_predicted, y_label):
    """Return the accuracy in percent (as a tensor) of a batch of class scores.

    The predicted class of each row of `y_predicted` is the index of its
    largest score; it is compared with the matching entry of `y_label`.
    """
    class_predicted = y_predicted.max(dim=1).indices
    num_correct = (class_predicted == y_label).sum()
    return 100.0* num_correct/ y_predicted.size(0)


# loop over epochs
num_data = 0 # total number of training samples seen so far
for epoch in range(num_epochs):

    # reshuffle: new random order of the training samples for this epoch
    indices = torch.randperm(num_train_data)

    # reset time
    t_start = time.time()

    # running statistics of the epoch
    running_loss = 0.0
    running_accuray = 0
    running_total = 0
    net.train()
    for idx in range(0,num_train_data,batch_size):

        # extract batches through the shuffled indices, so that the
        # mini-batches differ from one epoch to the next
        batch_idx = indices[idx:idx+batch_size]
        train_x, train_y = train_data[batch_idx,:], train_label[batch_idx]

        # Forward
        y = net(train_x)

        # backward
        optimizer.zero_grad()
        loss = loss_reg(lossCE, net, y, train_y, l2_regularization)
        loss.backward()
        optimizer.step()

        # Accuracy
        acc_train = evaluation(y.detach(), train_y)

        # loss, accuracy
        num_data += batch_size
        running_loss += loss.detach()
        running_accuray += acc_train
        running_total += 1

    # print the statistics averaged over the mini-batches of the epoch
    print('epoch= %d, loss(train)= %.3f, accuracy(train)= %.3f, time= %.3f, lr= %.5f' %
          (epoch+1, running_loss/running_total, running_accuray/running_total, time.time()-t_start, lr))

    # update learning rate: multiplied by 0.95 for every completed epoch
    lr = init_lr * pow( 0.95 , float(num_data// num_train_data) )
    optimizer = net.update_learning_rate(optimizer, lr)

    # Test set (no gradient tracking, dropout disabled by eval mode)
    with torch.no_grad():
        net.eval()
        running_accuray_test = 0
        running_total_test = 0
        num_test_data = test_data.size(0)
        t_start_test = time.time()
        for idx in range(0,num_test_data,batch_size):
            test_x, test_y = test_data[idx:idx+batch_size,:], test_label[idx:idx+batch_size]
            y = net(test_x)
            acc_test = evaluation(y.detach(), test_y)
            running_accuray_test += acc_test
            running_total_test += 1
        t_stop_test = time.time() - t_start_test
        print('  accuracy(test) = %.3f %%, time= %.3f' % (running_accuray_test / running_total_test, t_stop_test))



ChebNet_LeNet5(
  (cl1): Linear(in_features=25, out_features=32, bias=True)
  (cl2): Linear(in_features=800, out_features=64, bias=True)
  (fc1): Linear(in_features=3776, out_features=512, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
)
Number of parameters: 1991050 (1.99 million)
num_epochs= 20 , num_train_data= 1000 , nb_iter= 200
epoch= 1, loss(train)= 2.350, accuracy(train)= 10.100, time= 5.378, lr= 0.05000
  accuracy(test) = 8.000 %, time= 0.193
epoch= 2, loss(train)= 2.321, accuracy(train)= 12.900, time= 3.357, lr= 0.04750
  accuracy(test) = 30.000 %, time= 0.303
epoch= 3, loss(train)= 2.228, accuracy(train)= 35.300, time= 3.108, lr= 0.04512
  accuracy(test) = 45.000 %, time= 0.170
epoch= 4, loss(train)= 1.690, accuracy(train)= 53.900, time= 3.291, lr= 0.04287
  accuracy(test) = 66.000 %, time= 0.206
epoch= 5, loss(train)= 0.920, accuracy(train)= 72.700, time= 8.541, lr= 0.04073
  accuracy(test) = 80.000 %, time