# Install packages
Please install any needed packages here.

In [None]:
%pip install numpy matplotlib pandas
%pip install torch
%pip install torch-geometric
%pip install torch-scatter 
%pip install scikit-learn

# Import packages

Please import all the needed packages here.

In [None]:
import torch
import numpy as np
import os
from os.path import join
import pandas as pd
from torch_geometric.data import Dataset
from torch_geometric.data import Data
from os import listdir
import json
from os.path import isfile
from random import sample
import random
import torch.nn.functional as F
import torch
import torch.nn.functional as F
from torch_sparse import spspmm

from torch_geometric.nn import GCNConv, TopKPooling, SAGEConv, GraphConv, GENConv
from torch_geometric.utils import (
    add_self_loops,
    remove_self_loops,
    sort_edge_index,
)
from torch_geometric.utils.repeat import repeat
from torch_geometric.utils import to_dense_adj
from torch_geometric.utils import dense_to_sparse
from torch.nn import Sequential as Seq, Linear, SiLU,Tanh,ReLU
from torch import sin

In [None]:
import warnings

# Fix the seed of PyTorch's global random number generator.
torch.manual_seed(10)

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Graph data creation

This section creates the graph data using the `Data` class of PyTorch Geometric.

In [None]:
def process_graph_data(source_dir, sim, fake_perc=0):

    """Load the raw graph of one simulation from the source directory.

    Parameters
    -----------
    source_dir : source directory which contains the original graph data,
        stored as files named 'run_{sim}.pt'.
    sim : simulation/run number, to identify different simulations, int
    fake_perc : % of augmented edges. Optional (default 0); it is not used by
        the template body below and is only available for the user-specific
        processing step.

    Output
    ------
    data - the object stored in 'run_{sim}.pt' (expected to be a PyTorch
        Geometric Data object)

    """

    # `join` comes from os.path; it builds '<source_dir>/run_<sim>.pt'.
    data = torch.load(join(source_dir, 'run_{}.pt'.format(sim)))


    #DO THE REST OF THE PROCESSING BASED ON YOUR DATA!!

    return data

In [None]:
class GraphDataset(Dataset):

    """
    Custom PyTorch Geometric dataset compatible with EAGNN.

    Each item is one graph (a Data object). `process` reads the raw graph of
    every simulation in `sim_list` through `process_graph_data` and stores it
    under '<root>/processed/'; `get` loads a stored graph back by position.

    Parameters
    -----------
    root : root directory; processed graphs are written to '<root>/processed/'
    source_dir : directory containing the raw graph files
    sim_list : simulation numbers belonging to this split
    test : if True, processed files are named 'test_run_{i}.pt' instead of
        'run_{i}.pt', so both splits can share one root directory
    transform, pre_transform : forwarded unchanged to the PyG Dataset base class
    fake_perc : % of augmented edges, forwarded to `process_graph_data`
        (default 0)

    """

    def __init__(self, root, source_dir, sim_list, test=False, transform=None,
                 pre_transform=None, fake_perc=0):

        # These attributes are assigned before the base constructor runs
        # because `process` reads them.
        # NOTE(review): PyG's Dataset.__init__ is expected to trigger
        # process() itself - confirm for the installed version.
        self.root = root # root directory where processed data is stored
        self.sims = sim_list # list of simulation numbers (different for train and test data)
        self.test = test # flag to identify test data
        self.source_dir = source_dir # source directory for raw data
        self.fake_perc = fake_perc # % of augmented edges passed to process_graph_data

        super().__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return []

    @property
    def processed_file_names(self):
        # NOTE(review): an empty list gives PyG nothing to check for existing
        # files, so processing presumably re-runs on every instantiation -
        # confirm against the installed PyG version.
        return []

    def download(self):
        pass

    def len(self):
        return len(self.sims)

    def _processed_path(self, i):
        """Return the path of the i-th processed graph of this split."""
        if self.test:
            return join(self.root, 'processed/test_run_{}.pt'.format(i))
        return join(self.root, 'processed/run_{}.pt'.format(i))

    def process(self):
        # Files are indexed by position in `self.sims`, not by simulation
        # number, so `get(idx)` can address them directly.
        for i, sim in enumerate(self.sims):
            print("processing simulation {}!".format(i))
            data = process_graph_data(self.source_dir, sim, self.fake_perc)
            torch.save(data, self._processed_path(i))

    def get(self, idx):
        return torch.load(self._processed_path(idx))

# Data


In [None]:
# getting the list of simulation numbers from the dataset
### change this based on your data!!!
mypath = 'dataset/'
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
file_nums = []
for file in onlyfiles:
    # Raw files are named 'run_{sim}.pt'. The extension is stripped first,
    # otherwise the second '_'-separated field is '12.pt' and int() fails.
    fields = os.path.splitext(file)[0].split('_')
    try:
        file_nums.append(int(fields[1]))
    except (IndexError, ValueError):
        # Not a simulation file (no '_' or no numeric run id): skip it.
        continue
file_nums = np.unique(np.array(file_nums, dtype=int))

# defining the root and source directory
# root directory/processed is the folder where the processed graph data is stored
# source directory contains the raw graph data
root_dir = "dataset/"
source_dir = "dataset"

# the first 500 simulations (in sorted order) are used for training and all
# remaining ones for validation
# you can change these numbers
train_list = file_nums[:500]
val_list = file_nums[500:]

# change the batch size, if needed
batch_size = 2


In [None]:
# DataLoader is the PyG loader that collates Data objects into mini-batches.
# It is imported here because this cell is the only place that uses it.
from torch_geometric.loader import DataLoader

# Processing training and validation datasets
# (test=True only switches the processed file prefix to 'test_run_')
train_dataset = GraphDataset(root_dir, source_dir, train_list, test=False)
val_dataset = GraphDataset(root_dir, source_dir, val_list, test=True)

## Defining the data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# GNN Model Architecture

The next section contains the functions and classes that implement the Multi Graph Neural Network. It uses the same architecture as Graph U-Net, so the `GraphUNet` class is reused for it.

In [None]:
import torch
import torch.nn.functional as F
from torch_sparse import spspmm

from torch_geometric.nn import GCNConv, TopKPooling, SAGEConv, GraphConv, GENConv
from torch_geometric.utils import (
    add_self_loops,
    remove_self_loops,
    sort_edge_index,
)
from torch_geometric.utils.repeat import repeat
from torch_geometric.utils import to_dense_adj
from torch_geometric.utils import dense_to_sparse
from torch.nn import Sequential as Seq, Linear, SiLU,Tanh,ReLU
from torch import sin

class GraphUNet(torch.nn.Module):
    r"""A variant of the Graph U-Net model from the `"Graph U-Nets"
    <https://arxiv.org/abs/1905.05178>`_ paper which implements a U-Net like
    architecture with graph pooling and unpooling operations.

    As implemented here, the network consists of an MLP encoder, SAGEConv
    layers with TopKPooling on the way down, SAGEConv layers on the way up,
    and an MLP decoder. A single residual MLP (``self.transformer``) is
    applied after every convolution and is shared by all levels. Before each
    pooling step the adjacency is densified by :meth:`add_edges`.

    Args:
        in_channels (int): Size of each input sample.
        hidden_channels (int): Size of each hidden sample.
        out_channels (int): Size of each output sample.
        depth (int): The depth of the U-Net architecture.
        pool_ratios (float or [float], optional): Graph pooling ratio for each
            depth. (default: :obj:`0.4`)
        sum_res (bool, optional): If set to :obj:`False`, will use
            concatenation for integration of skip connections instead
            summation. (default: :obj:`True`)
        act (torch.nn.functional, optional): The nonlinearity to use.
            (default: :obj:`torch.nn.functional.relu`)

    NOTE(review): with ``sum_res=False`` every up-convolution expects
    ``2 * hidden_channels`` inputs, but the extra application of
    ``up_convs[-1]`` at the end of :meth:`forward` receives only
    ``hidden_channels`` features - that configuration looks unusable as
    written; confirm before relying on it.
    """
    def __init__(self, in_channels, hidden_channels, out_channels, depth,
                 pool_ratios=0.4, sum_res=True, act=F.relu):
        super().__init__()
        assert depth >= 1
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.depth = depth
        self.pool_ratios = repeat(pool_ratios, depth)
        self.act = act
        self.sum_res = sum_res
        self.p = 0.1 #dropout
        channels = hidden_channels

        #encoder
        self.mlp_encode_disp = Seq(Linear(in_channels, 64),
                       ReLU(),
                       Linear(64, channels))

        # one convolution at full resolution plus one per pooling level
        self.down_convs = torch.nn.ModuleList()
        self.pools = torch.nn.ModuleList()
        self.down_convs.append(SAGEConv(channels, channels))

        for i in range(depth):
            self.pools.append(TopKPooling(channels, self.pool_ratios[i]))
            self.down_convs.append(SAGEConv(channels, channels))

        # concatenated skip connections double the input width of the up-convs
        in_channels = channels if sum_res else 2 * channels

        self.up_convs = torch.nn.ModuleList()
        for i in range(depth - 1):
            self.up_convs.append(SAGEConv(in_channels, channels))
        self.up_convs.append(SAGEConv(in_channels, channels))

        #decoder
        self.mlp_decode_disp = Seq(Linear(channels, 64),
                       ReLU(),
                       Linear(64, out_channels))

        # residual MLP applied after every convolution (shared weights)
        self.transformer = Seq(Linear(channels, channels),
                       ReLU(),
                       Linear(channels, channels))

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every learnable sub-module, including the MLPs."""
        for conv in self.down_convs:
            conv.reset_parameters()
        for pool in self.pools:
            pool.reset_parameters()
        for conv in self.up_convs:
            conv.reset_parameters()
        # The encoder/decoder/residual MLPs hold parameters too; activation
        # layers have no reset_parameters and are skipped.
        for mlp in (self.mlp_encode_disp, self.mlp_decode_disp,
                    self.transformer):
            for layer in mlp:
                if hasattr(layer, 'reset_parameters'):
                    layer.reset_parameters()


    def forward(self, x, edge_index, batch=None):
        """Run the network on node features ``x`` and connectivity ``edge_index``.

        ``batch`` assigns each node to a graph; when omitted all nodes are
        treated as one graph. Returns one ``out_channels``-sized row per node.
        """

        if batch is None:
            batch = edge_index.new_zeros(x.size(0))

        # every input edge starts with weight 1
        edge_weight = x.new_ones(edge_index.size(1))

        x = self.mlp_encode_disp(x)

        x = F.dropout(x, p = self.p, training=self.training)

        #downsampling steps
        x = self.down_convs[0](x, edge_index)
        x = self.act(x)
        x = x + self.transformer(x)

        # per-level state kept for the skip connections / unpooling
        xs = [x]
        edge_indices = [edge_index]
        edge_weights = [edge_weight]
        perms = []

        for i in range(1, self.depth + 1):
            edge_index, edge_weight = self.add_edges(edge_index, edge_weight, batch)
            x, edge_index, edge_weight, batch, perm, _ = self.pools[i - 1](
                x, edge_index, edge_weight, batch)

            x = self.down_convs[i](x, edge_index)
            x = self.act(x)
            x = x + self.transformer(x)
            x = F.dropout(x, p = self.p, training=self.training)

            # the coarsest level is not stored: nothing is unpooled into it
            if i < self.depth:
                xs += [x]
                edge_indices += [edge_index]
                edge_weights += [edge_weight]
            perms += [perm]

        #upsampling steps
        for i in range(self.depth):
            j = self.depth - 1 - i
            res = xs[j]
            edge_index = edge_indices[j]
            edge_weight = edge_weights[j]
            perm = perms[j]

            # unpool: scatter the coarse features back to the kept nodes,
            # dropped nodes receive zeros
            up = torch.zeros_like(res)
            up[perm] = x

            x = res + up if self.sum_res else torch.cat((res, up), dim=-1)

            x = self.up_convs[i](x, edge_index)
            x = self.act(x) if i < self.depth - 1 else x
            x = x + self.transformer(x)
            x = F.dropout(x, p = self.p, training=self.training)

        # The last up-convolution is applied a second time (shared weights) at
        # full resolution. No activation follows it: the original guard
        # `i < self.depth - 1` is always False once the loop above has ended.
        x = self.up_convs[-1](x, edge_index)
        x = x + self.transformer(x)

        #decoder
        x = self.mlp_decode_disp(x)
        return x


    def augment_adj(self, edge_index, edge_weight, num_nodes):
        """Square the adjacency (with self loops) as in the reference Graph U-Net.

        Not called by :meth:`forward`, which uses :meth:`add_edges` instead.
        """
        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        edge_index, edge_weight = add_self_loops(edge_index, edge_weight,
                                                 num_nodes=num_nodes)
        edge_index, edge_weight = sort_edge_index(edge_index, edge_weight,
                                                  num_nodes)
        edge_index, edge_weight = spspmm(edge_index, edge_weight, edge_index,
                                         edge_weight, num_nodes, num_nodes,
                                         num_nodes)
        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        return edge_index, edge_weight

    #function for adding edges to nodes upto 3 hops away for better connectivity after pooling
    def add_edges(self, edge_index, edge_weight, batch):
        """Return the sparse form of ``normalize(A @ A) @ A`` per graph."""
        # A has one dense adjacency matrix per graph in the batch
        A = to_dense_adj(edge_index, edge_attr=edge_weight, batch=batch)
        B=torch.matmul(A,A)
        # Column-normalise the two-hop matrix. keepdim=True keeps the summed
        # axis so the division broadcasts per graph; without it the shapes
        # only line up when the batch holds a single graph.
        B=B/(torch.sum(B,1,keepdim=True)+0.001)
        B=torch.matmul(B,A)
        # NOTE(review): for batches whose graphs differ in node count the
        # dense tensor is padded; check that dense_to_sparse maps the padded
        # indices back to the right global node ids in the installed PyG.
        edge_index, edge_weight = dense_to_sparse(B)
        return edge_index, edge_weight

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}({self.in_channels}, '
                f'{self.hidden_channels}, {self.out_channels}, '
                f'depth={self.depth}, pool_ratios={self.pool_ratios})')


# Model training and evaluation

In [None]:
def model_train(data_loader, loss_all, device, scale=False):
    """Train the GNN model for one epoch.

    Uses the module-level `model`, `optimizer`, `loss` and `my_lr_scheduler`.

    Parameters
    -----------
    data_loader : Data loader object from pytorch geometric, it contains all the graphs for training
    loss_all : running loss value to add this epoch's batch losses to, float
    device : GPU/CPU
    scale : kept for interface compatibility, boolean (default False).
        No scaling is performed in this function.

    Output
    ------
    loss_all : the incoming value plus the sum of the batch losses of this epoch, float

    """
    model.train()
    for data in data_loader:
        # the model lives on `device`, so the batch has to be moved there too
        data = data.to(device)

        optimizer.zero_grad(set_to_none=True)

        # GraphUNet.forward takes the node features, the connectivity and the
        # node-to-graph assignment of the whole mini-batch
        out = model(data.x, data.edge_index, data.batch)

        # targets of ALL graphs in the batch, matching the rows of `out`
        y = data.y.reshape(-1,1)

        # loss calculation
        loss_calc = loss(out.reshape(-1,1), y)
        loss_calc.backward()

        optimizer.step()

        loss_all += loss_calc.item()

    # The scheduler is configured with T_max = number of epochs, so it is
    # advanced once per epoch rather than once per batch.
    my_lr_scheduler.step()

    return loss_all


def model_eval(data_loader, device, scale=False):
    """Evaluate the GNN model on every graph of a loader.

    Uses the module-level `model`.

    Parameters
    -----------
    data_loader : Data loader object from pytorch geometric, it contains all the graphs to evaluate
    device : GPU/CPU
    scale : kept for interface compatibility, boolean (default False).
        No scaling is performed in this function.

    Output
    ------
    l2_err : relative L2 error ||pred - y|| / ||y|| over all predictions of the loader, float

    """
    model.eval()

    predictions = []
    labels = []

    # no gradients are needed for evaluation
    with torch.no_grad():
        for data in data_loader:
            # the model lives on `device`, so the batch has to be moved there too
            data = data.to(device)

            # GraphUNet.forward takes features, connectivity and batch vector
            pred = model(data.x, data.edge_index, data.batch)

            predictions.append(pred.cpu().numpy().reshape(-1,1))
            labels.append(data.y.cpu().numpy().reshape(-1,1))

    predictions = np.vstack(predictions)
    labels = np.vstack(labels)

    # calculation of relative L2 error
    diff_norm = np.linalg.norm(predictions - labels, ord=2)
    y_norm = np.linalg.norm(labels, ord=2)
    l2_err = diff_norm / y_norm

    return l2_err

### CHANGE TRAINING AND MODEL PARAMETERS HERE!

In [None]:

# ---- Training hyperparameters ----
n_epochs = 1000        # number of training epochs
batch_size = 2         # graphs per mini-batch
lr = 0.001             # learning rate handed to the optimizer
weight_decay = 1e-6    # weight decay handed to the optimizer

# ---- Model parameters ----
# Adapt these to your data.
in_channels = 14       # size of each input node feature vector
hidden_channels = 128  # size of the hidden node representation
out_channels = 2       # size of each output node vector
depth = 3              # number of pooling levels of the U-Net
pool_ratios = 0.6      # pooling ratio used at every level

# ---- Output locations ----
result_dir = 'results'   # parent folder for everything written to disk
model_dir = 'models'     # sub-folder (inside result_dir) for checkpoints
loss_dir = 'losses'      # sub-folder (inside result_dir) for error histories


In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Network, built from the model parameters defined above and moved to `device`.
model = GraphUNet(
    in_channels=in_channels,
    hidden_channels=hidden_channels,
    out_channels=out_channels,
    depth=depth,
    pool_ratios=pool_ratios,
)
model = model.to(device)

# Adam optimizer with a cosine-annealed learning rate (period n_epochs,
# floor 1e-8).
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=lr,
    betas=(0.99, 0.999),
    weight_decay=weight_decay,
)
my_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer=optimizer,
    T_max=n_epochs,
    eta_min=1e-8,
)

# Mean squared error between predictions and targets.
loss = torch.nn.MSELoss()


### MODEL TRAINING

In [None]:
## model training
epoch_list = []
train_l2_err = []
val_l2_err = []

# model_train / model_eval take a `scale` flag; no scaling is applied here.
scale = False

# torch.save and np.savetxt do not create missing folders, so the output
# directories are created up front.
model_path = join(result_dir, model_dir)
loss_path = join(result_dir, loss_dir)
os.makedirs(model_path, exist_ok=True)
os.makedirs(loss_path, exist_ok=True)

print('Training started...')

for epoch in range(n_epochs):
    loss_all = 0
    loss_all = model_train(train_loader, loss_all, device, scale)

    # evaluate and checkpoint every 10th epoch only
    if epoch % 10 == 0:
        epoch_list.append(epoch)
        l2_err = model_eval(train_loader, device, scale)
        train_l2_err.append(l2_err)
        l2_err = model_eval(val_loader, device, scale)
        val_l2_err.append(l2_err)
        print('epoch: ', epoch, 'train error: ', train_l2_err[-1], 'val error: ', val_l2_err[-1])
        print()

        # saving the model (the same file is overwritten at every checkpoint)
        torch.save({
            'epoch':epoch,
            'model_state_dict':model.state_dict(),
            'optimizer_state_dict':optimizer.state_dict(),
        }, join(model_path, 'model_mgnn.pt'))

        # saving the loss results
        np.savetxt(join(loss_path, 'train_l2_err.txt'), train_l2_err)
        np.savetxt(join(loss_path, 'val_l2_err.txt'), val_l2_err)
        