In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import math
from sklearn.metrics import classification_report, roc_auc_score, f1_score,accuracy_score,average_precision_score,recall_score,auc
from scipy.stats import entropy
import sys
import os
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable
from operator import itemgetter
import torch.optim as optim
import torch.nn.functional as F
from sklearn.preprocessing import OneHotEncoder
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
import time
import scipy
from scipy import sparse as sp
import pickle
import scipy.sparse as sp
import copy as cp
from collections import defaultdict
import random
# Filesystem locations used throughout the notebook; edit them to match your environment.
# NOTE(review): the '/content/drive/MyDrive' prefix looks like a mounted Google Drive in Colab - confirm.
DATAPATH='/content/drive/MyDrive/Amazon.mat'  # Full path of the Amazon.mat file
prefix_1 = '/content/drive/MyDrive/'  # Folder where the intermediate adjacency-list .pickle files are written and read back
prefix_2 = '/content/drive/MyDrive/'  # Folder that stores Amazon.mat (load_data_CARE appends 'Amazon.mat' to it)

In [None]:
pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cu118.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html


In [None]:
pip install torch_geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from torch_geometric.utils import to_undirected
from torch_geometric.nn import GCNConv, GATConv, JumpingKnowledge,SGConv 
from torch_sparse import SparseTensor, matmul
from torch_geometric.nn.conv.gcn_conv import gcn_norm

# Define some functions and classes
# Reference: https://github.com/CUAI/Non-Homophily-Benchmarks

In [None]:
#Functions copied from https://github.com/CUAI/Non-Homophily-Benchmarks
class NCDataset(object):
    """Container holding exactly one graph plus its node labels.

    Attributes:
        name: dataset name passed to the constructor.
        graph: dict describing the graph; empty until a loader fills it in.
        label: node labels, ``None`` until assigned.
        original_label: second label slot, ``None`` until assigned.

    The ``root`` constructor argument is accepted but not used by this class.
    """

    def __init__(self, name, root=f'{DATAPATH}'):
        self.name, self.graph = name, {}
        self.label = self.original_label = None

    def __len__(self):
        # The container always holds a single graph.
        return 1

    def __getitem__(self, idx):
        # Only index 0 is valid because there is only one graph.
        assert idx == 0, 'This dataset has only one graph'
        return self.graph, self.label

    def __repr__(self):
        return '{}({})'.format(type(self).__name__, len(self))
    
def load_amazon_dataset():
    """Load the Amazon graph stored at ``DATAPATH`` into an ``NCDataset``.

    The .mat file is expected to provide three entries: ``'homo'`` (sparse
    adjacency matrix), ``'features'`` (sparse node-feature matrix) and
    ``'label'`` (node labels).

    :returns: an ``NCDataset`` named ``'Amazon'`` whose ``graph`` dict holds
        ``edge_index`` (2 x num_edges long tensor built from the non-zero
        adjacency entries), ``node_feat`` (dense float tensor), ``edge_feat``
        (``None``) and ``num_nodes``; ``label`` and ``original_label`` refer to
        the same 1-D long tensor.
    """
    fulldata = scipy.io.loadmat(DATAPATH)
    adjacency = fulldata['homo']
    features = fulldata['features']

    # np.int was deprecated in NumPy 1.20 and removed in 1.24; use an explicit
    # 64-bit integer dtype (the labels become torch.long below anyway).
    label = np.array(fulldata['label'], dtype=np.int64).flatten()
    num_nodes = features.shape[0]

    dataset = NCDataset('Amazon')
    edge_index = torch.tensor(np.array(adjacency.nonzero()), dtype=torch.long)
    # toarray() yields a plain ndarray directly instead of going through np.matrix.
    node_feat = torch.tensor(features.toarray(), dtype=torch.float)
    dataset.graph = {'edge_index': edge_index,
                     'node_feat': node_feat,
                     'edge_feat': None,
                     'num_nodes': num_nodes}
    label = torch.tensor(label, dtype=torch.long)
    dataset.label = label
    dataset.original_label = label
    return dataset


def normalize(edge_index):
    """Symmetrically normalize an adjacency: D^-1/2 * A * D^-1/2.

    :param edge_index: adjacency object exposing ``set_diag()`` and
        ``sum(dim=1)`` (presumably a torch_sparse ``SparseTensor`` - confirm).
    :returns: the adjacency after ``set_diag()``, scaled on both sides by the
        inverse square root of its row sums.

    NOTE: a later cell of this notebook defines another function that is also
    called ``normalize`` (row-normalisation of a matrix); once that cell has
    run, this definition is shadowed.
    """
    with_diag = edge_index.set_diag()
    degree = with_diag.sum(dim=1).to(torch.float)
    inv_sqrt_degree = degree.pow(-0.5)
    # A zero row sum gives 0 ** -0.5 == inf; such rows are zeroed instead.
    inv_sqrt_degree[inv_sqrt_degree == float('inf')] = 0
    row_scale = inv_sqrt_degree.view(-1, 1)
    col_scale = inv_sqrt_degree.view(1, -1)
    return row_scale * with_diag * col_scale

def eval_acc(y_true, y_pred):
    """Mean per-column accuracy of arg-max predictions.

    :param y_true: 2-D label tensor (one column per task); NaN entries are
        treated as unlabeled and skipped.
    :param y_pred: score tensor; the predicted class is the arg-max over the
        last dimension.
    :returns: accuracy averaged over the label columns.
    """
    truth = y_true.detach().cpu().numpy()
    guess = y_pred.argmax(dim=-1, keepdim=True).detach().cpu().numpy()

    per_column = []
    for col in range(truth.shape[1]):
        truth_col = truth[:, col]
        # NaN != NaN, so comparing a column with itself masks out missing labels.
        labeled = truth_col == truth_col
        hits = truth_col[labeled] == guess[labeled, col]
        per_column.append(float(np.sum(hits)) / len(hits))

    return sum(per_column) / len(per_column)


def eval_rocauc(y_true, y_pred):
    """ROC-AUC averaged over label columns.

    Adapted from ogb:
    https://github.com/snap-stanford/ogb/blob/master/ogb/nodeproppred/evaluate.py

    :param y_true: 2-D label tensor with one column per task.
    :param y_pred: model outputs; when ``y_true`` has a single column, the
        soft-max probability of class 1 is used as the score.
    :returns: mean ROC-AUC over the columns that contain both a 0 and a 1
        label, or 0 (after printing a message) when no column qualifies.
    """
    labels = y_true.detach().cpu().numpy()
    num_tasks = labels.shape[1]

    if num_tasks == 1:
        # Single label column: score each node by its class-1 probability.
        scores = F.softmax(y_pred, dim=-1)[:, 1].unsqueeze(1).detach().cpu().numpy()
    else:
        scores = y_pred.detach().cpu().numpy()

    aucs = []
    for task in range(num_tasks):
        column = labels[:, task]
        has_positive = np.sum(column == 1) > 0
        has_negative = np.sum(column == 0) > 0
        # AUC needs at least one positive and one negative example.
        if not (has_positive and has_negative):
            continue
        labeled = column == column  # drops NaN (unlabeled) entries
        aucs.append(roc_auc_score(column[labeled], scores[labeled, task]))

    if not aucs:
        print('No positively labeled data available. Cannot compute ROC-AUC.')
        return 0
    return sum(aucs) / len(aucs)


def evaluate(model, dataset, split_idx, eval_func, result=None):
    """Score a model on the training and test splits.

    :param model: callable model; invoked as ``model(dataset)`` after being put
        in eval mode. Ignored when ``result`` is given.
    :param dataset: object exposing ``label`` (indexable by the split indices).
    :param split_idx: mapping with ``'train'`` and ``'test'`` index collections.
    :param eval_func: metric called as ``eval_func(y_true, y_pred)``.
    :param result: optional precomputed model output to score instead of
        running the model.
    :returns: ``(train_score, test_score, out)`` where ``out`` is the output
        for all nodes.
    """
    if result is not None:
        out = result
    else:
        model.eval()
        # Inference only: skip autograd bookkeeping so the full-graph forward
        # pass does not keep a computation graph alive in (GPU) memory.
        with torch.no_grad():
            out = model(dataset)

    train_idx, test_idx = split_idx['train'], split_idx['test']
    train_acc = eval_func(dataset.label[train_idx], out[train_idx])
    test_acc = eval_func(dataset.label[test_idx], out[test_idx])

    return train_acc, test_acc, out

#Models

    
class GAT(nn.Module):
    """Stack of ``GATConv`` layers for node classification.

    Every layer except the last is built with ``concat=True`` (so its output
    width is ``hidden_channels * heads``) and is followed by batch norm, ELU
    and dropout. The last layer is built with ``concat=False`` and maps to
    ``out_channels``.

    :param in_channels: input feature width.
    :param hidden_channels: per-head hidden width.
    :param out_channels: output width.
    :param num_layers: total number of layers; at least two layers (first and
        last) are always created.
    :param dropout: dropout probability applied after each hidden layer.
    :param heads: number of attention heads per layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2,
                 dropout=0.5, heads=2):
        super().__init__()
        hidden_width = hidden_channels * heads

        # Input layer and its batch norm.
        self.convs = nn.ModuleList()
        self.convs.append(
            GATConv(in_channels, hidden_channels, heads=heads, concat=True))
        self.bns = nn.ModuleList()
        self.bns.append(nn.BatchNorm1d(hidden_width))

        # Optional intermediate layers.
        for _ in range(num_layers - 2):
            self.convs.append(
                GATConv(hidden_width, hidden_channels, heads=heads, concat=True))
            self.bns.append(nn.BatchNorm1d(hidden_width))

        # Output layer.
        self.convs.append(
            GATConv(hidden_width, out_channels, heads=heads, concat=False))

        self.dropout = dropout
        self.activation = F.elu

    def reset_parameters(self):
        """Re-initialise every convolution and batch-norm layer."""
        for layer in (*self.convs, *self.bns):
            layer.reset_parameters()

    def forward(self, data):
        """Return the output of the last layer for all nodes in ``data.graph``."""
        edge_index = data.graph['edge_index']
        h = data.graph['node_feat']
        *hidden_convs, output_conv = self.convs
        for conv, bn in zip(hidden_convs, self.bns):
            h = self.activation(bn(conv(h, edge_index)))
            h = F.dropout(h, p=self.dropout, training=self.training)
        return output_conv(h, edge_index)
class LINK(nn.Module):
    """ logistic regression on adjacency matrix

    Each node is represented by its row of the N x N adjacency matrix, which
    is fed to a single linear layer.

    :param num_nodes: number of nodes N (input width of the linear layer).
    :param out_channels: number of output logits per node.
    """

    def __init__(self, num_nodes, out_channels):
        super(LINK, self).__init__()
        self.W = nn.Linear(num_nodes, out_channels)

    def reset_parameters(self):
        """Re-initialise the linear layer."""
        self.W.reset_parameters()

    def forward(self, data):
        """Return per-node logits computed from the adjacency rows.

        :param data: object whose ``graph`` dict provides ``'num_nodes'`` and
            ``'edge_index'`` (a 2 x E index tensor or a ``SparseTensor``).
        :raises TypeError: if ``edge_index`` has any other type.
        """
        N = data.graph['num_nodes']
        edge_index = data.graph['edge_index']
        if isinstance(edge_index, torch.Tensor):
            row, col = edge_index
            A = SparseTensor(row=row, col=col, sparse_sizes=(N, N)).to_torch_sparse_coo_tensor()
        elif isinstance(edge_index, SparseTensor):
            A = edge_index.to_torch_sparse_coo_tensor()
        else:
            # Fail with a clear message instead of an UnboundLocalError on `A`.
            raise TypeError(
                'edge_index must be a torch.Tensor or a SparseTensor, got '
                f'{type(edge_index).__name__}')
        return self.W(A)
class GCN(nn.Module):
    """Stack of ``GCNConv`` layers for node classification.

    Hidden layers are followed by (optional) batch norm, ReLU and dropout; the
    last layer maps to ``out_channels`` and its output is returned as-is.

    :param in_channels: input feature width.
    :param hidden_channels: hidden width.
    :param out_channels: output width.
    :param num_layers: total number of layers; at least two layers (first and
        last) are always created.
    :param dropout: dropout probability applied after each hidden layer.
    :param save_mem: when True the convolutions are built with
        ``cached=False`` and ``normalize=False``.
    :param use_bn: apply the batch-norm layers in ``forward`` (they are created
        either way).
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2,
                 dropout=0.5, save_mem=False, use_bn=True):
        super().__init__()
        # All convolutions share the same caching / normalisation switches.
        conv_options = dict(cached=not save_mem, normalize=not save_mem)

        self.convs = nn.ModuleList()
        self.convs.append(GCNConv(in_channels, hidden_channels, **conv_options))
        self.bns = nn.ModuleList()
        self.bns.append(nn.BatchNorm1d(hidden_channels))

        for _ in range(num_layers - 2):
            self.convs.append(GCNConv(hidden_channels, hidden_channels, **conv_options))
            self.bns.append(nn.BatchNorm1d(hidden_channels))

        self.convs.append(GCNConv(hidden_channels, out_channels, **conv_options))

        self.dropout = dropout
        self.activation = F.relu
        self.use_bn = use_bn

    def reset_parameters(self):
        """Re-initialise every convolution and batch-norm layer."""
        for layer in (*self.convs, *self.bns):
            layer.reset_parameters()

    def forward(self, data):
        """Return the output of the last layer for all nodes in ``data.graph``."""
        edge_index = data.graph['edge_index']
        h = data.graph['node_feat']
        last = len(self.convs) - 1
        for depth in range(last):
            h = self.convs[depth](h, edge_index)
            if self.use_bn:
                h = self.bns[depth](h)
            h = F.dropout(self.activation(h), p=self.dropout, training=self.training)
        return self.convs[last](h, edge_index)
    
class SGC(nn.Module):
    """Thin wrapper around a single cached ``SGConv`` layer."""

    def __init__(self, in_channels, out_channels, hops):
        """ takes 'hops' power of the normalized adjacency"""
        super().__init__()
        self.conv = SGConv(in_channels, out_channels, hops, cached=True)

    def reset_parameters(self):
        """Re-initialise the wrapped convolution."""
        self.conv.reset_parameters()

    def forward(self, data):
        """Apply the convolution to the node features and edges in ``data.graph``."""
        graph = data.graph
        return self.conv(graph['node_feat'], graph['edge_index'])
    
class GCNJK(nn.Module):
    """GCN whose per-layer outputs are combined by ``JumpingKnowledge``.

    All convolutions (including the last) produce ``hidden_channels`` features.
    The outputs of every layer are collected, merged by the jumping-knowledge
    module and projected to ``out_channels`` by a final linear layer.

    :param in_channels: input feature width.
    :param hidden_channels: width of every convolution output.
    :param out_channels: output width of the final projection.
    :param num_layers: total number of convolutions; at least two are always
        created.
    :param dropout: dropout probability applied after each hidden layer.
    :param save_mem: when True the convolutions are built with
        ``cached=False`` and ``normalize=False``.
    :param jk_type: jumping-knowledge mode; ``'cat'`` widens the final
        projection input to ``hidden_channels * num_layers``.

    NOTE(review): ``JumpingKnowledge`` is always constructed with
    ``num_layers=1`` regardless of ``num_layers`` - confirm this is intended.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2,
                 dropout=0.5, save_mem=False, jk_type='max'):
        super().__init__()
        conv_options = dict(cached=not save_mem, normalize=not save_mem)

        self.convs = nn.ModuleList()
        self.convs.append(GCNConv(in_channels, hidden_channels, **conv_options))
        self.bns = nn.ModuleList()
        self.bns.append(nn.BatchNorm1d(hidden_channels))

        for _ in range(num_layers - 2):
            self.convs.append(GCNConv(hidden_channels, hidden_channels, **conv_options))
            self.bns.append(nn.BatchNorm1d(hidden_channels))

        # The last convolution keeps the hidden width; the projection below
        # produces the class logits.
        self.convs.append(GCNConv(hidden_channels, hidden_channels, **conv_options))

        self.dropout = dropout
        self.activation = F.relu
        self.jump = JumpingKnowledge(jk_type, channels=hidden_channels, num_layers=1)
        # 'cat' concatenates one block per layer; max / lstm keep the hidden width.
        projection_in = hidden_channels * num_layers if jk_type == 'cat' else hidden_channels
        self.final_project = nn.Linear(projection_in, out_channels)

    def reset_parameters(self):
        """Re-initialise convolutions, batch norms, the JK module and the projection."""
        for layer in (*self.convs, *self.bns, self.jump, self.final_project):
            layer.reset_parameters()

    def forward(self, data):
        """Return the projected jumping-knowledge representation of all nodes."""
        edge_index = data.graph['edge_index']
        h = data.graph['node_feat']
        layer_outputs = []
        *hidden_convs, last_conv = self.convs
        for conv, bn in zip(hidden_convs, self.bns):
            h = self.activation(bn(conv(h, edge_index)))
            # The pre-dropout activation is what the JK module sees.
            layer_outputs.append(h)
            h = F.dropout(h, p=self.dropout, training=self.training)
        layer_outputs.append(last_conv(h, edge_index))
        return self.final_project(self.jump(layer_outputs))





# Hyperparameters and device selection

In [None]:
# Fractions of the nodes assigned to the training and test sets.
# NOTE(review): only test_portion is passed to train_test_split in the split cell;
# train_portion is not read by any cell shown here - keep the two consistent by hand.
train_portion=0.4
test_portion=0.6

#Train with GAT will require 40GB GPU memory (Colab Pro+)
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = f'cuda:0' if torch.cuda.is_available() else 'cpu'
# Uncomment the next line to force CPU execution.
#device= 'cpu'
device = torch.device(device)

#Parameters for GNN models
hidden_channels=32  # hidden width passed to the GNN constructors
num_layers=2  # number of layers passed to the GNN constructors
dropout=0  # dropout probability (0 disables dropout)
gat_heads=8  # attention heads for GAT; not referenced by the training cells shown here
lr=0.01  # AdamW learning rate
weight_decay=1e-4  # AdamW weight decay
batch_size=512  # number of training nodes whose loss drives each optimizer step

# Load the graph into an NCDataset container
### Used for training the GNN baselines

In [None]:
# Load the Amazon graph and prepare it for the GNN baselines.
dataset=load_amazon_dataset()
# Labels are loaded as a 1-D tensor; the metric functions index them by column,
# so give them an explicit trailing dimension of size 1.
if len(dataset.label.shape) == 1:
    dataset.label = dataset.label.unsqueeze(1)
    dataset.original_label= dataset.original_label.unsqueeze(1)

# Move labels to the selected device.
dataset.label = dataset.label.to(device)
dataset.original_label= dataset.original_label.to(device)
n = dataset.graph['num_nodes']  # number of nodes
# infer the number of classes for non one-hot and one-hot labels
c = max(dataset.label.max().item() + 1, dataset.label.shape[1])
d = dataset.graph['node_feat'].shape[1]  # number of input features per node
# Make the edge list undirected, then move edges and features to the device.
dataset.graph['edge_index'] = to_undirected(dataset.graph['edge_index'])
dataset.graph['edge_index'], dataset.graph['node_feat'] = \
    dataset.graph['edge_index'].to(device), dataset.graph['node_feat'].to(device)
print(f"num nodes {n} | num classes {c} | num node feats {d}")

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  label = np.array(fulldata['label'], dtype=np.int).flatten()


num nodes 11944 | num classes 2 | num node feats 25


# Train test split

In [None]:
# Read the raw .mat file again; `amazon` is also used later to export the
# relation-specific adjacency lists.
amazon = loadmat(DATAPATH)

#adj = amazon['homo'].tocsr() # Adjacency matrix
features=amazon['features'].toarray() # Feature matrix
label_origin=amazon['label'][0]  # first row of the stored label array, one entry per node

n_nodes, feat_dim = features.shape

#Split train and test set
# The split is done on node indices, stratified by label, with a fixed
# random_state so the same partition is produced on every run.
all_index=np.arange(n_nodes)
X_train, X_test, y_train, y_test=train_test_split(all_index, label_origin, test_size=test_portion, stratify=label_origin, random_state=66)
# Keep the index/label arrays of the split together in one dict.
original_index={'X_train':X_train, 'X_test':X_test, 'y_train':y_train, 'y_test':y_test}

# Train GNN models

In [None]:
#Define loss function and evaluation metric
# BCEWithLogitsLoss is applied to the raw model outputs; the training cells
# feed it one-hot encoded labels converted to float.
criterion = nn.BCEWithLogitsLoss()
eval_func = eval_rocauc # eval_acc: Accuracy 
split_idx = {}# Define an empty dictionary to store training and test index list
# Node indices of each split as tensors on the training device.
split_idx['train']=torch.tensor(np.array(X_train)).to(device)
split_idx['test']=torch.tensor(np.array(X_test)).to(device)
train_idx = split_idx['train']

# GCN

In [None]:
# Train a GCN (batch norm disabled) with AdamW for 100 epochs.
# Each optimizer step runs a forward pass over the whole graph and
# back-propagates the loss of one mini-batch of training nodes.
model1 = GCN(in_channels=d,hidden_channels=hidden_channels,out_channels=c,num_layers=num_layers,dropout=dropout,use_bn=False).to(device)
optimizer1 = torch.optim.AdamW(model1.parameters(), lr=lr, weight_decay=weight_decay)
print(f'Training nodes: {len(train_idx)}')

# BCEWithLogitsLoss needs float targets shaped like the model output, so
# single-column integer labels are one-hot encoded. The labels never change,
# so the targets are built once instead of once per batch.
if dataset.label.shape[1] == 1:
    true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
else:
    true_label = dataset.label
target = true_label.squeeze(1).to(torch.float)

# Ceiling division: no empty trailing batch is scheduled when the number of
# training nodes is a multiple of batch_size (an empty batch has a NaN mean loss).
num_batches = math.ceil(len(train_idx) / batch_size)

for epoch in range(100):
    # evaluate() below switches the model to eval mode, so re-enable training
    # mode at the start of every epoch.
    model1.train()
    epoch_loss = 0.0
    for batch in range(num_batches):
        i_start = batch * batch_size
        i_end = min((batch + 1) * batch_size, len(train_idx))
        batch_nodes = train_idx[i_start:i_end]

        optimizer1.zero_grad()
        out = model1(dataset)
        loss = criterion(out[batch_nodes], target[batch_nodes])
        loss.backward()
        optimizer1.step()

        epoch_loss += loss.item()

    result = evaluate(model1, dataset, split_idx, eval_func)
    print(f'Model GCN, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')


Training nodes: 4777
Model GCN, Epoch: 01, Loss: 20.9084, Train AUC: 77.48%, Test AUC: 77.56%
Model GCN, Epoch: 02, Loss: 2.0645, Train AUC: 77.20%, Test AUC: 76.89%
Model GCN, Epoch: 03, Loss: 2.5971, Train AUC: 77.66%, Test AUC: 77.74%
Model GCN, Epoch: 04, Loss: 1.9902, Train AUC: 78.82%, Test AUC: 78.80%
Model GCN, Epoch: 05, Loss: 1.3353, Train AUC: 79.34%, Test AUC: 79.23%
Model GCN, Epoch: 06, Loss: 0.4940, Train AUC: 79.35%, Test AUC: 79.19%
Model GCN, Epoch: 07, Loss: 0.4658, Train AUC: 79.35%, Test AUC: 79.19%
Model GCN, Epoch: 08, Loss: 0.3019, Train AUC: 79.61%, Test AUC: 79.28%
Model GCN, Epoch: 09, Loss: 0.2533, Train AUC: 79.59%, Test AUC: 79.30%
Model GCN, Epoch: 10, Loss: 0.2299, Train AUC: 79.48%, Test AUC: 79.31%
Model GCN, Epoch: 11, Loss: 0.2223, Train AUC: 79.39%, Test AUC: 79.28%
Model GCN, Epoch: 12, Loss: 0.2204, Train AUC: 79.42%, Test AUC: 79.30%
Model GCN, Epoch: 13, Loss: 0.2189, Train AUC: 79.49%, Test AUC: 79.33%
Model GCN, Epoch: 14, Loss: 0.2183, Train 

In [None]:
# Per-class report on the test nodes for the model currently bound to model1
# (the GCN trained in the preceding cell when the notebook is run in order).
result = evaluate(model1, dataset, split_idx, eval_func)
prob=result[2][X_test]  # result[2] is the model output for all nodes; keep the test rows
prob=F.softmax(prob, dim=-1).detach().cpu().numpy()  # normalise over classes and convert to NumPy
print(classification_report(y_test,prob.argmax(axis=1)))  # predicted class = highest-scoring column

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97      6674
         1.0       0.58      0.08      0.14       493

    accuracy                           0.93      7167
   macro avg       0.76      0.54      0.55      7167
weighted avg       0.91      0.93      0.91      7167



# GCNJK

In [None]:
# Train a GCN with jumping knowledge ('max' aggregation) with AdamW for 100 epochs.
# Each optimizer step runs a forward pass over the whole graph and
# back-propagates the loss of one mini-batch of training nodes.
model1 = GCNJK(d, hidden_channels, c, num_layers=num_layers, dropout=dropout, jk_type='max').to(device)
optimizer1 = torch.optim.AdamW(model1.parameters(), lr=lr, weight_decay=weight_decay)
print(f'Training nodes: {len(train_idx)}')

# BCEWithLogitsLoss needs float targets shaped like the model output, so
# single-column integer labels are one-hot encoded. The labels never change,
# so the targets are built once instead of once per batch.
if dataset.label.shape[1] == 1:
    true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
else:
    true_label = dataset.label
target = true_label.squeeze(1).to(torch.float)

# Ceiling division: no empty trailing batch is scheduled when the number of
# training nodes is a multiple of batch_size (an empty batch has a NaN mean loss).
num_batches = math.ceil(len(train_idx) / batch_size)

for epoch in range(100):
    # evaluate() below switches the model to eval mode, so re-enable training
    # mode at the start of every epoch.
    model1.train()
    epoch_loss = 0.0
    for batch in range(num_batches):
        i_start = batch * batch_size
        i_end = min((batch + 1) * batch_size, len(train_idx))
        batch_nodes = train_idx[i_start:i_end]

        optimizer1.zero_grad()
        out = model1(dataset)
        loss = criterion(out[batch_nodes], target[batch_nodes])
        loss.backward()
        optimizer1.step()

        epoch_loss += loss.item()

    result = evaluate(model1, dataset, split_idx, eval_func)
    print(f'Model GCNJK, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')

Training nodes: 4777
Model GCNJK, Epoch: 01, Loss: 0.3910, Train AUC: 77.80%, Test AUC: 77.61%
Model GCNJK, Epoch: 02, Loss: 0.2523, Train AUC: 79.26%, Test AUC: 78.88%
Model GCNJK, Epoch: 03, Loss: 0.2236, Train AUC: 80.08%, Test AUC: 79.57%
Model GCNJK, Epoch: 04, Loss: 0.2192, Train AUC: 80.08%, Test AUC: 79.46%
Model GCNJK, Epoch: 05, Loss: 0.2169, Train AUC: 80.08%, Test AUC: 79.47%
Model GCNJK, Epoch: 06, Loss: 0.2150, Train AUC: 80.44%, Test AUC: 79.82%
Model GCNJK, Epoch: 07, Loss: 0.2125, Train AUC: 80.80%, Test AUC: 80.22%
Model GCNJK, Epoch: 08, Loss: 0.2090, Train AUC: 81.52%, Test AUC: 80.97%
Model GCNJK, Epoch: 09, Loss: 0.2046, Train AUC: 82.21%, Test AUC: 81.72%
Model GCNJK, Epoch: 10, Loss: 0.2005, Train AUC: 82.75%, Test AUC: 82.34%
Model GCNJK, Epoch: 11, Loss: 0.1979, Train AUC: 83.83%, Test AUC: 83.27%
Model GCNJK, Epoch: 12, Loss: 0.1952, Train AUC: 84.34%, Test AUC: 83.72%
Model GCNJK, Epoch: 13, Loss: 0.1903, Train AUC: 85.04%, Test AUC: 84.39%
Model GCNJK, Epoc

In [None]:
# Per-class report on the test nodes for the model currently bound to model1
# (the GCNJK trained in the preceding cell when the notebook is run in order).
result = evaluate(model1, dataset, split_idx, eval_func)
prob=result[2][X_test]  # result[2] is the model output for all nodes; keep the test rows
prob=F.softmax(prob, dim=-1).detach().cpu().numpy()  # normalise over classes and convert to NumPy
print(classification_report(y_test,prob.argmax(axis=1)))  # predicted class = highest-scoring column

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97      6674
         1.0       0.82      0.13      0.22       493

    accuracy                           0.94      7167
   macro avg       0.88      0.56      0.59      7167
weighted avg       0.93      0.94      0.92      7167



# SGC

In [None]:
# Train a 1-hop SGC model with AdamW for 100 epochs.
# Each optimizer step runs a forward pass over the whole graph and
# back-propagates the loss of one mini-batch of training nodes.
model1 = SGC(in_channels=d, out_channels=c, hops=1).to(device)
optimizer1 = torch.optim.AdamW(model1.parameters(), lr=lr, weight_decay=weight_decay)
print(f'Training nodes: {len(train_idx)}')

# BCEWithLogitsLoss needs float targets shaped like the model output, so
# single-column integer labels are one-hot encoded. The labels never change,
# so the targets are built once instead of once per batch.
if dataset.label.shape[1] == 1:
    true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
else:
    true_label = dataset.label
target = true_label.squeeze(1).to(torch.float)

# Ceiling division: no empty trailing batch is scheduled when the number of
# training nodes is a multiple of batch_size (an empty batch has a NaN mean loss).
num_batches = math.ceil(len(train_idx) / batch_size)

for epoch in range(100):
    # evaluate() below switches the model to eval mode, so re-enable training
    # mode at the start of every epoch.
    model1.train()
    epoch_loss = 0.0
    for batch in range(num_batches):
        i_start = batch * batch_size
        i_end = min((batch + 1) * batch_size, len(train_idx))
        batch_nodes = train_idx[i_start:i_end]

        optimizer1.zero_grad()
        out = model1(dataset)
        loss = criterion(out[batch_nodes], target[batch_nodes])
        loss.backward()
        optimizer1.step()

        epoch_loss += loss.item()

    result = evaluate(model1, dataset, split_idx, eval_func)
    print(f'Model SGC, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')

Training nodes: 4777
Model SGC, Epoch: 01, Loss: 1.1692, Train AUC: 80.75%, Test AUC: 81.32%
Model SGC, Epoch: 02, Loss: 0.5492, Train AUC: 80.82%, Test AUC: 81.38%
Model SGC, Epoch: 03, Loss: 0.5645, Train AUC: 80.92%, Test AUC: 81.47%
Model SGC, Epoch: 04, Loss: 0.4975, Train AUC: 81.11%, Test AUC: 81.63%
Model SGC, Epoch: 05, Loss: 0.3892, Train AUC: 81.38%, Test AUC: 81.77%
Model SGC, Epoch: 06, Loss: 0.2802, Train AUC: 81.61%, Test AUC: 81.82%
Model SGC, Epoch: 07, Loss: 0.2418, Train AUC: 81.90%, Test AUC: 82.01%
Model SGC, Epoch: 08, Loss: 0.2284, Train AUC: 82.01%, Test AUC: 82.20%
Model SGC, Epoch: 09, Loss: 0.2130, Train AUC: 82.19%, Test AUC: 82.45%
Model SGC, Epoch: 10, Loss: 0.2064, Train AUC: 82.48%, Test AUC: 82.73%
Model SGC, Epoch: 11, Loss: 0.2041, Train AUC: 82.57%, Test AUC: 82.84%
Model SGC, Epoch: 12, Loss: 0.2029, Train AUC: 82.75%, Test AUC: 82.99%
Model SGC, Epoch: 13, Loss: 0.2019, Train AUC: 82.96%, Test AUC: 83.14%
Model SGC, Epoch: 14, Loss: 0.2009, Train A

In [None]:
# Per-class report on the test nodes for the model currently bound to model1
# (the SGC trained in the preceding cell when the notebook is run in order).
result = evaluate(model1, dataset, split_idx, eval_func)
prob=result[2][X_test]  # result[2] is the model output for all nodes; keep the test rows
prob=F.softmax(prob, dim=-1).detach().cpu().numpy()  # normalise over classes and convert to NumPy
print(classification_report(y_test,prob.argmax(axis=1)))  # predicted class = highest-scoring column

              precision    recall  f1-score   support

         0.0       0.94      0.99      0.97      6674
         1.0       0.59      0.16      0.25       493

    accuracy                           0.93      7167
   macro avg       0.76      0.57      0.61      7167
weighted avg       0.92      0.93      0.92      7167



# LINK

In [None]:
# Train LINK (logistic regression on adjacency rows) with AdamW for 100 epochs.
# Each optimizer step runs a forward pass over the whole graph and
# back-propagates the loss of one mini-batch of training nodes.
model1 = LINK(n, c).to(device)
optimizer1 = torch.optim.AdamW(model1.parameters(), lr=lr, weight_decay=weight_decay)
print(f'Training nodes: {len(train_idx)}')

# BCEWithLogitsLoss needs float targets shaped like the model output, so
# single-column integer labels are one-hot encoded. The labels never change,
# so the targets are built once instead of once per batch.
if dataset.label.shape[1] == 1:
    true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
else:
    true_label = dataset.label
target = true_label.squeeze(1).to(torch.float)

# Ceiling division: no empty trailing batch is scheduled when the number of
# training nodes is a multiple of batch_size (an empty batch has a NaN mean loss).
num_batches = math.ceil(len(train_idx) / batch_size)

for epoch in range(100):
    # evaluate() below switches the model to eval mode, so re-enable training
    # mode at the start of every epoch.
    model1.train()
    epoch_loss = 0.0
    for batch in range(num_batches):
        i_start = batch * batch_size
        i_end = min((batch + 1) * batch_size, len(train_idx))
        batch_nodes = train_idx[i_start:i_end]

        optimizer1.zero_grad()
        out = model1(dataset)
        loss = criterion(out[batch_nodes], target[batch_nodes])
        loss.backward()
        optimizer1.step()

        epoch_loss += loss.item()

    result = evaluate(model1, dataset, split_idx, eval_func)
    # The progress line names the model actually being trained (LINK).
    print(f'Model LINK, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')

Training nodes: 4777
Model APPNP, Epoch: 01, Loss: 0.4904, Train AUC: 78.99%, Test AUC: 78.04%
Model APPNP, Epoch: 02, Loss: 0.5092, Train AUC: 83.84%, Test AUC: 81.04%
Model APPNP, Epoch: 03, Loss: 0.3007, Train AUC: 89.83%, Test AUC: 83.46%
Model APPNP, Epoch: 04, Loss: 0.1806, Train AUC: 95.05%, Test AUC: 84.14%
Model APPNP, Epoch: 05, Loss: 0.1230, Train AUC: 97.17%, Test AUC: 83.28%
Model APPNP, Epoch: 06, Loss: 0.0950, Train AUC: 99.30%, Test AUC: 83.68%
Model APPNP, Epoch: 07, Loss: 0.0756, Train AUC: 99.77%, Test AUC: 83.99%
Model APPNP, Epoch: 08, Loss: 0.0625, Train AUC: 99.89%, Test AUC: 84.08%
Model APPNP, Epoch: 09, Loss: 0.0547, Train AUC: 99.95%, Test AUC: 84.13%
Model APPNP, Epoch: 10, Loss: 0.0484, Train AUC: 99.98%, Test AUC: 84.16%
Model APPNP, Epoch: 11, Loss: 0.0434, Train AUC: 99.99%, Test AUC: 84.19%
Model APPNP, Epoch: 12, Loss: 0.0392, Train AUC: 99.99%, Test AUC: 84.22%
Model APPNP, Epoch: 13, Loss: 0.0357, Train AUC: 100.00%, Test AUC: 84.23%
Model APPNP, Epo

In [None]:
# Per-class report on the test nodes for the model currently bound to model1
# (the LINK model trained in the preceding cell when the notebook is run in order).
result = evaluate(model1, dataset, split_idx, eval_func)
prob=result[2][X_test]  # result[2] is the model output for all nodes; keep the test rows
prob=F.softmax(prob, dim=-1).detach().cpu().numpy()  # normalise over classes and convert to NumPy
print(classification_report(y_test,prob.argmax(axis=1)))  # predicted class = highest-scoring column

              precision    recall  f1-score   support

         0.0       0.95      0.98      0.96      6674
         1.0       0.51      0.25      0.34       493

    accuracy                           0.93      7167
   macro avg       0.73      0.62      0.65      7167
weighted avg       0.92      0.93      0.92      7167



# 

# CARE-GNN
# Reference:https://github.com/YingtongDou/CARE-GNN

In [None]:
# Expose only GPU 0 to CUDA.
# NOTE(review): earlier cells already import torch and move tensors to `device`;
# setting this variable that late may have no effect on the running process - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def sparse_to_adjlist(sp_matrix, filename):
    """
    Convert a sparse adjacency matrix to an adjacency list and pickle it
    :param sp_matrix: the sparse matrix
    :param filename: the filename of adjlist
    """
    # Adding the identity puts every node on the diagonal (self loop).
    with_self_loops = sp_matrix + sp.eye(sp_matrix.shape[0])
    sources, targets = with_self_loops.nonzero()

    # Record each non-zero entry in both directions so the list is symmetric.
    neighbours = defaultdict(set)
    for src, dst in zip(sources, targets):
        neighbours[src].add(dst)
        neighbours[dst].add(src)

    with open(filename, 'wb') as handle:
        pickle.dump(neighbours, handle)
    

# Pull the three relation-specific adjacency matrices and the 'homo' graph
# out of the loaded .mat file.
net_upu = amazon['net_upu']
net_usu = amazon['net_usu']
net_uvu = amazon['net_uvu']
amz_homo = amazon['homo']

# Write one pickled adjacency list per graph into prefix_1; load_data_CARE
# reads these four files back by the same names.
sparse_to_adjlist(net_upu, prefix_1 + 'amz_upu_adjlists.pickle')
sparse_to_adjlist(net_usu, prefix_1 + 'amz_usu_adjlists.pickle')
sparse_to_adjlist(net_uvu, prefix_1 + 'amz_uvu_adjlists.pickle')
sparse_to_adjlist(amz_homo, prefix_1 + 'amz_homo_adjlists.pickle')

In [None]:
def load_data_CARE():
    """
    Load the Amazon graphs, features and labels for CARE-GNN
    :returns: a list [homo, relation1, relation2, relation3] of adjacency
              lists (homo graph followed by the upu, usu and uvu
              single-relation graphs), the dense feature matrix, and the
              flattened label array
    """
    mat_dir = prefix_2      # Folder where .mat files are stored
    pickle_dir = prefix_1   # Folder where .pickle files are stored

    mat = loadmat(mat_dir + 'Amazon.mat')
    labels = mat['label'].flatten()
    feat_data = mat['features'].todense().A

    def _read_adjlist(pickle_name):
        # The pickles are the ones this notebook wrote with sparse_to_adjlist;
        # do not point this at untrusted files (pickle can execute code).
        with open(pickle_dir + pickle_name, 'rb') as handle:
            return pickle.load(handle)

    # load the preprocessed adj_lists
    homo = _read_adjlist('amz_homo_adjlists.pickle')
    relation1 = _read_adjlist('amz_upu_adjlists.pickle')
    relation2 = _read_adjlist('amz_usu_adjlists.pickle')
    relation3 = _read_adjlist('amz_uvu_adjlists.pickle')

    return [homo, relation1, relation2, relation3], feat_data, labels

def normalize(mx):
    """
    Row-normalize a matrix: every row is divided by (row sum + 0.01)
    Code from https://github.com/williamleif/graphsage-simple/

    NOTE: this reuses the name ``normalize`` of the sparse-tensor helper
    defined earlier in the notebook; after this cell runs, that earlier
    definition is no longer reachable under this name.

    :param mx: the matrix to normalize
    :returns: diag(1 / (row sum + 0.01)) multiplied with ``mx``
    """
    # The 0.01 offset keeps rows that sum to zero from dividing by zero.
    row_totals = np.array(mx.sum(1)) + 0.01
    inverse_totals = np.power(row_totals, -1).flatten()
    inverse_totals[np.isinf(inverse_totals)] = 0.
    return sp.diags(inverse_totals).dot(mx)

def pos_neg_split(nodes, labels):
    """
    Find positive and negative nodes given a list of nodes and their labels
    :param nodes: a list of nodes
    :param labels: a list of node labels, aligned with ``nodes`` by position
    :returns: the split positive and negative nodes; a node is positive when
              its label equals 1, everything else is negative. Both lists keep
              the order of ``nodes`` and are built from a copy, so the input
              list is left untouched.
    """
    nodes_copy = cp.deepcopy(nodes)
    # One pass over the labels instead of list.remove() per positive node,
    # which rescanned the negative list every time (quadratic overall).
    positive_positions = {idx for idx, label in enumerate(labels) if label == 1}
    pos_nodes = [nodes_copy[idx] for idx in sorted(positive_positions)]
    neg_nodes = [node for idx, node in enumerate(nodes_copy)
                 if idx not in positive_positions]

    return pos_nodes, neg_nodes

def undersample(pos_nodes, neg_nodes, scale=1):
    """
    Under-sample the negative nodes
    :param pos_nodes: a list of positive nodes
    :param neg_nodes: a list negative nodes
    :param scale: the under-sampling scale; int(len(pos_nodes) * scale)
                  negatives are drawn without replacement
    :return: all positive nodes followed by the sampled negative nodes
    """
    num_negatives = int(len(pos_nodes) * scale)
    # Sample from a copy so the caller's list is never handed to random.sample.
    sampled_negatives = random.sample(cp.deepcopy(neg_nodes), k=num_negatives)
    return pos_nodes + sampled_negatives

def test_care(test_cases, labels, model, batch_size):
    """
    Test the performance of CARE-GNN and its variants
    :param test_cases: a list of testing node
    :param labels: a list of testing node labels
    :param model: the GNN model; must provide
                  ``to_prob(nodes, labels, train_flag=False)`` returning the
                  GNN and label-aware ("Label1") class scores of a batch
    :param batch_size: number nodes in a batch
    :returns: ``(auc_gnn, auc_label1, recall_gnn, recall_label1,
              predicted_labels, predict_prob)``. The two AUC values are
              computed over all test nodes. ``recall_gnn`` and
              ``recall_label1`` are the per-batch macro recalls *summed* over
              the batches (the printed values are divided by the number of
              batches, the returned ones are not). ``predicted_labels`` holds
              the arg-max GNN prediction per node and ``predict_prob`` the GNN
              score row per node.
    :raises ValueError: if ``test_cases`` is empty
    """
    if len(test_cases) == 0:
        raise ValueError('test_cases is empty')

    # Ceiling division: no empty trailing batch when the number of test nodes
    # is a multiple of batch_size.
    test_batch_num = math.ceil(len(test_cases) / batch_size)
    f1_gnn = 0.0
    acc_gnn = 0.0
    recall_gnn = 0.0
    f1_label1 = 0.0
    acc_label1 = 0.00
    recall_label1 = 0.0
    gnn_list = []
    label_list1 = []
    predicted_labels=[]
    predict_prob=[]
    for iteration in range(test_batch_num):
        i_start = iteration * batch_size
        i_end = min((iteration + 1) * batch_size, len(test_cases))
        batch_nodes = test_cases[i_start:i_end]
        batch_label = labels[i_start:i_end]
        gnn_prob, label_prob1 = model.to_prob(batch_nodes, batch_label, train_flag=False)

        # Convert each score tensor to NumPy once and reuse it for every metric.
        gnn_scores = gnn_prob.data.cpu().numpy()
        label_scores = label_prob1.data.cpu().numpy()
        gnn_pred = gnn_scores.argmax(axis=1)
        label_pred = label_scores.argmax(axis=1)

        # Threshold-based metrics are accumulated per batch and averaged when printed.
        f1_gnn += f1_score(batch_label, gnn_pred, average="macro")
        acc_gnn += accuracy_score(batch_label, gnn_pred)
        recall_gnn += recall_score(batch_label, gnn_pred, average="macro")

        f1_label1 += f1_score(batch_label, label_pred, average="macro")
        acc_label1 += accuracy_score(batch_label, label_pred)
        recall_label1 += recall_score(batch_label, label_pred, average="macro")

        # Class-1 scores of every node are kept for the ranking metrics below.
        gnn_list.extend(gnn_scores[:, 1].tolist())
        label_list1.extend(label_scores[:, 1].tolist())
        predicted_labels.extend(gnn_pred)
        predict_prob.extend(gnn_prob)

    # AUC and average precision are computed once over the whole test set.
    auc_gnn = roc_auc_score(labels, np.array(gnn_list))
    ap_gnn = average_precision_score(labels, np.array(gnn_list))
    auc_label1 = roc_auc_score(labels, np.array(label_list1))
    ap_label1 = average_precision_score(labels, np.array(label_list1))
    print(f"GNN F1: {f1_gnn / test_batch_num:.4f}")
    print(f"GNN Accuracy: {acc_gnn / test_batch_num:.4f}")
    print(f"GNN Recall: {recall_gnn / test_batch_num:.4f}")
    print(f"GNN auc: {auc_gnn:.4f}")
    print(f"GNN ap: {ap_gnn:.4f}")
    print(f"Label1 F1: {f1_label1 / test_batch_num:.4f}")
    print(f"Label1 Accuracy: {acc_label1 / test_batch_num:.4f}")
    print(f"Label1 Recall: {recall_label1 / test_batch_num:.4f}")
    print(f"Label1 auc: {auc_label1:.4f}")
    print(f"Label1 ap: {ap_label1:.4f}")

    return auc_gnn, auc_label1, recall_gnn, recall_label1, predicted_labels,predict_prob

In [None]:
class InterAgg(nn.Module):
    """
    Inter-relation aggregator of CARE-GNN.

    For a batch of nodes it scores the batch nodes and their 1-hop neighbors
    with a linear label predictor, lets one intra-relation aggregator per
    relation filter and aggregate the neighbors, and finally merges the three
    per-relation embeddings with the batch nodes' own embeddings.

    The implementation is written for exactly three relations
    (``intraggs[0..2]`` and ``adj_lists[0..2]`` are accessed by index).
    """

    def __init__(self, features, feature_dim,
                embed_dim, adj_lists, intraggs,
                inter='GNN', step_size=0.02, cuda=True):
        """
        Initialize the inter-relation aggregator
        :param features: the input node features or embeddings for all nodes
        :param feature_dim: the input dimension
        :param embed_dim: the output dimension
        :param adj_lists: a list of adjacency lists for each single-relation graph
        :param intraggs: the intra-relation aggregators used by each single-relation graph
        :param inter: the aggregator type: 'Att', 'Weight', 'Mean', 'GNN'
        :param step_size: the RL action step size
        :param cuda: whether to use GPU
        """
        super(InterAgg, self).__init__()

        self.features = features
        self.dropout = 0.6
        self.adj_lists = adj_lists
        self.intra_agg1 = intraggs[0]
        self.intra_agg2 = intraggs[1]
        self.intra_agg3 = intraggs[2]
        self.embed_dim = embed_dim
        self.feat_dim = feature_dim
        self.inter = inter
        self.step_size = step_size
        # NOTE: this boolean attribute shadows nn.Module.cuda() on the instance;
        # it is kept because forward() and the intra aggregators read it as a flag.
        self.cuda = cuda
        self.intra_agg1.cuda = cuda
        self.intra_agg2.cuda = cuda
        self.intra_agg3.cuda = cuda

        # RL condition flag
        self.RL = True

        # number of batches for current epoch, assigned during training
        self.batch_num = 0

        # initial filtering thresholds
        self.thresholds = [0.5, 0.5, 0.5]

        # the activation function used by attention mechanism
        self.leakyrelu = nn.LeakyReLU(0.2)

        # parameter used to transform node embeddings before inter-relation aggregation
        self.weight = nn.Parameter(torch.FloatTensor(self.feat_dim, self.embed_dim))
        init.xavier_uniform_(self.weight)

        # weight parameter for each relation used by CARE-Weight
        self.alpha = nn.Parameter(torch.FloatTensor(self.embed_dim, 3))
        init.xavier_uniform_(self.alpha)

        # parameters used by attention layer
        self.a = nn.Parameter(torch.FloatTensor(2 * self.embed_dim, 1))
        init.xavier_uniform_(self.a)

        # label predictor for similarity measure
        self.label_clf = nn.Linear(self.feat_dim, 2)

        # initialize the parameter logs
        self.weights_log = []
        self.thresholds_log = [self.thresholds]
        self.relation_score_log = []

    def _index_tensor(self, node_ids):
        """Build a LongTensor of node ids, moved to the GPU when the cuda flag is set."""
        index = torch.LongTensor(node_ids)
        return index.cuda() if self.cuda else index

    def forward(self, nodes, labels, train_flag=True):
        """
        :param nodes: a list of batch node ids
        :param labels: a list of batch node labels, only used by the RLModule
        :param train_flag: indicates whether in training or testing mode
        :return combined: the embeddings of a batch of input node features
        :return center_scores: the label-aware scores of batch nodes
        :raises ValueError: if ``self.inter`` is not one of 'Att', 'Weight', 'Mean', 'GNN'
        """

        # extract 1-hop neighbor ids from adj lists of each single-relation graph
        to_neighs = []
        for adj_list in self.adj_lists:
            to_neighs.append([set(adj_list[int(node)]) for node in nodes])

        # find unique nodes and their neighbors used in current batch
        unique_nodes = set.union(set.union(*to_neighs[0]), set.union(*to_neighs[1]),
                                 set.union(*to_neighs[2], set(nodes)))
        # materialize once so the feature rows and id_mapping share one ordering
        unique_list = list(unique_nodes)

        # calculate label-aware scores
        batch_features = self.features(self._index_tensor(unique_list))
        batch_scores = self.label_clf(batch_features)
        id_mapping = {node_id: index for index, node_id in enumerate(unique_list)}

        # the label-aware scores for current batch of nodes
        # (a list index keeps the result 2-D even when the batch holds a single node;
        #  itemgetter would return a bare int in that case and drop a dimension)
        center_scores = batch_scores[[id_mapping[node] for node in nodes], :]

        # get neighbor node id list for each batch node and relation
        r1_list = [list(to_neigh) for to_neigh in to_neighs[0]]
        r2_list = [list(to_neigh) for to_neigh in to_neighs[1]]
        r3_list = [list(to_neigh) for to_neigh in to_neighs[2]]

        # assign label-aware scores to neighbor nodes for each batch node and relation
        # (.view(-1, 2) restores the row dimension when a node has a single neighbor)
        r1_scores = [batch_scores[itemgetter(*to_neigh)(id_mapping), :].view(-1, 2) for to_neigh in r1_list]
        r2_scores = [batch_scores[itemgetter(*to_neigh)(id_mapping), :].view(-1, 2) for to_neigh in r2_list]
        r3_scores = [batch_scores[itemgetter(*to_neigh)(id_mapping), :].view(-1, 2) for to_neigh in r3_list]

        # count the number of neighbors kept for aggregation for each batch node and relation
        r1_sample_num_list = [math.ceil(len(neighs) * self.thresholds[0]) for neighs in r1_list]
        r2_sample_num_list = [math.ceil(len(neighs) * self.thresholds[1]) for neighs in r2_list]
        r3_sample_num_list = [math.ceil(len(neighs) * self.thresholds[2]) for neighs in r3_list]

        # intra-aggregation steps for each relation
        # Eq. (8) in the paper
        r1_feats, r1_scores = self.intra_agg1.forward(nodes, r1_list, center_scores, r1_scores, r1_sample_num_list)
        r2_feats, r2_scores = self.intra_agg2.forward(nodes, r2_list, center_scores, r2_scores, r2_sample_num_list)
        r3_feats, r3_scores = self.intra_agg3.forward(nodes, r3_list, center_scores, r3_scores, r3_sample_num_list)

        # concat the intra-aggregated embeddings from each relation
        neigh_feats = torch.cat((r1_feats, r2_feats, r3_feats), dim=0)

        # get features or embeddings for batch nodes; the index follows the cuda
        # flag whatever container type `nodes` is, so it always matches the
        # device the feature table is expected to live on
        self_feats = self.features(self._index_tensor(nodes))

        # number of nodes in a batch
        n = len(nodes)

        # inter-relation aggregation steps
        # Eq. (9) in the paper
        if self.inter == 'Att':
            # 1) CARE-Att Inter-relation Aggregator
            combined, attention = att_inter_agg(len(self.adj_lists), self.leakyrelu, self_feats, neigh_feats, self.embed_dim,
                                                self.weight, self.a, n, self.dropout, self.training, self.cuda)
        elif self.inter == 'Weight':
            # 2) CARE-Weight Inter-relation Aggregator
            combined = weight_inter_agg(len(self.adj_lists), self_feats, neigh_feats, self.embed_dim, self.weight, self.alpha, n, self.cuda)
            gem_weights = F.softmax(torch.sum(self.alpha, dim=0), dim=0).tolist()
            if train_flag:
                print(f'Weights: {gem_weights}')
        elif self.inter == 'Mean':
            # 3) CARE-Mean Inter-relation Aggregator
            combined = mean_inter_agg(len(self.adj_lists), self_feats, neigh_feats, self.embed_dim, self.weight, n, self.cuda)
        elif self.inter == 'GNN':
            # 4) CARE-GNN Inter-relation Aggregator
            combined = threshold_inter_agg(len(self.adj_lists), self_feats, neigh_feats, self.embed_dim, self.weight, self.thresholds, n, self.cuda)
        else:
            # fail with a clear message instead of an UnboundLocalError on `combined`
            raise ValueError(
                f"Unknown inter-relation aggregator {self.inter!r}; "
                "expected one of 'Att', 'Weight', 'Mean', 'GNN'")

        # the reinforcement learning module
        if self.RL and train_flag:
            relation_scores, rewards, thresholds, stop_flag = RLModule([r1_scores, r2_scores, r3_scores],
                                                                        self.relation_score_log, labels, self.thresholds,
                                                                        self.batch_num, self.step_size)
            self.thresholds = thresholds
            self.RL = stop_flag
            self.relation_score_log.append(relation_scores)
            self.thresholds_log.append(self.thresholds)

        return combined, center_scores

class IntraAgg(nn.Module):
    """
    Intra-relation aggregator: filters the neighbors of each batch node under
    one relation and averages the embeddings of the neighbors that are kept.
    """

    def __init__(self, features, feat_dim, cuda=False):
        """
        Initialize the intra-relation aggregator
        :param features: the input node features or embeddings for all nodes
        :param feat_dim: the input dimension
        :param cuda: whether to use GPU
        """
        super().__init__()

        self.features = features
        self.feat_dim = feat_dim
        # boolean device flag (shadows nn.Module.cuda() on this instance)
        self.cuda = cuda

    def forward(self, nodes, to_neighs_list, batch_scores, neigh_scores, sample_list):
        """
        Code partially from https://github.com/williamleif/graphsage-simple/
        :param nodes: list of nodes in a batch
        :param to_neighs_list: neighbor node id list for each batch node in one relation
        :param batch_scores: the label-aware scores of batch nodes
        :param neigh_scores: the label-aware scores 1-hop neighbors each batch node in one relation
        :param sample_list: the number of neighbors kept for each batch node in one relation
        :return to_feats: the aggregated embeddings of batch nodes neighbors in one relation
        :return samp_scores: the average neighbor distances for each relation after filtering
        """

        # keep only the neighbors selected by the adaptive threshold filter
        kept_neighs, kept_scores = filter_neighs_ada_threshold(batch_scores, neigh_scores, to_neighs_list, sample_list)

        # every distinct kept neighbor gets one column of the averaging mask
        node_ids = list(set.union(*kept_neighs))
        column_of = dict(zip(node_ids, range(len(node_ids))))

        # (row, column) coordinates of the mask entries: row = batch node, column = kept neighbor
        rows, cols = [], []
        for row, neigh_set in enumerate(kept_neighs):
            for node_id in neigh_set:
                rows.append(row)
                cols.append(column_of[node_id])

        mask = torch.zeros(len(kept_neighs), len(column_of))
        mask[rows, cols] = 1
        id_tensor = torch.LongTensor(node_ids)
        if self.cuda:
            mask = mask.cuda()
            id_tensor = id_tensor.cuda()

        # row-normalize so that mask @ embeddings is the mean over kept neighbors
        mask = mask.div(mask.sum(1, keepdim=True))
        embed_matrix = self.features(id_tensor)
        return F.relu(mask.mm(embed_matrix)), kept_scores

def RLModule(scores, scores_log, labels, thresholds, batch_num, step_size):
    """
    The reinforcement learning module.
    It updates the neighbor filtering threshold for each relation based
    on the average neighbor distances between two consecutive epochs.
    Works for any number of relations (one entry of ``scores`` and of
    ``thresholds`` per relation).
    :param scores: the neighbor nodes label-aware scores for each relation
    :param scores_log: a list stores the relation average distances for each batch
    :param labels: the batch node labels used to select positive nodes
                   (a tensor; ``labels == 1`` followed by ``.nonzero()`` is applied to it)
    :param thresholds: the current neighbor filtering thresholds for each relation
    :param batch_num: numbers batches in an epoch (must be non-zero, it is used as a modulus)
    :param step_size: the RL action step size
    :return relation_scores: the relation average distances for current batch
    :return rewards: the reward for given thresholds in current epoch
    :return new_thresholds: the new filtering thresholds updated according to the rewards
    :return stop_flag: the RL terminal condition flag
    """

    relation_scores = []
    stop_flag = True
    # only compute the average neighbor distances for positive nodes
    # NOTE: the batch must contain at least one positive node, otherwise
    # itemgetter() below is called without arguments and raises TypeError.
    pos_index = [i[0] for i in (labels == 1).nonzero().tolist()]

    # compute average neighbor distances for each relation
    for score in scores:
        pos_scores = itemgetter(*pos_index)(score)
        # with a single positive node itemgetter returns that node's score list
        # itself, so iterating yields floats instead of per-node lists
        neigh_count = sum(1 if isinstance(i, float) else len(i) for i in pos_scores)
        pos_sum = [i if isinstance(i, float) else sum(i) for i in pos_scores]
        relation_scores.append(sum(pos_sum) / neigh_count)

    if len(scores_log) % batch_num != 0 or len(scores_log) < 2 * batch_num:
        # do not call RL module within the epoch or within the first two epochs
        # (one zero reward per relation instead of a hard-coded triple)
        rewards = [0] * len(scores)
        new_thresholds = thresholds
    else:
        # update thresholds according to average scores in last epoch
        # Eq.(5) in the paper
        previous_epoch_scores = [sum(s) / batch_num for s in zip(*scores_log[-2 * batch_num:-batch_num])]
        current_epoch_scores = [sum(s) / batch_num for s in zip(*scores_log[-batch_num:])]

        # compute reward for each relation and update the thresholds according to reward
        # Eq. (6) in the paper
        rewards = [1 if previous_epoch_scores[i] - s >= 0 else -1 for i, s in enumerate(current_epoch_scores)]
        new_thresholds = [thresholds[i] + step_size if r == 1 else thresholds[i] - step_size for i, r in enumerate(rewards)]

        # avoid overflow
        new_thresholds = [0.999 if i > 1 else i for i in new_thresholds]
        new_thresholds = [0.001 if i < 0 else i for i in new_thresholds]

        print(f'epoch scores: {current_epoch_scores}')
        print(f'rewards: {rewards}')
        print(f'thresholds: {new_thresholds}')

    # TODO: add terminal condition

    return relation_scores, rewards, new_thresholds, stop_flag

def filter_neighs_ada_threshold(center_scores, neigh_scores, neighs_list, sample_list):
    """
    Filter neighbors according label predictor result with adaptive thresholds
    :param center_scores: the label-aware scores of batch nodes
    :param neigh_scores: the label-aware scores 1-hop neighbors each batch node in one relation
    :param neighs_list: neighbor node id list for each batch node in one relation
    :param sample_list: the number of neighbors kept for each batch node in one relation
    :return samp_neighs: the neighbor indices and neighbor simi scores
    :return samp_scores: the average neighbor distances for each relation after filtering
    """

    samp_neighs, samp_scores = [], []
    for idx in range(len(center_scores)):
        candidates = neighs_list[idx]
        num_sample = sample_list[idx]

        # first-column score of every neighbor, and the center's first-column
        # score tiled to the same (num_neighbors, 1) shape
        neigh_col = neigh_scores[idx][:, 0].view(-1, 1)
        center_col = center_scores[idx][0].repeat(neigh_col.size()[0], 1)

        # compute the L1-distance of batch nodes and their neighbors
        # Eq. (2) in paper
        score_diff = torch.abs(center_col - neigh_col).squeeze()
        sorted_scores, sorted_indices = torch.sort(score_diff, dim=0, descending=False)

        # top-p sampling according to distance ranking and thresholds
        # Section 3.3.1 in paper
        if len(neigh_scores[idx]) <= num_sample + 1:
            # too few neighbors to filter: keep them all, in their original order
            selected_neighs = candidates
            selected_scores = score_diff.tolist()
            # a single neighbor squeezes to a 0-d tensor whose tolist() is a bare float
            if isinstance(selected_scores, float):
                selected_scores = [selected_scores]
        else:
            closest = sorted_indices.tolist()[:num_sample]
            selected_neighs = [candidates[pos] for pos in closest]
            selected_scores = sorted_scores.tolist()[:num_sample]

        samp_neighs.append(set(selected_neighs))
        samp_scores.append(selected_scores)

    return samp_neighs, samp_scores

def mean_inter_agg(num_relations, self_feats, neigh_feats, embed_dim, weight, n, cuda):
    """
    Mean inter-relation aggregator
    :param num_relations: number of relations in the graph
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation,
                        stacked along dim 0 (rows r*n .. (r+1)*n belong to relation r)
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param n: number of nodes in a batch
    :param cuda: whether use GPU
    :return: inter-relation aggregated node embeddings
    """

    # transform batch node embedding and neighbor embedding in each relation with weight parameter
    center_h = torch.mm(self_feats, weight)
    neigh_h = torch.mm(neigh_feats, weight)

    # initialize the final neighbor embedding
    if cuda:
        aggregated = torch.zeros(size=(n, embed_dim)).cuda()
    else:
        aggregated = torch.zeros(size=(n, embed_dim))

    # sum neighbor embeddings together
    for r in range(num_relations):
        aggregated += neigh_h[r * n:(r + 1) * n, :]

    # sum aggregated neighbor embedding and batch node embedding
    # take the average over the center embedding plus one embedding per relation
    # (equals the former fixed divisor 4.0 for three relations)
    combined = F.relu((center_h + aggregated) / float(num_relations + 1))

    return combined

def weight_inter_agg(num_relations, self_feats, neigh_feats, embed_dim, weight, alpha, n, cuda):
    """
    Weight inter-relation aggregator
    Reference: https://arxiv.org/abs/2002.12307
    :param num_relations: number of relations in the graph
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param alpha: weight parameter for each relation used by CARE-Weight
    :param n: number of nodes in a batch
    :param cuda: whether use GPU
    :return: inter-relation aggregated node embeddings
    """

    # project the batch nodes and the stacked per-relation neighbor embeddings
    center_h = self_feats.mm(weight)
    neigh_h = neigh_feats.mm(weight)

    # normalize alpha across its relation axis (dim 1) into per-relation weights
    relation_w = torch.softmax(alpha, dim=1)

    # accumulator for the weighted neighbor embeddings
    aggregated = torch.zeros(size=(n, embed_dim))
    if cuda:
        aggregated = aggregated.cuda()

    # rows [start, start + n) of neigh_h hold relation r; scale them by column r of the weights
    for r in range(num_relations):
        start = r * n
        aggregated.add_(neigh_h[start:start + n, :] * relation_w[:, r])

    # add the center embedding and apply the activation
    return F.relu(center_h + aggregated)

def att_inter_agg(num_relations, att_layer, self_feats, neigh_feats, embed_dim, weight, a, n, dropout, training, cuda):
    """
    Attention-based inter-relation aggregator
    Reference: https://github.com/Diego999/pyGAT
    :param num_relations: number of relations in the graph
    :param att_layer: the activation function used by the attention layer
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation,
                        stacked along dim 0 (rows r*n .. (r+1)*n belong to relation r)
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param a: parameters used by attention layer
    :param n: number of nodes in a batch
    :param dropout: dropout for attention layer
    :param training: a flag indicating whether in the training or testing mode
    :param cuda: whether use GPU
    :return combined: inter-relation aggregated node embeddings
    :return att: the attention weights for each relation
    """

    # transform batch node embedding and neighbor embedding in each relation with weight parameter
    center_h = torch.mm(self_feats, weight)
    neigh_h = torch.mm(neigh_feats, weight)

    # compute attention weights: pair every relation's neighbor embedding with
    # the center embedding (tiled once per relation) and score the pair with `a`
    combined = torch.cat((center_h.repeat(num_relations, 1), neigh_h), dim=1)
    e = att_layer(combined.mm(a))
    # regroup the stacked scores into one column per relation -> (n, num_relations)
    attention = torch.cat([e[r * n:(r + 1) * n, :] for r in range(num_relations)], dim=1)
    ori_attention = F.softmax(attention, dim=1)
    attention = F.dropout(ori_attention, dropout, training=training)

    # initialize the final neighbor embedding
    if cuda:
        aggregated = torch.zeros(size=(n, embed_dim)).cuda()
    else:
        aggregated = torch.zeros(size=(n, embed_dim))

    # add neighbor embeddings in each relation together with attention weights
    for r in range(num_relations):
        aggregated += attention[:, r].unsqueeze(1) * neigh_h[r * n:(r + 1) * n, :]

    # sum aggregated neighbor embedding and batch node embedding
    # feed them to activation function
    combined = F.relu(center_h + aggregated)

    # extract the attention weights (computed from the pre-dropout attention)
    att = F.softmax(torch.sum(ori_attention, dim=0), dim=0)

    return combined, att

def threshold_inter_agg(num_relations, self_feats, neigh_feats, embed_dim, weight, threshold, n, cuda):
    """
    CARE-GNN inter-relation aggregator
    Eq. (9) in the paper
    :param num_relations: number of relations in the graph
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param threshold: the neighbor filtering thresholds used as aggregating weights
    :param n: number of nodes in a batch
    :param cuda: whether use GPU
    :return: inter-relation aggregated node embeddings
    """

    # project the batch nodes and the stacked per-relation neighbor embeddings
    center_h = self_feats.mm(weight)
    neigh_h = neigh_feats.mm(weight)

    # accumulator for the threshold-weighted neighbor embeddings
    aggregated = torch.zeros(size=(n, embed_dim))
    if cuda:
        aggregated = aggregated.cuda()

    # rows [start, start + n) of neigh_h hold relation r; its filtering
    # threshold doubles as its aggregation weight
    for r in range(num_relations):
        start = r * n
        aggregated.add_(neigh_h[start:start + n, :] * threshold[r])

    # add the center embedding and apply the activation
    return F.relu(center_h + aggregated)

In [None]:
#CARE-GNN model
class OneLayerCARE(nn.Module):
    """
    Single-layer CARE-GNN classifier: an inter-relation aggregator followed by
    a linear map from the aggregated embedding to class scores.
    """

    def __init__(self, num_classes, inter1, lambda_1):
        """
        Initialize the CARE-GNN model
        :param num_classes: number of classes (2 in our paper)
        :param inter1: the inter-relation aggregator that output the final embedding
        :param lambda_1: weight of the label-predictor (Simi) loss in the total loss
        """
        super().__init__()
        self.inter1 = inter1
        self.xent = nn.CrossEntropyLoss()
        self.lambda_1 = lambda_1

        # projection from the final embedding to the class scores
        self.weight = nn.Parameter(torch.FloatTensor(inter1.embed_dim, num_classes))
        init.xavier_uniform_(self.weight)

    def forward(self, nodes, labels, train_flag=True):
        """Return (GNN class scores, label-predictor scores) for the batch nodes."""
        embeds1, label_scores = self.inter1(nodes, labels, train_flag)
        return embeds1.mm(self.weight), label_scores

    def to_prob(self, nodes, labels, train_flag=True):
        """Return the softmax probabilities of both score heads."""
        gnn_scores, label_scores = self.forward(nodes, labels, train_flag)
        return torch.softmax(gnn_scores, dim=1), torch.softmax(label_scores, dim=1)

    def loss(self, nodes, labels, train_flag=True):
        """Return the combined training loss for the batch nodes."""
        gnn_scores, label_scores = self.forward(nodes, labels, train_flag)
        targets = labels.squeeze()
        # Simi loss, Eq. (4) in the paper
        label_loss = self.xent(label_scores, targets)
        # GNN loss, Eq. (10) in the paper
        gnn_loss = self.xent(gnn_scores, targets)
        # the loss function of CARE-GNN, Eq. (11) in the paper
        return gnn_loss + self.lambda_1 * label_loss

In [None]:
#Training settings for CARE-GNN
data='Amazon'
model='CARE'
inter='GNN' #The inter-relation aggregator type. [Att, Weight, Mean, GNN]
#batch_size=1024
lr=0.01
lambda_1=2 #Simi loss weight
lambda_2=1e-3 #Weight decay (L2 loss weight)
emb_size=64 #Node embedding size at the last layer
step_size=2e-2 #Reinforcement Learning action step size
cuda=False#torch.cuda.is_available()
# Seed every RNG the notebook draws from. The model parameters are initialised
# by torch, so torch must be seeded as well for runs to be repeatable.
np.random.seed(66)
random.seed(66)
torch.manual_seed(66)

In [None]:
# Load the homogeneous graph, the three single-relation graphs,
# the node feature matrix and the node labels.
(homo, relation1, relation2, relation3), feat_data, labels = load_data_CARE()


In [None]:
#Care-GNN training
# Expose the normalized features as a frozen embedding table (requires_grad=False).
features = nn.Embedding(feat_data.shape[0], feat_data.shape[1])
feat_data = normalize(feat_data)
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
if cuda:
    features.cuda()

adj_lists = [relation1, relation2, relation3]

# build the model: one intra-relation aggregator per relation, merged by InterAgg
intra1 = IntraAgg(features, feat_data.shape[1], cuda=cuda)
intra2 = IntraAgg(features, feat_data.shape[1], cuda=cuda)
intra3 = IntraAgg(features, feat_data.shape[1], cuda=cuda)
inter1 = InterAgg(features, feat_data.shape[1], emb_size, adj_lists, [intra1, intra2, intra3], inter=inter,
                    step_size=step_size, cuda=cuda)
gnn_model = OneLayerCARE(2, inter1, lambda_1)

if cuda:
    gnn_model.cuda()

# only optimize parameters that require gradients (the feature table is frozen)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, gnn_model.parameters()), lr=lr, weight_decay=lambda_2)

performance_log = []

# the training labels do not change between epochs: build the tensor once
train_labels = torch.LongTensor(y_train)
if cuda:
    train_labels = train_labels.cuda()

# train the model (full batch: every epoch is a single optimization step)
gnn_model.train()
for epoch in range(30):
    # send number of batches to model to let the RLModule know the training progress
    inter1.batch_num = 1 #Equal to 1 because no mini-batch training

    optimizer.zero_grad()
    loss = gnn_model.loss(X_train, train_labels)
    # backward/step must run on both the CPU and the GPU path
    loss.backward()
    optimizer.step()
    # one batch per epoch, so the epoch loss is the batch loss itself
    # (num_batches is not defined in this cell)
    print(f'Epoch: {epoch}, loss: {loss.item()}')

Epoch: 0, loss: 0.21221451759338378
Epoch: 1, loss: 0.19332618713378907
epoch scores: [0.06086775438698947, 0.04683465075355385, 0.03944652062216364]
rewards: [1, 1, 1]
thresholds: [0.52, 0.52, 0.52]
Epoch: 2, loss: 0.17795649766921998
epoch scores: [0.055511100636507466, 0.04345052294635087, 0.03771996702014042]
rewards: [1, 1, 1]
thresholds: [0.54, 0.54, 0.54]
Epoch: 3, loss: 0.16520798206329346
epoch scores: [0.052144079109478075, 0.04159766279088379, 0.03833248533336858]
rewards: [1, 1, -1]
thresholds: [0.56, 0.56, 0.52]
Epoch: 4, loss: 0.15505390167236327
epoch scores: [0.04810275610169747, 0.03966967313499985, 0.037663359880267644]
rewards: [1, 1, 1]
thresholds: [0.5800000000000001, 0.5800000000000001, 0.54]
Epoch: 5, loss: 0.1475539207458496
epoch scores: [0.044190114765345165, 0.03781636173205029, 0.03486808180732065]
rewards: [1, 1, 1]
thresholds: [0.6000000000000001, 0.6000000000000001, 0.56]
Epoch: 6, loss: 0.14167476892471315
epoch scores: [0.04024509754861346, 0.0360434655

In [None]:
#Predict using CARE-GNN
# Inference only: switch to eval mode (disables the attention dropout of the
# 'Att' aggregator) and skip autograd bookkeeping for the full-test-set pass.
gnn_model.eval()
with torch.no_grad():
    gnn_prob, label_prob = gnn_model.to_prob(X_test, y_test, train_flag=False)
# predicted class = index of the larger of the two class probabilities
care_label = gnn_prob.cpu().numpy().argmax(axis=1)

In [None]:
# Per-class precision / recall / F1 of the CARE-GNN predictions on the test split.
print(classification_report(y_true=y_test, y_pred=care_label))

              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98      6674
         1.0       0.97      0.47      0.63       493

    accuracy                           0.96      7167
   macro avg       0.96      0.74      0.81      7167
weighted avg       0.96      0.96      0.96      7167



In [None]:
# ROC-AUC of the GNN probability for class 1 on the test split.
# roc_auc_score is already imported in the notebook's first cell.
# .cpu() keeps this working when the model ran on the GPU.
print(roc_auc_score(y_test, gnn_prob.detach().cpu().numpy()[:, 1]))

0.8847329803342084
