In [10]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import math
from sklearn.metrics import classification_report, roc_auc_score, f1_score,accuracy_score,average_precision_score,recall_score,auc
from scipy.stats import entropy
import sys
import os
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable
from operator import itemgetter
import torch.optim as optim
import torch.nn.functional as F
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
import time
import scipy
from scipy import sparse as sp
import pickle
import copy as cp
from collections import defaultdict
import random
DATAPATH='/content/drive/MyDrive/Amazon.mat'# Path of Amazon.mat file
prefix_1 = '/content/drive/MyDrive/' # Path to store temporary output files
prefix_2 = '/content/drive/MyDrive/' # Folder that stores Amazon.mat file

In [11]:
pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cu118.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html


In [12]:
pip install torch_geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [13]:
from torch_geometric.utils import to_undirected
from torch_geometric.nn import GCNConv, GATConv, JumpingKnowledge,SGConv
from torch_sparse import SparseTensor, matmul
from torch_geometric.nn.conv.gcn_conv import gcn_norm

# Define some functions and classes
# Reference: https://github.com/CUAI/Non-Homophily-Benchmarks

In [14]:
# Simple Autoencoder
class Encoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Encoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, output_size),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.encoder(x)
        return x


class Decoder(nn.Module):
    def __init__(self, input_size,output_size):
        super(Decoder, self).__init__()
        self.decoder = nn.Sequential(
            nn.Linear(input_size, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, output_size),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.decoder(x)
        return x


In [15]:
#Functions copied from https://github.com/CUAI/Non-Homophily-Benchmarks
class NCDataset(object):
    def __init__(self, name, root=f'{DATAPATH}'):
        self.name = name  # original name, e.g., ogbn-proteins
        self.graph = {}
        self.label = None
        self.original_label= None

    def __getitem__(self, idx):
        assert idx == 0, 'This dataset has only one graph'
        return self.graph, self.label

    def __len__(self):
        return 1

    def __repr__(self):  
        return '{}({})'.format(self.__class__.__name__, len(self))
    
def load_amazon_dataset():
    fulldata = scipy.io.loadmat(DATAPATH)
    A = fulldata['homo']
    edge_index = np.array(A.nonzero())
    node_feat = fulldata['features']
    label = np.array(fulldata['label'], dtype=np.int).flatten()
    num_nodes = node_feat.shape[0]

    dataset = NCDataset('Amazon')
    edge_index = torch.tensor(edge_index, dtype=torch.long)
    node_feat = torch.tensor(node_feat.todense(), dtype=torch.float)
    dataset.graph = {'edge_index': edge_index,
                     'node_feat': node_feat,
                     'edge_feat': None,
                     'num_nodes': num_nodes}
    label = torch.tensor(label, dtype=torch.long)
    dataset.label = label
    dataset.original_label = label
    return dataset


def normalize(edge_index):
    """ normalizes the edge_index
    """
    adj_t = edge_index.set_diag()
    deg = adj_t.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)
    return adj_t

def eval_acc(y_true, y_pred):
    acc_list = []
    y_true = y_true.detach().cpu().numpy()
    y_pred = y_pred.argmax(dim=-1, keepdim=True).detach().cpu().numpy()

    for i in range(y_true.shape[1]):
        is_labeled = y_true[:, i] == y_true[:, i]
        correct = y_true[is_labeled, i] == y_pred[is_labeled, i]
        acc_list.append(float(np.sum(correct))/len(correct))

    return sum(acc_list)/len(acc_list)


def eval_rocauc(y_true, y_pred):
    """ adapted from ogb
    https://github.com/snap-stanford/ogb/blob/master/ogb/nodeproppred/evaluate.py"""
    rocauc_list = []
    y_true = y_true.detach().cpu().numpy()
    if y_true.shape[1] == 1:
        # use the predicted class for single-class classification
        y_pred = F.softmax(y_pred, dim=-1)[:,1].unsqueeze(1).detach().cpu().numpy()
    else:
        y_pred = y_pred.detach().cpu().numpy()

    for i in range(y_true.shape[1]):
        # AUC is only defined when there is at least one positive data.
        if np.sum(y_true[:, i] == 1) > 0 and np.sum(y_true[:, i] == 0) > 0:
            is_labeled = y_true[:, i] == y_true[:, i]
            score = roc_auc_score(y_true[is_labeled, i], y_pred[is_labeled, i])
                                
            rocauc_list.append(score)

    if len(rocauc_list) == 0:
        print('No positively labeled data available. Cannot compute ROC-AUC.')
        return 0
    return sum(rocauc_list)/len(rocauc_list)


def evaluate(model, dataset, split_idx, eval_func, result=None):
    if result is not None:
        out = result
    else:
        model.eval()
        out = model(dataset)

    train_acc = eval_func(
        dataset.label[split_idx['train']], out[split_idx['train']])
    test_acc = eval_func(
        dataset.label[split_idx['test']], out[split_idx['test']])

    return train_acc, test_acc, out

#Models
class LINK(nn.Module):
    """ logistic regression on adjacency matrix """
    
    def __init__(self, num_nodes, out_channels):
        super(LINK, self).__init__()
        self.W = nn.Linear(num_nodes, out_channels)

    def reset_parameters(self):
        self.W.reset_parameters()
        
    def forward(self, data):
        N = data.graph['num_nodes']
        edge_index = data.graph['edge_index']
        if isinstance(edge_index, torch.Tensor):
            row, col = edge_index
            A = SparseTensor(row=row, col=col, sparse_sizes=(N, N)).to_torch_sparse_coo_tensor()
        elif isinstance(edge_index, SparseTensor):
            A = edge_index.to_torch_sparse_coo_tensor()
        logits = self.W(A)
        return logits   
class GAT(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2,
                 dropout=0.5, heads=2):
        super(GAT, self).__init__()

        self.convs = nn.ModuleList()
        self.convs.append(
            GATConv(in_channels, hidden_channels, heads=heads, concat=True))

        self.bns = nn.ModuleList()
        self.bns.append(nn.BatchNorm1d(hidden_channels*heads))
        for _ in range(num_layers - 2):

            self.convs.append(
                    GATConv(hidden_channels*heads, hidden_channels, heads=heads, concat=True) ) 
            self.bns.append(nn.BatchNorm1d(hidden_channels*heads))

        self.convs.append(
            GATConv(hidden_channels*heads, out_channels, heads=heads, concat=False))

        self.dropout = dropout
        self.activation = F.elu 

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()


    def forward(self, data):
        x = data.graph['node_feat']
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, data.graph['edge_index'])
            x = self.bns[i](x)
            x = self.activation(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, data.graph['edge_index'])
        return x
    
class GCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2,
                 dropout=0.5, save_mem=False, use_bn=True):
        super(GCN, self).__init__()

        self.convs = nn.ModuleList()
        self.convs.append(
            GCNConv(in_channels, hidden_channels, cached=not save_mem, normalize=not save_mem))

        self.bns = nn.ModuleList()
        self.bns.append(nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(
                GCNConv(hidden_channels, hidden_channels, cached=not save_mem, normalize=not save_mem))
            self.bns.append(nn.BatchNorm1d(hidden_channels))

        self.convs.append(
            GCNConv(hidden_channels, out_channels, cached=not save_mem, normalize=not save_mem))

        self.dropout = dropout
        self.activation = F.relu
        self.use_bn = use_bn

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()


    def forward(self, data):
        x = data.graph['node_feat']
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, data.graph['edge_index'])
            if self.use_bn:
                x = self.bns[i](x)
            x = self.activation(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, data.graph['edge_index'])
        return x
class SGC(nn.Module):
    def __init__(self, in_channels, out_channels, hops):
        """ takes 'hops' power of the normalized adjacency"""
        super(SGC, self).__init__()
        self.conv = SGConv(in_channels, out_channels, hops, cached=True) 

    def reset_parameters(self):
        self.conv.reset_parameters()

    def forward(self, data):
        edge_index = data.graph['edge_index']
        x = data.graph['node_feat']
        x = self.conv(x, edge_index)
        return x

class APPNP_Net(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, dropout=.5, K=10, alpha=.1):
        super(APPNP_Net, self).__init__()
        self.lin1 = nn.Linear(in_channels, hidden_channels)
        self.lin2 = nn.Linear(hidden_channels, out_channels)
        self.prop1 = APPNP(K, alpha)
        self.dropout = dropout

    def reset_parameters(self):
        self.lin1.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.graph['node_feat'], data.graph['edge_index']
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lin2(x)
        x = self.prop1(x, edge_index)
        return x
        
class GCNJK(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2,
                 dropout=0.5, save_mem=False, jk_type='max'):
        super(GCNJK, self).__init__()

        self.convs = nn.ModuleList()
        self.convs.append(
            GCNConv(in_channels, hidden_channels, cached=not save_mem, normalize=not save_mem))

        self.bns = nn.ModuleList()
        self.bns.append(nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(
                GCNConv(hidden_channels, hidden_channels, cached=not save_mem, normalize=not save_mem))
            self.bns.append(nn.BatchNorm1d(hidden_channels))

        self.convs.append(
            GCNConv(hidden_channels, hidden_channels, cached=not save_mem, normalize=not save_mem))

        self.dropout = dropout
        self.activation = F.relu
        self.jump = JumpingKnowledge(jk_type, channels=hidden_channels, num_layers=1)
        if jk_type == 'cat':
            self.final_project = nn.Linear(hidden_channels * num_layers, out_channels)
        else: # max or lstm
            self.final_project = nn.Linear(hidden_channels, out_channels)

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()
        self.jump.reset_parameters()
        self.final_project.reset_parameters()

    def forward(self, data):
        x = data.graph['node_feat']
        xs = []
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, data.graph['edge_index'])
            x = self.bns[i](x)
            x = self.activation(x)
            xs.append(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, data.graph['edge_index'])
        xs.append(x)
        x = self.jump(xs)
        x = self.final_project(x)
        return x

# Some hyper parameters

In [16]:
#Training set and test set portion size
train_portion=0.4
test_portion=0.6

#Train with GAT will require 40GB GPU memory (Colab Pro+)
device = f'cuda:0' if torch.cuda.is_available() else 'cpu'
#device= 'cpu'
device = torch.device(device)

#Parameters for GNN models
hidden_channels=32
num_layers=2
dropout=0
lr=0.01
weight_decay=1e-4
batch_size=512 

# Define a data structure and store the graph in it
### --Used for training GNNs

In [17]:
dataset=load_amazon_dataset()
if len(dataset.label.shape) == 1:
    dataset.label = dataset.label.unsqueeze(1)
    dataset.original_label= dataset.original_label.unsqueeze(1)

dataset.label = dataset.label.to(device)
dataset.original_label= dataset.original_label.to(device)
n = dataset.graph['num_nodes']
# infer the number of classes for non one-hot and one-hot labels
c = max(dataset.label.max().item() + 1, dataset.label.shape[1])
d = dataset.graph['node_feat'].shape[1]
dataset.graph['edge_index'] = to_undirected(dataset.graph['edge_index'])
dataset.graph['edge_index'], dataset.graph['node_feat'] = \
    dataset.graph['edge_index'].to(device), dataset.graph['node_feat'].to(device)
print(f"num nodes {n} | num classes {c} | num node feats {d}")

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  label = np.array(fulldata['label'], dtype=np.int).flatten()


num nodes 11944 | num classes 2 | num node feats 25


# Load the feature matrix for training Autoencoders

In [18]:
amazon = loadmat(DATAPATH)

#adj = amazon['homo'].tocsr() # Adjacency matrix
features=amazon['features'].toarray() # Feature matrix
label_origin=amazon['label'][0]

n_nodes, feat_dim = features.shape

#Split train and test set
all_index=np.arange(n_nodes)
X_train, X_test, y_train, y_test=train_test_split(all_index, label_origin, test_size=test_portion, stratify=label_origin, random_state=66)
original_index={'X_train':X_train, 'X_test':X_test, 'y_train':y_train, 'y_test':y_test}

# Train Autoencoder

In [19]:
#Split a validation set for performance evaluation
x_train, x_valid, y1, y2=train_test_split(X_train, y_train, test_size=0.2, stratify=y_train, random_state=66)
code_size=20
criterion_decoder = nn.MSELoss()

#Get index for fraud and non-fraud nodes in training set
idx_NF=x_train[y1==0]
idx_F=x_train[y1==1]

features_NF=features[idx_NF]
features_F=features[idx_F]
features_test=features[X_test]

In [20]:
#L1=Loss of Autoencoder trained on non-fraud samples; L2=Loss of Autoencoder trained on fraud samples
NF_ratio=0 # The ratio of non-fraud nodes that L1<L2.
F_ratio=0 # The ratio of fraud nodes that L1>L2.
encoder_NF=None
decoder_NF=None
encoder_F=None
decoder_F=None
#Train until both ratio are larger than 0.8
while(NF_ratio<0.8 or F_ratio<0.8):
  #Autoencoder for non-fraud nodes
  encoder_NF = Encoder(input_size=25,output_size=code_size)
  decoder_NF = Decoder(input_size=code_size,output_size=25)
  optimizer1 = optim.Adam(list(encoder_NF.parameters()) + list(decoder_NF.parameters()),
                        lr=0.001, weight_decay=1e-4)
  for epoch in range(100):
      optimizer1.zero_grad()
      # Forward
      encoder_out = encoder_NF(torch.from_numpy(features_NF).float())
      decoder_out = decoder_NF(encoder_out)
      # Loss
      train_loss_decoder = criterion_decoder(decoder_out, torch.from_numpy(features_NF).float())
      #Backpropagation
      train_loss_decoder.backward()
      optimizer1.step()    

  #Autoencoder for fraud nodes
  encoder_F = Encoder(input_size=25,output_size=code_size)
  decoder_F = Decoder(input_size=code_size,output_size=25)
  optimizer2 = optim.Adam(list(encoder_F.parameters()) + list(decoder_F.parameters()),
                        lr=0.001, weight_decay=1e-4)
  for epoch in range(200):
      optimizer2.zero_grad()
      # Forward
      encoder_out = encoder_F(torch.from_numpy(features_F).float())
      decoder_out = decoder_F(encoder_out)
      # Loss
      train_loss_decoder = criterion_decoder(decoder_out, torch.from_numpy(features_F).float())
      #Backpropagation
      train_loss_decoder.backward()
      optimizer2.step()

  #Check performance
  NF_F=[]
  NF_NF=[]
  for i in x_valid[y2==0]:
    NF_valid_tensor=torch.from_numpy(features[i]).float()
    encoder_out = encoder_F.forward(NF_valid_tensor)
    decoder_out = decoder_F.forward(encoder_out)
    loss=criterion_decoder(decoder_out, NF_valid_tensor).item()
    NF_F.append(round(loss,6))

    NF_valid_tensor=torch.from_numpy(features[i]).float()
    encoder_out = encoder_NF.forward(NF_valid_tensor)
    decoder_out = decoder_NF.forward(encoder_out)
    loss=criterion_decoder(decoder_out, NF_valid_tensor).item()
    NF_NF.append(round(loss,6))

  F_NF=[]
  F_F=[]
  for i in x_valid[y2==1]:
    F_valid_tensor=torch.from_numpy(features[i]).float()
    encoder_out = encoder_NF.forward(F_valid_tensor)
    decoder_out = decoder_NF.forward(encoder_out)
    loss=criterion_decoder(decoder_out, F_valid_tensor).item()
    F_NF.append(round(loss,6))

    F_valid_tensor=torch.from_numpy(features[i]).float()
    encoder_out = encoder_F.forward(F_valid_tensor)
    decoder_out = decoder_F.forward(encoder_out)
    loss=criterion_decoder(decoder_out, F_valid_tensor).item()
    F_F.append(round(loss,6))

  NF_F=np.array(NF_F)  
  NF_NF=np.array(NF_NF)        
  F_F=np.array(F_F)  
  F_NF=np.array(F_NF)      

  NF_ratio=((NF_NF<NF_F).sum())/len(NF_F)
  F_ratio=((F_F<F_NF).sum())/len(F_F)

In [21]:
print(f'NF_ratio: {NF_ratio}, F_ratio: {F_ratio}')

NF_ratio: 0.8696629213483146, F_ratio: 0.8333333333333334


# Train GNN models

In [22]:
#Define loss function and evaluation metric
criterion = nn.BCEWithLogitsLoss()
eval_func = eval_rocauc # eval_acc: Accuracy 

In [23]:
keep_training=1
iteration=1
no_improvement_round=0
code_size=16 # Code size of Autoencoder
probs=np.empty(shape=[0,2])#prediction probabilities
pseudo_labels=np.array([])
while(keep_training):
  start_time=time.time()
  # If no new test nodes are labeled for 5 consecutive rounds
  if no_improvement_round==1:
    break

  print(f'********* Iteration {iteration} *********\n')

  ########Calculate Autoencoder losss L1 and L2, and Hint########
  hint=[]
  abs_diff=[]
  for i in X_test:
    sample=torch.from_numpy(features[i]).float()

    encoder_out = encoder_F(sample)
    decoder_out = decoder_F(encoder_out)
    loss1=criterion_decoder(decoder_out,sample).item()

    encoder_out2 = encoder_NF(sample)
    decoder_out2 = decoder_NF(encoder_out2)
    loss2=criterion_decoder(decoder_out2,sample).item()

    abs_diff.append(abs(loss1-loss2))
    hint.append(0 if loss1>loss2 else 1)
  hint=np.array(hint)
  abs_diff=np.array(abs_diff)

  ######## Update the indexes for taning GNNs ########
  split_idx = {}# Define an empty dictionary to store training and test index list
  split_idx['train']=torch.tensor(np.array(X_train)).to(device)
  split_idx['test']=torch.tensor(np.array(X_test)).to(device)

  train_idx = split_idx['train']
  #model2 = MixHop(d, hidden_channels, c, num_layers=num_layers,
                       #dropout=dropout, hops=1).to(device)
  #model1 = GCNJK(d, hidden_channels, c, num_layers=num_layers, dropout=dropout, jk_type='max').to(device)
  model1 = GCN(in_channels=d,hidden_channels=hidden_channels,out_channels=c,num_layers=num_layers,dropout=dropout,use_bn=False).to(device)
  optimizer1 = torch.optim.AdamW(model1.parameters(), lr=lr, weight_decay=weight_decay)
  #model2 = GAT(d, hidden_channels, c, num_layers=num_layers, dropout=dropout, heads=gat_heads).to(device)
  model2 = SGC(in_channels=d, out_channels=c, hops=1).to(device)
  #model2 = GCN(in_channels=d,hidden_channels=hidden_channels,out_channels=c,num_layers=num_layers,dropout=dropout,use_bn=False).to(device)
  optimizer2 = torch.optim.AdamW(model2.parameters(), lr=lr, weight_decay=weight_decay)
  model3 = LINK(n, c).to(device)
  optimizer3 = torch.optim.AdamW(model3.parameters(), lr=lr, weight_decay=weight_decay)
  model4 = GCNJK(d, hidden_channels, c, num_layers=num_layers, dropout=dropout, jk_type='max').to(device)
  optimizer4 = torch.optim.AdamW(model4.parameters(), lr=lr, weight_decay=weight_decay)
  model1.reset_parameters()
  model2.reset_parameters()
  model3.reset_parameters()
  model4.reset_parameters()
  print(f'GNN training nodes: {len(train_idx)}')
  ########Train the first model########
  for epoch in range(50):       
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0    
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
              
      model1.train()
      #torch.cuda.empty_cache()
      optimizer1.zero_grad()
      #torch.cuda.empty_cache()
      out = model1(dataset)
      #torch.cuda.empty_cache()
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label
      #torch.cuda.empty_cache()
      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))
      #torch.cuda.empty_cache()
      loss.backward()
      #torch.cuda.empty_cache()
      optimizer1.step()
      #torch.cuda.empty_cache()
      epoch_loss += loss.item() 
      '''del loss,out,batch_nodes,true_label
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()   
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    torch.cuda.empty_cache() '''    
    
    if (epoch+1)%10==0:
      result = evaluate(model1, dataset, split_idx, eval_func)
      torch.cuda.empty_cache()
      print(f'Model 1, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')
      '''del result
      torch.cuda.empty_cache()   
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
    torch.cuda.empty_cache()   
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()'''
  ########Train the second model########
  for epoch in range(50):
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0     
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
                
      model2.train()
      #torch.cuda.empty_cache()
      optimizer2.zero_grad()
      #torch.cuda.empty_cache()
      out = model2(dataset)
      #torch.cuda.empty_cache()
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label
      #torch.cuda.empty_cache()
      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      loss.backward()
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      epoch_loss += loss.item()
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      optimizer2.step()
      '''del loss, out, batch_nodes, true_label
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()          
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    torch.cuda.empty_cache()'''
    
    if (epoch+1)%10==0:
      result = evaluate(model2, dataset, split_idx, eval_func)
      torch.cuda.empty_cache()
      print(f'Model 2, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')
      '''del result
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()'''

  ########Train the third model########
  for epoch in range(50):
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0     
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
                
      model3.train()
      #torch.cuda.empty_cache()
      optimizer3.zero_grad()
      #torch.cuda.empty_cache()
      out = model3(dataset)
      #torch.cuda.empty_cache()
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label
      #torch.cuda.empty_cache()
      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      loss.backward()
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      epoch_loss += loss.item()
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      optimizer3.step()
      del loss, out, batch_nodes, true_label
      torch.cuda.empty_cache()         
    torch.cuda.empty_cache()

    
    if (epoch+1)%10==0:
      result = evaluate(model3, dataset, split_idx, eval_func)
      torch.cuda.empty_cache()
      print(f'Model 3, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')
      '''del result
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()'''
  ########Train the third model########
  for epoch in range(50):
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0     
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
                
      model4.train()
      #torch.cuda.empty_cache()
      optimizer4.zero_grad()
      #torch.cuda.empty_cache()
      out = model4(dataset)
      #torch.cuda.empty_cache()
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label
      #torch.cuda.empty_cache()
      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      loss.backward()
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      epoch_loss += loss.item()
      #torch.cuda.empty_cache()
      #torch.cuda.empty_cache()
      optimizer4.step()
      del loss, out, batch_nodes, true_label
      torch.cuda.empty_cache()         
    torch.cuda.empty_cache()

    
    if (epoch+1)%10==0:
      result = evaluate(model4, dataset, split_idx, eval_func)
      torch.cuda.empty_cache()
      print(f'Model 4, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')
      '''del result
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()
      torch.cuda.empty_cache()'''

  ########Get probabilities########
  result1 = evaluate(model1, dataset, split_idx, eval_func)
  torch.cuda.empty_cache()
  AUC1=result1[0]
  prob1=result1[2][split_idx['test']]
  prob1=F.softmax(prob1, dim=-1).detach().cpu().numpy()

  result2 = evaluate(model2, dataset, split_idx, eval_func)
  torch.cuda.empty_cache()
  AUC2=result2[0]
  prob2=result2[2][split_idx['test']]
  prob2=F.softmax(prob2, dim=-1).detach().cpu().numpy()

  result3 = evaluate(model3, dataset, split_idx, eval_func)
  torch.cuda.empty_cache()
  AUC3=result3[0]
  prob3=result3[2][split_idx['test']]
  prob3=F.softmax(prob3, dim=-1).detach().cpu().numpy()

  result4 = evaluate(model4, dataset, split_idx, eval_func)
  torch.cuda.empty_cache()
  AUC4=result4[0]
  prob4=result4[2][split_idx['test']]
  prob4=F.softmax(prob4, dim=-1).detach().cpu().numpy()
  
  ########Ensemble probabilities########
  prob_e=(prob1+prob2+prob3+prob4)/4 #Weighted average according to training AUC

  ########Calculate entropy and confidence########
  H=[] #Entropy
  for i in prob_e:
    h = entropy(i)
    H.append(h)
  Confidence=abs_diff+np.array(H) #Confidence score

  ########Sort according to confidence and choose the first p nodes########
  p=int(len(H)*0.5)

  if p<=500:
    p=len(H)

  idx_sorted=np.array(sorted(range(len(Confidence)),key=Confidence.__getitem__,reverse=True))# Sort according to confidence score
  idx_sorted=idx_sorted[0:p]# Choose first p samples

  ########Assign pseudo labels########
  pseudo_label=prob_e[idx_sorted].argmax(axis=1)

  ########Only keep nodes that agree on both hint and pseudo label########
  idx_agree=idx_sorted[pseudo_label==hint[idx_sorted]]# Index that agree on both hint and pseudo label

  # If no new nodes to be labeled, skip this iteration
  if len(idx_agree)<=10:
    no_improvement_round+=1
    iteration+=1
    print('No new nodes can be labeled')
    continue
  else:
    no_improvement_round=0

  #Get pesudo labels that agree with hint
  pseudo_label_agree=pseudo_label[pseudo_label==hint[idx_sorted]]

  # Record the corresponding prediction probability for roc-auc calculation
  probs=np.append(probs,prob_e[idx_agree],axis=0)

  #Update labels
  y_train=np.append(y_train, pseudo_label_agree)
  pseudo_labels=np.append(pseudo_labels,pseudo_label_agree)
  y_test=np.delete(y_test,idx_agree)
  #Also update labels in "dataset" so that we can use the pseudo labels to train the model
  for i in range(len(idx_agree)):
    curr=X_test[idx_agree[i]]
    dataset.label[curr]=pseudo_label_agree[i]

  #Update indexes
  X_train=np.append(X_train, X_test[idx_agree])#Update training index
  X_test=np.delete(X_test, idx_agree)#Update test index


  print(f'New labeled fraud nodes: {len(pseudo_label_agree[pseudo_label_agree==1])},'
     f'New labeled non-fraud nodes: {len(pseudo_label_agree[pseudo_label_agree==0])}')
  print(f'Total new labeled nodes: {len(idx_agree)}, remaining test nodes: {len(X_test)}')
  iteration+=1
  
  # If all test nodes are lebeled, stop training
  if len(X_test)==0:
    keep_training=0

  # Clear gpu memory  
  del model1,model2,model3,optimizer1, optimizer2, optimizer3, result1, result2,result3,prob1,prob2,prob3,split_idx,train_idx
  #del model1, model2, optimizer1, optimizer2
  #Not sure how much is enough :D
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  torch.cuda.empty_cache()


  end_time=time.time()
  print(f'Time consumption: {int(end_time-start_time)} seconds')

********* Iteration 1 *********

GNN training nodes: 4777
Model 1, Epoch: 10, Loss: 0.2222, Train AUC: 79.33%, Test AUC: 79.11%
Model 1, Epoch: 20, Loss: 0.2185, Train AUC: 79.74%, Test AUC: 79.49%
Model 1, Epoch: 30, Loss: 0.2193, Train AUC: 80.17%, Test AUC: 79.88%
Model 1, Epoch: 40, Loss: 0.2295, Train AUC: 80.41%, Test AUC: 80.13%
Model 1, Epoch: 50, Loss: 0.2313, Train AUC: 80.66%, Test AUC: 80.36%
Model 2, Epoch: 10, Loss: 0.2091, Train AUC: 82.31%, Test AUC: 82.45%
Model 2, Epoch: 20, Loss: 0.1966, Train AUC: 83.63%, Test AUC: 83.53%
Model 2, Epoch: 30, Loss: 0.1936, Train AUC: 84.15%, Test AUC: 84.05%
Model 2, Epoch: 40, Loss: 0.1916, Train AUC: 84.48%, Test AUC: 84.38%
Model 2, Epoch: 50, Loss: 0.1900, Train AUC: 84.73%, Test AUC: 84.63%
Model 3, Epoch: 10, Loss: 0.0504, Train AUC: 99.97%, Test AUC: 84.15%
Model 3, Epoch: 20, Loss: 0.0218, Train AUC: 100.00%, Test AUC: 84.29%
Model 3, Epoch: 30, Loss: 0.0126, Train AUC: 100.00%, Test AUC: 84.32%
Model 3, Epoch: 40, Loss: 0.00

# Check the remaining unlabeled test nodes

In [24]:
X_test.shape

(1977,)

# Train on the same ensemble models for remaining unlabeled nodes

In [25]:
split_idx={}
split_idx['train']=torch.tensor(np.array(X_train)).to(device)
split_idx['test']=torch.tensor(np.array(X_test)).to(device)

train_idx = split_idx['test']

In [26]:
model1 = GCN(in_channels=d,hidden_channels=hidden_channels,out_channels=c,num_layers=num_layers,dropout=dropout,use_bn=False).to(device)
optimizer1 = torch.optim.AdamW(model1.parameters(), lr=lr, weight_decay=weight_decay)
print(f'Training nodes: {len(X_test)}')
for epoch in range(100):       
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0    
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
              
      model1.train()
      optimizer1.zero_grad()
      out = model1(dataset)
      torch.cuda.empty_cache()
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label

      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))
      torch.cuda.empty_cache()
      loss.backward()
      torch.cuda.empty_cache()
      optimizer1.step()
      torch.cuda.empty_cache()
      epoch_loss += loss.item() 
      torch.cuda.empty_cache()
    
    result = evaluate(model1, dataset, split_idx, eval_func)
    torch.cuda.empty_cache()
    print(f'Model 1, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')
    torch.cuda.empty_cache()

Training nodes: 1977
Model 1, Epoch: 01, Loss: 12.2427, Train AUC: 77.25%, Test AUC: 76.77%
Model 1, Epoch: 02, Loss: 5.2499, Train AUC: 78.68%, Test AUC: 76.82%
Model 1, Epoch: 03, Loss: 7.3204, Train AUC: 78.95%, Test AUC: 76.91%
Model 1, Epoch: 04, Loss: 6.6649, Train AUC: 79.75%, Test AUC: 78.40%
Model 1, Epoch: 05, Loss: 5.8962, Train AUC: 80.33%, Test AUC: 79.10%
Model 1, Epoch: 06, Loss: 4.8892, Train AUC: 80.77%, Test AUC: 79.75%
Model 1, Epoch: 07, Loss: 3.3992, Train AUC: 81.20%, Test AUC: 80.02%
Model 1, Epoch: 08, Loss: 1.9552, Train AUC: 81.67%, Test AUC: 79.93%
Model 1, Epoch: 09, Loss: 0.8404, Train AUC: 81.70%, Test AUC: 79.64%
Model 1, Epoch: 10, Loss: 0.9637, Train AUC: 81.77%, Test AUC: 79.66%
Model 1, Epoch: 11, Loss: 0.8986, Train AUC: 82.82%, Test AUC: 80.65%
Model 1, Epoch: 12, Loss: 0.6092, Train AUC: 82.29%, Test AUC: 80.10%
Model 1, Epoch: 13, Loss: 0.6383, Train AUC: 82.26%, Test AUC: 80.09%
Model 1, Epoch: 14, Loss: 0.5224, Train AUC: 82.63%, Test AUC: 80.58

In [27]:
#model2 = GCN(in_channels=d,hidden_channels=hidden_channels,out_channels=c,num_layers=num_layers,dropout=dropout,use_bn=False).to(device)
model2 = SGC(in_channels=d, out_channels=c, hops=1).to(device)
optimizer2 = torch.optim.AdamW(model2.parameters(), lr=lr, weight_decay=weight_decay)

print(f'Training nodes: {len(train_idx)}')
for epoch in range(100):       
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0    
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
              
      model2.train()
      optimizer2.zero_grad()
      out = model2(dataset)
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label

      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))

      loss.backward()

      optimizer2.step()

      epoch_loss += loss.item() 
  
    
    result = evaluate(model2, dataset, split_idx, eval_func)
    print(f'Model 2, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')

Training nodes: 1977
Model 2, Epoch: 01, Loss: 49.8101, Train AUC: 37.01%, Test AUC: 42.34%
Model 2, Epoch: 02, Loss: 33.2344, Train AUC: 24.22%, Test AUC: 27.66%
Model 2, Epoch: 03, Loss: 17.0822, Train AUC: 19.17%, Test AUC: 20.13%
Model 2, Epoch: 04, Loss: 5.2705, Train AUC: 71.04%, Test AUC: 71.50%
Model 2, Epoch: 05, Loss: 1.7668, Train AUC: 79.21%, Test AUC: 79.53%
Model 2, Epoch: 06, Loss: 2.5748, Train AUC: 80.59%, Test AUC: 80.74%
Model 2, Epoch: 07, Loss: 3.0570, Train AUC: 81.17%, Test AUC: 81.18%
Model 2, Epoch: 08, Loss: 3.2921, Train AUC: 81.51%, Test AUC: 81.39%
Model 2, Epoch: 09, Loss: 3.3530, Train AUC: 81.71%, Test AUC: 81.52%
Model 2, Epoch: 10, Loss: 3.2898, Train AUC: 81.85%, Test AUC: 81.64%
Model 2, Epoch: 11, Loss: 3.1370, Train AUC: 81.97%, Test AUC: 81.77%
Model 2, Epoch: 12, Loss: 2.9186, Train AUC: 82.14%, Test AUC: 81.85%
Model 2, Epoch: 13, Loss: 2.6513, Train AUC: 82.29%, Test AUC: 81.96%
Model 2, Epoch: 14, Loss: 2.3470, Train AUC: 82.45%, Test AUC: 82.

In [28]:
#model2 = GCN(in_channels=d,hidden_channels=hidden_channels,out_channels=c,num_layers=num_layers,dropout=dropout,use_bn=False).to(device)
model3 = LINK(n, c).to(device)
optimizer3 = torch.optim.AdamW(model3.parameters(), lr=lr, weight_decay=weight_decay)

print(f'Training nodes: {len(train_idx)}')
for epoch in range(100):       
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0    
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
              
      model3.train()
      optimizer3.zero_grad()
      out = model3(dataset)
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label

      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))

      loss.backward()

      optimizer3.step()

      epoch_loss += loss.item() 
  
    
    result = evaluate(model3, dataset, split_idx, eval_func)
    print(f'Model 3, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')

Training nodes: 1977
Model 3, Epoch: 01, Loss: 0.6608, Train AUC: 80.66%, Test AUC: 79.47%
Model 3, Epoch: 02, Loss: 0.7948, Train AUC: 83.05%, Test AUC: 84.29%
Model 3, Epoch: 03, Loss: 0.6181, Train AUC: 84.83%, Test AUC: 88.33%
Model 3, Epoch: 04, Loss: 0.3953, Train AUC: 84.41%, Test AUC: 91.48%
Model 3, Epoch: 05, Loss: 0.3585, Train AUC: 83.60%, Test AUC: 92.37%
Model 3, Epoch: 06, Loss: 0.2734, Train AUC: 86.63%, Test AUC: 96.25%
Model 3, Epoch: 07, Loss: 0.2031, Train AUC: 86.35%, Test AUC: 97.02%
Model 3, Epoch: 08, Loss: 0.1827, Train AUC: 86.22%, Test AUC: 98.42%
Model 3, Epoch: 09, Loss: 0.1498, Train AUC: 85.94%, Test AUC: 99.47%
Model 3, Epoch: 10, Loss: 0.1297, Train AUC: 85.92%, Test AUC: 99.75%
Model 3, Epoch: 11, Loss: 0.1106, Train AUC: 86.47%, Test AUC: 99.91%
Model 3, Epoch: 12, Loss: 0.0958, Train AUC: 86.66%, Test AUC: 99.97%
Model 3, Epoch: 13, Loss: 0.0881, Train AUC: 86.82%, Test AUC: 99.98%
Model 3, Epoch: 14, Loss: 0.0800, Train AUC: 86.95%, Test AUC: 99.98%

In [29]:
#model2 = GCN(in_channels=d,hidden_channels=hidden_channels,out_channels=c,num_layers=num_layers,dropout=dropout,use_bn=False).to(device)
model4 = LINK(n, c).to(device)
optimizer4 = torch.optim.AdamW(model4.parameters(), lr=lr, weight_decay=weight_decay)

print(f'Training nodes: {len(train_idx)}')
for epoch in range(100):       
    num_batches = int(len(train_idx) / batch_size) + 1
    epoch_loss = 0.0    
    for batch in range(num_batches):
      i_start = batch * batch_size
      i_end = min((batch + 1) * batch_size, len(train_idx))
      batch_nodes = train_idx[i_start:i_end]
              
      model3.train()
      optimizer3.zero_grad()
      out = model3(dataset)
      if dataset.label.shape[1] == 1:
          true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
      else:
          true_label = dataset.label

      loss = criterion(out[batch_nodes], true_label.squeeze(1)[batch_nodes].to(torch.float))

      loss.backward()

      optimizer3.step()

      epoch_loss += loss.item() 
  
    
    result = evaluate(model3, dataset, split_idx, eval_func)
    print(f'Model 4, '
        f'Epoch: {epoch+1:02d}, '
        f'Loss: {epoch_loss / num_batches:.4f}, '
        f'Train AUC: {100 * result[0]:.2f}%, '
        f'Test AUC: {100 * result[1]:.2f}%')

Training nodes: 1977
Model 4, Epoch: 01, Loss: 0.0062, Train AUC: 86.61%, Test AUC: 100.00%
Model 4, Epoch: 02, Loss: 0.0061, Train AUC: 86.61%, Test AUC: 100.00%
Model 4, Epoch: 03, Loss: 0.0060, Train AUC: 86.61%, Test AUC: 100.00%
Model 4, Epoch: 04, Loss: 0.0060, Train AUC: 86.61%, Test AUC: 100.00%
Model 4, Epoch: 05, Loss: 0.0059, Train AUC: 86.61%, Test AUC: 100.00%
Model 4, Epoch: 06, Loss: 0.0058, Train AUC: 86.61%, Test AUC: 100.00%
Model 4, Epoch: 07, Loss: 0.0057, Train AUC: 86.60%, Test AUC: 100.00%
Model 4, Epoch: 08, Loss: 0.0056, Train AUC: 86.60%, Test AUC: 100.00%
Model 4, Epoch: 09, Loss: 0.0056, Train AUC: 86.60%, Test AUC: 100.00%
Model 4, Epoch: 10, Loss: 0.0055, Train AUC: 86.60%, Test AUC: 100.00%
Model 4, Epoch: 11, Loss: 0.0054, Train AUC: 86.60%, Test AUC: 100.00%
Model 4, Epoch: 12, Loss: 0.0053, Train AUC: 86.59%, Test AUC: 100.00%
Model 4, Epoch: 13, Loss: 0.0053, Train AUC: 86.59%, Test AUC: 100.00%
Model 4, Epoch: 14, Loss: 0.0052, Train AUC: 86.59%, Tes

In [30]:
result1 = evaluate(model1, dataset, split_idx, eval_func)
prob1=result1[2][X_test]
prob1=F.softmax(prob1, dim=-1).detach().cpu().numpy()

result2 = evaluate(model2, dataset, split_idx, eval_func)
prob2=result2[2][X_test]
prob2=F.softmax(prob2, dim=-1).detach().cpu().numpy()

result3 = evaluate(model3, dataset, split_idx, eval_func)
prob3=result3[2][X_test]
prob3=F.softmax(prob3, dim=-1).detach().cpu().numpy()

result4 = evaluate(model4, dataset, split_idx, eval_func)
prob4=result4[2][X_test]
prob4=F.softmax(prob4, dim=-1).detach().cpu().numpy()

prob_e=(prob1+prob2+prob3+prob4)/4
pred=prob_e.argmax(axis=1)

#Get true labels for nodes predicted in ensemble models and care-gnn
true_before=np.array([label_origin[i] for i in X_train[4777:]])#The first 4777 nodes in X_train are original labeled training nodes
true_remain=np.array([label_origin[i] for i in X_test])#Remaining nodes are predicted by care-gnn
final_true_all=np.append(true_before,true_remain)
#Get all predicted labels
final_predict_all=np.append(pseudo_labels,pred)

print(classification_report(final_true_all,final_predict_all))

              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98      6674
         1.0       0.97      0.47      0.64       493

    accuracy                           0.96      7167
   macro avg       0.96      0.74      0.81      7167
weighted avg       0.96      0.96      0.96      7167



In [31]:
final_probs=np.append(probs,prob_e,axis=0)
print(roc_auc_score(final_true_all, final_probs[:,1]))

0.9245295691980201


# Train CARE-GNN for the remaining unlabeled nodes
# Reference:https://github.com/YingtongDou/CARE-GNN

In [None]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def sparse_to_adjlist(sp_matrix, filename):
    """
    Transfer sparse matrix to adjacency list
    :param sp_matrix: the sparse matrix
    :param filename: the filename of adjlist
    """
    # add self loop
    homo_adj = sp_matrix + sp.eye(sp_matrix.shape[0])
    # create adj_list
    adj_lists = defaultdict(set)
    edges = homo_adj.nonzero()
    for index, node in enumerate(edges[0]):
        adj_lists[node].add(edges[1][index])
        adj_lists[edges[1][index]].add(node)
    with open(filename, 'wb') as file:
        pickle.dump(adj_lists, file)
    file.close()
    



net_upu = amazon['net_upu']
net_usu = amazon['net_usu']
net_uvu = amazon['net_uvu']
amz_homo = amazon['homo']

sparse_to_adjlist(net_upu, prefix_1 + 'amz_upu_adjlists.pickle')
sparse_to_adjlist(net_usu, prefix_1 + 'amz_usu_adjlists.pickle')
sparse_to_adjlist(net_uvu, prefix_1 + 'amz_uvu_adjlists.pickle')
sparse_to_adjlist(amz_homo, prefix_1 + 'amz_homo_adjlists.pickle')

In [None]:
def load_data_CARE():
    """
    Load graph, feature, and label given dataset name
    :returns: home and single-relation graphs, feature, label
    """
    prefix1 = prefix_2 # Folder where .mat files are stored
    prefix2 = prefix_1 # Folder where .pickle files are stored
    data_file = loadmat(prefix1 + 'Amazon.mat')
    labels = data_file['label'].flatten()
    feat_data = data_file['features'].todense().A
    # load the preprocessed adj_lists
    with open(prefix2 + 'amz_homo_adjlists.pickle', 'rb') as file:
        homo = pickle.load(file)
    file.close()
    with open(prefix2 + 'amz_upu_adjlists.pickle', 'rb') as file:
            relation1 = pickle.load(file)
    file.close()
    with open(prefix2 + 'amz_usu_adjlists.pickle', 'rb') as file:
        relation2 = pickle.load(file)
    file.close()
    with open(prefix2 + 'amz_uvu_adjlists.pickle', 'rb') as file:
        relation3 = pickle.load(file)

    return [homo, relation1, relation2, relation3], feat_data, labels

def normalize(mx):
    """
    Row-normalize sparse matrix
    Code from https://github.com/williamleif/graphsage-simple/
    """
    rowsum = np.array(mx.sum(1)) + 0.01
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx


def test_care(test_cases, labels, model, batch_size):
    """
    Test the performance of CARE-GNN and its variants
    :param test_cases: a list of testing node
    :param labels: a list of testing node labels
    :param model: the GNN model
    :param batch_size: number nodes in a batch
    :returns: the AUC and Recall of GNN and Simi modules
    """

    test_batch_num = int(len(test_cases) / batch_size) + 1
    f1_gnn = 0.0
    acc_gnn = 0.0
    recall_gnn = 0.0
    f1_label1 = 0.0
    acc_label1 = 0.00
    recall_label1 = 0.0
    gnn_list = []
    label_list1 = []
    predicted_labels=[]
    predict_prob=[]
    for iteration in range(test_batch_num):
        i_start = iteration * batch_size
        i_end = min((iteration + 1) * batch_size, len(test_cases))
        batch_nodes = test_cases[i_start:i_end]
        batch_label = labels[i_start:i_end]
        gnn_prob, label_prob1 = model.to_prob(batch_nodes, batch_label, train_flag=False)

        f1_gnn += f1_score(batch_label, gnn_prob.data.cpu().numpy().argmax(axis=1), average="macro")
        acc_gnn += accuracy_score(batch_label, gnn_prob.data.cpu().numpy().argmax(axis=1))
        recall_gnn += recall_score(batch_label, gnn_prob.data.cpu().numpy().argmax(axis=1), average="macro")

        f1_label1 += f1_score(batch_label, label_prob1.data.cpu().numpy().argmax(axis=1), average="macro")
        acc_label1 += accuracy_score(batch_label, label_prob1.data.cpu().numpy().argmax(axis=1))
        recall_label1 += recall_score(batch_label, label_prob1.data.cpu().numpy().argmax(axis=1), average="macro")

        gnn_list.extend(gnn_prob.data.cpu().numpy()[:, 1].tolist())
        label_list1.extend(label_prob1.data.cpu().numpy()[:, 1].tolist())
        predicted_labels.extend(gnn_prob.data.cpu().numpy().argmax(axis=1))
        predict_prob.extend(gnn_prob)
    auc_gnn = roc_auc_score(labels, np.array(gnn_list))
    ap_gnn = average_precision_score(labels, np.array(gnn_list))
    auc_label1 = roc_auc_score(labels, np.array(label_list1))
    ap_label1 = average_precision_score(labels, np.array(label_list1))
    print(f"GNN F1: {f1_gnn / test_batch_num:.4f}")
    print(f"GNN Accuracy: {acc_gnn / test_batch_num:.4f}")
    print(f"GNN Recall: {recall_gnn / test_batch_num:.4f}")
    print(f"GNN auc: {auc_gnn:.4f}")
    print(f"GNN ap: {ap_gnn:.4f}")
    print(f"Label1 F1: {f1_label1 / test_batch_num:.4f}")
    print(f"Label1 Accuracy: {acc_label1 / test_batch_num:.4f}")
    print(f"Label1 Recall: {recall_label1 / test_batch_num:.4f}")
    print(f"Label1 auc: {auc_label1:.4f}")
    print(f"Label1 ap: {ap_label1:.4f}")

    return auc_gnn, auc_label1, recall_gnn, recall_label1, predicted_labels,predict_prob

In [None]:
class InterAgg(nn.Module):

    def __init__(self, features, feature_dim,
                embed_dim, adj_lists, intraggs,
                inter='GNN', step_size=0.02, cuda=True):
        """
        Initialize the inter-relation aggregator
        :param features: the input node features or embeddings for all nodes
        :param feature_dim: the input dimension
        :param embed_dim: the output dimension
        :param adj_lists: a list of adjacency lists for each single-relation graph
        :param intraggs: the intra-relation aggregators used by each single-relation graph
        :param inter: the aggregator type: 'Att', 'Weight', 'Mean', 'GNN'
        :param step_size: the RL action step size
        :param cuda: whether to use GPU
        """
        super(InterAgg, self).__init__()

        self.features = features
        self.dropout = 0.6
        self.adj_lists = adj_lists
        self.intra_agg1 = intraggs[0]
        self.intra_agg2 = intraggs[1]
        self.intra_agg3 = intraggs[2]
        self.embed_dim = embed_dim
        self.feat_dim = feature_dim
        self.inter = inter
        self.step_size = step_size
        self.cuda = cuda
        self.intra_agg1.cuda = cuda
        self.intra_agg2.cuda = cuda
        self.intra_agg3.cuda = cuda

        # RL condition flag
        self.RL = True

        # number of batches for current epoch, assigned during training
        self.batch_num = 0

        # initial filtering thresholds
        self.thresholds = [0.5, 0.5, 0.5]

        # the activation function used by attention mechanism
        self.leakyrelu = nn.LeakyReLU(0.2)

        # parameter used to transform node embeddings before inter-relation aggregation
        self.weight = nn.Parameter(torch.FloatTensor(self.feat_dim, self.embed_dim))
        init.xavier_uniform_(self.weight)

        # weight parameter for each relation used by CARE-Weight
        self.alpha = nn.Parameter(torch.FloatTensor(self.embed_dim, 3))
        init.xavier_uniform_(self.alpha)

        # parameters used by attention layer
        self.a = nn.Parameter(torch.FloatTensor(2 * self.embed_dim, 1))
        init.xavier_uniform_(self.a)

        # label predictor for similarity measure
        self.label_clf = nn.Linear(self.feat_dim, 2)

        # initialize the parameter logs
        self.weights_log = []
        self.thresholds_log = [self.thresholds]
        self.relation_score_log = []
        
    def forward(self, nodes, labels, train_flag=True):
        """
        :param nodes: a list of batch node ids
        :param labels: a list of batch node labels, only used by the RLModule
        :param train_flag: indicates whether in training or testing mode
        :return combined: the embeddings of a batch of input node features
        :return center_scores: the label-aware scores of batch nodes
        """

    # extract 1-hop neighbor ids from adj lists of each single-relation graph
        to_neighs = []
        for adj_list in self.adj_lists:
            to_neighs.append([set(adj_list[int(node)]) for node in nodes])

        # find unique nodes and their neighbors used in current batch
        unique_nodes = set.union(set.union(*to_neighs[0]), set.union(*to_neighs[1]),
                                 set.union(*to_neighs[2], set(nodes)))

        # calculate label-aware scores
        if self.cuda:
            batch_features = self.features(torch.cuda.LongTensor(list(unique_nodes)))
        else:
            batch_features = self.features(torch.LongTensor(list(unique_nodes)))
        batch_scores = self.label_clf(batch_features)
        id_mapping = {node_id: index for node_id, index in zip(unique_nodes, range(len(unique_nodes)))}

        # the label-aware scores for current batch of nodes
        center_scores = batch_scores[itemgetter(*nodes)(id_mapping), :]

        # get neighbor node id list for each batch node and relation
        r1_list = [list(to_neigh) for to_neigh in to_neighs[0]]
        r2_list = [list(to_neigh) for to_neigh in to_neighs[1]]
        r3_list = [list(to_neigh) for to_neigh in to_neighs[2]]

        # assign label-aware scores to neighbor nodes for each batch node and relation
        r1_scores = [batch_scores[itemgetter(*to_neigh)(id_mapping), :].view(-1, 2) for to_neigh in r1_list]
        r2_scores = [batch_scores[itemgetter(*to_neigh)(id_mapping), :].view(-1, 2) for to_neigh in r2_list]
        r3_scores = [batch_scores[itemgetter(*to_neigh)(id_mapping), :].view(-1, 2) for to_neigh in r3_list]

        # count the number of neighbors kept for aggregation for each batch node and relation
        r1_sample_num_list = [math.ceil(len(neighs) * self.thresholds[0]) for neighs in r1_list]
        r2_sample_num_list = [math.ceil(len(neighs) * self.thresholds[1]) for neighs in r2_list]
        r3_sample_num_list = [math.ceil(len(neighs) * self.thresholds[2]) for neighs in r3_list]

        # intra-aggregation steps for each relation
        # Eq. (8) in the paper
        r1_feats, r1_scores = self.intra_agg1.forward(nodes, r1_list, center_scores, r1_scores, r1_sample_num_list)
        r2_feats, r2_scores = self.intra_agg2.forward(nodes, r2_list, center_scores, r2_scores, r2_sample_num_list)
        r3_feats, r3_scores = self.intra_agg3.forward(nodes, r3_list, center_scores, r3_scores, r3_sample_num_list)

        # concat the intra-aggregated embeddings from each relation
        neigh_feats = torch.cat((r1_feats, r2_feats, r3_feats), dim=0)

        # get features or embeddings for batch nodes
        if self.cuda and isinstance(nodes, list):
            index = torch.LongTensor(nodes).cuda()
        else:
            index = torch.LongTensor(nodes)
        self_feats = self.features(index)

        # number of nodes in a batch
        n = len(nodes)

        # inter-relation aggregation steps
        # Eq. (9) in the paper
        if self.inter == 'Att':
            # 1) CARE-Att Inter-relation Aggregator
            combined, attention = att_inter_agg(len(self.adj_lists), self.leakyrelu, self_feats, neigh_feats, self.embed_dim,
                                                self.weight, self.a, n, self.dropout, self.training, self.cuda)
        elif self.inter == 'Weight':
            # 2) CARE-Weight Inter-relation Aggregator
            combined = weight_inter_agg(len(self.adj_lists), self_feats, neigh_feats, self.embed_dim, self.weight, self.alpha, n, self.cuda)
            gem_weights = F.softmax(torch.sum(self.alpha, dim=0), dim=0).tolist()
            if train_flag:
                print(f'Weights: {gem_weights}')
        elif self.inter == 'Mean':
            # 3) CARE-Mean Inter-relation Aggregator
            combined = mean_inter_agg(len(self.adj_lists), self_feats, neigh_feats, self.embed_dim, self.weight, n, self.cuda)
        elif self.inter == 'GNN':
            # 4) CARE-GNN Inter-relation Aggregator
            combined = threshold_inter_agg(len(self.adj_lists), self_feats, neigh_feats, self.embed_dim, self.weight, self.thresholds, n, self.cuda)

        # the reinforcement learning module
        if self.RL and train_flag:
            relation_scores, rewards, thresholds, stop_flag = RLModule([r1_scores, r2_scores, r3_scores],
                                                                        self.relation_score_log, labels, self.thresholds,
                                                                        self.batch_num, self.step_size)
            self.thresholds = thresholds
            self.RL = stop_flag
            self.relation_score_log.append(relation_scores)
            self.thresholds_log.append(self.thresholds)

        return combined, center_scores

class IntraAgg(nn.Module):

    def __init__(self, features, feat_dim, cuda=False):
        """
        Initialize the intra-relation aggregator
        :param features: the input node features or embeddings for all nodes
        :param feat_dim: the input dimension
        :param cuda: whether to use GPU
        """
        super(IntraAgg, self).__init__()

        self.features = features
        self.cuda = cuda
        self.feat_dim = feat_dim

    def forward(self, nodes, to_neighs_list, batch_scores, neigh_scores, sample_list):
        """
        Code partially from https://github.com/williamleif/graphsage-simple/
        :param nodes: list of nodes in a batch
        :param to_neighs_list: neighbor node id list for each batch node in one relation
        :param batch_scores: the label-aware scores of batch nodes
        :param neigh_scores: the label-aware scores 1-hop neighbors each batch node in one relation
        :param sample_list: the number of neighbors kept for each batch node in one relation
        :return to_feats: the aggregated embeddings of batch nodes neighbors in one relation
        :return samp_scores: the average neighbor distances for each relation after filtering
        """

        # filer neighbors under given relation
        samp_neighs, samp_scores = filter_neighs_ada_threshold(batch_scores, neigh_scores, to_neighs_list, sample_list)

        # find the unique nodes among batch nodes and the filtered neighbors
        unique_nodes_list = list(set.union(*samp_neighs))
        unique_nodes = {n: i for i, n in enumerate(unique_nodes_list)}

        # intra-relation aggregation only with sampled neighbors
        mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes)))
        column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh]
        row_indices = [i for i in range(len(samp_neighs)) for _ in range(len(samp_neighs[i]))]
        mask[row_indices, column_indices] = 1
        if self.cuda:
            mask = mask.cuda()
        num_neigh = mask.sum(1, keepdim=True)
        mask = mask.div(num_neigh)
        if self.cuda:
            embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda())
        else:
            embed_matrix = self.features(torch.LongTensor(unique_nodes_list))
        to_feats = mask.mm(embed_matrix)
        to_feats = F.relu(to_feats)
        return to_feats, samp_scores

def RLModule(scores, scores_log, labels, thresholds, batch_num, step_size):
    """
    The reinforcement learning module.
    It updates the neighbor filtering threshold for each relation based
    on the average neighbor distances between two consecutive epochs.
    :param scores: the neighbor nodes label-aware scores for each relation
    :param scores_log: a list stores the relation average distances for each batch
    :param labels: the batch node labels used to select positive nodes
    :param thresholds: the current neighbor filtering thresholds for each relation
    :param batch_num: numbers batches in an epoch
    :param step_size: the RL action step size
    :return relation_scores: the relation average distances for current batch
    :return rewards: the reward for given thresholds in current epoch
    :return new_thresholds: the new filtering thresholds updated according to the rewards
    :return stop_flag: the RL terminal condition flag
    """

    relation_scores = []
    stop_flag = True
    # only compute the average neighbor distances for positive nodes
    pos_index = (labels == 1).nonzero().tolist()
    pos_index = [i[0] for i in pos_index]

    # compute average neighbor distances for each relation
    for score in scores:
        pos_scores = itemgetter(*pos_index)(score)
        neigh_count = sum([1 if isinstance(i, float) else len(i) for i in pos_scores])
        pos_sum = [i if isinstance(i, float) else sum(i) for i in pos_scores]
        relation_scores.append(sum(pos_sum) / neigh_count)

    if len(scores_log) % batch_num != 0 or len(scores_log) < 2 * batch_num:
        # do not call RL module within the epoch or within the first two epochs
        rewards = [0, 0, 0]
        new_thresholds = thresholds
    else:
        # update thresholds according to average scores in last epoch
        # Eq.(5) in the paper
        previous_epoch_scores = [sum(s) / batch_num for s in zip(*scores_log[-2 * batch_num:-batch_num])]
        current_epoch_scores = [sum(s) / batch_num for s in zip(*scores_log[-batch_num:])]

        # compute reward for each relation and update the thresholds according to reward
        # Eq. (6) in the paper
        rewards = [1 if previous_epoch_scores[i] - s >= 0 else -1 for i, s in enumerate(current_epoch_scores)]
        new_thresholds = [thresholds[i] + step_size if r == 1 else thresholds[i] - step_size for i, r in enumerate(rewards)]

        # avoid overflow
        new_thresholds = [0.999 if i > 1 else i for i in new_thresholds]
        new_thresholds = [0.001 if i < 0 else i for i in new_thresholds]

        print(f'epoch scores: {current_epoch_scores}')
        print(f'rewards: {rewards}')
        print(f'thresholds: {new_thresholds}')

    # TODO: add terminal condition

    return relation_scores, rewards, new_thresholds, stop_flag

def filter_neighs_ada_threshold(center_scores, neigh_scores, neighs_list, sample_list):
    """
    Filter neighbors according label predictor result with adaptive thresholds
    :param center_scores: the label-aware scores of batch nodes
    :param neigh_scores: the label-aware scores 1-hop neighbors each batch node in one relation
    :param neighs_list: neighbor node id list for each batch node in one relation
    :param sample_list: the number of neighbors kept for each batch node in one relation
    :return samp_neighs: the neighbor indices and neighbor simi scores
    :return samp_scores: the average neighbor distances for each relation after filtering
    """

    samp_neighs = []
    samp_scores = []
    for idx, center_score in enumerate(center_scores):
        center_score = center_scores[idx][0]
        neigh_score = neigh_scores[idx][:, 0].view(-1, 1)
        center_score = center_score.repeat(neigh_score.size()[0], 1)
        neighs_indices = neighs_list[idx]
        num_sample = sample_list[idx]

        # compute the L1-distance of batch nodes and their neighbors
        # Eq. (2) in paper
        score_diff = torch.abs(center_score - neigh_score).squeeze()
        sorted_scores, sorted_indices = torch.sort(score_diff, dim=0, descending=False)
        selected_indices = sorted_indices.tolist()

        # top-p sampling according to distance ranking and thresholds
        # Section 3.3.1 in paper
        if len(neigh_scores[idx]) > num_sample + 1:
            selected_neighs = [neighs_indices[n] for n in selected_indices[:num_sample]]
            selected_scores = sorted_scores.tolist()[:num_sample]
        else:
            selected_neighs = neighs_indices
            selected_scores = score_diff.tolist()
            if isinstance(selected_scores, float):
                selected_scores = [selected_scores]

        samp_neighs.append(set(selected_neighs))
        samp_scores.append(selected_scores)

    return samp_neighs, samp_scores

def mean_inter_agg(num_relations, self_feats, neigh_feats, embed_dim, weight, n, cuda):
    """
    Mean inter-relation aggregator
    :param num_relations: number of relations in the graph
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param n: number of nodes in a batch
    :param cuda: whether use GPU
    :return: inter-relation aggregated node embeddings
    """

# transform batch node embedding and neighbor embedding in each relation with weight parameter
    center_h = torch.mm(self_feats, weight)
    neigh_h = torch.mm(neigh_feats, weight)

    # initialize the final neighbor embedding
    if cuda:
        aggregated = torch.zeros(size=(n, embed_dim)).cuda()
    else:
        aggregated = torch.zeros(size=(n, embed_dim))

    # sum neighbor embeddings together
    for r in range(num_relations):
        aggregated += neigh_h[r * n:(r + 1) * n, :]

    # sum aggregated neighbor embedding and batch node embedding
    # take the average of embedding and feed them to activation function
    combined = F.relu((center_h + aggregated) / 4.0)

    return combined

def weight_inter_agg(num_relations, self_feats, neigh_feats, embed_dim, weight, alpha, n, cuda):
    """
    Weight inter-relation aggregator
    Reference: https://arxiv.org/abs/2002.12307
    :param num_relations: number of relations in the graph
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param alpha: weight parameter for each relation used by CARE-Weight
    :param n: number of nodes in a batch
    :param cuda: whether use GPU
    :return: inter-relation aggregated node embeddings
    """

    # transform batch node embedding and neighbor embedding in each relation with weight parameter
    center_h = torch.mm(self_feats, weight)
    neigh_h = torch.mm(neigh_feats, weight)

    # compute relation weights using softmax
    w = F.softmax(alpha, dim=1)

    # initialize the final neighbor embedding
    if cuda:
        aggregated = torch.zeros(size=(n, embed_dim)).cuda()
    else:
        aggregated = torch.zeros(size=(n, embed_dim))

    # add weighted neighbor embeddings in each relation together
    for r in range(num_relations):
        aggregated += neigh_h[r * n:(r + 1) * n, :] * w[:, r]

    # sum aggregated neighbor embedding and batch node embedding
    # feed them to activation function
    combined = F.relu(center_h + aggregated)

    return combined

def att_inter_agg(num_relations, att_layer, self_feats, neigh_feats, embed_dim, weight, a, n, dropout, training, cuda):
    """
    Attention-based inter-relation aggregator
    Reference: https://github.com/Diego999/pyGAT
    :param num_relations: num_relations: number of relations in the graph
    :param att_layer: the activation function used by the attention layer
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param a: parameters used by attention layer
    :param n: number of nodes in a batch
    :param dropout: dropout for attention layer
    :param training: a flag indicating whether in the training or testing mode
    :param cuda: whether use GPU
    :return combined: inter-relation aggregated node embeddings
    :return att: the attention weights for each relation
    """

    # transform batch node embedding and neighbor embedding in each relation with weight parameter
    center_h = torch.mm(self_feats, weight)
    neigh_h = torch.mm(neigh_feats, weight)

    import pdb
    pdb.set_trace()
    # compute attention weights
    combined = torch.cat((center_h.repeat(3, 1), neigh_h), dim=1)
    e = att_layer(combined.mm(a))
    attention = torch.cat((e[0:n, :], e[n:2 * n, :], e[2 * n:3 * n, :]), dim=1)
    ori_attention = F.softmax(attention, dim=1)
    attention = F.dropout(ori_attention, dropout, training=training)

    # initialize the final neighbor embedding
    if cuda:
        aggregated = torch.zeros(size=(n, embed_dim)).cuda()
    else:
        aggregated = torch.zeros(size=(n, embed_dim))

    # add neighbor embeddings in each relation together with attention weights
    for r in range(num_relations):
        aggregated += torch.mul(attention[:, r].unsqueeze(1).repeat(1, embed_dim), neigh_h[r * n:(r + 1) * n, :])

    # sum aggregated neighbor embedding and batch node embedding
    # feed them to activation function
    combined = F.relu((center_h + aggregated))

    # extract the attention weights
    att = F.softmax(torch.sum(ori_attention, dim=0), dim=0)

    return combined, att

def threshold_inter_agg(num_relations, self_feats, neigh_feats, embed_dim, weight, threshold, n, cuda):
    """
    CARE-GNN inter-relation aggregator
    Eq. (9) in the paper
    :param num_relations: number of relations in the graph
    :param self_feats: batch nodes features or embeddings
    :param neigh_feats: intra-relation aggregated neighbor embeddings for each relation
    :param embed_dim: the dimension of output embedding
    :param weight: parameter used to transform node embeddings before inter-relation aggregation
    :param threshold: the neighbor filtering thresholds used as aggregating weights
    :param n: number of nodes in a batch
    :param cuda: whether use GPU
    :return: inter-relation aggregated node embeddings
    """

    # transform batch node embedding and neighbor embedding in each relation with weight parameter
    center_h = torch.mm(self_feats, weight)
    neigh_h = torch.mm(neigh_feats, weight)

    # initialize the final neighbor embedding
    if cuda:
        aggregated = torch.zeros(size=(n, embed_dim)).cuda()
    else:
        aggregated = torch.zeros(size=(n, embed_dim))

    # add weighted neighbor embeddings in each relation together
    for r in range(num_relations):
        aggregated += neigh_h[r * n:(r + 1) * n, :] * threshold[r]

    # sum aggregated neighbor embedding and batch node embedding
    # feed them to activation function
    combined = F.relu(center_h + aggregated)

    return combined

In [None]:
#CARE-GNN model
class OneLayerCARE(nn.Module):
    """
    The CARE-GNN model in one layer
    """

    def __init__(self, num_classes, inter1, lambda_1):
        """
        Initialize the CARE-GNN model
        :param num_classes: number of classes (2 in our paper)
        :param inter1: the inter-relation aggregator that output the final embedding
        """
        super(OneLayerCARE, self).__init__()
        self.inter1 = inter1
        self.xent = nn.CrossEntropyLoss()

        # the parameter to transform the final embedding
        self.weight = nn.Parameter(torch.FloatTensor(inter1.embed_dim, num_classes))
        init.xavier_uniform_(self.weight)
        self.lambda_1 = lambda_1

    def forward(self, nodes, labels, train_flag=True):
        embeds1, label_scores = self.inter1(nodes, labels, train_flag)
        scores = torch.mm(embeds1, self.weight)
        return scores, label_scores

    def to_prob(self, nodes, labels, train_flag=True):
        gnn_scores, label_scores = self.forward(nodes, labels, train_flag)
        gnn_prob = nn.functional.softmax(gnn_scores, dim=1)
        label_prob = nn.functional.softmax(label_scores, dim=1)
        return gnn_prob, label_prob

    def loss(self, nodes, labels, train_flag=True):
        gnn_scores, label_scores = self.forward(nodes, labels, train_flag)
        # Simi loss, Eq. (4) in the paper
        label_loss = self.xent(label_scores, labels.squeeze())
        # GNN loss, Eq. (10) in the paper
        gnn_loss = self.xent(gnn_scores, labels.squeeze())
        # the loss function of CARE-GNN, Eq. (11) in the paper
        final_loss = gnn_loss + self.lambda_1 * label_loss
        return final_loss

In [None]:
#Training settings for CARE-GNN
data='Amazon'
model='CARE'
inter='GNN' #The inter-relation aggregator type. [Att, Weight, Mean, GNN]
#batch_size=1024
lr=0.01
lambda_1=2 #Simi loss weight
lambda_2=1e-3 #Weight decay (L2 loss weight)
emb_size=64 #Node embedding size at the last layer
step_size=2e-2 #Reinforcement Learning action step size
cuda=False#torch.cuda.is_available()
np.random.seed(66)
random.seed(66)

In [None]:
#Load graph, feature, and label
[homo, relation1, relation2, relation3], feat_data, labels = load_data_CARE()


In [None]:
#The labels are original True labels, need to update to our pseudo labels
for i in range(len(X_train)):
  labels[X_train[i]]=y_train[i]

In [None]:
#Care-GNN training
features = nn.Embedding(feat_data.shape[0], feat_data.shape[1])
feat_data = normalize(feat_data)
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
if cuda:
    features.cuda()

adj_lists = [relation1, relation2, relation3]

# build the model
intra1 = IntraAgg(features, feat_data.shape[1], cuda=cuda)
intra2 = IntraAgg(features, feat_data.shape[1], cuda=cuda)
intra3 = IntraAgg(features, feat_data.shape[1], cuda=cuda)
inter1 = InterAgg(features, feat_data.shape[1], emb_size, adj_lists, [intra1, intra2, intra3], inter=inter,
                    step_size=step_size, cuda=cuda)
gnn_model = OneLayerCARE(2, inter1, lambda_1)

if cuda:
  gnn_model.cuda()

optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, gnn_model.parameters()), lr=lr, weight_decay=lambda_2)

performance_log = []
# train the model
print(f'Training nodes: {len(X_train)}')
for epoch in range(30):
  # send number of batches to model to let the RLModule know the training progress
  inter1.batch_num = 1 #Equal to 1 because no mini-batch training
    
  optimizer.zero_grad()
  if cuda:
    loss = gnn_model.loss(X_train, Variable(torch.cuda.LongTensor(y_train)))
  else:
    loss = gnn_model.loss(X_train, Variable(torch.LongTensor(y_train)))
    loss.backward()
    optimizer.step()
  print(f'Epoch: {epoch}, loss: {loss.item() / num_batches}')

Training nodes: 10800
Epoch: 0, loss: 0.7939790089925131
Epoch: 1, loss: 0.7415876388549805
epoch scores: [0.044845915884961014, 0.03362197244169051, 0.02831463308126549]
rewards: [-1, -1, 1]
thresholds: [0.48, 0.48, 0.52]
Epoch: 2, loss: 0.6948471069335938
epoch scores: [0.04483712153481518, 0.03361620013488305, 0.02831135755831665]
rewards: [1, 1, 1]
thresholds: [0.5, 0.5, 0.54]
Epoch: 3, loss: 0.6539614597956339
epoch scores: [0.044067511305934025, 0.0327327867737886, 0.030278614244731328]
rewards: [1, 1, -1]
thresholds: [0.52, 0.52, 0.52]
Epoch: 4, loss: 0.615675171216329
epoch scores: [0.044760477206601194, 0.03356624467498198, 0.031156347028636296]
rewards: [-1, -1, -1]
thresholds: [0.5, 0.5, 0.5]
Epoch: 5, loss: 0.5825599431991577
epoch scores: [0.04607620833417217, 0.03455738643343571, 0.03022681219606561]
rewards: [-1, -1, 1]
thresholds: [0.48, 0.48, 0.52]
Epoch: 6, loss: 0.5548523267110189
epoch scores: [0.044533125776288617, 0.03341839957654582, 0.02820455473632984]
rewards:

In [None]:
#Predict using CARE-GNN
#_, _, _, _,predicted_labels,predict_prob = test_care(X_test, y_test, gnn_model, batch_size)
gnn_prob, label_prob = gnn_model.to_prob(X_test, y_test, train_flag=False)
care_label=gnn_prob.data.cpu().numpy().argmax(axis=1)

In [None]:
#Get true labels for nodes predicted in ensemble models and care-gnn
true_before=np.array([label_origin[i] for i in X_train[4777:]])#The first 4777 nodes in X_train are original labeled training nodes
true_remain=np.array([label_origin[i] for i in X_test])#Remaining nodes are predicted by care-gnn
final_true_all=np.append(true_before,true_remain)
#Get all predicted labels
final_predict_all=np.append(pseudo_labels,care_label)

In [None]:
print(classification_report(final_true_all,final_predict_all))

              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      6674
         1.0       0.97      0.34      0.50       493

    accuracy                           0.95      7167
   macro avg       0.96      0.67      0.74      7167
weighted avg       0.95      0.95      0.94      7167



In [None]:
final_probs=np.append(probs,gnn_prob.data.cpu().numpy(),axis=0)

In [None]:
print(roc_auc_score(final_true_all, final_probs[:,1]))

0.9083570952277039
