GraphSAGE

Approximate accuracy per dataset (presumably measured on the test split):
- cora: ~0.8330
- citeseer: ~0.7110
- pubmed: ~0.7830

In [1]:
# Install DGL into the notebook environment.
# NOTE(review): neither install is version-pinned, so a fresh run may pull a
# DGL release whose API differs from the one this notebook was written for --
# confirm the intended version and pin it.
# NOTE(review): both `dgl` and `dgl-cu101` are requested; presumably these are
# the CPU and CUDA 10.1 builds of the same package and only one is needed --
# verify against the runtime you target.
!pip install dgl
!pip install dgl-cu101



In [0]:
import argparse
import time
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.pytorch.conv import SAGEConv

In [0]:
 # Hyperparameters and run configuration read by the cells below.
 dropout = 0.5              # forwarded to every SAGEConv layer as feat_drop
 gpu = 0                    # CUDA device index; a negative value selects the CPU
 lr = 1e-2                  # learning rate handed to the Adam optimizer
 epochs = 200               # intended number of training epochs
 n_hidden = 16              # width of the hidden node representations
 n_layers = 1               # model builds (n_layers - 1) hidden layers plus input and output layers
 weight_decay = 5e-4        # weight decay handed to the Adam optimizer
 aggregator_type = "gcn"    # aggregator name passed to SAGEConv

In [0]:
class GraphSAGE(nn.Module):
    """Stack of ``SAGEConv`` layers applied to a single, fixed graph.

    The stack is: one input layer (``in_feats -> n_hidden``), ``n_layers - 1``
    hidden layers (``n_hidden -> n_hidden``) and one output layer
    (``n_hidden -> n_classes``). The output layer is built with
    ``activation=None``, so ``forward`` returns un-activated scores.

    Args:
        g: graph object handed to every layer on each forward pass.
        in_feats: size of the input node features.
        n_hidden: size of the hidden node representations.
        n_classes: size of the output layer.
        n_layers: controls the number of hidden layers (``n_layers - 1``).
        activation: activation passed to the input and hidden layers.
        dropout: value passed to every layer as ``feat_drop``.
        aggregator_type: aggregator name passed to every ``SAGEConv``.
    """

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout,
                 aggregator_type):
        super().__init__()
        self.g = g

        # (input size, output size, activation) for each layer, listed in the
        # order the layers are created and later applied.
        layer_specs = (
            [(in_feats, n_hidden, activation)]                     # input layer
            + [(n_hidden, n_hidden, activation)] * (n_layers - 1)  # hidden layers
            + [(n_hidden, n_classes, None)]                        # output layer, no activation
        )
        self.layers = nn.ModuleList([
            SAGEConv(size_in, size_out, aggregator_type,
                     feat_drop=dropout, activation=layer_activation)
            for size_in, size_out, layer_activation in layer_specs
        ])

    def forward(self, features):
        """Run ``features`` through every layer in order and return the result."""
        hidden = features
        for conv in self.layers:
            hidden = conv(self.g, hidden)
        return hidden


In [0]:
def evaluate(model, features, labels, mask):
    """Return the fraction of masked rows whose arg-max prediction equals the label.

    The model is switched to eval mode (and left there) and the forward pass
    runs without gradient tracking.

    Args:
        model: callable module mapping ``features`` to per-class scores.
        features: input passed to ``model``.
        labels: integer class labels, indexable by ``mask``.
        mask: index/boolean mask selecting the rows to score.

    Returns:
        Accuracy over the selected rows as a Python float.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(features)[mask]
        masked_labels = labels[mask]
        # torch.max over dim=1 returns (values, indices); the indices are the predictions.
        predicted = torch.max(masked_logits, dim=1)[1]
        n_correct = (predicted == masked_labels).sum().item()
    return float(n_correct) / len(masked_labels)

def train(model, n_epochs):
    """Train ``model`` full-batch for ``n_epochs`` epochs, logging progress.

    Reads notebook-level globals defined in other cells: ``features``,
    ``labels``, ``train_mask``, ``val_mask``, ``test_mask``, ``loss_fcn``,
    ``optimizer`` and ``n_edges``.

    After every epoch the validation accuracy, the mean epoch time and the
    derived edge throughput are printed; after the last epoch the test
    accuracy is printed.

    Args:
        model: module called as ``model(features)`` to produce per-node scores.
        n_epochs: number of optimisation steps to run.
    """
    dur = []
    for epoch in range(n_epochs):
        model.train()
        # The first three epochs are treated as warm-up and excluded from timing.
        timed = epoch >= 3
        if timed:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # NOTE(review): on a CUDA device the work above may still be in flight
        # here, so this wall-clock figure could under-report -- confirm whether
        # a synchronisation is wanted before reading the clock.
        if timed:
            dur.append(time.time() - t0)

        # np.mean([]) returns nan but also emits RuntimeWarnings; while no epoch
        # has been timed yet, report nan explicitly so the log line is unchanged
        # and the warnings disappear.
        mean_dur = np.mean(dur) if dur else float("nan")

        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                            acc, n_edges / mean_dur / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))


In [6]:
# Build the dataset arguments through DGL's argument helper; the dataset is
# selected by the explicit argument list rather than the real command line.
parser = argparse.ArgumentParser(description='GraphSAGE')
register_data_args(parser)
args = parser.parse_args(args=['--dataset', 'cora'])
#args = parser.parse_args(args=['--dataset', 'citeseer'])
#args = parser.parse_args(args=['--dataset', 'pubmed'])

data = load_data(args)
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)
# Use boolean masks when this PyTorch build provides BoolTensor, otherwise
# fall back to byte masks.
if hasattr(torch, 'BoolTensor'):
    train_mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
else:
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
  #Edges %d
  #Classes %d
  #Train samples %d
  #Val samples %d
  #Test samples %d""" %
      (n_edges, n_classes,
        train_mask.int().sum().item(),
        val_mask.int().sum().item(),
        test_mask.int().sum().item()))

# Move tensors to the requested GPU only when CUDA is actually usable, so the
# notebook still runs on a CPU-only kernel instead of failing in set_device.
cuda = gpu >= 0 and torch.cuda.is_available()
if cuda:
    torch.cuda.set_device(gpu)
    features = features.cuda()
    labels = labels.cuda()
    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()
    print("use cuda:", gpu)
elif gpu >= 0:
    print("CUDA device %d requested but CUDA is not available; using CPU" % gpu)



Downloading /root/.dgl/cora.zip from https://data.dgl.ai/dataset/cora_raw.zip...
Extracting file to /root/.dgl/cora
----Data statistics------'
  #Edges 10556
  #Classes 7
  #Train samples 140
  #Val samples 300
  #Test samples 1000
use cuda: 0


In [0]:
# Drop self-loops from the dataset graph (in place), then wrap it for DGL.
data.graph.remove_edges_from(nx.selfloop_edges(data.graph))
g = DGLGraph(data.graph)
n_edges = g.number_of_edges()

# Instantiate GraphSAGE with the hyperparameters from the configuration cell.
model = GraphSAGE(
    g=g,
    in_feats=in_feats,
    n_hidden=n_hidden,
    n_classes=n_classes,
    n_layers=n_layers,
    activation=F.relu,
    dropout=dropout,
    aggregator_type=aggregator_type,
)

# Move the parameters to the GPU before the optimizer captures them.
if cuda:
    model.cuda()

loss_fcn = nn.CrossEntropyLoss()

# Adam with the learning rate and weight decay from the configuration cell.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)


In [8]:
# Train for the number of epochs set in the configuration cell (previously a
# hard-coded 200, which silently ignored `epochs`), then report final accuracy.
train(model, epochs)
print("Train Accuracy",evaluate(model, features, labels, train_mask))
print("test Accuracy",evaluate(model, features, labels, test_mask))

Epoch 00000 | Time(s) nan | Loss 1.9610 | Accuracy 0.1267 | ETputs(KTEPS) nan
Epoch 00001 | Time(s) nan | Loss 1.9342 | Accuracy 0.1267 | ETputs(KTEPS) nan


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00002 | Time(s) nan | Loss 1.9117 | Accuracy 0.2633 | ETputs(KTEPS) nan
Epoch 00003 | Time(s) 0.0058 | Loss 1.8849 | Accuracy 0.4400 | ETputs(KTEPS) 1816.26
Epoch 00004 | Time(s) 0.0055 | Loss 1.8609 | Accuracy 0.3600 | ETputs(KTEPS) 1916.50
Epoch 00005 | Time(s) 0.0054 | Loss 1.8450 | Accuracy 0.3633 | ETputs(KTEPS) 1937.02
Epoch 00006 | Time(s) 0.0054 | Loss 1.8252 | Accuracy 0.3867 | ETputs(KTEPS) 1972.91
Epoch 00007 | Time(s) 0.0055 | Loss 1.7958 | Accuracy 0.4033 | ETputs(KTEPS) 1911.16
Epoch 00008 | Time(s) 0.0055 | Loss 1.7729 | Accuracy 0.4100 | ETputs(KTEPS) 1926.16
Epoch 00009 | Time(s) 0.0055 | Loss 1.7655 | Accuracy 0.4167 | ETputs(KTEPS) 1928.91
Epoch 00010 | Time(s) 0.0054 | Loss 1.7277 | Accuracy 0.4167 | ETputs(KTEPS) 1942.54
Epoch 00011 | Time(s) 0.0054 | Loss 1.7084 | Accuracy 0.4167 | ETputs(KTEPS) 1943.62
Epoch 00012 | Time(s) 0.0055 | Loss 1.7029 | Accuracy 0.4167 | ETputs(KTEPS) 1932.84
Epoch 00013 | Time(s) 0.0055 | Loss 1.6948 | Accuracy 0.4167 | ETputs(KT

In [0]:
def softmax_by_row(logits, T = 1.0):
    """Apply a temperature-scaled softmax along the last axis.

    Args:
        logits: array-like of scores; the softmax is taken over ``axis=-1``.
        T: temperature dividing the (max-shifted) scores before exponentiation.

    Returns:
        Array of the same shape whose entries along the last axis sum to 1.
    """
    # Shift by the row maximum so the largest exponent is exp(0).
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    weights = np.exp(shifted / T)
    return weights / np.sum(weights, axis=-1, keepdims=True)


def classifier_performance(model, features, labels, train_mask, test_mask):
    """Compute softmax confidences and accuracy on the train and test nodes.

    The model is evaluated once, in eval mode and without gradient tracking,
    so the confidences do not depend on whatever mode the model was left in by
    earlier cells. The model's previous train/eval mode is restored afterwards.

    Args:
        model: ``torch.nn.Module`` called as ``model(features)``.
        features: input passed to ``model``.
        labels: tensor of integer class labels, indexable by the masks.
        train_mask: mask selecting the training rows.
        test_mask: mask selecting the test rows.

    Returns:
        Tuple ``(output_train, output_test, train_labels, test_labels)`` of
        NumPy arrays: row-wise softmax confidences for the train and test rows
        followed by the matching labels.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # A single forward pass serves both masks; in eval mode the second
            # pass of the previous implementation produced the same values.
            logits = model(features)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    output_train = logits[train_mask].cpu().detach().numpy()
    output_train = softmax_by_row(output_train, T = 1)
    train_labels = labels[train_mask].cpu().detach().numpy()

    output_test = logits[test_mask].cpu().detach().numpy()
    output_test = softmax_by_row(output_test, T = 1)
    test_labels = labels[test_mask].cpu().detach().numpy()

    train_acc1 = np.sum(np.argmax(output_train,axis=1) == train_labels.flatten())/len(train_labels)
    test_acc1 = np.sum(np.argmax(output_test,axis=1) == test_labels.flatten())/len(test_labels)

    print('Accuracy: ', (train_acc1, test_acc1))

    return output_train, output_test, train_labels, test_labels




def _true_label_confidence(confidence_mtx, label_vec):
    """Return (confidence assigned to each row's own label, count of arg-max hits)."""
    n_rows = confidence_mtx.shape[0]
    picked = [confidence_mtx[row, label_vec[row]] for row in range(n_rows)]
    hits = sum(
        1 for row in range(n_rows)
        if np.argmax(confidence_mtx[row, :]) == label_vec[row]
    )
    return np.array(picked), hits


def inference_via_confidence(confidence_mtx1, confidence_mtx2, label_vec1, label_vec2):
    """Threshold-based membership inference on true-label confidence.

    Every observed true-label confidence is tried as a threshold ``delta``:
    rows with confidence ``>= delta`` are guessed to belong to the first set.
    The score for a threshold is ``0.5 * (ratio1 + 1 - ratio2)``, where
    ``ratio1`` / ``ratio2`` are the fractions of the first / second set at or
    above the threshold. The best score found (never below 0.5) is printed and
    returned, along with a printout of the plain classification accuracy of
    both sets.

    Args:
        confidence_mtx1: per-class confidences for the first set (the caller
            in this notebook passes the training rows).
        confidence_mtx2: per-class confidences for the second set (the caller
            in this notebook passes the test rows).
        label_vec1: labels for the rows of ``confidence_mtx1``.
        label_vec2: labels for the rows of ``confidence_mtx2``.

    Returns:
        The best attack accuracy over all candidate thresholds.
    """
    n_first = confidence_mtx1.shape[0]
    n_second = confidence_mtx2.shape[0]

    # Step 1: true-label confidence and arg-max hit count for each set.
    confidence1, hits1 = _true_label_confidence(confidence_mtx1, label_vec1)
    confidence2, hits2 = _true_label_confidence(confidence_mtx2, label_vec2)

    print('model accuracy for training and test-', (hits1/n_first, hits2/n_second) )

    # Step 2: sweep every observed confidence as the decision threshold.
    max_accuracy = 0.5
    for delta in np.sort(np.concatenate((confidence1, confidence2))):
        ratio1 = np.sum(confidence1>=delta)/n_first
        ratio2 = np.sum(confidence2>=delta)/n_second
        accuracy_now = 0.5*(ratio1+1-ratio2)
        if accuracy_now > max_accuracy:
            max_accuracy = accuracy_now
    print('membership inference accuracy is:', max_accuracy)
    return max_accuracy

In [10]:
# Gather softmax confidences for the train and test nodes, then run the
# confidence-threshold membership inference attack on them.
output_train, output_test, train_label, test_label = classifier_performance(
    model, features, labels, train_mask, test_mask
)
inference_accuracy = inference_via_confidence(
    confidence_mtx1=output_train,
    confidence_mtx2=output_test,
    label_vec1=train_label,
    label_vec2=test_label,
)
print("Maximum Accuracy:", inference_accuracy)

Accuracy:  (0.9785714285714285, 0.824)
model accuracy for training and test- (0.9785714285714285, 0.824)
membership inference accuracy is: 0.6933571428571429
Maximum Accuracy: 0.6933571428571429
