GraphSAGE
- cora: ~0.8330 
- citeseer: ~0.7110
- pubmed: ~0.7830

In [0]:
# Install DGL into the notebook runtime. Both the default `dgl` package and
# `dgl-cu101` are installed (the latter is presumably the CUDA 10.1 build --
# confirm it matches the CUDA version of the runtime).
!pip install dgl
!pip install dgl-cu101

In [0]:
import argparse
import time
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.pytorch.conv import SAGEConv

In [0]:
 # --- Model architecture (all forwarded to the GraphSAGE constructor) ---
 n_hidden = 16
 n_layers = 1
 aggregator_type = "gcn"
 dropout = 0.5
 # --- Optimisation (lr / weight_decay go to Adam) ---
 lr = 0.01
 weight_decay = 0.0005
 epochs = 200
 # --- Device: CUDA device index; a negative value selects the CPU ---
 gpu = 0

In [0]:
class GraphSAGE(nn.Module):
    """A stack of ``SAGEConv`` layers evaluated on one fixed graph.

    The stack consists of an input layer (``in_feats -> n_hidden``),
    ``n_layers - 1`` hidden layers (``n_hidden -> n_hidden``) and an output
    layer (``n_hidden -> n_classes``).  Every layer is built with
    ``aggregator_type`` and ``feat_drop=dropout``; all layers except the
    output layer also receive ``activation``.
    """

    def __init__(self, g, in_feats, n_hidden, n_classes, n_layers,
                 activation, dropout, aggregator_type):
        super().__init__()
        self.g = g

        # The input and output layers always exist, so there is at least one
        # ``n_hidden``-wide stage even when ``n_layers`` is below 1.
        widths = [in_feats] + [n_hidden] * max(n_layers, 1) + [n_classes]
        final = len(widths) - 2
        convs = [
            SAGEConv(src, dst, aggregator_type, feat_drop=dropout,
                     # the output layer is created without an activation
                     activation=None if idx == final else activation)
            for idx, (src, dst) in enumerate(zip(widths[:-1], widths[1:]))
        ]
        self.layers = nn.ModuleList(convs)

    def forward(self, features):
        """Run ``features`` through every layer in order.

        Returns a pair ``(out, per_layer)``: ``out`` is the last layer's
        output and ``per_layer`` is the list of every layer's output in order
        (its last element is ``out``).
        """
        per_layer = []
        hidden = features
        for conv in self.layers:
            hidden = conv(self.g, hidden)
            per_layer.append(hidden)
        return hidden, per_layer


In [0]:
# Build the dataset arguments through DGL's argparse helper.  The description
# now names the model actually trained in this notebook.
parser = argparse.ArgumentParser(description='GraphSAGE')
register_data_args(parser)
# Swap the active line below to train on a different citation dataset.
args = parser.parse_args(args=['--dataset', 'cora'])
#args = parser.parse_args(args=['--dataset', 'citeseer'])
#args = parser.parse_args(args=['--dataset', 'pubmed'])

data = load_data(args)
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)

# PyTorch builds without BoolTensor get ByteTensor masks instead.
mask_type = torch.BoolTensor if hasattr(torch, 'BoolTensor') else torch.ByteTensor
train_mask = mask_type(data.train_mask)
val_mask = mask_type(data.val_mask)
test_mask = mask_type(data.test_mask)

in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
  #Edges %d
  #Classes %d
  #Train samples %d
  #Val samples %d
  #Test samples %d""" %
      (n_edges, n_classes,
        train_mask.int().sum().item(),
        val_mask.int().sum().item(),
        test_mask.int().sum().item()))

# Use the GPU only when one was requested (gpu >= 0) AND CUDA is usable, so
# the notebook still runs on CPU-only machines instead of failing in
# torch.cuda.set_device.
cuda = gpu >= 0 and torch.cuda.is_available()
if gpu >= 0 and not cuda:
    print("CUDA is not available; falling back to CPU")
if cuda:
    torch.cuda.set_device(gpu)
    features = features.cuda()
    labels = labels.cuda()
    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()
    print("use cuda:", gpu)



In [0]:
def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    ``model`` is switched to eval mode and called as ``model(features)``; it
    must return a pair whose first item holds per-row class scores.  The
    predicted class of a row is the index of its largest score.  The result
    is a Python float: correct predictions divided by the number of selected
    labels.
    """
    model.eval()
    with torch.no_grad():
        scores, _unused = model(features)
        picked_scores = scores[mask]
        targets = labels[mask]
        predicted = picked_scores.max(dim=1)[1]
        n_correct = (predicted == targets).sum()
    return n_correct.item() * 1.0 / len(targets)


def train(model, n_epochs):
    """Train ``model`` for ``n_epochs`` full-batch epochs, printing progress.

    Reads the notebook globals ``features``, ``labels``, ``train_mask``,
    ``val_mask``, ``test_mask``, ``loss_fcn``, ``optimizer`` and ``n_edges``.

    After each epoch it prints the mean epoch time, the training loss, the
    validation accuracy and the edge throughput.  When training is done it
    prints the accuracy on the training mask and on the test mask.  Returns
    ``None``.
    """
    dur = []
    for epoch in range(n_epochs):
        model.train()
        # The first three epochs are left out of the timing statistics.
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        # forward
        logits, _ = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if timed:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        # Until a timed epoch exists, report NaN explicitly rather than
        # calling np.mean on an empty list (which emits a RuntimeWarning).
        mean_dur = np.mean(dur) if dur else float("nan")
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} |Accuracy {:.4f} |"
              "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                           acc, n_edges / mean_dur / 1000))

    print()
    trainacc = evaluate(model, features, labels, train_mask)
    # This figure is measured on the training mask, so label it accordingly.
    print("Train Accuracy {:.4f}".format(trainacc))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))


In [0]:
# Drop self-loops from the dataset's NetworkX graph (modified in place), then
# wrap it as a DGLGraph and refresh the edge count used in the training log.
nx_graph = data.graph
nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
g = DGLGraph(nx_graph)
n_edges = g.number_of_edges()

# GraphSAGE model configured from the hyperparameter cell, with ReLU between
# layers.
model = GraphSAGE(
    g=g,
    in_feats=in_feats,
    n_hidden=n_hidden,
    n_classes=n_classes,
    n_layers=n_layers,
    activation=F.relu,
    dropout=dropout,
    aggregator_type=aggregator_type,
)
if cuda:
    model = model.cuda()

# Cross-entropy objective, optimised with Adam.
loss_fcn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)


In [0]:
# Train for the number of epochs set in the hyperparameter cell instead of
# repeating the literal here, so the two cannot drift apart.
train(model, epochs)

In [0]:
def ExtractEmbedding(model, features, labels, mask):
    """Return the first two per-layer outputs of ``model`` for masked rows.

    ``model`` is switched to eval mode and called as ``model(features)``
    without gradient tracking; it must return ``(logits, emb)`` where ``emb``
    is a sequence holding at least two per-layer outputs.

    Args:
        model: module whose call returns ``(logits, emb)``.
        features: input passed to ``model``.
        labels: unused; kept so existing call sites keep working.
        mask: index/mask applied to the rows of each selected output.

    Returns:
        ``(emb[0][mask], emb[1][mask])``.
    """
    model.eval()
    with torch.no_grad():
        # Only the per-layer outputs are needed; the logits are discarded.
        _, emb = model(features)
        return emb[0][mask], emb[1][mask]

# Per-layer embeddings of the trained model, first for the training nodes and
# then for the test nodes.
train_emb1, train_emb2 = ExtractEmbedding(model, features, labels, mask=train_mask)
test_emb1, test_emb2 = ExtractEmbedding(model, features, labels, mask=test_mask)

In [0]:
# Convert the first-layer embeddings to NumPy arrays (copied to the CPU first).
train_emb, test_emb = (t.cpu().numpy() for t in (train_emb1, test_emb1))
print(train_emb.shape)
print(test_emb.shape)

# Stack along axis 0: all training rows first, followed by all test rows.
X = np.concatenate([train_emb, test_emb], axis=0)
print(X.shape)

In [0]:
from keras.layers import Input, Dense
from keras.models import Model

# Width of the bottleneck the embeddings are compressed to.
encoding_dim = 1
# Width of the embeddings being auto-encoded.  Taken from the data instead of
# a hard-coded 16 so the cell keeps working when n_hidden is changed.
input_dim = X.shape[1]

# Single-hidden-layer autoencoder: input_dim -> encoding_dim -> input_dim.
input_img = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_img)
decoded = Dense(input_dim, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)

# Stand-alone encoder and decoder that share the autoencoder's layers.
encoder = Model(input_img, encoded)
encoded_input = Input(shape=(encoding_dim,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))

# NOTE(review): the decoder output is a sigmoid trained with binary
# cross-entropy, which presumes targets in [0, 1]; X holds layer outputs that
# went through ReLU and may exceed 1 -- confirm this loss is intended.
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

autoencoder.fit(X, X, epochs=1000, batch_size=256, shuffle=True)
# One encoding_dim-wide code per row of X.
encoded_imgs = encoder.predict(X)

In [0]:
# Show the encoder output computed for X in the autoencoder cell.
print(encoded_imgs)

In [0]:
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# Ground truth for the rows of X / encoded_imgs: 1 for rows that came from
# train_emb, 0 for rows that came from test_emb (X stacks them in that order).
# It does not depend on the seed, so it is built once, outside the loop.
ylabel = [1] * train_emb.shape[0] + [0] * test_emb.shape[0]

accuracy = []
for i in range(100):
    kmeans = KMeans(n_clusters=2, random_state=i).fit(encoded_imgs)
    # accuracy_score takes (y_true, y_pred).
    acc = accuracy_score(ylabel, kmeans.labels_)
    # KMeans numbers its two clusters arbitrarily, so a run scoring `acc`
    # under one naming scores `1 - acc` under the other; keep the better one.
    accuracy.append(max(acc, 1.0 - acc))
print(max(accuracy))