In [16]:
# A bit of setup, just ignore this cell
import numpy as np
import matplotlib.pyplot as plt

# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.rcParams['figure.figsize'] = (8.0, 6.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
plt.rcParams['animation.html'] = 'html5'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
import mxnet as mx
from mxnet import gluon, nd, autograd
from mxnet.gluon import nn

import dgl
import dgl.function as fn

In [7]:
from reddit_gcn import load_data

In [9]:
g, feat, label, train_mask, test_mask = load_data()

Features...
Generating graph
#Nodes: 165534
#Edges: 2276551
Labels...
Making training/testing masks


In [21]:
label.max()


[49.]
<NDArray 1 @cpu(0)>

In [61]:
ctx = mx.cpu(0)
feat = nd.array(feat, ctx=ctx)
label = nd.array(label, ctx=ctx)
train_mask = nd.array(train_mask, ctx=ctx)
test_mask = nd.array(test_mask, ctx=ctx)
n_train_samples = train_mask.shape[0]

In [62]:
# calculate normalization
degs = g.in_degrees().astype('float32').asnumpy()
norm = np.power(degs, -0.5).reshape(-1, 1)
norm[np.isinf(norm)] = 0.
norm = nd.array(norm, ctx=ctx)
g.ndata['norm'] = norm

In [63]:
g.ndata

{'norm': 
[[0.2773501 ]
 [0.13130644]
 [0.22941573]
 ...
 [1.        ]
 [0.18257418]
 [0.20851442]]
<NDArray 165534x1 @cpu(0)>}

In [57]:
gcn_message = fn.copy_u('h', 'msg')
gcn_reduce = fn.sum('msg', 'h')

class GCN(gluon.Block):
    def __init__(self, out_feats):
        super(GCN, self).__init__()
        self.dense_1 = nn.Dense(64)
        #self.dropout
        self.linear = nn.Dense(out_feats)
    
    def forward(self, g, inputs):
        # g is the graph and the inputs is the input node features
        # first perform linear transformation
        h = self.dense_1(inputs).relu()
        h = self.linear(h)
        
        # set the node features
        g.ndata['h'] = h
        # trigger message passing, gcn_message and gcn_reduce will be defined later
        g.update_all(gcn_message, gcn_reduce)
        # get the result node features
        h = g.ndata.pop('h')
        return h

In [58]:
def evaluate(net, g, X, y, mask):
    logits = net(g, X)
    logits = logits[mask]
    labels = y[mask]
    indices = logits.argmax(axis=1)
    accuracy = (indices == labels).sum() / labels.shape[0]
    return accuracy.asscalar()

def train(net, g, X, y, train_mask, test_mask, epochs=1):
    trainer = gluon.Trainer(
        net.collect_params(), 'adam',
        {'learning_rate': 0.01, 'wd': 5e-4}
    )
    
    loss_fcn = gluon.loss.SoftmaxCELoss()

    X_ = X.as_in_context(ctx)
    y_ = y.as_in_context(ctx)

    for epoch in range(epochs):
        with autograd.record():
            logits = net(g, X_)
            loss = loss_fcn(logits[train_mask], y_[train_mask]).sum() / train_mask.shape[0]

        loss.backward()
        trainer.step(batch_size=1)

        train_acc = evaluate(net, g, X_, y_, train_mask)
        test_acc = evaluate(net, g, X_, y_, test_mask)
        print('Epoch %d, Loss %f, Train acc %f, Test acc %f' % (epoch, loss.asscalar(), train_acc, test_acc))

In [64]:
net = GCN(50)

In [65]:
net.initialize(ctx=ctx)

In [None]:
train(net, g, feat, label, train_mask, test_mask, 10)

Epoch 0, Loss 3.913624, Train acc 0.132106, Test acc 0.108000
Epoch 1, Loss 3.746692, Train acc 0.090555, Test acc 0.085000
