In [1]:
import torch
import torch.fx
import torch.nn as nn
import torch.nn.functional as F
import dgl
import dgl.nn as dglnn
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl import AddSelfLoop
import argparse

class GCN(nn.Module):
    def __init__(self, in_size, hid_size, out_size):
        super().__init__()
        self.layers = nn.ModuleList()
        # two-layer GCN
        self.layers.append(dglnn.GraphConv(in_size, hid_size, activation=F.relu))
        self.layers.append(dglnn.GraphConv(hid_size, out_size))
        self.dropout = nn.Dropout(0.5)

    def forward(self, g, features):
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(g, h)
        return h
    
def evaluate(g, features, labels, mask, model):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


def train(g, features, labels, masks, model):
    # define train/val samples, loss function and optimizer
    train_mask = masks[0]
    val_mask = masks[1]
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # training loop
    for epoch in range(10):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(g, features, labels, val_mask, model)
        print("Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} "
              . format(epoch, loss.item(), acc))

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from dgl.data import CoraGraphDataset
raw_dir = "../data/dgl"
# load and preprocess dataset
transform = AddSelfLoop()
data = CoraGraphDataset(raw_dir=raw_dir, transform=transform)
g = data[0]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
g = g.int().to(device)
features = g.ndata['feat']
labels = g.ndata['label']
masks = g.ndata['train_mask'], g.ndata['val_mask'], g.ndata['test_mask']
    
# normalization
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5).to(device)
norm[torch.isinf(norm)] = 0
g.ndata['norm'] = norm.unsqueeze(1)

# create GCN model    
in_size = features.shape[1]
out_size = data.num_classes
model = GCN(in_size, 16, out_size).to(device)

# model training
print('Training...')
train(g, features, labels, masks, model)

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Epoch 00000 | Loss 1.9457 | Accuracy 0.2860 
Epoch 00001 | Loss 1.9400 | Accuracy 0.1980 
Epoch 00002 | Loss 1.9324 | Accuracy 0.2320 
Epoch 00003 | Loss 1.9240 | Accuracy 0.3480 
Epoch 00004 | Loss 1.9157 | Accuracy 0.5080 
Epoch 00005 | Loss 1.9045 | Accuracy 0.6120 
Epoch 00006 | Loss 1.8926 | Accuracy 0.6700 
Epoch 00007 | Loss 1.8851 | Accuracy 0.6800 
Epoch 00008 | Loss 1.8720 | Accuracy 0.6600 
Epoch 00009 | Loss 1.8610 | Accuracy 0.6620 


In [35]:
model.eval()
print(model)
print(model.layers[0].__class__)
print(model.layers[0].__dict__['_norm'])
print(model.layers[0].__dict__['_out_feats'])
print(model.layers[0].__dict__['_in_feats'])
print(model.layers[0].__dict__['_activation'].__name__)
print(model.layers[0].state_dict()['weight'])

GCN(
  (layers): ModuleList(
    (0): GraphConv(in=1433, out=16, normalization=both, activation=<function relu at 0x7fba3f8f9af0>)
    (1): GraphConv(in=16, out=7, normalization=both, activation=None)
  )
  (dropout): Dropout(p=0.5, inplace=False)
)
<class 'dgl.nn.pytorch.conv.graphconv.GraphConv'>
both
16
1433
relu
tensor([[ 0.0215,  0.0372, -0.0192,  ...,  0.0021, -0.0052, -0.0157],
        [ 0.0328,  0.0998, -0.0019,  ...,  0.0361,  0.0914, -0.0276],
        [ 0.0335, -0.0006,  0.0153,  ...,  0.0121, -0.0161, -0.1174],
        ...,
        [ 0.0192, -0.0408,  0.0059,  ...,  0.0235, -0.0238,  0.0362],
        [ 0.1194, -0.0027, -0.0152,  ...,  0.0440,  0.0286,  0.0403],
        [ 0.0553, -0.0928, -0.0081,  ..., -0.0938, -0.0405,  0.1224]],
       device='cuda:0')


In [38]:
g.ndata['feat'].shape[1]

1433

In [40]:
def enlarge_feature(g):
    from math import log2, pow
    shape = g.ndata['feat'].shape
    feat_num = pow(2,int(log2(shape[1])) + 1)
    g.ndata['feat'] = F.pad(g.ndata['feat'], (0,int(feat_num-shape[1])), "constant", 0)

In [42]:
tmp_g = g
enlarge_feature(tmp_g)
print(tmp_g.ndata['feat'].shape)
print(g.ndata['feat'].shape)

torch.Size([2708, 4096])
torch.Size([2708, 4096])


In [43]:
g.ndata.keys()

dict_keys(['feat', 'label', 'test_mask', 'train_mask', 'val_mask', 'norm'])