In [1]:
import torch
import torch.fx
import torch.nn as nn
import torch.nn.functional as F
import dgl
import dgl.nn as dglnn
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl import AddSelfLoop
import argparse

class GCN(nn.Module):
    def __init__(self, in_size, hid_size, out_size):
        super().__init__()
        self.layers = nn.ModuleList()
        # two-layer GCN
        self.layers.append(dglnn.GraphConv(in_size, hid_size, activation=F.relu))
        self.layers.append(dglnn.GraphConv(hid_size, out_size))
        self.dropout = nn.Dropout(0.5)

    def forward(self, g, features):
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(g, h)
        return h
    
def evaluate(g, features, labels, mask, model):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


def train(g, features, labels, masks, model):
    # define train/val samples, loss function and optimizer
    train_mask = masks[0]
    val_mask = masks[1]
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # training loop
    for epoch in range(10):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(g, features, labels, val_mask, model)
        print("Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} "
              . format(epoch, loss.item(), acc))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dgl.data import PubmedGraphDataset
raw_dir = "../data/dgl"
# load and preprocess dataset
transform = AddSelfLoop()
data = PubmedGraphDataset(raw_dir=raw_dir, transform=transform)
g = data[0]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
g = g.int().to(device)
features = g.ndata['feat']
labels = g.ndata['label']
masks = g.ndata['train_mask'], g.ndata['val_mask'], g.ndata['test_mask']
    
# normalization
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5).to(device)
norm[torch.isinf(norm)] = 0
g.ndata['norm'] = norm.unsqueeze(1)

# create GCN model    
in_size = features.shape[1]
out_size = data.num_classes
model = GCN(in_size, 16, out_size).to(device)

# model training
print('Training...')
train(g, features, labels, masks, model)

  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Epoch 00000 | Loss 1.0975 | Accuracy 0.5000 
Epoch 00001 | Loss 1.0904 | Accuracy 0.6100 
Epoch 00002 | Loss 1.0843 | Accuracy 0.6760 
Epoch 00003 | Loss 1.0748 | Accuracy 0.5880 
Epoch 00004 | Loss 1.0660 | Accuracy 0.6160 
Epoch 00005 | Loss 1.0559 | Accuracy 0.6560 
Epoch 00006 | Loss 1.0452 | Accuracy 0.6560 
Epoch 00007 | Loss 1.0285 | Accuracy 0.6800 
Epoch 00008 | Loss 1.0196 | Accuracy 0.6820 
Epoch 00009 | Loss 1.0079 | Accuracy 0.6940 


In [42]:
model.eval()
print(model)
print(model.layers[0].__class__.__name__)
print(model.layers[0].__dict__['_norm'])
print(model.layers[0].__dict__['_out_feats'])
print(model.layers[0].__dict__['_in_feats'])
print(model.layers[0].__dict__['_activation'].__name__)
print(model.layers[0].state_dict()['weight'].shape)

GCN(
  (layers): ModuleList(
    (0): GraphConv(in=500, out=16, normalization=both, activation=<function relu at 0x7f304a0149d0>)
    (1): GraphConv(in=16, out=3, normalization=both, activation=None)
  )
  (dropout): Dropout(p=0.5, inplace=False)
)
GraphConv
both
16
500
relu
torch.Size([500, 16])


In [38]:
g.ndata['feat'].shape[1]

1433

In [40]:
def enlarge_feature(g):
    from math import log2, pow
    shape = g.ndata['feat'].shape
    feat_num = pow(2,int(log2(shape[1])) + 1)
    g.ndata['feat'] = F.pad(g.ndata['feat'], (0,int(feat_num-shape[1])), "constant", 0)

In [42]:
tmp_g = g
enlarge_feature(tmp_g)
print(tmp_g.ndata['feat'].shape)
print(g.ndata['feat'].shape)

torch.Size([2708, 4096])
torch.Size([2708, 4096])


In [43]:
g.ndata.keys()

dict_keys(['feat', 'label', 'test_mask', 'train_mask', 'val_mask', 'norm'])

In [13]:
coo_g = g.adjacency_matrix(scipy_fmt = 'coo')
print(coo_g.shape)
print(coo_g.row.shape)
print(coo_g.col.shape)
e_data = coo_g.data
norm = g.ndata['norm']
print(e_data.shape)
print(norm.shape)

(2708, 2708)
(13264,)
(13264,)
(13264,)
torch.Size([2708, 1])


In [18]:
def copy_norm(edges):
  return {'m': edges.dst['norm']}

g.apply_edges(copy_norm)

(13264, 1)


In [28]:
import numpy as np
agg_adj = g.edata['m'].cpu().numpy().transpose()
print(agg_adj.shape)
f = open("../trace/agg_adj.npy", "wb")
np.save(f, agg_adj)

(1, 13264)


In [26]:
row_ids = np.expand_dims(coo_g.row, axis=0)
col_ids = np.expand_dims(coo_g.col, axis=0)
print(row_ids.shape)
agg_index = np.concatenate((row_ids, col_ids), axis=0)
print(agg_index.shape)

(1, 13264)
(2, 13264)


In [31]:
ng = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6)
ng.ndata['x'] = torch.randn(6)
feat = torch.randn(6,3)
feat_src, feat_dst = dgl.utils.expand_as_pair(feat, ng)
print(feat_src)
print(feat_dst)
def copy_x(edges):
  return {'m': edges.dst['x']} # "edge.dst" is the opposite of "graph.dstdata"
ng.apply_edges(copy_x)
print(ng.edata['m'])
coo_ng = ng.adjacency_matrix(scipy_fmt = 'coo') # It has a "transpose" parameter
print(coo_ng.row)
print(coo_ng.col)

tensor([[-0.6114,  1.4711, -0.7030],
        [-1.7285, -0.8871, -1.7409],
        [-0.8648,  0.5302, -0.9473],
        [-3.4139,  0.2511,  1.7076],
        [-1.4238,  0.4509,  2.3247],
        [ 0.8083, -0.4484,  0.2525]])
tensor([[-0.6114,  1.4711, -0.7030],
        [-1.7285, -0.8871, -1.7409],
        [-0.8648,  0.5302, -0.9473],
        [-3.4139,  0.2511,  1.7076],
        [-1.4238,  0.4509,  2.3247],
        [ 0.8083, -0.4484,  0.2525]])
tensor([ 0.0407,  0.2770, -0.6609, -0.0441, -2.1066])
[0 0 0 0 0]
[1 2 3 4 5]


In [9]:
import dgl
ng = dgl.graph(([0, 1, 0, 1, 2, 3, 3, 3], [0, 1, 1, 2, 2, 1, 2, 3]), num_nodes=4)
# check the sparse array
coo = ng.adjacency_matrix(scipy_fmt = 'coo')
print(coo.row)
print(coo.col)

[0 1 0 1 2 3 3 3]
[0 1 1 2 2 1 2 3]


In [7]:
# check the left and right norm
out_degs = ng.out_degrees().float()
in_degs = ng.in_degrees().float()
norm_left = 1 / out_degs
norm_right = 1 / in_degs
ng.ndata['l_norm'] = norm_left
ng.ndata['r_norm'] = norm_right
ng.srcdata['h'] = norm_left
def copy_x(edges):
  return {'ld': edges.dst['h'], 'ls': edges.src['h'], 'rd': edges.dst['r_norm'], 'rs': edges.src['r_norm']} # "edge.dst" is the opposite of "graph.dstdata"
ng.apply_edges(copy_x)
print(ng.edata)
print(ng.ndata)

{'ld': tensor([0.5000, 0.5000, 0.5000, 1.0000, 1.0000, 0.5000, 1.0000, 0.3333]), 'ls': tensor([0.5000, 0.5000, 0.5000, 0.5000, 1.0000, 0.3333, 0.3333, 0.3333]), 'rd': tensor([1.0000, 0.3333, 0.3333, 0.3333, 0.3333, 0.3333, 0.3333, 1.0000]), 'rs': tensor([1.0000, 1.0000, 0.3333, 0.3333, 0.3333, 1.0000, 1.0000, 1.0000])}
{'l_norm': tensor([0.5000, 0.5000, 1.0000, 0.3333]), 'r_norm': tensor([1.0000, 0.3333, 0.3333, 1.0000]), 'h': tensor([0.5000, 0.5000, 1.0000, 0.3333])}


In [11]:
print(out_degs.shape)
out_degs.shape + (1,) * 8

torch.Size([4])


torch.Size([4, 1, 1, 1, 1, 1, 1, 1, 1])

In [3]:
ng = dgl.graph(([0, 1, 0, 1, 3, 2, 3, 3], [1, 1, 0, 2, 1, 2, 3, 2]), num_nodes=4)
tng = dgl.reorder_graph(ng, edge_permute_algo='src')
tng = dgl.reorder_graph(tng, edge_permute_algo='dst')
ng = tng
coo = tng.adjacency_matrix(transpose = True ,scipy_fmt = 'coo')
print(coo.row)
print(coo.col)

[0 1 1 1 2 2 2 3]
[0 0 1 3 1 2 3 3]


In [4]:
# check the left and right norm
out_degs = ng.out_degrees().float()
in_degs = ng.in_degrees().float()
norm_left = 1 / out_degs
norm_right = 1 / in_degs
ng.ndata['l_norm'] = norm_left
ng.ndata['r_norm'] = norm_right
def copy_x(edges):
  return {'ls': edges.src['l_norm'], 'rd': edges.dst['r_norm'], 'both': edges.src['l_norm'] * edges.dst['r_norm']} # "edge.dst" is the opposite of "graph.dstdata"
ng.apply_edges(copy_x)
print(ng.edata)
print(ng.ndata)

{'_ID': tensor([1, 0, 2, 5, 3, 4, 7, 6]), 'ls': tensor([0.5000, 0.5000, 0.5000, 0.3333, 0.5000, 1.0000, 0.3333, 0.3333]), 'rd': tensor([1.0000, 0.3333, 0.3333, 0.3333, 0.3333, 0.3333, 0.3333, 1.0000]), 'both': tensor([0.5000, 0.1667, 0.1667, 0.1111, 0.1667, 0.3333, 0.1111, 0.3333])}
{'_ID': tensor([0, 1, 2, 3]), 'l_norm': tensor([0.5000, 0.5000, 1.0000, 0.3333]), 'r_norm': tensor([1.0000, 0.3333, 0.3333, 1.0000])}


In [None]:
torch.save(model, "../trace/model.pt")