In [11]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset

In [34]:
# Graph Convolutional Network
class GCN(nn.Module):
    def __init__(self, g, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()                         # initial the graph convolutional network parent class
        self.conv1 = dgl.nn.GraphConv(in_feats, h_feats)    # in feats: size of input feature vector, h feats: size of output
        self.conv2 = dgl.nn.GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

def train_and_save_embeddings_and_edges(dataset, dataset_name):
    g = dataset[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']

    graph_features = g.ndata['feat'].numpy()
    graph_labels = g.ndata['label'].numpy()
    np.save(f'{dataset_name}_features.npy', graph_features)
    np.save(f'{dataset_name}_labels.npy', graph_labels)

    in_feats = features.shape[1]
    h_feats = 64
    num_classes = dataset.num_classes
    model = GCN(g, in_feats, h_feats, num_classes)

    # Train the model
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(200):
        model.train()
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])
        print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Extract and save the embeddings
    model.eval()
    with torch.no_grad():
        embeddings = model(g, features)
    print(embeddings.shape)
    np.save(f'{dataset_name}_embeddings.npy', embeddings.detach().numpy())


    edge_index = g.edges()
    np.save(f'{dataset_name}_edge_index.npy', np.vstack((edge_index[0].numpy(), edge_index[1].numpy())))

# Load the datasets
cora_dataset = CoraGraphDataset()

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [35]:
train_and_save_embeddings_and_edges(cora_dataset, 'cora')

tensor(1.9455, grad_fn=<NllLossBackward0>)
tensor(1.9242, grad_fn=<NllLossBackward0>)
tensor(1.8994, grad_fn=<NllLossBackward0>)
tensor(1.8693, grad_fn=<NllLossBackward0>)
tensor(1.8372, grad_fn=<NllLossBackward0>)
tensor(1.8023, grad_fn=<NllLossBackward0>)
tensor(1.7636, grad_fn=<NllLossBackward0>)
tensor(1.7215, grad_fn=<NllLossBackward0>)
tensor(1.6772, grad_fn=<NllLossBackward0>)
tensor(1.6303, grad_fn=<NllLossBackward0>)
tensor(1.5805, grad_fn=<NllLossBackward0>)
tensor(1.5283, grad_fn=<NllLossBackward0>)
tensor(1.4739, grad_fn=<NllLossBackward0>)
tensor(1.4174, grad_fn=<NllLossBackward0>)
tensor(1.3591, grad_fn=<NllLossBackward0>)
tensor(1.2993, grad_fn=<NllLossBackward0>)
tensor(1.2382, grad_fn=<NllLossBackward0>)
tensor(1.1762, grad_fn=<NllLossBackward0>)
tensor(1.1137, grad_fn=<NllLossBackward0>)
tensor(1.0511, grad_fn=<NllLossBackward0>)
tensor(0.9889, grad_fn=<NllLossBackward0>)
tensor(0.9273, grad_fn=<NllLossBackward0>)
tensor(0.8667, grad_fn=<NllLossBackward0>)
tensor(0.80

In [41]:
emb = np.load('cora_embeddings.npy')