In [2]:
import numpy as np
import torch
from torch_geometric.loader import DataLoader
from torch_geometric.datasets import TUDataset, Entities, Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.transforms import NormalizeFeatures
import site

site.addsitedir("../")

from models import mlp, gcn, graphsage, graphsaint, gat, graphmlp
from utils import config_utils as cfg_u
from utils import model_utils

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    """Scatter-plot a two-component t-SNE projection of ``h``.

    Args:
        h: Tensor to project; it is detached and copied to the CPU as a
            NumPy array before being handed to t-SNE.
        color: Per-point values forwarded to the scatter plot's ``c``
            argument and mapped through the "Set2" colormap.
    """
    projector = TSNE(n_components=2)
    points = projector.fit_transform(h.detach().cpu().numpy())
    xs, ys = points[:, 0], points[:, 1]

    plt.figure(figsize=(10, 10))
    # Axis ticks are removed on both axes.
    for clear_ticks in (plt.xticks, plt.yticks):
        clear_ticks([])

    plt.scatter(xs, ys, c=color, cmap="Set2", s=70)
    plt.show()

In [4]:
print("Load data!")

# All three datasets use "data" as their root directory; each one gets its own
# NormalizeFeatures transform instance.
aifb_dataset = Entities("data", "AIFB", transform=NormalizeFeatures())
mutag_dataset = TUDataset("data", "MUTAG", transform=NormalizeFeatures())
dataset = Planetoid("data", "Cora", transform=NormalizeFeatures())

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Running on", device)

Load data!


Downloading https://data.dgl.ai/dataset/aifb.tgz
Extracting data/aifb.tgz
Processing...
Done!
Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Extracting data/MUTAG/MUTAG.zip
Processing...
Done!
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index


Running on cpu


Processing...
Done!


In [4]:
# Number of target classes reported by the Cora `dataset` loaded earlier.
print(dataset.num_classes)

7


In [5]:
# Take the first graph of `dataset` and print its attribute/shape summary.
data = dataset[0]
print(data)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [5]:
def train(model, optimizer, data, criterion=torch.nn.CrossEntropyLoss()):
    """Perform one optimisation step using the training nodes of ``data``.

    Args:
        model: Module invoked as ``model(data)``; its output is indexed with
            ``data.train_mask``.
        optimizer: Optimizer whose gradients are cleared and which is stepped
            exactly once.
        data: Graph object exposing ``y`` and ``train_mask``.
        criterion: Loss callable applied to (masked outputs, masked targets).

    Returns:
        The loss computed in this step (before the parameter update).
    """
    model.train()
    optimizer.zero_grad()

    logits = model(data)
    train_nodes = data.train_mask
    step_loss = criterion(logits[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss

def test(model, data, mask):
    model.eval()
    out = model(data)
    pred = out.argmax(dim=1)
    correct = pred[mask] == data.y[mask]
    acc = int(correct.sum()) / int(mask.sum())
    return acc

def loop(model, optimizer, silent=False, epochs=200, graphs=None):
    """Train ``model`` on each graph and print its final test accuracy.

    Args:
        model: Model to train; must expose a ``name`` attribute, which is used
            in the final report line.
        optimizer: Optimizer bound to ``model``'s parameters.
        silent: When true, the per-10-epoch progress line is suppressed.
        epochs: Number of training epochs per graph (default 200).
        graphs: Iterable of graph objects exposing ``val_mask`` and
            ``test_mask``. Defaults to the notebook-level ``dataset``.
    """
    if graphs is None:
        # Fall back to the notebook-level dataset, as the original cell did.
        graphs = dataset

    for data in graphs:
        for epoch in range(1, epochs + 1):
            loss = train(model, optimizer, data)
            # Validation/test accuracy are only needed for the progress line,
            # so the two extra forward passes are skipped on all other epochs.
            if epoch % 10 == 0 and not silent:
                val_acc = test(model, data, data.val_mask)
                test_acc = test(model, data, data.test_mask)
                print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')

        test_acc = test(model, data, data.test_mask)
        print(f'Test Accuracy for {model.name}: {test_acc:.4f}')


In [7]:
# Every model is sized from the dataset and trained with the same Adam settings.
_n_classes, _n_features = dataset.num_classes, dataset.num_features
_adam_kwargs = {"lr": 0.01, "weight_decay": 5e-4}

gcn_model = gcn.GCN(num_features=_n_features, num_classes=_n_classes)
gcn_optimizer = torch.optim.Adam(gcn_model.parameters(), **_adam_kwargs)

graphsage_model = graphsage.SAGE(num_features=_n_features, num_classes=_n_classes)
graphsage_optimizer = torch.optim.Adam(graphsage_model.parameters(), **_adam_kwargs)

# SAINT names its input-size argument `num_node_features`.
graphsaint_model = graphsaint.SAINT(
    num_node_features=dataset.num_node_features,
    num_classes=_n_classes,
)
graphsaint_optimizer = torch.optim.Adam(graphsaint_model.parameters(), **_adam_kwargs)

gat_model = gat.GAT(num_features=_n_features, num_classes=_n_classes)
gat_optimizer = torch.optim.Adam(gat_model.parameters(), **_adam_kwargs)

graphmlp_model = graphmlp.GMLP(num_features=_n_features, num_classes=_n_classes)
graphmlp_optimizer = torch.optim.Adam(graphmlp_model.parameters(), **_adam_kwargs)

mlp_model = mlp.MLP(num_features=_n_features, num_classes=_n_classes)
mlp_optimizer = torch.optim.Adam(mlp_model.parameters(), **_adam_kwargs)

In [8]:
# Train each baseline in turn with per-epoch logging turned off.
# The GraphMLP pair (graphmlp_model, graphmlp_optimizer) is not part of this run.
for _model, _optimizer in (
    (mlp_model, mlp_optimizer),
    (gcn_model, gcn_optimizer),
    (graphsage_model, graphsage_optimizer),
    (graphsaint_model, graphsaint_optimizer),
    (gat_model, gat_optimizer),
):
    loop(_model, _optimizer, silent=True)

Test Accuracy for MLP: 0.6000
Test Accuracy for GCN: 0.8080
Test Accuracy for GraphSAGE: 0.8040
Test Accuracy for GraphSAINT: 0.7620
Test Accuracy for GAT: 0.8220


In [6]:
# Hyperparameter sweep over learning rate and hidden width (GCN only).

learning_rates = [0.001, 0.01, 0.1]
hidden_channels = [32, 64]

# Expand the grid up front; the order matches nested loops with the learning
# rate on the outside and the hidden width on the inside.
sweep = [(rate, width) for rate in learning_rates for width in hidden_channels]

for lr, hidden_channel in sweep:
    print(f"\nLearning Rate: {lr}, Hidden Channels: {hidden_channel}\n")

    # A fresh GCN and optimizer are built for every grid point.
    gcn_model = gcn.GCN(
        hidden_channels=hidden_channel,
        num_features=dataset.num_features,
        num_classes=dataset.num_classes,
    )
    gcn_optimizer = torch.optim.Adam(
        gcn_model.parameters(), weight_decay=5e-4, lr=lr
    )
    loop(gcn_model, gcn_optimizer, silent=True)

    # The same sweep for graphsage.SAGE, graphsaint.SAINT (which takes
    # `num_node_features` instead of `num_features`), gat.GAT and mlp.MLP is
    # currently disabled; each follows the pattern above with
    # `hidden_channels=hidden_channel` and an Adam optimizer using `lr`.

    print("-------------------------------------------------------------------------------")




Learning Rate: 0.001, Hidden Channels: 32

Test Accuracy for GCN: 0.8020
-------------------------------------------------------------------------------

Learning Rate: 0.001, Hidden Channels: 64

Test Accuracy for GCN: 0.7960
-------------------------------------------------------------------------------

Learning Rate: 0.01, Hidden Channels: 32

Test Accuracy for GCN: 0.8140
-------------------------------------------------------------------------------

Learning Rate: 0.01, Hidden Channels: 64

Test Accuracy for GCN: 0.8030
-------------------------------------------------------------------------------

Learning Rate: 0.1, Hidden Channels: 32

Test Accuracy for GCN: 0.8100
-------------------------------------------------------------------------------

Learning Rate: 0.1, Hidden Channels: 64

Test Accuracy for GCN: 0.8030
-------------------------------------------------------------------------------


In [None]:
import torch
import torch.nn.functional as F
import timeit
from models import graphmlp

def train_graphmlp(model, optimizer, data, criterion=torch.nn.CrossEntropyLoss(), tau=1.0, alpha=1.0, k_hop=2):
    """Run one Graph-MLP optimisation step: classification loss plus weighted Ncontrast loss.

    Args:
        model: Module invoked as ``model(data)`` and expected to return a pair
            ``(out, x_dis)``.
        optimizer: Optimizer whose gradients are cleared and which is stepped once.
        data: Graph object exposing ``y`` and ``train_mask``; it is also passed
            to ``model_utils.get_A_r``.
        criterion: Loss callable for the classification term; it is evaluated
            exactly once per step.
        tau: Forwarded to ``model_utils.Ncontrast`` as ``tau``.
        alpha: Weight applied to the Ncontrast term.
        k_hop: Second argument passed to ``model_utils.get_A_r``.

    Returns:
        The combined loss ``classification + alpha * Ncontrast`` used for the
        backward pass.
    """
    model.train()
    optimizer.zero_grad()

    out, x_dis = model(data)
    # NOTE(review): `out` is indexed with the full-graph train mask, so it must
    # have one row per node of `data`; the recorded run of this notebook hit an
    # IndexError here because the model returned fewer rows - confirm the
    # model's output contract.
    loss_train_class = criterion(out[data.train_mask], data.y[data.train_mask])

    adj_label = model_utils.get_A_r(data, k_hop)
    loss_Ncontrast = model_utils.Ncontrast(x_dis, adj_label, tau=tau)
    loss_train = loss_train_class + loss_Ncontrast * alpha

    loss_train.backward()
    optimizer.step()

    return loss_train


def train(model, optimizer, data, criterion=torch.nn.CrossEntropyLoss()):
    """Single supervised training step on the nodes flagged by ``data.train_mask``.

    This repeats the ``train`` definition from earlier in the notebook, so
    running this cell simply rebinds the same name to equivalent code.

    Args:
        model: Module invoked as ``model(data)``.
        optimizer: Optimizer that is zeroed, then stepped once.
        data: Graph object exposing ``y`` and ``train_mask``.
        criterion: Loss callable applied to (masked outputs, masked targets).

    Returns:
        The loss computed for this step.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(data)
    selected = data.train_mask
    batch_loss = criterion(predictions[selected], data.y[selected])
    batch_loss.backward()

    optimizer.step()
    return batch_loss


In [None]:
# NOTE(review): as recorded in this cell's output, this call raises IndexError:
# the tensor indexed by the 2708-entry mask has shape [2166, 32] (presumably the
# model's first return value). The model's output contract needs checking
# before this cell can run.
train_graphmlp(graphmlp_model, graphmlp_optimizer, dataset[0])

IndexError: The shape of the mask [2708] at index 0 does not match the shape of the indexed tensor [2166, 32] at index 0

In [None]:
from model import graphmlp as gmlp

# FIXME(review): Model() is called without arguments, but the recorded TypeError
# shows Model.__init__() requires a positional `args`; what `args` must contain
# is not visible from this notebook.
model = gmlp.Model()

TypeError: Model.__init__() missing 1 required positional argument: 'args'

In [None]:
from __future__ import division
from __future__ import print_function
import random
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

from models import GMLP
from utils import load_citation, accuracy, get_A_r

# Settings
# Command-line style hyperparameters for the Graph-MLP training code in this cell.
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--epochs', type=int, default=400,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=256,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.6,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--data', type=str, default='cora',
                    help='dataset to be used')
parser.add_argument('--alpha', type=float, default=2.0,
                    help='To control the ratio of Ncontrast loss')
parser.add_argument('--batch_size', type=int, default=2048,
                    help='batch size')
parser.add_argument('--order', type=int, default=2,
                    help='to compute order-th power of adj')
parser.add_argument('--tau', type=float, default=1.0,
                    help='temperature for Ncontrast loss')

# parse_known_args() rather than parse_args(): inside a notebook kernel sys.argv
# carries launcher arguments (e.g. "-f <connection file>") that this parser does
# not define. parse_args() would abort on them; here unknown arguments are
# ignored while any recognised option is still honoured.
args = parser.parse_known_args()[0]
# CUDA is used only when it is both available and not explicitly disabled.
args.cuda = not args.no_cuda and torch.cuda.is_available()

## Data: load the citation dataset, then derive the adjacency-label matrix
## that get_batch() later slices for the Ncontrast loss.
adj, features, labels, idx_train, idx_val, idx_test = load_citation(
    args.data, 'AugNormAdj', True
)
adj_label = get_A_r(adj, args.order)


## Model and optimizer
model = GMLP(
    nfeat=features.shape[1],
    nhid=args.hidden,
    # largest label value + 1 (assumes labels are 0-based class ids - TODO confirm)
    nclass=labels.max().item() + 1,
    dropout=args.dropout,
)
optimizer = optim.Adam(
    model.parameters(), weight_decay=args.weight_decay, lr=args.lr
)


## Move the model and the per-node tensors to the GPU when CUDA is enabled.
## `adj` and `adj_label` are not moved here.
if args.cuda:
    model.cuda()
    features, labels = features.cuda(), labels.cuda()
    idx_train, idx_val, idx_test = idx_train.cuda(), idx_val.cuda(), idx_test.cuda()


def Ncontrast(x_dis, adj_label, tau = 1):
    """
    Compute the Ncontrast loss.

    Every entry of ``x_dis`` is scaled by ``tau`` and exponentiated. For each
    row, the exponentiated entries weighted by ``adj_label`` are summed and
    divided by the sum of the whole row; the result is the mean over rows of
    the negative log of that ratio (with 1e-8 added inside the log).
    """
    weights = (tau * x_dis).exp()
    row_total = weights.sum(dim=1)
    row_positive = (weights * adj_label).sum(dim=1)
    ratio = row_positive * row_total.pow(-1)
    return -(ratio + 1e-8).log().mean()

def get_batch(batch_size):
    """
    Get a batch of features and the matching adjacency-label sub-matrix.

    ``batch_size`` row indices are drawn with ``np.random.choice`` from
    ``range(adj_label.shape[0])``; the first ``len(idx_train)`` of them are
    then overwritten with ``idx_train`` so the training nodes are always part
    of the batch (``batch_size`` must therefore be at least ``len(idx_train)``).

    Returns:
        ``(features_batch, adj_label_batch)``: the selected rows of ``features``
        and the rows and columns of ``adj_label`` for the same indices.
    """
    sampled = np.random.choice(np.arange(adj_label.shape[0]), batch_size)
    # Build the index on the device that holds `features` instead of calling
    # .cuda() unconditionally: the rest of the script only moves tensors to the
    # GPU when args.cuda is set, and a hard-coded .cuda() fails on CPU-only runs.
    rand_indx = torch.tensor(sampled, dtype=torch.long, device=features.device)
    rand_indx[0:len(idx_train)] = idx_train
    features_batch = features[rand_indx]
    # adj_label is never moved by the setup code, so index it on its own device.
    adj_indx = rand_indx.to(adj_label.device)
    adj_label_batch = adj_label[adj_indx,:][:,adj_indx]
    return features_batch, adj_label_batch

def train():
    """Run one Graph-MLP optimisation step on a freshly sampled batch.

    Uses the notebook-level ``model``, ``optimizer``, ``args``, ``labels`` and
    ``idx_train``. The loss is the NLL classification loss on the training rows
    plus ``args.alpha`` times the Ncontrast loss of the batch. Returns ``None``.

    Note that this zero-argument ``train`` rebinds the name of the
    ``train(model, optimizer, data)`` helper defined earlier in the notebook.
    """
    features_batch, adj_label_batch = get_batch(batch_size=args.batch_size)
    model.train()
    optimizer.zero_grad()
    output, x_dis = model(features_batch)
    # NOTE(review): `output` follows the order of the sampled batch but is
    # indexed with the dataset-level idx_train; get_batch() places idx_train in
    # the first len(idx_train) batch slots, so this only lines up when
    # idx_train equals arange(len(idx_train)) - confirm for the dataset in use.
    loss_train_class = F.nll_loss(output[idx_train], labels[idx_train])
    loss_Ncontrast = Ncontrast(x_dis, adj_label_batch, tau = args.tau)
    loss_train = loss_train_class + loss_Ncontrast * args.alpha
    # The training accuracy used to be computed here on every step, but the
    # value was never read or returned, so that work has been dropped.
    loss_train.backward()
    optimizer.step()
    return

def test():
    """Evaluate the notebook-level ``model`` on the full feature matrix.

    The model is put in eval mode and the forward pass runs under
    ``torch.no_grad()`` so evaluation does not build an autograd graph. In eval
    mode the model is called as returning a single output, unlike the
    ``(output, x_dis)`` pair unpacked during training.

    Returns:
        ``(acc_test, acc_val)``: accuracy on ``idx_test`` and on ``idx_val``,
        as computed by ``accuracy``.

    Note that this zero-argument ``test`` rebinds the name of the
    ``test(model, data, mask)`` helper defined earlier in the notebook.
    """
    model.eval()
    with torch.no_grad():
        output = model(features)
    # The test-set NLL loss was previously computed here but never used.
    acc_test = accuracy(output[idx_test], labels[idx_test])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    return acc_test, acc_val

# Model selection: remember the test accuracy measured at the epoch with the
# best validation accuracy seen so far. `test_acc` stays unset if validation
# accuracy never exceeds the initial 0.
best_accu = best_val_acc = 0
print('\n'+'training configs', args)
for epoch in tqdm(range(args.epochs)):
    train()
    tmp_test_acc, val_acc = test()
    if not (val_acc > best_val_acc):
        continue
    best_val_acc, test_acc = val_acc, tmp_test_acc


ImportError: cannot import name 'GMLP' from 'models' (unknown location)

In [11]:
import torch.nn.functional as F

# Run the GAT on the first graph without tracking gradients, then summarise it:
# softmax over dimension 1 of the output, averaged over dimension 0.
gat_model.eval()
with torch.no_grad():
    output = gat_model(dataset[0])

node_probs = output.softmax(dim=1)
gs = node_probs.mean(dim=0)

print("Graph Summary:", gs)

Graph Summary: tensor([0.1397, 0.1057, 0.1681, 0.2023, 0.1595, 0.1230, 0.1016])


In [15]:
from GMN_PyTorch import models

class Args:
    """Plain settings holder; the instance below is handed to ``models.GraphMatchingNetwork``."""

    def __init__(self):
        # Size-related settings.
        self.feat_dim, self.dim = 64, 128
        self.num_layers, self.n_classes = 3, 2
        # Use the GPU when torch can see one, otherwise the CPU.
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
        self.logging = True


args = Args()

In [16]:
# Display the class count stored on the settings object.
args.n_classes

2

In [17]:
# Build the graph matching network from the `args` settings object.
gmn = models.GraphMatchingNetwork(args)