In [1]:
# Reproducibility setup: every random number generator used by the notebook
# (torch, the stdlib `random` module, NumPy) is seeded with the same value,
# and torch is told to use deterministic algorithms.
import random

import numpy as np
import torch

seed = 0

torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

# Ask torch to use deterministic implementations of its operations.
torch.use_deterministic_algorithms(True)

In [2]:
import torch
from torch import Tensor
from torch_geometric.logging import init_wandb, log
from torch_geometric.datasets import Planetoid
from utils import train, test, edgeindex2adj
from models import GCN, GAT, LP
# Load the three Planetoid datasets (Citeseer, Cora, Pubmed), all rooted in the
# current working directory. Construction order is Citeseer -> Cora -> Pubmed.
citeseer, cora, pubmed = (
    Planetoid(root='.', name=dataset_name)
    for dataset_name in ('Citeseer', 'Cora', 'Pubmed')
)

In [3]:
# dataset = citeseer
# model = GCN(dataset.num_features, 24, dataset.num_classes)

# dataset = cora
# model = GCN(dataset.num_features, 8, dataset.num_classes)

# dataset = pubmed
# model = GCN(dataset.num_features, 8, dataset.num_classes)

# dataset = citeseer
# model = GAT(dataset.num_features, 8, dataset.num_classes, heads=4)

# dataset = cora
# model = GAT(dataset.num_features, 8, dataset.num_classes, heads=4)

# dataset = pubmed
# model = GAT(dataset.num_features, 8, dataset.num_classes, heads=4)

# optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

In [4]:
from models import ALP, GCN
dataset = cora
data = dataset[0]

# Keep only k randomly chosen training nodes per class; every other training
# node of that class is switched off in data.train_mask (modified in place).
k = 2
for label in data.y.unique():
    in_class_train = (data.y == label) & data.train_mask
    candidates = torch.nonzero(in_class_train).view(-1)
    shuffled = candidates[torch.randperm(candidates.numel())]
    # The first k shuffled indices stay in the mask; the remainder are dropped.
    data.train_mask[shuffled[k:]] = False

print(data.train_mask.sum())

tensor(14)


In [10]:
# Train a GCN on `data` and report the test accuracy observed at the epoch
# with the highest validation accuracy.
model = GCN(dataset.num_features, 8, dataset.num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# NOTE(review): this scheduler is built but not handed to train() below
# (the call passes scheduler=None) — confirm whether that is intentional.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=50, min_lr=0.0001)

# `test_acc` must be initialised here: it is read by log() on every epoch and by
# the final print, but is only assigned inside the `if` below. Without this, a
# fresh kernel raises NameError when the first epoch's val_acc is not > 0, and a
# reused kernel silently reports a stale value left over from another cell.
# `final_test_acc` is kept because the original cell defined it; the loop does
# not update it.
best_val_acc = test_acc = final_test_acc = 0
# NOTE(review): range(1, 200) runs epochs 1..199 (199 epochs, not 200).
for epoch in range(1, 200):
    loss = train(model, data, optimizer, scheduler=None, loss='cross_entropy', alpha=0.0005)
    train_acc, val_acc, tmp_test_acc = test(model, data)
    # Model selection on validation accuracy: remember the test accuracy of the
    # best-validation epoch only.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc)
print(f'Best Val Acc: {best_val_acc:.4f}', f'Test Acc: {test_acc:.4f}')

Epoch: 001, Loss: 1.945435643196106, Train: 0.6429, Val: 0.2560, Test: 0.2320
Epoch: 002, Loss: 1.8319387435913086, Train: 0.6429, Val: 0.3140, Test: 0.3160
Epoch: 003, Loss: 1.7161442041397095, Train: 0.7857, Val: 0.3200, Test: 0.3490
Epoch: 004, Loss: 1.5864388942718506, Train: 0.5714, Val: 0.3160, Test: 0.3490
Epoch: 005, Loss: 1.4705387353897095, Train: 0.5714, Val: 0.3300, Test: 0.3310
Epoch: 006, Loss: 1.3599216938018799, Train: 0.7143, Val: 0.3360, Test: 0.3350
Epoch: 007, Loss: 1.2562779188156128, Train: 0.7857, Val: 0.3320, Test: 0.3350
Epoch: 008, Loss: 1.162876009941101, Train: 0.8571, Val: 0.3240, Test: 0.3350
Epoch: 009, Loss: 1.0791798830032349, Train: 0.8571, Val: 0.3260, Test: 0.3350
Epoch: 010, Loss: 1.0050922632217407, Train: 0.8571, Val: 0.3140, Test: 0.3350
Epoch: 011, Loss: 0.9391421675682068, Train: 0.8571, Val: 0.3140, Test: 0.3350
Epoch: 012, Loss: 0.8810887932777405, Train: 0.8571, Val: 0.3000, Test: 0.3350
Epoch: 013, Loss: 0.828885555267334, Train: 0.7857, Va

In [6]:
# Grid search for the LP model over the number of layers and alpha, selecting
# the configuration with the highest validation accuracy.
# dataset = citeseer
# dataset = cora
dataset = pubmed

# `test_acc` is initialised here because it is only assigned inside the `if`
# below but is read by the final print. Without this, the print would either
# raise NameError (fresh kernel) or show a stale value from an earlier cell if
# no configuration ever reaches val_acc > 0.
# `final_test_acc` is kept because the original cell defined it; it is not
# updated by the search.
best_val_acc = test_acc = final_test_acc = 0
best_l = best_a = 0

for l in [1, 2, 4, 8, 16, 32]:
    for a in [0.05, 0.1, 0.3, 0.6, 0.8, 0.9, 0.95, 0.99, 1]:
        model = LP(num_layers=l, alpha=a)
        outs = model.train(dataset)
        train_acc, val_acc, tmp_test_acc = model.test()
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc = tmp_test_acc
            best_l = l
            best_a = a

# NOTE: after the loops, `model` and `outs` belong to the LAST configuration
# tried (l=32, a=1), not to the best one recorded in best_l / best_a.
print(f'Best Val Acc: {best_val_acc:.4f}', f'Test Acc: {test_acc:.4f}', f'Best l: {best_l}', f'Best a: {best_a}')

Best Val Acc: 0.7260 Test Acc: 0.7140 Best l: 16 Best a: 0.99


In [7]:
# Re-evaluate `model` as left behind by the LP grid search. This is the last
# configuration tried by the loops, not necessarily the best one, so these
# numbers can differ from the "Best Val Acc / Test Acc" line printed above.
accs = model.test()
accs

[0.8833333333333333, 0.712, 0.707]

In [8]:
# Shape of `outs`, the value returned by the most recent model.train(dataset) call.
outs.shape

torch.Size([19717, 3])

In [9]:
# Index of the largest entry along the last dimension of `outs`
# (presumably the predicted class per node — confirm against models.LP).
outs.argmax(dim=-1)

tensor([1, 1, 0,  ..., 2, 0, 2])

In [10]:
# The `y` tensor of the dataset's first graph, shown for side-by-side comparison
# with the argmax output above.
dataset[0].y

tensor([1, 1, 0,  ..., 2, 0, 2])

In [11]:
# Number of classes reported by the currently selected dataset.
dataset.num_classes

3