In [1]:
seed = 0
import torch
torch.manual_seed(seed)
import random
random.seed(seed)
import numpy as np
np.random.seed(seed)
torch.use_deterministic_algorithms(True)

In [2]:
import torch
from torch import Tensor
from torch_geometric.logging import init_wandb, log
from torch_geometric.datasets import Planetoid
import torch.nn.functional as F
from utils import train, test, edgeindex2adj
citeseer = Planetoid(root='.', name='Citeseer')
cora = Planetoid(root='.', name='Cora')
# pubmed = Planetoid(root='.', name='Pubmed')

In [3]:
from models import ALP, GCN
dataset = citeseer
data = dataset[0]

# take k random training nodes for each class
k = 5
for c in data.y.unique():
    idx = ((data.y == c) & data.train_mask).nonzero(as_tuple=False).view(-1)
    idx = idx[torch.randperm(idx.size(0))]
    idx = idx[k:]
    data.train_mask[idx] = False

print(data.train_mask.sum())

tensor(30)


In [4]:
yshape = dataset[0].y.shape[0]
model = ALP(num_layers=16, alpha=0.9, yshape=yshape)
outs = model.lpa(dataset)

gcn = GCN(dataset.num_features, 8, dataset.num_classes)

optimizer_gcn = torch.optim.Adam(gcn.parameters(), lr=0.005, weight_decay=5e-4)
optimizer_alp = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

In [5]:
model.test()

[1.0, 0.386, 0.351]

In [6]:
train_mask = data.train_mask.clone()
val_mask = data.val_mask.clone()
test_mask = data.test_mask.clone()

unlab_mask = torch.ones_like(data.train_mask)
unlab_mask[(data.train_mask == True) | (data.val_mask == True) | (data.test_mask == True)] = False

print(train_mask.sum(), val_mask.sum(), test_mask.sum(), unlab_mask.sum())

tensor(30) tensor(500) tensor(1000) tensor(1797)


In [7]:
# for p in model.parameters():
#     print(p)

In [8]:
model.train()
best_val_acc = final_test_acc = 0
for epoch in range(1, 200):
    
    optimizer_gcn.zero_grad()
    optimizer_alp.zero_grad()

    # add 1% of unlabelled data to training data
    new_train_mask = train_mask.clone()
    to_add_mask = unlab_mask.clone()
    # put 99% to 0
    idx = to_add_mask.nonzero(as_tuple=False).view(-1)
    idx = idx[torch.randperm(idx.size(0))][:int(0.90 * idx.size(0))]
    to_add_mask[idx] = False
    new_train_mask[to_add_mask] = True
    # print(new_train_mask.sum())
    
    op_alp = model()
    op = gcn(data.x, data.edge_index)
    
    # print(op_alp.shape, op.shape, data.y[train_mask].shape)
    loss = F.cross_entropy(op[new_train_mask, :], op_alp[new_train_mask].argmax(dim=1))
    # print(op_alp[train_mask].shape, data.y[train_mask].shape)
    loss2 = F.cross_entropy(op_alp[train_mask], data.y[train_mask])
    # print(loss.item(), loss2.item())
    # for p in model.parameters():
    #     print(p)
    # loss += 0.01 * loss2
    loss.backward()
    
    optimizer_alp.step()
    optimizer_gcn.step()
    
    train_acc, val_acc, tmp_test_acc = test(gcn, data)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc, Loss2 = loss2)
print(f'Best Val Acc: {best_val_acc:.4f}', f'Test Acc: {test_acc:.4f}')

Epoch: 001, Loss: 1.7877354621887207, Train: 0.1667, Val: 0.2340, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 002, Loss: 1.7337859869003296, Train: 0.1667, Val: 0.2320, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 003, Loss: 1.6900779008865356, Train: 0.1667, Val: 0.2320, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 004, Loss: 1.612727165222168, Train: 0.2000, Val: 0.2320, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 005, Loss: 1.5861579179763794, Train: 0.2333, Val: 0.2320, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 006, Loss: 1.5180915594100952, Train: 0.2333, Val: 0.2340, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 007, Loss: 1.4751678705215454, Train: 0.2333, Val: 0.2340, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 008, Loss: 1.4293628931045532, Train: 0.2667, Val: 0.2340, Test: 0.1820, Loss2: 1.5568794012069702
Epoch: 009, Loss: 1.3959996700286865, Train: 0.3000, Val: 0.2440, Test: 0.1900, Loss2: 1.5568794012069702
Epoch: 010, Loss: 1.3738919496536255, Train: 0.

In [9]:
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.0005, weight_decay=5e-4)
best_val_acc = final_test_acc = 0
for epoch in range(1, 100):
    loss = train(gcn, data, optimizer, loss='cross_entropy')
    train_acc, val_acc, tmp_test_acc = test(gcn, data)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc)
print(f'Best Val Acc: {best_val_acc:.4f}', f'Test Acc: {test_acc:.4f}')

Epoch: 001, Loss: 0.0236421637237072, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 002, Loss: 0.022779876366257668, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 003, Loss: 0.021996216848492622, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 004, Loss: 0.021289195865392685, Train: 1.0000, Val: 0.3880, Test: 0.3430
Epoch: 005, Loss: 0.020636949688196182, Train: 1.0000, Val: 0.3880, Test: 0.3430
Epoch: 006, Loss: 0.020023580640554428, Train: 1.0000, Val: 0.3880, Test: 0.3430
Epoch: 007, Loss: 0.01945175975561142, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 008, Loss: 0.018910497426986694, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 009, Loss: 0.01840626448392868, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 010, Loss: 0.017919963225722313, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 011, Loss: 0.0174688920378685, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 012, Loss: 0.01704259403049946, Train: 1.0000, Val: 0.3900, Test: 0.3430
Epoch: 013, Loss: 0.016639430075883

In [10]:
# # take k random training nodes for each class
# data = dataset[0]
# k = 2
# for c in data.y.unique():
#     idx = ((data.y == c) & data.train_mask).nonzero(as_tuple=False).view(-1)
#     idx = idx[torch.randperm(idx.size(0))]
#     idx = idx[k:]
#     data.train_mask[idx] = False

In [11]:
from models import GCNLPA

dataset = cora
data = dataset[0]

k = 2
for c in data.y.unique():
    idx = ((data.y == c) & data.train_mask).nonzero(as_tuple=False).view(-1)
    idx = idx[torch.randperm(idx.size(0))]
    idx = idx[k:]
    data.train_mask[idx] = False

model = GCNLPA(dataset.num_features, 16, dataset.num_classes, dataset.edge_index.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

best_val_acc = final_test_acc = 0
for epoch in range(1, 200):
    op, op_lpa = model(data)
    
    loss1 = F.cross_entropy(op[data.train_mask], data.y[data.train_mask])
    # print(op_alp[train_mask].shape, data.y[train_mask].shape)
    loss2 = F.cross_entropy(op_lpa[data.train_mask], data.y[data.train_mask])
    # print(loss.item(), loss2.item())
    # for p in model.parameters():
    #     print(p)
    loss = loss1 + loss2
    loss.backward()
    optimizer.step()    
    train_acc, val_acc, tmp_test_acc = model.test(data)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    if epoch % 20 == 0:
        log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc, Loss2 = loss2)
print(f'Best Val Acc: {best_val_acc:.4f}', f'Test Acc: {test_acc:.4f}')

Epoch: 020, Loss: 2.8510704040527344, Train: 0.9286, Val: 0.2360, Test: 0.2470, Loss2: 1.4198877811431885
Epoch: 040, Loss: 2.5801124572753906, Train: 1.0000, Val: 0.2520, Test: 0.2470, Loss2: 1.4051464796066284
Epoch: 060, Loss: 2.5570645332336426, Train: 1.0000, Val: 0.3680, Test: 0.3610, Loss2: 1.388647437095642
Epoch: 080, Loss: 2.53702712059021, Train: 1.0000, Val: 0.4420, Test: 0.4390, Loss2: 1.3714197874069214
Epoch: 100, Loss: 2.520162343978882, Train: 1.0000, Val: 0.4280, Test: 0.4390, Loss2: 1.3543814420700073
Epoch: 120, Loss: 2.503467082977295, Train: 1.0000, Val: 0.3860, Test: 0.4390, Loss2: 1.3380448818206787
Epoch: 140, Loss: 2.488126516342163, Train: 1.0000, Val: 0.3520, Test: 0.4390, Loss2: 1.3227041959762573
Epoch: 160, Loss: 2.4739537239074707, Train: 1.0000, Val: 0.3320, Test: 0.4390, Loss2: 1.308531403541565
Epoch: 180, Loss: 2.4610157012939453, Train: 1.0000, Val: 0.3120, Test: 0.4390, Loss2: 1.295593500137329
Best Val Acc: 0.4500 Test Acc: 0.4390


In [12]:
# outs.shape, outs[0, 0, :].sum(), (outs[0, 0, :] == outs[2, 0, :]).sum()

In [13]:
best_val_acc = final_test_acc = 0

model.train()
for epoch in range(1, 100):
    optimizer.zero_grad()
    loss = model.loss()
    loss.backward()
    optimizer.step()
    
    train_acc, val_acc, tmp_test_acc = model.test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc)

AttributeError: 'GCNLPA' object has no attribute 'loss'