In [45]:
import torch  # pytorch backend
from grb.dataset import Dataset
from grb.model.torch import GCN
from grb.trainer import Trainer
from grb.attack.injection.tdgia import TDGIA
from grb.utils.normalize import GCNAdjNorm
# from grb.defense import AdvTrainer
from torch_geometric.datasets import Planetoid

In [46]:
# import cora
dataset_name = 'Cora'

# Load the dataset
dataset = Planetoid(root='data', name=dataset_name)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [49]:
from grb.dataset import CustomDataset
from torch_geometric.utils import to_scipy_sparse_matrix

In [50]:
# create custom dataset from planetoid dataset

adj_matrix = to_scipy_sparse_matrix(dataset[0].edge_index)
features = dataset[0].x
labels = dataset[0].y
name = 'cora-custom'
data_dir = 'grb_data/cora'
save = True

In [51]:
cora_custom = CustomDataset(
    adj=adj_matrix,
    features=features,
    labels=labels,
    name=name,
    data_dir=data_dir,
    save=save,
)

GRB data splitting...
    Average degree of all nodes: 3.8981
    Average degree of 5% nodes with small degree: 1.0000
    Average degree of 5% nodes with large degree: 17.4265
    Average degree of 30% nodes (easy): 1.5690
    Randomly sampled 270 nodes
    Average degree of 30% nodes (medium): 3.0221
    Randomly sampled 270 nodes
    Average degree of 30% nodes (hard): 5.3202
    Randomly sampled 270 nodes
    Number of training/validation nodes: 1624/274
    No duplicate.
    Saved in grb_data/cora.
Custom Dataset 'cora-custom' loaded.
    Number of nodes: 2708
    Number of edges: 5278
    Number of features: 1433
    Number of classes: 7
    Number of train samples: 1624
    Number of val samples: 274
    Number of test samples: 810
    Dataset mode: full
    Feature range [0.0000, 1.0000]


In [26]:
cora_custom.adj

<2708x2708 sparse matrix of type '<class 'numpy.float32'>'
	with 10556 stored elements in COOrdinate format>

In [53]:
import shutil
import os

source_dir = 'grb_data/cora'
destination_dir = 'data/grb-cora'

os.makedirs(destination_dir, exist_ok=True)

for file_name in os.listdir(source_dir):
    shutil.copy(os.path.join(source_dir, file_name), destination_dir)

In [54]:
dataset = Dataset(name='grb-cora', data_dir='data/grb-cora', mode='hard')

Dataset 'grb-cora' loaded.
    Number of nodes: 2708
    Number of edges: 5278
    Number of features: 1433
    Number of classes: 7
    Number of train samples: 1624
    Number of val samples: 274
    Number of test samples: 270
    Dataset mode: hard
    Feature range: [-0.0718, 0.9282]


In [55]:
from grb.model.torch import GCN

In [56]:
model = GCN(in_features=dataset.num_features,
            out_features=dataset.num_classes,
            hidden_features=[64, 64], n_layers=3)

In [59]:
adam = torch.optim.Adam(model.parameters(), lr=0.01)
trainer = Trainer(dataset=dataset, optimizer=adam, loss=torch.nn.functional.nll_loss)
trainer.train(model=model, n_epoch=200, train_mode="inductive")

Epoch 00013 | Train loss 0.3302 | Train score 0.9329 | Val loss 1.2986 | Val score 0.8504:   6%|▌         | 12/200 [00:00<00:03, 55.64it/s]

Epoch 00001 | Best validation score: 0.7883
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00005 | Best validation score: 0.8029
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00007 | Best validation score: 0.8175
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00008 | Best validation score: 0.8248
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00009 | Best validation score: 0.8321
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00011 | Best validation score: 0.8358
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00012 | Best validation score: 0.8394
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00013 | Best validation score: 0.8504
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.


Epoch 00025 | Train loss 0.1729 | Train score 0.9501 | Val loss 1.2117 | Val score 0.8577:  12%|█▎        | 25/200 [00:00<00:02, 58.63it/s]

Epoch 00014 | Best validation score: 0.8540
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.
Epoch 00015 | Best validation score: 0.8686
Model saved in './tmp_2024_05_15_15_23_54/model.pt'.


Epoch 00199 | Train loss 0.0562 | Train score 0.9858 | Val loss 1.7454 | Val score 0.8431: 100%|██████████| 200/200 [00:03<00:00, 61.49it/s]

Model saved in './tmp_2024_05_15_15_23_54/final_model.pt'.
Training finished. Best validation score: 0.8686
Training runtime: 3.2577.





In [60]:
from grb.attack.injection.tdgia import TDGIA

# Attack configuration
tdgia = TDGIA(
    lr=0.01,
    n_epoch=10,
    n_inject_max=20,
    n_edge_max=20,
    feat_lim_min=-0.9,
    feat_lim_max=0.9,
    sequential_step=0.2,
)

# covert adj to coo format
adj = dataset.adj.tocoo()

# Apply attack
rst = tdgia.attack(
    model=model,
    adj=adj,
    features=dataset.features,
    target_mask=dataset.test_mask,
    adj_norm_func=GCNAdjNorm,
)
# Get modified adj and features
adj_attack, features_attack = rst

Attacking: Sequential inject 4/20 nodes


Epoch 9, Loss: 15.6292, Surrogate test score: 0.9778: 100%|██████████| 10/10 [00:00<00:00, 37.69it/s]


Attacking: Sequential inject 8/20 nodes


Epoch 9, Loss: 15.3678, Surrogate test score: 0.9593: 100%|██████████| 10/10 [00:00<00:00, 48.22it/s]


Attacking: Sequential inject 12/20 nodes


Epoch 9, Loss: 15.1927, Surrogate test score: 0.9444: 100%|██████████| 10/10 [00:00<00:00, 34.17it/s]


Attacking: Sequential inject 16/20 nodes


Epoch 9, Loss: 14.5031, Surrogate test score: 0.9037: 100%|██████████| 10/10 [00:00<00:00, 35.10it/s]


Attacking: Sequential inject 20/20 nodes


Epoch 9, Loss: 13.9904, Surrogate test score: 0.8667: 100%|██████████| 10/10 [00:00<00:00, 38.11it/s]

Attack runtime: 1.5817.





In [61]:
from grb.utils import evaluate

test_score = evaluate(
    model=model,
    adj=adj_attack,
    features=features_attack,
    labels=dataset.labels,
    mask=dataset.test_mask,
    adj_norm_func=GCNAdjNorm,
)

test_score

RuntimeError: addmm: Argument #3 (dense): Expected dim 0 size 2728, got 20