In [1]:
#!/usr/bin/env python
# coding: utf-8

# In[1]: 


import time
import argparse
import numpy as np
import torch
from models.GCN import GCN
from models.GCN_Encoder import GCN_Encoder
from torch_geometric.datasets import Planetoid, WebKB, WikipediaNetwork,Reddit,Reddit2,Flickr,Yelp,PPI
from torch_geometric.utils import to_dense_adj,dense_to_sparse

from ogb.nodeproppred import PygNodePropPredDataset
# from torch_geometric.loader import DataLoader
from help_funcs import prune_unrelated_edge,prune_unrelated_edge_isolated,select_target_nodes
import help_funcs
import scipy.sparse as sp

# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--debug', action='store_true',
        default=True, help='debug mode')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--seed', type=int, default=10, help='Random seed.')
parser.add_argument('--model', type=str, default='GCN', help='model',
                    choices=['GCN','GAT','GraphSage','GIN'])
parser.add_argument('--dataset', type=str, default='ogbn-arxiv', 
                    help='Dataset',
                    choices=['Cora','Citeseer','Pubmed','PPI','Flickr','ogbn-arxiv','Reddit','Reddit2','Yelp'])
parser.add_argument('--train_lr', type=float, default=0.02,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=64,
                    help='Number of hidden units.')
parser.add_argument('--thrd', type=float, default=0.5)
parser.add_argument('--target_class', type=int, default=0)
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--epochs', type=int,  default=200, help='Number of epochs to train benign and backdoor model.')
parser.add_argument('--trojan_epochs', type=int,  default=200, help='Number of epochs to train trigger generator.')
parser.add_argument('--inner', type=int,  default=1, help='Number of inner')
# backdoor setting
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--trigger_size', type=int, default=3,
                    help='tirgger_size')
parser.add_argument('--use_vs_number', action='store_true', default=False,
                    help="if use detailed number to decide Vs")
parser.add_argument('--vs_ratio', type=float, default=0.001,
                    help="ratio of poisoning nodes relative to the full graph")
parser.add_argument('--vs_number', type=int, default=0,
                    help="number of poisoning nodes relative to the full graph")
# defense setting
parser.add_argument('--defense_mode', type=str, default="isolated",
                    choices=['prune', 'isolate', 'none'],
                    help="Mode of defense")
parser.add_argument('--prune_thr', type=float, default=0.35,
                    help="Threshold of prunning edges")
parser.add_argument('--target_loss_weight', type=float, default=1,
                    help="Weight of optimize outter trigger generator")
parser.add_argument('--homo_loss_weight', type=float, default=1,
                    help="Weight of optimize similarity loss")
parser.add_argument('--homo_boost_thrd', type=float, default=0.5,
                    help="Threshold of increase similarity")
# attack setting
parser.add_argument('--dis_weight', type=float, default=1,
                    help="Weight of cluster distance")
parser.add_argument('--attack_method', type=str, default='Basic',
                    choices=['Rand_Gene','Rand_Samp','Basic','None'],
                    help='Method to select idx_attach for training trojan model (none means randomly select)')
parser.add_argument('--trigger_prob', type=float, default=0.5,
                    help="The probability to generate the trigger's edges in random method")
parser.add_argument('--selection_method', type=str, default='cluster_degree',
                    choices=['loss','conf','cluster','none','cluster_degree'],
                    help='Method to select idx_attach for training trojan model (none means randomly select)')
parser.add_argument('--test_model', type=str, default='GCN',
                    choices=['GCN','GAT','GraphSage','GIN'],
                    help='Model used to attack')
parser.add_argument('--evaluate_mode', type=str, default='overall',
                    choices=['overall','1by1'],
                    help='Model used to attack')
# GPU setting
parser.add_argument('--device_id', type=int, default=1,
                    help="Threshold of prunning edges")
# args = parser.parse_args()
args = parser.parse_known_args()[0]
args.cuda =  not args.no_cuda and torch.cuda.is_available()
device = torch.device(('cuda:{}' if torch.cuda.is_available() else 'cpu').format(args.device_id))
# device2 = torch.device(('cuda:{}' if torch.cuda.is_available() else 'cpu').format(args.device_id+1))

# np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
print(args)
def setup_seed(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
     np.random.seed(seed)
    #  random.seed(seed)
     torch.backends.cudnn.deterministic = True
# 设置随机数种子
setup_seed(args.seed)
#%%
from torch_geometric.utils import to_undirected
import torch_geometric.transforms as T
transform = T.Compose([T.NormalizeFeatures()])

np.random.seed(11) # fix the random seed is important
if(args.dataset == 'Cora' or args.dataset == 'Citeseer' or args.dataset == 'Pubmed'):
    dataset = Planetoid(root='./data/', \
                        name=args.dataset,\
                        transform=transform)
elif(args.dataset == 'Flickr'):
    dataset = Flickr(root='./data/Flickr/', \
                    transform=transform)
elif(args.dataset == 'PPI'):
    dataset = PPI(root='./data/PPI/', 
                split='train', transform=None)
elif(args.dataset == 'Reddit2'):
    dataset = Reddit2(root='./data/Reddit2/', \
                    transform=transform)
elif(args.dataset == 'ogbn-arxiv'):
    # Download and process data at './dataset/ogbg_molhiv/'
    dataset = PygNodePropPredDataset(name = 'ogbn-arxiv', root='./data/')
    split_idx = dataset.get_idx_split() 
elif(args.dataset == 'Yelp'):
    # Download and process data at './dataset/ogbg_molhiv/'
    dataset = Yelp(root='./data/Yelp/')
    # idx_train, idx_val, idx_test = split_idx["train"], split_idx["valid"], split_idx["test"]

data = dataset[0].to(device)

if(args.dataset == 'ogbn-arxiv'):
    nNode = data.x.shape[0]
    setattr(data,'train_mask',torch.zeros(nNode, dtype=torch.bool).to(device))
    # dataset[0].train_mask = torch.zeros(nEdge, dtype=torch.bool).to(device)
    data.val_mask = torch.zeros(nNode, dtype=torch.bool).to(device)
    data.test_mask = torch.zeros(nNode, dtype=torch.bool).to(device)
    data.y = data.y.squeeze(1)
# we build our own train test split 
from utils import get_split
data, idx_train, idx_val, idx_clean_test, idx_atk = get_split(args,data,device)

#%%
from torch_geometric.utils import to_undirected
from utils import subgraph
data.edge_index = to_undirected(data.edge_index)
train_edge_index,_, edge_mask = subgraph(torch.bitwise_not(data.test_mask),data.edge_index,relabel_nodes=False)
mask_edge_index = data.edge_index[:,torch.bitwise_not(edge_mask)]

# In[3]:

# In[6]: 
import os
from models.backdoor import model_construct
benign_modelpath = './modelpath/{}_{}_benign.pth'.format(args.model, args.dataset)
if(os.path.exists(benign_modelpath)):
    # load existing benign model
    benign_model = torch.load(benign_modelpath)
    benign_model = benign_model.to(device)
    # edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
    print("Loading benign {} model Finished!".format(args.model))
else:
    benign_model = model_construct(args,args.model,data,device).to(device) 
    t_total = time.time()
    print("Length of training set: {}".format(len(idx_train)))
    benign_model.fit(data.x, train_edge_index, None, data.y, idx_train, idx_val,train_iters=args.epochs,verbose=False)
    print("Training benign model Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    # Save trained model
    # torch.save(benign_model, benign_modelpath)
    # print("Benign model saved at {}".format(benign_modelpath))

# In[7]:

benign_ca = benign_model.test(data.x, data.edge_index, None, data.y,idx_clean_test)
print("Benign CA: {:.4f}".format(benign_ca))
benign_model = benign_model.cpu()

# In[9]:

from sklearn_extra import cluster
from models.backdoor import Backdoor
from heuristic_selection import obtain_attach_nodes, obtain_attach_nodes_by_cluster_degree, obtain_attach_nodes_by_cluster_gpu,obtain_attach_nodes_by_influential,obtain_attach_nodes_by_cluster,cluster_distance_selection,cluster_degree_selection

from kmeans_pytorch import kmeans, kmeans_predict

# filter out the unlabeled nodes except from training nodes and testing nodes, nonzero() is to get index, flatten is to get 1-d tensor
unlabeled_idx = (torch.bitwise_not(data.test_mask)&torch.bitwise_not(data.train_mask)).nonzero().flatten()
if(args.use_vs_number):
    size = args.vs_number
else:
    size = int((len(data.test_mask)-data.test_mask.sum())*args.vs_ratio)
# here is randomly select poison nodes from unlabeled nodes
if(args.selection_method == 'none'):
    idx_attach = obtain_attach_nodes(args,unlabeled_idx,size)
elif(args.selection_method == 'loss' or args.selection_method == 'conf'):
    idx_attach = obtain_attach_nodes_by_influential(args,benign_model,unlabeled_idx.cpu().tolist(),data.x,train_edge_index,None,data.y,device,size,selected_way=args.selection_method)
    idx_attach = torch.LongTensor(idx_attach).to(device)
elif(args.selection_method == 'cluster'):
    idx_attach = cluster_distance_selection(args,data,idx_train,idx_val,idx_clean_test,unlabeled_idx,train_edge_index,size,device)
    idx_attach = torch.LongTensor(idx_attach).to(device)
elif(args.selection_method == 'cluster_degree'):
    idx_attach = cluster_degree_selection(args,data,idx_train,idx_val,idx_clean_test,unlabeled_idx,train_edge_index,size,device)
    idx_attach = torch.LongTensor(idx_attach).to(device)

# In[10]:
# train trigger generator 
model = Backdoor(args,device)
if(args.attack_method == 'Basic'):
    model.fit(data.x, train_edge_index, None, data.y, idx_train,idx_attach, unlabeled_idx)
    poison_x, poison_edge_index, poison_edge_weights, poison_labels = model.get_poisoned()
elif(args.attack_method == 'Rand_Gene' or args.attack_method == 'Rand_Samp'):
    # model.fit_rand(data.x, train_edge_index, None, data.y, idx_train,idx_attach, unlabeled_idx)
    poison_x, poison_edge_index, poison_edge_weights, poison_labels = model.get_poisoned_rand(data.x, train_edge_index, None, data.y, idx_train,idx_attach, unlabeled_idx)
elif(args.attack_method == 'None'):
    train_edge_weights = torch.ones([train_edge_index.shape[1]],device=device,dtype=torch.float)
    poison_x, poison_edge_index, poison_edge_weights, poison_labels = data.x.clone(), train_edge_index.clone(), train_edge_weights, data.y.clone()
# In[12]:
if(args.defense_mode == 'prune'):
    poison_edge_index,poison_edge_weights = prune_unrelated_edge(args,poison_edge_index,poison_edge_weights,poison_x,device)
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).to(device)
elif(args.defense_mode == 'isolate'):
    poison_edge_index,poison_edge_weights,rel_nodes = prune_unrelated_edge_isolated(args,poison_edge_index,poison_edge_weights,poison_x,device)
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).tolist()
    bkd_tn_nodes = torch.LongTensor(list(set(bkd_tn_nodes) - set(rel_nodes))).to(device)
else:
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).to(device)
# if(args.attack_method == 'none'):
#     bkd_tn_nodes = idx_train
print("precent of left attach nodes: {:.3f}"\
    .format(len(set(bkd_tn_nodes.tolist()) & set(idx_attach.tolist()))/len(idx_attach)))
#%%
test_model = model_construct(args,args.test_model,data,device).to(device) 
test_model.fit(poison_x, poison_edge_index, poison_edge_weights, poison_labels, bkd_tn_nodes, idx_val,train_iters=args.epochs,verbose=False)

output = test_model(poison_x,poison_edge_index,poison_edge_weights)
train_attach_rate = (output.argmax(dim=1)[idx_attach]==args.target_class).float().mean()
print("target class rate on Vs: {:.4f}".format(train_attach_rate))
#%%
induct_edge_index = torch.cat([poison_edge_index,mask_edge_index],dim=1)
induct_edge_weights = torch.cat([poison_edge_weights,torch.ones([mask_edge_index.shape[1]],dtype=torch.float,device=device)])
clean_acc = test_model.test(poison_x,induct_edge_index,induct_edge_weights,data.y,idx_clean_test)
# test_model = test_model.cpu()

print("accuracy on clean test nodes: {:.4f}".format(clean_acc))

# poison_x, poison_edge_index, poison_edge_weights, poison_labels = poison_x.to(device2), poison_edge_index.to(device2), poison_edge_weights.to(device2), poison_labels.to(device2)
# model.trojan = model.trojan.cpu()
if(args.evaluate_mode == '1by1'):
    if(args.attack_method == 'Rand_Gene' or args.attack_method == 'Rand_Samp'):
        induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger_rand(idx_atk,poison_x,induct_edge_index,induct_edge_weights,data.y)
        # induct_x, induct_edge_index,induct_edge_weights = induct_x.clone().detach(), induct_edge_index.clone().detach(),induct_edge_weights.clone().detach()
    
    from torch_geometric.utils  import k_hop_subgraph
    overall_induct_edge_index, overall_induct_edge_weights = induct_edge_index.clone(),induct_edge_weights.clone()
    asr = 0
    flip_asr = 0
    flip_idx_atk = idx_atk[(data.y[idx_atk] != args.target_class).nonzero().flatten()]
    for i, idx in enumerate(idx_atk):
        idx=int(idx)
        sub_induct_nodeset, sub_induct_edge_index, sub_mapping, sub_edge_mask  = k_hop_subgraph(node_idx = [idx], num_hops = 2, edge_index = overall_induct_edge_index, relabel_nodes=True) # sub_mapping means the index of [idx] in sub)nodeset
        ori_node_idx = sub_induct_nodeset[sub_mapping]
        relabeled_node_idx = sub_mapping
        sub_induct_edge_weights = torch.ones([sub_induct_edge_index.shape[1]]).to(device)
        # inject trigger on attack test nodes (idx_atk)'''
        if(args.attack_method == 'Basic'):
            induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(relabeled_node_idx,poison_x[sub_induct_nodeset],sub_induct_edge_index,sub_induct_edge_weights,device)
        elif(args.attack_method == 'Rand_Gene' or args.attack_method == 'Rand_Samp'):
            # induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger_rand(relabeled_node_idx,poison_x[sub_induct_nodeset],sub_induct_edge_index,sub_induct_edge_weights,data.y[sub_induct_nodeset], full_data=True)
            induct_x, induct_edge_index,induct_edge_weights = induct_x[sub_induct_nodeset],sub_induct_edge_index,sub_induct_edge_weights
        elif(args.attack_method == 'None'):
            induct_x, induct_edge_index,induct_edge_weights = poison_x[sub_induct_nodeset],sub_induct_edge_index,sub_induct_edge_weights

        induct_x, induct_edge_index,induct_edge_weights = induct_x.clone().detach(), induct_edge_index.clone().detach(),induct_edge_weights.clone().detach()
        # # do pruning in test datas'''
        print(induct_edge_index.shape,induct_edge_weights.shape,induct_x.shape)
        if(args.defense_mode == 'prune' or args.defense_mode == 'isolate'):
            induct_edge_index,induct_edge_weights = prune_unrelated_edge(args,induct_edge_index,induct_edge_weights,induct_x,device)
        # attack evaluation

        # test_model = test_model.to(device)
        output = test_model(induct_x,induct_edge_index,induct_edge_weights)
        train_attach_rate = (output.argmax(dim=1)[relabeled_node_idx]==args.target_class).float().mean()
        print("Node {}: {}, Origin Label: {}".format(i, idx, data.y[idx]))
        print("ASR: {:.4f}".format(train_attach_rate))
        asr += train_attach_rate
        if(data.y[idx] != args.target_class):
            flip_asr += train_attach_rate
        # ca = test_model.test(induct_x,induct_edge_index,induct_edge_weights,data.y,idx_clean_test)
        # print("CA: {:.4f}".format(ca))
    asr = asr/(idx_atk.shape[0])
    flip_asr = flip_asr/(flip_idx_atk.shape[0])
    print("Overall ASR: {:.4f}".format(asr))
    print("Flip ASR: {:.4f}/{} nodes".format(flip_asr,flip_idx_atk.shape[0]))
elif(args.evaluate_mode == 'overall'):
    # %% inject trigger on attack test nodes (idx_atk)'''
    if(args.attack_method == 'Basic'):
        induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(idx_atk,poison_x,induct_edge_index,induct_edge_weights,device)
    elif(args.attack_method == 'Rand_Gene' or args.attack_method == 'Rand_Samp'):
        induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger_rand(idx_atk,poison_x,induct_edge_index,induct_edge_weights,data.y)
    elif(args.attack_method == 'None'):
        induct_x, induct_edge_index,induct_edge_weights = poison_x,induct_edge_index,induct_edge_weights

    induct_x, induct_edge_index,induct_edge_weights = induct_x.clone().detach(), induct_edge_index.clone().detach(),induct_edge_weights.clone().detach()
    # do pruning in test datas'''
    if(args.defense_mode == 'prune' or args.defense_mode == 'isolate'):
        induct_edge_index,induct_edge_weights = prune_unrelated_edge(args,induct_edge_index,induct_edge_weights,induct_x,device)
    # attack evaluation

    # test_model = test_model.to(device)
    output = test_model(induct_x,induct_edge_index,induct_edge_weights)
    train_attach_rate = (output.argmax(dim=1)[idx_atk]==args.target_class).float().mean()
    print("ASR: {:.4f}".format(train_attach_rate))
    ca = test_model.test(induct_x,induct_edge_index,induct_edge_weights,data.y,idx_clean_test)
    print("CA: {:.4f}".format(ca))

Namespace(attack_method='Basic', cuda=True, dataset='ogbn-arxiv', debug=True, defense_mode='isolated', device_id=1, dis_weight=1, dropout=0.5, epochs=200, evaluate_mode='overall', hidden=64, homo_boost_thrd=0.5, homo_loss_weight=1, inner=1, lr=0.01, model='GCN', no_cuda=False, prune_thr=0.35, seed=10, selection_method='cluster_degree', target_class=0, target_loss_weight=1, test_model='GCN', thrd=0.5, train_lr=0.02, trigger_prob=0.5, trigger_size=3, trojan_epochs=200, use_vs_number=False, vs_number=0, vs_ratio=0.001, weight_decay=0.0005)
Length of training set: 33868
Training benign model Finished!
Total time elapsed: 11.7324s
Benign CA: 0.6619
Length of training set: 33868
=== training gcn model ===
Epoch 0, training loss: 3.6526167392730713
acc_val: 0.1333
Epoch 10, training loss: 2.751762628555298
acc_val: 0.2825
Epoch 20, training loss: 2.228600263595581
acc_val: 0.4161
Epoch 30, training loss: 1.9383784532546997
acc_val: 0.5150
Epoch 40, training loss: 1.7810544967651367
acc_val: 0

[running kmeans]: 159it [09:43,  3.67s/it, center_shift=0.000017, iteration=159, tol=0.000100]


[ 4 28 28 ... 24  4  4]
Epoch 0, loss_inner: 3.65077, loss_target: 3.44922, homo loss: 0.53180 
acc_train_clean: 0.1599, ASR_train_attach: 0.0000, ASR_train_outter: 0.0000
Epoch 10, loss_inner: 2.80903, loss_target: 2.81291, homo loss: 0.00043 
acc_train_clean: 0.2830, ASR_train_attach: 0.0000, ASR_train_outter: 0.0000
Epoch 20, loss_inner: 2.38171, loss_target: 2.33512, homo loss: 0.00025 
acc_train_clean: 0.3591, ASR_train_attach: 0.5037, ASR_train_outter: 0.4498
Epoch 30, loss_inner: 1.99055, loss_target: 1.94164, homo loss: 0.00052 
acc_train_clean: 0.4892, ASR_train_attach: 0.9926, ASR_train_outter: 0.8717
Epoch 40, loss_inner: 1.71890, loss_target: 1.68128, homo loss: 0.00026 
acc_train_clean: 0.5495, ASR_train_attach: 1.0000, ASR_train_outter: 0.9474
Epoch 50, loss_inner: 1.54396, loss_target: 1.51084, homo loss: 0.00010 
acc_train_clean: 0.5837, ASR_train_attach: 1.0000, ASR_train_outter: 0.9768
Epoch 60, loss_inner: 1.42736, loss_target: 1.40058, homo loss: 0.00003 
acc_train_

In [None]:
# In[12]:
if(args.defense_mode == 'prune'):
    poison_edge_index,poison_edge_weights = prune_unrelated_edge(args,poison_edge_index,poison_edge_weights,poison_x,device)
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).to(device)
elif(args.defense_mode == 'isolate'):
    poison_edge_index,poison_edge_weights,rel_nodes = prune_unrelated_edge_isolated(args,poison_edge_index,poison_edge_weights,poison_x,device)
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).tolist()
    bkd_tn_nodes = torch.LongTensor(list(set(bkd_tn_nodes) - set(rel_nodes))).to(device)
else:
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).to(device)
# if(args.attack_method == 'none'):
#     bkd_tn_nodes = idx_train
print("precent of left attach nodes: {:.3f}"\
    .format(len(set(bkd_tn_nodes.tolist()) & set(idx_attach.tolist()))/len(idx_attach)))
#%%
test_model = model_construct(args,args.test_model,data,device).to(device) 
test_model.fit(poison_x, poison_edge_index, poison_edge_weights, poison_labels, bkd_tn_nodes, idx_val,train_iters=args.epochs,verbose=False)

output = test_model(poison_x,poison_edge_index,poison_edge_weights)
train_attach_rate = (output.argmax(dim=1)[idx_attach]==args.target_class).float().mean()
print("target class rate on Vs: {:.4f}".format(train_attach_rate))
#%%
induct_edge_index = torch.cat([poison_edge_index,mask_edge_index],dim=1)
induct_edge_weights = torch.cat([poison_edge_weights,torch.ones([mask_edge_index.shape[1]],dtype=torch.float,device=device)])
clean_acc = test_model.test(poison_x,induct_edge_index,induct_edge_weights,data.y,idx_clean_test)
# test_model = test_model.cpu()

print("accuracy on clean test nodes: {:.4f}".format(clean_acc))

# poison_x, poison_edge_index, poison_edge_weights, poison_labels = poison_x.to(device2), poison_edge_index.to(device2), poison_edge_weights.to(device2), poison_labels.to(device2)
# model.trojan = model.trojan.cpu()
if(args.evaluate_mode == '1by1'):
    from torch_geometric.utils  import k_hop_subgraph
    overall_induct_edge_index, overall_induct_edge_weights = induct_edge_index.clone(),induct_edge_weights.clone()
    asr = 0
    flip_asr = 0
    flip_idx_atk = idx_atk[(data.y[idx_atk] != args.target_class).nonzero().flatten()]
    for i, idx in enumerate(idx_atk):
        idx=int(idx)
        sub_induct_nodeset, sub_induct_edge_index, sub_mapping, sub_edge_mask  = k_hop_subgraph(node_idx = [idx], num_hops = 2, edge_index = overall_induct_edge_index, relabel_nodes=True) # sub_mapping means the index of [idx] in sub)nodeset
        ori_node_idx = sub_induct_nodeset[sub_mapping]
        relabeled_node_idx = sub_mapping
        sub_induct_edge_weights = torch.ones([sub_induct_edge_index.shape[1]]).to(device)
        # inject trigger on attack test nodes (idx_atk)'''
        if(args.attack_method == 'Basic'):
            induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(relabeled_node_idx,poison_x[sub_induct_nodeset],sub_induct_edge_index,sub_induct_edge_weights,device)
        elif(args.attack_method == 'Rand_Gene' or args.attack_method == 'Rand_Samp'):
            induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger_rand(relabeled_node_idx,poison_x[sub_induct_nodeset],sub_induct_edge_index,sub_induct_edge_weights,data.y[sub_induct_nodeset], full_data=True)
        elif(args.attack_method == 'None'):
            induct_x, induct_edge_index,induct_edge_weights = poison_x[sub_induct_nodeset],sub_induct_edge_index,sub_induct_edge_weights

        induct_x, induct_edge_index,induct_edge_weights = induct_x.clone().detach(), induct_edge_index.clone().detach(),induct_edge_weights.clone().detach()
        # # do pruning in test datas'''
        if(args.defense_mode == 'prune' or args.defense_mode == 'isolate'):
            induct_edge_index,induct_edge_weights = prune_unrelated_edge(args,induct_edge_index,induct_edge_weights,induct_x,device)
        # attack evaluation

        # test_model = test_model.to(device)
        output = test_model(induct_x,induct_edge_index,induct_edge_weights)
        train_attach_rate = (output.argmax(dim=1)[relabeled_node_idx]==args.target_class).float().mean()
        print("Node {}: {}, Origin Label: {}".format(i, idx, data.y[idx]))
        print("ASR: {:.4f}".format(train_attach_rate))
        asr += train_attach_rate
        if(data.y[idx] != args.target_class):
            flip_asr += train_attach_rate
        # ca = test_model.test(induct_x,induct_edge_index,induct_edge_weights,data.y,idx_clean_test)
        # print("CA: {:.4f}".format(ca))
    asr = asr/(idx_atk.shape[0])
    flip_asr = flip_asr/(flip_idx_atk.shape[0])
    print("Overall ASR: {:.4f}".format(asr))
    print("Flip ASR: {:.4f}/{} nodes".format(flip_asr,flip_idx_atk.shape[0]))
elif(args.evaluate_mode == 'overall'):
    # %% inject trigger on attack test nodes (idx_atk)'''
    if(args.attack_method == 'Basic'):
        induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(idx_atk,poison_x,induct_edge_index,induct_edge_weights,device)
    elif(args.attack_method == 'Rand_Gene' or args.attack_method == 'Rand_Samp'):
        induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger_rand(idx_atk,poison_x,induct_edge_index,induct_edge_weights,data.y)
    elif(args.attack_method == 'None'):
        induct_x, induct_edge_index,induct_edge_weights = poison_x,induct_edge_index,induct_edge_weights

    induct_x, induct_edge_index,induct_edge_weights = induct_x.clone().detach(), induct_edge_index.clone().detach(),induct_edge_weights.clone().detach()
    # do pruning in test datas'''
    if(args.defense_mode == 'prune' or args.defense_mode == 'isolate'):
        induct_edge_index,induct_edge_weights = prune_unrelated_edge(args,induct_edge_index,induct_edge_weights,induct_x,device)
    # attack evaluation

    # test_model = test_model.to(device)
    output = test_model(induct_x,induct_edge_index,induct_edge_weights)
    train_attach_rate = (output.argmax(dim=1)[idx_atk]==args.target_class).float().mean()
    print("ASR: {:.4f}".format(train_attach_rate))
    ca = test_model.test(induct_x,induct_edge_index,induct_edge_weights,data.y,idx_clean_test)
    print("CA: {:.4f}".format(ca))

In [2]:
torch.save(benign_model, benign_modelpath)
print("Benign model saved at {}".format(benign_modelpath))

# In[7]:

benign_ca = benign_model.test(data.x, data.edge_index, None, data.y,idx_clean_test)
print("Benign CA: {:.4f}".format(benign_ca))
benign_model = benign_model.cpu()


Benign model saved at ./modelpath/GCN_Reddit2_benign.pth
Benign CA: 0.7006


In [21]:
# from ogb.graphproppred import PygGraphPropPredDataset
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.loader import DataLoader

# Download and process data at './dataset/ogbg_molhiv/'
dataset = PygNodePropPredDataset(name = 'ogbn-arxiv', root='./dataset/')

split_idx = dataset.get_idx_split() 
# splitted_idx = data.get_idx_split()
idx_train, idx_val, idx_test = split_idx["train"], split_idx["valid"], split_idx["test"]
graph, labels = dataset[0]

ValueError: too many values to unpack (expected 2)

In [24]:
dataset[0]
# train_loader = DataLoader(dataset[split_idx['train']])
# valid_loader = DataLoader(dataset[split_idx['valid']])
# test_loader = DataLoader(dataset[split_idx['test']])

Data(num_nodes=169343, edge_index=[2, 1166243], x=[169343, 128], node_year=[169343, 1], y=[169343, 1])

In [2]:
#%%
from torch_geometric.utils import to_undirected
import torch_geometric.transforms as T
transform = T.Compose([T.NormalizeFeatures()])

# if args.dataset in ['Cora', 'Citeseer', 'Pubmed']:
dataset = Planetoid(root='./data/', split="random", num_train_per_class=80, num_val=400, num_test=1000, \
                    name=args.dataset,transform=None)
# dataset = Reddit(root='./data/', transform=transform, pre_transform=None)
# dataset = classFlickr(root='./data/', transform=transform, pre_transform=None)

data = dataset[0].to(device)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [3]:
#%%
from torch_geometric.utils import to_undirected
# get the overall edge index of the graph
data.edge_index = to_undirected(data.edge_index)

In [4]:
#%%  mask the test nodes
from utils import subgraph
# get the edge index used for training (except from test nodes) and 
train_edge_index,train_edge_weights, edge_mask = subgraph(torch.bitwise_not(data.test_mask),data.edge_index,relabel_nodes=False)

mask_edge_index = data.edge_index[:,torch.bitwise_not(edge_mask)]
idx_train =data.train_mask.nonzero().flatten()
idx_val = data.val_mask.nonzero().flatten()
idx_test = data.test_mask.nonzero().flatten()
# val_mask = node_idx[data.val_mask]
# labels = data.y[torch.bitwise_not(data.test_mask)]
# features = data.x[torch.bitwise_not(data.test_mask)]

In [9]:
e = data.edge_index.shape[1]
t = data.x.shape[0]
p = 2*e/(t*(t-1))
print(p)

0.0028799998253884154


In [5]:
from models.GCN import GCN
from models.GAT import GAT
from models.GIN import GIN
from models.SAGE import GraphSage
def model_construct(args,model_name,data):
    if (model_name == 'GCN'):
        model = GCN(nfeat=data.x.shape[1],
                    nhid=args.hidden,
                    nclass= int(data.y.max()+1),
                    dropout=args.dropout,
                    lr=args.lr,
                    weight_decay=args.weight_decay,
                    device=device)
    elif(model_name == 'GAT'):
        model = GAT(nfeat=data.x.shape[1], 
                    nhid=args.hidden, 
                    nclass=int(data.y.max()+1), 
                    heads=8,
                    dropout=args.dropout, 
                    lr=args.lr, 
                    weight_decay=args.weight_decay, 
                    device=device)
    elif(model_name == 'GraphSage'):
        model = GraphSage(nfeat=data.x.shape[1],
                    nhid=args.hidden,
                    nclass= int(data.y.max()+1),
                    dropout=args.dropout,
                    lr=args.lr,
                    weight_decay=args.weight_decay,
                    device=device)
    elif(model_name == 'GCN_Encoder'):
        model = GCN_Encoder(nfeat=data.x.shape[1],
                    nhid=args.hidden,
                    nclass= int(data.y.max()+1),
                    dropout=args.dropout,
                    lr=args.lr,
                    weight_decay=args.weight_decay,
                    device=device)
    return model

In [6]:
'''
train benign model
'''
import os
benign_modelpath = './modelpath/{}_{}_benign.pth'.format(args.model, args.dataset)
if(os.path.exists(benign_modelpath) and args.load_benign_model):
    # load existing benign model
    benign_model = torch.load(benign_modelpath)
    benign_model = benign_model.to(device)
    edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
    print("Loading benign {} model Finished!".format(args.model))
else:
    benign_model = model_construct(args,args.model,data).to(device) 
    t_total = time.time()
    edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
    print("Length of training set: {}".format(len(idx_train)))
    benign_model.fit(data.x, train_edge_index, train_edge_weights, data.y, idx_train, idx_val,train_iters=args.epochs,verbose=True)
    print("Training benign model Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    # Save trained model
    torch.save(benign_model, benign_modelpath)
    print("Benign model saved at {}".format(benign_modelpath))

Loading benign GCN model Finished!


In [7]:
benign_output = benign_model(data.x, data.edge_index, edge_weights)
benign_ca = benign_model.test(data.x, data.edge_index, edge_weights, data.y,idx_test)
print("Benign CA: {:.4f}".format(benign_ca))
atk_test_nodes, clean_test_nodes,poi_train_nodes = select_target_nodes(args,args.seed,benign_model,data.x, data.edge_index, edge_weights,data.y,idx_val,idx_test)
clean_test_ca = benign_model.test(data.x, data.edge_index, edge_weights, data.y,clean_test_nodes)
print("Benign CA on clean test nodes: {:.4f}".format(clean_test_ca))

Benign CA: 0.8410
Benign CA on clean test nodes: 0.8250


In [8]:
#%%
from sklearn_extra import cluster
from models.backdoor import obtain_attach_nodes,Backdoor,obtain_attach_nodes_by_influential,obtain_attach_nodes_by_cluster
# filter out the unlabeled nodes except from training nodes and testing nodes, nonzero() is to get index, flatten is to get 1-d tensor
unlabeled_idx = (torch.bitwise_not(data.test_mask)&torch.bitwise_not(data.train_mask)).nonzero().flatten()
# poison nodes' size
size = int((len(data.test_mask)-data.test_mask.sum())*args.vs_ratio)
# here is randomly select poison nodes from unlabeled nodes
if(args.selection_method == 'none'):
    idx_attach = obtain_attach_nodes(unlabeled_idx,size)
elif(args.selection_method == 'loss' or args.selection_method == 'conf'):
    idx_attach = obtain_attach_nodes_by_influential(args,benign_model,unlabeled_idx.cpu().tolist(),data.x,train_edge_index,train_edge_weights,data.y,device,size,selected_way=args.selection_method)
    idx_attach = torch.LongTensor(idx_attach).to(device)
elif(args.selection_method == 'cluster'):
    # construct GCN encoder
    encoder_modelpath = './modelpath/{}_{}_benign.pth'.format('GCN_Encoder', args.dataset)
    if(os.path.exists(encoder_modelpath) and args.load_benign_model):
        # load existing benign model
        gcn_encoder = torch.load(encoder_modelpath)
        gcn_encoder = gcn_encoder.to(device)
        edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
        print("Loading benign {} model Finished!".format(args.model))
    else:
        gcn_encoder = model_construct(args,'GCN_Encoder',data).to(device) 
        t_total = time.time()
        edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
        print("Length of training set: {}".format(len(idx_train)))
        gcn_encoder.fit(data.x, train_edge_index, train_edge_weights, data.y, idx_train, idx_val,train_iters=args.epochs,verbose=True)
        print("Training encoder Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
        # Save trained model
        torch.save(gcn_encoder, encoder_modelpath)
        print("Encoder saved at {}".format(encoder_modelpath))
    # test gcn encoder 
    encoder_benign_ca = gcn_encoder.test(data.x, data.edge_index, edge_weights, data.y,idx_test)
    print("Encoder CA: {:.4f}".format(encoder_benign_ca))
    encoder_clean_test_ca = gcn_encoder.test(data.x, data.edge_index, edge_weights, data.y,clean_test_nodes)
    print("Encoder CA on clean test nodes: {:.4f}".format(encoder_clean_test_ca))
    # from sklearn import cluster
    seen_node_idx = torch.concat([idx_train,unlabeled_idx])
    nclass = np.unique(data.y.cpu().numpy()).shape[0]
    # kmeans = cluster.KMeans(n_clusters=nclass,random_state=1)
    # kmeans.fit(data.x[seen_node_idx].cpu().numpy())
    # # unlabeled_idx.cpu().tolist()

    # train_adj = to_dense_adj(train_edge_index,edge_attr=train_edge_weights)[0].cpu()
    # train_adj = train_adj + train_adj @ train_adj
    # train_adj = torch.where(train_adj>0, torch.tensor(1.0),
    #                                             torch.tensor(0.0))
    # train_x = train_adj @ data.x.cpu()
    # new_train_edge_index, new_train_edge_weights= dense_to_sparse(train_adj)
    # kmeans = cluster.KMedoids(n_clusters=nclass,method='pam')
    # # kmeans.fit(data.x[seen_node_idx].detach().cpu().numpy())
    # kmeans.fit(train_x.detach().cpu().numpy())
    # idx_attach = obtain_attach_nodes_by_cluster(args,kmeans,unlabeled_idx.cpu().tolist(),train_x,data.y,device,size)
    # idx_attach = torch.LongTensor(idx_attach).to(device)
    
    encoder_x = gcn_encoder.get_h(data.x, train_edge_index,train_edge_weights).clone().detach()
    kmeans = cluster.KMedoids(n_clusters=nclass,method='pam')
    # kmeans.fit(data.x[seen_node_idx].detach().cpu().numpy())
    kmeans.fit(encoder_x.detach().cpu().numpy())
    idx_attach = obtain_attach_nodes_by_cluster(args,kmeans,unlabeled_idx.cpu().tolist(),encoder_x,data.y,device,size)
    idx_attach = torch.LongTensor(idx_attach).to(device)

Loading benign GCN model Finished!
Encoder CA: 0.8380
Encoder CA on clean test nodes: 0.8000
[4 4 3 ... 4 4 4]


In [9]:
# train trigger generator 
model = Backdoor(args,device)
print(args.epochs)
model.fit(data.x, train_edge_index, None, data.y, idx_train,idx_attach)

200
tensor(1.9393, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(20.4000, device='cuda:2', grad_fn=<MulBackward0>)
Epoch 0, training loss: 1.9392701387405396
acc_train_clean: 0.1768, acc_train_attach: 0.0588
tensor(1.7866, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(9.8495, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.6262, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(13.2455, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.4436, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(11.8224, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.3043, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(10.5700, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.1444, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(9.4848, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.0180, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(8.4280, device='cuda:2', grad_fn=<MulBackward0>)
tensor(0.9117, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(6.9387, device='cuda

In [None]:
# %%
poison_x = model.poison_x.data
poison_edge_index = model.poison_edge_index.data
poison_edge_weights = model.poison_edge_weights.data
poison_labels = model.labels

In [None]:
if(args.defense_mode == 'prune'):
    poison_edge_index,poison_edge_weights = prune_unrelated_edge(args,poison_edge_index,poison_edge_weights,poison_x,device)
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).to(device)
elif(args.defense_mode == 'isolate'):
    poison_edge_index,poison_edge_weights,rel_nodes = prune_unrelated_edge_isolated(args,poison_edge_index,poison_edge_weights,poison_x,device)
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).tolist()
    bkd_tn_nodes = torch.LongTensor(list(set(bkd_tn_nodes) - set(rel_nodes))).to(device)
else:
    bkd_tn_nodes = torch.cat([idx_train,idx_attach]).to(device)

In [None]:

print(len(torch.cat([idx_train,idx_attach])))
print(len(bkd_tn_nodes))
print(len(model.poison_edge_index.data[0]),len(poison_edge_index[0]))
# print(idx_attach & bkd_tn_nodes)
print(set(bkd_tn_nodes.tolist()) & set(idx_attach.tolist()))

577
577
4306 2880
{1569, 2081, 1699, 1251, 133, 2185, 843, 1775, 303, 305, 1780, 1814, 1782, 2143, 1243, 2046, 1791}


In [None]:
#%%
from models.GCN import GCN
from models.GAT import GAT
from models.GIN import GIN
test_model = model_construct(args,args.test_model,data).to(device) 
if(args.test_model == 'GraphSage' or args.test_model == 'GAT'):
    poison_adj = to_dense_adj(poison_edge_index, edge_attr=poison_edge_weights)
    poison_edge_index, poison_edge_weights = dense_to_sparse(poison_adj)
test_model.fit(poison_x, poison_edge_index, poison_edge_weights, poison_labels, bkd_tn_nodes, idx_val,train_iters=200,verbose=True)

=== training gcn model ===
Epoch 0, training loss: 1.9486966133117676
acc_val: 0.3725
Epoch 10, training loss: 0.5837284326553345
acc_val: 0.7350
Epoch 20, training loss: 0.22602947056293488
acc_val: 0.7625
Epoch 30, training loss: 0.126153364777565
acc_val: 0.7625
Epoch 40, training loss: 0.09417053312063217
acc_val: 0.7625
Epoch 50, training loss: 0.08149732649326324
acc_val: 0.7525
Epoch 60, training loss: 0.08437832444906235
acc_val: 0.7425
Epoch 70, training loss: 0.07754302769899368
acc_val: 0.7500
Epoch 80, training loss: 0.08388686925172806
acc_val: 0.7575
Epoch 90, training loss: 0.0656595528125763
acc_val: 0.7525
Epoch 100, training loss: 0.0655829906463623
acc_val: 0.7475
Epoch 110, training loss: 0.06835032254457474
acc_val: 0.7600
Epoch 120, training loss: 0.060565438121557236
acc_val: 0.7525
Epoch 130, training loss: 0.07300901412963867
acc_val: 0.7550
Epoch 140, training loss: 0.05255601555109024
acc_val: 0.7500
Epoch 150, training loss: 0.05699390545487404
acc_val: 0.75

In [None]:
output = test_model(poison_x,poison_edge_index,poison_edge_weights)
train_attach_rate = (output.argmax(dim=1)[idx_attach]==args.target_class).float().mean()
print("target class rate on Vs: {:.4f}".format(train_attach_rate))
#%%
induct_edge_index = torch.cat([poison_edge_index,mask_edge_index],dim=1)
induct_edge_weights = torch.cat([poison_edge_weights,torch.ones([mask_edge_index.shape[1]],dtype=torch.float,device=device)])
# idx_test = data.test_mask.nonzero().flatten()[:200]
# idx_test = list(set(data.test_mask.nonzero().flatten().tolist()) - set(atk_test_nodes))
# idx_atk = data.test_mask.nonzero().flatten()[200:].tolist()
# yt_nids = [nid for nid in idx_atk if data.y.tolist()==args.target_class] 
# yx_nids = torch.LongTensor(list(set(idx_atk) - set(yt_nids))).to(device)
atk_labels = poison_labels.clone()
atk_labels[atk_test_nodes] = args.target_class
clean_acc = test_model.test(poison_x,induct_edge_index,induct_edge_weights,data.y,clean_test_nodes)
'''clean accuracy of clean test nodes before injecting triggers to the attack test nodes'''
print("accuracy on clean test nodes: {:.4f}".format(clean_acc))
'''inject trigger on attack test nodes (idx_atk)'''
induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(atk_test_nodes,poison_x,induct_edge_index,induct_edge_weights)
'''do pruning in test datas'''
if(args.defense_mode == 'prune' or args.defense_mode == 'isolate'):
    induct_edge_index,induct_edge_weights = prune_unrelated_edge(args,induct_edge_index,induct_edge_weights,induct_x,device)
'''attack evaluation'''
asr = test_model.test(induct_x,induct_edge_index,induct_edge_weights,atk_labels,atk_test_nodes)
ca = test_model.test(induct_x,induct_edge_index,induct_edge_weights,data.y,clean_test_nodes)
print("ASR: {:.4f}".format(asr))
print("CA: {:.4f}".format(ca))
# output = test_model(induct_x,induct_edge_index,induct_edge_weights)
# train_attach_rate = (output.argmax(dim=1)[atk_test_nodes]==args.target_class).float().mean()
# print("ASR: {:.4f}".format(train_attach_rate))

target class rate on Vs: 0.8824
accuracy on clean test nodes: 0.8050
ASR: 0.7700
CA: 0.7550


## To deleted

In [None]:
import scipy.sparse as sp
from torch_geometric.utils import to_dense_adj,dense_to_sparse
sp_induct_x = help_funcs.normalize(sp.csr_matrix(induct_x.cpu().detach().numpy()))
sp_induct_adj = help_funcs.normalize_adj(sp.csr_matrix(to_dense_adj(induct_edge_index)[0].cpu().detach().numpy()))
induct_x = torch.FloatTensor(np.array(sp_induct_x.todense())).to(device)
induct_adj = torch.FloatTensor(np.array(sp_induct_adj.todense())).to(device)
induct_edge_index,induct_edge_weights = dense_to_sparse(induct_adj)

In [None]:
output = gcn(induct_x,induct_edge_index,induct_edge_weights)
train_attach_rate = (output.argmax(dim=1)[yx_nids]==args.target_class).float().mean()
print("ASR: {:.4f}".format(train_attach_rate))
clean_acc = gcn.test(induct_x,induct_edge_index,induct_edge_weights,data.y,idx_test)
asr = gcn.test(induct_x,induct_edge_index,induct_edge_weights,atk_labels,idx_atk)
print("accuracy on clean test nodes: {:.4f}".format(clean_acc))
print("ASR1: {:.4f}".format(asr))

In [None]:
benign_gcn = GCN(nfeat=data.x.shape[1],\
            nhid=args.hidden,\
            nclass= int(data.y.max()+1),\
            dropout=args.dropout,\
            lr=args.lr,\
            weight_decay=args.weight_decay,\
            device=device).to(device)
#%%
atk_labels = poison_labels.clone()
atk_labels[idx_atk] = args.target_class
edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
benign_gcn.fit(data.x, data.edge_index, edge_weights, data.y, idx_train, idx_val,train_iters=args.epochs,verbose=True)
benign_output = benign_gcn(data.x, data.edge_index, edge_weights)
benign4poison_output = benign_gcn(induct_x,induct_edge_index,induct_edge_weights)
benign_ca = (benign_output.argmax(dim=1)[idx_test]==data.y[idx_test]).float().mean()
benign4poison_ca = (benign4poison_output.argmax(dim=1)[idx_test]==atk_labels[idx_test]).float().mean()
print("BenignCA: {:.4f}".format(benign_ca))
print("Benign for poisoning CA: {:.4f}".format(benign4poison_ca))
print((benign_output.argmax(dim=1)[yx_nids]==args.target_class).float().mean())

In [None]:
atk_labels = data.y.clone()
idx_atk = obtain_attach_nodes(data.test_mask.nonzero().flatten(), 200)
can_test_nodes = torch.LongTensor(list(set(data.test_mask.nonzero().flatten()) - set(idx_atk))).to(device)
idx_test = obtain_attach_nodes(can_test_nodes,200)

In [None]:
edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(idx_atk,data.x,data.edge_index,edge_weights)
output = gcn(induct_x,induct_edge_index,induct_edge_weights)
test_asr= (output.argmax(dim=1)[idx_atk]==args.target_class).float().mean()
print("ASR: {:.4f}".format(test_asr))
test_ca = (output.argmax(dim=1)[idx_test]==atk_labels[idx_test]).float().mean()
print("CA: {:.4f}".format(test_ca))

In [2]:
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj
poison_adj_dense = to_dense_adj(poison_edge_index)
def edge_sim_analysis(edge_index, features):
    sims = []
    for (u,v) in edge_index:
        sims.append(float(F.cosine_similarity(features[u].unsqueeze(0),features[v].unsqueeze(0))))
    sims = np.array(sims)
    # print(f"mean: {sims.mean()}, <0.1: {sum(sims<0.1)}/{sims.shape[0]}")
    return sims

bkd_nids = list(range(data.x.shape[0],poison_x.shape[0]))
for nid in idx_attach:
    # polished_dr_test = copy.deepcopy(bkd_dr_test)
    # polished_adj_nodes = polished_dr_test.data['mat_adj'].to_dense()[nid].nonzero()
    polished_adj_nodes = poison_adj_dense[0][nid].nonzero()
    # bkd_nids = list(range(poison_x.shape[0],induct_x.shape[0]))
    for v in polished_adj_nodes:
        v = int(v)
        if(v in bkd_nids):
            u = nid
            print(nid,v)
            print(F.cosine_similarity(poison_x[u].unsqueeze(0),poison_x[v].unsqueeze(0)))

RuntimeError: CUDA out of memory. Tried to allocate 29.81 GiB (GPU 1; 79.20 GiB total capacity; 779.04 MiB already allocated; 17.36 GiB free; 1.23 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [4]:
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj
induct_adj_dense = to_dense_adj(induct_edge_index)
def edge_sim_analysis(edge_index, features):
    sims = []
    for (u,v) in edge_index:
        sims.append(float(F.cosine_similarity(features[u].unsqueeze(0),features[v].unsqueeze(0))))
    sims = np.array(sims)
    # print(f"mean: {sims.mean()}, <0.1: {sum(sims<0.1)}/{sims.shape[0]}")
    return sims

bkd_nids = induct_x.shape[0] - poison_x.shape[0]
for nid in idx_atk:
    # polished_dr_test = copy.deepcopy(bkd_dr_test)
    # polished_adj_nodes = polished_dr_test.data['mat_adj'].to_dense()[nid].nonzero()
    polished_adj_nodes = induct_adj_dense[0][nid].nonzero()
    bkd_nids = list(range(poison_x.shape[0],induct_x.shape[0]))
    for v in polished_adj_nodes:
        v = int(v)
        if(v in bkd_nids):
            u = nid
            print(nid,v)
            print(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))

IndexError: index 1370 is out of bounds for dimension 0 with size 36

In [None]:
bkd_nids = list(range(poison_x.shape[0],induct_x.shape[0]))
for nid in idx_test:
    # polished_dr_test = copy.deepcopy(bkd_dr_test)
    # polished_adj_nodes = polished_dr_test.data['mat_adj'].to_dense()[nid].nonzero()
    polished_adj_nodes = induct_adj_dense[0][nid].nonzero()
    for v in polished_adj_nodes:
        v = int(v)
        # if(v in bkd_nids):
        u = nid
        print(nid,v)
        print(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))

In [None]:
def calculate_graph_homophily(adj,x,device):
    deg_vector = adj.sum(1)
    deg_matrix = torch.diag(adj.sum(1)).to(device)
    deg_matrix += torch.eye(len(adj)).to(device)
    deg_inv_sqrt = deg_matrix.pow(-0.5)
    deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0.)
    adj = torch.matmul(deg_inv_sqrt,adj)
    adj = torch.matmul(adj,deg_inv_sqrt)
    x_neg = adj @ x
    node_sims = np.array([float(F.cosine_similarity(xn.unsqueeze(0),xx.unsqueeze(0))) for (xn,xx) in zip(x_neg,x)])   
    # node_sims = np.array([torch.round(i,decimals=2) for i in node_sims])
    # print(node_sims)
    return node_sims
bkd_graph_test_node_sims = calculate_graph_homophily(to_dense_adj(data.edge_index)[0].to(device),data.x.to(device),device)
bkd_graph_train_node_sims = calculate_graph_homophily(to_dense_adj(poison_edge_index)[0].to(device),poison_x.to(device),device)
clean_graph_node_sims = calculate_graph_homophily(to_dense_adj(induct_edge_index)[0].to(device),induct_x.to(device),device)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.font_manager import FontProperties

def to_percent(y,position):
    return str(100*y)+"%"#这里可以用round（）函数设置取几位小数

plt.hist(clean_graph_node_sims,bins=10,weights=[1./len(clean_graph_node_sims)]*len(clean_graph_node_sims),density=True, alpha=0.75, label='clean')#这里weights是每一个数据的权重，这里设置是1，weights是和x等维的列表或者series
plt.hist(np.array(bkd_graph_test_node_sims),bins=20,weights=[1./len(bkd_graph_test_node_sims)]*len(bkd_graph_test_node_sims),density=True, alpha=0.75, label='poison')#这里weights是每一个数据的权重，这里设置是1，weights是和x等维的列表或者series
plt.hist(np.array(bkd_graph_train_node_sims),bins=20,weights=[1./len(bkd_graph_train_node_sims)]*len(bkd_graph_train_node_sims),density=True, alpha=0.75, label='attack')#这里weights是每一个数据的权重，这里设置是1，weights是和x等维的列表或者series
fomatter=FuncFormatter(to_percent)
# plt.gca().yaxis.set_major_formatter(fomatter)
plt.grid(True)
plt.xlabel("Cosine Similarity")
plt.ylabel("Density")
plt.legend()
# plt.savefig("pics/grb_cora_node_sims.png")
# plt.savefig("pics/grb_cora_node_sims.pdf")
plt.show()
plt.close()

In [None]:
bkd_test_edge_index = to_dense_adj(data.edge_index)[0].nonzero()
trigger_trigger_edge_sims = []
trigger_trigger_edge_index = []

trigger_target_edge_sims = []
trigger_target_edge_index = []

normal_normal_edge_sims = []
normal_normal_edge_index = []

trigger_normal_edge_sims = []
trigger_normal_edge_index = []

target_target_edge_sims = []
target_target_edge_index = []
for (u,v) in bkd_test_edge_index:
    if ((v,u) in trigger_trigger_edge_index) or ((u,v) in trigger_trigger_edge_index):
        continue
    if ((v,u) in trigger_target_edge_index) or ((u,v) in trigger_target_edge_index):
        continue
    if ((v,u) in normal_normal_edge_index) or ((u,v) in normal_normal_edge_index):
        continue
    if ((v,u) in trigger_normal_edge_index) or ((u,v) in trigger_normal_edge_index):
        continue
    if ((v,u) in target_target_edge_index) or ((u,v) in target_target_edge_index):
        continue
    
    if (u in bkd_nids) and (v in bkd_nids):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        trigger_trigger_edge_sims.append(edge_sims)
        trigger_trigger_edge_index.append((u,v))
        continue
    if ((u in bkd_nids) and (v in idx_atk)) or ((v in bkd_nids) and (u in idx_atk)):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        trigger_target_edge_sims.append(edge_sims)
        trigger_target_edge_index.append((u,v))
        continue
    if (u in idx_test) and (v in idx_test):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        normal_normal_edge_sims.append(edge_sims)
        normal_normal_edge_index.append((u,v))
        continue
    if ((u in bkd_nids) and (v in idx_test)) or ((v in bkd_nids) and (u in idx_test)):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        trigger_normal_edge_sims.append(edge_sims)
        trigger_normal_edge_index.append((u,v))

    if ((u in idx_atk) and (v in idx_atk)):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        target_target_edge_sims.append(edge_sims)
        target_target_edge_index.append((u,v))
        continue
    