In [1]:
import time
import argparse
import numpy as np
import torch
from models.GCN import GCN
from torch_geometric.datasets import Planetoid, WebKB, WikipediaNetwork,Reddit
from torch_geometric.utils import to_dense_adj,dense_to_sparse
from help_funcs import prune_unrelated_edge,prune_unrelated_edge_isolated,select_target_nodes
import help_funcs
import scipy.sparse as sp

# Training settings
# Every option keeps its original name and default, so `args.<name>` reads the
# same everywhere else in the notebook.
parser = argparse.ArgumentParser()
# NOTE(review): `store_true` combined with `default=True` means this flag can
# never be switched off from the command line (same for --load_benign_model).
parser.add_argument('--debug', action='store_true',
        default=True, help='debug mode')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--seed', type=int, default=10, help='Random seed.')
parser.add_argument('--model', type=str, default='GCN', help='model',
                    choices=['GCN','GAT','GraphSage','GIN'])
parser.add_argument('--dataset', type=str, default='cora', help='Dataset',
                    choices=['cora','citeseer','pubmed'])
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=32,
                    help='Number of hidden units.')
parser.add_argument('--thrd', type=float, default=0.5)
parser.add_argument('--target_class', type=int, default=0)
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--epochs', type=int,  default=200, help='Number of epochs to train benign and backdoor model.')
parser.add_argument('--trojan_epochs', type=int,  default=200, help='Number of epochs to train trigger generator.')

parser.add_argument('--load_benign_model', action='store_true', default=True,
                    help='Loading benign model if exists.')
# backdoor setting
parser.add_argument('--trigger_size', type=int, default=3,
                    help='trigger_size')
parser.add_argument('--vs_ratio', type=float, default=0.01,
                    help="ratio of poisoning nodes relative to the full graph")
# Node counts are parsed as int: a float such as 200.0 cannot be used as a
# count or slice bound. The int default (200) is unchanged.
parser.add_argument('--target_test_nodes_num', type=int, default=200,
                    help="the number of test nodes attached with 1 (independent) trigger, which are correctly classified and do not belong to the target class")
parser.add_argument('--clean_test_nodes_num', type=int, default=200,
                    help="the number of clean test nodes used to measure clean accuracy")
# defense setting
parser.add_argument('--defense_mode', type=str, default="isolate",
                    choices=['prune', 'isolate', 'none'],
                    help="Mode of defense")
parser.add_argument('--prune_thr', type=float, default=0.1,
                    help="Threshold of pruning edges")
parser.add_argument('--homo_loss_weight', type=float, default=1,
                    help="Weight of optimize similarity loss")
parser.add_argument('--homo_boost_thrd', type=float, default=0.6,
                    help="Threshold of increase similarity")
# attack setting
parser.add_argument('--selection_method', type=str, default='conf',
                    choices=['loss','conf','cluster','none'],
                    help='Method to select idx_attach for training trojan model (none means randomly select)')
parser.add_argument('--test_model', type=str, default='GCN',
                    choices=['GCN','GAT','GraphSage','GIN'],
                    help='Model used to attack')

# GPU setting
parser.add_argument('--device_id', type=int, default=2,
                    help="Index of the CUDA device to use")

# parse_known_args (rather than parse_args) tolerates the extra arguments the
# Jupyter kernel launcher puts in sys.argv.
args = parser.parse_known_args()[0]
args.cuda = not args.no_cuda and torch.cuda.is_available()
# Honour --no-cuda: previously the device was chosen from CUDA availability
# alone, so tensors still went to the GPU even when CUDA was disabled.
device = torch.device('cuda:{}'.format(args.device_id) if args.cuda else 'cpu')

# Seed every RNG the experiment touches so runs are repeatable.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
print(args)

Namespace(clean_test_nodes_num=200, cuda=True, dataset='cora', debug=True, defense_mode='isolate', device_id=2, dropout=0.5, epochs=200, hidden=32, homo_boost_thrd=0.6, homo_loss_weight=1, load_benign_model=True, lr=0.01, model='GCN', no_cuda=False, prune_thr=0.1, seed=10, selection_method='conf', target_class=0, target_test_nodes_num=200, test_model='GCN', thrd=0.5, trigger_size=3, trojan_epochs=200, vs_ratio=0.01, weight_decay=0.0005)


In [2]:
#%%
from torch_geometric.utils import to_undirected
import torch_geometric.transforms as T
# Feature-normalisation pipeline. It is constructed here but NOT handed to the
# loader below, which receives transform=None.
transform = T.Compose([T.NormalizeFeatures()])

# Planetoid citation graph selected by --dataset, using a random split with
# 80 training nodes per class, 400 validation nodes and 1000 test nodes.
dataset = Planetoid(
    root='./data/',
    name=args.dataset,
    split="random",
    num_train_per_class=80,
    num_val=400,
    num_test=1000,
    transform=None,
)

# Take the first graph of the dataset and move it to the selected device.
data = dataset[0]
data = data.to(device)

In [3]:
#%%
from torch_geometric.utils import to_undirected
# get the overall edge index of the graph
# The edge list stored on `data` is overwritten in place with the result of
# to_undirected, so every later cell sees the converted edge index.
# (presumably to_undirected adds the reverse direction of every edge — see the
# torch_geometric.utils documentation)
data.edge_index = to_undirected(data.edge_index)

In [4]:
#%%  mask the test nodes
from utils import subgraph
# Split the edges by whether they stay inside the set of non-test nodes.
# `subgraph` is the project helper from utils; its third return value is used
# below as a per-edge mask (presumably True for edges kept in the training
# subgraph — confirm against utils.subgraph).
non_test_nodes = torch.bitwise_not(data.test_mask)
train_edge_index, _, edge_mask = subgraph(non_test_nodes, data.edge_index, relabel_nodes=False)

# Edges NOT selected by edge_mask: held out during training and appended again
# when the inductive test graph is assembled.
mask_edge_index = data.edge_index[:, ~edge_mask]

# Node-id tensors (1-d) for each split, derived from the boolean masks.
idx_train, idx_val, idx_test = (
    split_mask.nonzero().flatten()
    for split_mask in (data.train_mask, data.val_mask, data.test_mask)
)

In [5]:
from models.GCN import GCN
from models.GAT import GAT
from models.GIN import GIN
from models.SAGE import GraphSage
def model_construct(args,model_name,data):
    """Build an untrained node classifier of the requested architecture.

    Args:
        args: parsed options; reads ``hidden``, ``dropout``, ``lr`` and
            ``weight_decay``.
        model_name: one of ``'GCN'``, ``'GAT'`` or ``'GraphSage'``.
        data: graph object; ``data.x.shape[1]`` gives the input feature size
            and ``data.y.max() + 1`` the number of classes.

    Returns:
        The constructed model. It is built with the notebook-level ``device``
        but is not moved there by this function.

    Raises:
        ValueError: if ``model_name`` has no constructor here. This includes
            ``'GIN'``, which the CLI accepts but which previously fell through
            every branch and crashed with ``UnboundLocalError``.
    """
    # Resolve the class first so an unsupported name fails before `data` is
    # touched.
    if model_name == 'GCN':
        model_cls, extra_kwargs = GCN, {}
    elif model_name == 'GAT':
        # GAT is the only architecture that takes an attention-head count.
        model_cls, extra_kwargs = GAT, {'heads': 8}
    elif model_name == 'GraphSage':
        model_cls, extra_kwargs = GraphSage, {}
    else:
        raise ValueError(
            "Unsupported model '{}': expected one of 'GCN', 'GAT', 'GraphSage'".format(model_name))

    return model_cls(nfeat=data.x.shape[1],
                     nhid=args.hidden,
                     nclass=int(data.y.max()+1),
                     dropout=args.dropout,
                     lr=args.lr,
                     weight_decay=args.weight_decay,
                     device=device,
                     **extra_kwargs)

In [6]:
# Train the benign (clean) model, or reuse a cached checkpoint when one exists
# and --load_benign_model is set.
import os
benign_modelpath = './modelpath/{}_{}_benign.pth'.format(args.model, args.dataset)
# Unit weight for every edge of the clean graph; needed on both paths below,
# so it is built once here.
edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
if(os.path.exists(benign_modelpath) and args.load_benign_model):
    # load existing benign model
    # map_location lets a checkpoint saved on another GPU index load on the
    # current device instead of failing when that GPU is absent.
    # NOTE(review): this unpickles a whole model object; only load checkpoints
    # from a trusted source.
    benign_model = torch.load(benign_modelpath, map_location=device)
    benign_model = benign_model.to(device)
    print("Loading benign {} model Finished!".format(args.model))
else:
    benign_model = model_construct(args,args.model,data).to(device) 
    t_total = time.time()
    print("Length of training set: {}".format(len(idx_train)))
    benign_model.fit(data.x, data.edge_index, edge_weights, data.y, idx_train, idx_val,train_iters=args.epochs,verbose=True)
    print("Training benign model Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    # Save trained model
    # Create the checkpoint directory first: torch.save does not create
    # missing parent directories, so a fresh checkout would train and then
    # crash here.
    os.makedirs(os.path.dirname(benign_modelpath), exist_ok=True)
    torch.save(benign_model, benign_modelpath)
    print("Benign model saved at {}".format(benign_modelpath))

Loading benign GCN model Finished!


In [7]:
# Forward pass of the benign model over the clean graph. The result is not
# read again in this cell; the call is kept so execution matches the original.
benign_output = benign_model(data.x, data.edge_index, edge_weights)

# Clean accuracy of the benign model on the whole test split.
benign_ca = benign_model.test(
    data.x, data.edge_index, edge_weights, data.y, idx_test)
print("Benign CA: {:.4f}".format(benign_ca))

# Project helper that splits the evaluation nodes into three groups; only the
# first two are used later (as attack targets and as clean controls).
atk_test_nodes, clean_test_nodes, poi_train_nodes = select_target_nodes(
    args, args.seed, benign_model,
    data.x, data.edge_index, edge_weights, data.y,
    idx_val, idx_test)

# Clean accuracy restricted to the clean control nodes.
clean_test_ca = benign_model.test(
    data.x, data.edge_index, edge_weights, data.y, clean_test_nodes)
print("Benign CA on clean test nodes: {:.4f}".format(clean_test_ca))

Benign CA: 0.8360
Benign CA on clean test nodes: 0.7800


In [9]:
from models.backdoor import obtain_attach_nodes,Backdoor,obtain_attach_nodes_by_influential
# Candidate nodes for poisoning: everything that is neither a training node nor
# a test node. nonzero() yields the indices, flatten() makes them 1-d.
unlabeled_idx = (torch.bitwise_not(data.test_mask)&torch.bitwise_not(data.train_mask)).nonzero().flatten()
# Number of poisoned nodes: vs_ratio times the number of non-test nodes.
size = int((len(data.test_mask)-data.test_mask.sum())*args.vs_ratio)
print(len(data.test_mask))
if(args.selection_method == 'none'):
    # 'none' means a random pick among the candidates.
    idx_attach = obtain_attach_nodes(unlabeled_idx,size)
elif(args.selection_method in ('loss', 'conf')):
    # Pick candidates using the benign model, by loss or by confidence.
    idx_attach = obtain_attach_nodes_by_influential(args,benign_model,unlabeled_idx.cpu().tolist(),data.x,data.edge_index,edge_weights,data.y,device,size,selected_way=args.selection_method)
    idx_attach = torch.LongTensor(idx_attach).to(device)
else:
    # 'cluster' is accepted by the CLI but has no implementation here. Fail now
    # instead of with a NameError on idx_attach in a later cell.
    raise NotImplementedError(
        "selection_method '{}' is not implemented in this notebook".format(args.selection_method))

2708


In [10]:
# train trigger generator 
# Backdoor (from models.backdoor) is bound to the name `model`; later cells
# read the poisoned graph from it and call model.inject_trigger.
model = Backdoor(args,device)
# NOTE(review): this prints args.epochs, although the CLI also defines
# --trojan_epochs for the trigger generator — confirm which one Backdoor.fit uses.
print(args.epochs)
# Fit on train_edge_index (the edge set with test nodes masked out). None is
# passed in the edge-weight position — presumably meaning "unweighted"; verify
# against Backdoor.fit.
model.fit(data.x, train_edge_index, None, data.y, idx_train,idx_attach)

200
tensor(1.9394, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(21.6000, device='cuda:2', grad_fn=<MulBackward0>)
Epoch 0, training loss: 1.9393671751022339
acc_train_clean: 0.1750, acc_train_attach: 0.1111
tensor(1.7808, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(10.7935, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.6231, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(14.1113, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.4406, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(12.2913, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.2995, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(11.2433, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.1433, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(10.0998, device='cuda:2', grad_fn=<MulBackward0>)
tensor(1.0243, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(8.3671, device='cuda:2', grad_fn=<MulBackward0>)
tensor(0.9191, device='cuda:2', grad_fn=<NllLossBackward0>) tensor(6.3128, device='cu

In [11]:
# %%
# Pull the poisoned training graph out of the fitted Backdoor object.
# `.data` is read on the first three attributes (presumably tensors, so this
# drops their autograd history — confirm); `labels` is taken as-is.
poison_x = model.poison_x.data
poison_edge_index = model.poison_edge_index.data
poison_edge_weights = model.poison_edge_weights.data
poison_labels = model.labels

In [12]:
# Training nodes of the backdoored model before any defense: the clean training
# nodes followed by the poisoned nodes.
train_and_attach = torch.cat([idx_train,idx_attach])

if args.defense_mode == 'isolate':
    # The isolate defense also reports `rel_nodes`; those ids are removed from
    # the training set.
    poison_edge_index,poison_edge_weights,rel_nodes = prune_unrelated_edge_isolated(args,poison_edge_index,poison_edge_weights,poison_x,device)
    surviving_ids = set(train_and_attach.tolist()) - set(rel_nodes)
    bkd_tn_nodes = torch.LongTensor(list(surviving_ids)).to(device)
else:
    if args.defense_mode == 'prune':
        # Pruning changes only the edges; the training set is left intact.
        poison_edge_index,poison_edge_weights = prune_unrelated_edge(args,poison_edge_index,poison_edge_weights,poison_x,device)
    bkd_tn_nodes = train_and_attach.to(device)

tensor([[   2, 1986],
        [   9, 2614],
        [  12, 1001],
        ...,
        [2685, 2452],
        [2686, 1657],
        [2691,  206]])


In [13]:
# Sanity checks on the effect of the defense.
# Size of the training set before the defense (train nodes + poisoned nodes)...
print(len(torch.cat([idx_train,idx_attach])))
# ...and after it.
print(len(bkd_tn_nodes))
# Edge count stored on the Backdoor object vs. edge count after the defense.
print(len(model.poison_edge_index.data[0]),len(poison_edge_index[0]))
# print(idx_attach & bkd_tn_nodes)
# Poisoned nodes that are still part of the training set.
print(set(bkd_tn_nodes.tolist()) & set(idx_attach.tolist()))

578
320
4314 2882
{106, 1835, 2479, 505, 26, 1307}


In [14]:
#%%
from models.GCN import GCN
from models.GAT import GAT
from models.GIN import GIN
# Fresh model of the architecture under attack, trained on the poisoned graph.
test_model = model_construct(args,args.test_model,data).to(device) 
if(args.test_model == 'GraphSage' or args.test_model == 'GAT'):
    # Round-trip through a dense adjacency so index and weights are rebuilt
    # together for these two architectures.
    poison_adj = to_dense_adj(poison_edge_index, edge_attr=poison_edge_weights)
    poison_edge_index, poison_edge_weights = dense_to_sparse(poison_adj)
# Use --epochs (documented as the epoch count for the benign AND the backdoor
# model) instead of a hard-coded 200, so the CLI option takes effect here.
# The default is still 200.
test_model.fit(poison_x, poison_edge_index, poison_edge_weights, poison_labels, bkd_tn_nodes, idx_val,train_iters=args.epochs,verbose=True)

=== training gcn model ===
Epoch 0, training loss: 1.9510772228240967
acc_val: 0.3625
Epoch 10, training loss: 0.5053321123123169
acc_val: 0.7000
Epoch 20, training loss: 0.14754590392112732
acc_val: 0.7175
Epoch 30, training loss: 0.09236279129981995
acc_val: 0.7250
Epoch 40, training loss: 0.06816352903842926
acc_val: 0.7200
Epoch 50, training loss: 0.06020592898130417
acc_val: 0.7150
Epoch 60, training loss: 0.0682452842593193
acc_val: 0.7175
Epoch 70, training loss: 0.04516971856355667
acc_val: 0.7200
Epoch 80, training loss: 0.0528641939163208
acc_val: 0.6975
Epoch 90, training loss: 0.044221844524145126
acc_val: 0.7175
Epoch 100, training loss: 0.05050992965698242
acc_val: 0.7100
Epoch 110, training loss: 0.048176392912864685
acc_val: 0.7100
Epoch 120, training loss: 0.04732782393693924
acc_val: 0.7125
Epoch 130, training loss: 0.0622921884059906
acc_val: 0.7225
Epoch 140, training loss: 0.04177570715546608
acc_val: 0.7150
Epoch 150, training loss: 0.03020920418202877
acc_val: 0.

In [15]:
# --- Target-class rate on the poisoned training nodes (Vs) -------------------
output = test_model(poison_x,poison_edge_index,poison_edge_weights)
vs_predictions = output.argmax(dim=1)[idx_attach]
train_attach_rate = (vs_predictions==args.target_class).float().mean()
print("target class rate on Vs: {:.4f}".format(train_attach_rate))
#%%
# --- Inductive test graph ----------------------------------------------------
# Poisoned training edges plus the held-out edges, the latter with unit weight.
held_out_weights = torch.ones([mask_edge_index.shape[1]],dtype=torch.float,device=device)
induct_edge_index = torch.cat([poison_edge_index,mask_edge_index],dim=1)
induct_edge_weights = torch.cat([poison_edge_weights,held_out_weights])

# Labels used to score the attack: the attacked test nodes are relabelled with
# the target class, so "accuracy" on them is the attack success rate.
atk_labels = poison_labels.clone()
atk_labels[atk_test_nodes] = args.target_class

# Clean accuracy on the clean test nodes BEFORE triggers are injected.
clean_acc = test_model.test(poison_x,induct_edge_index,induct_edge_weights,data.y,clean_test_nodes)
print("accuracy on clean test nodes: {:.4f}".format(clean_acc))

# Inject a trigger on every attacked test node.
induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(atk_test_nodes,poison_x,induct_edge_index,induct_edge_weights)

# Both defense modes apply plain edge pruning to the test graph.
if args.defense_mode in ('prune', 'isolate'):
    induct_edge_index,induct_edge_weights = prune_unrelated_edge(args,induct_edge_index,induct_edge_weights,induct_x,device)

# --- Attack evaluation ------------------------------------------------------
asr = test_model.test(induct_x,induct_edge_index,induct_edge_weights,atk_labels,atk_test_nodes)
ca = test_model.test(induct_x,induct_edge_index,induct_edge_weights,data.y,clean_test_nodes)
print("ASR: {:.4f}".format(asr))
print("CA: {:.4f}".format(ca))

target class rate on Vs: 0.5000
accuracy on clean test nodes: 0.8000
ASR: 0.5800
CA: 0.7900


## To be deleted

In [None]:
import scipy.sparse as sp
from torch_geometric.utils import to_dense_adj,dense_to_sparse
# Normalise features and adjacency through the scipy-based project helpers,
# then convert both back to torch tensors on `device`.
# NOTE: induct_x, induct_edge_index and induct_edge_weights are overwritten, so
# this cell is not idempotent.
induct_x_np = induct_x.cpu().detach().numpy()
induct_adj_np = to_dense_adj(induct_edge_index)[0].cpu().detach().numpy()

sp_induct_x = help_funcs.normalize(sp.csr_matrix(induct_x_np))
sp_induct_adj = help_funcs.normalize_adj(sp.csr_matrix(induct_adj_np))

induct_x = torch.FloatTensor(np.array(sp_induct_x.todense())).to(device)
induct_adj = torch.FloatTensor(np.array(sp_induct_adj.todense())).to(device)

# Back to sparse form: the non-zero entries of the normalised adjacency become
# the new edge weights.
induct_edge_index,induct_edge_weights = dense_to_sparse(induct_adj)

In [None]:
# Scratch evaluation of the attack on the inductive graph.
# NOTE(review): `gcn`, `yx_nids` and `idx_atk` are not defined by any earlier
# cell in this notebook (`idx_atk` is only assigned in a later scratch cell), so
# this cell fails on a fresh kernel. Presumably `gcn` is an older name for
# `test_model` — confirm before reusing.
output = gcn(induct_x,induct_edge_index,induct_edge_weights)
# Fraction of the `yx_nids` nodes predicted as the target class.
train_attach_rate = (output.argmax(dim=1)[yx_nids]==args.target_class).float().mean()
print("ASR: {:.4f}".format(train_attach_rate))
# Accuracy against the true labels on idx_test, and against the target-class
# labels (atk_labels) on idx_atk.
clean_acc = gcn.test(induct_x,induct_edge_index,induct_edge_weights,data.y,idx_test)
asr = gcn.test(induct_x,induct_edge_index,induct_edge_weights,atk_labels,idx_atk)
print("accuracy on clean test nodes: {:.4f}".format(clean_acc))
print("ASR1: {:.4f}".format(asr))

In [None]:
# Scratch cell: train a fresh GCN on the clean graph and evaluate it on both the
# clean graph and the trigger-injected graph.
# NOTE(review): `idx_atk` and `yx_nids` are not defined by any earlier cell
# (`idx_atk` is only assigned in a later scratch cell), so this cell fails on a
# fresh kernel.
benign_gcn = GCN(nfeat=data.x.shape[1],\
            nhid=args.hidden,\
            nclass= int(data.y.max()+1),\
            dropout=args.dropout,\
            lr=args.lr,\
            weight_decay=args.weight_decay,\
            device=device).to(device)
#%%
# Attack labels: copy of the poisoned labels with the attacked nodes set to the
# target class.
atk_labels = poison_labels.clone()
atk_labels[idx_atk] = args.target_class
# Unit weight for every edge of the clean graph.
edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
benign_gcn.fit(data.x, data.edge_index, edge_weights, data.y, idx_train, idx_val,train_iters=args.epochs,verbose=True)
# Predictions on the clean graph and on the trigger-injected graph.
benign_output = benign_gcn(data.x, data.edge_index, edge_weights)
benign4poison_output = benign_gcn(induct_x,induct_edge_index,induct_edge_weights)
# Accuracy on idx_test: against the true labels for the clean graph, against
# atk_labels for the injected graph.
benign_ca = (benign_output.argmax(dim=1)[idx_test]==data.y[idx_test]).float().mean()
benign4poison_ca = (benign4poison_output.argmax(dim=1)[idx_test]==atk_labels[idx_test]).float().mean()
print("BenignCA: {:.4f}".format(benign_ca))
print("Benign for poisoning CA: {:.4f}".format(benign4poison_ca))
# Fraction of the `yx_nids` nodes the benign model already assigns to the target class.
print((benign_output.argmax(dim=1)[yx_nids]==args.target_class).float().mean())

In [None]:
# Split the test nodes into 200 attack targets and 200 disjoint clean controls.
atk_labels = data.y.clone()
test_node_ids = data.test_mask.nonzero().flatten()
idx_atk = obtain_attach_nodes(test_node_ids, 200)
# Compare plain Python ints. set() over a tensor holds 0-d tensors, which hash
# by object identity, so the former `set(tensor) - set(idx_atk)` never removed
# anything and the two groups could overlap.
atk_ids = set(torch.as_tensor(idx_atk).flatten().tolist())
remaining_ids = [nid for nid in test_node_ids.tolist() if nid not in atk_ids]
can_test_nodes = torch.LongTensor(remaining_ids).to(device)
idx_test = obtain_attach_nodes(can_test_nodes,200)

In [None]:
# Scratch cell: inject triggers directly into the CLEAN graph (data.x /
# data.edge_index with unit edge weights) and score the result.
# NOTE(review): `gcn` is not defined anywhere in the visible notebook, so this
# cell fails on a fresh kernel — presumably an older name for `test_model`; confirm.
edge_weights = torch.ones([data.edge_index.shape[1]],device=device,dtype=torch.float)
induct_x, induct_edge_index,induct_edge_weights = model.inject_trigger(idx_atk,data.x,data.edge_index,edge_weights)
output = gcn(induct_x,induct_edge_index,induct_edge_weights)
# Attack success rate: share of attacked nodes predicted as the target class.
test_asr= (output.argmax(dim=1)[idx_atk]==args.target_class).float().mean()
print("ASR: {:.4f}".format(test_asr))
# Clean accuracy: share of idx_test nodes whose prediction matches atk_labels.
test_ca = (output.argmax(dim=1)[idx_test]==atk_labels[idx_test]).float().mean()
print("CA: {:.4f}".format(test_ca))

In [22]:
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj
# Dense adjacency of the poisoned training graph; the loop below indexes it as
# poison_adj_dense[0][nid], i.e. the leading dimension is selected away first.
# NOTE(review): no max_num_nodes is given, so the matrix size follows the edge
# list — presumably smaller than poison_x if trailing nodes have no edges; confirm.
poison_adj_dense = to_dense_adj(poison_edge_index)
def edge_sim_analysis(edge_index, features):
    """Cosine similarity of the endpoint features of every edge.

    Args:
        edge_index: iterable of ``(u, v)`` pairs of node indices.
        features: tensor indexed by node id; ``features[u]`` is the feature
            vector of node ``u``.

    Returns:
        A numpy array with one similarity per pair, in input order.
    """
    pair_sims = [
        float(F.cosine_similarity(features[src].unsqueeze(0), features[dst].unsqueeze(0)))
        for (src, dst) in edge_index
    ]
    return np.array(pair_sims)

# Node ids from data.x.shape[0] up to poison_x.shape[0]: the rows poison_x has
# beyond the original graph (the injected trigger nodes).
bkd_nids = list(range(data.x.shape[0],poison_x.shape[0]))
for nid in idx_attach:
    # Neighbours of the poisoned node in the dense poisoned adjacency.
    polished_adj_nodes = poison_adj_dense[0][nid].nonzero()
    for v in polished_adj_nodes:
        v = int(v)
        if v not in bkd_nids:
            # Only edges that reach a trigger node are reported.
            continue
        u = nid
        print(nid,v)
        print(F.cosine_similarity(poison_x[u].unsqueeze(0),poison_x[v].unsqueeze(0)))

In [23]:
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj
# Dense adjacency of the trigger-injected test graph; later cells index it as
# induct_adj_dense[0][nid], i.e. the leading dimension is selected away first.
# NOTE(review): no max_num_nodes is given, so the matrix size follows the edge
# list — presumably smaller than induct_x if trailing nodes have no edges; confirm.
induct_adj_dense = to_dense_adj(induct_edge_index)
def edge_sim_analysis(edge_index, features):
    """Return the cosine similarity between the two endpoints of each edge.

    Args:
        edge_index: iterable of ``(u, v)`` node-index pairs.
        features: tensor whose row ``i`` is the feature vector of node ``i``.

    Returns:
        numpy array of similarities, one per pair, in iteration order.
    """
    def _endpoint_similarity(u, v):
        # cosine_similarity compares along dim 1, so each vector gets a
        # leading batch dimension of size one.
        return float(F.cosine_similarity(features[u].unsqueeze(0), features[v].unsqueeze(0)))

    return np.array([_endpoint_similarity(u, v) for (u, v) in edge_index])

# Node ids from poison_x.shape[0] up to induct_x.shape[0]: the rows induct_x
# has beyond poison_x (the trigger nodes injected at test time).
# The list is built once here. Previously an unused int was assigned first and
# the list was rebuilt on every iteration, leaving `bkd_nids` an int whenever
# atk_test_nodes was empty.
bkd_nids = list(range(poison_x.shape[0],induct_x.shape[0]))
for nid in atk_test_nodes:
    # Neighbours of the attacked node in the dense inductive adjacency.
    polished_adj_nodes = induct_adj_dense[0][nid].nonzero()
    for v in polished_adj_nodes:
        v = int(v)
        if(v in bkd_nids):
            # Report the feature similarity across each target-trigger edge.
            u = nid
            print(nid,v)
            print(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))

615 2813
tensor([0.1483], device='cuda:2', grad_fn=<SumBackward1>)
1675 2867
tensor([0.1121], device='cuda:2', grad_fn=<SumBackward1>)
1675 3347
tensor([0.1121], device='cuda:2', grad_fn=<SumBackward1>)
1421 3239
tensor([0.1104], device='cuda:2', grad_fn=<SumBackward1>)
1675 2867
tensor([0.1121], device='cuda:2', grad_fn=<SumBackward1>)
1675 3347
tensor([0.1121], device='cuda:2', grad_fn=<SumBackward1>)


In [None]:
# Ids of the nodes induct_x has beyond poison_x. Not consulted in this cell
# (the filter on it is disabled) but still refreshed for later cells.
bkd_nids = list(range(poison_x.shape[0],induct_x.shape[0]))
for nid in idx_test:
    # Print the similarity to EVERY neighbour of the node, trigger or not.
    polished_adj_nodes = induct_adj_dense[0][nid].nonzero()
    for v in map(int, polished_adj_nodes):
        u = nid
        print(nid,v)
        print(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))

In [None]:
def calculate_graph_homophily(adj,x,device):
    """Per-node cosine similarity between a node and its aggregated neighbours.

    The adjacency is scaled symmetrically by ``(deg + 1) ** -0.5`` on both
    sides (no self-loop is added to ``adj`` itself). Each node's features are
    then compared with the neighbourhood aggregate ``adj_norm @ x``.

    Args:
        adj: dense square adjacency matrix, one row per row of ``x``.
        x: node feature matrix.
        device: device on which the computation runs.

    Returns:
        numpy float64 array with one similarity per node. A node without
        neighbours has an all-zero aggregate and therefore similarity 0.
    """
    adj = adj.to(device)
    x = x.to(device)
    # Scale by the degree vector directly instead of building a dense diagonal
    # matrix and doing two n x n matmuls; the products are the same.
    deg_inv_sqrt = (adj.sum(1) + 1).pow(-0.5)
    # Guard kept from the original: a degree of -1 would give inf.
    deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0.)
    adj_norm = deg_inv_sqrt.unsqueeze(1) * adj * deg_inv_sqrt.unsqueeze(0)
    x_neg = adj_norm @ x
    # One batched call replaces the per-node Python loop.
    node_sims = F.cosine_similarity(x_neg, x, dim=1)
    # detach() because x may carry autograd history; float64 matches what the
    # former list-of-Python-floats produced.
    return node_sims.detach().cpu().numpy().astype(np.float64)
# Homophily scores for the three graphs. Each result name now matches the graph
# it is computed on; previously the clean graph (data) was stored under
# `bkd_graph_test_node_sims` and the trigger-injected test graph (induct) under
# `clean_graph_node_sims`, so downstream labels were swapped.
# max_num_nodes pins the dense adjacency to the number of feature rows.
# Without it to_dense_adj sizes the matrix from the largest node id that still
# has an edge, which breaks `adj @ x` when trailing nodes are isolated.
clean_graph_node_sims = calculate_graph_homophily(to_dense_adj(data.edge_index, max_num_nodes=data.x.shape[0])[0].to(device),data.x.to(device),device)
bkd_graph_train_node_sims = calculate_graph_homophily(to_dense_adj(poison_edge_index, max_num_nodes=poison_x.shape[0])[0].to(device),poison_x.to(device),device)
bkd_graph_test_node_sims = calculate_graph_homophily(to_dense_adj(induct_edge_index, max_num_nodes=induct_x.shape[0])[0].to(device),induct_x.to(device),device)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.font_manager import FontProperties

def to_percent(y,position):
    """Format an axis value as a percentage string, e.g. 0.5 -> "50.0%".

    ``position`` is accepted for the matplotlib FuncFormatter signature and is
    ignored. No rounding is applied; wrap the value in round() to limit the
    number of decimals.
    """
    percent_value = 100*y
    return "%s%%" % (percent_value,)

# Overlay the node-similarity histograms of the three graphs.
# Each sample gets the same weight 1/len(sample); `weights` must have the same
# length as the data.
histogram_specs = [
    (clean_graph_node_sims, 10, 'clean'),
    (np.array(bkd_graph_test_node_sims), 20, 'poison'),
    (np.array(bkd_graph_train_node_sims), 20, 'attack'),
]
for node_sims, n_bins, legend_label in histogram_specs:
    uniform_weights = [1./len(node_sims)]*len(node_sims)
    plt.hist(node_sims,bins=n_bins,weights=uniform_weights,density=True, alpha=0.75, label=legend_label)

# Percentage formatter for the y axis; built but currently not attached.
fomatter=FuncFormatter(to_percent)
# plt.gca().yaxis.set_major_formatter(fomatter)
plt.grid(True)
plt.xlabel("Cosine Similarity")
plt.ylabel("Density")
plt.legend()
# plt.savefig("pics/grb_cora_node_sims.png")
# plt.savefig("pics/grb_cora_node_sims.pdf")
plt.show()
plt.close()

In [None]:
# Bucket edges by the kind of nodes they connect (trigger / attack target /
# normal test node) and record the cosine similarity of the endpoint features
# (taken from induct_x) for each bucket.
# NOTE(review): the edge list comes from data.edge_index (the clean graph),
# while trigger ids and features refer to the injected graph — presumably
# induct_edge_index was intended; confirm.
# NOTE(review): `idx_atk` is only assigned in a scratch cell above, not by the
# main pipeline.
# .nonzero() on the dense adjacency gives one (row, col) pair per non-zero entry.
bkd_test_edge_index = to_dense_adj(data.edge_index)[0].nonzero()
# Each bucket keeps two parallel lists: similarities and the (u, v) pairs.
trigger_trigger_edge_sims = []
trigger_trigger_edge_index = []

trigger_target_edge_sims = []
trigger_target_edge_index = []

normal_normal_edge_sims = []
normal_normal_edge_index = []

trigger_normal_edge_sims = []
trigger_normal_edge_index = []

target_target_edge_sims = []
target_target_edge_index = []
for (u,v) in bkd_test_edge_index:
    # Skip an edge already recorded in either direction, so each undirected
    # edge is counted once.
    # NOTE(review): these are linear scans over lists of tensor pairs, so the
    # cost grows quadratically with the number of recorded edges.
    if ((v,u) in trigger_trigger_edge_index) or ((u,v) in trigger_trigger_edge_index):
        continue
    if ((v,u) in trigger_target_edge_index) or ((u,v) in trigger_target_edge_index):
        continue
    if ((v,u) in normal_normal_edge_index) or ((u,v) in normal_normal_edge_index):
        continue
    if ((v,u) in trigger_normal_edge_index) or ((u,v) in trigger_normal_edge_index):
        continue
    if ((v,u) in target_target_edge_index) or ((u,v) in target_target_edge_index):
        continue
    
    # Both endpoints are trigger nodes.
    if (u in bkd_nids) and (v in bkd_nids):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        trigger_trigger_edge_sims.append(edge_sims)
        trigger_trigger_edge_index.append((u,v))
        continue
    # One endpoint is a trigger node, the other an attacked test node.
    if ((u in bkd_nids) and (v in idx_atk)) or ((v in bkd_nids) and (u in idx_atk)):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        trigger_target_edge_sims.append(edge_sims)
        trigger_target_edge_index.append((u,v))
        continue
    # Both endpoints are in idx_test.
    if (u in idx_test) and (v in idx_test):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        normal_normal_edge_sims.append(edge_sims)
        normal_normal_edge_index.append((u,v))
        continue
    # One endpoint is a trigger node, the other is in idx_test.
    # NOTE(review): unlike the other buckets this one has no `continue`, so
    # control falls through to the target-target check below.
    if ((u in bkd_nids) and (v in idx_test)) or ((v in bkd_nids) and (u in idx_test)):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        trigger_normal_edge_sims.append(edge_sims)
        trigger_normal_edge_index.append((u,v))

    # Both endpoints are attacked test nodes.
    if ((u in idx_atk) and (v in idx_atk)):
        edge_sims = float(F.cosine_similarity(induct_x[u].unsqueeze(0),induct_x[v].unsqueeze(0)))
        target_target_edge_sims.append(edge_sims)
        target_target_edge_index.append((u,v))
        continue
    