In [1]:
import torch
from gcn import GCN
from utils import load_data, preprocess, normalize_adj_tensor, accuracy, get_train_val_test
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from matplotlib import pyplot as plt
from metattack import MetaApprox, Metattack
import seaborn as sns
from deeprobust.graph.data import Dataset
import gc

In [2]:
# Fixed parameters for Cora dataset
torch.cuda.empty_cache()
seed = 15
epochs = 200
lr = 0.01
hidden = 64  # Changed from 16 to 64 as requested
dataset = 'citeseer'
model_variant = 'Meta-Self'  # Options: 'A-Meta-Self', 'Meta-Self'
ptb_rates = [0.13]  # Multiple perturbation rates


In [3]:
# Set seeds for reproducibility
np.random.seed(seed)
torch.manual_seed(seed)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device != 'cpu':
    torch.cuda.manual_seed(seed)

# Load Cora dataset
adj, features, labels = load_data(dataset=dataset)
nclass = max(labels) + 1

Loading citeseer dataset...
reading citeseer...
Selecting 1 largest connected components


In [4]:
# Split dataset
val_size = 0.1
test_size = 0.8
train_size = 1 - test_size - val_size
idx = np.arange(adj.shape[0])
idx_train, idx_val, idx_test = get_train_val_test(idx, train_size, val_size, test_size, stratify=labels)
idx_unlabeled = np.union1d(idx_val, idx_test)

# Preprocess without normalizing adjacency yet
adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False)

# Move data to device
if device != 'cpu':
    adj = adj.to(device)
    features = features.to(device)
    labels = labels.to(device)

In [5]:
def train_gcn(adj):
    ''' Train GCN on the given adjacency matrix '''
    # Normalize adjacency matrix for GCN
    norm_adj = normalize_adj_tensor(adj)
    
    # Initialize GCN model - the provided GCN is already 2-layer
    # We just need to set the hidden dimension to 64
    gcn = GCN(nfeat=features.shape[1],
              nhid=hidden,
              nclass=labels.max().item() + 1,
              dropout=0.5)
    
    if device != 'cpu':
        gcn = gcn.to(device)
    
    optimizer = optim.Adam(gcn.parameters(),
                           lr=lr, weight_decay=5e-4)
    
    # Train GCN
    gcn.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = gcn(features, norm_adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()
    
    return gcn

In [6]:
def evaluate(gcn, adj, idx):
    ''' Evaluate GCN on the given adjacency matrix and index '''
    # Normalize adjacency matrix for evaluation
    norm_adj = normalize_adj_tensor(adj)
    
    gcn.eval()
    with torch.no_grad():
        output = gcn(features, norm_adj)
        loss = F.nll_loss(output[idx], labels[idx])
        acc = accuracy(output[idx], labels[idx])
    
    return acc.item()

In [7]:
def run_attack():
    results = {}
    
    print('=== Training GCN on original(clean) graph ===')
    # First, train the GCN on the clean graph
    trained_gcn = train_gcn(adj)
    
    # Set up attack model parameters
    if 'Self' in model_variant:
        lambda_ = 0
    if 'Train' in model_variant:
        lambda_ = 1
    if 'Both' in model_variant:
        lambda_ = 0.5
    
    # Configure attack model
    if 'A' in model_variant:
        model_class = MetaApprox
    else:
        model_class = Metattack
    
    # Evaluate across different perturbation rates
    for ptb_rate in ptb_rates:
        print(f'\n=== Testing perturbation rate: {ptb_rate*100:.1f}% ===')
        perturbations = int(ptb_rate * (adj.sum()//2))
        
        print('=== Setting up attack model ===')
        model = model_class(nfeat=features.shape[1], hidden_sizes=[hidden],
                        nnodes=adj.shape[0], nclass=nclass, dropout=0.5,
                        train_iters=0, attack_features=False, lambda_=lambda_, device=device)
        
        if device != 'cpu':
            model = model.to(device)
            
        # Generate adversarial adjacency matrix focusing on test nodes
        print(f'=== Perturbing graph with {perturbations} edge modifications ===')
        
        modified_adj = model(features, adj, labels, idx_train, idx_test, perturbations, ll_constraint=False)
        modified_adj = modified_adj.detach()
        
        runs = 3  # Reduced from 10 to 3 for quicker execution
        clean_acc = []
        attacked_acc = []
        
        print('=== Evaluating GCN performance ===')
        # Test the already trained GCN on both clean and perturbed graph
        for i in range(runs):
            # Reset the GCN for each run
            trained_gcn = train_gcn(adj)
            
            # Evaluate on clean test data
            clean_acc.append(evaluate(trained_gcn, adj, idx_test))
            
            # Evaluate on perturbed test data (evasion)
            attacked_acc.append(evaluate(trained_gcn, modified_adj, idx_test))
            
            print(f"Run {i+1}/{runs}: Clean acc = {clean_acc[-1]:.4f}, Attacked acc = {attacked_acc[-1]:.4f}")
        
        # Calculate effectiveness metrics
        clean_mean = np.mean(clean_acc)
        clean_std = np.std(clean_acc)
        attack_mean = np.mean(attacked_acc)
        attack_std = np.std(attacked_acc)
        acc_drop = clean_mean - attack_mean
        relative_drop = (acc_drop / clean_mean) * 100
        
        # Print summary statistics
        print(f"\n=== Attack Effectiveness Summary (Perturbation rate: {ptb_rate*100:.1f}%) ===")
        print(f"Clean accuracy: {clean_mean:.4f} ± {clean_std:.4f}")
        print(f"Attacked accuracy: {attack_mean:.4f} ± {attack_std:.4f}")
        print(f"Absolute accuracy drop: {acc_drop:.4f}")
        print(f"Relative accuracy drop: {relative_drop:.2f}%")
        print(f"Effectiveness ratio: {acc_drop/ptb_rate:.4f} (drop per perturbation unit)")
        
        # Assessment
        if acc_drop > 0.10:
            print("Attack assessment: Highly effective")
        elif acc_drop > 0.05:
            print("Attack assessment: Moderately effective")
        elif acc_drop > 0.02:
            print("Attack assessment: Slightly effective")
        else:
            print("Attack assessment: Minimally effective")
        
        # Store results
        results[ptb_rate] = {
            'modified_adj': modified_adj,
            'clean_acc': clean_acc,
            'attacked_acc': attacked_acc,
            'accuracy_drop': acc_drop,
            'relative_drop': relative_drop,
            'effectiveness_ratio': acc_drop/ptb_rate
        }
        
        # Free up memory
        del model
        torch.cuda.empty_cache()
        gc.collect()
    
    # Add overall results
    results['clean_adj'] = adj
    
    # Create and save comparative visualization
    plt.figure(figsize=(10, 6))
    
    ptb_values = list(ptb_rates)
    acc_drops = [results[ptb]['accuracy_drop'] for ptb in ptb_rates]
    rel_drops = [results[ptb]['relative_drop'] for ptb in ptb_rates]
    
    plt.subplot(1, 2, 1)
    plt.plot(ptb_values, acc_drops, 'o-', linewidth=2)
    plt.xlabel('Perturbation Rate')
    plt.ylabel('Absolute Accuracy Drop')
    plt.title('Impact of Perturbation Rate on Accuracy Drop')
    plt.grid(True)
    
    plt.subplot(1, 2, 2)
    plt.plot(ptb_values, rel_drops, 'o-', linewidth=2, color='orange')
    plt.xlabel('Perturbation Rate')
    plt.ylabel('Relative Accuracy Drop (%)')
    plt.title('Impact of Perturbation Rate on Relative Accuracy Drop')
    plt.grid(True)
    
    plt.tight_layout()
    plt.savefig('perturbation_impact.png')
    plt.close()
    
    return results

In [8]:
def clean_memory():
    """Clean GPU memory and garbage collect"""
    if device != 'cpu':
        torch.cuda.empty_cache()
    gc.collect()

In [9]:
if __name__ == '__main__':
    torch.cuda.empty_cache()
    results = run_attack()
    
    # Print comparative summary
    print("\n=== Comparative Analysis ===")
    print("Perturbation Rate | Accuracy Drop | Relative Drop | Effectiveness Ratio")
    print("-" * 65)
    for ptb_rate in ptb_rates:
        print(f"{ptb_rate*100:15.1f}% | {results[ptb_rate]['accuracy_drop']:12.4f} | {results[ptb_rate]['relative_drop']:12.2f}% | {results[ptb_rate]['effectiveness_ratio']:18.4f}")

=== Training GCN on original(clean) graph ===

=== Testing perturbation rate: 13.0% ===
=== Setting up attack model ===
=== Perturbing graph with 476 edge modifications ===
=== training surrogate model to predict unlabled data for self-training


Perturbing graph:   1%|▌                                                                | 4/476 [00:00<00:14, 33.48it/s]

GCN loss on unlabled data: 1.8471766710281372
GCN acc on unlabled data: 0.12203791469194314
attack loss: 1.8366998434066772
GCN loss on unlabled data: 1.7757670879364014
GCN acc on unlabled data: 0.21504739336492892
attack loss: 1.7775812149047852
GCN loss on unlabled data: 1.7863550186157227
GCN acc on unlabled data: 0.21504739336492892
attack loss: 1.7866251468658447
GCN loss on unlabled data: 1.892317771911621
GCN acc on unlabled data: 0.1765402843601896
attack loss: 1.9162280559539795
GCN loss on unlabled data: 1.9482496976852417
GCN acc on unlabled data: 0.1575829383886256
attack loss: 1.9510427713394165
GCN loss on unlabled data: 1.8410935401916504
GCN acc on unlabled data: 0.19609004739336494
attack loss: 1.8667527437210083
GCN loss on unlabled data: 1.853586196899414
GCN acc on unlabled data: 0.14454976303317538
attack loss: 1.8671443462371826
GCN loss on unlabled data: 1.8589560985565186
GCN acc on unlabled data: 0.15639810426540285
attack loss: 1.8896580934524536


Perturbing graph:   3%|█▉                                                              | 14/476 [00:00<00:11, 41.17it/s]

GCN loss on unlabled data: 1.8866548538208008
GCN acc on unlabled data: 0.14277251184834125
attack loss: 1.8955787420272827
GCN loss on unlabled data: 1.8121960163116455
GCN acc on unlabled data: 0.20142180094786732
attack loss: 1.7935444116592407
GCN loss on unlabled data: 1.936407208442688
GCN acc on unlabled data: 0.11789099526066352
attack loss: 1.949977159500122
GCN loss on unlabled data: 1.7952238321304321
GCN acc on unlabled data: 0.2061611374407583
attack loss: 1.7744115591049194
GCN loss on unlabled data: 1.913456916809082
GCN acc on unlabled data: 0.1279620853080569
attack loss: 1.9282749891281128
GCN loss on unlabled data: 1.9148504734039307
GCN acc on unlabled data: 0.11848341232227488
attack loss: 1.9036039113998413
GCN loss on unlabled data: 1.8709715604782104
GCN acc on unlabled data: 0.20379146919431282
attack loss: 1.83245050907135
GCN loss on unlabled data: 1.8870294094085693
GCN acc on unlabled data: 0.19135071090047395
attack loss: 1.8956533670425415
GCN loss on unl

Perturbing graph:   5%|███▏                                                            | 24/476 [00:00<00:10, 43.76it/s]

GCN loss on unlabled data: 1.756677269935608
GCN acc on unlabled data: 0.2511848341232228
attack loss: 1.757948875427246
GCN loss on unlabled data: 1.8764750957489014
GCN acc on unlabled data: 0.12440758293838863
attack loss: 1.8763161897659302
GCN loss on unlabled data: 1.8468490839004517
GCN acc on unlabled data: 0.1575829383886256
attack loss: 1.8397094011306763
GCN loss on unlabled data: 1.8556787967681885
GCN acc on unlabled data: 0.16528436018957346
attack loss: 1.863849401473999
GCN loss on unlabled data: 1.8282607793807983
GCN acc on unlabled data: 0.19549763033175357
attack loss: 1.7968158721923828
GCN loss on unlabled data: 1.7930859327316284
GCN acc on unlabled data: 0.24466824644549764
attack loss: 1.7990057468414307
GCN loss on unlabled data: 1.9186240434646606
GCN acc on unlabled data: 0.11848341232227488
attack loss: 1.9308252334594727
GCN loss on unlabled data: 1.8247673511505127
GCN acc on unlabled data: 0.20675355450236968
attack loss: 1.81878662109375
GCN loss on unl

Perturbing graph:   7%|████▌                                                           | 34/476 [00:00<00:09, 44.86it/s]

GCN loss on unlabled data: 1.8251756429672241
GCN acc on unlabled data: 0.18009478672985785
attack loss: 1.8366419076919556
GCN loss on unlabled data: 1.9622302055358887
GCN acc on unlabled data: 0.08293838862559243
attack loss: 1.966123342514038
GCN loss on unlabled data: 1.8037930727005005
GCN acc on unlabled data: 0.21741706161137442
attack loss: 1.770782709121704
GCN loss on unlabled data: 1.850417971611023
GCN acc on unlabled data: 0.1949052132701422
attack loss: 1.839905023574829
GCN loss on unlabled data: 1.906314492225647
GCN acc on unlabled data: 0.12322274881516589
attack loss: 1.9095295667648315
GCN loss on unlabled data: 1.9196242094039917
GCN acc on unlabled data: 0.11492890995260664
attack loss: 1.910885214805603
GCN loss on unlabled data: 1.890047311782837
GCN acc on unlabled data: 0.14099526066350712
attack loss: 1.8911423683166504
GCN loss on unlabled data: 1.8205432891845703
GCN acc on unlabled data: 0.16824644549763035
attack loss: 1.8407336473464966
GCN loss on unla

Perturbing graph:   9%|█████▉                                                          | 44/476 [00:01<00:09, 46.20it/s]

GCN loss on unlabled data: 1.8451027870178223
GCN acc on unlabled data: 0.16409952606635073
attack loss: 1.8311861753463745
GCN loss on unlabled data: 1.8368821144104004
GCN acc on unlabled data: 0.18009478672985785
attack loss: 1.8450247049331665
GCN loss on unlabled data: 1.8065910339355469
GCN acc on unlabled data: 0.19075829383886259
attack loss: 1.779954195022583
GCN loss on unlabled data: 1.8891863822937012
GCN acc on unlabled data: 0.1415876777251185
attack loss: 1.8777626752853394
GCN loss on unlabled data: 1.8408100605010986
GCN acc on unlabled data: 0.16587677725118485
attack loss: 1.8395129442214966
GCN loss on unlabled data: 1.7958976030349731
GCN acc on unlabled data: 0.18661137440758296
attack loss: 1.8009614944458008
GCN loss on unlabled data: 1.7910469770431519
GCN acc on unlabled data: 0.22393364928909953
attack loss: 1.7865010499954224
GCN loss on unlabled data: 1.8642528057098389
GCN acc on unlabled data: 0.13625592417061613
attack loss: 1.8713757991790771
GCN loss o

Perturbing graph:  11%|███████▎                                                        | 54/476 [00:01<00:08, 46.98it/s]

GCN loss on unlabled data: 1.8609329462051392
GCN acc on unlabled data: 0.14691943127962087
attack loss: 1.8668394088745117
GCN loss on unlabled data: 1.8056703805923462
GCN acc on unlabled data: 0.1712085308056872
attack loss: 1.7848389148712158
GCN loss on unlabled data: 1.8488428592681885
GCN acc on unlabled data: 0.2061611374407583
attack loss: 1.8401169776916504
GCN loss on unlabled data: 1.8112133741378784
GCN acc on unlabled data: 0.17002369668246448
attack loss: 1.800360918045044
GCN loss on unlabled data: 1.81243896484375
GCN acc on unlabled data: 0.18601895734597157
attack loss: 1.8195551633834839
GCN loss on unlabled data: 1.865751028060913
GCN acc on unlabled data: 0.13566350710900474
attack loss: 1.8525266647338867
GCN loss on unlabled data: 1.8270597457885742
GCN acc on unlabled data: 0.15936018957345974
attack loss: 1.84345543384552
GCN loss on unlabled data: 1.8503167629241943
GCN acc on unlabled data: 0.13744075829383887
attack loss: 1.8733326196670532
GCN loss on unla

Perturbing graph:  13%|████████▌                                                       | 64/476 [00:01<00:08, 47.39it/s]

GCN loss on unlabled data: 1.7901629209518433
GCN acc on unlabled data: 0.18957345971563982
attack loss: 1.771983027458191
GCN loss on unlabled data: 1.7645072937011719
GCN acc on unlabled data: 0.25947867298578203
attack loss: 1.7499700784683228
GCN loss on unlabled data: 1.853928804397583
GCN acc on unlabled data: 0.17535545023696683
attack loss: 1.8457555770874023
GCN loss on unlabled data: 1.9255545139312744
GCN acc on unlabled data: 0.10722748815165878
attack loss: 1.9485440254211426
GCN loss on unlabled data: 1.8271421194076538
GCN acc on unlabled data: 0.18601895734597157
attack loss: 1.8201438188552856
GCN loss on unlabled data: 1.8706992864608765
GCN acc on unlabled data: 0.17298578199052134
attack loss: 1.8866809606552124
GCN loss on unlabled data: 1.880849838256836
GCN acc on unlabled data: 0.15639810426540285
attack loss: 1.8723613023757935
GCN loss on unlabled data: 1.862335443496704
GCN acc on unlabled data: 0.14277251184834125
attack loss: 1.8710997104644775
GCN loss on 

Perturbing graph:  16%|█████████▉                                                      | 74/476 [00:01<00:08, 47.64it/s]

GCN loss on unlabled data: 1.8977383375167847
GCN acc on unlabled data: 0.13388625592417064
attack loss: 1.9085122346878052
GCN loss on unlabled data: 1.8387125730514526
GCN acc on unlabled data: 0.1718009478672986
attack loss: 1.8397471904754639
GCN loss on unlabled data: 1.8209227323532104
GCN acc on unlabled data: 0.2221563981042654
attack loss: 1.7992866039276123
GCN loss on unlabled data: 1.914428949356079
GCN acc on unlabled data: 0.12618483412322276
attack loss: 1.9162181615829468
GCN loss on unlabled data: 1.8869675397872925
GCN acc on unlabled data: 0.14099526066350712
attack loss: 1.8727422952651978
GCN loss on unlabled data: 1.830829381942749
GCN acc on unlabled data: 0.17002369668246448
attack loss: 1.820177674293518
GCN loss on unlabled data: 1.8308857679367065
GCN acc on unlabled data: 0.19549763033175357
attack loss: 1.8351519107818604
GCN loss on unlabled data: 1.874547004699707
GCN acc on unlabled data: 0.16054502369668247
attack loss: 1.8754894733428955
GCN loss on un

Perturbing graph:  17%|███████████                                                     | 82/476 [00:01<00:08, 44.48it/s]

GCN loss on unlabled data: 1.8546751737594604
GCN acc on unlabled data: 0.19549763033175357
attack loss: 1.842084527015686
GCN loss on unlabled data: 1.8737459182739258
GCN acc on unlabled data: 0.14691943127962087
attack loss: 1.8911879062652588
GCN loss on unlabled data: 1.845984697341919
GCN acc on unlabled data: 0.14218009478672988
attack loss: 1.8457603454589844
GCN loss on unlabled data: 1.7955960035324097
GCN acc on unlabled data: 0.18720379146919433
attack loss: 1.7817816734313965
GCN loss on unlabled data: 1.8770570755004883
GCN acc on unlabled data: 0.13092417061611375
attack loss: 1.8805170059204102





OutOfMemoryError: CUDA out of memory. Tried to allocate 18.00 MiB (GPU 0; 31.74 GiB total capacity; 4.40 GiB already allocated; 7.12 MiB free; 4.77 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF