In [4]:
import torch
import numpy as np
import torch.nn.functional as F
from deeprobust.graph.defense import GCN
from deeprobust.graph.global_attack import MetaApprox
from deeprobust.graph.utils import preprocess
from deeprobust.graph.data import Dataset
import scipy.sparse as sp

In [5]:
# Set random seed for reproducibility
np.random.seed(15)
torch.manual_seed(15)

# Load Cora dataset
print("Loading Cora dataset...")
data = Dataset(root='/tmp/', name='cora')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

Loading Cora dataset...
Loading cora dataset...
Selecting 1 largest connected components


In [10]:
# Setup device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Convert adjacency matrix to tensor and move to the correct device
if sp.issparse(adj):
    adj_tensor = torch.FloatTensor(adj.todense()).to(device)
else:
    adj_tensor = torch.FloatTensor(adj).to(device)

# Convert features to tensor and move to the correct device
if sp.issparse(features):
    features_tensor = torch.FloatTensor(features.todense()).to(device)
else:
    features_tensor = torch.FloatTensor(features).to(device)

# Move labels to the correct device
labels_tensor = torch.LongTensor(labels).to(device)
idx_train_tensor = torch.LongTensor(idx_train).to(device)
idx_val_tensor = torch.LongTensor(idx_val).to(device)
idx_test_tensor = torch.LongTensor(idx_test).to(device)

# Setup target model (GCN with 2 layers, hidden dim 64)
model = GCN(nfeat=features_tensor.shape[1], nhid=64, nclass=labels_tensor.max().item()+1, 
            device=device, weight_decay=5e-4, dropout=0.5)
model = model.to(device)

Using device: cuda:0


In [12]:
# Train the GCN model (clean model)
print("Training GCN on clean graph...")

model.fit(features_tensor, adj_tensor, labels_tensor, idx_train_tensor, idx_val_tensor, patience=30)

# Test clean model
print("Testing GCN on clean graph...")
model.eval()
output = model.predict()
acc_test = model.test(idx_test_tensor)
print(f"Test accuracy on clean graph: {acc_test:.4f}")

Training GCN on clean graph...
Testing GCN on clean graph...
Test set results: loss= 0.4999 accuracy= 0.8421
Test accuracy on clean graph: 0.8421


In [13]:
# Initialize MetaApprox attack (MetAttack-based) with edge-based perturbation
print("Setting up MetaApprox attack...")
attack_model = MetaApprox(model=model, nnodes=adj_tensor.shape[0], feature_shape=features_tensor.shape,
                         attack_structure=True, attack_features=False, device=device)

Setting up MetaApprox attack...


In [14]:
# Adjust ptb_rate to control the percentage of edges to perturb
ptb_rate = 0.05  # 5% of edges
n_perturbations = int(ptb_rate * (adj_tensor.sum() // 2))
print(f"Generating {n_perturbations} edge perturbations...")

# Make sure attack_model uses tensors on the same device
attack_model.attack(features_tensor, adj_tensor, labels_tensor, idx_train_tensor, idx_val_tensor, 
                   n_perturbations=n_perturbations, ll_constraint=False)

Generating 253 edge perturbations...


TypeError: expected TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)) (got TensorOptions(dtype=float, device=cuda:0, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)))

In [17]:
import torch
import numpy as np
import torch.nn.functional as F
from deeprobust.graph.defense import GCN
from deeprobust.graph.global_attack import MetaApprox
from deeprobust.graph.utils import preprocess
from deeprobust.graph.data import Dataset
import scipy.sparse as sp

# Force CPU usage to avoid device mismatch issues
torch.cuda.is_available = lambda: False
device = torch.device('cpu')
print(f"Using device: {device}")

# Set random seed for reproducibility
np.random.seed(15)
torch.manual_seed(15)

# Load Cora dataset
print("Loading Cora dataset...")
data = Dataset(root='/tmp/', name='cora')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

# Setup target model (GCN with 2 layers, hidden dim 64)
model = GCN(nfeat=features.shape[1], nhid=64, nclass=labels.max().item()+1, 
            device=device, weight_decay=5e-4, dropout=0.5)

# Train the GCN model (clean model)
print("Training GCN on clean graph...")
model.fit(features, adj, labels, idx_train, idx_val, patience=30)

# Test clean model
print("Testing GCN on clean graph...")
model.eval()
output = model.predict()
acc_test = model.test(idx_test)
print(f"Test accuracy on clean graph: {acc_test:.4f}")

# --- Custom implementation to work around sparse tensor issues ---
# Manual edge perturbation as an alternative to MetaApprox
print("Creating manual perturbations since MetaApprox has compatibility issues...")

# Convert adjacency matrix to numpy for easier manipulation
if sp.issparse(adj):
    adj_np = adj.todense()
else:
    adj_np = adj.numpy() if torch.is_tensor(adj) else adj

# Function to add/remove edges randomly
def perturb_edges_randomly(adj_matrix, n_perturbations):
    """
    Randomly add or remove edges from the adjacency matrix
    """
    adj_perturbed = adj_matrix.copy()
    n_nodes = adj_matrix.shape[0]
    
    # Flip edges (add where there's no edge, remove where there is)
    perturbations_made = 0
    while perturbations_made < n_perturbations:
        # Generate random node pairs
        i = np.random.randint(0, n_nodes)
        j = np.random.randint(0, n_nodes)
        
        # Skip self-loops
        if i == j:
            continue
            
        # Flip the edge
        if adj_perturbed[i, j] == 0:
            # Add edge
            adj_perturbed[i, j] = 1
            adj_perturbed[j, i] = 1  # Maintain symmetry
        else:
            # Remove edge
            adj_perturbed[i, j] = 0
            adj_perturbed[j, i] = 0  # Maintain symmetry
            
        perturbations_made += 1
    
    return adj_perturbed

# Calculate perturbation rate
ptb_rate = 0.05  # 5% of edges
n_perturbations = int(ptb_rate * (adj_np.sum() // 2))
print(f"Generating {n_perturbations} edge perturbations...")

# Perturb the adjacency matrix
modified_adj_np = perturb_edges_randomly(adj_np, n_perturbations)

# Convert back to sparse format if needed
modified_adj = sp.csr_matrix(modified_adj_np)

# Evaluate the target model on the perturbed graph
print("Testing GCN on perturbed graph...")
# Train a new GCN on the perturbed graph
model_attacked = GCN(nfeat=features.shape[1], nhid=64, nclass=labels.max().item()+1, 
                     device=device, weight_decay=5e-4, dropout=0.5)

model_attacked.fit(features, modified_adj, labels, idx_train, idx_val, patience=30)
acc_test_attacked = model_attacked.test(idx_test)

print(f"Test accuracy on perturbed graph: {acc_test_attacked:.4f}")
print(f"Attack impact (drop in accuracy): {acc_test - acc_test_attacked:.4f}")

# Function to save the modified adjacency matrix
def save_modified_adj(modified_adj, filename="modified_adj_matrix.npz"):
    # Convert to CPU if needed
    if torch.is_tensor(modified_adj):
        modified_adj_np = modified_adj.cpu().numpy()
    elif sp.issparse(modified_adj):
        modified_adj_np = modified_adj
    else:
        modified_adj_np = sp.csr_matrix(modified_adj)
    
    # Save
    sp.save_npz(filename, modified_adj_np)
    print(f"Modified adjacency matrix saved to {filename}")

# Uncomment to save the modified adjacency matrix
# save_modified_adj(modified_adj)

# Visualize the perturbed edges if needed
def visualize_perturbations(original_adj, modified_adj, filename="perturbed_graph_comparison.png"):
    import networkx as nx
    import matplotlib.pyplot as plt
    
    # Convert to numpy arrays for visualization
    if torch.is_tensor(original_adj):
        original_adj_np = original_adj.cpu().numpy()
    elif sp.issparse(original_adj):
        original_adj_np = original_adj.todense()
    else:
        original_adj_np = original_adj
        
    if torch.is_tensor(modified_adj):
        modified_adj_np = modified_adj.cpu().numpy()
    elif sp.issparse(modified_adj):
        modified_adj_np = modified_adj.todense()
    else:
        modified_adj_np = modified_adj
    
    # Find differences
    diff = np.abs(original_adj_np - modified_adj_np)
    
    # Get indices of perturbations
    perturbed_edges = np.where(diff > 0)
    
    print(f"Number of perturbed edges: {len(perturbed_edges[0])//2}")  # Divide by 2 because undirected graph
    
    # Visualize a subgraph for better visibility (first 100 nodes)
    n_nodes = 100
    G_orig = nx.from_numpy_array(original_adj_np[:n_nodes, :n_nodes])
    G_mod = nx.from_numpy_array(modified_adj_np[:n_nodes, :n_nodes])
    
    plt.figure(figsize=(12, 5))
    
    plt.subplot(121)
    nx.draw(G_orig, node_size=30, with_labels=False, node_color='blue', edge_color='gray')
    plt.title("Original Graph (Subgraph)")
    
    plt.subplot(122)
    nx.draw(G_mod, node_size=30, with_labels=False, node_color='blue', edge_color='gray')
    plt.title("Perturbed Graph (Subgraph)")
    
    plt.tight_layout()
    plt.savefig(filename)
    print(f"Graph visualization saved to {filename}")
    plt.show()

# Uncomment to visualize
# visualize_perturbations(adj_np, modified_adj_np)

Using device: cpu
Loading Cora dataset...
Loading cora dataset...
Selecting 1 largest connected components
Training GCN on clean graph...
Testing GCN on clean graph...
Test set results: loss= 0.4946 accuracy= 0.8461
Test accuracy on clean graph: 0.8461
Creating manual perturbations since MetaApprox has compatibility issues...
Generating 253 edge perturbations...
Testing GCN on perturbed graph...
Test set results: loss= 0.5133 accuracy= 0.8350
Test accuracy on perturbed graph: 0.8350
Attack impact (drop in accuracy): 0.0111
