In [54]:
import torch
from torch_geometric.utils import from_dgl, to_networkx, k_hop_subgraph
import random
from torch_geometric.data import Data
from torch_geometric.utils import subgraph, to_undirected

In [55]:
def get_khop_subgraph_with_random_walk(pyg_data, node_idx, maxN, p_1hop=0.7, max_steps=None):
    """Sample an induced subgraph of at most ``maxN`` nodes around ``node_idx``.

    The walk starts at ``node_idx`` and repeatedly moves from the current node
    (treated as an edge *destination*) to one of the sources of its incoming
    edges. With probability ``p_1hop`` a single such step is taken; otherwise
    two steps are taken and the intermediate node is recorded as well. Every
    node touched by the walk is collected until ``maxN`` distinct nodes have
    been gathered.

    Args:
        pyg_data: Graph object exposing ``edge_index`` (2 x E long tensor),
            ``num_nodes``, ``x`` and optionally ``y`` / ``edge_attr``.
        node_idx: Index of the central node (int or 0-d tensor).
        maxN: Maximum number of nodes in the returned subgraph.
        p_1hop: Probability of taking a 1-hop step instead of a 2-hop step.
        max_steps: Upper bound on walk iterations. Defaults to ``50 * maxN``.
            The bound guarantees termination when fewer than ``maxN`` nodes
            are reachable from ``node_idx``.

    Returns:
        A ``Data`` object holding the induced subgraph. Nodes are stored in
        visiting order, so the central node is always row 0 of ``x`` / ``y``.
        ``edge_index`` is relabelled to that order and always has shape
        ``[2, E']`` (also when ``E' == 0``).
    """
    node_idx = int(node_idx)
    edge_index = pyg_data.edge_index
    edge_src, edge_dst = edge_index[0], edge_index[1]
    num_nodes = pyg_data.num_nodes
    if max_steps is None:
        max_steps = 50 * max(int(maxN), 1)

    # `visited` keeps insertion order (centre first); `seen` gives O(1) lookups.
    visited = [node_idx]
    seen = {node_idx}

    def in_neighbors(node):
        # Sources of all edges whose destination is `node`.
        return edge_src[edge_dst == node].tolist()

    def visit(node):
        # Never grow past maxN, so no post-hoc truncation (which could drop
        # the central node) is needed.
        if node not in seen and len(visited) < maxN:
            seen.add(node)
            visited.append(node)

    current_node = node_idx
    for _ in range(max_steps):
        if len(visited) >= maxN:
            break

        neighbors = in_neighbors(current_node)
        if not neighbors:
            if current_node == node_idx:
                # The central node itself has no incoming edges: nothing to walk.
                break
            # Dead end: restart from the centre instead of spinning forever.
            current_node = node_idx
            continue

        if random.random() < p_1hop:
            # 1-hop step.
            next_node = random.choice(neighbors)
            visit(next_node)
        else:
            # 2-hop step; only recorded when the second hop exists.
            intermediate_node = random.choice(neighbors)
            second_neighbors = in_neighbors(intermediate_node)
            if not second_neighbors:
                continue
            next_node = random.choice(second_neighbors)
            visit(intermediate_node)
            visit(next_node)
        current_node = next_node

    # Build masks / lookup tables on the same device as the edges.
    device = edge_index.device
    sampled = torch.tensor(visited, dtype=torch.long, device=device)
    node_mask = torch.zeros(num_nodes, dtype=torch.bool, device=device)
    node_mask[sampled] = True

    # Keep only edges whose two endpoints were both sampled.
    edge_mask = node_mask[edge_src] & node_mask[edge_dst]

    # Vectorised relabelling old id -> position in `visited`. Indexing with a
    # [2, E'] tensor keeps the [2, E'] shape even when E' == 0.
    relabel = torch.full((num_nodes,), -1, dtype=torch.long, device=device)
    relabel[sampled] = torch.arange(sampled.numel(), device=device)
    sub_edge_index = relabel[edge_index[:, edge_mask]]

    y = getattr(pyg_data, 'y', None)
    edge_attr = getattr(pyg_data, 'edge_attr', None)

    subgraph_data = Data(
        x=pyg_data.x[visited],
        edge_index=sub_edge_index,
        y=y[visited] if y is not None else None,
        edge_attr=edge_attr[edge_mask] if edge_attr is not None else None,
    )

    return subgraph_data


def _hop_distances(edge_index, node_idx, k, num_nodes):
    """Return per-node hop distance (up to ``k``) to ``node_idx``; -1 if not reached.

    Expansion follows incoming edges of the frontier (frontier nodes are edge
    destinations, newly reached nodes are the corresponding sources).
    """
    src, dst = edge_index[0], edge_index[1]
    device = edge_index.device
    dists = torch.full((num_nodes,), -1, dtype=torch.long, device=device)
    dists[node_idx] = 0
    frontier = torch.zeros(num_nodes, dtype=torch.bool, device=device)
    frontier[node_idx] = True
    for hop in range(1, k + 1):
        reached = src[frontier[dst]]
        fresh = reached[dists[reached] < 0]  # nodes seen for the first time
        if fresh.numel() == 0:
            break
        dists[fresh] = hop
        frontier = torch.zeros_like(frontier)
        frontier[fresh] = True
    return dists


def get_khop_subgraph(pyg_data, node_idx, k, maxN):
    """Extract the ``k``-hop subgraph around ``node_idx``, capped at ``maxN`` nodes.

    When the k-hop neighbourhood holds more than ``maxN`` nodes, the ``maxN``
    nodes closest to ``node_idx`` (by hop distance, ties broken by node id)
    are kept, so the central node is always part of the result.

    Args:
        pyg_data: Graph object exposing ``edge_index``, ``num_nodes``, ``x``, ``y``.
        node_idx: Index of the single central node.
        k: Number of hops.
        maxN: Maximum number of nodes to keep (must be >= 1).

    Returns:
        A ``Data`` object with the subgraph's ``x``, relabelled ``edge_index``
        and ``num_nodes``; ``y`` is a one-element long tensor holding the label
        of the central node and ``center_node_idx`` is the central node's row
        index inside the subgraph.

    Raises:
        ValueError: If ``maxN`` is smaller than 1.
    """
    if maxN < 1:
        raise ValueError("maxN must be at least 1 so the central node can be kept")

    num_nodes = pyg_data.num_nodes
    subset, edge_index, mapping, _ = k_hop_subgraph(
        node_idx, k, pyg_data.edge_index, relabel_nodes=True, num_nodes=num_nodes
    )
    # `mapping` is the position of the central node inside `subset`
    # (it is NOT a hop distance).
    center_node_idx = mapping[0].item()

    if len(subset) > maxN:
        hop_dists = _hop_distances(pyg_data.edge_index, node_idx, k, num_nodes)
        # Stable ascending sort: nearest nodes first, node-id order within a hop.
        _, order = torch.sort(hop_dists[subset], stable=True)
        # Keep ids ascending so relabelled edge indices line up with the rows
        # of x[subset] whether relabelling follows subset order or id order.
        subset = subset[order[:maxN]].sort().values
        edge_index, _ = subgraph(
            subset, pyg_data.edge_index, relabel_nodes=True, num_nodes=num_nodes
        )
        # The earlier mapping refers to the untruncated subset; recompute it.
        center_node_idx = (subset == node_idx).nonzero().view(-1)[0].item()

    subgraph_data = Data(x=pyg_data.x[subset], edge_index=edge_index, num_nodes=len(subset))

    label = pyg_data.y[node_idx].item()
    subgraph_data.y = torch.tensor([label], dtype=torch.long)
    subgraph_data.center_node_idx = center_node_idx
    return subgraph_data

In [5]:
# Load the saved graph object for the Tolokers dataset from a path relative to
# the notebook's working directory, then display its summary.
# NOTE(review): torch.load unpickles the file, so only load files from a
# trusted source. Newer PyTorch releases may also require an explicit
# `weights_only=False` to load non-tensor objects such as this one — confirm
# against the installed version.
pyg_data = torch.load('./pyg_dataset/tolokers.pt')
pyg_data 

Data(edge_index=[2, 530758], train_masks=[11758], val_masks=[11758], test_masks=[11758], num_nodes=11758, y=[11758], x=[11758, 10])

In [7]:
# Turn each boolean split mask into a flat Python list of node indices.
# `squeeze(-1)` drops only the trailing size-1 column produced by `nonzero`,
# so a mask with exactly one True entry still yields a one-element list
# (a bare `squeeze()` would collapse it to a scalar and break `len`).
train_indices = torch.nonzero(pyg_data.train_masks, as_tuple=False).squeeze(-1).tolist()
vali_indices = torch.nonzero(pyg_data.val_masks, as_tuple=False).squeeze(-1).tolist()
test_indices = torch.nonzero(pyg_data.test_masks, as_tuple=False).squeeze(-1).tolist()

# Sanity check: sizes of the train / validation / test splits.
print(len(train_indices), len(vali_indices), len(test_indices))

5879 2939 2940


In [60]:
# Indices of all nodes labelled 1 (named "anomaly" in this notebook).
# `squeeze(-1)` keeps the result 1-D even when only a single node matches.
anomaly_idx = (pyg_data.y == 1).nonzero().squeeze(-1)
# Anomalous nodes that belong to the training split; sorted so the list order
# is deterministic across runs instead of following set iteration order.
train_anomaly_idx = sorted(set(anomaly_idx.tolist()) & set(train_indices))
len(train_anomaly_idx)

1283

In [52]:
# Probe how large the 10-hop neighbourhood of node 5 is on the full graph.
# The call also rebinds the notebook-level names `edge_index`, `mapping` and
# `edge_mask`; only `subset` (the ids of the reached nodes) is inspected here.
# NOTE(review): flow='target_to_source' differs from the default flow used by
# get_khop_subgraph above — confirm which direction is intended.
subset, edge_index, mapping, edge_mask = k_hop_subgraph(5, 10, pyg_data.edge_index , relabel_nodes=True, flow='target_to_source')
print(subset)

tensor([    5,   415,   416,  ..., 11755, 11756, 11757])


In [53]:
# Number of nodes in `subset`, i.e. how many nodes the 10-hop probe reached.
len(subset)

10109

In [40]:
import networkx as nx
# Convert a graph to an undirected NetworkX graph and report whether it forms
# a single connected component.
# NOTE(review): no visible cell binds `subgraph` to a graph object; the only
# visible binding is the `subgraph` function imported from
# torch_geometric.utils at the top of the notebook. This cell presumably relied
# on a variable from a cell that no longer exists and would fail on
# Restart & Run All — confirm which Data object was meant and rename it.
nx_graph = to_networkx(subgraph, to_undirected=True)
components = list(nx.connected_components(nx_graph))
if len(components) > 1:
    print(f"Graph contains {len(components)} connected components.")
else:
    print("Graph is connected.")

Graph is connected.


In [61]:
import torch
from torch_geometric.data import Data
from torch_geometric.utils import k_hop_subgraph

# Toy directed graph used to compare the two `flow` settings of k_hop_subgraph.
# Row 0 holds edge sources, row 1 the matching edge targets.
edge_index = torch.tensor(
    [
        [0, 1, 2, 3, 4, 5],
        [2, 2, 4, 4, 6, 6],
    ],
    dtype=torch.long,
)

# Centre node and neighbourhood radius for the extraction.
node_idx = 6
num_hops = 2

# Same extraction twice, differing only in the flow direction.
subset_s2t, edge_index_s2t, _, _ = k_hop_subgraph(
    node_idx,
    num_hops,
    edge_index,
    flow='source_to_target',
)
subset_t2s, edge_index_t2s, _, _ = k_hop_subgraph(
    node_idx,
    num_hops,
    edge_index,
    flow='target_to_source',
)

# Report: each heading is followed by its tensor on the next line.
print("Original Edge Index:", edge_index, sep="\n")

print("\nSubset nodes (source to target):", subset_s2t, sep="\n")
print("Edge Index (source to target):", edge_index_s2t, sep="\n")

print("\nSubset nodes (target to source):", subset_t2s, sep="\n")
print("Edge Index (target to source):", edge_index_t2s, sep="\n")


Original Edge Index:
tensor([[0, 1, 1, 2, 2, 3, 3, 4, 4, 5],
        [1, 0, 2, 1, 3, 2, 4, 3, 5, 4]])

Subset nodes (source to target):
tensor([0, 1, 2, 3, 4])
Edge Index (source to target):
tensor([[0, 1, 1, 2, 2, 3, 3, 4],
        [1, 0, 2, 1, 3, 2, 4, 3]])

Subset nodes (target to source):
tensor([0, 1, 2, 3, 4])
Edge Index (target to source):
tensor([[0, 1, 1, 2, 2, 3, 3, 4],
        [1, 0, 2, 1, 3, 2, 4, 3]])
