In [2]:
import os
import random
import pickle
from joblib import Parallel, delayed
import glob
import torch
import networkx as nx

# === Standard Library ===
import math
import random
import time
import heapq
import itertools
from collections import defaultdict, deque
from itertools import combinations
from typing import Any, Tuple, Dict, List, Set, Sequence, Union

# === Third-Party Libraries ===

# --- Scientific Computing ---
import numpy as np
import pandas as pd
import scipy.sparse as sp
from scipy.sparse    import coo_matrix
from scipy.optimize import linprog

# --- Plotting ---
import matplotlib.pyplot as plt

# --- Parallel Processing ---
from joblib import Parallel, delayed
from tqdm import tqdm

# --- Graph Processing ---
import networkx as nx

# --- JIT Compilation ---
from numba import njit, prange

from torch_geometric.loader import NeighborLoader
from torch_geometric.utils import from_networkx
from torch_geometric.nn import SAGEConv


# --- Model definition ---
import torch.nn as nn
import torch.nn.functional as F




# 0. EPC mc deleted

In [3]:
def nx_to_csr(G: nx.Graph) -> Tuple[List[int], Dict[int, int], np.ndarray, np.ndarray, np.ndarray]:
    """Flatten a NetworkX graph into CSR-style adjacency arrays.

    Every edge is expected to carry a ``'p'`` attribute. Each undirected edge
    shows up twice in the output, once in the row of each endpoint.

    Returns
    -------
    nodes : list
        Node labels in ``G.nodes()`` order; row ``i`` describes ``nodes[i]``.
    idx_of : dict
        Inverse mapping from node label to row index.
    indptr : np.ndarray (int32)
        Row pointers; neighbours of row ``i`` live in
        ``indices[indptr[i]:indptr[i + 1]]``.
    indices : np.ndarray (int32)
        Row index of each neighbour.
    probs : np.ndarray (float32)
        The ``'p'`` attribute of the edge for each entry of ``indices``.
    """
    nodes: List[int] = list(G.nodes())
    idx_of: Dict[int, int] = {label: row for row, label in enumerate(nodes)}

    row_starts: List[int] = [0]
    neighbour_rows: List[int] = []
    edge_probs: List[float] = []

    for label in nodes:
        for nbr in G.neighbors(label):
            neighbour_rows.append(idx_of[nbr])
            edge_probs.append(G.edges[label, nbr]['p'])
        # Close the current row: the next row starts where this one ended.
        row_starts.append(len(neighbour_rows))

    indptr = np.asarray(row_starts, dtype=np.int32)
    indices = np.asarray(neighbour_rows, dtype=np.int32)
    probs = np.asarray(edge_probs, dtype=np.float32)
    return nodes, idx_of, indptr, indices, probs

@njit(inline="always")
def _bfs_component_size(start: int,
                    indptr: np.ndarray,
                    indices: np.ndarray,
                    probs: np.ndarray,
                    deleted: np.ndarray) -> int:
    """Sample one random realisation and return |C_start| - 1.

    Traverses the CSR graph from ``start`` using an explicit stack. A step
    along an edge is taken only if the neighbour is not deleted and a fresh
    uniform draw is below the edge's probability. The return value is the
    number of nodes reached other than ``start`` itself.
    """
    num_nodes = deleted.size
    pending = np.empty(num_nodes, dtype=np.int32)   # explicit stack of rows to expand
    seen = np.zeros(num_nodes, dtype=np.uint8)

    pending[0] = start
    n_pending = 1
    seen[start] = 1
    reached = 0          # nodes discovered besides `start`

    while n_pending > 0:
        n_pending -= 1
        node = pending[n_pending]
        for slot in range(indptr[node], indptr[node + 1]):
            nbr = indices[slot]
            if deleted[nbr]:
                continue
            # The draw happens before the `seen` test, so one random number is
            # consumed for every non-deleted neighbour entry that is scanned.
            if np.random.random() >= probs[slot]:
                continue
            if seen[nbr]:
                continue
            seen[nbr] = 1
            pending[n_pending] = nbr
            n_pending += 1
            reached += 1
    return reached

@njit(parallel=True)
def epc_mc(indptr: np.ndarray,
            indices: np.ndarray,
            probs: np.ndarray,
            deleted: np.ndarray,
            num_samples: int) -> float:
    """Monte-Carlo estimate of the expected pairwise connectivity (EPC).

    Each sample picks a surviving node uniformly at random and measures
    ``|C_u| - 1`` in one random realisation. With ``m`` surviving nodes the
    estimate is ``m * sum(samples) / (2 * num_samples)``. Returns ``0.0`` when
    fewer than two nodes survive.
    """
    alive = np.where(~deleted)[0]
    n_alive = alive.size
    if n_alive < 2:
        return 0.0

    total = 0.0
    for _ in prange(num_samples):
        pick = np.random.randint(n_alive)
        total += _bfs_component_size(alive[pick], indptr, indices, probs, deleted)

    return (n_alive * total) / (2.0 * num_samples)

def epc_mc_deleted(
  G: nx.Graph,
  S: set,
  num_samples: int = 100_000,
) -> float:
  """Estimate the EPC of ``G`` after deleting the nodes in ``S``.

  The graph is converted to CSR arrays, ``S`` is translated into a boolean
  deletion mask over CSR rows, and the Monte-Carlo estimator ``epc_mc`` is run
  with ``num_samples`` samples.
  """
  nodes, idx_of, indptr, indices, probs = nx_to_csr(G)

  # Boolean mask over CSR rows: True marks a deleted node.
  mask = np.zeros(len(nodes), dtype=np.bool_)
  removed_rows = [idx_of[label] for label in S]
  if removed_rows:
    mask[removed_rows] = True

  return epc_mc(indptr, indices, probs, mask, num_samples)

## REGA

In [41]:
def solve_lp_reaga_sparse(G: nx.Graph, pre_fixed: set, k: int):
    """Solve the LP relaxation used by REGA with a sparse constraint matrix.

    Variables are ``s_v`` (one per node) followed by ``x_uv`` (one per
    unordered node pair), all bounded to ``[0, 1]``; ``s_v`` is pinned to 1
    for every node in ``pre_fixed``. The LP maximises ``sum(x)`` subject to

    * budget:    ``sum_v s_v <= k``
    * edges:     ``x_uv - s_u - s_v <= 1 - p_uv`` for every edge ``(u, v)``
    * triangles: ``x_ik - x_ij - x_jk <= 0`` for every edge ``(i, j)`` and
      every other node ``k``

    Returns
    -------
    (s_vals, obj)
        ``s_vals`` maps each node to its LP value; ``obj`` is the number of
        pairs minus the sum of all ``x`` values.

    Raises
    ------
    RuntimeError
        If ``linprog`` does not report success.
    """
    vertices = list(G.nodes())
    n = len(vertices)

    # Column layout: s_v in [0, n), x_pair in [n, n + number of pairs).
    pairs = [tuple(sorted(pair)) for pair in combinations(vertices, 2)]
    n_vars = n + len(pairs)
    s_col = {v: col for col, v in enumerate(vertices)}
    x_col = {pair: n + offset for offset, pair in enumerate(pairs)}

    row_ids, col_ids, coefs = [], [], []
    rhs = []

    def add_row(terms, bound):
        """Append one `<=` constraint given as (column, coefficient) terms."""
        row = len(rhs)
        for col, coef in terms:
            row_ids.append(row)
            col_ids.append(col)
            coefs.append(coef)
        rhs.append(bound)

    # Budget row.
    add_row([(col, 1.0) for col in range(n)], k)

    # Edge upper bounds: x_uv - s_u - s_v <= 1 - p_uv.
    for a, b in G.edges():
        a, b = sorted((a, b))
        add_row(
            [(x_col[(a, b)], 1.0), (s_col[a], -1.0), (s_col[b], -1.0)],
            1 - G.edges[a, b]['p'],
        )

    # Triangle cuts for each real edge (a, b) and every third node c.
    for a, b in G.edges():
        a, b = sorted((a, b))
        for c in vertices:
            if c == a or c == b:
                continue
            add_row(
                [
                    (x_col[tuple(sorted((a, c)))], 1.0),
                    (x_col[(a, b)], -1.0),
                    (x_col[tuple(sorted((b, c)))], -1.0),
                ],
                0.0,
            )

    A_ub = coo_matrix((coefs, (row_ids, col_ids)), shape=(len(rhs), n_vars)).tocsr()
    b_ub = np.asarray(rhs)

    # Everything lives in [0, 1]; already-chosen nodes are pinned to 1.
    bounds = [(0.0, 1.0)] * n_vars
    for v in pre_fixed:
        bounds[s_col[v]] = (1.0, 1.0)

    # linprog minimises, so a cost of -1 on every x maximises sum(x).
    cost = np.zeros(n_vars)
    cost[n:] = -1.0

    res = linprog(cost, A_ub=A_ub, b_ub=b_ub,
                  bounds=bounds, method="highs")
    if not res.success:
        raise RuntimeError("LP infeasible: " + res.message)

    s_vals = {v: res.x[s_col[v]] for v in vertices}
    obj = len(pairs) - res.x[n:].sum()
    return s_vals, obj

def local_search_(
  G: nx.Graph,
  S_init: set,
  num_samples: int = 10_000
):
  """1-swap best-improvement local search on the estimated EPC.

  Starting from ``S_init``, every pass evaluates all swaps "remove ``u`` from
  the set, add ``v`` from outside" and applies the single swap with the lowest
  estimated EPC, provided it is strictly lower than the current estimate. The
  search stops when a full pass finds no improving swap.

  The CSR arrays are built once and a single deletion mask is flipped in place
  for each candidate, instead of converting the graph again for every swap.

  Parameters
  ----------
  G : nx.Graph
      Graph whose edges carry a ``'p'`` attribute.
  S_init : set
      Initial delete set (not modified).
  num_samples : int
      Monte-Carlo samples per EPC evaluation.

  Returns
  -------
  set
      The refined delete set, same size as ``S_init``.

  Notes
  -----
  EPC values are Monte-Carlo estimates, so "improvement" is judged on noisy
  numbers.
  """
  nodes, idx_of, indptr, indices, probs = nx_to_csr(G)

  S = set(S_init)
  nodes_not_in_set = set(G.nodes()) - S

  # Deletion mask over CSR rows, kept in sync with S.
  deleted = np.zeros(len(nodes), dtype=np.bool_)
  for u in S:
    deleted[idx_of[u]] = True

  current_epc = epc_mc(indptr, indices, probs, deleted, num_samples)

  while True:
    best_swap = None

    for u in list(S):
      row_u = idx_of[u]
      deleted[row_u] = False            # tentatively put u back
      for v in nodes_not_in_set:
        row_v = idx_of[v]
        deleted[row_v] = True           # tentatively delete v instead
        temp_epc = epc_mc(indptr, indices, probs, deleted, num_samples)
        deleted[row_v] = False

        # current_epc doubles as the running best over this pass.
        if temp_epc < current_epc:
          current_epc = temp_epc
          best_swap = (u, v)
      deleted[row_u] = True             # restore the mask for the next u

    if best_swap is None:
      break

    u, v = best_swap
    S.remove(u)
    S.add(v)
    nodes_not_in_set.remove(v)
    nodes_not_in_set.add(u)
    deleted[idx_of[u]] = False
    deleted[idx_of[v]] = True

  return S

def rega(G: nx.Graph,
        k: int,
        num_samples: int = 100_000,
        max_iter: int = 1,
        use_tqdm: bool = False):
    """REGA pipeline: iterative LP rounding followed by 1-swap local search.

    Rounding runs up to ``k`` iterations. Each iteration solves the LP with
    the nodes chosen so far fixed to 1, then adds the not-yet-chosen node with
    the largest fractional ``s`` value. The resulting set is refined with
    ``local_search_``.

    Parameters
    ----------
    G : nx.Graph
        Graph whose edges carry a ``'p'`` attribute.
    k : int
        Deletion budget. If it exceeds the number of nodes, rounding stops
        once every node has been selected.
    num_samples : int
        Monte-Carlo samples per EPC evaluation in the local search.
    max_iter, use_tqdm :
        Accepted for backward compatibility; currently unused.

    Returns
    -------
    set
        The selected delete set.
    """
    # Never ask for more nodes than exist: max() below would otherwise be
    # called on an empty sequence.
    rounds = min(k, G.number_of_nodes())

    # Iterative rounding.
    D = set()
    for _ in range(rounds):
        s_vals, _lp_obj = solve_lp_reaga_sparse(G, pre_fixed=D, k=k)

        # Pick the largest fractional s_i among V \ D.
        u = max((v for v in G.nodes() if v not in D),
                key=lambda v: s_vals[v])
        D.add(u)

    # Local-swap refinement.
    return local_search_(G, D, num_samples)

# 1. Dataset generation

In [2]:
# Structural parameter values swept per topology family. generate_split reads
# the values of the single entry in each inner dict and passes each one to
# gen_graph as `s_param`.
structural_params = {
    'ER': {'p': [0.0443, 0.0667]},   # forwarded to nx.erdos_renyi_graph as p
    'BA': {'m': [2, 3]},             # forwarded (as int) to nx.barabasi_albert_graph
    # NOTE(review): gen_graph forwards these values as `p=` (the rewiring
    # probability) to nx.watts_strogatz_graph with k fixed at 4. 4 and 5 are
    # not probabilities — confirm whether they were meant to be `k` instead.
    'SW': {'beta': [4, 5]}
}
# Node counts used for the training and test splits.
train_sizes   = [20, 50, 80]
test_sizes    = [100, 200, 300, 500]
# Edge reliability values assigned uniformly to all edges of a graph.
reliability_p = [i/10 for i in range(1, 11)]  # 0.1, 0.2, ..., 1.0
val_reliability_p = [0.15, 0.35, 0.55, 0.75, 0.95]  # for validation set

# Number of graphs generated per (topology, s_param, size, reliability) combination.
n_train     = 3
n_val       = 6
n_test100   = 3
n_test_large = 2

# Output layout: <base_dir>/graphs/<split>/ and <base_dir>/labels/<split>/.
base_dir     = 'data'
graphs_dir   = os.path.join(base_dir, 'graphs')
labels_dir   = os.path.join(base_dir, 'labels')

In [62]:
def make_split_dirs():
    """Create the per-split sub-directories under graphs_dir and labels_dir."""
    split_names = ['train', 'val', 'test100', 'test_large']
    for split in split_names:
        for root in (graphs_dir, labels_dir):
            # exist_ok keeps the cell safe to re-run.
            os.makedirs(os.path.join(root, split), exist_ok=True)

make_split_dirs()

In [63]:
# --- Graph generation ---
def gen_graph(topo, size, s_param, rel_p, seed):
    """Generate one random graph and tag every edge with reliability ``rel_p``.

    Parameters
    ----------
    topo : str
        ``'ER'`` (Erdős–Rényi), ``'BA'`` (Barabási–Albert) or ``'SW'``
        (Watts–Strogatz).
    size : int
        Number of nodes.
    s_param :
        Structural parameter: edge probability for ``'ER'``, attachment count
        (cast to ``int``) for ``'BA'``, and the value passed as ``p`` for
        ``'SW'`` (``k`` is fixed at 4).
    rel_p : float
        Value stored in the ``'p'`` attribute of every edge.
    seed : int
        Seed for the generator; also used to seed the global ``random`` module.

    Returns
    -------
    nx.Graph

    Raises
    ------
    ValueError
        If ``topo`` is not one of the supported families.
    """
    # Fail early with a clear message instead of reaching the code below with
    # no graph bound.
    if topo not in ('ER', 'BA', 'SW'):
        raise ValueError(
            f"Unknown topology {topo!r}; expected 'ER', 'BA' or 'SW'"
        )

    random.seed(seed)
    if topo == 'ER':
        G = nx.erdos_renyi_graph(size, s_param, seed=seed)
    elif topo == 'BA':
        G = nx.barabasi_albert_graph(size, int(s_param), seed=seed)
    else:  # 'SW'
        # NOTE(review): structural_params supplies 4 and 5 here, which are not
        # valid rewiring probabilities — confirm the intended parameter.
        G = nx.watts_strogatz_graph(size, k=4, p=s_param, seed=seed)

    nx.set_edge_attributes(G, rel_p, 'p')
    return G

In [64]:
def save_graph(G, meta, idx, split):
    """Pickle ``{'graph': G, 'meta': meta}`` into ``graphs_dir/<split>/``.

    The file name encodes the topology, size, structural parameter and
    reliability from ``meta`` plus the split name and the index ``idx``.
    """
    fname = f"{meta['topo']}_sz{meta['size']}_sp{meta['s_param']}_rp{meta['rel_p']}_{split}_{idx}.pkl"
    path = os.path.join(graphs_dir, split, fname)

    # Make sure the target folder exists before writing. Opening the file will
    # fail if a directory with the same name as the file already exists.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    payload = {'graph': G, 'meta': meta}
    with open(path, 'wb') as out_file:
        pickle.dump(payload, out_file)

In [65]:
def generate_split(split, sizes, n_graphs, reliability_p):
    """Generate and save every graph of one dataset split.

    For each topology in ``structural_params`` and each combination of
    structural parameter, size and reliability, ``n_graphs`` graphs are
    generated with ``gen_graph`` and written with ``save_graph``.

    Parameters
    ----------
    split : str
        Split name; used in the seed, the file name and the output folder.
    sizes : sequence of int
        Node counts to generate.
    n_graphs : int
        Graphs per parameter combination.
    reliability_p : sequence of float
        Edge reliability values.
    """
    # Local import keeps this cell self-contained.
    import zlib

    for topo, params in structural_params.items():
        # Each topology has exactly one structural parameter list.
        s_values = next(iter(params.values()))

        for s_param, size, rel_p, i in itertools.product(
            s_values, sizes, reliability_p, range(n_graphs)
        ):
            # Built-in hash() of a tuple containing str is randomized per
            # process, so it cannot give reproducible seeds across kernel
            # restarts. CRC32 of the repr is stable and already a 32-bit
            # unsigned value.
            key = repr((topo, s_param, size, rel_p, split, i))
            seed = zlib.crc32(key.encode("utf-8"))

            G = gen_graph(topo, size, s_param, rel_p, seed)
            meta = {'topo': topo, 'size': size, 's_param': s_param, 'rel_p': rel_p}
            save_graph(G, meta, i, split)


In [66]:
# Only the 'test_large' split is generated by this cell; the calls for the
# other splits are kept below, commented out.
# generate_split('train',      train_sizes,   n_train, reliability_p)
# generate_split('val',        [100],         n_val, reliability_p=val_reliability_p)
# generate_split('test100',    [100],         n_test100, reliability_p)
# test_sizes[1:] skips the first entry of test_sizes (100).
generate_split('test_large', test_sizes[1:],n_test_large, reliability_p)

In [None]:
# --- Label generation  ---
def compute_labels(file_path, mc_samples=1_000):
    """Compute per-node EPC-drop labels for one pickled graph and save them.

    For each node ``v`` the label is ``log1p(max(0, EPC(G) - EPC(G - v)))``,
    with both EPC values estimated by Monte-Carlo sampling. The tensor is
    written to ``labels_dir/<split>/<name>_labels.pt``, where ``<split>`` is
    the name of the folder containing ``file_path``.

    Parameters
    ----------
    file_path : str
        Path to a pickle holding a dict with a ``'graph'`` entry.
    mc_samples : int
        Monte-Carlo samples per EPC evaluation.

    Notes
    -----
    Labels are indexed by node label, so node labels are assumed to be the
    integers ``0 .. n-1``.
    """
    # Context manager so the file handle is closed promptly.
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
    G_orig = data['graph']

    # Build the CSR arrays once; every evaluation only flips one mask bit.
    nodes, idx_of, indptr, indices, probs = nx_to_csr(G_orig)
    n = len(nodes)
    deleted = np.zeros(n, dtype=np.bool_)

    base = epc_mc(indptr, indices, probs, deleted, mc_samples)

    labels = torch.zeros(n)
    for v in nodes:
        row = idx_of[v]
        deleted[row] = True
        drop = epc_mc(indptr, indices, probs, deleted, mc_samples)
        deleted[row] = False
        labels[v] = base - drop

    # Stabilise scale; noisy estimates can yield negative drops, clamp them to 0.
    labels = torch.log1p(labels.clamp(min=0))

    # Save labels next to the other files of the same split.
    fname = os.path.basename(file_path).replace('.pkl', '_labels.pt')
    split = os.path.basename(os.path.dirname(file_path))
    save_dir = os.path.join(labels_dir, split)
    os.makedirs(save_dir, exist_ok=True)
    torch.save(labels, os.path.join(save_dir, fname))

In [47]:
def greedy_cndp_epc_celf(
    G: nx.Graph,
    K: int,
    *,
    num_samples: int = 20_000,
    reuse_csr: Tuple = None,
    return_trace: bool = False,
) -> Union[Set[int], Tuple[Set[int], List[float]]]:
    """Greedily pick up to **K** nodes to delete using lazy (CELF) evaluation.

    Parameters
    ----------
    G : nx.Graph
        Graph whose edges carry a ``'p'`` attribute.
    K : int
        Number of nodes to select.
    num_samples : int
        Monte-Carlo samples per EPC evaluation.
    reuse_csr : tuple, optional
        A tuple as returned by ``nx_to_csr(G)``; when given, the conversion
        is skipped.
    return_trace : bool, default *False*
        If *True*, also return the list of tracked EPC values: the estimate
        for the intact graph followed by one value per accepted node.

    Notes
    -----
    After a node is accepted the tracked EPC is lowered by that node's
    estimated gain; it is not re-estimated from scratch.
    """
    csr = nx_to_csr(G) if reuse_csr is None else reuse_csr
    nodes, idx_of, indptr, indices, probs = csr

    deleted = np.zeros(len(nodes), dtype=np.bool_)

    def sigma_with_extra(row: int) -> float:
        """EPC estimate with ``row`` deleted on top of the current mask."""
        deleted[row] = True
        value = epc_mc(indptr, indices, probs, deleted, num_samples)
        deleted[row] = False
        return value

    current_sigma = epc_mc(indptr, indices, probs, deleted, num_samples)

    # Heap entries: (-gain, row, round in which the gain was computed).
    heap: List[Tuple[float, int, int]] = []
    for row in range(len(nodes)):
        # Initial gains are stored in single precision.
        initial_gain = np.float32(current_sigma - sigma_with_extra(row))
        heapq.heappush(heap, (-initial_gain, row, 0))

    chosen: Set[int] = set()
    trace: List[float] = [current_sigma]
    current_round = 0

    while len(chosen) < K and heap:
        neg_gain, row, computed_in = heapq.heappop(heap)
        if computed_in != current_round:
            # Stale entry: refresh its gain and put it back.
            refreshed = current_sigma - sigma_with_extra(row)
            heapq.heappush(heap, (-refreshed, row, current_round))
            continue

        # Gain is up to date for this round: accept the node.
        chosen.add(nodes[row])
        deleted[row] = True
        current_sigma += neg_gain  # neg_gain is -gain, so this subtracts it
        current_round += 1
        if return_trace:
            trace.append(current_sigma)

    if return_trace:
        return chosen, trace
    return chosen

In [64]:
# NOTE(review): both roots are absolute, machine-specific paths; they must be
# edited before the label-building cell can run elsewhere.
# The label-building cell globs GRAPHS_ROOT/*/*.pkl, i.e. graphs/<split>/*.pkl.
GRAPHS_ROOT  = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs"     # input graph pickles
LABELS_ROOT  = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/rega_labels"     # will mirror the same structure

# ----- budget percentage -----
ALPHA        = 0.10         # 10 % of nodes

# ----- MC parameters -----
MC_SAMPLES   = 10_000       # passed to rega() as num_samples by build_and_save_label
MC_EPC_SAVE  = 20_000       # NOTE(review): not referenced by the visible cells — confirm it is still needed

In [None]:
def build_and_save_label(pkl_path: str, alpha: float = ALPHA):
    """Compute the REGA delete set for one graph and save it as a 0/1 mask.

    The budget is ``max(1, ceil(alpha * N))`` for a graph with ``N`` nodes.
    The mask is indexed by node label and written under ``LABELS_ROOT``,
    mirroring the graph's directory relative to ``GRAPHS_ROOT``:
    ``<dir>/foo.pkl`` becomes ``<dir>/foo_labels.pt``.
    """
    # --- load --------------------------------------------------------
    with open(pkl_path, "rb") as handle:
        graph = pickle.load(handle)["graph"]

    n_nodes = graph.number_of_nodes()
    budget = max(1, math.ceil(alpha * n_nodes))

    # --- REGA delete set --------------------------------------------
    chosen = rega(graph, budget, num_samples=MC_SAMPLES)

    # --- binary mask -------------------------------------------------
    mask = torch.zeros(n_nodes, dtype=torch.float32)
    mask[list(chosen)] = 1.0

    # --- save --------------------------------------------------------
    src_dir, src_name = os.path.split(pkl_path)
    out_dir = os.path.join(LABELS_ROOT, os.path.relpath(src_dir, GRAPHS_ROOT))
    os.makedirs(out_dir, exist_ok=True)

    out_name = src_name.replace(".pkl", "_labels.pt")
    torch.save(mask, os.path.join(out_dir, out_name))

In [67]:
# label_path = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs"

# Collect every pickle exactly one directory level below GRAPHS_ROOT.
all_graphs = glob.glob(os.path.join(GRAPHS_ROOT, '*', '*.pkl'))

# One label file is written per graph. The progress text says "greedy", but
# build_and_save_label obtains the delete set from rega().
for fp in tqdm(all_graphs, desc="building greedy labels"):
    build_and_save_label(fp, alpha=ALPHA)

print("✓ Finished.  All binary-mask labels written to", LABELS_ROOT)

building greedy labels:   0%|          | 0/720 [00:00<?, ?it/s]

K: 8





TypeError: rega() missing 1 required positional argument: 'epc_func'

## Previous label generation code

In [63]:
import glob

# Earlier labelling pass: per-node EPC-drop labels for every graph found one
# directory level below graphs_dir. compute_labels must be defined before this
# cell is run.
all_graphs = glob.glob(os.path.join(graphs_dir, '*', '*.pkl'))

for fp in tqdm(all_graphs, total=len(all_graphs), desc="Computing labels"):
    compute_labels(fp, 10_000)

Computing labels:   0%|          | 0/1080 [00:00<?, ?it/s]


NameError: name 'compute_labels' is not defined

In [None]:
# 1. Load the file onto the CPU.
# NOTE: "path/to/your_file.pt" is a placeholder; replace it with a real path
# before running this cell.
data = torch.load("path/to/your_file.pt", map_location="cpu")

# 2. See what you got:
print(type(data))
# e.g. <class 'dict'> (often a state_dict) or a ScriptModule

# 3. If it’s a dict of tensors (state_dict):
if isinstance(data, dict):
    for k, v in data.items():
        # Print the shape when the value has one, otherwise its type.
        print(f"{k:40s} → {tuple(v.shape) if hasattr(v, 'shape') else type(v)}")

In [25]:
import os
import torch

# Dump every .pt file under base_folder to a text file with the same relative
# path under output_folder.
# NOTE(review): both folders are absolute, machine-specific paths.
base_folder = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels"         # Folder with .pt files
output_folder = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels_txt"   # Destination folder for .txt files

for root, _, files in os.walk(base_folder):
    for file in files:
        if file.endswith(".pt"):
            pt_path = os.path.join(root, file)
            
            # Load tensor or model (depending on format); files that fail to
            # load are reported and skipped.
            try:
                content = torch.load(pt_path, map_location='cpu')
            except Exception as e:
                print(f"Skipping {pt_path} due to load error: {e}")
                continue
            
            # Generate corresponding txt path (same relative location, .txt extension)
            rel_path = os.path.relpath(pt_path, base_folder)
            txt_path = os.path.join(output_folder, os.path.splitext(rel_path)[0] + ".txt")
            os.makedirs(os.path.dirname(txt_path), exist_ok=True)

            # Write str(content) to the text file; str() of a large tensor is
            # abbreviated by torch's print options.
            try:
                with open(txt_path, "w") as f:
                    f.write(str(content))
            except Exception as e:
                print(f"Error writing {txt_path}: {e}")

# 2. Feature Engineering

In [26]:
def get_neigbors(g, node, depth):
    """Group the BFS tree rooted at ``node`` by hop distance.

    Returns a dict mapping each hop ``1 .. depth`` to the list of nodes first
    reached at that hop, as given by ``nx.bfs_successors`` with
    ``depth_limit=depth``. Hops beyond the reachable part map to empty lists.
    """
    children_of = dict(nx.bfs_successors(g, source=node, depth_limit=depth))

    by_hop = {}
    frontier = [node]
    for hop in range(1, depth + 1):
        # The next layer is the concatenation of the children of the current one.
        frontier = [
            child
            for parent in frontier
            for child in children_of.get(parent, [])
        ]
        by_hop[hop] = frontier
    return by_hop

In [27]:
def get_dgl_g_input(G):
    """Build the ``[len(G), 11]`` structural feature tensor for graph ``G``.

    Rows are indexed by node label, so node labels are assumed to be the
    integers ``0 .. len(G) - 1``. Columns:

      0: degree
      1: mean neighbour degree
      2: mean neighbour clustering coefficient
      3: number of edges in the egonet (node plus its neighbours)
      4: sum of degrees over the egonet minus twice column 3
      5-7: ``(deg(i) - 1) * sum(deg(j) - 1)`` over nodes ``j`` at BFS hop
           1, 2 and 3 from ``i``
      8: VoteRank score (``len(G) - rank`` for ranked nodes, 0 otherwise)
      9: eigenvector centrality
     10: core number

    Every column with a non-zero maximum is divided by that maximum.
    """
    n = len(G)
    feats = torch.ones(n, 11)   # local name avoids shadowing the builtin `input`
    if n == 0:
        return feats

    # Computed once instead of once per lookup.
    deg = dict(G.degree())
    clust = nx.clustering(G)

    for i in G.nodes():
        nbrs = list(G.neighbors(i))
        denom = max(len(nbrs), 1)   # avoid division by zero for isolated nodes
        feats[i, 0] = deg[i]
        feats[i, 1] = sum(deg[j] for j in nbrs) / denom
        feats[i, 2] = sum(clust[j] for j in nbrs) / denom
        egonet = G.subgraph(nbrs + [i])
        feats[i, 3] = len(egonet.edges())
        feats[i, 4] = sum(deg[j] for j in egonet.nodes()) - 2 * feats[i, 3]

    for i in G.nodes():
        # One BFS to depth 3 yields all three hop layers.
        ball = get_neigbors(G, i, 3)
        for l in (1, 2, 3):
            feats[i, 5 + l - 1] = (deg[i] - 1) * sum(deg[j] - 1 for j in ball[l])

    # Nodes ranked earlier by VoteRank get higher scores; unranked nodes get 0.
    votescore = {i: 0 for i in G.nodes()}
    for rank, node in enumerate(nx.voterank(G)):
        votescore[node] = n - rank
    e = nx.eigenvector_centrality(G, max_iter=1000)
    k = nx.core_number(G)
    for i in G.nodes():
        feats[i, 8] = votescore[i]
        feats[i, 9] = e[i]
        feats[i, 10] = k[i]

    # Scale each column by its maximum.
    for col in range(feats.size(1)):
        col_max = feats[:, col].max()
        if col_max != 0:
            feats[:, col] = feats[:, col] / col_max
    return feats

# 3. Training

In [4]:
def extract_node_features(G):
    """
    Compute per-node structural features.
    Returns: torch.FloatTensor of shape [num_nodes, 11], rows indexed by node label.
    Features:
      0: degree
      1: avg neighbor degree
      2: avg neighbor clustering coeff
      3: egonet edge count
      4: egonet sum of degrees minus twice the egonet edge count
      5-7: sum of (degree - 1) over nodes at exactly l hops, for l=1,2,3
      8: voterank score (n - rank, 0 for unranked nodes)
      9: eigenvector centrality
     10: k-core number
    Each column with a positive maximum is divided by that maximum.

    NOTE(review): unlike get_dgl_g_input, columns 5-7 are not multiplied by
    (deg[u] - 1) — confirm which variant is intended.
    """
    n = G.number_of_nodes()
    feats = torch.ones(n, 11)

    # Whole-graph quantities, computed once.
    deg = dict(G.degree())
    clust = nx.clustering(G)
    vote_score = {node: n - rank for rank, node in enumerate(nx.voterank(G))}
    eig = nx.eigenvector_centrality(G, max_iter=500)
    core = nx.core_number(G)

    max_hops = 3
    for u in G.nodes():
        nbrs = list(G.neighbors(u))
        n_nbrs = max(len(nbrs), 1)   # guards the division for isolated nodes

        feats[u, 0] = deg[u]
        feats[u, 1] = sum(deg[v] for v in nbrs) / n_nbrs
        feats[u, 2] = sum(clust[v] for v in nbrs) / n_nbrs

        egonet = G.subgraph(nbrs + [u])
        feats[u, 3] = egonet.number_of_edges()
        feats[u, 4] = sum(deg[v] for v in egonet.nodes()) - 2 * feats[u, 3]

        # A single BFS truncated at max_hops gives the hop distance of every
        # node within range; the l-hop shell is then read off the distances.
        hops = {u: 0}
        frontier = deque([u])
        while frontier:
            v = frontier.popleft()
            if hops[v] == max_hops:
                continue
            for w in G.neighbors(v):
                if w not in hops:
                    hops[w] = hops[v] + 1
                    frontier.append(w)
        for l in (1, 2, 3):
            feats[u, 4 + l] = sum(deg[v] - 1 for v, d in hops.items() if d == l)

        feats[u, 8] = vote_score.get(u, 0)
        feats[u, 9] = eig.get(u, 0)
        feats[u, 10] = core.get(u, 0)

    # Per-column max scaling.
    for col_idx in range(feats.size(1)):
        column = feats[:, col_idx]
        peak = column.max()
        if peak > 0:
            feats[:, col_idx] = column / peak
    return feats

In [4]:
# class SAGE2AttnModel_fanout(nn.Module):
#     def __init__(self, in_dim, hidden_dim=128, num_heads=2):
#         super().__init__()
#         self.layers = nn.ModuleList()
#         self.norms  = nn.ModuleList()

#         fanouts = [15, 10, 5]

#         for _ in fanouts:
#             self.layers.append(SAGEConv(in_dim, hidden_dim, 'lstm'))
#             self.norms.append(nn.LayerNorm(hidden_dim))
#             in_dim = hidden_dim
            
#         self.attn = nn.MultiheadAttention(hidden_dim, num_heads)
#         self.out  = nn.Linear(hidden_dim, 1)

#     def forward(self, g, x):
#         h = x
#         for sage, norm in zip(self.layers, self.norms):
#             h = F.relu(norm(sage(g, h)))
#         # attention works with seq_len x batch_size x hidden
#         # here nodes as sequence, batch=1
#         h2, _ = self.attn(h.unsqueeze(1), h.unsqueeze(1), h.unsqueeze(1))
#         h2 = h2.squeeze(1)
#         return self.out(h2).squeeze(-1)

class SAGE2AttnModel(nn.Module):
    """
    GraphSAGE + attention model using full-graph (no neighbor sampling).

    `num_layers` SAGEConv layers (three by default), each followed by
    LayerNorm and ReLU, then multi-head self-attention over all nodes and a
    linear head producing one scalar per node.

    NOTE(review): the layers are built as SAGEConv(in_dim, hidden_dim, 'lstm')
    and called as sage(g, h), i.e. graph first and features second. Confirm
    that this matches the SAGEConv implementation imported in this notebook.
    """
    def __init__(self, in_dim, hidden_dim=128, num_heads=2, num_layers=3):
        super().__init__()
        self.layers = nn.ModuleList()
        self.norms  = nn.ModuleList()
        # Build fixed number of layers without explicit fan-out

        for _ in range(num_layers):
            self.layers.append(SAGEConv(in_dim, hidden_dim, 'lstm'))
            self.norms.append(nn.LayerNorm(hidden_dim))
            # every layer after the first maps hidden_dim -> hidden_dim
            in_dim = hidden_dim

        # self-attention across all nodes

        self.attn = nn.MultiheadAttention(hidden_dim, num_heads)
        self.out  = nn.Linear(hidden_dim, 1)

    def forward(self, g, x):
        # g: graph object handed to every SAGEConv layer; x: node feature matrix
        h = x
        # message-passing over full graph
        for sage, norm in zip(self.layers, self.norms):
            h = F.relu(norm(sage(g, h)))
        # apply self-attention: treat nodes as sequence length
        # unsqueeze(1) inserts a batch dimension of size 1 between nodes and channels
        h2, _ = self.attn(h.unsqueeze(1), h.unsqueeze(1), h.unsqueeze(1))
        h2 = h2.squeeze(1)
        # one scalar per node
        return self.out(h2).squeeze(-1)

In [103]:
from torch_geometric.data import Data, Dataset

class GraphEPCDataset(Dataset):
    """Dataset of pickled graphs with per-node label tensors.

    Graphs are read from ``graphs_dir/<split>/*.pkl``; the label tensor for
    ``foo.pkl`` is read from ``labels_dir/<split>/foo_labels.pt``.

    NOTE(review): the base-class ``__init__`` is not called and
    ``__len__``/``__getitem__`` are overridden directly — confirm this works
    with the installed torch_geometric version.
    """

    def __init__(self, graphs_dir, labels_dir, split):
        # glob returns files in arbitrary order; sorting makes `idx` map to
        # the same file on every run.
        self.graph_paths = sorted(glob.glob(os.path.join(graphs_dir, split, '*.pkl')))
        self.labels_dir  = os.path.join(labels_dir, split)

    def __len__(self):
        return len(self.graph_paths)

    def __getitem__(self, idx):
        """Load graph ``idx`` and return it as a ``Data`` object.

        Fields: ``x`` (node features from ``extract_node_features``),
        ``edge_index`` (both directions of every edge), ``edge_prob`` (the
        ``'p'`` attribute per directed edge), ``y`` (float labels),
        ``file_name`` and ``idx``.
        """
        # --------  load graph ----------
        path = self.graph_paths[idx]
        with open(path, 'rb') as f:          # close the handle promptly
            G_nx = pickle.load(f)['graph']

        # node-level features
        x = extract_node_features(G_nx)          # [N, 11]

        # edge index & probabilities
        edges = list(G_nx.edges())
        # The explicit (E, 2) shape keeps an edgeless graph as a [2, 0] tensor
        # instead of a 1-D tensor that would break the concatenation below.
        edge_index = (
            torch.tensor(edges, dtype=torch.long)
            .reshape(len(edges), 2)
            .t()
            .contiguous()
        )
        edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)  # undirected
        p_list     = [G_nx[u][v]['p'] for u, v in edges]
        edge_prob  = torch.tensor(p_list + p_list, dtype=torch.float)

        # labels  (make sure they are float for MSELoss)
        lbl_name = os.path.basename(path).replace('.pkl', '_labels.pt')
        y        = torch.load(os.path.join(self.labels_dir, lbl_name)).float()

        # --------  wrap in Data ----------
        data = Data(x=x,
                    edge_index=edge_index,
                    edge_prob=edge_prob,
                    y=y)

        data.file_name = os.path.basename(path)
        data.idx = torch.tensor(idx, dtype=torch.long)  # add index for reference

        return data

In [111]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import softmax

class EdgeProbGATConv(MessagePassing):
    """GAT-style layer whose attention logits also see the edge probability.

    For every edge, the projected receiver and sender representations are
    concatenated with the scalar edge probability, scored against a learned
    per-head vector, passed through LeakyReLU and normalised with a softmax
    over the edges grouped by ``index``. Messages are the sender
    representations weighted by these coefficients and summed.

    Parameters
    ----------
    in_channels, out_channels : int
        Input and per-head output feature sizes.
    heads : int
        Number of attention heads.
    negative_slope : float
        LeakyReLU slope applied to the logits.
    dropout : float
        Dropout probability applied to the attention coefficients.
    concat : bool
        Concatenate the heads (output ``heads * out_channels``) if True,
        otherwise average them (output ``out_channels``).
    bias : bool
        Whether to add a learnable bias to the output.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 heads: int = 2,
                 negative_slope: float = 0.2,
                 dropout: float = 0.2,
                 concat: bool = True,
                 bias: bool = True):
        super().__init__(aggr='add', node_dim=0)  # sum-aggregate messages

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.negative_slope = negative_slope
        self.dropout = dropout
        self.concat = concat

        # Shared linear projection into the multi-head space.
        self.lin = nn.Linear(in_channels, heads * out_channels, bias=False)
        # Attention vector over [Wh_i || Wh_j || p_ij]; the +1 is the edge scalar.
        self.att = nn.Parameter(torch.Tensor(1, heads, 2 * out_channels + 1))

        if not bias:
            self.register_parameter('bias', None)
        else:
            bias_size = heads * out_channels if concat else out_channels
            self.bias = nn.Parameter(torch.Tensor(bias_size))

        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialise the projection and attention; zero the bias."""
        nn.init.xavier_uniform_(self.lin.weight)
        nn.init.xavier_uniform_(self.att)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self,
                x: torch.Tensor,
                edge_index: torch.LongTensor,
                edge_prob: torch.Tensor):
        """
        x: [N, in_channels]
        edge_index: [2, E]
        edge_prob: [E]   (the p_ij for each edge in edge_index order)

        Returns [N, heads*out_channels] if concat else [N, out_channels].
        """
        num_nodes = x.size(0)

        # Project, then split the channel axis into heads: [N, heads, out].
        projected = self.lin(x).view(num_nodes, self.heads, self.out_channels)

        aggregated = self.propagate(edge_index, x=projected, edge_prob=edge_prob,
                                    size=(num_nodes, num_nodes))

        # Merge the heads.
        if self.concat:
            merged = aggregated.view(num_nodes, self.heads * self.out_channels)
        else:
            merged = aggregated.mean(dim=1)

        if self.bias is not None:
            merged = merged + self.bias
        return merged

    def message(self, x_j, x_i, edge_prob, index, ptr, size_i):
        """Weight each sender representation by its attention coefficient."""
        # Broadcast the scalar edge probability to every head: [E, heads, 1].
        prob_per_head = edge_prob.view(-1, 1, 1).expand(-1, self.heads, 1)
        # [E, heads, 2*out + 1]
        attn_input = torch.cat([x_i, x_j, prob_per_head], dim=-1)

        logits = (attn_input * self.att).sum(dim=-1)         # [E, heads]
        logits = F.leaky_relu(logits, self.negative_slope)
        weights = softmax(logits, index, ptr, size_i)
        weights = F.dropout(weights, p=self.dropout, training=self.training)
        return x_j * weights.unsqueeze(-1)

    def update(self, aggr_out):
        """Return the aggregated messages unchanged."""
        return aggr_out

In [112]:
class SAGEEdgeProbModel(nn.Module):
    """Three SAGEConv + BatchNorm blocks, an edge-probability attention layer
    and a linear head that outputs one scalar per node."""

    def __init__(self, in_dim, hidden_dim=256, heads=4, dropout=0.3):
        super().__init__()

        # Block 1: in_dim -> hidden_dim
        self.conv1 = SAGEConv(in_dim, hidden_dim, normalize=True)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        # Block 2: hidden_dim -> hidden_dim
        self.conv2 = SAGEConv(hidden_dim, hidden_dim, normalize=True)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        # Block 3: hidden_dim -> hidden_dim
        self.conv3 = SAGEConv(hidden_dim, hidden_dim, normalize=True)
        self.bn3 = nn.BatchNorm1d(hidden_dim)

        # Custom attention layer that receives the per-edge probability as an
        # extra attention input.
        self.gat_edge = EdgeProbGATConv(hidden_dim, hidden_dim,
                                        heads=heads, dropout=dropout)

        # The attention layer concatenates its heads by default.
        self.out = nn.Linear(heads * hidden_dim, 1)

    def forward(self, x, edge_index, edge_prob):
        """x: [N, in_dim], edge_index: [2, E], edge_prob: [E] -> [N]."""
        blocks = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        h = x
        for conv, bn in blocks:
            h = F.relu(bn(conv(h, edge_index)))

        # Incorporate the per-edge probabilities: [N, heads * hidden_dim].
        attended = self.gat_edge(h, edge_index, edge_prob)

        return self.out(attended).squeeze(-1)

In [None]:
# class SAGEEdgeProbModel(nn.Module):
#     def __init__(self, in_dim, hidden_dim=256, heads=4):
#         super().__init__()

#         # ── SAGE blocks ───────────────────────────────────────
#         self.conv1 = SAGEConv(in_dim, hidden_dim, normalize=True)
#         self.bn1   = nn.BatchNorm1d(hidden_dim)

#         self.conv2 = SAGEConv(hidden_dim, hidden_dim * 2, normalize=True)
#         self.bn2   = nn.BatchNorm1d(hidden_dim * 2)

#         self.conv3 = SAGEConv(hidden_dim * 2, hidden_dim, normalize=True)
#         self.bn3   = nn.BatchNorm1d(hidden_dim)

#         # 1×1 “shortcut” projections so shapes match for +
#         self.sc12 = nn.Linear(hidden_dim,       hidden_dim * 2, bias=False)
#         self.sc23 = nn.Linear(hidden_dim * 2,   hidden_dim,     bias=False)

#         # ── edge-aware GAT head ───────────────────────────────
#         self.gat_edge = EdgeProbGATConv(
#             hidden_dim, hidden_dim, heads=heads, dropout=0.3
#         )

#         self.out = nn.Linear(heads * hidden_dim, 1)

#     def forward(self, x, edge_index, edge_prob):
#         # Block 1
#         h1 = F.relu(self.bn1(self.conv1(x, edge_index)))        # [N, H]

#         # Block 2  (add projected shortcut from h1)
#         h2 = F.relu(self.bn2(self.conv2(h1, edge_index))
#                     + self.sc12(h1))                            # [N, 2H]

#         # Block 3  (add projected shortcut from h2)
#         h3 = F.relu(self.bn3(self.conv3(h2, edge_index))
#                     + self.sc23(h2))                            # [N, H]

#         # Edge-probability attention
#         h = self.gat_edge(h3, edge_index, edge_prob)            # [N, heads·H]

#         return self.out(h).squeeze(-1) 

## Regression

In [None]:
from torch_geometric.loader import DataLoader 

def train_model():
    """Train `SAGEEdgeProbModel` with MSE loss and checkpoint the best epoch.

    Runs 30 epochs of AdamW (lr 1e-3) over the 'train' split, evaluates on the
    'val' split after every epoch, prints the mean per-batch train and
    validation losses, and saves the model's `state_dict` to
    `<base_dir>/best_model.pt` whenever the validation loss improves.

    Reads the notebook globals `graphs_dir`, `labels_dir` and `base_dir` at
    call time, so the cells that assign them must have been executed first.

    NOTE(review): `train_model` is re-defined twice further down this
    notebook; once those cells have run, this regression variant is shadowed.

    Returns:
        None.
    """
    SEED = 42
    torch.manual_seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Create the checkpoint directory before training starts: torch.save does
    # not create missing parents, so without this the first improving epoch
    # would fail only after a full pass over the data.
    os.makedirs(base_dir or '.', exist_ok=True)
    ckpt_path = os.path.join(base_dir, 'best_model.pt')

    in_dim = 11                   # number of input node features fed to the model
    model  = SAGEEdgeProbModel(in_dim).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    loss_fn  = nn.MSELoss()

    train_ds = GraphEPCDataset(graphs_dir, labels_dir, 'train')
    val_ds   = GraphEPCDataset(graphs_dir, labels_dir, 'val')

    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=32)

    best_val = float('inf')
    for epoch in range(1, 31):
        # -------------------- training --------------------
        model.train()
        total_loss = 0.0
        for data in train_loader:                   # data is a Batch
            data = data.to(device)
            preds = model(data.x, data.edge_index, data.edge_prob)
            loss  = loss_fn(preds, data.y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # mean of the per-batch losses (not weighted by batch size)
        avg_loss = total_loss / len(train_loader)

        # -------------------- validation ------------------
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)
                val_loss += loss_fn(
                    model(data.x, data.edge_index, data.edge_prob),
                    data.y
                ).item()
        val_loss /= len(val_loader)

        print(f"Epoch {epoch:02d}: Train={avg_loss:.4f} | Val={val_loss:.4f}")

        # keep only the weights of the best-validating epoch
        if val_loss < best_val:
            best_val = val_loss
            torch.save(model.state_dict(), ckpt_path)

## Binary

In [106]:
# Dataset locations used by the binary-classification training cells.
# NOTE(review): these are machine-specific absolute paths; the notebook will not
# run elsewhere until they are pointed at a local copy of the data.
GRAPHS_DIR = r"C:\Users\btugu\Documents\develop\research\SCNDP\src\extension\learning\notebooks\gnn\data\graphs"
LABELS_DIR = r"C:\Users\btugu\Documents\develop\research\SCNDP\src\extension\learning\notebooks\gnn\data\graphs_labels"
# NOTE(review): POSIX-style path, unlike the two Windows paths above; it is not
# referenced by any of the cells shown in this part of the notebook.
LABELS_OLD_DIR = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels"

In [None]:
# Mini-batch size and number of epochs read (as globals) by the zero-argument
# BCE/AUROC `train_model` defined in a later cell.
BATCH = 256
EPOCHS = 100

In [57]:
class FocalLoss(nn.Module):
    def __init__(self, gamma: float = 2.0, alpha: float | None = None):
        super().__init__()
        self.gamma = gamma
        # optional α-balancing (same role as pos_weight)
        self.alpha = alpha            # scalar ∈ (0,1) or None

    def forward(self, logits, targets):
        # logits: [N], targets: 0/1 floats
        prob = torch.sigmoid(logits)
        pt   = prob * targets + (1 - prob) * (1 - targets)   # p_t
        focal = (1 - pt) ** self.gamma
        logp  = F.binary_cross_entropy_with_logits(
                    logits, targets, reduction='none')
        if self.alpha is not None:
            α_t = self.alpha * targets + (1 - self.alpha) * (1 - targets)
            logp = α_t * logp
        return (focal * logp).mean()


In [109]:
from torch_geometric.loader import DataLoader as PyGDataLoader
from torchmetrics.classification import BinaryAUROC
import torch.nn as nn, torch

base_dir     = 'data'   # directory that receives the checkpoint written by train_model()

def train_model():
    """Train `SAGEEdgeProbModel` as a binary node classifier and keep the best epoch.

    Uses AdamW (lr 1.5e-3, weight decay 1e-4), `BCEWithLogitsLoss` with
    `pos_weight=9`, gradient-norm clipping at 1.0 and a `ReduceLROnPlateau`
    scheduler stepped on the validation loss. After every epoch it prints the
    mean per-batch train loss, validation loss and validation AUROC, and saves
    the `state_dict` to `<base_dir>/best_model_bce_50_(1)_residual.pt`
    whenever the validation loss improves.

    Reads the notebook globals `GRAPHS_DIR`, `LABELS_DIR`, `BATCH`, `EPOCHS`
    and `base_dir` at call time.

    NOTE(review): `train_model` is defined three times in this notebook; the
    definition executed last in the kernel is the one later calls resolve to.

    Returns:
        None.
    """
    SEED = 42
    torch.manual_seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Create the checkpoint directory up front: torch.save does not create
    # missing parents, so a missing `base_dir` would otherwise only surface
    # after the first full epoch.
    os.makedirs(base_dir or '.', exist_ok=True)
    ckpt_path = os.path.join(base_dir, 'best_model_bce_50_(1)_residual.pt')

    # ---------------- model ----------------
    model = SAGEEdgeProbModel(in_dim=11, hidden_dim=256).to(device)
    optimizer  = torch.optim.AdamW(model.parameters(),
                                   lr=1.5e-3, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=4, min_lr=1e-5)
    # alternative tried earlier:
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    #           optimizer, T_0=10, T_mult=2)

    # the positive class is up-weighted 9x in the loss
    pos_weight = torch.tensor(9.0, device=device)
    loss_fn    = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    # alternatives tried earlier:
    # loss_fn = nn.MSELoss()  # for regression task
    # loss_fn = FocalLoss(gamma=2.0, alpha=0.10).to(device)

    # ---------------- data -----------------
    train_ds = GraphEPCDataset(GRAPHS_DIR, LABELS_DIR, 'train')
    val_ds   = GraphEPCDataset(GRAPHS_DIR, LABELS_DIR, 'val')

    train_loader = PyGDataLoader(train_ds, batch_size=BATCH, shuffle=True)
    val_loader   = PyGDataLoader(val_ds,   batch_size=BATCH)

    best_val = float('inf')
    for epoch in range(1, EPOCHS + 1):
        # ---------- training ----------
        model.train()
        total_loss = 0.0
        for batch in train_loader:
            batch = batch.to(device)
            logits = model(batch.x, batch.edge_index, batch.edge_prob)
            loss   = loss_fn(logits, batch.y)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            total_loss += loss.item()

        # mean of the per-batch losses (not weighted by batch size)
        avg_loss = total_loss / len(train_loader)

        # ---------- validation ----------
        model.eval()
        val_loss = 0.0
        auroc = BinaryAUROC().to(device)    # fresh metric state every epoch

        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                logits = model(batch.x, batch.edge_index, batch.edge_prob)
                auroc.update(logits, batch.y.int())
                val_loss += loss_fn(logits, batch.y).item()
        val_loss /= len(val_loader)
        scheduler.step(val_loss)            # plateau scheduler tracks the val loss

        print(f"Epoch {epoch:02d} | train {avg_loss:.4f} "
              f"| val {val_loss:.4f} | AUROC {auroc.compute():.4f}")

        # model selection is by validation loss, not by AUROC
        if val_loss < best_val:
            best_val = val_loss
            torch.save(model.state_dict(), ckpt_path)

In [113]:
def train_model(
        *,
        hidden_dim      = 256,
        heads           = 4,
        dropout         = 0.30,
        lr              = 1e-3,
        weight_decay    = 1e-4,
        loss_name       = "bce",          # "bce" or "mse"
        epochs          = 10,
        batch_size      = 32,
        seed            = 42,
        silent          = True):          # suppress epoch prints
    """Train `SAGEEdgeProbModel` for `epochs` epochs and return the best validation AUROC.

    All arguments are keyword-only.

    Args:
        hidden_dim, heads, dropout: forwarded positionally to
            `SAGEEdgeProbModel(11, hidden_dim, heads, dropout)`.
        lr, weight_decay: AdamW hyper-parameters.
        loss_name: "bce" selects `BCEWithLogitsLoss(pos_weight=9)`,
            "mse" selects `MSELoss`.
        epochs: number of training epochs.
        batch_size: batch size for both the train and the val loader.
        seed: seed passed to `torch.manual_seed` (and to CUDA when available).
        silent: when False, print validation loss and AUROC after every epoch.

    Returns:
        float: the highest validation AUROC observed across the epochs.

    Raises:
        ValueError: if `loss_name` is not "bce" or "mse". Previously any
            other name (including the formerly advertised "focal") silently
            trained with BCE, which mislabels experiment results.

    Reads the notebook globals `GRAPHS_DIR` and `LABELS_DIR`. No checkpoint is
    written by this variant.
    """
    # Validate before building anything expensive so typos fail immediately.
    if loss_name not in ("bce", "mse"):
        raise ValueError(
            f"unsupported loss_name {loss_name!r}; expected 'bce' or 'mse'")

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # ---- model ----
    model = SAGEEdgeProbModel(11, hidden_dim, heads, dropout).to(device)
    opt   = torch.optim.AdamW(model.parameters(),
                              lr=lr, weight_decay=weight_decay)
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
              opt, mode='min', factor=0.5, patience=3, min_lr=1e-5)

    if loss_name == "mse":
        loss_fn = nn.MSELoss()
    else:  # "bce"
        pos_weight = torch.tensor(9.0, device=device)
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # ---- data ----
    train_ds = GraphEPCDataset(GRAPHS_DIR, LABELS_DIR, 'train')
    val_ds   = GraphEPCDataset(GRAPHS_DIR, LABELS_DIR, 'val')
    train_loader = PyGDataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = PyGDataLoader(val_ds,   batch_size=batch_size)

    best_auroc = 0.0
    for epoch in range(1, epochs + 1):
        # --- train ---
        model.train()
        for batch in train_loader:
            batch = batch.to(device)
            loss  = loss_fn(model(batch.x, batch.edge_index, batch.edge_prob),
                            batch.y)
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()

        # --- validate ---
        model.eval()
        val_loss = 0.0
        auroc = BinaryAUROC().to(device)    # fresh metric state every epoch
        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                logits = model(batch.x, batch.edge_index, batch.edge_prob)
                val_loss += loss_fn(logits, batch.y).item()
                auroc.update(logits, batch.y.int())
        val_loss /= len(val_loader)
        au = auroc.compute().item()
        sched.step(val_loss)                # LR schedule follows the val loss
        best_auroc = max(best_auroc, au)

        if not silent:
            print(f"ep{epoch:02d} val_loss={val_loss:.4f} AUROC={au:.4f}")

    return best_auroc

In [116]:
import optuna

def objective(trial):
    """Optuna objective: sample hyper-parameters and return the best validation AUROC.

    Samples `hidden_dim`, `heads`, `dropout`, `lr` (log-uniform in
    [1e-4, 3e-3]) and `loss_name`, then trains for 15 epochs via
    `train_model(..., silent=True)` and returns its result.
    """
    params = dict(
        hidden_dim  = trial.suggest_categorical("hidden_dim", [128, 256, 512]),
        heads       = trial.suggest_categorical("heads", [2, 4, 8]),
        dropout     = trial.suggest_float("dropout", 0.2, 0.5, step=0.05),
        # suggest_loguniform is deprecated (it emitted a warning on every
        # trial); suggest_float(..., log=True) samples the same distribution.
        lr          = trial.suggest_float("lr", 1e-4, 3e-3, log=True),
        loss_name   = trial.suggest_categorical("loss_name", ["bce", "mse"]),
    )
    return train_model(**params, epochs=15, silent=True)

# Maximise the value returned by `objective` over 50 trials and report the winner.
# NOTE(review): no sampler seed is passed to create_study, so the sequence of
# sampled hyper-parameters is not pinned by this cell.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)
print("Best AUROC:", study.best_value)
print("Best params:", study.best_params)

[I 2025-07-15 03:35:39,196] A new study created in memory with name: no-name-38c1eda4-a0cd-4ee5-be4a-75da962301be
  lr          = trial.suggest_loguniform("lr", 1e-4, 3e-3),
[I 2025-07-15 03:41:53,951] Trial 0 finished with value: 0.9117720127105713 and parameters: {'hidden_dim': 128, 'heads': 2, 'dropout': 0.5, 'lr': 0.0017795372456809143, 'loss_name': 'bce'}. Best is trial 0 with value: 0.9117720127105713.
[I 2025-07-15 03:51:06,656] Trial 1 finished with value: 0.914645254611969 and parameters: {'hidden_dim': 512, 'heads': 8, 'dropout': 0.2, 'lr': 0.0009953769602514386, 'loss_name': 'bce'}. Best is trial 1 with value: 0.914645254611969.
[I 2025-07-15 03:58:40,868] Trial 2 finished with value: 0.9085859060287476 and parameters: {'hidden_dim': 512, 'heads': 4, 'dropout': 0.25, 'lr': 0.0002874864607757464, 'loss_name': 'mse'}. Best is trial 1 with value: 0.914645254611969.
[I 2025-07-15 04:07:49,786] Trial 3 finished with value: 0.9063542485237122 and parameters: {'hidden_dim': 512, 'h

Best AUROC: 0.9177669882774353
Best params: {'hidden_dim': 256, 'heads': 8, 'dropout': 0.4, 'lr': 0.0015178563257490345, 'loss_name': 'bce'}


In [98]:
# NOTE(review): `train_model` is defined three times in this notebook, so this
# call runs whichever definition was executed most recently in the kernel.
# The recorded log below uses the "Epoch NN | train … | val … | AUROC …" format
# printed by the zero-argument BCE/AUROC variant.
train_model()

Epoch 01 | train 1.2741 | val 1.2377 | AUROC 0.8249
Epoch 02 | train 0.9059 | val 1.2404 | AUROC 0.8032
Epoch 03 | train 0.8322 | val 1.2367 | AUROC 0.7373
Epoch 04 | train 0.8071 | val 1.2362 | AUROC 0.7228
Epoch 05 | train 0.7622 | val 1.2272 | AUROC 0.7370
Epoch 06 | train 0.7558 | val 1.2301 | AUROC 0.7782
Epoch 07 | train 0.7374 | val 1.2279 | AUROC 0.7928
Epoch 08 | train 0.6514 | val 1.2312 | AUROC 0.8120
Epoch 09 | train 0.6763 | val 1.2060 | AUROC 0.8183
Epoch 10 | train 0.7003 | val 1.1931 | AUROC 0.8555
Epoch 11 | train 0.6629 | val 1.1802 | AUROC 0.8716
Epoch 12 | train 0.6271 | val 1.1288 | AUROC 0.8860
Epoch 13 | train 0.6559 | val 1.0651 | AUROC 0.8919
Epoch 14 | train 0.6358 | val 0.9896 | AUROC 0.8928
Epoch 15 | train 0.6149 | val 1.0048 | AUROC 0.8945
Epoch 16 | train 0.5912 | val 0.9636 | AUROC 0.9031
Epoch 17 | train 0.6448 | val 0.9057 | AUROC 0.8950
Epoch 18 | train 0.5912 | val 0.9794 | AUROC 0.8896
Epoch 19 | train 0.6273 | val 0.7631 | AUROC 0.9088
Epoch 20 | t

KeyboardInterrupt: 

## Inference

In [99]:
from torch_geometric.loader import DataLoader

# One-shot inference: score every node once, delete the K highest-scoring
# nodes, and measure the resulting EPC with Monte-Carlo sampling.
# NOTE(review): ROOT is a machine-specific absolute path.
ROOT = r"C:\Users\btugu\Documents\develop\research\SCNDP\src\extension\learning\notebooks\gnn\data"

graphs_dir   = f"{ROOT}/graphs/test_100_separate"          # test graphs (.pkl)
labels_dir   = f"{ROOT}/graphs_labels/test_100_separate"   # passed to GraphEPCDataset alongside graphs_dir
ckpt_path    = f"{ROOT}/best_model_bce_50_(1)_residual.pt" # saved in train_model()
K            = 10                     # number of nodes to delete
mc_samples   = 100_000                 # per-graph Monte-Carlo samples for EPC
device       = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ------------------------------------------------------------
# 1.  DATASET & DATALOADER  (batch_size = 1 for clarity)
# ------------------------------------------------------------
test_ds      = GraphEPCDataset(graphs_dir, labels_dir, split="ER")
test_loader  = DataLoader(test_ds, batch_size=1, shuffle=False)

# ------------------------------------------------------------
# 2.  LOAD MODEL
# ------------------------------------------------------------
in_dim       = 11                     # number of input node features
model        = SAGEEdgeProbModel(in_dim).to(device)
model.load_state_dict(torch.load(ckpt_path, map_location=device))
model.eval()

# ------------------------------------------------------------
# 3.  INFERENCE + EPC
# ------------------------------------------------------------
from tqdm import tqdm

all_epc = []    # EPC after deletion, one entry per processed graph
i = 0           # number of graphs that passed the filename filter

for data in tqdm(test_loader, desc="Inference"):
    data = data.to(device)

    fname = data.file_name[0]

    # only evaluate the sparsest ER graphs with edge probability 0.9 / 1.0
    if "sp0.0443" in fname and ("rp0.9" in fname or "rp1.0" in fname):
        # ---- 3.1 node scores ----
        print("file: ", data.file_name)
        with torch.no_grad():
            scores = model(data.x, data.edge_index, data.edge_prob)   # [N]

        # ---- 3.2 pick top-K nodes ----
        topk = scores.topk(K, largest=True).indices.tolist()         # list[int]

        # ---- 3.3 load the NetworkX graph the batch was built from ----
        # `data.idx` is presumably the dataset position set by
        # GraphEPCDataset — TODO confirm; it indexes `test_ds.graph_paths`.
        idx  = data.idx.item()         # scalar tensor → int
        # Context manager so the file handle is closed deterministically.
        # NOTE: pickle executes arbitrary code on load; only open trusted files.
        with open(test_ds.graph_paths[idx], 'rb') as fh:
            G_nx = pickle.load(fh)['graph']

        # Tensor row indices are used as NetworkX node IDs below, so verify
        # they exist *before* spending time on the Monte-Carlo estimates.
        assert all(v in G_nx for v in topk)

        # ---- 3.4 EPC before / after deleting the top-K nodes ----
        epc_0   = epc_mc_deleted(G_nx.copy(), set(), num_samples=mc_samples)
        epc_del = epc_mc_deleted(G_nx.copy(), set(topk), num_samples=mc_samples)

        all_epc.append(epc_del)

        delta   = epc_del - epc_0     # negative  ⇒ improvement
        print(f"{fname}  EPC₀={epc_0:.1f}  after={epc_del:.1f}  Δ={delta:+.1f}")

        print(f"Graph {idx:03d} | EPC(after delete) = {epc_del:.4f} | top-K = {topk}")

        print("top-scores id,logit:")
        print(sorted(zip(topk, scores[topk].tolist()), key=lambda x: -x[1])[:5])

        i += 1

# ------------------------------------------------------------
# 4.  SUMMARY
# ------------------------------------------------------------
import numpy as np
if all_epc:
    print(f"\nAverage EPC over {len(all_epc)} test graphs: {np.mean(all_epc):.4f}")
else:
    # np.mean([]) would emit a RuntimeWarning and print nan
    print("No graphs matched the filename filter.")

Inference:   0%|          | 0/60 [00:00<?, ?it/s]

Inference:  38%|███▊      | 23/60 [00:01<00:02, 16.02it/s]

file:  ['ER_sz100_sp0.0443_rp0.9_test100_0.pkl']


Inference:  42%|████▏     | 25/60 [00:02<00:04,  7.20it/s]

ER_sz100_sp0.0443_rp0.9_test100_0.pkl  EPC₀=4889.0  after=3632.3  Δ=-1256.6
Graph 024 | EPC(after delete) = 3632.3316 | top-K = [75, 53, 44, 17, 25, 57, 50, 74, 12, 0]
top-scores id,logit:
[(75, 2.639892101287842), (53, 2.5764102935791016), (44, 2.5606229305267334), (17, 2.4412384033203125), (25, 2.128079652786255)]
file:  ['ER_sz100_sp0.0443_rp0.9_test100_1.pkl']
ER_sz100_sp0.0443_rp0.9_test100_1.pkl  EPC₀=4818.4  after=3695.0  Δ=-1123.3
Graph 025 | EPC(after delete) = 3695.0449 | top-K = [89, 38, 23, 70, 76, 98, 22, 34, 46, 17]
top-scores id,logit:
[(89, 1.991632342338562), (38, 1.8881057500839233), (23, 1.5986566543579102), (70, 1.433693766593933), (76, 1.1931877136230469)]
file:  ['ER_sz100_sp0.0443_rp0.9_test100_2.pkl']


Inference:  45%|████▌     | 27/60 [00:02<00:07,  4.46it/s]

ER_sz100_sp0.0443_rp0.9_test100_2.pkl  EPC₀=4802.8  after=3515.2  Δ=-1287.5
Graph 026 | EPC(after delete) = 3515.2380 | top-K = [82, 18, 90, 28, 4, 93, 44, 59, 46, 95]
top-scores id,logit:
[(82, 2.2519538402557373), (18, 2.038780450820923), (90, 1.9877452850341797), (28, 1.6016144752502441), (4, 1.5051566362380981)]
file:  ['ER_sz100_sp0.0443_rp1.0_test100_0.pkl']


Inference:  47%|████▋     | 28/60 [00:03<00:08,  4.00it/s]

ER_sz100_sp0.0443_rp1.0_test100_0.pkl  EPC₀=4753.3  after=3654.1  Δ=-1099.3
Graph 027 | EPC(after delete) = 3654.0608 | top-K = [63, 57, 54, 23, 84, 18, 5, 33, 22, 28]
top-scores id,logit:
[(63, 1.9420028924942017), (57, 1.8142141103744507), (54, 1.7594722509384155), (23, 1.6962289810180664), (84, 1.455165982246399)]
file:  ['ER_sz100_sp0.0443_rp1.0_test100_1.pkl']


Inference:  48%|████▊     | 29/60 [00:03<00:08,  3.51it/s]

ER_sz100_sp0.0443_rp1.0_test100_1.pkl  EPC₀=4950.0  after=3658.4  Δ=-1291.6
Graph 028 | EPC(after delete) = 3658.3830 | top-K = [17, 29, 8, 47, 2, 26, 81, 55, 74, 88]
top-scores id,logit:
[(17, 3.069204807281494), (29, 2.2433431148529053), (8, 1.6436070203781128), (47, 1.5533246994018555), (2, 1.522660493850708)]
file:  ['ER_sz100_sp0.0443_rp1.0_test100_2.pkl']


Inference:  53%|█████▎    | 32/60 [00:04<00:05,  4.84it/s]

ER_sz100_sp0.0443_rp1.0_test100_2.pkl  EPC₀=4753.5  after=3656.8  Δ=-1096.7
Graph 029 | EPC(after delete) = 3656.8476 | top-K = [87, 25, 27, 1, 60, 94, 92, 49, 57, 34]
top-scores id,logit:
[(87, 3.8588340282440186), (25, 1.9634592533111572), (27, 1.880918264389038), (1, 1.6437443494796753), (60, 1.5613504648208618)]


Inference: 100%|██████████| 60/60 [00:06<00:00,  9.40it/s]


Average EPC over 6 test graphs: 3635.3177





In [100]:
# Configuration, dataset and model for the greedy (re-scoring) inference run.
# NOTE(review): ROOT is a machine-specific absolute path.
ROOT = r"C:\Users\btugu\Documents\develop\research\SCNDP\src\extension\learning\notebooks\gnn\data"

graphs_dir   = rf"{ROOT}/graphs/test_100_separate"          # same dirs you used for train/val
labels_dir   = rf"{ROOT}/graphs_labels/test_100_separate"          # not needed for inference but Dataset expects it
ckpt_path    = rf"{ROOT}/best_model_bce_50_(1)_residual.pt" # saved in train_model()
K            = 10                     # number of nodes to delete
mc_samples   = 100_000                 # per-graph Monte-Carlo samples for EPC

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ------------------------------------------------------------------
# 1.  Dataset & loader  (batch_size = 1 for clarity)
# ------------------------------------------------------------------
test_ds     = GraphEPCDataset(graphs_dir, labels_dir, split="ER")
test_loader = PyGDataLoader(test_ds, batch_size=1, shuffle=False)

# ------------------------------------------------------------------
# 2.  Load trained model
# ------------------------------------------------------------------
# The constructor arguments must match the architecture the checkpoint was
# trained with, otherwise load_state_dict will reject the weights.
model = SAGEEdgeProbModel(in_dim=11, hidden_dim=256, heads=4).to(device)

model.load_state_dict(torch.load(ckpt_path, map_location=device))
# eval mode: the greedy helper below does not switch modes itself
model.eval()

# ------------------------------------------------------------------
# 3.  Helper: greedy iterative delete
# ------------------------------------------------------------------
def greedy_iterative_delete(model, G_orig, K, device):
    """Iteratively pick up to K nodes, re-running the model after each deletion.

    In every round the remaining graph is relabelled to 0…n-1, scored by
    `model`, and the highest-scoring node is removed.

    Args:
        model: callable as `model(x, edge_index, edge_prob)` returning one
            score per node. It is not switched to eval mode here; the caller
            is expected to have done that.
        G_orig: NetworkX graph whose edges carry a `'p'` attribute. It is
            copied, never mutated.
        K: maximum number of nodes to select.
        device: torch device the input tensors are moved to.

    Returns:
        list: ORIGINAL node IDs in selection order. Shorter than K when the
        graph runs out of nodes first.
    """
    S = []                          # selected original IDs
    G = G_orig.copy()               # work on a local copy

    for _ in range(K):
        # Nothing left to score: stop instead of failing on an empty graph.
        if G.number_of_nodes() == 0:
            break

        # ---- relabel so nodes are 0…n-1 contiguous ---------------
        G_rel = nx.convert_node_labels_to_integers(
                    G, label_attribute='orig_id')

        # build PyG tensors from the *relabeled* graph
        x = extract_node_features(G_rel)                       # [n, 11]
        edges = list(G_rel.edges())
        # Explicit (E, 2) reshape keeps the index 2-D even when E == 0;
        # a bare torch.tensor([]) is 1-D and would break the cat below.
        ei = (torch.tensor(edges, dtype=torch.long)
                   .reshape(len(edges), 2).t().contiguous())   # [2, E]
        ei = torch.cat([ei, ei.flip(0)], dim=1)                # undirected
        # probabilities repeated once so they line up with both directions
        p  = torch.tensor([G_rel[u][v]['p'] for u, v in edges] * 2,
                          dtype=torch.float)

        # ---- run model & pick best node --------------------------
        with torch.no_grad():
            logits = model(x.to(device), ei.to(device), p.to(device)).cpu()

        v_rel  = logits.argmax().item()                        # 0…n-1
        v_orig = G_rel.nodes[v_rel]['orig_id']                 # back-map
        S.append(v_orig)

        # ---- delete from the *original-ID* graph ---------------
        G.remove_node(v_orig)

    return S

In [101]:
# 4.  Inference + EPC
# ------------------------------------------------------------------
import time

K           = 10
mc_samples  = 100_000
# NOTE: despite its name this list collects the EPC *after* deletion
# (`epc_del`), which is what the "Avg del EPC" summary below reports.
all_delta   = []

for batch in tqdm(test_loader, desc="Greedy-iter"):
    batch = batch.to(device)
    idx   = batch.idx.item()
    fname = batch.file_name[0]

    # only evaluate the sparsest ER graphs with edge probability 0.9 / 1.0
    if "sp0.0443" in fname and ("rp0.9" in fname or "rp1.0" in fname):
        # the timed span covers graph loading, greedy selection and the
        # post-deletion EPC estimate (but not the baseline EPC)
        t0 = time.perf_counter()
        # Context manager so the file handle is closed deterministically.
        # NOTE: pickle executes arbitrary code on load; only open trusted files.
        with open(test_ds.graph_paths[idx], 'rb') as fh:
            G_nx = pickle.load(fh)['graph']

        topk = greedy_iterative_delete(model, G_nx.copy(), K, device)

        # opt_k = local_search_(G_nx.copy(), set(topk), mc_samples)

        epc_del = epc_mc_deleted(G_nx.copy(), set(topk), mc_samples)
        time_greedy_gnn = time.perf_counter() - t0

        epc_0   = epc_mc_deleted(G_nx.copy(), set(),      mc_samples)
        delta   = epc_del - epc_0      # negative = improvement
        all_delta.append(epc_del)

        print(f"{fname} | EPC₀ {epc_0:.1f} → {epc_del:.1f}  Δ {delta:+.1f} | time: {time_greedy_gnn:.2f}s")
        print("top-K nodes:", topk)

# ------------------------------------------------------------------
# 5.  Summary
# ------------------------------------------------------------------
if all_delta:
    print(f"\nGraphs processed: {len(all_delta)}")
    print(f"Avg del EPC: {np.mean(all_delta):+.1f}")
else:
    print("No graphs matched the filename filter.")

Greedy-iter:  40%|████      | 24/60 [00:01<00:01, 19.97it/s]

ER_sz100_sp0.0443_rp0.9_test100_0.pkl | EPC₀ 4888.3 → 3484.1  Δ -1404.2 | time: 0.93s
top-K nodes: [75, 44, 53, 17, 25, 57, 64, 12, 50, 20]


Greedy-iter:  43%|████▎     | 26/60 [00:03<00:10,  3.14it/s]

ER_sz100_sp0.0443_rp0.9_test100_1.pkl | EPC₀ 4817.4 → 3594.4  Δ -1223.1 | time: 0.77s
top-K nodes: [89, 38, 30, 7, 42, 80, 70, 22, 98, 76]
ER_sz100_sp0.0443_rp0.9_test100_2.pkl | EPC₀ 4800.9 → 3364.2  Δ -1436.6 | time: 0.91s
top-K nodes: [82, 90, 44, 68, 28, 59, 93, 48, 19, 18]


Greedy-iter:  47%|████▋     | 28/60 [00:05<00:18,  1.74it/s]

ER_sz100_sp0.0443_rp1.0_test100_0.pkl | EPC₀ 4750.1 → 3654.9  Δ -1095.3 | time: 1.10s
top-K nodes: [63, 57, 23, 49, 54, 5, 33, 24, 14, 7]


Greedy-iter:  48%|████▊     | 29/60 [00:07<00:22,  1.40it/s]

ER_sz100_sp0.0443_rp1.0_test100_1.pkl | EPC₀ 4950.0 → 3657.3  Δ -1292.7 | time: 1.13s
top-K nodes: [17, 29, 26, 8, 55, 2, 13, 20, 92, 7]


Greedy-iter:  53%|█████▎    | 32/60 [00:08<00:15,  1.80it/s]

ER_sz100_sp0.0443_rp1.0_test100_2.pkl | EPC₀ 4750.5 → 3402.2  Δ -1348.3 | time: 0.98s
top-K nodes: [87, 25, 60, 50, 1, 31, 2, 92, 51, 65]


Greedy-iter: 100%|██████████| 60/60 [00:10<00:00,  5.71it/s]


Graphs processed: 6
Avg del EPC: +3526.2





In [52]:
from torch_geometric.loader import DataLoader

# One-shot top-K inference on the "test_large_separate" graphs.
# NOTE(review): machine-specific absolute paths; the recorded run of this cell
# stopped with FileNotFoundError because `ckpt_path` did not exist.
graphs_dir   = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs/test_large_separate"          # test graphs (.pkl)
labels_dir   = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels/test_large_separate"          # passed to GraphEPCDataset alongside graphs_dir
ckpt_path    = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/best_model.pt" # saved in train_model()
K            = 10                     # number of nodes to delete
mc_samples   = 10_000                 # per-graph Monte-Carlo samples for EPC
device       = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ------------------------------------------------------------
# 1.  DATASET & DATALOADER  (batch_size = 1 for clarity)
# ------------------------------------------------------------
test_ds      = GraphEPCDataset(graphs_dir, labels_dir, split="ER")
test_loader  = DataLoader(test_ds, batch_size=1, shuffle=False)

# ------------------------------------------------------------
# 2.  LOAD MODEL
# ------------------------------------------------------------
in_dim       = 11                     # number of input node features
model        = SAGEEdgeProbModel(in_dim).to(device)
model.load_state_dict(torch.load(ckpt_path, map_location=device))
model.eval()

# ------------------------------------------------------------
# 3.  INFERENCE + EPC
# ------------------------------------------------------------
from tqdm import tqdm

all_epc = []    # EPC after deletion, one entry per processed graph
i = 0           # number of graphs that passed the filename filter

for data in tqdm(test_loader, desc="Inference"):
    data = data.to(device)

    fname = data.file_name[0]

    # only evaluate graphs whose file name matches these generator settings
    if "sp0.0443" in fname and ("rp0.9" in fname or "rp1.0" in fname):
        # ---- 3.1 node scores ----
        print("file: ", data.file_name)
        with torch.no_grad():
            scores = model(data.x, data.edge_index, data.edge_prob)   # [N]

        # ---- 3.2 pick top-K nodes ----
        topk = scores.topk(K, largest=True).indices.tolist()         # list[int]

        # ---- 3.3 load the NetworkX graph the batch was built from ----
        # `data.idx` is presumably the dataset position set by
        # GraphEPCDataset — TODO confirm; it indexes `test_ds.graph_paths`.
        idx  = data.idx.item()         # scalar tensor → int
        # Context manager so the file handle is closed deterministically.
        # NOTE: pickle executes arbitrary code on load; only open trusted files.
        with open(test_ds.graph_paths[idx], 'rb') as fh:
            G_nx = pickle.load(fh)['graph']

        # Tensor row indices are used as NetworkX node IDs below, so verify
        # they exist *before* spending time on the Monte-Carlo estimates.
        assert all(v in G_nx for v in topk)

        # ---- 3.4 EPC before / after deleting the top-K nodes ----
        epc_0   = epc_mc_deleted(G_nx.copy(), set(), num_samples=mc_samples)
        epc_del = epc_mc_deleted(G_nx.copy(), set(topk), num_samples=mc_samples)

        all_epc.append(epc_del)

        delta   = epc_del - epc_0     # negative  ⇒ improvement
        print(f"{fname}  EPC₀={epc_0:.1f}  after={epc_del:.1f}  Δ={delta:+.1f}")

        print(f"Graph {idx:03d} | EPC(after delete) = {epc_del:.4f} | top-K = {topk}")

        print("top-scores id,logit:")
        print(sorted(zip(topk, scores[topk].tolist()), key=lambda x: -x[1])[:5])

        i += 1

# ------------------------------------------------------------
# 4.  SUMMARY
# ------------------------------------------------------------
import numpy as np
if all_epc:
    print(f"\nAverage EPC over {len(all_epc)} test graphs: {np.mean(all_epc):.4f}")
else:
    # np.mean([]) would emit a RuntimeWarning and print nan
    print("No graphs matched the filename filter.")

FileNotFoundError: [Errno 2] No such file or directory: '/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/best_model.pt'

In [39]:
# 0. CONFIGURATION
# ----------------------------------------------------------------------
# Settings for the label sanity check: delete the K nodes with the highest
# stored label score and compare EPC before/after.
# NOTE: these assignments rebind the notebook-wide `graphs_dir`, `labels_dir`,
# `K` and `mc_samples` that other cells also assign.
graphs_dir  = "data/graphs/test_100/ER"
labels_dir  = "data/labels/test_100/ER"          # same sub-folder structure
K           = 10                            # how many nodes to delete
mc_samples  = 10_000                        # EPC Monte-Carlo samples

# ----------------------------------------------------------------------
# 1. HELPER: load graph + label file
# ----------------------------------------------------------------------
def load_graph_and_scores(pkl_path):
    """Load a pickled graph and its per-node scores, undoing the log1p transform.

    The label file is looked up in the notebook global `labels_dir` under the
    graph's base name with ".pkl" replaced by "_labels.pt".

    Args:
        pkl_path: path to a pickle holding a dict with a "graph" entry.

    Returns:
        tuple: `(G, scores)` where `scores = exp(label) - 1`.

    Raises:
        FileNotFoundError: if the matching label file does not exist.
    """
    # label file has the same stem plus '_labels.pt'
    lbl_path = os.path.join(
        labels_dir,
        os.path.basename(pkl_path).replace(".pkl", "_labels.pt")
    )
    # Check for the label first so a missing file fails before any unpickling.
    if not os.path.exists(lbl_path):
        raise FileNotFoundError(f"label file not found for {pkl_path}")

    # Context manager so the file handle is closed deterministically.
    # NOTE: pickle executes arbitrary code on load; only open trusted files.
    with open(pkl_path, "rb") as fh:
        G = pickle.load(fh)["graph"]

    # tensor shape [N], dtype=float — presumably one entry per node; TODO confirm
    log1_scores = torch.load(lbl_path)
    # undo stabilisation: score = exp(label) - 1
    # (expm1 is the exact inverse of log1p and stays accurate near zero)
    scores = torch.expm1(log1_scores)

    return G, scores

# ----------------------------------------------------------------------
# 2. MAIN LOOP
# ----------------------------------------------------------------------
# For every graph file (sorted by path), estimate EPC with no deletions and
# with the K highest-scoring nodes deleted, and print one summary line per graph.
for pkl in tqdm(sorted(glob.glob(os.path.join(graphs_dir, "*.pkl"))),
                desc="sanity"):

    G, scores = load_graph_and_scores(pkl)
    N         = G.number_of_nodes()     # not used further in this cell

    # baseline EPC (no deletions)
    epc_0 = epc_mc_deleted(G.copy(), set(), num_samples=mc_samples)

    # top-K indices by descending score
    # NOTE(review): tensor positions are passed on as node IDs — assumes the
    # graph's nodes are labelled 0…N-1 in label order; confirm.
    topk   = scores.topk(K).indices.tolist()       # list[int]
    epc_K  = epc_mc_deleted(G.copy(), set(topk), num_samples=mc_samples)

    # Δ is EPC-after minus EPC-before, so negative values mean a reduction
    print(f"{os.path.basename(pkl):<25}"
          f" EPC0={epc_0:7.4f}  "
          f"EPC-del={epc_K:7.4f}  "
          f"Δ={epc_K-epc_0:+.4f}  "
          f"K={K}")

print("done.")

sanity:   7%|▋         | 4/60 [00:00<00:01, 34.31it/s]

ER_sz100_sp0.0443_rp0.1_test100_0.pkl EPC0=39.5800  EPC-del=24.3855  Δ=-15.1945  K=10
ER_sz100_sp0.0443_rp0.1_test100_1.pkl EPC0=33.5550  EPC-del=20.0340  Δ=-13.5210  K=10
ER_sz100_sp0.0443_rp0.1_test100_2.pkl EPC0=36.4950  EPC-del=21.7890  Δ=-14.7060  K=10
ER_sz100_sp0.0443_rp0.2_test100_0.pkl EPC0=206.7300  EPC-del=74.4525  Δ=-132.2775  K=10
ER_sz100_sp0.0443_rp0.2_test100_1.pkl EPC0=225.4850  EPC-del=63.9765  Δ=-161.5085  K=10
ER_sz100_sp0.0443_rp0.2_test100_2.pkl EPC0=223.3700  EPC-del=71.1315  Δ=-152.2385  K=10
ER_sz100_sp0.0443_rp0.3_test100_0.pkl EPC0=1069.6250  EPC-del=201.0375  Δ=-868.5875  K=10


sanity:  20%|██        | 12/60 [00:00<00:01, 31.08it/s]

ER_sz100_sp0.0443_rp0.3_test100_1.pkl EPC0=1015.0900  EPC-del=188.9190  Δ=-826.1710  K=10
ER_sz100_sp0.0443_rp0.3_test100_2.pkl EPC0=868.6600  EPC-del=158.2245  Δ=-710.4355  K=10
ER_sz100_sp0.0443_rp0.4_test100_0.pkl EPC0=2556.6050  EPC-del=895.8645  Δ=-1660.7405  K=10
ER_sz100_sp0.0443_rp0.4_test100_1.pkl EPC0=2758.2750  EPC-del=723.4830  Δ=-2034.7920  K=10
ER_sz100_sp0.0443_rp0.4_test100_2.pkl EPC0=3103.0700  EPC-del=1142.8155  Δ=-1960.2545  K=10
ER_sz100_sp0.0443_rp0.5_test100_0.pkl EPC0=3225.4200  EPC-del=1195.1460  Δ=-2030.2740  K=10
ER_sz100_sp0.0443_rp0.5_test100_1.pkl EPC0=2999.6550  EPC-del=1153.9350  Δ=-1845.7200  K=10


sanity:  33%|███▎      | 20/60 [00:00<00:01, 30.12it/s]

ER_sz100_sp0.0443_rp0.5_test100_2.pkl EPC0=3702.0400  EPC-del=2174.4630  Δ=-1527.5770  K=10
ER_sz100_sp0.0443_rp0.6_test100_0.pkl EPC0=4439.0500  EPC-del=3007.7190  Δ=-1431.3310  K=10
ER_sz100_sp0.0443_rp0.6_test100_1.pkl EPC0=3891.4600  EPC-del=2129.1075  Δ=-1762.3525  K=10
ER_sz100_sp0.0443_rp0.6_test100_2.pkl EPC0=3904.3700  EPC-del=2063.5515  Δ=-1840.8185  K=10
ER_sz100_sp0.0443_rp0.7_test100_0.pkl EPC0=4354.1150  EPC-del=2876.7780  Δ=-1477.3370  K=10
ER_sz100_sp0.0443_rp0.7_test100_1.pkl EPC0=4180.1350  EPC-del=2393.1495  Δ=-1786.9855  K=10


sanity:  40%|████      | 24/60 [00:00<00:01, 30.21it/s]

ER_sz100_sp0.0443_rp0.7_test100_2.pkl EPC0=4177.4600  EPC-del=2403.7515  Δ=-1773.7085  K=10
ER_sz100_sp0.0443_rp0.8_test100_0.pkl EPC0=4775.0400  EPC-del=3512.2545  Δ=-1262.7855  K=10
ER_sz100_sp0.0443_rp0.8_test100_1.pkl EPC0=4638.9150  EPC-del=3093.4260  Δ=-1545.4890  K=10
ER_sz100_sp0.0443_rp0.8_test100_2.pkl EPC0=4552.2600  EPC-del=2915.7570  Δ=-1636.5030  K=10
ER_sz100_sp0.0443_rp0.9_test100_0.pkl EPC0=4895.4800  EPC-del=3300.3405  Δ=-1595.1395  K=10
ER_sz100_sp0.0443_rp0.9_test100_1.pkl EPC0=4818.5200  EPC-del=3452.0445  Δ=-1366.4755  K=10


sanity:  52%|█████▏    | 31/60 [00:01<00:01, 27.34it/s]

ER_sz100_sp0.0443_rp0.9_test100_2.pkl EPC0=4801.9200  EPC-del=3301.5600  Δ=-1500.3600  K=10
ER_sz100_sp0.0443_rp1.0_test100_0.pkl EPC0=4753.0000  EPC-del=3732.6150  Δ=-1020.3850  K=10
ER_sz100_sp0.0443_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3734.1630  Δ=-1215.8370  K=10
ER_sz100_sp0.0443_rp1.0_test100_2.pkl EPC0=4762.6400  EPC-del=3469.4370  Δ=-1293.2030  K=10
ER_sz100_sp0.0667_rp0.1_test100_0.pkl EPC0=88.1100  EPC-del=48.2355  Δ=-39.8745  K=10
ER_sz100_sp0.0667_rp0.1_test100_1.pkl EPC0=90.0400  EPC-del=46.6920  Δ=-43.3480  K=10


sanity:  65%|██████▌   | 39/60 [00:01<00:00, 30.30it/s]

ER_sz100_sp0.0667_rp0.1_test100_2.pkl EPC0=83.4400  EPC-del=44.3160  Δ=-39.1240  K=10
ER_sz100_sp0.0667_rp0.2_test100_0.pkl EPC0=818.1300  EPC-del=235.2825  Δ=-582.8475  K=10
ER_sz100_sp0.0667_rp0.2_test100_1.pkl EPC0=891.2250  EPC-del=266.9040  Δ=-624.3210  K=10
ER_sz100_sp0.0667_rp0.2_test100_2.pkl EPC0=930.5250  EPC-del=252.3150  Δ=-678.2100  K=10
ER_sz100_sp0.0667_rp0.3_test100_0.pkl EPC0=2964.3100  EPC-del=1183.6575  Δ=-1780.6525  K=10
ER_sz100_sp0.0667_rp0.3_test100_1.pkl EPC0=2410.0000  EPC-del=663.5295  Δ=-1746.4705  K=10
ER_sz100_sp0.0667_rp0.3_test100_2.pkl EPC0=3247.7200  EPC-del=1636.1055  Δ=-1611.6145  K=10


sanity:  72%|███████▏  | 43/60 [00:01<00:00, 27.68it/s]

ER_sz100_sp0.0667_rp0.4_test100_0.pkl EPC0=4154.5450  EPC-del=2779.1865  Δ=-1375.3585  K=10
ER_sz100_sp0.0667_rp0.4_test100_1.pkl EPC0=3980.9750  EPC-del=2569.8780  Δ=-1411.0970  K=10
ER_sz100_sp0.0667_rp0.4_test100_2.pkl EPC0=4190.5700  EPC-del=2892.8925  Δ=-1297.6775  K=10
ER_sz100_sp0.0667_rp0.5_test100_0.pkl EPC0=4563.9500  EPC-del=3225.7170  Δ=-1338.2330  K=10
ER_sz100_sp0.0667_rp0.5_test100_1.pkl EPC0=4516.0250  EPC-del=3048.5700  Δ=-1467.4550  K=10


sanity:  82%|████████▏ | 49/60 [00:01<00:00, 25.43it/s]

ER_sz100_sp0.0667_rp0.5_test100_2.pkl EPC0=4620.4350  EPC-del=3324.2175  Δ=-1296.2175  K=10
ER_sz100_sp0.0667_rp0.6_test100_0.pkl EPC0=4694.0150  EPC-del=3240.7605  Δ=-1453.2545  K=10
ER_sz100_sp0.0667_rp0.6_test100_1.pkl EPC0=4712.3650  EPC-del=3333.5055  Δ=-1378.8595  K=10
ER_sz100_sp0.0667_rp0.6_test100_2.pkl EPC0=4796.9400  EPC-del=3588.8085  Δ=-1208.1315  K=10
ER_sz100_sp0.0667_rp0.7_test100_0.pkl EPC0=4847.3450  EPC-del=3584.6640  Δ=-1262.6810  K=10


sanity:  87%|████████▋ | 52/60 [00:01<00:00, 24.83it/s]

ER_sz100_sp0.0667_rp0.7_test100_1.pkl EPC0=4886.2050  EPC-del=3756.9150  Δ=-1129.2900  K=10
ER_sz100_sp0.0667_rp0.7_test100_2.pkl EPC0=4821.6300  EPC-del=3510.3735  Δ=-1311.2565  K=10
ER_sz100_sp0.0667_rp0.8_test100_0.pkl EPC0=4897.6350  EPC-del=3646.3185  Δ=-1251.3165  K=10
ER_sz100_sp0.0667_rp0.8_test100_1.pkl EPC0=4939.0500  EPC-del=3774.8520  Δ=-1164.1980  K=10
ER_sz100_sp0.0667_rp0.8_test100_2.pkl EPC0=4917.1750  EPC-del=3633.2460  Δ=-1283.9290  K=10


sanity:  97%|█████████▋| 58/60 [00:02<00:00, 23.84it/s]

ER_sz100_sp0.0667_rp0.9_test100_0.pkl EPC0=4927.1100  EPC-del=3651.8580  Δ=-1275.2520  K=10
ER_sz100_sp0.0667_rp0.9_test100_1.pkl EPC0=4947.4750  EPC-del=3736.6740  Δ=-1210.8010  K=10
ER_sz100_sp0.0667_rp0.9_test100_2.pkl EPC0=4927.9550  EPC-del=3400.4925  Δ=-1527.4625  K=10
ER_sz100_sp0.0667_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=4005.0000  Δ=-945.0000  K=10
ER_sz100_sp0.0667_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3912.4800  Δ=-1037.5200  K=10


sanity: 100%|██████████| 60/60 [00:02<00:00, 27.23it/s]

ER_sz100_sp0.0667_rp1.0_test100_2.pkl EPC0=4851.9800  EPC-del=3733.3890  Δ=-1118.5910  K=10
done.





In [72]:
# 0. CONFIGURATION
# ----------------------------------------------------------------------
# Settings for the label sanity check on the full "test_100" folder.
# NOTE: these assignments rebind the notebook-wide `graphs_dir`, `labels_dir`,
# `K` and `mc_samples` that other cells also assign.
graphs_dir  = "data/graphs/test_100"
labels_dir  = "data/graphs_labels/test_100"          # same sub-folder structure
K           = 10                            # how many nodes to delete
mc_samples  = 10_000                        # EPC Monte-Carlo samples

# ----------------------------------------------------------------------
# 1. HELPER: load graph + label file
# ----------------------------------------------------------------------
def load_graph_and_scores(pkl_path, labels_root=None):
    """Load one pickled graph together with its label/score file.

    The graph is taken from the ``"graph"`` entry of the object pickled at
    ``pkl_path``.  The scores are read with ``torch.load`` from the file in
    the labels directory whose name is the base name of ``pkl_path`` with
    ``".pkl"`` replaced by ``"_labels.pt"``.

    Parameters
    ----------
    pkl_path : str
        Path to a pickle file containing a mapping with a ``"graph"`` key.
    labels_root : str, optional
        Directory that holds the ``*_labels.pt`` files.  When omitted, the
        module-level ``labels_dir`` setting is used.

    Returns
    -------
    tuple
        ``(G, scores)``: the unpickled graph object and the object returned
        by ``torch.load`` for the label file (expected to be a float tensor
        of shape ``[N]`` -- TODO confirm against the label writer).

    Raises
    ------
    FileNotFoundError
        If the label file for ``pkl_path`` does not exist (or if
        ``pkl_path`` itself cannot be opened).
    """
    if labels_root is None:
        labels_root = labels_dir

    # NOTE(security): pickle can execute arbitrary code while loading, so
    # this must only be pointed at graph files from a trusted source.
    # The context manager closes the handle even if unpickling fails.
    with open(pkl_path, "rb") as fh:
        G = pickle.load(fh)["graph"]

    # label file has the same stem plus '_labels.pt'
    lbl_path = os.path.join(
        labels_root,
        os.path.basename(pkl_path).replace(".pkl", "_labels.pt")
    )
    if not os.path.exists(lbl_path):
        raise FileNotFoundError(f"label file not found for {pkl_path}")

    scores = torch.load(lbl_path)
    # The loaded values are returned unchanged; an inverse transform of the
    # form ``exp(label) - 1`` exists only as disabled code:
    # scores = log1_scores.exp() - 1.0

    return G, scores

# ----------------------------------------------------------------------
# 2. MAIN LOOP
# ----------------------------------------------------------------------
# Collect the graph files once, in lexicographic order, so the progress bar
# and the printed rows follow a stable sequence.
_sanity_pkl_paths = glob.glob(os.path.join(graphs_dir, "*.pkl"))
_sanity_pkl_paths.sort()

for pkl in tqdm(_sanity_pkl_paths, desc="sanity"):
    G, scores = load_graph_and_scores(pkl)
    N = G.number_of_nodes()

    # Reference value: EPC estimate with an empty deletion set.
    epc_0 = epc_mc_deleted(G.copy(), set(), num_samples=mc_samples)

    # Indices of the K largest scores, as a plain list of ints.
    topk = scores.topk(K).indices.tolist()

    # EPC estimate on a fresh copy with those K indices passed as the
    # deletion set.
    epc_K = epc_mc_deleted(G.copy(), set(topk), num_samples=mc_samples)

    # One report row per graph: file name, both estimates and their difference.
    print(
        f"{os.path.basename(pkl):<25}",
        f" EPC0={epc_0:7.4f}  ",
        f"EPC-del={epc_K:7.4f}  ",
        f"Δ={epc_K-epc_0:+.4f}  ",
        f"K={K}",
        sep="",
    )

print("done.")

sanity:   2%|▏         | 4/180 [00:00<00:05, 29.65it/s]

BA_sz100_sp2_rp0.1_test100_0.pkl EPC0=41.8550  EPC-del=11.1960  Δ=-30.6590  K=10
BA_sz100_sp2_rp0.1_test100_1.pkl EPC0=43.1600  EPC-del= 9.0720  Δ=-34.0880  K=10
BA_sz100_sp2_rp0.1_test100_2.pkl EPC0=44.3950  EPC-del= 9.5535  Δ=-34.8415  K=10
BA_sz100_sp2_rp0.2_test100_0.pkl EPC0=239.7600  EPC-del=25.2045  Δ=-214.5555  K=10
BA_sz100_sp2_rp0.2_test100_1.pkl EPC0=234.3200  EPC-del=25.8660  Δ=-208.4540  K=10
BA_sz100_sp2_rp0.2_test100_2.pkl EPC0=258.6100  EPC-del=22.4010  Δ=-236.2090  K=10


sanity:   6%|▌         | 11/180 [00:00<00:05, 30.86it/s]

BA_sz100_sp2_rp0.3_test100_0.pkl EPC0=910.2500  EPC-del=50.0760  Δ=-860.1740  K=10
BA_sz100_sp2_rp0.3_test100_1.pkl EPC0=903.3950  EPC-del=51.4845  Δ=-851.9105  K=10
BA_sz100_sp2_rp0.3_test100_2.pkl EPC0=850.8100  EPC-del=43.2450  Δ=-807.5650  K=10
BA_sz100_sp2_rp0.4_test100_0.pkl EPC0=1990.8750  EPC-del=80.4060  Δ=-1910.4690  K=10
BA_sz100_sp2_rp0.4_test100_1.pkl EPC0=1997.6100  EPC-del=88.6815  Δ=-1908.9285  K=10
BA_sz100_sp2_rp0.4_test100_2.pkl EPC0=1976.9650  EPC-del=93.0645  Δ=-1883.9005  K=10
BA_sz100_sp2_rp0.5_test100_0.pkl EPC0=3005.4250  EPC-del=108.9630  Δ=-2896.4620  K=10


sanity:  11%|█         | 19/180 [00:00<00:05, 30.26it/s]

BA_sz100_sp2_rp0.5_test100_1.pkl EPC0=3015.4850  EPC-del=145.9215  Δ=-2869.5635  K=10
BA_sz100_sp2_rp0.5_test100_2.pkl EPC0=3028.8750  EPC-del=122.8635  Δ=-2906.0115  K=10
BA_sz100_sp2_rp0.6_test100_0.pkl EPC0=3820.7100  EPC-del=229.7610  Δ=-3590.9490  K=10
BA_sz100_sp2_rp0.6_test100_1.pkl EPC0=3818.7250  EPC-del=248.7465  Δ=-3569.9785  K=10
BA_sz100_sp2_rp0.6_test100_2.pkl EPC0=3802.4250  EPC-del=184.5495  Δ=-3617.8755  K=10
BA_sz100_sp2_rp0.7_test100_0.pkl EPC0=4370.7200  EPC-del=680.3955  Δ=-3690.3245  K=10


sanity:  13%|█▎        | 23/180 [00:00<00:05, 29.77it/s]

BA_sz100_sp2_rp0.7_test100_1.pkl EPC0=4398.4300  EPC-del=1466.0910  Δ=-2932.3390  K=10
BA_sz100_sp2_rp0.7_test100_2.pkl EPC0=4353.4550  EPC-del=683.2125  Δ=-3670.2425  K=10
BA_sz100_sp2_rp0.8_test100_0.pkl EPC0=4696.5000  EPC-del=1546.9785  Δ=-3149.5215  K=10
BA_sz100_sp2_rp0.8_test100_1.pkl EPC0=4694.9250  EPC-del=1495.2645  Δ=-3199.6605  K=10
BA_sz100_sp2_rp0.8_test100_2.pkl EPC0=4714.7400  EPC-del=1611.8865  Δ=-3102.8535  K=10
BA_sz100_sp2_rp0.9_test100_0.pkl EPC0=4898.1800  EPC-del=2531.8755  Δ=-2366.3045  K=10


sanity:  16%|█▌        | 29/180 [00:00<00:05, 28.12it/s]

BA_sz100_sp2_rp0.9_test100_1.pkl EPC0=4903.9800  EPC-del=2504.0475  Δ=-2399.9325  K=10
BA_sz100_sp2_rp0.9_test100_2.pkl EPC0=4893.9400  EPC-del=1408.5000  Δ=-3485.4400  K=10
BA_sz100_sp2_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3171.7575  Δ=-1778.2425  K=10
BA_sz100_sp2_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=2505.2625  Δ=-2444.7375  K=10
BA_sz100_sp2_rp1.0_test100_2.pkl EPC0=4950.0000  EPC-del=3249.1080  Δ=-1700.8920  K=10
BA_sz100_sp3_rp0.1_test100_0.pkl EPC0=88.9650  EPC-del=23.3820  Δ=-65.5830  K=10


sanity:  21%|██        | 37/180 [00:01<00:04, 30.18it/s]

BA_sz100_sp3_rp0.1_test100_1.pkl EPC0=89.0400  EPC-del=22.6260  Δ=-66.4140  K=10
BA_sz100_sp3_rp0.1_test100_2.pkl EPC0=94.2300  EPC-del=19.9755  Δ=-74.2545  K=10
BA_sz100_sp3_rp0.2_test100_0.pkl EPC0=919.5350  EPC-del=53.2800  Δ=-866.2550  K=10
BA_sz100_sp3_rp0.2_test100_1.pkl EPC0=893.5150  EPC-del=60.4350  Δ=-833.0800  K=10
BA_sz100_sp3_rp0.2_test100_2.pkl EPC0=869.3700  EPC-del=64.4040  Δ=-804.9660  K=10
BA_sz100_sp3_rp0.3_test100_0.pkl EPC0=2401.8550  EPC-del=194.1345  Δ=-2207.7205  K=10
BA_sz100_sp3_rp0.3_test100_1.pkl EPC0=2405.4500  EPC-del=232.5510  Δ=-2172.8990  K=10


sanity:  23%|██▎       | 41/180 [00:01<00:04, 30.16it/s]

BA_sz100_sp3_rp0.3_test100_2.pkl EPC0=2420.4850  EPC-del=173.4030  Δ=-2247.0820  K=10
BA_sz100_sp3_rp0.4_test100_0.pkl EPC0=3490.8950  EPC-del=456.2820  Δ=-3034.6130  K=10
BA_sz100_sp3_rp0.4_test100_1.pkl EPC0=3478.3650  EPC-del=273.6270  Δ=-3204.7380  K=10
BA_sz100_sp3_rp0.4_test100_2.pkl EPC0=3510.5500  EPC-del=469.7775  Δ=-3040.7725  K=10
BA_sz100_sp3_rp0.5_test100_0.pkl EPC0=4264.9650  EPC-del=1898.0460  Δ=-2366.9190  K=10


sanity:  27%|██▋       | 48/180 [00:01<00:05, 25.97it/s]

BA_sz100_sp3_rp0.5_test100_1.pkl EPC0=4149.9900  EPC-del=1230.7095  Δ=-2919.2805  K=10
BA_sz100_sp3_rp0.5_test100_2.pkl EPC0=4203.4050  EPC-del=1906.3125  Δ=-2297.0925  K=10
BA_sz100_sp3_rp0.6_test100_0.pkl EPC0=4603.6400  EPC-del=2325.6135  Δ=-2278.0265  K=10
BA_sz100_sp3_rp0.6_test100_1.pkl EPC0=4610.2500  EPC-del=2314.5750  Δ=-2295.6750  K=10
BA_sz100_sp3_rp0.6_test100_2.pkl EPC0=4602.8650  EPC-del=2089.3905  Δ=-2513.4745  K=10


sanity:  28%|██▊       | 51/180 [00:01<00:05, 24.42it/s]

BA_sz100_sp3_rp0.7_test100_0.pkl EPC0=4800.2600  EPC-del=2266.1820  Δ=-2534.0780  K=10
BA_sz100_sp3_rp0.7_test100_1.pkl EPC0=4823.6150  EPC-del=2728.3365  Δ=-2095.2785  K=10
BA_sz100_sp3_rp0.7_test100_2.pkl EPC0=4822.1050  EPC-del=2967.0345  Δ=-1855.0705  K=10
BA_sz100_sp3_rp0.8_test100_0.pkl EPC0=4922.7400  EPC-del=3563.7795  Δ=-1358.9605  K=10
BA_sz100_sp3_rp0.8_test100_1.pkl EPC0=4919.5100  EPC-del=3490.9200  Δ=-1428.5900  K=10


sanity:  32%|███▏      | 57/180 [00:02<00:05, 23.83it/s]

BA_sz100_sp3_rp0.8_test100_2.pkl EPC0=4910.2600  EPC-del=3212.0730  Δ=-1698.1870  K=10
BA_sz100_sp3_rp0.9_test100_0.pkl EPC0=4945.2300  EPC-del=3642.4620  Δ=-1302.7680  K=10
BA_sz100_sp3_rp0.9_test100_1.pkl EPC0=4947.8500  EPC-del=3625.4205  Δ=-1322.4295  K=10
BA_sz100_sp3_rp0.9_test100_2.pkl EPC0=4946.0000  EPC-del=3827.7990  Δ=-1118.2010  K=10
BA_sz100_sp3_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3655.1700  Δ=-1294.8300  K=10
BA_sz100_sp3_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3915.2520  Δ=-1034.7480  K=10


sanity:  36%|███▌      | 64/180 [00:02<00:04, 27.25it/s]

BA_sz100_sp3_rp1.0_test100_2.pkl EPC0=4950.0000  EPC-del=3913.6680  Δ=-1036.3320  K=10
ER_sz100_sp0.0443_rp0.1_test100_0.pkl EPC0=38.7300  EPC-del=22.7655  Δ=-15.9645  K=10
ER_sz100_sp0.0443_rp0.1_test100_1.pkl EPC0=33.0550  EPC-del=17.7300  Δ=-15.3250  K=10
ER_sz100_sp0.0443_rp0.1_test100_2.pkl EPC0=39.4600  EPC-del=21.5370  Δ=-17.9230  K=10
ER_sz100_sp0.0443_rp0.2_test100_0.pkl EPC0=209.3800  EPC-del=74.4075  Δ=-134.9725  K=10
ER_sz100_sp0.0443_rp0.2_test100_1.pkl EPC0=228.3750  EPC-del=63.6390  Δ=-164.7360  K=10
ER_sz100_sp0.0443_rp0.2_test100_2.pkl EPC0=224.0950  EPC-del=64.6560  Δ=-159.4390  K=10


sanity:  39%|███▉      | 71/180 [00:02<00:03, 29.10it/s]

ER_sz100_sp0.0443_rp0.3_test100_0.pkl EPC0=1066.3500  EPC-del=179.2890  Δ=-887.0610  K=10
ER_sz100_sp0.0443_rp0.3_test100_1.pkl EPC0=1005.3950  EPC-del=179.4060  Δ=-825.9890  K=10
ER_sz100_sp0.0443_rp0.3_test100_2.pkl EPC0=856.9550  EPC-del=127.2285  Δ=-729.7265  K=10
ER_sz100_sp0.0443_rp0.4_test100_0.pkl EPC0=2533.8300  EPC-del=739.2600  Δ=-1794.5700  K=10
ER_sz100_sp0.0443_rp0.4_test100_1.pkl EPC0=2755.4450  EPC-del=679.9275  Δ=-2075.5175  K=10
ER_sz100_sp0.0443_rp0.4_test100_2.pkl EPC0=3100.5150  EPC-del=1009.6785  Δ=-2090.8365  K=10
ER_sz100_sp0.0443_rp0.5_test100_0.pkl EPC0=3204.9650  EPC-del=1100.4210  Δ=-2104.5440  K=10


sanity:  43%|████▎     | 77/180 [00:02<00:03, 27.22it/s]

ER_sz100_sp0.0443_rp0.5_test100_1.pkl EPC0=3009.8250  EPC-del=1307.9925  Δ=-1701.8325  K=10
ER_sz100_sp0.0443_rp0.5_test100_2.pkl EPC0=3671.3400  EPC-del=2041.2810  Δ=-1630.0590  K=10
ER_sz100_sp0.0443_rp0.6_test100_0.pkl EPC0=4447.3850  EPC-del=2953.8585  Δ=-1493.5265  K=10
ER_sz100_sp0.0443_rp0.6_test100_1.pkl EPC0=3909.8950  EPC-del=2087.3385  Δ=-1822.5565  K=10
ER_sz100_sp0.0443_rp0.6_test100_2.pkl EPC0=3910.6800  EPC-del=2000.3400  Δ=-1910.3400  K=10
ER_sz100_sp0.0443_rp0.7_test100_0.pkl EPC0=4367.3950  EPC-del=2840.5440  Δ=-1526.8510  K=10


sanity:  46%|████▌     | 83/180 [00:02<00:03, 28.51it/s]

ER_sz100_sp0.0443_rp0.7_test100_1.pkl EPC0=4195.6550  EPC-del=2311.7760  Δ=-1883.8790  K=10
ER_sz100_sp0.0443_rp0.7_test100_2.pkl EPC0=4169.9500  EPC-del=2254.0500  Δ=-1915.9000  K=10
ER_sz100_sp0.0443_rp0.8_test100_0.pkl EPC0=4782.9600  EPC-del=3399.1425  Δ=-1383.8175  K=10
ER_sz100_sp0.0443_rp0.8_test100_1.pkl EPC0=4628.1800  EPC-del=2920.5990  Δ=-1707.5810  K=10
ER_sz100_sp0.0443_rp0.8_test100_2.pkl EPC0=4542.8750  EPC-del=2866.5045  Δ=-1676.3705  K=10
ER_sz100_sp0.0443_rp0.9_test100_0.pkl EPC0=4888.3700  EPC-del=3369.7215  Δ=-1518.6485  K=10


sanity:  50%|█████     | 90/180 [00:03<00:02, 30.15it/s]

ER_sz100_sp0.0443_rp0.9_test100_1.pkl EPC0=4819.0900  EPC-del=3283.7310  Δ=-1535.3590  K=10
ER_sz100_sp0.0443_rp0.9_test100_2.pkl EPC0=4804.1200  EPC-del=3293.9730  Δ=-1510.1470  K=10
ER_sz100_sp0.0443_rp1.0_test100_0.pkl EPC0=4755.4250  EPC-del=3733.7760  Δ=-1021.6490  K=10
ER_sz100_sp0.0443_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3488.8635  Δ=-1461.1365  K=10
ER_sz100_sp0.0443_rp1.0_test100_2.pkl EPC0=4755.4400  EPC-del=3410.5500  Δ=-1344.8900  K=10
ER_sz100_sp0.0667_rp0.1_test100_0.pkl EPC0=89.1650  EPC-del=45.6120  Δ=-43.5530  K=10
ER_sz100_sp0.0667_rp0.1_test100_1.pkl EPC0=91.3100  EPC-del=46.9125  Δ=-44.3975  K=10
ER_sz100_sp0.0667_rp0.1_test100_2.pkl EPC0=83.0400  EPC-del=44.0055  Δ=-39.0345  K=10
ER_sz100_sp0.0667_rp0.2_test100_0.pkl EPC0=832.0000  EPC-del=205.7940  Δ=-626.2060  K=10
ER_sz100_sp0.0667_rp0.2_test100_1.pkl EPC0=889.3400  EPC-del=225.9765  Δ=-663.3635  K=10
ER_sz100_sp0.0667_rp0.2_test100_2.pkl EPC0=936.7200  EPC-del=218.9385  Δ=-717.7815  K=10
ER_sz100_sp0.06

sanity:  56%|█████▌    | 100/180 [00:03<00:01, 47.64it/s]

ER_sz100_sp0.0667_rp0.4_test100_0.pkl EPC0=4142.0700  EPC-del=2685.8475  Δ=-1456.2225  K=10
ER_sz100_sp0.0667_rp0.4_test100_1.pkl EPC0=4003.8850  EPC-del=2480.3820  Δ=-1523.5030  K=10
ER_sz100_sp0.0667_rp0.4_test100_2.pkl EPC0=4198.6400  EPC-del=2922.2100  Δ=-1276.4300  K=10
ER_sz100_sp0.0667_rp0.5_test100_0.pkl EPC0=4584.0050  EPC-del=3154.1220  Δ=-1429.8830  K=10
ER_sz100_sp0.0667_rp0.5_test100_1.pkl EPC0=4503.3600  EPC-del=3066.7500  Δ=-1436.6100  K=10


sanity:  61%|██████    | 109/180 [00:03<00:02, 30.29it/s]

ER_sz100_sp0.0667_rp0.5_test100_2.pkl EPC0=4600.9850  EPC-del=3309.0300  Δ=-1291.9550  K=10
ER_sz100_sp0.0667_rp0.6_test100_0.pkl EPC0=4695.0700  EPC-del=3186.5715  Δ=-1508.4985  K=10
ER_sz100_sp0.0667_rp0.6_test100_1.pkl EPC0=4705.5250  EPC-del=3301.4295  Δ=-1404.0955  K=10
ER_sz100_sp0.0667_rp0.6_test100_2.pkl EPC0=4792.6750  EPC-del=3563.4510  Δ=-1229.2240  K=10
ER_sz100_sp0.0667_rp0.7_test100_0.pkl EPC0=4845.8600  EPC-del=3565.8225  Δ=-1280.0375  K=10


sanity:  63%|██████▎   | 113/180 [00:03<00:02, 27.82it/s]

ER_sz100_sp0.0667_rp0.7_test100_1.pkl EPC0=4887.1950  EPC-del=3747.8790  Δ=-1139.3160  K=10
ER_sz100_sp0.0667_rp0.7_test100_2.pkl EPC0=4831.0750  EPC-del=3503.4795  Δ=-1327.5955  K=10
ER_sz100_sp0.0667_rp0.8_test100_0.pkl EPC0=4899.1900  EPC-del=3559.2525  Δ=-1339.9375  K=10
ER_sz100_sp0.0667_rp0.8_test100_1.pkl EPC0=4936.2650  EPC-del=3644.0865  Δ=-1292.1785  K=10
ER_sz100_sp0.0667_rp0.8_test100_2.pkl EPC0=4914.6650  EPC-del=3619.3905  Δ=-1295.2745  K=10


sanity:  65%|██████▌   | 117/180 [00:04<00:02, 26.30it/s]

ER_sz100_sp0.0667_rp0.9_test100_0.pkl EPC0=4922.5600  EPC-del=3575.2590  Δ=-1347.3010  K=10
ER_sz100_sp0.0667_rp0.9_test100_1.pkl EPC0=4946.0250  EPC-del=3735.0045  Δ=-1211.0205  K=10
ER_sz100_sp0.0667_rp0.9_test100_2.pkl EPC0=4922.9600  EPC-del=3394.9125  Δ=-1528.0475  K=10
ER_sz100_sp0.0667_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3913.6680  Δ=-1036.3320  K=10
ER_sz100_sp0.0667_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3921.1920  Δ=-1028.8080  K=10


sanity:  69%|██████▉   | 124/180 [00:04<00:02, 27.37it/s]

ER_sz100_sp0.0667_rp1.0_test100_2.pkl EPC0=4857.8600  EPC-del=3585.3300  Δ=-1272.5300  K=10
SW_sz100_sp4_rp0.1_test100_0.pkl EPC0=30.2100  EPC-del=22.4685  Δ=-7.7415  K=10
SW_sz100_sp4_rp0.1_test100_1.pkl EPC0=29.0000  EPC-del=21.6720  Δ=-7.3280  K=10
SW_sz100_sp4_rp0.1_test100_2.pkl EPC0=29.7550  EPC-del=19.9170  Δ=-9.8380  K=10
SW_sz100_sp4_rp0.2_test100_0.pkl EPC0=116.6100  EPC-del=55.9080  Δ=-60.7020  K=10
SW_sz100_sp4_rp0.2_test100_1.pkl EPC0=119.6300  EPC-del=52.3845  Δ=-67.2455  K=10
SW_sz100_sp4_rp0.2_test100_2.pkl EPC0=114.8000  EPC-del=55.6200  Δ=-59.1800  K=10


sanity:  73%|███████▎  | 132/180 [00:04<00:01, 30.03it/s]

SW_sz100_sp4_rp0.3_test100_0.pkl EPC0=473.6600  EPC-del=143.6220  Δ=-330.0380  K=10
SW_sz100_sp4_rp0.3_test100_1.pkl EPC0=489.8150  EPC-del=134.3610  Δ=-355.4540  K=10
SW_sz100_sp4_rp0.3_test100_2.pkl EPC0=467.6600  EPC-del=137.7045  Δ=-329.9555  K=10
SW_sz100_sp4_rp0.4_test100_0.pkl EPC0=1917.9950  EPC-del=356.9850  Δ=-1561.0100  K=10
SW_sz100_sp4_rp0.4_test100_1.pkl EPC0=1815.4300  EPC-del=361.6965  Δ=-1453.7335  K=10
SW_sz100_sp4_rp0.4_test100_2.pkl EPC0=1882.7350  EPC-del=380.4390  Δ=-1502.2960  K=10
SW_sz100_sp4_rp0.5_test100_0.pkl EPC0=3415.7650  EPC-del=938.7810  Δ=-2476.9840  K=10


sanity:  76%|███████▌  | 136/180 [00:04<00:01, 29.39it/s]

SW_sz100_sp4_rp0.5_test100_1.pkl EPC0=3413.2650  EPC-del=1256.5170  Δ=-2156.7480  K=10
SW_sz100_sp4_rp0.5_test100_2.pkl EPC0=3437.0950  EPC-del=1176.2010  Δ=-2260.8940  K=10
SW_sz100_sp4_rp0.6_test100_0.pkl EPC0=4294.5350  EPC-del=2201.7870  Δ=-2092.7480  K=10
SW_sz100_sp4_rp0.6_test100_1.pkl EPC0=4262.8300  EPC-del=2386.2465  Δ=-1876.5835  K=10
SW_sz100_sp4_rp0.6_test100_2.pkl EPC0=4216.4950  EPC-del=2210.4540  Δ=-2006.0410  K=10
SW_sz100_sp4_rp0.7_test100_0.pkl EPC0=4695.3500  EPC-del=3151.3140  Δ=-1544.0360  K=10


sanity:  79%|███████▉  | 143/180 [00:04<00:01, 28.12it/s]

SW_sz100_sp4_rp0.7_test100_1.pkl EPC0=4669.3650  EPC-del=3087.7290  Δ=-1581.6360  K=10
SW_sz100_sp4_rp0.7_test100_2.pkl EPC0=4733.5000  EPC-del=3114.8640  Δ=-1618.6360  K=10
SW_sz100_sp4_rp0.8_test100_0.pkl EPC0=4865.0950  EPC-del=3405.0645  Δ=-1460.0305  K=10
SW_sz100_sp4_rp0.8_test100_1.pkl EPC0=4862.4350  EPC-del=3262.2075  Δ=-1600.2275  K=10
SW_sz100_sp4_rp0.8_test100_2.pkl EPC0=4847.5100  EPC-del=3317.8950  Δ=-1529.6150  K=10
SW_sz100_sp4_rp0.9_test100_0.pkl EPC0=4931.6050  EPC-del=3559.1760  Δ=-1372.4290  K=10


sanity:  83%|████████▎ | 150/180 [00:05<00:01, 28.63it/s]

SW_sz100_sp4_rp0.9_test100_1.pkl EPC0=4929.6450  EPC-del=3515.4675  Δ=-1414.1775  K=10
SW_sz100_sp4_rp0.9_test100_2.pkl EPC0=4929.9100  EPC-del=3463.7985  Δ=-1466.1115  K=10
SW_sz100_sp4_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3917.6280  Δ=-1032.3720  K=10
SW_sz100_sp4_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3917.2320  Δ=-1032.7680  K=10
SW_sz100_sp4_rp1.0_test100_2.pkl EPC0=4950.0000  EPC-del=3915.2520  Δ=-1034.7480  K=10
SW_sz100_sp5_rp0.1_test100_0.pkl EPC0=30.6250  EPC-del=21.0555  Δ=-9.5695  K=10
SW_sz100_sp5_rp0.1_test100_1.pkl EPC0=30.4850  EPC-del=22.2120  Δ=-8.2730  K=10


sanity:  88%|████████▊ | 158/180 [00:05<00:00, 31.09it/s]

SW_sz100_sp5_rp0.1_test100_2.pkl EPC0=30.4850  EPC-del=22.3695  Δ=-8.1155  K=10
SW_sz100_sp5_rp0.2_test100_0.pkl EPC0=114.1700  EPC-del=54.5760  Δ=-59.5940  K=10
SW_sz100_sp5_rp0.2_test100_1.pkl EPC0=117.9000  EPC-del=53.6535  Δ=-64.2465  K=10
SW_sz100_sp5_rp0.2_test100_2.pkl EPC0=119.8300  EPC-del=58.4505  Δ=-61.3795  K=10
SW_sz100_sp5_rp0.3_test100_0.pkl EPC0=487.7050  EPC-del=145.8495  Δ=-341.8555  K=10
SW_sz100_sp5_rp0.3_test100_1.pkl EPC0=509.3800  EPC-del=132.5430  Δ=-376.8370  K=10
SW_sz100_sp5_rp0.3_test100_2.pkl EPC0=487.9750  EPC-del=136.4085  Δ=-351.5665  K=10


sanity:  92%|█████████▏| 166/180 [00:05<00:00, 30.49it/s]

SW_sz100_sp5_rp0.4_test100_0.pkl EPC0=1851.8050  EPC-del=318.0870  Δ=-1533.7180  K=10
SW_sz100_sp5_rp0.4_test100_1.pkl EPC0=1809.4200  EPC-del=400.9095  Δ=-1408.5105  K=10
SW_sz100_sp5_rp0.4_test100_2.pkl EPC0=1871.9850  EPC-del=374.0760  Δ=-1497.9090  K=10
SW_sz100_sp5_rp0.5_test100_0.pkl EPC0=3415.1750  EPC-del=952.6320  Δ=-2462.5430  K=10
SW_sz100_sp5_rp0.5_test100_1.pkl EPC0=3356.0850  EPC-del=880.0740  Δ=-2476.0110  K=10
SW_sz100_sp5_rp0.5_test100_2.pkl EPC0=3486.3200  EPC-del=1193.0895  Δ=-2293.2305  K=10
SW_sz100_sp5_rp0.6_test100_0.pkl EPC0=4255.8950  EPC-del=2374.2945  Δ=-1881.6005  K=10


sanity:  94%|█████████▍| 170/180 [00:05<00:00, 29.19it/s]

SW_sz100_sp5_rp0.6_test100_1.pkl EPC0=4159.8150  EPC-del=2230.6905  Δ=-1929.1245  K=10
SW_sz100_sp5_rp0.6_test100_2.pkl EPC0=4250.8150  EPC-del=2278.0530  Δ=-1972.7620  K=10
SW_sz100_sp5_rp0.7_test100_0.pkl EPC0=4659.0350  EPC-del=2874.0510  Δ=-1784.9840  K=10
SW_sz100_sp5_rp0.7_test100_1.pkl EPC0=4698.6900  EPC-del=3193.1505  Δ=-1505.5395  K=10
SW_sz100_sp5_rp0.7_test100_2.pkl EPC0=4645.7200  EPC-del=3035.2590  Δ=-1610.4610  K=10
SW_sz100_sp5_rp0.8_test100_0.pkl EPC0=4852.9750  EPC-del=3348.0315  Δ=-1504.9435  K=10


sanity:  98%|█████████▊| 177/180 [00:06<00:00, 28.87it/s]

SW_sz100_sp5_rp0.8_test100_1.pkl EPC0=4841.6550  EPC-del=3243.8880  Δ=-1597.7670  K=10
SW_sz100_sp5_rp0.8_test100_2.pkl EPC0=4851.8250  EPC-del=3440.3085  Δ=-1411.5165  K=10
SW_sz100_sp5_rp0.9_test100_0.pkl EPC0=4936.8650  EPC-del=3441.6585  Δ=-1495.2065  K=10
SW_sz100_sp5_rp0.9_test100_1.pkl EPC0=4942.9050  EPC-del=3628.2825  Δ=-1314.6225  K=10
SW_sz100_sp5_rp0.9_test100_2.pkl EPC0=4933.0800  EPC-del=3507.8085  Δ=-1425.2715  K=10
SW_sz100_sp5_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3826.1295  Δ=-1123.8705  K=10


sanity: 100%|██████████| 180/180 [00:06<00:00, 28.90it/s]

SW_sz100_sp5_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3915.6480  Δ=-1034.3520  K=10
SW_sz100_sp5_rp1.0_test100_2.pkl EPC0=4950.0000  EPC-del=3835.9170  Δ=-1114.0830  K=10
done.



