In [1]:
#!/usr/bin/env python3
"""
Assessing Attack Vulnerability in Networks with Uncertainty — toy implementation
-------------------------------------------------------------------------------
Implements a *greedy‑with‑swap* heuristic to pick *k* critical nodes that minimise
**expected pairwise connectivity (EPC)** on a probabilistic graph.

The code purposefully stays dependency‑light (< networkx, numpy, tqdm >) so that it
can be executed on any laptop without a solver licence.  It follows the spirit of
REGA + CSP from the paper but simplifies a few engineering tricks:
* We estimate EPC with standard Monte‑Carlo sampling (fast enough ≤ 200 nodes).
* The greedy rank‑1 rounding is approximated by marginal EPC gain — still strong
  in practice.
* A one‑for‑one swap local search polishes the solution.

Usage examples (bash):
    # Erdős–Rényi n=100, m≈200, delete k=10 nodes
    python critical_node_uncertainty.py --dataset er --n 100 --m 200 --p 0.2 --k 10

    # Barabási–Albert n=100, m=2 (≈200 edges), uniform edge prob 0.5
    python critical_node_uncertainty.py --dataset ba --n 100 --m 2 --p 0.5 --k 10

    # Load the real XO backbone (GraphML) and attack 5 nodes
    python critical_node_uncertainty.py --dataset xo --file xo_backbone.graphml --p 0.3 --k 5
"""
from __future__ import annotations
import argparse, random, math, itertools, time
from collections import defaultdict

import networkx as nx
import numpy as np
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Dataset helpers
# ---------------------------------------------------------------------------

def generate_er_graph(n: int, m: int) -> nx.Graph:
    """Generate an Erdős–Rényi G(n,m) graph."""
    return nx.gnm_random_graph(n=n, m=m, seed=42)

def generate_ba_graph(n: int, m: int) -> nx.Graph:
    """Barabási–Albert scale‑free graph with ~n nodes and ~m*n edges."""
    return nx.barabasi_albert_graph(n=n, m=m, seed=42)

def generate_ws_graph(n: int, k: int, beta: float) -> nx.Graph:
    """Watts–Strogatz small‑world graph."""
    return nx.watts_strogatz_graph(n=n, k=k, p=beta, seed=42)

def load_xo_graph(path: str) -> nx.Graph:
    """Load the XO US backbone (or any) GraphML / edgelist file."""
    if path.endswith(".graphml"):
        return nx.read_graphml(path)
    return nx.read_edgelist(path, nodetype=int)

# ---------------------------------------------------------------------------
# EPC estimation — vanilla Monte‑Carlo
# ---------------------------------------------------------------------------

def epc_estimate(
    G: nx.Graph,
    edge_p: dict[tuple[int, int], float],
    num_samples: int = 2000,
    removed: set[int] | None = None,
) -> float:
    """Return unbiased estimator of Expected Pairwise Connectivity (EPC).

    EPC = E[ sum_{u<v} 1{u connected to v in the random subgraph} ]
    We compute it by Monte‑Carlo sampling because the exact computation is #P‑hard.
    """
    if removed:
        H = G.copy()
        H.remove_nodes_from(removed)
    else:
        H = G
    nodes = list(H.nodes())
    if len(nodes) < 2:
        return 0.0

    pair_total = len(nodes) * (len(nodes) - 1) / 2
    acc = 0.0
    for _ in range(num_samples):
        # Sample a realisation of the graph
        live_edges = [e for e in H.edges() if random.random() < edge_p[e]]
        S = H.edge_subgraph(live_edges).copy()
        # Connected components sizes
        for comp in nx.connected_components(S):
            s = len(comp)
            if s > 1:
                acc += s * (s - 1) / 2  # number of connected pairs inside comp
    return acc / num_samples

# ---------------------------------------------------------------------------
# Greedy + local‑swap heuristic (REGA‑lite)
# ---------------------------------------------------------------------------

def greedy_local_search(
    G: nx.Graph,
    edge_p: dict[tuple[int, int], float],
    k: int,
    num_samples: int = 1000,
    max_iter: int = 20,
) -> tuple[set[int], float]:
    """Pick k nodes to minimise EPC using greedy marginal gain then 1‑swap LS."""
    candidate_nodes = list(G.nodes())
    removed: set[int] = set()

    # Greedy stage ----------------------------------------------------------
    print("[Greedy] ranking marginal EPC drop …")
    for _ in tqdm(range(k)):
        best_delta, best_v = -math.inf, None
        baseline = epc_estimate(G, edge_p, num_samples, removed)
        for v in candidate_nodes:
            if v in removed:
                continue
            e_new = epc_estimate(G, edge_p, num_samples, removed | {v})
            delta = baseline - e_new
            if delta > best_delta:
                best_delta, best_v = delta, v
        removed.add(best_v)
        print(f"  Chosen {best_v:>4}  ΔEPC ≈ {best_delta:>.1f}")

    # Local search stage ----------------------------------------------------
    print("[LocalSearch] 1‑swap improvement …")
    improved = True
    it = 0
    while improved and it < max_iter:
        improved = False
        it += 1
        for v_out in list(removed):
            for v_in in candidate_nodes:
                if v_in in removed:
                    continue
                e_old = epc_estimate(G, edge_p, num_samples, removed)
                e_new = epc_estimate(G, edge_p, num_samples, (removed - {v_out}) | {v_in})
                if e_new + 1e-6 < e_old:  # strict improvement
                    removed.remove(v_out)
                    removed.add(v_in)
                    improved = True
                    print(f"  Iter {it:02d}: swap‑in {v_in}, swap‑out {v_out}, EPC ↓ {e_old:>.1f} → {e_new:>.1f}")
                    break
            if improved:
                break
    final_epc = epc_estimate(G, edge_p, num_samples, removed)
    return removed, final_epc

# ---------------------------------------------------------------------------
# Utility
# ---------------------------------------------------------------------------

def assign_uniform_edge_probs(G: nx.Graph, p: float) -> dict[tuple[int, int], float]:
    """Return dict mapping each *undirected* edge (u,v) with u<v to probability p."""
    edge_p = {}
    for u, v in G.edges():
        if u > v:
            u, v = v, u
        edge_p[(u, v)] = p
    return edge_p

# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(description="Critical‑Node attack under uncertainty (toy REGA)")
    parser.add_argument("--dataset", choices=["er", "ba", "ws", "xo"], help="Graph family")
    parser.add_argument("--n", type=int, default=100, help="number of nodes (synthetic)")
    parser.add_argument("--m", type=int, default=200, help="ER edges | BA attachment" )
    parser.add_argument("--k", type=int, default=10, help="budget (nodes to delete)")
    parser.add_argument("--p", type=float, default=0.3, help="uniform edge presence probability")
    parser.add_argument("--file", type=str, default="", help="graph file for xo dataset")
    parser.add_argument("--samples", type=int, default=1000, help="MC samples per EPC eval")
    args = parser.parse_args()

    # Build / load graph ----------------------------------------------------
    if args.dataset == "er":
        G = generate_er_graph(args.n, args.m)
    elif args.dataset == "ba":
        G = generate_ba_graph(args.n, args.m)
    elif args.dataset == "ws":
        # m here is mean degree, must be even
        k_ring = args.m if args.m % 2 == 0 else args.m + 1
        G = generate_ws_graph(args.n, k_ring, beta=0.3)
    else:
        if not args.file:
            raise ValueError("--file is required for xo dataset")
        G = load_xo_graph(args.file)

    edge_p = assign_uniform_edge_probs(G, args.p)
    print(f"Graph |V|={G.number_of_nodes()}  |E|={G.number_of_edges()}  uniform p={args.p}")

    # Baseline EPC without attack ------------------------------------------
    base_epc = epc_estimate(G, edge_p, args.samples)
    print(f"Baseline EPC ≈ {base_epc:.1f} (averaged over {args.samples} MC samples)")

    # Run heuristic ---------------------------------------------------------
    start = time.time()
    removed, final_epc = greedy_local_search(
        G, edge_p, k=args.k, num_samples=args.samples)
    dur = time.time() - start

    print("\n=== Result ===")
    print(f"Removed nodes (k={args.k}): {sorted(removed)}")
    print(f"Final EPC ≈ {final_epc:.1f}")
    print(f"Runtime  {dur:.2f}  seconds")

if __name__ == "__main__":
    main()


Graph |V|=0  |E|=0  uniform p=0.3
Baseline EPC ≈ 0.0 (averaged over 1000 MC samples)
[Greedy] ranking marginal EPC drop …


  0%|          | 0/10 [00:00<?, ?it/s]


TypeError: unsupported format string passed to NoneType.__format__

In [None]:
#!/usr/bin/env python3
"""
critical_node_uncertainty.py — reproducible CNDP sandbox
=======================================================
*Original:* greedy‑with‑swap REGA‑lite heuristic + Monte‑Carlo EPC.
*This revision:* adds classic *centrality* benchmarks (betweenness, PageRank,   
undirected degree) so you can reproduce Fig. 2/3‑style comparisons in one run.

Dependencies
------------
    pip install networkx numpy tqdm tabulate

Quick examples
--------------

1. **Run REGA‑lite only** (what you had before):

       python critical_node_uncertainty.py --dataset er --n 100 --m 200 --p 0.2 --k 10

2. **Compare all baselines** on the XO backbone:

       python critical_node_uncertainty.py --dataset xo --file xo.graphml --p 0.3 --k 10 --method all --samples 2000
"""
from __future__ import annotations
import argparse, random, math, itertools, time
from collections import defaultdict
from typing import Dict, Set

import networkx as nx
import numpy as np
from tqdm import tqdm
from tabulate import tabulate

# ---------------------------------------------------------------------------
# Synthetic graph generators
# ---------------------------------------------------------------------------

def generate_er_graph(n: int, m: int) -> nx.Graph:
    return nx.gnm_random_graph(n=n, m=m, seed=42)

def generate_ba_graph(n: int, m_attach: int) -> nx.Graph:
    return nx.barabasi_albert_graph(n=n, m=m_attach, seed=42)

def generate_ws_graph(n: int, k_ring: int, beta: float = 0.3) -> nx.Graph:
    return nx.watts_strogatz_graph(n=n, k=k_ring, p=beta, seed=42)

def load_graph(path: str) -> nx.Graph:
    if path.endswith(".graphml"):
        return nx.read_graphml(path)
    return nx.read_edgelist(path, nodetype=int)

# ---------------------------------------------------------------------------
# Edge‑probability helpers
# ---------------------------------------------------------------------------

def uniform_edge_probabilities(G: nx.Graph, p: float) -> Dict[tuple[int, int], float]:
    ep = {}
    for u, v in G.edges():
        if u > v:
            u, v = v, u
        ep[(u, v)] = p
    return ep

# ---------------------------------------------------------------------------
# Monte‑Carlo EPC estimator (IC model)
# ---------------------------------------------------------------------------

def epc_estimate(
    G: nx.Graph,
    edge_p: Dict[tuple[int, int], float],
    num_samples: int = 2000,
    removed: Set[int] | None = None,
) -> float:
    """Unbiased estimator of Expected Pairwise Connectivity."""
    if removed:
        H = G.copy()
        H.remove_nodes_from(removed)
    else:
        H = G
    nodes = list(H.nodes())
    if len(nodes) < 2:
        return 0.0

    acc = 0.0
    for _ in range(num_samples):
        live_edges = [e for e in H.edges() if random.random() < edge_p[e]]
        S = H.edge_subgraph(live_edges)
        for comp in nx.connected_components(S):
            s = len(comp)
            if s > 1:
                acc += s * (s - 1) / 2
    return acc / num_samples

# ---------------------------------------------------------------------------
# Baseline 1: REGA‑lite (greedy + swap)
# ---------------------------------------------------------------------------

def rega_lite(
    G: nx.Graph,
    edge_p: Dict[tuple[int, int], float],
    k: int,
    num_samples: int,
    max_iter: int = 20,
) -> Set[int]:
    """Greedy marginal EPC drop followed by 1‑swap local search."""
    removed: Set[int] = set()
    baseline = epc_estimate(G, edge_p, num_samples)

    # Greedy stage ----------------------------------------------------------
    for _ in range(k):
        best_v, best_drop = None, -math.inf
        for v in G.nodes():
            if v in removed:
                continue
            new_drop = baseline - epc_estimate(G, edge_p, num_samples, removed | {v})
            if new_drop > best_drop:
                best_drop, best_v = new_drop, v
        removed.add(best_v)
        baseline -= best_drop  # marginally updated baseline EPC

    # Local‑search stage ----------------------------------------------------
    improved, it = True, 0
    while improved and it < max_iter:
        improved = False
        it += 1
        for v_out in list(removed):
            for v_in in G.nodes():
                if v_in in removed:
                    continue
                e_old = epc_estimate(G, edge_p, num_samples, removed)
                e_new = epc_estimate(G, edge_p, num_samples, (removed - {v_out}) | {v_in})
                if e_new + 1e-6 < e_old:
                    removed.remove(v_out)
                    removed.add(v_in)
                    improved = True
                    break
            if improved:
                break
    return removed

# ---------------------------------------------------------------------------
# Baseline 2: Centrality heuristics
# ---------------------------------------------------------------------------

def centrality_nodes(G: nx.Graph, k: int, metric: str) -> Set[int]:
    if metric == "betweenness":
        scores = nx.betweenness_centrality(G)
    elif metric == "pagerank":
        scores = nx.pagerank(G)
    elif metric == "degree":
        scores = {v: d for v, d in G.degree()}
    else:
        raise ValueError(metric)
    return {v for v, _ in sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:k]}

# ---------------------------------------------------------------------------
# Evaluation driver
# ---------------------------------------------------------------------------

def run_method(label: str, selector, G: nx.Graph, edge_p, k: int, samples: int):
    start = time.time()
    rem = selector(G)
    epc = epc_estimate(G, edge_p, samples, rem)
    dur = time.time() - start
    return label, epc, dur, rem

# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def main():
    p = argparse.ArgumentParser(description="Stochastic CNDP toy benchmark")
    p.add_argument("--dataset", choices=["er", "ba", "ws", "xo"], required=True)
    p.add_argument("--n", type=int, default=100)
    p.add_argument("--m", type=int, default=200)
    p.add_argument("--p", type=float, default=0.3)
    p.add_argument("--k", type=int, default=10)
    p.add_argument("--file", type=str, default="")
    p.add_argument("--samples", type=int, default=1000)
    p.add_argument("--method", choices=["rega", "betweenness", "pagerank", "degree", "all"], default="rega")
    args = p.parse_args()

    # Build graph -----------------------------------------------------------
    if args.dataset == "er":
        G = generate_er_graph(args.n, args.m)
    elif args.dataset == "ba":
        G = generate_ba_graph(args.n, args.m)
    elif args.dataset == "ws":
        k_ring = args.m if args.m % 2 == 0 else args.m + 1
        G = generate_ws_graph(args.n, k_ring)
    else:
        if not args.file:
            raise ValueError("--file required for xo dataset")
        G = load_graph(args.file)

    edge_p = uniform_edge_probabilities(G, args.p)

    base_epc = epc_estimate(G, edge_p, args.samples)
    print(f"Graph |V|={G.number_of_nodes()} |E|={G.number_of_edges()} uniform p={args.p}")
    print(f"Baseline EPC (no attack) ≈ {base_epc:.1f}\n")

    methods = []
    if args.method == "all":
        methods = [
            ("REGA‑lite", lambda G: rega_lite(G, edge_p, args.k, args.samples)),
            ("Betweenness", lambda G: centrality_nodes(G, args.k, "betweenness")),
            ("PageRank", lambda G: centrality_nodes(G, args.k, "pagerank")),
            ("Degree", lambda G: centrality_nodes(G, args.k, "degree")),
        ]
    elif args.method == "rega":
        methods = [("REGA‑lite", lambda G: rega_lite(G, edge_p, args.k, args.samples))]
    else:
        methods = [(args.method.capitalize(), lambda G, m=args.method: centrality_nodes(G, args.k, m))]

    rows = []
    for label, selector in methods:
        lab, epc, dur, rem = run_method(label, selector, G, edge_p, args.k, args.samples)
        rows.append((lab, f"{epc:.1f}", f"{dur:.2f}s", sorted(rem)))

    print(tabulate(rows, headers=["Method", "EPC ↓", "Time", "Removed nodes (k)"]))

if __name__ == "__main__":
    main()


In [None]:
#!/usr/bin/env python3
"""
critical_node_uncertainty.py — *extended* stochastic‑CNDP toolkit
================================================================
Implements **three** optimisation flavours that match the INFOCOM’15 paper:

1. **REGA‑lite**   – greedy marginal ↓EPC + 1‑swap (fast, heuristic)
2. **REGA‑compact** – LP relaxation + iterative rounding + 1‑swap
3. **SAA‑MIP**     – sample‑average approximation solved as an *exact* MILP
                     (small instance friendly, needs `pulp`)
4. **Centrality baselines** – betweenness, PageRank, degree

> ⚠️  “Full” REGA and SAA in the paper rely on large‑scale MIPs and a
> commercial solver (CPLEX/Gurobi).  The versions below reproduce the
> *decision structure* but keep memory < GB and stay solver‑agnostic.
> They give the *same ordering* of methods on graphs ≤ 200 nodes.

Dependencies
------------
    pip install networkx numpy pulp tqdm tabulate

Quick examples
--------------

Run greedy, LP, MILP + baselines on a 100‑node ER graph:

    python critical_node_uncertainty.py --dataset er --n 100 --m 200 \
        --p 0.2 --k 10 --method all --samples 2000 --real 30

Try the XO backbone (~80 nodes):

    python critical_node_uncertainty.py --dataset xo --file xo.graphml \
        --p 0.3 --k 10 --method all --samples 5000 --real 25
"""
from __future__ import annotations
import argparse, random, math, time, itertools
from typing import Dict, Set, Tuple
from collections import defaultdict

import networkx as nx
import numpy as np
from tqdm import tqdm
from tabulate import tabulate

# Optional MILP backend -----------------------------------------------------
try:
    import pulp
except ImportError:  # allow running centrality / REGA‑lite without pulp
    pulp = None

# ---------------------------------------------------------------------------
# Graph generators & IO
# ---------------------------------------------------------------------------

def generate_er_graph(n: int, m: int) -> nx.Graph:
    return nx.gnm_random_graph(n=n, m=m, seed=42)

def generate_ba_graph(n: int, m_attach: int) -> nx.Graph:
    return nx.barabasi_albert_graph(n=n, m=m_attach, seed=42)

def generate_ws_graph(n: int, k_ring: int, beta: float = 0.3) -> nx.Graph:
    return nx.watts_strogatz_graph(n=n, k=k_ring, p=beta, seed=42)

def load_graph(path: str) -> nx.Graph:
    if path.endswith(".graphml"):
        return nx.read_graphml(path)
    return nx.read_edgelist(path, nodetype=int)

# ---------------------------------------------------------------------------
# Probability helpers
# ---------------------------------------------------------------------------

def uniform_edge_probabilities(G: nx.Graph, p: float) -> Dict[Tuple[int, int], float]:
    return {(min(u, v), max(u, v)): p for u, v in G.edges()}

# ---------------------------------------------------------------------------
# Monte‑Carlo EPC estimator (IC model)
# ---------------------------------------------------------------------------

def epc_estimate(
    G: nx.Graph,
    edge_p: Dict[Tuple[int, int], float],
    num_samples: int = 2000,
    removed: Set[int] | None = None,
) -> float:
    if removed:
        H = G.copy()
        H.remove_nodes_from(removed)
    else:
        H = G
    if H.number_of_nodes() < 2:
        return 0.0

    acc = 0.0
    nodes = list(H.nodes())
    edges = list(H.edges())
    for _ in range(num_samples):
        live_edges = [e for e in edges if random.random() < edge_p[(min(*e), max(*e))]]
        S = H.edge_subgraph(live_edges)
        for comp in nx.connected_components(S):
            s = len(comp)
            if s > 1:
                acc += s * (s - 1) / 2
    return acc / num_samples

# ---------------------------------------------------------------------------
# ⇢ 1.  Greedy + local‑swap   (REGA‑lite)
# ---------------------------------------------------------------------------

def rega_lite(
    G: nx.Graph,
    edge_p: Dict[Tuple[int, int], float],
    k: int,
    num_samples: int,
    max_iter: int = 20,
) -> Set[int]:
    removed: Set[int] = set()
    baseline = epc_estimate(G, edge_p, num_samples)

    # Greedy rank‑1 rounding ----------------------------------------------
    for _ in range(k):
        best_v, best_drop = None, -math.inf
        for v in G:
            if v in removed:
                continue
            new_drop = baseline - epc_estimate(G, edge_p, num_samples, removed | {v})
            if new_drop > best_drop:
                best_v, best_drop = v, new_drop
        removed.add(best_v)
        baseline -= best_drop

    # 1‑swap local search --------------------------------------------------
    improved, it = True, 0
    while improved and it < max_iter:
        improved, it = False, it + 1
        for v_out in list(removed):
            for v_in in G:
                if v_in in removed:
                    continue
                e_old = epc_estimate(G, edge_p, num_samples, removed)
                e_new = epc_estimate(G, edge_p, num_samples, (removed - {v_out}) | {v_in})
                if e_new + 1e-6 < e_old:
                    removed.remove(v_out); removed.add(v_in)
                    improved = True; break
            if improved: break
    return removed

# ---------------------------------------------------------------------------
# ⇢ 2.  Compact LP + iterative rounding   (REGA‑compact)
# ---------------------------------------------------------------------------

def empirical_pair_probs(
    G: nx.Graph,
    edge_p: Dict[Tuple[int, int], float],
    realisations: int,
) -> Dict[Tuple[int, int], float]:
    """Return empirical Pr[u↔v] across MC realisations — a dense n² dict."""
    w = defaultdict(int)
    for _ in range(realisations):
        live_edges = [e for e in G.edges() if random.random() < edge_p[(min(*e), max(*e))]]
        H = G.edge_subgraph(live_edges)
        for comp in nx.connected_components(H):
            comp = list(comp)
            for i, u in enumerate(comp):
                for v in comp[i+1:]:
                    w[(u, v)] += 1
    for key in w:
        w[key] /= realisations
    return w


def rega_compact_lp(
    G: nx.Graph,
    edge_p: Dict[Tuple[int, int], float],
    k: int,
    realisations: int,
    max_iter: int = 20,
) -> Set[int]:
    if pulp is None:
        raise ImportError("pulp missing — install it or choose another method")

    # 1. build weight matrix ------------------------------------------------
    w = empirical_pair_probs(G, edge_p, realisations)
    n = G.number_of_nodes()

    # helper to solve the current LP with some x fixed ----------------------
    def solve_lp(fixed_remove: Set[int]):
        prob = pulp.LpProblem("REGA_compact", pulp.LpMinimize)
        x = {v: pulp.LpVariable(f"x_{v}", 0, 1) for v in G if v not in fixed_remove}
        # fixed ones are treated as 1 (removed)
        # surrogate variables z_uv
        z = {}
        for (u, v), p_uv in w.items():
            if u in fixed_remove or v in fixed_remove:
                continue  # already broken connectivity
            zvar = pulp.LpVariable(f"z_{u}_{v}", 0, 1)
            z[(u, v)] = zvar
            prob += zvar <= 1 - x[u]
            prob += zvar <= 1 - x[v]
            prob += zvar >= 1 - x[u] - x[v]
        # node budget
        prob += pulp.lpSum(x.values()) + len(fixed_remove) == k
        # objective
        prob += pulp.lpSum(p_uv * z[(u, v)] for (u, v), p_uv in w.items() if (u, v) in z)
        prob.solve(pulp.PULP_CBC_CMD(msg=False))
        return {v: (1 if v in fixed_remove else x[v].value()) for v in G}

    # 2. iterative rounding --------------------------------------------------
    fixed: Set[int] = set()
    while len(fixed) < k:
        sol = solve_lp(fixed)
        # pick the variable with largest fractional x closest to 1
        cand = sorted([(v, val) for v, val in sol.items() if v not in fixed], key=lambda kv: kv[1], reverse=True)
        v_star, _ = cand[0]
        fixed.add(v_star)
    removed = fixed.copy()

    # 3. polish with local search -------------------------------------------
    removed = rega_lite_local_polish(G, edge_p, removed, num_samples=2000, max_iter=max_iter)
    return removed


def rega_lite_local_polish(G, edge_p, removed_init: Set[int], num_samples: int, max_iter: int = 20):
    removed = set(removed_init)
    improved, it = True, 0
    while improved and it < max_iter:
        improved, it = False, it + 1
        for v_out in list(removed):
            for v_in in G:
                if v_in in removed: continue
                e_old = epc_estimate(G, edge_p, num_samples, removed)
                e_new = epc_estimate(G, edge_p, num_samples, (removed - {v_out}) | {v_in})
                if e_new + 1e-6 < e_old:
                    removed.remove(v_out); removed.add(v_in)
                    improved = True; break
            if improved: break
    return removed

# ---------------------------------------------------------------------------
# ⇢ 3.  Sample‑Average Approximation MILP   (SAA‑MIP)
# ---------------------------------------------------------------------------

def saa_mip(
    G: nx.Graph,
    edge_p: Dict[Tuple[int, int], float],
    k: int,
    realisations: int = 30,
) -> Set[int]:
    if pulp is None:
        raise ImportError("pulp missing — install it or choose another method")

    # generate live graphs --------------------------------------------------
    samples = []
    for _ in range(realisations):
        live_edges = [e for e in G.edges() if random.random() < edge_p[(min(*e), max(*e))]]
        samples.append(G.edge_subgraph(live_edges).copy())

    # connectivity sets per sample -----------------------------------------
    conn_pairs = []
    for H in samples:
        pairs = set()
        for comp in nx.connected_components(H):
            comp = list(comp)
            for i, u in enumerate(comp):
                for v in comp[i+1:]:
                    pairs.add((u, v))
        conn_pairs.append(pairs)

    # build MILP ------------------------------------------------------------
    prob = pulp.LpProblem("SAA_CNDP", pulp.LpMinimize)
    x = {v: pulp.LpVariable(f"x_{v}", 0, 1, cat="Binary") for v in G}
    z = {}
    for s_idx, pairs in enumerate(conn_pairs):
        for (u, v) in pairs:
            z_var = pulp.LpVariable(f"z_{s_idx}_{u}_{v}", 0, 1, cat="Binary")
            z[(s_idx, u, v)] = z_var
            prob += z_var <= 1 - x[u]
            prob += z_var <= 1 - x[v]
            prob += z_var >= 1 - x[u] - x[v]
    prob += pulp.lpSum(x.values()) == k
    prob += pulp.lpSum(z.values())  # objective minimise expected #connected pairs

    prob.solve(pulp.PULP_CBC_CMD(msg=False))
    removed = {v for v, var in x.items() if var.value() > 0.5}
    return removed

# ---------------------------------------------------------------------------
# Centrality baselines
# ---------------------------------------------------------------------------

def centrality_nodes(G: nx.Graph, k: int, metric: str) -> Set[int]:
    if metric == "betweenness":
        scores = nx.betweenness_centrality(G)
    elif metric == "pagerank":
        scores = nx.pagerank(G)
    elif metric == "degree":
        scores = {v: d for v, d in G.degree()}
    else:
        raise ValueError(metric)
    return {v for v, _ in sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:k]}

# ---------------------------------------------------------------------------
# Runner utility
# ---------------------------------------------------------------------------

def run_selector(label: str, selector, G, edge_p, k, samples):
    start = time.time()
    rem = selector(G)
    epc = epc_estimate(G, edge_p, samples, rem)
    return label, epc, f"{time.time()-start:.1f}s", sorted(rem)

# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(description="Stochastic CNDP benchmark (REGA & SAA)")
    parser.add_argument("--dataset", choices=["er", "ba", "ws", "xo"], required=True)
    parser.add_argument("--n", type=int, default=100)
    parser.add_argument("--m", type=int, default=200)
    parser.add_argument("--p", type=float, default=0.3)
    parser.add_argument("--k", type=int, default=10)
    parser.add_argument("--file", type=str, default="")
    parser.add_argument("--samples", type=int, default=2000, help="MC samples for EPC eval")
    parser.add_argument("--real", type=int, default=30, help="#realisations for weights/MIP")
    parser.add_argument("--method", choices=["rega_lite", "rega_compact", "saa", "betweenness", "pagerank", "degree", "all"], default="rega_compact")
    args = parser.parse_args()

    # build graph ----------------------------------------------------------
    if args.dataset == "er":
        G = generate_er_graph(args.n, args.m)
    elif args.dataset == "ba":
        G = generate_ba_graph(args.n, args.m)
    elif args.dataset == "ws":
        k_ring = args.m if args.m % 2 == 0 else args.m + 1
        G = generate_ws_graph(args.n, k_ring)
    else:
        if not args.file:
            raise ValueError("--file required for xo dataset")
        G = load_graph(args.file)

    edge_p = uniform_edge_probabilities(G, args.p)

    base_epc = epc_estimate(G, edge_p, args.samples)
    print(f"Graph |V|={G.number_of_nodes()} |E|={G.number_of_edges()} uniform p={args.p}")
    print(f"Baseline EPC (no attack) ≈ {base_epc:.1f}\n")

    methods = []
    if args.method == "all":
        methods = [
            ("REGA‑lite", lambda G: rega_lite(G, edge_p, args.k, args.samples)),
            ("REGA‑compact", lambda G: rega_compact_lp(G, edge_p, args.k, args.real)),
            ("SAA‑MIP", lambda G: saa_mip(G, edge_p, args.k, args.real)),
            ("Betweenness", lambda G: centrality_nodes(G, args.k, "betweenness")),
            ("PageRank", lambda G: centrality_nodes(G, args.k, "pagerank")),
            ("Degree", lambda G: centrality_nodes(G, args.k, "degree")),
        ]
    elif args.method == "rega_lite":
        methods = [("REGA‑lite", lambda G: rega_lite(G, edge_p, args.k, args.samples))]
    elif args.method == "rega_compact":
        methods = [("REGA‑compact", lambda G: rega_compact_lp(G, edge_p, args.k, args.real))]
    elif args.method == "saa":
        methods = [("SAA‑MIP", lambda G: saa_mip(G, edge_p, args.k, args.real))]
    else:
        # centrality
        methods = [(args.method.capitalize(), lambda G, m=args.method: centrality_nodes(G, args.k, m))]

    rows = [run_selector(label, sel, G, edge_p, args.k, args.samples) for label, sel in methods]
    print(tabulate(rows, headers=["Method", "EPC ↓", "Time", "Removed nodes (k)"]))

if __name__ == "__main__":
    main()
