In [1]:
import networkx as nx
import numpy as np
import time
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import linprog
from tqdm import tqdm
from itertools import combinations
from collections import defaultdict, deque
import math, random
import heapq
import scipy.sparse as sp
from collections import deque
from joblib import Parallel, delayed
from numba import njit
from typing import Tuple, Dict, List, Set, Sequence, Union
import itertools

# 1. Heuristics

In [2]:
er = nx.erdos_renyi_graph(100, 0.045, seed=42)
er.number_of_nodes(), er.number_of_edges() 

(100, 202)

In [9]:
ba = nx.barabasi_albert_graph(100, 2, seed=42)
ba.number_of_nodes(), ba.number_of_edges()

(100, 196)

In [10]:
ws = nx.watts_strogatz_graph(100, 4, 0.4, seed=42)
ws.number_of_nodes(), ws.number_of_edges()

(100, 200)

In [11]:
er = nx.gnp_random_graph(100, 0.045, seed=42)
er.number_of_nodes(), er.number_of_edges()

(100, 202)

## 1.1 Greedy + EPC (lone)

In [3]:
def sigma_exact(
    G: nx.Graph,
    S: set,
    use_tqdm: bool = False
) -> int:
    from itertools import product
    edges = list(G.edges())
    total_conn = 0.0

    for state in product([0,1], repeat=len(edges)):
        p_state = 1
        Gp = nx.Graph()
        Gp.add_nodes_from(set(G.nodes())-S)

        for (e, keep) in zip(edges, state):
            p_edge = G.edges[e]['p']
            p_state *= (p_edge if keep else (1-p_edge))

            if keep and e[0] not in S and e[1] not in S:
                Gp.add_edge(*e)

        # count connected i<j pairs in Gp−S
        for i,j in combinations(set(G.nodes())-S, 2):
            if nx.has_path(Gp, i, j):
                total_conn += p_state

    return total_conn

In [4]:
def component_sampling_epc_mc(G, S, num_samples=10_000,
                              epsilon=None, delta=None, use_tqdm=False):
  """
  Theoretic bounds: compute N = N(epsilon, delta) by the theoretical bound.
  Experimentation:  Otherwise, use the N as input for sample count.
  """

  # Surviving vertex set and its size
  V_remaining = set(G.nodes()) - S
  n_rem = len(V_remaining)

  # base case
  if n_rem < 2:
    return 0.0

  if num_samples is None:
    assert epsilon is not None and delta is not None
    P_E = sum(G.edges[u, v]['p'] for u, v in G.edges())
    coeff = 4 * (math.e - 2) * math.log(2 / delta)
    num_samples = math.ceil(coeff * n_rem * (n_rem - 1) /
                            (epsilon ** 2 * P_E))

  C2 = 0
  it = tqdm(range(num_samples), desc='Component sampling',
            total=num_samples) if use_tqdm else range(num_samples)

  for _ in it:
    u = random.choice(tuple(V_remaining))

    # BFS based on edge probabilities

    visited = {u}
    queue = [u]

    while queue:

      v = queue.pop()
      for w in G.neighbors(v):

        # flip a coin biased by the edge probability
        # w not in deleted nodes
        if w in V_remaining and random.random() < G.edges[v, w]['p']:

          # if w is not visited
          if w not in visited:
              visited.add(w)
              queue.append(w)

    # component counting
    C2 += (len(visited) - 1)

  return (n_rem * C2) / (2 * num_samples)

In [5]:
def greedy_cndp_epc(
    G: nx.Graph,
    K: int,
    num_samples: int = 10000,
    exact: bool = False,
    use_tqdm: bool = False
) -> set:
  """
  Algorithm 2 from the paper: Greedy selection of S |S| <= K
  to minimize sigma(S) via sigma_monte_carlo().

  Returns the list S (in pick order).
  """

  # S <= {Empty set} init
  S = set()

  Sigma_delta = []
  # Current sigma(S) for the empty set
  sigma_S = 0
  if exact:
    sigma_S = sigma_exact(G, S)
  else:
    sigma_S = component_sampling_epc_mc(G, S, num_samples=num_samples)

  Sigma_delta.append(sigma_S)
  # print(f"Initial sigma(S): {sigma_S}")

  if use_tqdm:
    it = tqdm(range(K), desc='Greedy selection', total=K)
  else:
    it = range(K)

  # Greedily select K nodes
  for _ in it:
    # inits
    best_j = None
    best_gain = -float('inf')
    best_sigma = None

    # find v maximizing gain sigma(S) - sigma(S ∪ j)
    for j in G:
      # Skip if j is already in S to avoid redundant calculations
      # j ∈ S
      if j in S:
        continue

      # S ∪ j = S + {j}
      if exact:
        sigma_Sj = sigma_exact(G, S | {j})
      else:
        sigma_Sj = component_sampling_epc_mc(G, S | {j}, num_samples=num_samples)

      gain = sigma_S - sigma_Sj

      # j <= argmax_{j ∈ V\S} (sigma(S) - sigma(S ∪ j))

      if gain > best_gain:
        best_gain = gain
        best_j = j
        best_sigma = sigma_Sj


    # add the best node
    if best_j is None:
      break

    S.add(best_j)
    sigma_S = best_sigma

    Sigma_delta.append(best_sigma)
    # print(f"Selected node {best_j}, gain: {best_gain}, new sigma(S): {sigma_S}")

  return S, Sigma_delta

In [27]:
t0 = time.perf_counter()

G = er.copy()
K = 10
p = 0.9

records = []
print(f"G: nodes: {G.number_of_nodes()} and edges: {G.number_of_edges()}")  

for p in tqdm(np.arange(0.0, 1.1, 0.1), desc="Processing", total=int(1.0/0.1)):
  def fresh_graph():
    H = G.copy()
    for u, v in H.edges():
      H[u][v]['p'] = p
    return H


  Sg, sigma_delta = greedy_cndp_epc(fresh_graph(), K, 10_000, False, False)
  t1 = time.perf_counter()
  t_greedy = t1 - t0

  print(f"Greedy sigma(S): S: {Sg}")
  print(f"Time: {t_greedy:.4f}s")
  print(f"Sigma Delta: {sigma_delta}")

  records.append({
    'model': 'ER',
    'p': p,
    'algo': 'Greedy',
    'time': t_greedy,
    'epc': sigma_delta[-1],
  })

G: nodes: 100 and edges: 202


Processing:  10%|█         | 1/10 [00:15<02:15, 15.08s/it]

Greedy sigma(S): S: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
Time: 15.0781s
Sigma Delta: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


Processing:  20%|██        | 2/10 [00:35<02:24, 18.02s/it]

Greedy sigma(S): S: {96, 65, 98, 34, 59, 75, 16, 84, 24, 27}
Time: 35.1665s
Sigma Delta: [32.35, 30.38805, 28.4249, 27.08725, 25.224, 23.08975, 21.9631, 21.00405, 20.4102, 19.4922, 17.9685]


Processing:  30%|███       | 3/10 [01:07<02:51, 24.54s/it]

Greedy sigma(S): S: {96, 65, 34, 64, 39, 12, 22, 24, 90, 27}
Time: 67.4582s
Sigma Delta: [148.35, 126.6408, 110.642, 100.6181, 90.8256, 80.46975, 73.085, 66.0858, 58.029, 55.04135, 50.301]


Processing:  40%|████      | 4/10 [02:27<04:39, 46.62s/it]

Greedy sigma(S): S: {96, 64, 34, 65, 12, 22, 54, 24, 90, 27}
Time: 147.9338s
Sigma Delta: [722.34, 567.7749, 470.8949, 401.7837, 335.0256, 283.56075, 240.9032, 206.3391, 177.1046, 159.1772, 128.0565]


Processing:  50%|█████     | 5/10 [06:13<09:15, 111.14s/it]

Greedy sigma(S): S: {96, 65, 34, 5, 10, 12, 54, 24, 90, 27}
Time: 373.4752s
Sigma Delta: [2148.365, 1862.5464, 1624.7224, 1355.93875, 1169.0304, 971.2895, 800.3066, 652.2741, 535.8034, 422.94525, 327.843]


Processing:  60%|██████    | 6/10 [12:59<14:05, 211.40s/it]

Greedy sigma(S): S: {96, 65, 34, 5, 10, 12, 54, 24, 90, 27}
Time: 779.4848s
Sigma Delta: [3313.425, 3038.53275, 2744.5341, 2504.7728, 2291.2368, 2047.25, 1768.7369, 1509.9852, 1288.0552, 1075.5745, 869.1615]


Processing:  70%|███████   | 7/10 [22:38<16:34, 331.66s/it]

Greedy sigma(S): S: {96, 65, 34, 10, 79, 84, 54, 24, 90, 27}
Time: 1358.7315s
Sigma Delta: [3941.675, 3671.94465, 3441.1328, 3227.66045, 3047.1888, 2833.36075, 2627.1543, 2423.4126, 2244.1376, 2069.92695, 1880.739]


Processing:  80%|████████  | 8/10 [33:10<14:14, 427.21s/it]

Greedy sigma(S): S: {65, 34, 43, 12, 79, 24, 27, 92, 93, 94}
Time: 1990.5434s
Sigma Delta: [4318.695, 4112.0838, 3879.9719, 3662.1574, 3435.5568, 3251.337, 3056.504, 2869.8963, 2736.241, 2593.6547, 2439.3825]


Processing:  90%|█████████ | 9/10 [45:06<08:37, 517.37s/it]

Greedy sigma(S): S: {0, 64, 6, 27, 43, 12, 79, 50, 59, 94}
Time: 2706.1650s
Sigma Delta: [4532.79, 4333.6359, 4179.8911, 3988.9892, 3839.5056, 3681.4875, 3544.3123, 3380.1315, 3233.0686, 3047.08495, 2939.706]


Processing: 100%|██████████| 10/10 [57:21<00:00, 584.65s/it]

Greedy sigma(S): S: {0, 64, 6, 40, 43, 47, 50, 59, 94, 31}
Time: 3441.4722s
Sigma Delta: [4670.15, 4474.008, 4284.2366, 4115.45295, 3939.1824, 3775.29525, 3617.1952, 3453.2853, 3308.6052, 3192.33915, 3045.6495]


Processing: 11it [1:09:21, 378.29s/it]                      

Greedy sigma(S): S: {0, 64, 6, 43, 47, 81, 50, 51, 59, 94}
Time: 4161.2153s
Sigma Delta: [4746.21, 4542.14475, 4361.049, 4162.8132, 4003.2912, 3808.92525, 3641.4425, 3487.83015, 3309.4332, 3214.484, 3130.8885]





In [29]:
df = pd.DataFrame(records)
df.to_csv(f"Greedy_result_100_200_ER.csv", index=False)
df

Unnamed: 0,model,p,algo,time,epc
0,ER,0.0,Greedy,15.078099,0.0
1,ER,0.1,Greedy,35.166469,17.9685
2,ER,0.2,Greedy,67.458208,50.301
3,ER,0.3,Greedy,147.933775,128.0565
4,ER,0.4,Greedy,373.475165,327.843
5,ER,0.5,Greedy,779.484757,869.1615
6,ER,0.6,Greedy,1358.73147,1880.739
7,ER,0.7,Greedy,1990.54345,2439.3825
8,ER,0.8,Greedy,2706.164965,2939.706
9,ER,0.9,Greedy,3441.472245,3045.6495


In [None]:
from pulp import (
    LpProblem, LpVariable, lpSum,
    LpBinary, LpMinimize, PULP_CBC_CMD, LpStatus
)

def sample_scenarios(G_prob, T=30, rng=None):
    rng = rng or random.Random()
    scenarios = []
    for _ in range(T):
        H = nx.Graph()
        H.add_nodes_from(G_prob.nodes)
        for u, v, d in G_prob.edges(data=True):
            if rng.random() < d['p']:
                H.add_edge(u, v)
        scenarios.append(H)
    return scenarios

def solve_saa_mip_cbc(scenarios, k, msg=False):
    """
    Solve the SAA master problem with CBC and return initial set S₀ (|S₀|=k).
    """
    n = len(scenarios[0])
    T = len(scenarios)
    prob = LpProblem("k-pCND-SAA-CBC", LpMinimize)

    s = [LpVariable(f"s_{i}", cat=LpBinary) for i in range(n)]

    x = {}
    for l, H in enumerate(scenarios):
        for i, j in itertools.combinations(range(n), 2):
            x[(l, i, j)] = LpVariable(f"x_{l}_{i}_{j}", cat=LpBinary)
        for i in range(n):
            x[(l, i, i)] = 1

    prob += lpSum(s) == k

    for l, H in enumerate(scenarios):
        for i, j in itertools.combinations(range(n), 2):
            prob += x[(l, i, j)] <= 1 - s[i]
            prob += x[(l, i, j)] <= 1 - s[j]

        for i, j in itertools.combinations(range(n), 2):
            if not nx.has_path(H, i, j):
                prob += x[(l, i, j)] == 0

        for i, h, j in itertools.permutations(range(n), 3):
            ij = tuple(sorted((i, j)))
            ih = tuple(sorted((i, h)))
            hj = tuple(sorted((h, j)))
            prob += x[(l, ij[0], ij[1])] <= x[(l, ih[0], ih[1])]
            prob += x[(l, ij[0], ij[1])] <= x[(l, hj[0], hj[1])]

    prob += (1 / T) * lpSum(x.values())

    status = prob.solve(PULP_CBC_CMD(msg=msg))
    if LpStatus[status] != "Optimal":
        print("⚠ CBC ended with status:", LpStatus[status])

    S0 = {i for i in range(n) if s[i].value() > 0.5}
    return S0

def local_search(G_prob, S0, num_samples=10_000, max_iter=1000, seed=None):
    rng = random.Random(seed)
    S = set(S0)
    best_val = component_sampling_epc_mc(G_prob, S, num_samples)
    for _ in tqdm(range(max_iter), desc="Local_search", total=max_iter):
        improved = False
        for u in list(S):
            # random permutation of outside nodes
            outside = rng.sample(list(set(G_prob.nodes) - S), len(G_prob) - len(S))
            for v in outside:
                candidate = (S - {u}) | {v}      # keeps size = k
                val = component_sampling_epc_mc(G_prob, candidate, num_samples)
                if val < best_val:
                    S, best_val = candidate, val
                    improved = True
                    break
            if improved:
                break
        if not improved:
            break
    return S, best_val

def final_evaluation(G_prob, S, num_samples=100_000):
    return component_sampling_epc_mc(G_prob, S, num_samples)

def saa_algorithm(G_prob, k, T=30, ls_samples=10_000, seed=None):
    """
    Full 4-phase SAA algorithm (CBC core + numba EPC).
    Returns
    -------
    S_best      : set[int]       # final deletion set (size = k)
    epc_est     : float          # EPC estimate after local search (ls_samples draws)
    epc_final   : float          # high-accuracy EPC (100 000 draws)
    """
    scenarios = sample_scenarios(G_prob, T, rng=random.Random(seed))

    S0 = solve_saa_mip_cbc(scenarios, k, msg=False)
    S_best, epc_est = local_search(G_prob, S0, ls_samples, seed=seed)

    epc_final = final_evaluation(G_prob, S_best)

    return S_best, epc_est, epc_final

Restricted license - for non-production use only - expires 2026-11-23
Restricted license - for non-production use only - expires 2026-11-23
Restricted license - for non-production use only - expires 2026-11-23
Restricted license - for non-production use only - expires 2026-11-23
Restricted license - for non-production use only - expires 2026-11-23
Restricted license - for non-production use only - expires 2026-11-23


In [None]:


M = 0.0443

er = nx.erdos_renyi_graph(100, 0.045, seed=42)

G = er.copy()
K = 10

records3 = []
print(f"G: nodes: {G.number_of_nodes()} and edges: {G.number_of_edges()}")  

for p in tqdm(np.arange(0.0, 1.1, 0.1), desc="Processing", total=int(1.1/0.1)):
  def fresh_graph():
    H = G.copy()
    for u, v in H.edges():
      H[u][v]['p'] = p
    return H

  t0 = time.perf_counter()
  S_best, epc_est, epc_final = saa_algorithm(fresh_graph(), k=K, T=30)
  t1 = time.perf_counter()
  t_saa = t1 - t0
  
  print(f"SAA S: S_best {S_best}")
  print(f"SAA sigma(S): {epc_final}")
  print(f"Time: {t_saa:.4f}s")

  records3.append({
    'model': 'ER',
    'p': p,
    'algo': 'SAA',
    'time': t_saa,
    'epc': epc_final,
  })

G: nodes: 100 and edges: 202


Processing:   0%|          | 0/11 [03:38<?, ?it/s]


KeyboardInterrupt: 

In [None]:
df3 = pd.DataFrame(records3)
df3.to_csv(f"SAA_100_200_ER.csv", index=False)

In [6]:
def greedy_epc_mis(G, k, num_samples):

  # Maximal independent set
  MIS = nx.maximal_independent_set(G)
  R = set(MIS)
  target = len(G) - k
  V = G.number_of_nodes()

  sigma_delta = []

  print(f"#MIS: {len(R)}")

  # Greedy grow R set until |R| = |V| - k
  while len(R) < target:
    best_j, best_sigma = None, float('inf')
    for j in G.nodes():
      if j in R:
        continue

      # delete node
      S_j = set(G.nodes()) - (R | {j})
      sigma = component_sampling_epc_mc(G, S=S_j, num_samples=num_samples)

      if sigma < best_sigma:
        best_sigma, best_j = sigma, j

        sigma_delta.append(best_sigma)

    R.add(best_j)
  
  D = set(G.nodes()) - R
  return D, sigma_delta

In [None]:
er = nx.erdos_renyi_graph(100, 0.045, seed=42)

G = er.copy()
K = 10

records4 = []
print(f"G: nodes: {G.number_of_nodes()} and edges: {G.number_of_edges()}")  

for p in tqdm(np.arange(0.1, 1.1, 0.1), desc="Processing", total=int(1.0/0.1)):
  def fresh_graph():
    H = G.copy()
    for u, v in H.edges():
      H[u][v]['p'] = p
    return H

  t0 = time.perf_counter()
  S_mis, sigma_delta = greedy_epc_mis(G=fresh_graph(), k=K, num_samples=100_000)
  t1 = time.perf_counter()
  t_greedy_mis = t1 - t0
  est_final_epc = component_sampling_epc_mc(fresh_graph(), S_mis, 100_000)

  print(f"Greedy S: {S_mis}")
  print(f"Greedy sigma(S): {sigma_delta}")
  print(f"final epc: {est_final_epc}")
  print(f"Time: {t_greedy_mis:.4f}s")

  records4.append({
    'model': 'ER',
    'p': p,
    'algo': 'Greedy',
    'epc': est_final_epc,
  })

G: nodes: 100 and edges: 202


Processing:   0%|          | 0/10 [00:00<?, ?it/s]

#MIS: 39


Processing:  10%|█         | 1/10 [03:08<28:13, 188.15s/it]

Greedy S: {64, 65, 34, 96, 98, 42, 12, 79, 22, 27}
Greedy sigma(S): [0.3246, 0.2038, 0.1074, 0.0994, 0.0974, 0.0924, 0.0912, 0.424145, 0.319185, 0.31857, 0.31775, 0.313035, 0.195365, 0.19229, 0.190855, 0.52311, 0.40698, 0.40299, 0.30681, 0.30177, 0.29694, 0.29442, 0.62694, 0.50396, 0.407425, 0.397965, 0.394525, 0.388935, 0.74008, 0.6039, 0.48972, 0.839475, 0.72225, 0.706275, 0.690525, 0.58815, 0.585675, 0.92368, 0.92, 0.82156, 0.7797, 0.71622, 0.70472, 0.68678, 0.67781, 1.031885, 0.903105, 0.79289, 0.78208, 1.12584, 1.02528, 1.02072, 1.01688, 1.00368, 0.86616, 1.23676, 1.12602, 1.10936, 1.101765, 0.968485, 1.36775, 1.33875, 1.27, 1.19975, 1.09225, 1.42749, 1.33824, 1.30254, 1.289535, 1.23012, 1.22451, 1.58678, 1.54674, 1.45886, 1.4378, 1.38346, 1.29636, 1.76649, 1.651215, 1.51792, 1.5158, 1.50149, 1.499105, 1.40503, 1.88757, 1.75716, 1.65888, 1.62783, 1.62459, 2.129325, 2.013825, 1.887875, 1.88375, 1.82105, 1.8139, 2.40744, 2.21816, 2.09356, 2.08544, 2.02916, 2.0202, 2.56671, 2.389725,

Processing:  20%|██        | 2/10 [06:17<25:12, 189.00s/it]

Greedy S: {65, 34, 39, 10, 42, 43, 75, 78, 24, 27}
Greedy sigma(S): [0.44016, 0.19089, 0.1848, 0.649085, 0.40334, 0.39861, 0.39388, 0.38743, 0.83996, 0.59224, 0.58256, 1.0791, 0.82935, 0.802125, 0.79245, 0.791325, 0.77625, 1.21371, 1.02925, 0.98463, 1.453005, 1.22764, 1.22341, 1.201555, 1.191685, 1.18064, 1.169125, 1.61544, 1.46304, 1.40832, 1.39872, 1.3788, 1.83701, 1.58564, 1.572165, 2.07175, 1.83925, 1.81, 1.8005, 1.78925, 2.221305, 2.029545, 1.99308, 1.989765, 1.96809, 2.45908, 2.21052, 2.17802, 2.64258, 2.410175, 2.403285, 2.91006, 2.64924, 3.48645, 3.058825, 3.043425, 3.89312, 3.55124, 3.48712, 4.35024, 4.0299, 3.999405, 3.93243, 4.84967, 4.39466, 4.38393, 5.3159, 4.920305, 4.871335, 4.84213, 5.6805, 5.3871, 5.3151, 6.22993, 5.787375, 5.7706, 6.60796, 6.36771, 6.31067, 7.123095, 6.83928, 7.71616, 7.70048, 7.61472, 7.35712, 8.250775, 8.16075, 7.928375, 7.874425, 8.79483, 8.71101, 8.62422, 8.54205, 8.32557, 9.83426, 9.374975, 9.26945, 9.241645, 10.63486, 10.40774, 10.3445, 10.16736

Processing:  30%|███       | 3/10 [10:17<24:45, 212.14s/it]

Greedy S: {64, 65, 34, 10, 42, 12, 78, 54, 24, 27}
Greedy sigma(S): [0.704585, 0.68142, 0.301555, 0.28454, 0.283925, 1.68441, 1.01535, 0.99393, 0.98658, 0.6069, 0.59577, 0.58758, 0.58464, 1.883615, 1.311285, 1.287205, 1.27108, 0.89053, 0.885155, 0.869675, 2.19516, 1.55804, 1.21044, 1.19372, 1.177, 1.17106, 2.545425, 1.910025, 1.90935, 1.856475, 1.491075, 1.469475, 1.464975, 2.82049, 2.17856, 2.1436, 1.76318, 1.76042, 3.122445, 2.50369, 2.462095, 2.11218, 2.108185, 2.07975, 3.39576, 2.77992, 2.76408, 2.37432, 3.87296, 3.038735, 2.66756, 4.0975, 3.39925, 3.34975, 3.0355, 2.99025, 2.96675, 4.459185, 3.720195, 3.678375, 3.66945, 3.30021, 3.25176, 4.75514, 4.0027, 3.93198, 3.6179, 5.04984, 4.283725, 4.00521, 5.42754, 4.64481, 4.6278, 6.18145, 5.34105, 5.298425, 6.88352, 6.14348, 6.03652, 7.47384, 6.708615, 8.2273, 7.52579, 7.43357, 9.19987, 9.157685, 8.375345, 8.35381, 8.308675, 9.909, 9.2037, 9.1806, 10.929065, 10.80127, 9.942085, 9.908535, 9.902435, 11.70715, 11.67801, 11.58129, 10.90766,

Processing:  40%|████      | 4/10 [15:05<24:12, 242.09s/it]

Greedy S: {96, 65, 34, 5, 75, 22, 24, 90, 27, 30}
Greedy sigma(S): [1.743, 0.9276, 0.414, 0.402, 0.3954, 0.3926, 0.3908, 2.10904, 1.618885, 0.794785, 0.776335, 0.76752, 2.5179, 2.42655, 1.98387, 1.19931, 1.16697, 2.87111, 2.39252, 1.571005, 1.562405, 3.30506, 3.28614, 2.79114, 1.98594, 1.97582, 1.9602, 3.664575, 3.214575, 2.387025, 2.37015, 2.3688, 4.08825, 3.63285, 2.79565, 2.77495, 2.77288, 5.780295, 5.065425, 3.198115, 3.19177, 3.190125, 3.18942, 6.08016, 5.53032, 3.65688, 3.58176, 3.534, 6.565755, 5.88637, 4.028045, 4.00722, 4.00428, 6.86825, 6.26175, 4.3945, 4.3855, 7.30473, 6.67794, 4.783035, 4.75269, 7.76984, 7.13024, 5.24004, 5.19532, 5.17712, 8.05388, 7.480685, 5.63178, 5.54804, 8.48853, 7.81029, 6.00615, 8.87645, 8.306375, 6.56095, 9.44048, 8.77548, 8.43752, 7.77756, 7.52024, 7.46956, 7.1176, 10.028295, 9.45972, 9.05046, 8.39781, 8.06835, 8.04156, 7.625745, 10.65576, 10.48611, 10.45334, 9.6454, 9.03118, 8.6942, 8.6188, 12.143675, 11.330065, 11.089345, 9.924095, 9.49074, 13.08

Processing:  50%|█████     | 5/10 [22:39<26:32, 318.50s/it]

Greedy S: {65, 34, 5, 10, 75, 76, 78, 54, 24, 27}
Greedy sigma(S): [1.27775, 1.24545, 1.23899, 1.21676, 0.51167, 0.48963, 1.77645, 1.72029, 1.012635, 0.99333, 0.975975, 2.2444, 2.222, 1.4704, 1.4694, 2.73101, 2.72978, 2.00162, 2.000595, 1.992395, 1.964515, 1.944835, 3.29763, 3.25605, 3.23736, 2.50068, 2.49249, 2.49144, 2.49123, 2.47674, 3.78185, 3.75992, 3.722725, 2.96829, 2.96012, 4.27262, 4.22136, 3.4793, 3.46698, 4.709925, 4.000725, 3.98565, 3.97755, 3.9771, 5.18788, 4.49351, 4.48523, 4.44406, 5.77912, 5.76972, 5.731415, 5.0102, 4.97918, 6.25584, 6.2352, 5.54184, 5.4696, 5.466, 5.45976, 5.43312, 6.77866, 6.710305, 5.99907, 5.96085, 5.953255, 7.26525, 7.21475, 6.44775, 7.694625, 7.68519, 7.20732, 7.05636, 6.985215, 8.2888, 8.25786, 7.75762, 7.74046, 7.50048, 8.70896, 8.23037, 9.50562, 9.44244, 9.40005, 9.00963, 10.319925, 10.2828, 10.278125, 10.259975, 10.21405, 11.4408, 11.4072, 12.838965, 12.751755, 14.5754, 14.35819, 14.3347, 16.251845, 15.914365, 18.7566, 18.2565, 18.0345, 17.903

Processing:  60%|██████    | 6/10 [32:47<27:48, 417.07s/it]

Greedy S: {96, 65, 34, 5, 37, 10, 12, 54, 90, 27}
Greedy sigma(S): [2.8364, 0.6146, 0.5912, 0.5908, 0.5864, 0.5806, 3.48664, 1.177315, 1.163375, 4.13763, 1.76694, 1.76463, 4.713015, 2.40714, 2.37962, 2.357045, 5.21884, 2.97176, 2.96472, 2.92644, 8.46675, 3.582675, 3.5811, 3.572775, 9.22714, 4.15035, 4.14414, 9.70644, 4.794235, 4.78366, 10.41144, 5.50032, 5.37408, 10.902255, 5.95546, 5.944925, 11.57825, 6.60925, 6.53825, 12.061755, 7.2318, 12.79928, 11.66906, 9.37248, 8.16738, 13.760125, 12.76134, 10.3138, 10.205945, 9.684955, 9.635135, 9.10487, 14.77926, 13.85154, 11.18529, 10.63368, 10.61478, 16.278625, 15.27735, 12.883475, 12.2507, 12.183875, 17.73464, 16.88568, 14.3234, 13.79028, 21.260715, 19.89186, 16.96149, 16.64628, 16.31454, 15.867375, 15.371475, 15.266025, 22.6113, 21.82772, 18.71167, 18.58958, 18.54202, 18.24535, 17.81209, 17.33069, 16.98443, 16.97863, 24.369655, 23.30795, 20.2606, 20.190685, 20.180655, 19.87592, 19.472065, 19.06054, 19.027795, 26.3307, 25.4058, 22.3479, 22.1

In [None]:
import time
import networkx as nx
import numpy as np
from numba import njit, set_num_threads

# Restrict Numba to a single thread to avoid parallel crashes
set_num_threads(1)

def graph_to_csr(G):
    """
    Convert NetworkX G into CSR arrays and an edge-probability array.
    Returns:
      n        -- number of nodes
      row_ptr  -- int64[n+1]
      col_idx  -- int64[2|E|]
      prob_data-- float64[2|E|]
      mapping  -- dict: node -> index
    """
    mapping = {node: i for i, node in enumerate(G.nodes())}
    n = len(mapping)
    # degree counts
    deg = np.zeros(n, np.int64)
    for u, v in G.edges():
        iu, iv = mapping[u], mapping[v]
        deg[iu] += 1
        deg[iv] += 1
    # build row pointers
    row_ptr = np.empty(n+1, np.int64)
    row_ptr[0] = 0
    for i in range(n):
        row_ptr[i+1] = row_ptr[i] + deg[i]
    total = row_ptr[-1]
    # allocate CSR and prob arrays
    col_idx   = np.empty(total, np.int64)
    prob_data = np.empty(total, np.float64)
    cursor = np.zeros(n, np.int64)
    # fill
    for u, v in G.edges():
        p = G.edges[u, v]['p']
        iu, iv = mapping[u], mapping[v]
        pos = row_ptr[iu] + cursor[iu]
        col_idx[pos]   = iv
        prob_data[pos] = p
        cursor[iu]    += 1
        pos = row_ptr[iv] + cursor[iv]
        col_idx[pos]   = iu
        prob_data[pos] = p
        cursor[iv]    += 1
    return n, row_ptr, col_idx, prob_data, mapping

@njit
def epc_mc_numba(n, row_ptr, col_idx, prob_data, deletion_mask, num_samples):
    """
    Monte-Carlo EPC estimator in nopython mode (no Python objects).
    deletion_mask[i]==True => node i is deleted.
    """
    tot_C2 = 0.0
    visited = np.zeros(n, np.uint8)
    queue   = np.empty(n, np.int64)
    for si in range(num_samples):
        # pick a random surviving root
        u = np.random.randint(0, n)
        while deletion_mask[u]:
            u = np.random.randint(0, n)
        head = 0
        tail = 1
        visited[:] = 0
        visited[u] = 1
        queue[0]   = u
        comp_size  = 0
        # BFS with probability flips
        while head < tail:
            v = queue[head]
            head += 1
            comp_size += 1
            for idx in range(row_ptr[v], row_ptr[v+1]):
                w = col_idx[idx]
                if visited[w] or deletion_mask[w]:
                    continue
                if np.random.random() < prob_data[idx]:
                    visited[w]   = 1
                    queue[tail]  = w
                    tail        += 1
        tot_C2 += (comp_size - 1)
    # count survivors
    n_rem = 0
    for i in range(n):
        if not deletion_mask[i]:
            n_rem += 1
    return (n_rem * tot_C2) / (2.0 * num_samples)


def greedy_epc_mis_numba(n, row_ptr, col_idx, prob_data, mapping, MIS, k, num_samples):
    """
    MIS-seeded greedy: grow survivors from MIS up to n-k.
    Returns:
      D          -- final deletion set of size k
      sigma_delta-- EPC history (increasing as survivors grow)
    """
    # build deletion mask: True=>deleted
    deletion_mask = np.ones(n, np.bool_)
    for node in MIS:
        deletion_mask[mapping[node]] = False
    survivors = set(MIS)
    target    = n - k
    sigma_delta = []
    # initial EPC
    curr_sigma = epc_mc_numba(n, row_ptr, col_idx, prob_data, deletion_mask, num_samples)
    sigma_delta.append(curr_sigma)
    # greedy grow survivors
    while len(survivors) < target:
        best_val  = np.inf
        best_node = None
        for node, idx in mapping.items():
            if node in survivors:
                continue
            # try keeping `node`
            deletion_mask[idx] = False
            val = epc_mc_numba(n, row_ptr, col_idx, prob_data, deletion_mask, num_samples)
            deletion_mask[idx] = True
            if val < best_val:
                best_val, best_node = val, node
        # commit best keep
        deletion_mask[mapping[best_node]] = False
        survivors.add(best_node)
        curr_sigma = best_val
        sigma_delta.append(curr_sigma)
    # final deletions = everything not in survivors
    D = set(mapping.keys()) - survivors
    return D, sigma_delta

In [23]:
t0 = time.perf_counter()

er = nx.erdos_renyi_graph(100, 0.045, seed=42)

G = er.copy()
K = 10

records4 = []
print(f"G: nodes: {G.number_of_nodes()} and edges: {G.number_of_edges()}")  

for p in tqdm(np.arange(0.1, 1.1, 0.1), desc="Processing", total=int(1.0/0.1)):
  def fresh_graph():
    H = G.copy()
    for u, v in H.edges():
      H[u][v]['p'] = p
    return H


  S_mis, sigma_delta = greedy_epc_mis_numba(G=fresh_graph(), k=K, num_samples=100_000)
  t1 = time.perf_counter()
  t_greedy_mis = t1 - t0

  print(f"Greedy S: {S_mis}")
  print(f"Greedy sigma(S): {sigma_delta}")
  print(f"Time: {t_greedy_mis:.4f}s")

  records4.append({
    'model': 'ER',
    'p': p,
    'algo': 'Greedy',
    'time': t_greedy_mis,
    'epc': sigma_delta[-1],
  })

G: nodes: 100 and edges: 202


Processing:  10%|█         | 1/10 [00:09<01:23,  9.33s/it]

Greedy S: {96, 34, 39, 10, 42, 12, 15, 52, 24, 27}
Greedy sigma(S): [0.0, 0.094095, 0.19257, 0.29412, 0.38412, 0.493875, 0.58006, 0.694895, 0.79392, 0.879795, 0.99475, 1.08936, 1.2181, 1.380385, 1.53387, 1.75615, 1.95636, 2.18082, 2.41425, 2.60898, 2.8653, 3.07745, 3.29747, 3.52674, 3.73984, 4.01635, 4.23852, 4.500725, 4.86064, 5.23572, 5.544, 6.026125, 6.42384, 6.831705, 7.3815, 7.888875, 8.40522, 8.87656, 9.53628, 10.091855, 10.6956, 11.29707, 11.97774, 12.757515, 13.32114, 13.974, 14.79587, 15.88881, 16.5792, 17.517425, 18.51975]
Time: 9.3323s


Processing:  20%|██        | 2/10 [00:19<01:16,  9.61s/it]

Greedy S: {64, 65, 34, 96, 98, 10, 12, 24, 90, 27}
Greedy sigma(S): [0.0, 0.1952, 0.395445, 0.60627, 0.787115, 1.01574, 1.2033, 1.41542, 1.590715, 1.82136, 2.04673, 2.2585, 2.45361, 2.65642, 2.916325, 3.13578, 3.479025, 3.84468, 4.301505, 4.75832, 5.19318, 5.6667, 6.288795, 6.69135, 7.301385, 7.6864, 8.4292, 8.92716, 9.678485, 10.438, 11.47401, 12.52685, 13.40338, 14.49468, 15.33511, 16.64445, 17.68425, 18.8784, 20.295275, 21.83337, 23.984795, 25.518, 27.65826, 29.99642, 32.41897, 34.81086, 38.6801, 42.22428, 46.786425, 51.67404, 58.28966, 66.12435]
Time: 19.1319s


Processing:  30%|███       | 3/10 [00:31<01:17, 11.07s/it]

Greedy S: {96, 65, 34, 5, 10, 12, 52, 54, 90, 27}
Greedy sigma(S): [0.0, 0.300105, 0.5914, 0.916965, 1.2201, 1.52478, 1.80422, 2.15955, 2.46468, 2.79133, 3.08064, 3.379285, 3.71, 4.02798, 4.33602, 4.848705, 5.46021, 6.2601, 6.9006, 7.78848, 8.53818, 9.413155, 10.431, 11.232235, 12.16068, 13.06116, 13.80992, 14.98315, 16.25316, 17.38382, 18.1764, 20.155245, 21.6601, 23.81553, 26.50716, 28.509055, 31.25279, 34.878375, 37.5763, 43.965075, 51.22572, 57.92122, 65.99, 77.580585, 88.56615, 108.083845, 126.51072, 142.29085, 171.47712, 206.420985, 249.62696, 291.998765, 342.33615]
Time: 31.9363s


: 