In [3]:
# Select matplotlib's `notebook` backend for figures rendered in this notebook.
# NOTE(review): no plotting call appears in the cells visible here — confirm this is still needed.
%matplotlib notebook

In [1]:
import itertools
import random

import numpy as np
import pandas as pd
from graph_tool import Graph, GraphView
from graph_tool.draw import graph_draw
from matplotlib import pyplot as plt
from tqdm import tqdm

from viz_helpers import lattice_node_pos
from minimum_steiner_tree import min_steiner_tree
from cascade_generator import si, observe_cascade
from eval_helpers import infection_precision_recall
from graph_helpers import remove_filters, load_graph_by_name, gen_random_spanning_tree
from inference import infer_infected_nodes
from experiment import gen_input

In [2]:
# Choose the dataset by name and load it through the project helper.
# NOTE(review): 'karate' presumably names Zachary's karate-club network — confirm in graph_helpers.
graph_name = 'karate'
g = load_graph_by_name(graph_name)  # `g` is the graph every experiment cell below operates on

In [16]:
# Experiment configuration: every tunable value used by the cells below lives here.
# NOTE(review): the saved outputs of the two sampling cells report 200 rounds, so they
# were apparently produced with a different `n_rounds` — re-run before trusting them.
n_rounds = 100        # evaluation rounds run per inference method
n_samples = 200       # random spanning trees drawn once and reused across rounds
subset_size = 50      # forwarded as `subset_size=` to the subset-sampling inference
stop_fraction = 0.25  # forwarded to gen_input as `stop_fraction=`
obs_fraction = 0.5    # forwarded to gen_input as `q=`

# Seed the stdlib and NumPy generators so a Restart & Run All is repeatable.
# NOTE(review): if the project helpers draw from graph-tool's own RNG, this alone is
# not sufficient — also call graph_tool.seed_rng(random_seed); confirm in graph_helpers.
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)

In [19]:
# Draw the pool of random spanning trees once; every round below reuses the same pool.
sp_trees = [gen_random_spanning_tree(g) for _ in range(n_samples)]

# One (precision, recall) pair per round for the 'sampling' inference method.
sampling_scores = []
for i in tqdm(range(n_rounds)):
    # gen_input returns a pair; presumably (observed nodes, true cascade) — confirm in experiment.
    obs, c = gen_input(g, q=obs_fraction, stop_fraction=stop_fraction)
    preds = infer_infected_nodes(
        g, obs,
        sp_trees=sp_trees,
        method='sampling',
    )
    prec, rec = infection_precision_recall(set(preds), c, obs)
    sampling_scores += [(prec, rec)]


  0%|          | 0/200 [00:00<?, ?it/s][A
  0%|          | 1/200 [00:00<01:26,  2.30it/s][A
  1%|          | 2/200 [00:00<01:26,  2.30it/s][A
  2%|▏         | 3/200 [00:01<01:20,  2.44it/s][A
  2%|▏         | 4/200 [00:01<01:16,  2.55it/s][A
  2%|▎         | 5/200 [00:01<01:14,  2.63it/s][A
  3%|▎         | 6/200 [00:02<01:12,  2.68it/s][A
  4%|▎         | 7/200 [00:02<01:10,  2.73it/s][A
  4%|▍         | 8/200 [00:02<01:09,  2.76it/s][A
  4%|▍         | 9/200 [00:03<01:08,  2.78it/s][A
  5%|▌         | 10/200 [00:03<01:07,  2.80it/s][A
  6%|▌         | 11/200 [00:04<01:07,  2.81it/s][A
  6%|▌         | 12/200 [00:04<01:06,  2.81it/s][A
  6%|▋         | 13/200 [00:04<01:05,  2.84it/s][A
  7%|▋         | 14/200 [00:05<01:05,  2.85it/s][A
  8%|▊         | 15/200 [00:05<01:04,  2.85it/s][A
  8%|▊         | 16/200 [00:05<01:04,  2.84it/s][A
  8%|▊         | 17/200 [00:06<01:04,  2.84it/s][A
  9%|▉         | 18/200 [00:06<01:03,  2.85it/s][A
 10%|▉         | 19/200 [00:0

In [20]:
# One (precision, recall) pair per round for 'sampling' inference with `subset_size` set.
# Relies on `sp_trees` already existing in the kernel (built in the full-sampling cell).
subset_sampling_scores = []
for i in tqdm(range(n_rounds)):
    # A fresh input is generated every round, independent of the other methods' runs.
    obs, c = gen_input(g, q=obs_fraction, stop_fraction=stop_fraction)
    preds = infer_infected_nodes(
        g, obs,
        subset_size=subset_size,
        sp_trees=sp_trees,
        method='sampling',
    )
    prec, rec = infection_precision_recall(set(preds), c, obs)
    subset_sampling_scores += [(prec, rec)]

100%|██████████| 200/200 [01:07<00:00,  2.92it/s]


In [42]:
# One (precision, recall) pair per round for the 'min_steiner_tree' inference method.
mst_scores = []
for i in tqdm(range(n_rounds)):
    # A fresh input is generated every round, independent of the other methods' runs.
    obs, c = gen_input(g, q=obs_fraction, stop_fraction=stop_fraction)
    preds = infer_infected_nodes(g, obs, method='min_steiner_tree')
    prec, rec = infection_precision_recall(set(preds), c, obs)
    mst_scores += [(prec, rec)]

100%|██████████| 100/100 [00:02<00:00, 38.02it/s]


In [26]:
# Summarise the full-sampling runs: one row per round, columns precision / recall.
# pandas is imported with the other dependencies at the top of the notebook rather
# than mid-notebook, so this cell no longer hides a dependency.
sampling_scores = np.asarray(sampling_scores)  # list of (prec, rec) tuples -> 2-column array
df = pd.DataFrame(sampling_scores, columns=['prec', 'rec'])
df.describe()

Unnamed: 0,prec,rec
count,200.0,200.0
mean,0.592583,0.25219
std,0.358632,0.175954
min,0.0,0.0
25%,0.333333,0.166667
50%,0.666667,0.2
75%,1.0,0.4
max,1.0,0.8


In [27]:
# Summarise the subset-sampling runs the same way as the full-sampling ones.
# Note that `df` is rebound here, replacing the previous cell's frame.
subset_sampling_scores = np.asarray(subset_sampling_scores)
df = pd.DataFrame(
    data=subset_sampling_scores,
    columns=['prec', 'rec'],
)
df.describe()

Unnamed: 0,prec,rec
count,200.0,200.0
mean,0.62325,0.206565
std,0.41465,0.160966
min,0.0,0.0
25%,0.25,0.107143
50%,0.666667,0.2
75%,1.0,0.333333
max,1.0,0.8


In [44]:
# Column-wise mean over rounds for the min-Steiner-tree runs: [mean precision, mean recall].
mst_scores = np.asarray(mst_scores)
np.mean(mst_scores, axis=0)

array([ 0.09299603,  0.10905952])