In [2]:
%matplotlib notebook



In [3]:
import numpy as np
import random
import itertools
from graph_tool import Graph, GraphView
from graph_tool.draw import graph_draw
from tqdm import tqdm

from matplotlib import pyplot as plt

from viz_helpers import lattice_node_pos
from minimum_steiner_tree import min_steiner_tree
from cascade_generator import si, observe_cascade
from eval_helpers import infection_precision_recall
from graph_helpers import remove_filters, load_graph_by_name

from inference import infer_infected_nodes
from query_selection import RandomQueryGenerator, OurQueryGenerator, PRQueryGenerator
from experiment import gen_input, one_round_experiment

In [11]:
# Name of the graph to load; the same string is embedded in the saved figure's file name.
graph_name = 'lattice'
# Graph object handed to gen_input(...) and one_combined_round(...) in every round below.
g = load_graph_by_name(graph_name)

In [12]:
# Optional sanity-check drawing of the loaded graph, labelling each vertex with str(v).
# Disabled by default; flip the guard to True to render it.
if False:
    import graph_tool as gt
    # Derive the view here instead of relying on a `gv` left over from an earlier
    # kernel session: no visible cell defines `gv` at module level, so enabling
    # this cell on a fresh kernel previously raised NameError.
    # NOTE(review): remove_filters(g) mirrors how one_combined_round builds its
    # `gv`; confirm this is the view you want to draw.
    gv = remove_filters(g)
    pos = gt.draw.sfdp_layout(gv)
    vertex_text = g.new_vertex_property('string')
    for v in g.vertices():
        vertex_text[v] = str(v)
    gt.draw.graph_draw(gv, pos=pos, vertex_text=vertex_text)

In [13]:
def one_combined_round(g, n_queries, obs, c, inference_method, ks, n_samples=100):
    """Run one experiment round for each query-selection strategy.

    The three strategies are evaluated one after another, in this order:
    random, ours (entropy-based), pagerank.  Each strategy gets its own
    ``remove_filters(g)`` result and its own freshly constructed query
    generator, built only after the previous strategy has finished.

    Parameters
    ----------
    g :
        Graph passed to ``remove_filters`` before every strategy.
    n_queries :
        Forwarded to ``one_round_experiment`` as ``n_queries``.
    obs, c :
        Forwarded unchanged to the query generators (``obs``) and to
        ``one_round_experiment`` (``obs`` and ``c``).
    inference_method :
        Forwarded to ``one_round_experiment`` as ``inference_method``.
    ks :
        Forwarded to ``one_round_experiment`` as ``ks``.
    n_samples : int, optional
        Currently unused; kept so existing callers that pass it keep working.
        NOTE(review): ``OurQueryGenerator`` is built with a hard-coded
        ``num_spt=100`` -- confirm whether it was meant to follow ``n_samples``.

    Returns
    -------
    list
        The three values returned by ``one_round_experiment``, in the order
        [random, ours, pagerank].
    """
    # (label printed as progress, method name given to one_round_experiment,
    #  factory that builds the query generator from the unfiltered view).
    # Factories keep generator construction lazy, so each one is created right
    # before its own experiment, exactly as in the unrolled version.
    strategies = [
        ('rand', 'random',
         lambda gv: RandomQueryGenerator(gv, obs)),
        ('our', 'ours',
         lambda gv: OurQueryGenerator(gv, obs, num_spt=100,
                                      method='entropy',
                                      use_resample=False)),
        ('pagerank', 'pagerank',
         lambda gv: PRQueryGenerator(gv, obs)),
    ]

    scores = []
    for progress_label, method_name, make_generator in strategies:
        print(progress_label)
        gv = remove_filters(g)
        query_gen = make_generator(gv)
        score = one_round_experiment(gv, obs, c, query_gen, method_name,
                                     ks=ks,
                                     inference_method=inference_method,
                                     n_queries=n_queries)
        scores.append(score)

    return scores
    

In [None]:
from joblib import Parallel, delayed

# --- experiment configuration -------------------------------------------------
n_rounds = 50            # number of independent rounds submitted to the pool
n_samples = 100          # forwarded to one_combined_round as n_samples
n_queries = 10           # forwarded to one_combined_round; also the x-range of the plots
stop_fraction = 0.25     # forwarded to gen_input
p = 0.4                  # forwarded to gen_input
cascade_model = 'ic'     # forwarded to gen_input as `model`; also used in the figure file name
ks = [5, 10, 15]

# --- run all rounds on 4 workers ----------------------------------------------
# gen_input(...) is called once per round while the generator below is consumed;
# its result is star-unpacked into the positional arguments that follow n_queries.
run_round = delayed(one_combined_round)
round_jobs = (
    run_round(g, n_queries,
              *gen_input(g, p=p, stop_fraction=stop_fraction,
                         model=cascade_model),
              inference_method='sampling',
              ks=ks,
              n_samples=n_samples)
    for _ in tqdm(range(n_rounds))
)
sc = Parallel(n_jobs=4)(round_jobs)



  0%|          | 0/50 [00:00<?, ?it/s][A
  2%|▏         | 1/50 [00:00<00:13,  3.66it/s]

rand
rand
rand
rand


[A

our
our
our
our
pagerank
pagerank
pagerank
pagerank



 16%|█▌        | 8/50 [00:52<01:41,  2.41s/it]

rand


[A
 18%|█▊        | 9/50 [00:54<01:42,  2.49s/it]

rand
rand


[A
 22%|██▏       | 11/50 [00:57<01:23,  2.13s/it]

rand


[A

our
our
our
our
pagerank
pagerank
pagerank
pagerank



 24%|██▍       | 12/50 [01:45<10:10, 16.08s/it]

rand


[A
 26%|██▌       | 13/50 [01:46<07:01, 11.39s/it]

rand


[A
 28%|██▊       | 14/50 [01:48<05:10,  8.62s/it]

rand


[A
 30%|███       | 15/50 [01:53<04:19,  7.41s/it]

rand


[A

our
our
our
our
pagerank
pagerank
pagerank
pagerank



 32%|███▏      | 16/50 [02:39<10:46, 19.02s/it]

rand


[A
 34%|███▍      | 17/50 [02:39<07:20, 13.36s/it]

rand


[A
 36%|███▌      | 18/50 [02:42<05:25, 10.18s/it]

rand


[A
 38%|███▊      | 19/50 [02:46<04:21,  8.45s/it]

rand


[A

our
our
our
our
pagerank
pagerank
pagerank
pagerank



 40%|████      | 20/50 [03:33<09:55, 19.86s/it]

rand


[A
 42%|████▏     | 21/50 [03:34<06:56, 14.36s/it]

rand


[A
 44%|████▍     | 22/50 [03:37<05:05, 10.90s/it]

rand


[A
 46%|████▌     | 23/50 [03:41<04:02,  8.98s/it]

rand


[A

our
our
our
our
pagerank
pagerank
pagerank
pagerank



 48%|████▊     | 24/50 [04:28<08:43, 20.12s/it]

rand


[A
 50%|█████     | 25/50 [04:28<05:57, 14.28s/it]

rand


[A
 52%|█████▏    | 26/50 [04:31<04:22, 10.95s/it]

rand


[A
 54%|█████▍    | 27/50 [04:36<03:26,  8.99s/it]

rand


[A

our
our
our
our
pagerank
pagerank
pagerank
pagerank



 56%|█████▌    | 28/50 [05:20<07:07, 19.42s/it]

rand


[A
 58%|█████▊    | 29/50 [05:20<04:46, 13.67s/it]

rand


[A
 60%|██████    | 30/50 [05:25<03:42, 11.11s/it]

rand


[A
 62%|██████▏   | 31/50 [05:30<02:57,  9.32s/it]

rand


[A

In [None]:
# Regroup the per-round results as scores[k][method] -> list with one array per round.
scores = {k: {'random': [], 'pagerank': [], 'ours': []} for k in ks}

# Each entry of `sc` is the [random, ours, pagerank] triple returned by
# one_combined_round; every element of the triple is iterated with .items(),
# yielding (k, sequence of scores) pairs.
for rand, our, pr in sc:
    labelled_results = (('random', rand), ('ours', our), ('pagerank', pr))
    for label, scores_by_k in labelled_results:
        for k, seq_scores in scores_by_k.items():
            scores[k][label].append(np.asarray(seq_scores))

In [None]:
# Aggregate across rounds (axis 0 of the stacked per-round arrays).
methods = ['random', 'ours', 'pagerank']
mean_scores = {k: {} for k in ks}
# NOTE: despite its name, `variance` stores the standard deviation (np.std).
variance = {k: {} for k in ks}
for k, method in itertools.product(ks, methods):
    stacked_rounds = np.asarray(scores[k][method])
    mean_scores[k][method] = np.mean(stacked_rounds, axis=0)
    variance[k][method] = np.std(stacked_rounds, axis=0)


In [None]:
# 2 x len(ks) grid of panels: `i` is the ROW and also the index into the last axis
# of the mean-score arrays (the original note labels 0 as precision and 1 as
# recall); `j` is the column, one per value of k.
fig, axes = plt.subplots(2, len(ks), figsize=(5 * len(ks), 10), sharex=True)
query_axis = np.arange(n_queries)
for i, (j, k) in itertools.product(range(2), enumerate(ks)):
    ax = axes[i, j]
    for method in methods:
        ax.plot(query_axis, mean_scores[k][method][:, i])
    # Only the top row carries the k=... title.
    if i == 0:
        ax.set_title('k={}'.format(k))
# plt.legend targets the current axes only, so a single legend is drawn.
plt.legend(methods, loc='lower right')
plt.savefig('figs/query_comparison_{}_{}.pdf'.format(graph_name, cascade_model))