In [1]:
import itertools
import math
import random

import networkx as nx
import numpy as np
from graph_tool.all import load_graph, GraphView, shortest_distance, bfs_search
from tqdm import tqdm

from network_stat import get_stat
from synthetic_data import add_p_and_delta, load_data_by_gtype

In [2]:
# Largest int32 value. simulate_cascade compares shortest-path distances
# against it and rewrites matching entries to -1 (vertex not reached).
maxint = np.iinfo(np.int32).max

In [16]:
# Load the graph that every later cell operates on from its graph-tool file.
# Presumably the DBLP collaboration network, judging by the path -- TODO confirm.
g = load_graph('data/dblp-collab/graph.gt')

In [4]:
# Threshold for the sampled edge masks: an edge is kept in a sample when its
# uniform random draw is <= p.
p = 0.7

In [5]:
# K is the number of edge samples to draw.
K = 100
# One row per sample, one uniform random draw per edge of g.
rands2d = np.random.random((K, g.num_edges()))

In [6]:
# Boolean masks, same shape as rands2d: True where the draw is <= p,
# i.e. the edge is kept in that sample.
edge_masks2d = (rands2d <= p)

In [7]:
# Turn every row of edge_masks2d into a boolean edge property map of g, so it
# can be used directly as an edge filter.
# The loop uses its own names instead of rebinding the module-level
# probability `p`, so the mask-building cell can be re-run safely afterwards.
sampled_edges_list = []
for sample_mask in edge_masks2d:
    edge_filter = g.new_edge_property('bool')
    edge_filter.set_2d_array(sample_mask)
    sampled_edges_list.append(edge_filter)

In [8]:
%%timeit -r 1 -n 1
for p in sampled_edges_list:
    gv = GraphView(g, efilt=p)
    # print(gv.num_edges() / g.num_edges())

1 loop, best of 1: 44.1 ms per loop


In [9]:
%%timeit -r 1 -n 1
for p in sampled_edges_list:
    g.set_edge_filter(p)
    # print(gv.num_edges() / g.num_edges())

1 loop, best of 1: 2.7 ms per loop


In [11]:
def activate_edges_by_p(g, p):
    """Keep each edge of `g` with probability `p` and return the masked graph.

    One uniform random number is drawn per edge (as counted by
    `g.num_edges()`); an edge stays active when its draw is <= `p`.
    The resulting boolean mask is installed as the edge filter of `g`
    itself, so the graph is modified in place and the same object is returned.
    """
    n_edges = g.num_edges()
    keep = np.random.random(n_edges) <= p
    edge_filter = g.new_edge_property('bool')
    edge_filter.set_2d_array(keep)
    g.set_edge_filter(edge_filter)
    return g

In [18]:
# Sanity check for activate_edges_by_p: start from the unfiltered graph so
# num_edges is the total edge count.
g.set_edge_filter(None)
num_edges = g.num_edges()
# Despite its name this is a fraction in [0, 1], not a percentage: the share
# of edges still visible after masking with probability 0.7.
percent = activate_edges_by_p(g, 0.7).num_edges() / num_edges
# decimal=1 is a loose tolerance (NumPy checks |actual - desired| < 1.5e-1).
np.testing.assert_almost_equal(percent, 0.7, decimal=1)
# activate_edges_by_p leaves its mask installed on g; remove it again.
g.set_edge_filter(None)

In [31]:
def simulate_cascade(g, p, source=None):
    """Simulate one cascade on `g` and return the per-vertex infection times.

    Each edge is kept with probability `p` (via `activate_edges_by_p`), and
    the infection time of a vertex is its shortest-path distance from
    `source` in the resulting masked graph.

    Parameters
    ----------
    g : graph
        Graph to simulate on. Its edge filter is replaced during the call
        and cleared (set to None) before returning, even on error.
    p : float
        Probability of keeping each edge.
    source : int, optional
        Index of the source vertex. When None, a vertex index is drawn
        uniformly at random with the `random` module.

    Returns
    -------
    array indexed by vertex
        Infection time of every vertex; entries equal to `maxint`
        (treated as "not reached") are rewritten to -1.
    """
    if source is None:
        # Draw the index directly instead of materialising an index array.
        source = random.randrange(g.num_vertices())

    try:
        activate_edges_by_p(g, p)
        dist = shortest_distance(g, source=g.vertex(source)).a
        dist[dist == maxint] = -1
    finally:
        # Always clear the temporary mask so a failure cannot leave `g` filtered.
        g.set_edge_filter(None)
    return dist

In [35]:
# Run one cascade with edge probability 0.7; no source is given, so
# simulate_cascade picks one at random. c holds the infection time of every
# vertex, with -1 for vertices that were not reached.
c = simulate_cascade(g, 0.7)

In [37]:
# Range of the infection times; a minimum of -1 means at least one vertex
# was not reached by the cascade.
c.min(), c.max()

(PropertyArray(-1, dtype=int32), PropertyArray(19, dtype=int32))

In [39]:
# Fraction of all vertices that were infected (infection time != -1).
c[c != -1].shape[0] / g.num_vertices()

0.9087485807997981

In [52]:
def observe_cascade(c, q, method='uniform'):
    """Pick the observed subset of the infected nodes of a cascade.

    Parameters
    ----------
    c : 1-D array
        Infection time per node; -1 marks an uninfected node.
    q : float
        Fraction of the infected nodes to observe. The number of
        observations is ceil(#infected * q), clamped to [0, #infected].
    method : {'uniform', 'late'}
        'uniform' samples the infected nodes uniformly at random without
        replacement; 'late' returns the nodes with the largest infection
        times.

    Returns
    -------
    np.ndarray
        Indices of the observed nodes (empty when nothing is to be observed).

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    infected = np.nonzero(c != -1)[0]
    n_infected = infected.shape[0]
    # Clamp so that 'late' can never reach into the uninfected (-1) entries,
    # which sort before every infected node.
    num_obs = min(max(int(math.ceil(n_infected * q)), 0), n_infected)

    if method == 'uniform':
        return np.random.permutation(infected)[:num_obs]
    if method == 'late':
        by_time = np.argsort(c)
        # An explicit start index is needed: `[-num_obs:]` would return
        # *every* node when num_obs is 0.
        return by_time[by_time.shape[0] - num_obs:]
    raise ValueError(
        "unknown method {!r}; expected 'uniform' or 'late'".format(method))

In [53]:
# Observe 1% of the infected nodes, chosen uniformly at random, and print the
# smallest and largest infection time among them.
obs = observe_cascade(c, 0.01)
print(c[obs].min(), c[obs].max())

6 16


In [54]:
# Same as the uniform case, but with the 'late' strategy: the observed nodes
# are taken from the end of the infection-time ordering.
obs = observe_cascade(c, 0.01, 'late')
print(c[obs].min(), c[obs].max())

13 19


In [None]:
def source_likelihood_drs(g, obs_nodes, sim_flags,
                          infection_times,
                          source=None,
                          debug=False,
                          eps=1e-3,
                          nan_proba=0.1):
    """Score every node as a candidate source from pairwise time differences.

    Starting from a uniform distribution over the nodes of `g`, every
    unordered pair of observed nodes (o1, o2) multiplies the likelihood of
    each candidate by (probability + eps), where the probability is the
    share of selected simulations whose time difference between o1 and o2
    equals the observed one. The vector is renormalised after each pair.

    Parameters
    ----------
    g : graph
        Only `g.num_vertices()` is used.
    obs_nodes : iterable of int
        Observed node indices.
    sim_flags :
        Accepted for interface compatibility; not used by this function.
    infection_times :
        Indexable by node; observed infection time of each observed node.
    source : int, optional
        True source; only used for the debug printout.
    debug : bool
        Print per-pair diagnostics.
    eps : float
        Added to every probability so a single zero does not wipe out a
        candidate.
    nan_proba : float
        Value substituted where the ratio is NaN (e.g. 0 / 0).

    Returns
    -------
    np.ndarray
        Normalised likelihood per node (length `g.num_vertices()`).

    Notes
    -----
    NOTE(review): `inf_time_3d` and `precondition_mask_and_count` are not
    parameters and are not defined in the visible part of the notebook; they
    must exist at module level. From the indexing below, `inf_time_3d`
    presumably has shape (candidate source, node, simulation) -- TODO confirm.
    """
    num_nodes = g.num_vertices()

    source_likelihood = np.ones(num_nodes, dtype=np.float64) / num_nodes
    obs_nodes = list(obs_nodes)
    for o1, o2 in itertools.combinations(obs_nodes, 2):
        t1, t2 = infection_times[o1], infection_times[o2]

        sim_mask, counts = precondition_mask_and_count(o1, o2, inf_time_3d)

        # True where a simulation reproduces the observed time difference.
        matches = (inf_time_3d[:, o1, :] - inf_time_3d[:, o2, :]) == (t1 - t2)
        probas = np.sum(matches * sim_mask, axis=1) / counts
        probas[np.isnan(probas)] = nan_proba

        if debug:
            print('t1={}, t2={}'.format(t1, t2))
            if source is not None:
                # Without a known source there is nothing to report here.
                print('source reward: {:.2f}'.format(probas[source]))
            print('obs reward: {}'.format([probas[obs] for obs in set(obs_nodes)-{source}]))

        source_likelihood *= (probas + eps)
        source_likelihood /= source_likelihood.sum()
    return source_likelihood