# Run methods that identify superspreaders & superblockers to spread information.
Using methods (HD, HDA and coreHD) to identify spreaders in a network:
* How well do they reach the most vulnerable nodes?
* How fast are vulnerable nodes reached?

In [None]:
import networkx as nx
import numpy as np
import os
import random
from collections import Counter, defaultdict
import operator
import pickle
from core_functions import *

### Parameters & Functions

In [None]:
# define function to save activation count of nodes
def save_activation_count(id_,method_,run_,seeds_,m_,activation_count_):
    """Write per-node activation counts to a CSV file.

    The output file lives under ``some_path/activation_count/`` and its name
    encodes the network id, the seeding method, the run index, the number of
    seeds and the repetition factor. Each line is ``node,count``.

    Args:
        id_: Network identifier (string), used in the file name.
        method_: Name of the seed-selection method (string), used in the file name.
        run_: Integer index of the run.
        seeds_: Integer number of seed nodes.
        m_: Integer repetition factor (callers in this file pass ``M``).
        activation_count_: Mapping ``node -> count``. Both are formatted
            with ``%d``, so they must be numeric.

    Raises:
        OSError: If the output file cannot be opened (for instance when the
            target directory does not exist).
        TypeError: If a node or count cannot be formatted with ``%d``.
    """
    out_path = 'some_path/activation_count/%s_%s.%d_seeds=%d_m=%dN_p=pc.csv' % (id_,method_,run_,seeds_,m_)
    # The context manager closes the file even if a write fails part-way.
    with open(out_path,'w') as fileOut:
        for n_,c_ in activation_count_.items():
            fileOut.write('%d,%d\n' % (n_,c_))

# define function to save activation time of nodes
def save_activation_time(id_,method_,run_,seeds_,m_,activation_time_):
    """Write per-node activation-time histograms to a CSV file.

    The output file lives under ``some_path/activation_time/`` and its name
    encodes the network id, the seeding method, the run index, the number of
    seeds and the repetition factor. Each line has the form
    ``node,time:count,time:count,...``.

    Args:
        id_: Network identifier (string), used in the file name.
        method_: Name of the seed-selection method (string), used in the file name.
        run_: Integer index of the run.
        seeds_: Integer number of seed nodes.
        m_: Integer repetition factor (callers in this file pass ``M``).
        activation_time_: Mapping ``node -> {time: count}``. Nodes, times and
            counts are formatted with ``%d``, so they must be numeric.

    Raises:
        OSError: If the output file cannot be opened (for instance when the
            target directory does not exist).
        TypeError: If a node, time or count cannot be formatted with ``%d``.
    """
    out_path = 'some_path/activation_time/%s_%s.%d_seeds=%d_m=%dN_p=pc.csv' % (id_,method_,run_,seeds_,m_)
    # The context manager closes the file even if a write fails part-way.
    with open(out_path,'w') as fileOut:
        for n_,c_ in activation_time_.items():
            # One "time:count" entry per observed activation time of this node.
            s_ = ','.join('%d:%d' % (nn_,t_) for nn_,t_ in c_.items())
            fileOut.write('%d,%s\n' % (n_,s_))

# define function to save cascade size
def save_cascade_size(id_,method_,run_,seeds_,m_,cascade_size_):
    """Write the cascade-size histogram to a CSV file.

    The output file lives under ``some_path/cascade_size/`` and its name
    encodes the network id, the seeding method, the run index, the number of
    seeds and the repetition factor. Each line is ``size,occurrences``.

    Args:
        id_: Network identifier (string), used in the file name.
        method_: Name of the seed-selection method (string), used in the file name.
        run_: Integer index of the run.
        seeds_: Integer number of seed nodes.
        m_: Integer repetition factor (callers in this file pass ``M``).
        cascade_size_: Mapping ``cascade size -> number of occurrences``.
            Sizes are formatted with ``%d`` and must be numeric; the
            occurrence value is formatted with ``%s``.

    Raises:
        OSError: If the output file cannot be opened (for instance when the
            target directory does not exist).
        TypeError: If a cascade size cannot be formatted with ``%d``.
    """
    out_path = 'some_path/cascade_size/%s_%s.%d_seeds=%d_m=%dN_p=pc.csv' % (id_,method_,run_,seeds_,m_)
    # save cascade sizes; the context manager closes the file even on error
    with open(out_path,'w') as fileOut:
        for s_,no_ in cascade_size_.items():
            fileOut.write('%d,%s\n' % (s_,no_))

## Parameters

In [None]:
# Placeholder locations of the network edge lists and of the pickled p_c values.
path_to_networks = 'some_path/'
path_to_pc = 'some_path/'

# NOTE(review): these two assignments override the placeholders above, so the
# absolute, machine-specific paths are the ones actually used by the cells below.
path_to_networks = '/Volumes/ExtremeSSD/last-mile/last_mile/networks_empirical/'
path_to_pc = '/Volumes/ExtremeSSD/last-mile/last_mile/p_c/empirical/'

# Network to analyse: keep exactly one `id_` line uncommented. The id is used
# as the base name of the network file (id_ + '.txt') and p_c file (id_ + '.pkl').
#id_ = 'polblogs' # political blogs
#id_ = 'URVemail' # University emails
#id_ = 'facebook_combined' # facebook friendships
#id_ = 'ca-AstroPh' # Collaborations in Astrophysics
#id_ = 'ca-CondMat' # Collaborations in condensed matter physics
#id_ = 'ia-infect-dublin' # Infectious socio patterns 
#id_ = 'UC_irvine' # run on lada

id_ = 'village_HH_smallest' # smallest village (households)
#id_ = 'village_HH_median' # median village (households)
#id_ = 'village_HH_largest' # largest village (households)

# factor of repetitions: each run simulates m = M * N cascades,
# where N is the number of nodes in the loaded network
M = 10 # m = M * N 

## Randomly select nodes

In [None]:
# Baseline: every cascade starts from a freshly drawn random set of seed nodes.
for run in range(10):
    # the network is (re)loaded at the start of every run
    G = load_network(path_to_networks + id_ + '.txt')

    # seed budget: ~1% of the nodes, at least one; the village household
    # networks never get fewer than four seeds
    seeds = max(1, int(round(len(G) / 100., 0)))
    if seeds < 4 and id_ in ('village_HH_largest', 'village_HH_median', 'village_HH_smallest'):
        seeds = 4

    print('%s, seeds = %d, run = %d' % (id_,seeds, run))

    # infection probability (p_c value) stored for this network
    p_infection = load_pc(path_to_pc + id_ + '.pkl')
    print(p_infection)
    m = M * len(G)

    # per-run tallies
    activation_count = Counter()            # node -> number of cascades that reached it
    activation_time = defaultdict(Counter)  # node -> {activation time -> occurrences}
    cascade_size = Counter()                # cascade size -> occurrences

    # candidate pool for the random seed draw
    nodes_ = list(G.nodes())

    for repetition in range(m):
        # draw the seed set and run one spreading process from it
        seed_nodes_ = random.sample(nodes_, seeds)
        c = ICM(G, seed_nodes_, p_infection)

        # c maps each activated node to its activation time
        for node_, step_ in c.items():
            activation_time[node_][step_] += 1
            activation_count[node_] += 1

        cascade_size[len(c)] += 1

        # progress report
        if not repetition % 10000:
            print('\t repetition = %d' % repetition)

    # write the three result files of this run
    save_activation_count(id_, 'random', run, seeds, M, activation_count)
    save_activation_time(id_, 'random', run, seeds, M, activation_time)
    save_cascade_size(id_, 'random', run, seeds, M, cascade_size)

## HD

In [None]:
# HD: every cascade is seeded with the nodes returned by highest_degree().
for run in range(10):
    # the network is (re)loaded at the start of every run
    G = load_network(path_to_networks + id_ + '.txt')

    # seed budget: ~1% of the nodes, at least one; the village household
    # networks never get fewer than four seeds
    seeds = max(1, int(round(len(G) / 100., 0)))
    if seeds < 4 and id_ in ('village_HH_largest', 'village_HH_median', 'village_HH_smallest'):
        seeds = 4

    print('%s, seeds = %d, run = %d' % (id_,seeds, run))

    # infection probability (p_c value) stored for this network
    p_infection = load_pc(path_to_pc + id_ + '.pkl')
    m = M * len(G)

    # per-run tallies
    activation_count = Counter()            # node -> number of cascades that reached it
    activation_time = defaultdict(Counter)  # node -> {activation time -> occurrences}
    cascade_size = Counter()                # cascade size -> occurrences

    for repetition in range(m):
        # the seed set is recomputed for every repetition
        seed_nodes_ = highest_degree(G, seeds)
        c = ICM(G, seed_nodes_, p_infection)

        # c maps each activated node to its activation time
        for node_, step_ in c.items():
            activation_time[node_][step_] += 1
            activation_count[node_] += 1

        cascade_size[len(c)] += 1

        # progress report
        if not repetition % 10000:
            print('\t repetition = %d' % repetition)

    # write the three result files of this run
    save_activation_count(id_, 'HD', run, seeds, M, activation_count)
    save_activation_time(id_, 'HD', run, seeds, M, activation_time)
    save_cascade_size(id_, 'HD', run, seeds, M, cascade_size)

## Degree Discount (superspreader method)
From _Efficient Influence Maximization in Social Networks_, Chen et al. (2009)

In [None]:
# Degree discount: the network, seed budget and p_c are set up once for all runs.
G = load_network(path_to_networks + id_ + '.txt')

# seed budget: ~1% of the nodes, at least one; the village household
# networks never get fewer than four seeds
seeds = max(1, int(round(len(G) / 100., 0)))
if seeds < 4 and id_ in ('village_HH_largest', 'village_HH_median', 'village_HH_smallest'):
    seeds = 4

# infection probability (p_c value) stored for this network
p_infection = load_pc(path_to_pc + id_ + '.pkl')

# Pre-compute 100 degree-discount seed sets; the simulations below sample
# from this pool instead of recomputing the seeds, which speeds them up.
print('creating node seeds')
seed_nodes = []
for i in range(100):
    print('node seed = %d' % i)
    seed_nodes.append(degree_discount(G, p_infection, seeds))

for run in range(10):
    print('%s, seeds = %d, run = %d' % (id_,seeds, run))

    m = M * len(G)

    # per-run tallies
    activation_count = Counter()            # node -> number of cascades that reached it
    activation_time = defaultdict(Counter)  # node -> {activation time -> occurrences}
    cascade_size = Counter()                # cascade size -> occurrences

    for repetition in range(m):
        # pick one of the pre-computed seed sets and spread from it
        seed_nodes_ = random.choice(seed_nodes)
        c = ICM(G, seed_nodes_, p_infection)

        # c maps each activated node to its activation time
        for node_, step_ in c.items():
            activation_time[node_][step_] += 1
            activation_count[node_] += 1

        cascade_size[len(c)] += 1

        # progress report
        if not repetition % 10000:
            print('\t repetition = %d' % repetition)

    # write the three result files of this run
    save_activation_count(id_, 'degreeDiscount', run, seeds, M, activation_count)
    save_activation_time(id_, 'degreeDiscount', run, seeds, M, activation_time)
    save_cascade_size(id_, 'degreeDiscount', run, seeds, M, cascade_size)

## Core HD

In [None]:
# CoreHD: the network and seed budget are set up once for all runs.
G = load_network(path_to_networks + id_ + '.txt')

# seed budget: ~1% of the nodes, at least one; the village household
# networks never get fewer than four seeds
seeds = max(1, int(round(len(G) / 100., 0)))
if seeds < 4 and id_ in ('village_HH_largest', 'village_HH_median', 'village_HH_smallest'):
    seeds = 4

# Pre-compute 100 coreHD seed sets; the simulations below sample from this
# pool instead of recomputing the seeds, which speeds them up.
print('creating node seeds')
seed_nodes = []
for i in range(100):
    print('node seed = %d' % (i+1))
    # coreHD is handed a fresh nx.Graph copy of a newly loaded network each time
    H = load_network(path_to_networks + id_ + '.txt')
    seed_nodes.append(coreHD(nx.Graph(H), seeds))

for run in range(10):
    print('%s, seeds = %d, run = %d' % (id_,seeds, run))

    m = M * len(G)

    # infection probability (p_c value) stored for this network
    p_infection = load_pc(path_to_pc + id_ + '.pkl')

    # per-run tallies
    activation_count = Counter()            # node -> number of cascades that reached it
    activation_time = defaultdict(Counter)  # node -> {activation time -> occurrences}
    cascade_size = Counter()                # cascade size -> occurrences

    for repetition in range(m):
        # pick one of the pre-computed seed sets and spread from it
        seed_nodes_ = random.choice(seed_nodes)
        c = ICM(G, seed_nodes_, p_infection)

        # c maps each activated node to its activation time
        for node_, step_ in c.items():
            activation_time[node_][step_] += 1
            activation_count[node_] += 1

        cascade_size[len(c)] += 1

        # progress report
        if not repetition % 10000:
            print('\t repetition = %d' % repetition)

    # write the three result files of this run
    save_activation_count(id_, 'coreHD', run, seeds, M, activation_count)
    save_activation_time(id_, 'coreHD', run, seeds, M, activation_time)
    save_cascade_size(id_, 'coreHD', run, seeds, M, cascade_size)

# K-core

In [None]:
# K-core: the network and seed budget are set up once for all runs.
G = load_network(path_to_networks + id_ + '.txt')

# seed budget: ~1% of the nodes, at least one; the village household
# networks never get fewer than four seeds
seeds = max(1, int(round(len(G) / 100., 0)))
if seeds < 4 and id_ in ('village_HH_largest', 'village_HH_median', 'village_HH_smallest'):
    seeds = 4

for run in range(10):
    print('%s, seeds = %d, run = %d' % (id_,seeds, run))

    m = M * len(G)

    # infection probability (p_c value) stored for this network
    p_infection = load_pc(path_to_pc + id_ + '.pkl')

    # per-run tallies
    activation_count = Counter()            # node -> number of cascades that reached it
    activation_time = defaultdict(Counter)  # node -> {activation time -> occurrences}
    cascade_size = Counter()                # cascade size -> occurrences

    for repetition in range(m):
        # the seed set is recomputed for every repetition
        seed_nodes_ = k_core(G, seeds)
        c = ICM(G, seed_nodes_, p_infection)

        # c maps each activated node to its activation time
        for node_, step_ in c.items():
            activation_time[node_][step_] += 1
            activation_count[node_] += 1

        cascade_size[len(c)] += 1

        # progress report
        if not repetition % 10000:
            print('\t repetition = %d' % repetition)

    # write the three result files of this run
    save_activation_count(id_, 'kcore', run, seeds, M, activation_count)
    save_activation_time(id_, 'kcore', run, seeds, M, activation_time)
    save_cascade_size(id_, 'kcore', run, seeds, M, cascade_size)