In [1]:
import networkx as nx
import ModelConfig as mc
# import IndependentCascadesModelP001 as icm
# import IndependentCascadesModelP010 as icm
# import WeightedCascadeModel as icm
import ThresholdModel as tm
import operator
import random 
import matplotlib.pyplot as plt
import numpy as np
import math
from concurrent.futures import ThreadPoolExecutor
import time

In [2]:
# Wall-clock reference point; the last cell subtracts it from a second
# perf_counter() reading to report the total elapsed time.
start = time.perf_counter()

In [3]:
def InitModel(g):
    '''
    Build a fresh threshold model and its configuration for graph ``g``.

    Every edge ``(v, w)`` yielded by ``g.edges()`` is given the weight
    ``c / len(list(g.neighbors(w)))``, where ``c`` is the number of times the
    pair appears in the edge list in either orientation. Every node is given
    its own threshold drawn from ``np.random.random_sample()``.

    Parameters:
        g: graph exposing ``edges()``, ``nodes()`` and ``neighbors()``
           (a networkx graph in this notebook).

    Returns:
        (model, config): ``tm.ThresholdModel(g)`` and an ``mc.Configuration``
        holding the ``'threshold'`` node values and ``'weight'`` edge values.
        The configuration is not applied to the model here; callers do that
        through ``model.set_initial_status(config)``.
    '''
    model = tm.ThresholdModel(g)
    config = mc.Configuration()

    # Tally the edge list in a single pass instead of rebuilding and scanning
    # it for every edge, which made this step quadratic in the edge count.
    edge_counts = {}
    for edge in g.edges():
        edge_counts[edge] = edge_counts.get(edge, 0) + 1

    edge_dict = {}
    for (v, w), forward in edge_counts.items():
        # Occurrences of the reversed pair count too (for a self-loop this is
        # the same key, so it is counted twice, as before).
        backward = edge_counts.get((w, v), 0)
        edge_dict[(v, w)] = (forward + backward) / len(list(g.neighbors(w)))

    # One independent random threshold per node, drawn in node order.
    threshold_dict = {node: np.random.random_sample() for node in g.nodes()}

    config.add_node_set_configuration('threshold', threshold_dict)
    config.add_edge_set_configuration('weight', edge_dict)
    return model, config

def InfluenceSpread(model, config, target_set):
    '''
    Seed the model with ``target_set`` and return the resulting active-set size.

    ``target_set`` is registered on ``config`` under the "Infected" status,
    the configuration is applied to ``model``, and ``model.iteration_bunch()``
    is run once. Only the first element of the pair it returns is handed back.
    '''
    config.add_model_initial_configuration("Infected", target_set)
    model.set_initial_status(config)
    # The second element (the list of newly activated nodes) is not needed here.
    spread_size, _ = model.iteration_bunch()
    return spread_size

def Get_ActiveSetSize(g, target_set_size, strategy, sorted_graph):
    '''
    Run one diffusion seeded with the best-ranked nodes and return its size.

    A new model/configuration pair is created with ``InitModel(g)`` on every
    call, the first ``target_set_size`` entries of ``sorted_graph`` are used
    as the seed set, and the value returned by ``InfluenceSpread`` is passed
    straight through.

    Parameters:
        g: graph to simulate on.
        target_set_size: how many leading entries of ``sorted_graph`` to seed.
        strategy: label of the ranking strategy; accepted but not read here.
        sorted_graph: sequence of nodes ordered best-first.

    Note from the original author: the process is assumed to be continuous
    (previously activated nodes cannot re-attempt) and all influencers share
    the same starting point. That behaviour lives in the model, not in this
    function.
    '''
    fresh_model, fresh_config = InitModel(g)
    seeds = sorted_graph[:target_set_size]
    return InfluenceSpread(fresh_model, fresh_config, seeds)

In [4]:
def HighDegreeSort(g):
    '''
    Return the nodes of ``g`` ordered from highest to lowest degree.

    ``g.degree`` is iterated as (node, degree) pairs. The sort is stable, so
    nodes with equal degree keep the order in which ``g.degree`` yields them.
    '''
    by_degree_desc = sorted(g.degree, key=operator.itemgetter(1), reverse=True)
    return [entry[0] for entry in by_degree_desc]

In [5]:
def CentralitySort(g):
    '''
    Rank the nodes of ``g`` by average shortest-path distance, closest first.

    For each node the hop distances to every node it can reach are summed
    and divided by ``len(g)``. Nodes are then ordered by that value in
    ascending order; ties keep the iteration order of ``g.nodes()``.

    Returns:
        list of nodes, most central (smallest average distance) first.

    Progress and the final ranking are printed as a side effect.

    NOTE(review): unreachable nodes add nothing to the sum while the divisor
    stays ``len(g)``, so on a disconnected graph nodes in small components
    score as "more central" - confirm this is acceptable.
    '''
    print("CentralitySort: Running...")
    num_nodes = len(g)
    asd_dict = {}
    for node in g.nodes():
        # A single breadth-first search from `node` gives every distance that
        # is needed; there is no reason to solve all-pairs shortest paths
        # again for each node.
        distances = nx.single_source_shortest_path_length(g, node)
        asd_dict[node] = sum(distances.values()) / num_nodes
        print(f"{node}/{len(g)-1}")

    # Sorting the keys by their stored value is stable, so equal scores stay
    # in insertion (node-iteration) order.
    sorted_c = sorted(asd_dict, key=asd_dict.get)
    print("CentralitySort: Completed")
    print(sorted_c)
    return sorted_c

def remove_isolated_nodes(g):
    '''
    Delete every degree-0 node from ``g`` in place and return the same graph.
    '''
    # Collect the nodes first so the graph is not modified while its degree
    # view is being iterated.
    isolated = [pair[0] for pair in g.degree if pair[1] == 0]

    for lonely in isolated:
        g.remove_node(lonely)

    return g

In [6]:
def GreedySort(g, tss_range, num_iterations=100):
    '''
    Score every node by simulated single-seed spread and build three rankings.

    Each node is used as the only seed in ``num_iterations`` simulations
    (``GreedySim``). Two scores are kept per node: the average spread
    ("marginal gain", ``mg``) and the largest spread seen ("influence").

    Parameters:
        g: graph to simulate on.
        tss_range: number of nodes drawn for the two random rankings.
        num_iterations: simulations per candidate node (default 100).

    Returns:
        (sorted_g, sorted_rg, sorted_r)
        sorted_g:  all nodes, highest average spread first.
        sorted_rg: up to ``tss_range`` distinct random nodes, ordered by
                   average spread.
        sorted_r:  up to ``tss_range`` distinct random nodes, ordered by
                   largest observed spread.

    Progress is printed as a side effect.
    '''
    def _by_score_desc(pairs):
        # Order (node, score) pairs from highest to lowest score.
        return sorted(pairs, key=lambda item: item[1], reverse=True)

    model, config = InitModel(g)
    print("GreedySort: Running...")
    mg_dict = {}
    influence_dict = {}
    for candidate in g.nodes():
        spreads = [len(GreedySim(model, config, candidate))
                   for _ in range(num_iterations)]
        mg_dict[candidate] = sum(spreads) / num_iterations
        influence_dict[candidate] = max(spreads)
        print(f"{candidate}/{len(g)-1}")

    print("GreedySort: Completed")
    mg_list = _by_score_desc(mg_dict.items())
    # Rank by the per-node maximum. Building this list from mg_dict instead
    # left influence_dict unused and made the "random" ranking
    # indistinguishable from the "random greedy" one.
    influence_list = _by_score_desc(influence_dict.items())

    # Sample without replacement so a seed list never repeats a node; clamp
    # the size so a small graph (or a non-positive request) yields a shorter
    # list instead of an error.
    sample_size = max(0, min(tss_range, len(mg_list)))
    rg_list = _by_score_desc(random.sample(mg_list, k=sample_size))
    r_list = _by_score_desc(random.sample(influence_list, k=sample_size))

    sorted_g = [tup[0] for tup in mg_list]
    sorted_rg = [tup[0] for tup in rg_list]
    sorted_r = [tup[0] for tup in r_list]

    return sorted_g, sorted_rg, sorted_r

def GreedySim(model, config, candidate):
    '''
    Run one diffusion seeded by ``candidate`` alone and return the nodes reached.

    The list returned by ``model.iteration_bunch()`` is extended in place with
    ``candidate`` itself, passed to ``model.mg_reset`` and then returned.
    '''
    config.add_model_initial_configuration("Infected", [candidate])
    model.set_initial_status(config)
    _, reached = model.iteration_bunch()
    # The seed counts toward its own spread.
    reached.append(candidate)
    # presumably mg_reset restores these nodes so the same model can be
    # reused for the next simulation - confirm against ThresholdModel.mg_reset
    model.mg_reset(reached)
    return reached

In [7]:
# Prepare the graph: generate a random graph, then drop every degree-0 node
# before any simulation is run.
g = nx.erdos_renyi_graph(150, 0.1) # number of nodes, probability of forming each edge
# g = nx.gnm_random_graph(1075, 5300) # number of nodes, number of edges
g = remove_isolated_nodes(g)

# Experiment parameters
# tss_range: number of target-set sizes evaluated. The run_* functions loop
# over range(tss_range), so with 30 the target sets hold 0 through 29 influencers.
tss_range = 30 
# num_i: how many times each target-set size is simulated; the run_*
# functions average the active-set size over these repetitions.
num_i = 5

In [8]:
def run_g_rg_r(greedy_i=5):
    '''
    Evaluate the greedy, random-greedy and random rankings.

    ``GreedySort`` is run once to obtain the three rankings. For each ranking
    and each target-set size in ``range(tss_range)``, the active-set size is
    simulated ``num_i`` times and averaged. Reads the notebook globals ``g``,
    ``tss_range`` and ``num_i``.

    Parameters:
        greedy_i: simulations per candidate node passed to ``GreedySort``
                  (default 5).

    Returns:
        (g_active_ss_list, rg_active_ss_list, r_active_ss_list): three lists
        of length ``tss_range``; entry ``i`` is the mean active-set size for a
        target set of size ``i``.

    One progress line is printed per repetition of each strategy.
    '''
    sorted_g, sorted_rg, sorted_r = GreedySort(g, tss_range, greedy_i)

    def _mean_active_set_sizes(strategy, ranking):
        # Sum the active-set size per target-set size over num_i repetitions,
        # then divide to get the mean.
        totals = [0] * tss_range
        for a in range(num_i):
            for target_set_size in range(tss_range):
                totals[target_set_size] += Get_ActiveSetSize(
                    g, target_set_size, strategy, ranking)
            print(f"{a}/{num_i-1}")
        return [total / num_i for total in totals]

    # Strategies are evaluated one after another in this fixed order.
    g_active_ss_list = _mean_active_set_sizes('greedy', sorted_g)
    rg_active_ss_list = _mean_active_set_sizes('randomgreedy', sorted_rg)
    r_active_ss_list = _mean_active_set_sizes('random', sorted_r)

    return g_active_ss_list, rg_active_ss_list, r_active_ss_list

In [9]:
def run_hd():
    '''
    Evaluate the high-degree ranking.

    Nodes are ranked with ``HighDegreeSort``; for each target-set size in
    ``range(tss_range)`` the active-set size is simulated ``num_i`` times and
    averaged. Reads the notebook globals ``g``, ``tss_range`` and ``num_i``.

    Returns:
        list of length ``tss_range``; entry ``i`` is the mean active-set size
        for a target set of size ``i``.
    '''
    print("HD")
    ranking = HighDegreeSort(g)

    totals = [0] * tss_range
    for a in range(num_i):
        for size in range(tss_range):
            totals[size] += Get_ActiveSetSize(g, size, 'highdegree', ranking)
        print(f"{a}/{num_i-1}")

    return [total / num_i for total in totals]

In [10]:
def run_c():
    '''
    Evaluate the centrality ranking.

    Nodes are ranked with ``CentralitySort``; every target-set size in
    ``range(tss_range)`` is simulated ``num_i`` times and the active-set
    sizes are averaged. Reads the notebook globals ``g``, ``tss_range`` and
    ``num_i``.

    Returns:
        list of length ``tss_range``; entry ``i`` is the mean active-set size
        for a target set of size ``i``.
    '''
    central_first = CentralitySort(g)
    sizes = range(tss_range)

    running_sum = [0 for _ in sizes]
    for a in range(num_i):
        for k in sizes:
            observed = Get_ActiveSetSize(g, k, 'centrality', central_first)
            running_sum[k] += observed
        print(f"{a}/{num_i-1}")

    averaged = []
    for accumulated in running_sum:
        averaged.append(accumulated / num_i)
    return averaged

In [None]:
# Run the three ranking pipelines concurrently, one worker thread each.
# Leaving the `with` block waits for every submitted task to finish.
# NOTE(review): the tasks appear to be CPU-bound Python code; threads may give
# little or no speed-up over the sequential version - confirm by timing.
with ThreadPoolExecutor(max_workers=3) as executor:
    t1 = executor.submit(run_g_rg_r)
    t2 = executor.submit(run_hd)
    t3 = executor.submit(run_c)
# result() hands back each function's return value and re-raises any
# exception raised inside the worker.
g_active_ss_list, rg_active_ss_list, r_active_ss_list = t1.result()
hd_active_ss_list = t2.result()
c_active_ss_list = t3.result()

GreedySort: Running...HD

CentralitySort: Running...
0/1074
0/999
1/1074


In [None]:
# No Threading
# g_active_ss_list, rg_active_ss_list, r_active_ss_list = run_g_rg_r()
# hd_active_ss_list = run_hd()
# c_active_ss_list = run_c()

In [None]:
# Convert each per-strategy result list to a NumPy array, keeping its name.
(
    g_active_ss_list,
    hd_active_ss_list,
    r_active_ss_list,
    c_active_ss_list,
    rg_active_ss_list,
) = [
    np.array(series)
    for series in (
        g_active_ss_list,
        hd_active_ss_list,
        r_active_ss_list,
        c_active_ss_list,
        rg_active_ss_list,
    )
]

In [None]:
# Greedy ranking: mean active-set size, indexed by target-set size.
g_active_ss_list

In [None]:
# High-degree ranking: mean active-set size, indexed by target-set size.
hd_active_ss_list

In [None]:
# Random ranking: mean active-set size, indexed by target-set size.
r_active_ss_list

In [None]:
# Centrality ranking: mean active-set size, indexed by target-set size.
c_active_ss_list

In [None]:
# Random-greedy ranking: mean active-set size, indexed by target-set size.
rg_active_ss_list

In [None]:
# Compare the mean active-set size reached by each seeding strategy.
# Each series is plotted against its own index, which is the target-set size.
plt.plot(g_active_ss_list, label = "Greedy")
plt.plot(hd_active_ss_list, label = "High Degree")
plt.plot(r_active_ss_list, label = "Random")
plt.plot(c_active_ss_list, label = "Centrality")
plt.plot(rg_active_ss_list, label = "Random Greedy")
plt.xlabel('Target Set Size')
plt.ylabel('Active Set Size')
# The simulations in this notebook are built on tm.ThresholdModel (the
# independent-cascade imports are commented out), so the title names that model.
plt.title('Threshold Model')

plt.legend()
plt.show()

In [None]:
# Report the wall-clock time elapsed since `start` was captured in the
# timing cell near the top of the notebook, rounded to two decimals.
finish = time.perf_counter()
print(f'Finished in {round(finish - start, 2)} seconds(s)')