In [2]:
import numpy as np
from random import random,choice
from igraph import *
import os
import sys
import pandas as pd
import time
from tqdm import tqdm

In [3]:
sys.path.append("..")     
from utils import *

In [4]:
def h_index(G,node,mode = 'out'):
    '''
    Return h-index of a node
    '''

    sorted_neighbor_degrees = sorted(G.degree(G.neighbors(node,mode),mode),reverse=True)
    h = 0
    for i in range(1, len(sorted_neighbor_degrees)+1):
        if sorted_neighbor_degrees[i-1] < i:
            break
        h = i

    return h


def hi_index(G,node,mode='out'):
    '''
    Return hi_index (h-index on h-index) of a node
    '''

    sorted_neighbor_h_index = sorted([h_index(G,v,mode) for v in G.neighbors(node,mode)],reverse=True)
    h = 0
    for i in range(1, len(sorted_neighbor_h_index)+1):
        if sorted_neighbor_h_index[i-1] < i:
            break
        h = i

    return h

In [5]:
def get_inf(seeds,no_iterations):

    inf_size = 0
    inf_list = []
    for i in range(no_iterations):
        inf_size += len(ICM(G,seeds))
        inf_list.append(inf_size/(i+1))
    
    # fig, ax = plt.subplots()

    # ax.plot([i+1 for i in range(no_iter)],inf_list)
    # plt.show()

    return inf_size/no_iterations


In [6]:
datasets = [("celegans_n306.txt","d","w"),("USairport500.txt","d","w"),
("Freemans_EIES-3_n32.txt","d","w"),("OClinks_w.txt","d","w"),
("USairport_2010.txt","d","w")]

In [16]:
seed_size = 10
no_iterations = 100

In [17]:
#methods

# max-outdegree neighbors
# max h2-idx neighbors
# max p*outdegree
# max link_pred * outdegree

In [18]:
def get_seeds(G,source_node,k,method):

    candidates = [G.vs[idx] for idx in G.neighbors(source_node,mode='out')]

    if method == 'original': 
        candidates =  sorted(candidates,key = lambda node: G.es[G.get_eid(source_node,node)]['p']* node.outdegree(),reverse=True)
        # return candidates[:min(k,len(candidates))]

    elif method == 'p_l':
        candidates = sorted(candidates,key = lambda node: G.es[G.get_eid(source_node,node)]['p_l'] * node.outdegree(),reverse= True)
        # return candidates[:min(k,len(candidates))]
    
    elif method == 'degree':
        candidates = sorted(candidates,key = lambda node: node.outdegree(),reverse= True)
    
    return candidates[:min(k,len(candidates))]



In [19]:
methods = ['original','p_l','degree']

In [20]:
results = {}

for dataset,directed,weighted in tqdm(datasets):
    
    df_result = pd.DataFrame(None,columns=['seed_size']+methods)

    if dataset in ["email.txt" ,"road.txt" ,"weblog.txt" ,"animal_interaction.txt","facebook_combined.txt"]:
        data_file_path = os.path.join(os.pardir,os.pardir,"data","Prev Data",dataset)
    else:
        data_file_path = os.path.join(os.pardir,os.pardir,"data",dataset)
    
    directed = True if directed == "d" else False
    weighted = True if weighted == "w" else False

    G = read_graph(data_file_path,directed,weighted = weighted)
    
    if weighted == True:
        G.es['p'] = scale(get_true_weights(dataset,directed,data_file_path,weighted))
    else:
        G.es['p'] = prob_heuristics(G,"RA")

    # df = pd.read_csv(os.path.join(os.pardir,"Cascade_Experiments","Cascade outputs",f"{dataset.split('.')[0]}_CP.csv"))

    G.es['p_l'] = prob_heuristics(G,"RA")
    G.vs['shell'] = G.coreness(mode='out')
    # for v in G.vs:
    #     # v['power'] = df[df.Node == v.index]['p=Original'].values[0]
    #     v['shell'] = G.coreness()
    
    # seeds_list = []
    G.vs['h2-idx'] = [hi_index(G,node,'out') for node in G.vs]
    
    # new_heuristic(G)
    # local_GSM(G)
    # Profit_Sharing(G)
    # GSM(G)
    # GSM_pc(G)
    # heuristic_2(G)
    
    
    source_node = choice(G.vs)
    # get_cost(G,source_node)
    for method in methods:
        seeds = get_seeds(G,source_node,seed_size,method)
        # for i in tqdm(range(seed_size)):
        #     df_result.at[i,'seed_size'] = i+1
        #     # seed = max([node for node in G.vs if node not in seeds],key= lambda node:node[method])
        #     # seeds.append(seed)
        #     seeds 
        df_result.at[0,method] =  ([seed.index for seed in seeds],get_inf(seeds,no_iterations))

        # seeds_list.append(seeds)
    
    results[dataset] = df_result

    results[dataset].to_csv(f"{dataset.split('.')[0]}_comp.csv",index=False)
    


  arr_value = np.asarray(value)
  arr_value = np.asarray(value)
  arr_value = np.asarray(value)
  arr_value = np.asarray(value)
  arr_value = np.asarray(value)
100%|██████████| 5/5 [00:36<00:00,  7.39s/it]
