In [50]:
import pandas as pd
import numpy as np
import networkx as nx
import community as community_louvain
import random


nodes = pd.read_csv('synth3.attr', sep=' ', header=None, names=['node', 'attribute'])
edges = pd.read_csv('synth3.links', sep=' ', header=None, names=['source', 'target'])

n=len(nodes)

matrix = np.zeros((n+1, n+1))


for _, row in edges.iterrows():
    source = row['source']
    target = row['target']
    matrix[target][source] = 1
    matrix[source][target] = 1

# print(matrix)


G= nx.Graph()


for _, row in nodes.iterrows():
    G.add_node(row['node'])

for _, row in edges.iterrows():
    G.add_edge(row['source'], row['target'])

# print("number of nodes:",G.number_of_nodes())
# print("mumber of edges:",G.number_of_edges())



In [51]:
partition = community_louvain.best_partition(G)

In [52]:
C={}
for node, com_i_th in partition.items():
    if com_i_th not in C:
        C[com_i_th] = []
    C[com_i_th].append(node)



R = nodes.groupby('attribute')['node'].apply(set)

# R=list(R)
# print (type(R))
# print (len(R[2]))
# print(type(R))
# print (len(R))

In [53]:
P=0.8

p = {i: {j: random.random() for j in G.nodes()} for i in G.nodes()}


degree_centrality = nx.degree_centrality(G)
total_nodes = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:4]
# S=dict(total_nodes)
S = {node for node, _ in total_nodes}

def independent_cascade(G, S, p, num_simulations=500):
    activated_nodes = set(S)
    total_activated = 0

    for _ in range(num_simulations):
        newly_activated = set(S)
        activated_this_simulation = set(S)
        while newly_activated:
            current_activated = newly_activated
            newly_activated = set()
            for node in current_activated:
                for neighbor in set(G.neighbors(node)):
                    if neighbor not in activated_nodes:
                        if random.random() < p[node][neighbor]:
                            newly_activated.add(neighbor)
                            activated_this_simulation.add(neighbor)
                            activated_nodes.add(neighbor)
        total_activated += len(activated_this_simulation)

    return total_activated / num_simulations

def MF(G, S, R, p, num_simulations=500):
    min_influence_ratio = 69

    for Ri in R:
        IGRiS = independent_cascade(G, S, p, num_simulations)
        influence_ratio = IGRiS/len(Ri)
        min_influence_ratio = min(min_influence_ratio, influence_ratio)

    return min_influence_ratio

print(MF(G, S, R, p))
print(independent_cascade(G, S, p))

0.01616
4.834


In [54]:
subgraphs = {}
for i, Ri in enumerate(R):
    subgraph_nodes = list(Ri)
    subgraph = G.subgraph(subgraph_nodes)
    subgraphs[i] = subgraph


# print (type(subgraphs[0]))
# print(type(G))

print(subgraphs[0])
print(list(R[1])[:(len(S)*len(R[1]))//len(G.nodes())])
print(S)

Graph with 125 nodes and 207 edges
[1, 2]
{169, 292, 269, 9}


In [55]:
def DCV(G, S, R, p, subgraphs, num_simulations=500):
    total =0
    i=0
    for Ri in R:
        ki=(len(S)*len(Ri))//len(G.nodes())

        IGRiKi = independent_cascade(subgraphs[i], list(Ri)[:ki], p, num_simulations)
        IGRiS = independent_cascade(G, S, p, num_simulations)
        value = (IGRiKi-IGRiS)/(IGRiKi+0.00001)
        total+=max(value, 0)
        i+=1

    return total/len(R)

print (DCV(G, S, R, p, subgraphs, num_simulations=500))



0.0
