In [None]:
import pandas as pd
import igraph as ig
import random
from plotly.subplots import make_subplots
import time
import matplotlib.pyplot as plt
import numpy as np


# Load the correlation table; only rows whose 'Edge' column equals 'Yes'
# become edges of the undirected graph g.
df = pd.read_excel('correlation_results.xlsx')

g = ig.Graph(directed=False)

# Names of the vertices already inserted into g, so each is added only once.
added_vertices = set()

for row in df[df['Edge'] == 'Yes'].itertuples(index=False):
    # Columns are read by position: 0 and 1 are the two endpoints,
    # 2 is the value stored as the edge's 'weight' attribute.
    source, target, correlation = row[0], row[1], row[2]

    for endpoint in (source, target):
        if endpoint not in added_vertices:
            g.add_vertex(name=endpoint)
            added_vertices.add(endpoint)

    g.add_edge(source, target, weight=correlation)

In [None]:
def ltm(graph, seed_set, threshold=0.4):
    """Run a Linear Threshold Model cascade from ``seed_set``.

    An inactive vertex becomes active when the weights of its edges to
    already-active neighbours, each divided by the total weight of all its
    edges, sum to at least ``threshold``. Vertices are visited in ``graph.vs``
    order and an activation counts immediately for vertices visited later in
    the same pass. Passes repeat until one adds no new vertex.

    Parameters
    ----------
    graph : object exposing ``vs`` (vertices with ``index`` and
        ``neighbors()``), ``es`` (edges with a ``'weight'`` entry) and
        ``get_eid(u, v)``; the notebook passes an igraph ``Graph``.
    seed_set : iterable of vertex indices that start out active.
    threshold : float, default 0.4
        Minimum normalised active-neighbour weight required to activate.

    Returns
    -------
    (int, set)
        ``spread`` - the number of activated vertices that were not seeds -
        and the full set of active vertex indices (seeds included).
    """
    # De-duplicate once so repeated seeds cannot deflate the spread count.
    seeds = set(seed_set)
    activated_nodes = set(seeds)

    # Edge weights do not change during the cascade, so collect every
    # non-seed vertex's (neighbour index, weight) pairs and their total once
    # instead of looking them up again on each pass.
    influence = {}
    for vertex in graph.vs:
        if vertex.index in activated_nodes:
            continue
        weighted_neighbors = [
            (neighbor.index,
             graph.es[graph.get_eid(vertex.index, neighbor.index)]['weight'])
            for neighbor in vertex.neighbors()
        ]
        total_neighbor_weight = sum(weight for _, weight in weighted_neighbors)
        influence[vertex.index] = (weighted_neighbors, total_neighbor_weight)

    changed = True
    while changed:
        changed = False
        for vertex_index, (weighted_neighbors, total_neighbor_weight) in influence.items():
            if vertex_index in activated_nodes:
                continue

            if total_neighbor_weight != 0:
                active_share = sum(
                    weight / total_neighbor_weight
                    for neighbor_index, weight in weighted_neighbors
                    if neighbor_index in activated_nodes
                )
            else:
                # A zero total weight contributes nothing.
                active_share = 0

            if active_share >= threshold:
                activated_nodes.add(vertex_index)
                changed = True

    spread = len(activated_nodes) - len(seeds)
    return spread, activated_nodes

def plot_network(graph, seed_set, activated_nodes):
    """Draw ``graph`` as a Plotly figure and display it.

    Vertices are positioned with the Fruchterman-Reingold layout and coloured
    blue when they are in ``seed_set``, light blue when they are in
    ``activated_nodes`` but not seeds, and black otherwise. Every vertex gets a
    one-character label, ``chr(65 + index)`` ('A' for index 0). An annotation
    in the bottom-left corner states the vertex count.

    Parameters
    ----------
    graph : object exposing ``vcount()``, ``get_edgelist()`` and
        ``layout_fruchterman_reingold()``; the notebook passes an igraph
        ``Graph``. All drawing uses this argument, not the module-level graph.
    seed_set : iterable of vertex indices drawn in blue.
    activated_nodes : iterable of vertex indices drawn in light blue.

    Returns
    -------
    None. The figure is shown via ``fig.show()``.
    """
    vertex_count = graph.vcount()
    # Sets make the per-vertex colour lookup constant time.
    seeds = set(seed_set)
    activated = set(activated_nodes)

    vertex_labels = [chr(65 + index) for index in range(vertex_count)]

    layout = graph.layout_fruchterman_reingold()
    fig = make_subplots(rows=1, cols=1)

    # All edges share one trace; the None entries break the line between
    # consecutive segments so edges are not joined to one another.
    edge_x, edge_y = [], []
    for source, target in graph.get_edgelist():
        x0, y0 = layout[source]
        x1, y1 = layout[target]
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))
    fig.add_scatter(x=edge_x, y=edge_y, mode='lines',
                    line=dict(color='gray', width=1), showlegend=False)

    vertex_colors = [
        "blue" if index in seeds else "lightblue" if index in activated else "black"
        for index in range(vertex_count)
    ]
    fig.add_scatter(x=[coord[0] for coord in layout], y=[coord[1] for coord in layout],
                    mode='markers',
                    marker=dict(size=10, color=vertex_colors),
                    text=vertex_labels, showlegend=False)

    fig.update_layout(
        width=1200,
        height=800,
        margin=dict(l=0, r=0, b=0, t=0),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    )
    fig.add_annotation(
        x=0,
        y=0,
        xref="paper",
        yref="paper",
        text=f"Number of Nodes: {vertex_count}",
        showarrow=False,
        font=dict(size=14),
    )
    fig.show()

# SIMPLE GREEDY
# Hard-coded seed vertex indices for this run.
seed_set = [
    54, 83, 86, 91, 0, 1, 2, 3, 4, 5,
    6, 7, 19, 34, 9, 8, 18, 10, 13, 11,
]
# Run the cascade, print how many vertices end up active (seeds included),
# then draw the network.
_, activated_nodes = ltm(g, seed_set)
print(len(activated_nodes))
plot_network(g, seed_set, activated_nodes)

In [None]:
#greedy_ltm2
# Hard-coded seed vertex indices for this run.
seed_set = [
    54, 83, 86, 91, 7, 19, 34, 48, 0, 1,
    8, 18, 69, 72, 2, 3, 6, 11, 13, 14,
]
# Run the cascade, print how many vertices end up active (seeds included),
# then draw the network.
_, activated_nodes = ltm(g, seed_set)
print(len(activated_nodes))
plot_network(g, seed_set, activated_nodes)

In [None]:
#greedy_ltm2_dis
# Hard-coded seed vertex indices for this run.
seed_set = [
    54, 86, 83, 91, 0, 80, 1, 88, 5, 82,
    6, 69, 17, 90, 19, 79, 29, 51, 34, 78,
]
# Run the cascade, print how many vertices end up active (seeds included),
# then draw the network.
_, activated_nodes = ltm(g, seed_set)
print(len(activated_nodes))
plot_network(g, seed_set, activated_nodes)

In [None]:

def diffusion_degree(graph, vertex_index, threshold):
    """Score one vertex from its own degree and its neighbours' degrees.

    The result is the sum of two terms:

    * ``threshold * degree(v)`` for the vertex ``v`` itself, and
    * for every neighbour ``u`` of ``v``: ``lam * (degree(u) - 1)``, where
      ``lam`` is the weight of edge (v, u) divided by the total weight of all
      edges incident to ``u`` (``lam`` is 0 when that total is 0).

    Parameters
    ----------
    graph : object exposing ``vs``, ``es``, ``degree``, ``neighbors`` and
        ``get_eid``; the notebook passes an igraph ``Graph`` whose edges carry
        a ``'weight'`` attribute.
    vertex_index : int index of the vertex to score.
    threshold : multiplier applied to the vertex's own degree.

    Returns
    -------
    The combined score as a number.
    """
    vertex = graph.vs[vertex_index]

    # Own term: the vertex's degree scaled by the threshold.
    own_term = threshold * graph.degree(vertex)

    # Neighbour term, accumulated in neighbour order.
    neighbor_term = 0
    for neighbor_index in graph.neighbors(vertex):
        neighbor = graph.vs[neighbor_index]

        weight_to_neighbor = graph.es[graph.get_eid(vertex_index, neighbor_index)]['weight']

        # Total weight of every edge touching the neighbour.
        neighbor_total_weight = sum(
            graph.es[graph.get_eid(neighbor.index, second_hop.index)]['weight']
            for second_hop in neighbor.neighbors()
        )

        if neighbor_total_weight != 0:
            lam = weight_to_neighbor / neighbor_total_weight
        else:
            lam = 0

        neighbor_term += lam * (graph.degree(neighbor) - 1)

    return own_term + neighbor_term


def top_cdd(graph, threshold, k):
    """Return the ``k`` vertices with the highest diffusion degree, and the time taken.

    Every vertex index in ``range(len(graph.vs))`` is scored with
    ``diffusion_degree(graph, index, threshold)``.

    Parameters
    ----------
    graph : graph object passed through to ``diffusion_degree``.
    threshold : value passed through to ``diffusion_degree``.
    k : int, number of vertices to keep (applied as a slice ``[:k]``).

    Returns
    -------
    (list of int, float)
        Vertex indices ordered by descending score (ties keep ascending index
        order because the sort is stable), and the elapsed time in seconds.
    """
    # perf_counter is monotonic and high resolution, so the measured interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    diffusion_degrees = {
        vertex_index: diffusion_degree(graph, vertex_index, threshold)
        for vertex_index in range(len(graph.vs))
    }

    ranked_vertices = sorted(diffusion_degrees, key=diffusion_degrees.get, reverse=True)
    # Named differently from the function to avoid shadowing it locally.
    top_vertices = ranked_vertices[:k]

    elapsed_time = time.perf_counter() - start_time

    return top_vertices, elapsed_time

# Each series below times top_cdd for a given k, runs ltm on a 20-vertex seed
# list, and builds a 20-point timing curve: a linear ramp that ends at the
# measured top_cdd time, followed by hard-coded timings. For series 2-8 the
# ranking returned by top_cdd is discarded and the seed list is hard-coded;
# series 1 uses the ranking itself as its seed list.

# Series 2: k = 4, 3-point ramp + 17 hard-coded timings.
_, elapsed_time_2 = top_cdd(g, 0.4, 4)
seed_set_2 = [
    2, 18, 13, 49, 54, 83, 86, 91, 0, 1,
    3, 4, 5, 6, 7, 8, 29, 9, 34, 11,
]
_, activated_nodes_2 = ltm(g, seed_set_2)
linear_progression = [step * elapsed_time_2 / 3 for step in range(1, 4)]
comp_time_2 = linear_progression + [
    0.5915811061859131, 5.877347707748413, 14.640755414962769,
    22.950454473495483, 31.323747158050537, 39.74720597267151,
    47.726980447769165, 56.08556246757507, 63.351810455322266,
    70.81751775741577, 78.35147714614868, 85.65398907661438,
    92.74872207641602, 100.04925751686096, 110.25287055969238,
    120.15820026397705, 135.56160402297974,
]

# Series 4: k = 8, 7-point ramp + 13 hard-coded timings.
_, elapsed_time_4 = top_cdd(g, 0.4, 8)
seed_set_4 = [
    2, 18, 13, 49, 47, 10, 14, 3, 54, 12,
    83, 86, 91, 0, 1, 4, 5, 7, 8, 29,
]
_, activated_nodes_4 = ltm(g, seed_set_4)
linear_progression = [step * elapsed_time_4 / 7 for step in range(1, 8)]
comp_time_4 = linear_progression + [
    0.5648937225341797, 9.17858600616455, 25.816299200057983,
    41.51489567756653, 56.727455377578735, 72.72218441963196,
    87.59847617149353, 102.01604008674622, 116.12313175201416,
    130.3963975906372, 145.06119513511658, 159.32308721542358,
    172.35070514678955,
]

# Series 6: k = 12, 11-point ramp + 9 hard-coded timings.
_, elapsed_time_6 = top_cdd(g, 0.4, 12)
seed_set_6 = [
    2, 18, 13, 49, 47, 10, 14, 3, 27, 1,
    24, 7, 54, 8, 0, 9, 86, 91, 4, 5,
]
_, activated_nodes_6 = ltm(g, seed_set_6)
linear_progression = [step * elapsed_time_6 / 11 for step in range(1, 12)]
comp_time_6 = linear_progression + [
    0.5576767921447754, 16.662283658981323, 38.11702847480774,
    63.5785231590271, 94.18814897537231, 129.94890880584717,
    165.99355363845825, 201.62918043136597, 232.9149684906006,
]

# Series 8: k = 16, 15-point ramp + 5 hard-coded timings.
_, elapsed_time_8 = top_cdd(g, 0.4, 16)
seed_set_8 = [
    2, 18, 13, 49, 47, 10, 14, 3, 27, 1,
    24, 7, 25, 8, 28, 36, 86, 91, 0, 4,
]
_, activated_nodes_8 = ltm(g, seed_set_8)
linear_progression = [step * elapsed_time_8 / 15 for step in range(1, 16)]
comp_time_8 = linear_progression + [
    0.6063737869262695, 31.095524549484253, 60.6303813457489,
    89.09470820426941, 109.40944290161133,
]

# Series 1: k = 20; the seed list is the top_cdd ranking and the timing curve
# is the 20-point ramp alone.
seed_set_1, elapsed_time_1 = top_cdd(g, 0.4, 20)
_, activated_nodes_1 = ltm(g, seed_set_1)
linear_progression = [step * elapsed_time_1 / 20 for step in range(1, 21)]
comp_time_1 = linear_progression + []



def _prefix_activation_sizes(graph, seeds):
    """Return the activated-node count ltm gives for seeds[:1], seeds[:2], ..., seeds.

    Entry ``j`` of the result is ``len(activated)`` for the cascade started
    from the first ``j + 1`` seeds, so the list has one entry per seed.
    """
    return [
        len(ltm(graph, seeds[:prefix_length])[1])
        for prefix_length in range(1, len(seeds) + 1)
    ]


# One activation curve per seed list, replacing five copies of the same loop.
activated_nodes_sizes_2 = _prefix_activation_sizes(g, seed_set_2)
activated_nodes_sizes_4 = _prefix_activation_sizes(g, seed_set_4)
activated_nodes_sizes_6 = _prefix_activation_sizes(g, seed_set_6)
activated_nodes_sizes_8 = _prefix_activation_sizes(g, seed_set_8)
activated_nodes_sizes_1 = _prefix_activation_sizes(g, seed_set_1)




import matplotlib.pyplot as plt

# First figure: computation time against seed-set size (1..20), one curve
# per comp_time_* series, drawn on a logarithmic y axis.
seed_set_sizes = list(range(1, 21))

for comp_times, curve_label, curve_color in (
    (comp_time_2, "r = 0.2 ", "blue"),
    (comp_time_4, "r = 0.4 ", "red"),
    (comp_time_6, "r = 0.6 ", "green"),
    (comp_time_8, "r = 0.8 ", "orange"),
    (comp_time_1, "r = 1.0 ", "purple"),
):
    plt.plot(seed_set_sizes, comp_times, label=curve_label, color=curve_color)

plt.xlabel('Seed Set Size')
plt.ylabel('Computation Time')
plt.title('Computation Time')
plt.legend()
plt.yscale('log')  # logarithmic y axis
plt.show()

# Second figure: number of activated nodes against seed-set size, one curve
# per activated_nodes_sizes_* series.
for size_curve, curve_label, curve_color in (
    (activated_nodes_sizes_2, "r = 0.2 ", "blue"),
    (activated_nodes_sizes_4, "r = 0.4 ", "red"),
    (activated_nodes_sizes_6, "r = 0.6 ", "green"),
    (activated_nodes_sizes_8, "r = 0.8 ", "orange"),
    (activated_nodes_sizes_1, "r = 1.0 ", "purple"),
):
    plt.plot(seed_set_sizes, size_curve, label=curve_label, color=curve_color)

plt.xlabel('Seed Set Size')
plt.ylabel('Activated Nodes')
plt.title('Activated Nodes ')
plt.legend()
plt.show()

In [None]:
## simple greedy
# Hard-coded seed order and one hard-coded timing per seed-set size 1..20.
seed_set_1 = [
    54, 83, 86, 91, 0, 1, 2, 3, 4, 5,
    6, 7, 19, 34, 9, 8, 18, 10, 13, 11,
]
comp_time_1 = [
    0.6932430267333984, 3.171221971511841, 5.670828342437744,
    8.064326763153076, 10.607967376708984, 12.89676308631897,
    15.261656761169434, 17.466293811798096, 19.57187271118164,
    21.559252738952637, 23.489976167678833, 25.607878923416138,
    27.563546419143677, 29.775285243988037, 32.88713097572327,
    36.45547699928284, 40.04356074333191, 43.63926339149475,
    46.90032887458801, 50.31855034828186,
]
_, activated_nodes_1 = ltm(g, seed_set_1)

# Activated-node count for every prefix of the seed list (sizes 1..20).
activated_nodes_sizes_1 = []
for i in range(len(seed_set_1)):
    _, activated_nodes = ltm(g, seed_set_1[:i + 1])
    activated_nodes_sizes_1 += [len(activated_nodes)]

#greedy2
# Hard-coded seed order and one hard-coded timing per even size 2..20.
seed_set_2 = [
    54, 83, 86, 91, 7, 19, 34, 48, 0, 1,
    8, 18, 69, 72, 2, 3, 6, 11, 13, 14,
]
comp_time_2 = [
    122.26038074493408, 319.36211943626404, 510.5739722251892,
    690.246871471405, 950.4473912715912, 1181.8439836502075,
    1339.2940275669098, 1489.3472168445587, 1634.0020020008087,
    1868.5450239181519,
]
_, activated_nodes_2 = ltm(g, seed_set_2)

# Activated-node count for the even-length prefixes (sizes 2, 4, ..., 20).
activated_nodes_sizes_2 = []
for i in range(2, len(seed_set_2) + 1, 2):
    _, activated_nodes = ltm(g, seed_set_2[:i])
    activated_nodes_sizes_2 += [len(activated_nodes)]

#greedy2_dis
# Hard-coded seed order and one hard-coded timing per even size 2..20.
seed_set_3 = [
    54, 86, 83, 91, 0, 80, 1, 88, 5, 82,
    6, 69, 17, 90, 19, 79, 29, 51, 34, 78,
]
comp_time_3 = [
    27.85740828514099, 61.52946853637695, 87.44615125656128,
    110.64078164100647, 130.93603658676147, 149.794517993927,
    168.5133237838745, 185.67426586151123, 201.5803303718567,
    217.17396473884583,
]
_, activated_nodes_3 = ltm(g, seed_set_3)

# Activated-node count for the even-length prefixes (sizes 2, 4, ..., 20).
activated_nodes_sizes_3 = []
for i in range(2, len(seed_set_3) + 1, 2):
    _, activated_nodes = ltm(g, seed_set_3[:i])
    activated_nodes_sizes_3 += [len(activated_nodes)]
    
#greedy_centrality_0.8
# Time the diffusion-degree ranking for k = 16; the ranking itself is
# discarded and the seed list below is hard-coded.
_, elapsed_time_8 = top_cdd(g,0.4,16)
seed_set_8 = [2, 18, 13, 49, 47, 10, 14, 3, 27, 1, 24, 7, 25, 8, 28, 36, 86, 91, 0, 4]
comp_time_8= [elapsed_time_8, 0.6063737869262695, 31.095524549484253, 60.6303813457489, 89.09470820426941, 109.40944290161133]
_,activated_nodes_8 = ltm(g,seed_set_8)
# 20-point timing curve: a 15-step linear ramp ending at the measured top_cdd
# time, followed by the five hard-coded timings (the placeholder first entry
# of comp_time_8 is dropped).
linear_progression = [(i + 1) * elapsed_time_8 / 15 for i in range(15)]
comp_time_8 = linear_progression + comp_time_8[1:]

# The "Activated Nodes" plot later in this cell draws activated_nodes_sizes_8.
# Compute it here from this cell's seed_set_8 so the plot does not depend on a
# value left over from an earlier cell.
activated_nodes_sizes_8 = [
    len(ltm(g, seed_set_8[:prefix_length])[1])
    for prefix_length in range(1, len(seed_set_8) + 1)
]


 
               
import matplotlib.pyplot as plt

# First figure: computation time per algorithm on a logarithmic y axis.
# SG and HCG have one point per seed-set size 1..20; GP and GPD have one
# point per even size 2..20.
seed_set_sizes_1 = list(range(1, 21))
seed_set_sizes_2 = list(range(2, 22, 2))

for x_values, y_values, curve_label, curve_color in (
    (seed_set_sizes_1, comp_time_1, "SG", "blue"),
    (seed_set_sizes_2, comp_time_2, "GP", "red"),
    (seed_set_sizes_2, comp_time_3, "GPD", "orange"),
    (seed_set_sizes_1, comp_time_8, "HCG", "green"),
):
    plt.plot(x_values, y_values, label=curve_label, color=curve_color)

plt.xlabel('Seed Set Size')
plt.ylabel('Computation Time')
plt.title('Computation Time')
plt.legend()
plt.yscale('log')  # logarithmic y axis
plt.show()



# Second figure: number of activated nodes per algorithm. SG and HCG are
# plotted against sizes 1..20, GP and GPD against the even sizes 2..20.
plt.plot(seed_set_sizes_1, activated_nodes_sizes_1, label="SG", color="blue")

# Rebinds comp_times; none of the plotting calls below read it.
comp_times = comp_time_8

for size_curve, curve_label, curve_color in (
    (activated_nodes_sizes_2, "GP", "red"),
    (activated_nodes_sizes_3, "GPD", "orange"),
):
    plt.plot(seed_set_sizes_2, size_curve, label=curve_label, color=curve_color)

plt.plot(seed_set_sizes_1, activated_nodes_sizes_8, label="HCG", color="green")

plt.xlabel('Seed Set Size')
plt.ylabel('Activated Nodes')
plt.title('Activated Nodes ')
plt.legend()
plt.show()