In [1]:
import pickle
import networkx as nx
import random

In [2]:
# NOTE: pickle can execute arbitrary code while loading; only open
# 'weighted_network.pkl' when it comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open('weighted_network.pkl', 'rb') as network_file:
    network = pickle.load(network_file)

Work with the top 10 authors of each conference, ranked by their number of published papers.

In [3]:
# Gather every venue name found on any author node (duplicates are removed below).
conferences = []
for venue in nx.get_node_attributes(network, 'venues').values():
    conferences.extend(venue)


def _paper_count(author):
    """Return the 'num_of_papers' attribute stored on an author node."""
    return network.nodes[author]['num_of_papers']


# For each conference, keep the 10 authors with the most papers among those
# whose first listed venue is that conference.
top_10_authors = []
for conference in set(conferences):
    conference_authors = [
        author
        for author, data in network.nodes(data=True)
        if data['venues'][0] == conference
    ]
    top_authors = sorted(conference_authors, key=_paper_count, reverse=True)[:10]
    top_10_authors.extend(top_authors)

# Restrict the network to the selected authors.
subgraph = network.subgraph(top_10_authors)

In [4]:
def remove_edges_based_on_project_network(expert_network, project_network):
    """Drop expert edges whose endpoint venues are not linked in the project network.

    A node's label is the first entry of its 'venues' attribute. An edge of
    ``expert_network`` survives only if ``project_network`` has an edge
    between the labels of its two endpoints.

    Args:
        expert_network: Graph whose nodes carry a 'venues' sequence. It is
            modified in place.
        project_network: Graph whose nodes are venue labels.

    Returns:
        The same ``expert_network`` object, after the edges were removed.
    """
    def primary_venue(node):
        return expert_network.nodes[node]['venues'][0]

    unsupported_edges = [
        edge
        for edge in expert_network.edges()
        if not project_network.has_edge(primary_venue(edge[0]), primary_venue(edge[1]))
    ]
    expert_network.remove_edges_from(unsupported_edges)
    return expert_network

In [5]:
def compute_influence(graph):
    """Annotate every node with an 'influence' score scaled so the maximum is 100.

    A node's raw influence is the sum of the 'weight' attribute over the edges
    returned by ``graph.edges(node, data=True)``. Edges without a 'weight'
    contribute 0, matching how ``sum_edge_weights`` skips them. The raw scores
    are then multiplied by ``100 / max_raw_score``.

    Args:
        graph: Graph to annotate; the 'influence' node attribute is written
            in place. May be None.

    Returns:
        The same graph object, or None (after printing an error) when
        ``graph`` is None. When the graph has no nodes, or the largest raw
        score is 0, the raw scores are left unscaled instead of dividing by
        zero.
    """
    if graph is None:
        print("Error: Graph is None.")
        return None

    for node in graph.nodes:
        graph.nodes[node]['influence'] = sum(
            edge.get('weight', 0)
            for _, _, edge in graph.edges(node, data=True)
        )

    # default=0 keeps max() from raising on a graph without nodes.
    max_influence = max(
        (graph.nodes[node]['influence'] for node in graph.nodes),
        default=0,
    )

    # Nothing meaningful to scale (and 100 / 0 would raise) when no node has
    # any weighted edge.
    if max_influence == 0:
        return graph

    # Scale scores to 100
    scale_factor = 100 / max_influence
    for node in graph.nodes:
        graph.nodes[node]['influence'] *= scale_factor

    return graph

In [6]:
def get_top_node_per_venue(graph):
    """Find, for each venue, the node with the highest influence score.

    Influence scores are (re)computed with ``compute_influence``, which stores
    an 'influence' attribute on the graph's nodes. A node's venue is the first
    entry of its 'venues' attribute. On equal influence the node encountered
    first while iterating ``graph.nodes`` is kept.

    Args:
        graph: Graph whose nodes carry a 'venues' sequence. May be None.

    Returns:
        A tuple ``(top_nodes, nodes)`` where ``top_nodes`` maps each venue to
        its most influential node and ``nodes`` is the list of those nodes.
        Returns ``({}, [])`` when ``graph`` is None.
    """
    graph = compute_influence(graph)
    if graph is None:
        # compute_influence has already reported the problem; return an empty
        # result of the usual shape instead of failing on ``None.nodes``.
        return {}, []

    top_nodes = {}
    for node in graph.nodes:
        venue = graph.nodes[node]['venues'][0]
        if venue not in top_nodes:
            top_nodes[venue] = node
        elif graph.nodes[node]['influence'] > graph.nodes[top_nodes[venue]]['influence']:
            top_nodes[venue] = node

    return top_nodes, list(top_nodes.values())

# Display the most influential author per venue among the selected top authors.
# Side effect: compute_influence (called inside) stores an 'influence'
# attribute on every node of `subgraph`.
get_top_node_per_venue(subgraph)

({'KDD': 694, 'IJCAI': 7977, 'NIPS': 1378, 'AAMAS': 15059, 'AAAI': 957},
 [694, 7977, 1378, 15059, 957])

In [18]:
def createProjectNetwork(list):
    """Build an undirected project graph from a collection of edges.

    Args:
        list: Edges in any form accepted by ``nx.Graph.add_edges_from``
            (the calls in this notebook pass a list of ``(venue, venue)``
            tuples). The name shadows the builtin ``list``; it is kept so
            existing keyword callers keep working.

    Returns:
        A new ``nx.Graph`` containing those edges.
    """
    project_graph = nx.Graph()
    project_graph.add_edges_from(list)
    return project_graph

In [27]:
# Fully connected project network: every pair of the five venues is linked.

project_1 = createProjectNetwork([
    ('NIPS', 'IJCAI'), ('NIPS', 'AAAI'), ('NIPS', 'AAMAS'), ('NIPS', 'KDD'),
    ('IJCAI', 'AAAI'), ('IJCAI', 'AAMAS'), ('IJCAI', 'KDD'),
    ('AAAI', 'AAMAS'), ('AAAI', 'KDD'),
    ('AAMAS', 'KDD'),
])
# Remove edges to match the project network. This works on a copy, so
# `subgraph` itself keeps all of its edges.
network_based_on_project_1 = remove_edges_based_on_project_network(subgraph.copy(), project_1)
# Backward-compatible alias for the original, misspelled variable name.
netowrk_based_on_project_1 = network_based_on_project_1

In [50]:
# Star connected project network: KDD is linked to each of the other four venues.

project_2 = createProjectNetwork([
    ('NIPS', 'KDD'),
    ('IJCAI', 'KDD'),
    ('AAAI', 'KDD'),
    ('AAMAS', 'KDD'),
])
# Prune a copy of the expert subgraph so it only keeps edges allowed by project_2.
network_based_on_project_2 = remove_edges_based_on_project_network(
    subgraph.copy(),
    project_2,
)

In [33]:
# Number of expert edges that remain after pruning against project_2.
network_based_on_project_2.number_of_edges()

400

In [35]:
def sum_edge_weights(graph):
    """Return the sum of the 'weight' attribute over all edges of ``graph``.

    Edges whose attribute dict has no 'weight' key are ignored. The result is
    0 when no edge carries a weight.
    """
    weights = [
        attributes['weight']
        for *_, attributes in graph.edges(data=True)
        if 'weight' in attributes
    ]

    # Accumulate left to right with plain addition so the floating-point
    # result matches a simple running total.
    accumulated = 0
    for weight in weights:
        accumulated += weight
    return accumulated

In [36]:
def randomGreedy(graph_G, graph_P):
    """Greedily select one node per venue label, starting from a random node.

    Starting from a uniformly random node of ``graph_G``, the function
    repeatedly adds the node (with a venue label not yet represented) that
    gives the induced subgraph the largest total edge weight. A candidate is
    only accepted when that total is strictly greater than 0. Selection stops
    once the subset has as many nodes as ``graph_P``, or when no candidate
    qualifies (a warning is printed).

    Args:
        graph_G: Expert graph; nodes carry a 'venues' sequence whose first
            entry is used as the label, and edges may carry 'weight'.
        graph_P: Project graph; only its node count is used here.

    Returns:
        The subgraph of ``graph_G`` induced by the selected nodes, or None
        (after printing an error) when an input is None or ``graph_P`` has
        more nodes than ``graph_G``.
    """
    if graph_G is None or graph_P is None:
        print("Error: One or both of the graphs is None.")
        return None

    if len(graph_P.nodes) > len(graph_G.nodes):
        print("Error: Number of nodes in P is greater than the number of nodes in G.")
        return None

    def label_of(node):
        return graph_G.nodes[node]['venues'][0]

    # Seed the selection with a random node from G.
    start = random.choice(list(graph_G.nodes))
    subset = {start}
    labels = [label_of(start)]
    target_size = len(graph_P.nodes)

    while len(subset) < target_size:
        best_node = None
        best_total_weight = 0.0

        for candidate in set(graph_G.nodes) - subset:
            # Only nodes whose label is not represented yet are eligible.
            if label_of(candidate) in labels:
                continue

            trial_subset = subset.copy()
            trial_subset.add(candidate)
            trial_weight = sum_edge_weights(graph_G.subgraph(trial_subset))

            # Keep the candidate that yields the largest total edge weight.
            if trial_weight > best_total_weight:
                best_total_weight = trial_weight
                best_node = candidate

        if best_node is None:
            print("Warning: No suitable node found. Terminating the loop.")
            break

        subset.add(best_node)
        labels.append(label_of(best_node))

    return graph_G.subgraph(subset)

In [40]:
# Select coordinators with the random-start greedy strategy, using the expert
# network pruned against project_2 together with project_2 itself.
random_coordinators = randomGreedy(network_based_on_project_2, project_2)

In [41]:
# List each selected coordinator together with its first listed venue.
for node, attributes in random_coordinators.nodes(data=True):
    print(f'{node}: {attributes["venues"][0]}')

14086: KDD
15784: IJCAI
13608: NIPS
15059: AAMAS
28: AAAI


In [42]:
def influenceGreedy(graph_G, graph_P):
    """Greedily select one node per venue label, starting from an influential node.

    The start node is drawn at random from the per-venue top-influence nodes
    returned by ``get_top_node_per_venue`` (which also stores an 'influence'
    attribute on the nodes of ``graph_G``). The function then repeatedly adds
    the node, with a venue label not yet represented, that gives the induced
    subgraph the largest total edge weight. A candidate is only accepted when
    that total is strictly greater than 0. Selection stops once the subset has
    as many nodes as ``graph_P``, or when no candidate qualifies (a warning is
    printed).

    Args:
        graph_G: Expert graph; nodes carry a 'venues' sequence whose first
            entry is used as the label, and edges may carry 'weight'.
        graph_P: Project graph; only its node count is used here.

    Returns:
        The subgraph of ``graph_G`` induced by the selected nodes, or None
        (after printing an error) when an input is None or ``graph_P`` has
        more nodes than ``graph_G``.
    """
    if graph_G is None or graph_P is None:
        print("Error: One or both of the graphs is None.")
        return None

    if len(graph_P.nodes) > len(graph_G.nodes):
        print("Error: Number of nodes in P is greater than the number of nodes in G.")
        return None

    # Start with a random node from the influence set
    top_nodes = get_top_node_per_venue(graph_G)
    key = random.choice(top_nodes[1])
    subset = set()
    subset.add(key)
    labels = []
    labels.append(graph_G.nodes[key]['venues'][0])

    while len(subset) < len(graph_P.nodes):
        best_node = None
        best_total_edge_weight = 0.0

        # Iterate over nodes in G not in the subset
        for node in set(graph_G.nodes) - subset:
            # Only nodes whose label is not represented yet are eligible.
            if graph_G.nodes[node]['venues'][0] in labels:
                continue

            # Create a temporary subset with the new node
            temp_subset = subset.copy()
            temp_subset.add(node)

            # Calculate the total edge weight in the induced subgraph
            total_edge_weight = sum_edge_weights(graph_G.subgraph(temp_subset))

            # Keep the candidate that yields the largest total edge weight
            if total_edge_weight > best_total_edge_weight:
                best_total_edge_weight = total_edge_weight
                best_node = node

        # Without this guard None would be added to the subset and the label
        # lookup below would raise KeyError.
        if best_node is None:
            print("Warning: No suitable node found. Terminating the loop.")
            break

        # Add the best node to the subset
        subset.add(best_node)
        labels.append(graph_G.nodes[best_node]['venues'][0])

    return graph_G.subgraph(subset)

In [43]:
# Pass the same project network that `network_based_on_project_2` was pruned
# against, so the expert graph and the project graph form a consistent pair.
influence_greedy_coordinators = influenceGreedy(network_based_on_project_2, project_2)

In [44]:
# List each selected coordinator together with its first listed venue.
for node, attributes in influence_greedy_coordinators.nodes(data=True):
    print(f'{node}: {attributes["venues"][0]}')

15520: IJCAI
13608: NIPS
8876: KDD
12205: AAMAS
5396: AAAI


In [45]:
def monte_carlo(f, graph_G, graph_P, num_iter):
    """Average the total edge weight of the subgraphs produced by repeated runs of ``f``.

    Calls ``f(graph_G, graph_P)`` ``num_iter`` times, sums the edge weights of
    each returned graph with ``sum_edge_weights``, then prints and returns the
    mean over all runs.

    Args:
        f: Selection strategy called as ``f(graph_G, graph_P)``; it must
            return a graph accepted by ``sum_edge_weights``.
        graph_G: Expert graph forwarded to ``f``.
        graph_P: Project graph forwarded to ``f``.
        num_iter: Number of runs to average over; must be positive.

    Returns:
        The mean total edge weight over the ``num_iter`` runs.

    Raises:
        ValueError: If ``num_iter`` is not positive (the mean would otherwise
            divide by zero or be meaningless).
    """
    if num_iter <= 0:
        raise ValueError("num_iter must be a positive integer")

    comm_eff = 0
    for _ in range(num_iter):
        best = f(graph_G, graph_P)
        comm_eff = comm_eff + sum_edge_weights(best)

    avg_comm_eff = comm_eff / num_iter
    print(f"Average Communication efficiency is : {avg_comm_eff}")

    return avg_comm_eff

In [48]:
# Average the total edge weight of the randomGreedy selection over 10000 runs.
# Each run draws a random start node and no seed is set in the visible cells,
# so the printed value can differ between executions.
avg_comm_eff = monte_carlo(randomGreedy,network_based_on_project_2, project_2, 10000)
avg_comm_eff

Average Communication efficiency is : 202.51461111112553


202.51461111112553

In [49]:
# Average the total edge weight of the influenceGreedy selection over 10000 runs.
# NOTE: this rebinds `avg_comm_eff`, replacing any value stored under that
# name by an earlier cell.
avg_comm_eff = monte_carlo(influenceGreedy,network_based_on_project_2, project_2, 10000)
avg_comm_eff

Average Communication efficiency is : 208.13216666668922


208.13216666668922

In [None]:
# import time

# # Start recording the time
# start_time = time.time()

# # Your algorithm or code here
# # ...

# # Stop recording the time
# end_time = time.time()

# # Calculate the runtime
# runtime = end_time - start_time

# print(f"Runtime: {runtime} seconds")