# Experiment 2. 

I am running the algorithms for a fully connected network.

In [1]:
import preprocessing as ps
import numpy as np
import networkx as nx
import pickle
import matplotlib.pyplot as plt
import random
random.seed(42)
import csv

In [2]:
# Load the data
dataset = pickle.load(open('./networks/network.pkl', 'rb'))

In [3]:
# create the project networks
list_1 = [('DM', 'T'), ('DB', 'T'), ('AI', 'T')]
list_2 = [('DM', 'T'), ('DB', 'T'), ('AI', 'T'), ('DM', 'DB'), ('DM', 'AI'), ('DB', 'AI')]
list_3 = [('DM', 'T'), ('DB', 'T'), ('AI', 'DB')]

star_proj = ps.createProjectNetwork(list_1)
full_proj = ps.createProjectNetwork(list_2)
chain_proj = ps.createProjectNetwork(list_3)

## Experiment 1.a 

In this experiment:
1. I am running on the entire network(this is unreasonable) so I am looking using influential sampling.
2. I use Average weight score as the metric to measure the influence of a node.


In [None]:
projects = [(star_proj, "Star-Structured Project"), (full_proj, "Fully-connected Structured Project"), (chain_proj, "Chain-Structured Project")]
network = dataset.copy()

In [None]:
with open(f'./results/exp_1_a/node_network.csv', 'w', newline='') as file:
    writer = csv.writer(file)
       
    for project in projects:
        print(f"*********** {project[1]} ***********")
        writer.writerow([project[1]])
        writer.writerow([])
        # Greedy
        print("\n--------      Using Greedy Strategy     -------\n")
        obj_max_1 = 0.0
        best_set_1 = set()
        for node in network.nodes():
            subset, comm_eff = ps.Greedy(network, project[0], node, beta=None)
            if comm_eff > obj_max_1:
                obj_max_1 = comm_eff
                best_set_1 = subset

        leaders_eff_1 = ps.sum_edge_weights(network.subgraph(best_set_1))
        print("Coordinators communication efficiency", leaders_eff_1)
        print(f"Objective value : {obj_max_1}")
        team_eff_1 = obj_max_1 - leaders_eff_1

        for node in best_set_1:
            print(f"Team :{network.nodes[node]['label']}, Node: {node} Rank: {leaders_eff_1}")
        
        title_attributes = f"Node Ranking for {project[1]} Under Greedy Strategy"
        writer.writerow([title_attributes])
        writer.writerow(['Author', "Label", "Rank"])
        for node in best_set_1:
            print(f"Team :{network.nodes[node]['label']}, Node: {node}, Rank: {ps.get_node_rank(network, node)}")
            writer.writerow([node, network.nodes[node]['label'], ps.get_node_rank(network, node)])
        writer.writerow([])

        # Influence Greedy
        print("\n--------      Using influence Greedy Strategy     -------\n")
        obj_max_2 = 0.0
        best_set_2 = set()
        print("Influential First")
        influential_nodes = ps.get_top_ranked_node_each_group(network)
        for node in influential_nodes:
            subset, comm_eff = ps.Greedy(network, project[0], node, beta=None)
            if comm_eff > obj_max_2:
                obj_max_2 = comm_eff
                best_set_2 = subset

        leaders_eff_2 = ps.sum_edge_weights(network.subgraph(best_set_2))
        print("Coordinators communication efficiency", leaders_eff_2)
        print(f"Objective value : {obj_max_2}")
        team_eff_2 = obj_max_2 - leaders_eff_2

        writer.writerow([])
        title_attributes = f"Node Ranking for {project[1]} Under Influence Greedy Strategy"
        writer.writerow([title_attributes])
        writer.writerow(['Author', "Label", "Rank"])
        for node in best_set_2:
            print(f"Team :{network.nodes[node]['label']}, Node: {node}, Rank: {ps.get_node_rank(network, node)}")
            writer.writerow([node, network.nodes[node]['label'], ps.get_node_rank(network, node)])
        writer.writerow([])

        writer.writerow([])
        title_attributes = f"Node Ranking for {project[1]} Under Influence Greedy Strategy"
        writer.writerow([title_attributes])
        writer.writerow(['Algorithm', "Objective Function", "Leaders Efficiency", "Team efficiency"])
        writer.writerow(['Greedy', obj_max_1, leaders_eff_1, team_eff_1])
        writer.writerow(['Inf Greedy', obj_max_2, leaders_eff_2, team_eff_2])
        writer.writerow([])

        print("\n\n")

## Experiment 1.b

In this experiment, 
1. I run on samples of the network. (10, 20, 30, 40, 50)
   - First using uniform random sampling.
2. I am using the average sum of adjacent weights as the team influence measure. 

### Uniform Random Sampling

In [4]:
def create_subnet(network, nodes_per_team):
    """
    Creates a smaller graph by selecting a fixed number of nodes from each label in the given dataset.

    Parameters:
    dataset (networkx.Graph): The original dataset graph.

    Returns:
    networkx.Graph: The new graph containing the selected samples.
    """

    # Create a list to store the selected samples
    selected_samples = []
    labels = ['T', 'DM', 'DB', 'AI']

    # Iterate over each label
    for label in labels:
        # Get all the nodes with the current label
        nodes_with_label = [node for node in network.nodes if network.nodes[node]['label'] == label]

        # Randomly select 60 nodes from the current label
        selected_nodes = random.sample(nodes_with_label, nodes_per_team)

        # Add the selected nodes to the list of selected samples
        selected_samples.extend(selected_nodes)

    # Create a new graph with the selected samples
    subnet = network.subgraph(selected_samples)

    return subnet

In [5]:
def add_weights(network):
    """
    Adds edges to the network with a weight value based on the minimum weight in the network.

    Parameters:
    - network: The network to add edges to.

    Returns:
    - network
    """

    # Find the minimum weight in the network
    min_weight = min(nx.get_edge_attributes(network, 'weight').values())

    # Iterate over all pairs of nodes
    for node1 in network.nodes():
        for node2 in network.nodes():
            # Check if there is no edge between the nodes
            if not network.has_edge(node1, node2):
                # Add the edge with the weight value
                network.add_edge(node1, node2, weight=0.1 * min_weight)

    return network


In [8]:
projects = [(star_proj, "Star-Structured Project"), (full_proj, "Fully-connected Structured Project"), (chain_proj, "Chain-Structured Project")]

for i in range(10, 51, 10):
    network = add_weights(create_subnet(dataset.copy(), i).copy()).copy()
    print(f">>>>>>>>>>>>>>>>>> {i} Node network <<<<<<<<<<<<<<<<<\n")
    with open(f'./results/exp_2/{i}_node_network.csv', 'w', newline='') as file:
        writer = csv.writer(file)

        for project in projects:
            print(f"*********** {project[1]} ***********")
            writer.writerow([project[1]])
            writer.writerow([])
            # Greedy
            print("\n--------      Using Greedy Strategy     -------\n")
            obj_max_1 = 0.0
            best_set_1 = set()
            for node in network.nodes():
                subset, comm_eff = ps.Greedy(network, project[0], node, beta=None)
                if comm_eff > obj_max_1:
                    obj_max_1 = comm_eff
                    best_set_1 = subset

            leaders_eff_1 = ps.sum_edge_weights(network.subgraph(best_set_1))
            print("Coordinators communication efficiency", leaders_eff_1)
            print(f"Objective value : {obj_max_1}")
            team_eff_1 = round(obj_max_1 - leaders_eff_1, 4)

            for node in best_set_1:
                print(f"Team :{network.nodes[node]['label']}, Node: {node} Rank: {leaders_eff_1}")
            
            title_attributes = f"Node Ranking for {project[1]} Under Greedy Strategy"
            writer.writerow([title_attributes])
            writer.writerow(['Author', "Label", "Rank"])
            for node in best_set_1:
                print(f"Team :{network.nodes[node]['label']}, Node: {node}, Rank: {ps.get_node_rank(network, node)}")
                writer.writerow([node, network.nodes[node]['label'], ps.get_node_rank(network, node)])
            writer.writerow([])

            # Influence Greedy
            print("\n--------      Using influence Greedy Strategy     -------\n")
            obj_max_2 = 0.0
            best_set_2 = set()
            print("Influential First")
            influential_nodes = ps.get_top_ranked_node_each_group(network)
            for node in influential_nodes:
                subset, comm_eff = ps.Greedy(network, project[0], node, beta=None)
                if comm_eff > obj_max_2:
                    obj_max_2 = comm_eff
                    best_set_2 = subset

            leaders_eff_2 = ps.sum_edge_weights(network.subgraph(best_set_2))
            print("Coordinators communication efficiency", leaders_eff_2)
            print(f"Objective value : {obj_max_2}")
            team_eff_2 = obj_max_2 - leaders_eff_2

            writer.writerow([])
            title_attributes = f"Node Ranking for {project[1]} Under Influence Greedy Strategy"
            writer.writerow([title_attributes])
            writer.writerow(['Author', "Label", "Rank"])
            for node in best_set_2:
                print(f"Team :{network.nodes[node]['label']}, Node: {node}, Rank: {ps.get_node_rank(network, node)}")
                writer.writerow([node, network.nodes[node]['label'], ps.get_node_rank(network, node)])
            writer.writerow([])

            writer.writerow([])
            title_attributes = f"Node Ranking for {project[1]} Under Influence Greedy Strategy"
            writer.writerow([title_attributes])
            writer.writerow(['Algorithm', "Objective Function", "Leaders Efficiency", "Team efficiency"])
            writer.writerow(['Greedy', obj_max_1, leaders_eff_1, team_eff_1])
            writer.writerow(['Inf Greedy', obj_max_2, leaders_eff_2, team_eff_2])
            writer.writerow([])

            print("\n\n")

>>>>>>>>>>>>>>>>>> 10 Node network <<<<<<<<<<<<<<<<<

*********** Star-Structured Project ***********

--------      Using Greedy Strategy     -------

Coordinators communication efficiency 65.708
Objective value : 162.8338
Team :DB, Node: Chengfei Liu Rank: 65.708
Team :T, Node: Luca de Alfaro Rank: 65.708
Team :DM, Node: Weizheng Gao Rank: 65.708
Team :AI, Node: Kazumi Saito Rank: 65.708
Team :DB, Node: Chengfei Liu, Rank: 1
Team :T, Node: Luca de Alfaro, Rank: 3
Team :DM, Node: Weizheng Gao, Rank: 8
Team :AI, Node: Kazumi Saito, Rank: 9

--------      Using influence Greedy Strategy     -------

Influential First
Coordinators communication efficiency 28.996
Objective value : 96.579
Team :DB, Node: Chengfei Liu, Rank: 1
Team :T, Node: Ravindran Kannan, Rank: 1
Team :AI, Node: Hector Geffner, Rank: 1
Team :DM, Node: Jiahui Liu, Rank: 2



*********** Fully-connected Structured Project ***********

--------      Using Greedy Strategy     -------

Coordinators communication efficiency 6

In [None]:
import matplotlib.pyplot as plt

# Data for the plots
data = {
    'Star': {
        'Random Greedy': {
            'Coordinators Communication Efficiency': 33.33,
            'Total Communication Efficiency': 225.9043
        },
        'Influence Greedy': {
            'Coordinators Communication Efficiency': 0,
            'Total Communication Efficiency': 150.0
        },
        'Greedy': {
            'Coordinators communication efficiency': 130.0,
            'Total Communication Efficiency': 423.2008,
        }
    },
    'Fully-Connected': {
        'Random Greedy': {
            'Coordinators Communication Efficiency': 22.220000000000002,
            'Total Communication Efficiency': 196.5813
        },
        'Influence Greedy': {
            'Coordinators Communication Efficiency': 0,
            'Total Communication Efficiency': 150.0
        },
        'Greedy': {
            'Coordinators communication efficiency': 130.0,
            'Total Communication Efficiency': 423.2008,
        }
    },
    'Chain': {
        'Random Greedy': {
            'Coordinators Communication Efficiency': 0,
            'Total Communication Efficiency': 150.0
        },
        'Influence Greedy': {
            'Coordinators Communication Efficiency': 40.0,
            'Total Communication Efficiency': 260.0
        },
        'Greedy': {
            'Coordinators communication efficiency': 130.0,
            'Total Communication Efficiency': 423.2008,
        }
    }
}

# Define abbreviations for the algorithms
abbreviations = {'Random Greedy': 'RG', 'Influence Greedy': 'IG', 'Greedy': 'GR'}

# Plotting
for project, algorithms in data.items():
    plt.figure(figsize=(10, 5))
    plt.title(f'{project} Project Structure', fontsize=16)
    legends_added = False  # Flag to track if legends have been added
    for algorithm, metrics in algorithms.items():
        x = range(len(metrics))
        colors = ['r', 'g']
        abbrev = abbreviations[algorithm]
        for i, (metric, value) in enumerate(metrics.items()):
            color = colors[i] if 'Coordinators' in metric else 'g'
            plt.bar(f'{metric[:3]} ({abbrev})', value, label=metric, color=color, alpha=0.7)
        # Add legend if it hasn't been added before
        if not legends_added:
            plt.legend(fontsize=12)
            legends_added = True  # Set the flag to True after adding legends
    plt.xlabel('Metrics', fontsize=14)
    plt.ylabel('Communication Efficiency', fontsize=14)
    plt.xticks(rotation=45, fontsize=12)
    plt.yticks(fontsize=12)
    plt.grid(axis='y')
    plt.tight_layout()
    plt.show()