In [1]:
# import libraries
import networkx as nx
import plotly.graph_objs as go
import pickle
import matplotlib.pyplot as plt
import math as mt
import os
import xml.etree.ElementTree as ET
import csv
import pandas as pd

In [2]:
# Import the labeled datasets (one pickle file per category).
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file handle is closed as soon as the object has been read.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


cpp = _load_pickle('labeled_data/cpp.pkl')
java = _load_pickle('labeled_data/java.pkl')
web = _load_pickle('labeled_data/webdev.pkl')
data_all = [cpp, java, web]

# Merge the per-category datasets into one flat list, keeping their order
data = [record for dataset in data_all for record in dataset]

len(data)

3524

In [65]:
# Display one record of the web dataset to inspect the fields a record carries
web[203]

{'id': 204,
 'skills': ['PHP', 'MySQL', 'HTML', 'Website Design', 'Python'],
 'location': 'Venezuela',
 'reviews': 105,
 'progress': 5.9,
 'label': 'web'}

In [3]:
# Build three undirected graphs over the same node set:
#   * location_based_network - edge between two nodes that share a location
#   * skill_based_network    - edge between two nodes that share >= 1 skill,
#                              weight = 1 / (number of shared skills)
#   * fully_connected_graph  - edge between every pair of nodes,
#                              weight = exp(-(shared skills [+ 1 if they also
#                              share a location and at least one skill]))
location_based_network = nx.Graph()
skill_based_network = nx.Graph()
fully_connected_graph = nx.Graph()

# Add every record as a node in all three graphs. The record's 'id' is
# overwritten with its position in the merged list so that ids are unique
# across the combined datasets. NOTE: this mutates the dicts in `data`.
for node_id, item in enumerate(data):
    item['id'] = node_id
    node_attrs = {
        'location': item['location'],
        'skills': item['skills'],
        'label': item['label'],
    }
    for network in (location_based_network, skill_based_network, fully_connected_graph):
        network.add_node(node_id, **node_attrs)

# All three graphs hold identical nodes, so take the node list once and
# pre-compute each node's skill set instead of rebuilding it for every pair.
node_records = list(fully_connected_graph.nodes(data=True))
skill_sets = [set(attrs['skills']) for _, attrs in node_records]

# The graphs are undirected, so each unordered pair is visited exactly once
# (j > i). Edges are inserted in the same order as a full double loop would
# first create them, so the resulting graphs are identical.
for i, (node1, data1) in enumerate(node_records):
    for j in range(i + 1, len(node_records)):
        node2, data2 = node_records[j]
        same_location = data1['location'] == data2['location']
        n_common = len(skill_sets[i] & skill_sets[j])

        if same_location:
            location_based_network.add_edge(node1, node2)

        if n_common:
            skill_based_network.add_edge(node1, node2, weight=1 / n_common)

        # A shared location only adds to the exponent when the two nodes also
        # share at least one skill; with no shared skill the weight is exp(0).
        exponent = n_common + 1 if (n_common and same_location) else n_common
        fully_connected_graph.add_edge(node1, node2, weight=mt.exp(-exponent))

# Print the size of each network and store it as a pickle file.
# os.path.join keeps the target inside labeled_data/ on every platform.
network_reports = [
    ("Location-based Network:", location_based_network,
     "location_based_network.pkl", 'failed to store location-based  graph'),
    ("\nSkill-based Network:", skill_based_network,
     "skill_based_network.pkl", 'failed to store skill-based  graph'),
    ("\nFully connected Network:", fully_connected_graph,
     "fully_connected_network.pkl", 'failed to store fully connected graph'),
]
for heading, network, pickle_name, failure_message in network_reports:
    print(heading)
    print(f"Nodes: {network.number_of_nodes()}")
    print(f"Edges: {network.number_of_edges()}")
    try:
        with open(os.path.join("labeled_data", pickle_name), "wb") as fh:
            pickle.dump(network, fh)
        print("success")
    except Exception as exc:
        # Saving stays best-effort, but the reason for the failure is reported.
        print(f"{failure_message}: {exc!r}")

Location-based Network:
Nodes: 3524
Edges: 773082
success

Skill-based Network:
Nodes: 3524
Edges: 1841855
success

Fully connected Network:
Nodes: 3524
Edges: 6207526
success


In [None]:
# Create an XML representation of the graph
def _append_attributes(parent, attributes):
    """Append one ``<attribute key="...">`` child to ``parent`` per mapping item."""
    for key, value in attributes.items():
        # ElementTree can only serialize string attribute values, so the key is
        # coerced with str() just like node ids and attribute values are.
        attr_elem = ET.SubElement(parent, "attribute", attrib={"key": str(key)})
        attr_elem.text = str(value)


def graph_to_xml(graph):
    """Serialize a graph's nodes and edges into an XML string.

    The document has a ``<graph>`` root containing one ``<node id="...">``
    element per node followed by one ``<edge source="..." target="...">``
    element per edge. Each node/edge attribute becomes an
    ``<attribute key="...">`` child whose text is ``str(value)``.

    Args:
        graph: Object whose ``nodes(data=True)`` yields ``(node, attrs)`` pairs
            and whose ``edges(data=True)`` yields ``(u, v, attrs)`` triples,
            where ``attrs`` is a mapping.

    Returns:
        The XML document as a ``str``.
    """
    root = ET.Element("graph")

    # Nodes first, in the graph's iteration order
    for node, node_data in graph.nodes(data=True):
        node_elem = ET.SubElement(root, "node", attrib={"id": str(node)})
        _append_attributes(node_elem, node_data)

    # Then the edges
    for u, v, edge_data in graph.edges(data=True):
        edge_elem = ET.SubElement(root, "edge", attrib={"source": str(u), "target": str(v)})
        _append_attributes(edge_elem, edge_data)

    return ET.tostring(root, encoding="utf-8").decode()

In [None]:
def remove_extension(string):
    """Return the part of ``string`` that precedes its first ``.``.

    The dot itself is dropped along with everything after it. A string that
    contains no dot is returned unchanged.

    Args:
        string: A Python string.

    Returns:
        The prefix up to (but not including) the first dot, or the whole
        string when it has no dot.
    """
    prefix, _dot, _remainder = string.partition(".")
    return prefix

In [None]:
# Save every pickled graph found in `directory` as an XML file.
# NOTE(review): this is a hardcoded, machine-specific absolute path; consider
# deriving it from a configurable base directory. A raw string is used so the
# backslashes are not parsed as (invalid) escape sequences.
directory = r'C:\Users\tm0663\Documents\Projects\Team formation\Repository\networks\web'
xml_output_dir = os.path.join('xml_networks', 'web')
os.makedirs(xml_output_dir, exist_ok=True)  # make sure the target folder exists

for filename in os.listdir(directory):
    if not filename.endswith('.pkl'):
        continue

    # Load the graph from the pickle file (handle is closed right after reading)
    filepath = os.path.join(directory, filename)
    with open(filepath, "rb") as pkl_file:
        graph = pickle.load(pkl_file)

    # Convert the graph to XML
    xml_data = graph_to_xml(graph)

    # Save the XML data to a file. The text is written as UTF-8 explicitly so
    # the result does not depend on the platform's default encoding.
    xml_path = os.path.join(xml_output_dir, f'{remove_extension(filename)}.xml')
    with open(xml_path, 'w', encoding='utf-8') as xml_file:
        xml_file.write(xml_data)


In [None]:
# Export every pickled graph found in `directory` as a weighted edge-list CSV.
# NOTE(review): this is a hardcoded, machine-specific absolute path; consider
# deriving it from a configurable base directory. A raw string is used so the
# backslashes are not parsed as (invalid) escape sequences.
directory = r'C:\Users\tm0663\Documents\Projects\Team formation\Repository\networks\web'
for filename in os.listdir(directory):
    if not filename.endswith('.pkl'):
        continue
    try:
        # Load the graph from the pickle file
        filepath = os.path.join(directory, filename)
        with open(filepath, "rb") as pkl_file:
            graph = pickle.load(pkl_file)

        # The `with` block closes the CSV file, so no explicit close() is needed
        csv_path = os.path.join('csv_networks', 'web', f"{remove_extension(filename)}.csv")
        with open(csv_path, "w", newline="") as f:
            writer = csv.writer(f)

            # Write the header row
            writer.writerow(["node_1", "node_2", "weight"])

            # Write one row per edge; an edge without a "weight" attribute
            # raises KeyError, which is reported below for that file.
            for node_1, node_2, edge_attrs in graph.edges(data=True):
                writer.writerow([node_1, node_2, edge_attrs["weight"]])
    except Exception as exc:
        # Keep going with the remaining files, but say which one failed and why
        print(f"error while exporting {filename}: {exc!r}")

In [None]:
# Export the location-based network as an unweighted edge-list CSV
with open(os.path.join('networks', 'web', 'location_based_network.pkl'), "rb") as pkl_file:
    graph = pickle.load(pkl_file)  # Load network

# NOTE(review): hardcoded, machine-specific absolute output path. A raw string
# is used so the backslashes are not parsed as (invalid) escape sequences.
with open(r"C:\Users\tm0663\Documents\Projects\Team formation\Repository\csv_networks\web\location_based_network.csv", "w", newline="") as f:
    writer = csv.writer(f)

    # Write the header row
    writer.writerow(["node_1", "node_2"])

    # Write one (node_1, node_2) row per edge
    writer.writerows(graph.edges())

In [None]:
# def common_skills(G, u, v):
#     U = set(G.nodes[u]['skills'])
#     V = set(G.nodes[v]['skills'])
#     common = U.intersection(V)
#     return list(common) if len(common) > 0 else 0

In [None]:
# def convert2csv(G, path):
#     # Convert the graph to a DataFrame
#     node_data = [(node, data) for node, data in G.nodes(data=True)]
#     edge_data = [(u, v, G.edges[(u,v)]["weight"]) for u, v, data in G.edges(data=True)]
#     # edge_data = [(u, v, G.edges[(u,v)]["weight"], G.nodes[u]['location'], common_skills(G, u, v)) for u, v, data in G.edges(data=True)]
    
#     # Create DataFrames for nodes and edges
#     nodes_df = pd.DataFrame(node_data, columns=["Node", "Attributes"])
#     edges_df = pd.DataFrame(edge_data, columns=["Node1", "Node2", "Weights"])
#     # edges_df = pd.DataFrame(edge_data, columns=["Node1", "Node2", "Weights", "Location", "Skills"])

#     # Concatenate nodes and edges DataFrames
#     # combined_df = pd.concat([nodes_df, edges_df], ignore_index=True)

#     # Export the combined data to a single CSV file
#     # combined_df.to_csv(path, index=False)
#     # Export the node and edge data to CSV files
#     nodes_df.to_csv(f"{path}\\nodes.csv", index=False)
#     edges_df.to_csv(f"{path}\edges.csv", index=False)

In [None]:
def createWeightedEdgelist(G, path, filename):
    """Write the weighted edge list of ``G`` to ``<path>/<filename>edgeslist.csv``.

    The CSV has the columns ``Node1``, ``Node2`` and ``Weights`` and no index
    column.

    Args:
        G: Graph whose ``edges(data=True)`` yields ``(u, v, attrs)`` triples.
        path: Directory the CSV file is written to.
        filename: Prefix placed directly in front of ``edgeslist.csv``.

    Raises:
        KeyError: If an edge has no ``"weight"`` attribute.
    """
    edge_data = [(u, v, attrs["weight"]) for u, v, attrs in G.edges(data=True)]
    edges_df = pd.DataFrame(edge_data, columns=["Node1", "Node2", "Weights"])

    # os.path.join uses the platform's separator, so the file ends up inside
    # `path` on every OS (a literal backslash only works on Windows).
    edges_df.to_csv(os.path.join(path, f"{filename}edgeslist.csv"), index=False)

In [None]:
def createEdgelist(G, path, filename):
    """Write the unweighted edge list of ``G`` to ``<path>/<filename>edgeslist.csv``.

    The CSV has the columns ``Node1`` and ``Node2`` and no index column.

    Args:
        G: Graph whose ``edges()`` yields ``(u, v)`` pairs.
        path: Directory the CSV file is written to.
        filename: Prefix placed directly in front of ``edgeslist.csv``.
    """
    edge_data = [(u, v) for u, v in G.edges()]
    edges_df = pd.DataFrame(edge_data, columns=["Node1", "Node2"])

    # os.path.join uses the platform's separator, so the file ends up inside
    # `path` on every OS (a literal backslash only works on Windows).
    edges_df.to_csv(os.path.join(path, f"{filename}edgeslist.csv"), index=False)

In [None]:
# Export the cpp skill-based network as a weighted edge list.
# Paths are built with os.path.join so no backslash escape sequences are needed.
graph_path = os.path.join('networks', 'cpp', 'skill_based_network.pkl')
with open(graph_path, 'rb') as pkl_file:
    graph = pickle.load(pkl_file)
path = os.path.join('csv_networks', 'cpp')
# The prefix has no trailing underscore, so the file is named "skillsedgeslist.csv"
createWeightedEdgelist(graph, path, 'skills')

In [None]:
# Export the cpp location-based network as an unweighted edge list.
# Paths are built with os.path.join so no backslash escape sequences are needed.
graph_path = os.path.join('networks', 'cpp', 'location_based_network.pkl')
with open(graph_path, 'rb') as pkl_file:
    graph = pickle.load(pkl_file)
path = os.path.join('csv_networks', 'cpp')
createEdgelist(graph, path, 'location_')

In [None]:
# # Node degree distribution
# degree_sequence = [d for n, d in location_based_network.degree()]
# plt.hist(degree_sequence, bins=10, alpha=0.7, color='b', edgecolor='k')
# plt.xlabel('Degree')
# plt.ylabel('Number of Nodes')
# plt.title('Node Degree Distribution -- Location based')
# plt.show()

In [None]:
# # Node degree distribution
# degree_sequence = [d for n, d in skill_based_network.degree()]
# plt.hist(degree_sequence, bins=10, alpha=0.7, color='b', edgecolor='k')
# plt.xlabel('Degree')
# plt.ylabel('Number of Nodes')
# plt.title('Node Degree Distribution -- Skill based')
# plt.show()

In [None]:
# # Clustering coefficient
# print("====== Location Based ======")
# avg_clustering = nx.average_clustering(location_based_network)
# print(f"Average Clustering Coefficient: {avg_clustering}")

# # Centrality measures (e.g., degree centrality, betweenness centrality, closeness centrality)
# degree_centrality = nx.degree_centrality(location_based_network)
# betweenness_centrality = nx.betweenness_centrality(location_based_network)
# closeness_centrality = nx.closeness_centrality(location_based_network)
# print(f"Degree Centrality: {degree_centrality}")
# print(f"Betweeness Centrality: {betweenness_centrality}")
# print(f"Closeness Centrality: {closeness_centrality}")

In [None]:
# # Clustering coefficient
# print("======= Skill Based ========")
# avg_clustering = nx.average_clustering(skill_based_network)
# print(f"Average Clustering Coefficient: {avg_clustering}")

# # Centrality measures (e.g., degree centrality, betweenness centrality, closeness centrality)
# degree_centrality = nx.degree_centrality(skill_based_network)
# betweenness_centrality = nx.betweenness_centrality(skill_based_network)
# closeness_centrality = nx.closeness_centrality(skill_based_network)
# print(f"Degree Centrality: {degree_centrality}")
# print(f"Betweeness Centrality: {betweenness_centrality}")
# print(f"Closeness Centrality: {closeness_centrality}")

In [None]:

# # Visualize the graph
# pos = nx.spring_layout(location_based_network)  # Define node positions for visualization
# nx.draw(skill_based_network, pos, with_labels=True, node_size=300, font_size=10, font_color='black', node_color='lightblue', edge_color='gray')
# plt.title('Graph Visualization -- Location Based')
# plt.show()

In [None]:
# # Visualize the graph
# pos = nx.spring_layout(skill_based_network)  # Define node positions for visualization
# nx.draw(skill_based_network, pos, with_labels=True, node_size=300, font_size=10, font_color='black', node_color='lightblue', edge_color='gray')
# plt.title('Graph Visualization -- Skill Based')
# plt.show()

In [66]:
import networkx as nx

# Build a small weighted example graph (swap in your own graph here)
G = nx.Graph()
G.add_weighted_edges_from([
    ("A", "B", 4),
    ("A", "C", 2),
    ("B", "D", 5),
    ("C", "D", 3),
])


def sum_neighbor_weights(node):
    """Return the summed 'weight' of the edges joining ``node`` to its neighbors.

    Reads the module-level graph ``G``.
    """
    total = 0
    for edge_attrs in G[node].values():
        total += edge_attrs['weight']
    return total


# Order the nodes by ascending summed neighbor weight
sorted_nodes = list(G.nodes)
sorted_nodes.sort(key=sum_neighbor_weights)

# Show the resulting order
print("Nodes sorted by neighbor weights:", sorted_nodes)


Nodes sorted by neighbor weights: ['C', 'A', 'D', 'B']


In [67]:
import networkx as nx

# Build a small example graph (swap in your own graph here)
G = nx.Graph()
G.add_weighted_edges_from([
    ("A", "B", 4),
    ("A", "C", 2),
    ("B", "C", 1),
    ("B", "D", 5),
    ("C", "D", 3),
])

# Compute the shortest path between every pair of nodes.
# NOTE: all_pairs_shortest_path is the unweighted (fewest-hops) variant, so the
# 'weight' attributes set above do not influence the paths reported here.
all_shortest_paths = dict(nx.all_pairs_shortest_path(G))

# Report one line per (source, target) pair
for source_node in all_shortest_paths:
    paths = all_shortest_paths[source_node]
    for target_node in paths:
        shortest_path = paths[target_node]
        print(f"Shortest path from {source_node} to {target_node}: {shortest_path}")


Shortest path from A to A: ['A']
Shortest path from A to B: ['A', 'B']
Shortest path from A to C: ['A', 'C']
Shortest path from A to D: ['A', 'B', 'D']
Shortest path from B to B: ['B']
Shortest path from B to A: ['B', 'A']
Shortest path from B to C: ['B', 'C']
Shortest path from B to D: ['B', 'D']
Shortest path from C to C: ['C']
Shortest path from C to A: ['C', 'A']
Shortest path from C to B: ['C', 'B']
Shortest path from C to D: ['C', 'D']
Shortest path from D to D: ['D']
Shortest path from D to B: ['D', 'B']
Shortest path from D to C: ['D', 'C']
Shortest path from D to A: ['D', 'B', 'A']
