In [2]:
import pandas as pd
import networkx as nx
import itertools

# ---------------------------
# Step 1: Load the dataset
# ---------------------------
# Read the paper records into a DataFrame. The path is relative, so the CSV
# is resolved against the current working directory.
papers_df = pd.read_csv(filepath_or_buffer="final_papers.csv")

# ---------------------------
# Step 2: Create the weighted edgelist
# ---------------------------
def _count_coauthor_pairs(author_id_strings):
    """Count, for every unordered author pair, the papers they share.

    Args:
        author_id_strings: Iterable with one entry per paper, each a
            comma-separated string of author IDs. Entries are passed through
            ``str()`` first, so a non-string cell (e.g. a lone numeric ID)
            does not raise.

    Returns:
        dict mapping ``(author_a, author_b)`` -- a tuple sorted so that
        (A, B) and (B, A) share one key -- to the number of papers on which
        both IDs appear.
    """
    # Function-scope import keeps this step self-contained.
    from collections import Counter

    pair_counts = Counter()
    for raw_ids in author_id_strings:
        # Split on commas and discard blank fragments (e.g. a trailing comma).
        stripped = (part.strip() for part in str(raw_ids).split(","))
        # Collapse IDs repeated within one paper, keeping first-seen order.
        # Without this, a repeated ID yields a self-loop pair (A, A) and
        # counts that author's other pairs more than once for the same paper.
        unique_ids = list(dict.fromkeys(a for a in stripped if a))
        pair_counts.update(
            tuple(sorted(pair))
            for pair in itertools.combinations(unique_ids, 2)
        )
    return dict(pair_counts)


# Each unique (sorted) author pair mapped to its number of co-authored papers.
# The 'author_ids' column holds a comma-separated string of author IDs; rows
# where it is missing are skipped, since NaN has no .split().
edge_weights = _count_coauthor_pairs(papers_df["author_ids"].dropna())

# Convert the dictionary into a weighted edgelist (list of tuples: (author1, author2, weight))
weighted_edgelist = [(u, v, weight) for (u, v), weight in edge_weights.items()]

# ---------------------------
# Step 3: Construct the undirected graph
# ---------------------------
# Build the undirected co-authorship graph: one edge per author pair, with
# the pair's collaboration count stored in the edge's "weight" attribute.
G = nx.Graph()
G.add_edges_from(
    (author_a, author_b, {"weight": n_papers})
    for author_a, author_b, n_papers in weighted_edgelist
)

# Report the size of the resulting graph.
print("Graph created with only collaborations.")
print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())


Graph created with only collaborations.
Number of nodes: 26463
Number of edges: 76236
