<a href="https://colab.research.google.com/github/varun-beep/Co-Authorship-Network-in-Generative-AI/blob/main/coauthorship_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from collections import defaultdict, Counter
from itertools import combinations

import networkx as nx
import pandas as pd
import plotly.graph_objects as go
import requests

In [2]:
# ------------------------- CONFIG -------------------------

# Free-text search string passed to fetch_semantic_scholar_data as the API "query".
QUERY = "Generative AI"
# Passed to fetch_semantic_scholar_data, which sends it as the API "limit" parameter.
MAX_RESULTS = 50
MIN_DEGREE = 2  # Authors whose degree in the unfiltered graph is below this are dropped
TOP_K_AUTHORS = 20  # How many highest-degree authors export_top_authors writes out

In [3]:
# ------------------------- DATA FETCHING -------------------------

def fetch_semantic_scholar_data(query, max_results=50, timeout=30):
    """Search Semantic Scholar for papers matching ``query``.

    Sends a single GET request to the Graph API paper-search endpoint, asking
    for each hit's title, authors, year and citation count.

    Args:
        query: Free-text search string, sent as the ``query`` parameter.
        max_results: Sent as the ``limit`` parameter.
            NOTE(review): the API presumably caps ``limit`` per request;
            confirm against the Semantic Scholar docs before raising it.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the kernel
            indefinitely.

    Returns:
        The list stored under ``"data"`` in the JSON response, or an empty
        list when that key is absent or null.

    Raises:
        requests.HTTPError: The server answered with an error status
            (raised by ``raise_for_status``).
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "fields": "title,authors,year,citationCount",
        "limit": max_results,
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    # Tolerate a payload without "data" instead of failing with KeyError.
    return response.json().get("data") or []

In [4]:
# ------------------------- GRAPH CONSTRUCTION -------------------------

def build_coauthorship_graph(papers):
    """Build an undirected, weighted co-authorship graph.

    Every pair of distinct author names that appears on the same paper is
    joined by an edge; the edge's ``weight`` attribute counts how many of the
    given papers the pair shares.

    Args:
        papers: Iterable of dicts. Each may carry an ``"authors"`` entry
            holding a list of dicts with a ``"name"`` key. A missing or null
            ``"authors"`` entry is treated as "no authors".

    Returns:
        A ``networkx.Graph`` whose nodes are author names. Authors who never
        share a paper with anyone do not appear, because nodes are only
        created through edges.
    """
    G = nx.Graph()
    for paper in papers:
        authors = paper.get("authors") or []

        # Collect each name once per paper, in first-seen order. A repeated
        # name would otherwise produce a self-loop and count the same paper
        # twice on that author's other edges. Entries without a usable name
        # are skipped rather than becoming a None node.
        seen = {}
        for author in authors:
            name = (author or {}).get("name")
            if name:
                seen.setdefault(name, None)

        for a1, a2 in combinations(seen, 2):
            if G.has_edge(a1, a2):
                G[a1][a2]['weight'] += 1
            else:
                G.add_edge(a1, a2, weight=1)
    return G

def filter_graph_by_degree(G, min_degree=2):
    """Return a copy of ``G`` restricted to nodes of degree >= ``min_degree``.

    Degrees are measured once, in the original graph. The result is the
    subgraph induced by the surviving nodes, so a node that is kept can end up
    with a smaller degree there if some of its neighbours were dropped.
    """
    survivors = []
    for candidate in G.nodes:
        if G.degree(candidate) < min_degree:
            continue
        survivors.append(candidate)

    induced = G.subgraph(survivors)
    # .copy() hands back an independent graph instead of the subgraph object.
    return induced.copy()

In [5]:
# ------------------------- VISUALIZATION -------------------------

def plot_interactive_graph(G, title='<b>Generative AI Co-authorship Network</b>'):
    """Draw ``G`` as an interactive Plotly network diagram and display it.

    Nodes are placed with a spring layout using a fixed seed (42). Each node is
    labelled with its name, and its marker size and colour both follow its
    degree in ``G``. Hovering a node shows its name and degree.

    Args:
        G: The graph to draw. Node identifiers are used as text labels.
        title: Figure title (Plotly's HTML subset is allowed). Defaults to the
            original hard-coded heading.

    Returns:
        None. The figure is shown via ``fig.show()``.
    """
    pos = nx.spring_layout(G, seed=42, k=0.4)

    # All edges go into one line trace; a None after each pair of endpoints
    # ends the segment so consecutive edges are not joined together.
    edge_x, edge_y = [], []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='gray'),
        hoverinfo='none',
        mode='lines'
    )

    # Walk the nodes once so coordinates, labels, sizes and colours line up.
    nodes = list(G.nodes())
    degrees = [G.degree(node) for node in nodes]
    node_x = [pos[node][0] for node in nodes]
    node_y = [pos[node][1] for node in nodes]
    node_text = [
        f"{node}<br>Degree: {degree}" for node, degree in zip(nodes, degrees)
    ]

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        textposition='bottom center',
        textfont=dict(size=10),
        text=nodes,
        hovertext=node_text,
        hoverinfo='text',
        marker=dict(
            showscale=True,
            colorscale='YlOrRd',
            size=[5 + 2 * degree for degree in degrees],
            color=degrees,
            colorbar=dict(
                thickness=15,
                # Nested title dict: the flat `titleside` shortcut is a
                # deprecated attribute that Plotly 6 no longer accepts.
                title=dict(text='Node Degree', side='right'),
                xanchor='left'
            ),
            line=dict(width=0.5)
        )
    )

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # Same reasoning as the colorbar: `titlefont_size` is the
            # deprecated spelling of title.font.size.
            title=dict(text=title, font=dict(size=20)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=[dict(
                text="Source: Semantic Scholar API",
                showarrow=False,
                xref="paper", yref="paper",
                x=0.005, y=-0.002
            )],
            xaxis=dict(showgrid=False, zeroline=False),
            yaxis=dict(showgrid=False, zeroline=False)
        )
    )
    fig.show()

In [6]:
# ------------------------- EXPORT (OPTIONAL) -------------------------

def export_top_authors(G, k=20, file_format="csv"):
    """Write the ``k`` highest-degree authors of ``G`` to a file.

    The output goes to ``top_authors.<file_format>`` (a relative path, so it
    lands in the current working directory) with two columns, ``Author`` and
    ``Degree``.

    Args:
        G: Graph whose ``degree()`` yields ``(author, degree)`` pairs.
        k: Maximum number of authors to export.
        file_format: ``"csv"`` or ``"json"``.

    Raises:
        ValueError: ``file_format`` is not one of the supported formats.
            Previously an unknown format wrote nothing but still printed the
            success message.
    """
    supported = ("csv", "json")
    # Reject bad formats before doing any work, so nothing is reported as
    # exported when no file was written.
    if file_format not in supported:
        raise ValueError(
            f"Unsupported file_format {file_format!r}; expected one of {supported}"
        )

    degrees = Counter(dict(G.degree()))
    top_k = degrees.most_common(k)
    df = pd.DataFrame(top_k, columns=["Author", "Degree"])

    if file_format == "csv":
        df.to_csv("top_authors.csv", index=False)
    else:
        df.to_json("top_authors.json", orient="records", indent=2)

    print(f"Top {k} authors exported to top_authors.{file_format}")

In [None]:
# ------------------------- MAIN -------------------------

# End-to-end pipeline: fetch papers -> build graph -> filter -> plot -> export.
# The guard keeps the pipeline from running if this code is imported as a module.
if __name__ == "__main__":
    # Network call to the Semantic Scholar API; raises on an HTTP error status.
    papers = fetch_semantic_scholar_data(QUERY, MAX_RESULTS)
    G_full = build_coauthorship_graph(papers)
    # Keep only authors whose degree in G_full is at least MIN_DEGREE.
    G_filtered = filter_graph_by_degree(G_full, MIN_DEGREE)

    plot_interactive_graph(G_filtered)

    # Writes top_authors.csv to the current working directory.
    export_top_authors(G_filtered, TOP_K_AUTHORS, file_format="csv")

In [8]:
# Writes the output of `pip freeze` to requirements.txt in the working directory.
# NOTE(review): `!pip` uses whichever pip the shell finds first; `%pip` is the
# variant that targets the kernel's own environment. Confirm they match here.
!pip freeze > requirements.txt