In [8]:
import pandas as pd
import networkx as nx
import json
from networkx.readwrite import json_graph

def load_data(file_path):
    df = pd.read_csv(file_path)
    df = df.dropna(subset=['Author(s) ID', 'Year', 'Authors', 'Authors with affiliations'])
    df = df.fillna(0)
    return df

def create_author_nodes(df):
    G = nx.Graph()

    for _, row in df.iterrows():
        authors = row['Authors'].split(',')
        author_Id = row['Author(s) ID'].split(';')
        Title = row['Title']
        Year = row['Year']
        Citations = row['Cited by']
        Publisher = row['Publisher']
        Authorwa = row['Authors with affiliations']

        for author in range(len(authors)):
            aid = author_Id[author]
            aname = ';'.join(authors)
            title = Title

            if aid != "":
                nodes_data = {
                    'id': aid,
                    "Authors": aname,
                    "Title": title,
                    "Year": Year,
                    "Citations": Citations,
                    "Publisher": Publisher,
                    "Author with affiliations": Authorwa
                }
                G.add_node(aid, **nodes_data)

    return G

def create_coauthorship_edges(G, df):
    for row in df.iterrows():
        authors = row[1]['Author(s) ID'].split(';')
        for i in range(len(authors)):
            for j in range(i + 1, len(authors)):
                if authors[i] != "" and authors[j] != "":
                    G.add_edge(authors[i], authors[j])

def get_author_country(authors_with_affiliations):
    first_affiliation = authors_with_affiliations.split(';')[0].strip()
    country = first_affiliation.split(',')[-1].strip()
    return country

def add_node_attributes(G, data):
    for node_data in data['nodes']:
        eid = node_data.get('id')
        authors = node_data.get('Authors')
        title = node_data.get('Title')
        year = node_data.get('Year')
        citations = node_data.get('Citations')
        publisher = node_data.get('Publisher')

        authors_with_affiliations = str(node_data.get('Author with affiliations'))
        country = get_author_country(authors_with_affiliations)

        G.add_node(eid, authors=authors, title=title, year=year, citations=citations,
                   publisher=publisher, authors_with_affiliations=authors_with_affiliations, country=country)

def create_clusters(G):
    clusters = list(nx.connected_components(G))
    class_mapping = {node: idx for idx, cluster in enumerate(clusters) for node in cluster}
    nx.set_node_attributes(G, class_mapping, 'class')

def save_json_data(G, output_file):
    coauthorship_data = {'nodes': [{'id': node, **G.nodes[node]} for node in G.nodes()],
                         'links': [{'source': source, 'target': target} for source, target in G.edges()]}
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(coauthorship_data, outfile, ensure_ascii=False)

def main():
    file_path = "C:/Users/User1/Desktop/data_scopus.csv"
    df = load_data(file_path)

    author_network = create_author_nodes(df)
    create_coauthorship_edges(author_network, df)

    # Save the author network as a JSON file
    with open("publication_network.json", "w") as f:
        json.dump(json_graph.node_link_data(author_network), f)

    # Load the data from the JSON file
    with open('publication_network.json', 'r') as infile:
        data = json.load(infile)

    # Create a new graph with additional attributes
    G = nx.Graph()
    add_node_attributes(G, data)

    create_clusters(G)
    print(G)

    # Save the coauthorship data as a JSON file
    
    save_json_data(G, 'coauthorship_data.json')

if __name__ == "__main__":
    main()


Graph with 1552 nodes and 0 edges


In [10]:
import pandas as pd
import networkx as nx
import json

def load_json_data(file_path):
    with open(file_path, 'r') as infile:
        return json.load(infile)

def get_author_country(authors_with_affiliations):
    first_affiliation = authors_with_affiliations.split(';')[0].strip()
    return first_affiliation.split(',')[-1].strip()

def add_nodes_from_json(graph, data):
    for node_data in data['nodes']:
        eid = node_data.get('id')
        authors = node_data.get('Authors')
        title = node_data.get('Title')
        year = node_data.get('Year')
        citations = node_data.get('Citations')
        publisher = node_data.get('Publisher')

        authors_with_affiliations = str(node_data.get('Author with affiliations'))
        country = get_author_country(authors_with_affiliations)

        graph.add_node(eid, authors=authors, title=title, year=year, citations=citations,
                       publisher=publisher, authors_with_affiliations=authors_with_affiliations, country=country)

def create_clusters(graph):
    clusters = list(nx.connected_components(graph))
    class_mapping = {node: idx for idx, cluster in enumerate(clusters) for node in cluster}
    nx.set_node_attributes(graph, class_mapping, 'class')

def add_edges_from_dataframe(graph, df):
    for row in df.iterrows():
        authors = row[1]['Author(s) ID'].split(';')
        for i in range(len(authors)):
            for j in range(i + 1, len(authors)):
                if authors[i] != "" and authors[j] != "":
                    graph.add_edge(authors[i], authors[j])

def save_json_data(graph, output_file):
    coauthorship_data = {'nodes': [{'id': node, **graph.nodes[node]} for node in graph.nodes()],
                         'links': [{'source': source, 'target': target} for source, target in graph.edges()]}
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(coauthorship_data, outfile, ensure_ascii=False)

def main():
    json_file_path = 'C:/Users/User1/Downloads/publication_network.json'
    data = load_json_data(json_file_path)

    G = nx.Graph()

    add_nodes_from_json(G, data)
    create_clusters(G)
    add_edges_from_dataframe(G, df)  # Assuming df is defined somewhere in your code

    # Save the coauthorship data as a JSON file
    save_json_data(G, 'coauthorship_data.json')

if __name__ == "__main__":
    main()
    
    print(G)


Graph with 1552 nodes and 3049 edges
