In [None]:
# Mount Google Drive at /content/drive; the file paths used later in this
# notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import networkx as nx
from networkx.readwrite import json_graph
import json

def extract_country_from_affiliations(affiliations):
    """Return the last comma-separated field of the first affiliation.

    Args:
        affiliations: String in which individual affiliations are separated
            by ';'. Only the part before the first ';' is inspected.

    Returns:
        The whitespace-trimmed text that follows the last ',' of the first
        affiliation (the whole trimmed affiliation if it contains no ',').
    """
    # Everything before the first ';' is the leading affiliation.
    leading_entry, _, _ = affiliations.partition(';')
    # The text after the final ',' is treated as the country.
    _, _, trailing_field = leading_entry.strip().rpartition(',')
    return trailing_field.strip()

def load_and_preprocess_data(file_path):
    """Load the CSV file and clean it for graph construction.

    Rows that lack any of the columns required downstream are removed
    first; only then are the remaining missing values (in the other columns)
    replaced with 0. Filling before dropping would turn every NaN into 0,
    so nothing would ever be dropped and rows with a missing author list
    would later fail when their value is split as a string.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        A DataFrame without NaN values in which every row has
        'Author(s) ID', 'Year', 'Authors' and 'Authors with affiliations'.
    """
    required_columns = ['Author(s) ID', 'Year', 'Authors', 'Authors with affiliations']
    df = pd.read_csv(file_path)
    # Order matters: drop incomplete rows before filling the optional gaps.
    return df.dropna(subset=required_columns).fillna(0)

def create_node_attributes(row):
    """Turn one publication row into (author_id, attributes) node entries.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the
            'Authors', 'Author(s) ID', 'Title', 'Year', 'Cited by',
            'Publisher' and 'Authors with affiliations' fields.

    Returns:
        A list with one ``(author_id, attributes)`` tuple per non-empty
        ';'-separated author ID. All tuples reference the same attributes
        dictionary.
    """
    # The author list is re-delimited: ',' separators become ';'.
    author_names = row['Authors'].replace(',', ';')
    id_tokens = row['Author(s) ID'].split(';')

    publication = {}
    publication['Authors'] = author_names
    publication['Title'] = row['Title']
    publication['Year'] = row['Year']
    publication['Citations'] = row['Cited by']
    publication['Publisher'] = row['Publisher']
    publication['Author with affiliations'] = row['Authors with affiliations']

    node_entries = []
    for identifier in id_tokens:
        # Empty tokens (e.g. from a trailing ';') are skipped.
        if identifier:
            node_entries.append((identifier, publication))
    return node_entries

def build_graph(data_frame):
    """Create an undirected graph whose nodes come from the DataFrame rows.

    Every row is converted with ``create_node_attributes`` and the resulting
    (node, attributes) entries are added in row order, so attributes from a
    later row overwrite those set by an earlier row for the same node.
    No edges are added here.

    Args:
        data_frame: DataFrame whose rows are accepted by
            ``create_node_attributes``.

    Returns:
        The populated ``networkx.Graph``.
    """
    graph = nx.Graph()
    node_stream = (
        node_entry
        for _, record in data_frame.iterrows()
        for node_entry in create_node_attributes(record)
    )
    graph.add_nodes_from(node_stream)
    return graph

# Location of the input CSV on the mounted Google Drive
csv_file_path = "/content/drive/MyDrive/Colab Notebooks/Major Assignment3/data_scopus.csv"

# Read and clean the CSV; `df` is reused by the co-authorship cell further down
df = load_and_preprocess_data(csv_file_path)

# Build the graph: one node per author ID, no edges yet (build_graph adds none)
research_network = build_graph(df)

# Print the graph summary
print(research_network)

# Serialize the graph in node-link format and write it to network.json
json_output_path = "/content/drive/MyDrive/Colab Notebooks/Major Assignment3/network.json"
with open(json_output_path, "w") as json_file:
    json.dump(json_graph.node_link_data(research_network), json_file)


Graph with 1553 nodes and 0 edges


In [None]:
import networkx as nx
import json
import pandas as pd

def extract_country_from_affiliations(affiliations):
    """Pull the country out of a ';'-separated affiliations string.

    Only the first affiliation (the text before the first ';') is used; the
    country is taken to be whatever follows its last ',' once surrounding
    whitespace is removed.
    """
    return affiliations.split(';', 1)[0].strip().rsplit(',', 1)[-1].strip()

# Path of the node-link JSON file (network.json) on the mounted Google Drive
json_path = '/content/drive/MyDrive/Colab Notebooks/Major Assignment3/network.json'

# Parse the JSON file into a plain Python dict
with open(json_path, 'r') as json_file:
    json_data = json.load(json_file)

# Empty undirected graph; add_nodes_from_data() below fills this module-level
# object, and the co-authorship edges are added to it afterwards
research_graph = nx.Graph()

def add_nodes_from_data(data):
    """Populate the module-level ``research_graph`` from node-link data.

    Args:
        data: Dict with a 'nodes' list; each entry is a dict that may carry
            'id', 'Authors', 'Title', 'Year', 'Citations', 'Publisher' and
            'Author with affiliations'. Missing keys yield ``None`` (the
            affiliations value is always converted with ``str``).

    Each entry becomes a node keyed by its 'id', with lower-case attribute
    names plus a derived 'country' attribute.
    """
    for entry in data['nodes']:
        # str() is applied even to a missing value, so the text is never None.
        affiliation_text = str(entry.get('Author with affiliations'))
        node_attrs = {
            'authors': entry.get('Authors'),
            'title': entry.get('Title'),
            'year': entry.get('Year'),
            'citations': entry.get('Citations'),
            'publisher': entry.get('Publisher'),
            'affiliations': affiliation_text,
            'country': extract_country_from_affiliations(affiliation_text),
        }
        research_graph.add_node(entry.get('id'), **node_attrs)

# Add the nodes stored in the JSON data (one per author ID) with their attributes
add_nodes_from_data(json_data)

# Add co-authorship edges: every pair of non-empty author IDs that appear on
# the same row of `df` is linked. `df` is the DataFrame loaded by the earlier
# cell, so that cell must have been run first.
for _, row in df.iterrows():
    authors = row['Author(s) ID'].split(';')
    for i in range(len(authors)):
        for j in range(i + 1, len(authors)):
            if authors[i] != "" and authors[j] != "":
                research_graph.add_edge(authors[i], authors[j])

# Identify clusters as the connected components of the co-authorship graph.
# This has to happen after the edges are added: on an edgeless graph every
# node is its own component and the 'class' attribute would just be a
# per-node counter.
clusters = list(nx.connected_components(research_graph))

# Assign each node the index of the cluster it belongs to
class_mapping = {node: idx for idx, cluster in enumerate(clusters) for node in cluster}
nx.set_node_attributes(research_graph, class_mapping, 'class')

# Print the graph summary
print(research_graph)

# Save nodes (with all attributes, including 'class') and links as a JSON file
coauthorship_data = {
    'nodes': [{'id': node, **research_graph.nodes[node]} for node in research_graph.nodes()],
    'links': [{'source': source, 'target': target} for source, target in research_graph.edges()]
}

output_json_path = "/content/drive/MyDrive/Colab Notebooks/Major Assignment3/author.json"
with open(output_json_path, "w") as output_file:
    json.dump(coauthorship_data, output_file)

Graph with 1553 nodes and 3049 edges
