In [1]:
import csv
import json

# Define the input and output file paths
input_csv = 'data_scopus.csv'
output_json = 'filtered_publication_network12.json'


def _parse_authors(raw_authors, separator=', '):
    """Split an author field into unique, non-empty, stripped names.

    Order of first appearance is preserved so that link direction
    (earlier author -> later author) stays deterministic.
    """
    seen = set()
    authors = []
    for name in raw_authors.split(separator):
        name = name.strip()
        # Skip empty fragments (e.g. from a trailing separator) and repeated
        # names, which would otherwise yield blank nodes and self-links.
        if name and name not in seen:
            seen.add(name)
            authors.append(name)
    return authors


def build_network(rows):
    """Build the co-authorship network from publication records.

    Args:
        rows: Iterable of mappings (e.g. ``csv.DictReader`` rows) with the
            keys 'Year', 'Authors', 'Authors with affiliations', 'Title'
            and 'Publisher'.

    Returns:
        A ``(authors_dict, links)`` tuple. ``authors_dict`` maps each author
        name to its node dict; the node metadata comes from the first record
        in which the author appears. ``links`` holds one dict per author
        pair per record.

    Raises:
        KeyError: If a processed row lacks one of the columns it reads.
    """
    authors_dict = {}
    links = []

    for row in rows:
        # Filter out records missing Year, Affiliation, or Author
        if not row['Year'] or not row['Authors'] or not row['Authors with affiliations']:
            continue

        authors = _parse_authors(row['Authors'])

        # Register a node the first time an author is seen
        for author in authors:
            if author not in authors_dict:
                authors_dict[author] = {
                    'id': author,
                    'metadata': {
                        'Affiliation': row['Authors with affiliations'],
                        'Year': row['Year'],
                        'Title': row['Title'],
                        'Publisher': row['Publisher'],
                    },
                }

        # One link per unordered pair of co-authors of this publication
        for position, source in enumerate(authors):
            for target in authors[position + 1:]:
                links.append({
                    'source': source,
                    'target': target,
                    'title': row['Title'],
                    'year': row['Year'],
                })

    return authors_dict, links


if __name__ == '__main__':
    # newline='' is what the csv module expects so that quoted fields with
    # embedded line breaks are parsed correctly; utf-8-sig drops a leading
    # BOM if the export has one, so the first header name is not corrupted.
    with open(input_csv, mode='r', encoding='utf-8-sig', newline='') as csvfile:
        authors_dict, links = build_network(csv.DictReader(csvfile))

    # Convert authors_dict to a list of nodes
    nodes = list(authors_dict.values())

    # Create the final data structure
    data = {
        'nodes': nodes,
        'links': links
    }

    # Save the data to a JSON file
    with open(output_json, 'w', encoding='utf-8') as jsonfile:
        json.dump(data, jsonfile, indent=4)

    print(f"Filtered data has been saved to {output_json}")

Filtered data has been saved to filtered_publication_network12.json
