# In [None]:
import pandas as pd
import ast
import networkx as nx
import itertools  # Import itertools

# Read the source CSV into a DataFrame. The path is resolved relative to the
# current working directory; substitute the location of your own file.
df = pd.read_csv(filepath_or_buffer='2020_cleaned_fullcol.csv')

# Clean and preprocess funding data
def parse_funding(funding_data):
    """Extract the funding sponsor values from one raw ``funding`` cell.

    The cell is expected to contain the string form of a Python literal:
    a list (or tuple) of dicts, each carrying a ``'funding sponsor'`` key.
    A single dict is accepted as well and treated as a one-entry list.

    Args:
        funding_data: Raw cell value. Anything that is not a ``str`` (for
            example a missing-value float) yields no sponsors.

    Returns:
        list: The ``'funding sponsor'`` value of every dict entry that has
        one, in their original order. An empty list is returned when the
        cell is not a string, cannot be parsed as a literal, or does not
        have the expected list-of-dicts structure.
    """
    if not isinstance(funding_data, str):
        return []

    try:
        parsed = ast.literal_eval(funding_data)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the exceptions literal_eval is documented to raise on
        # malformed input; a bad cell simply contributes no sponsors.
        return []

    if isinstance(parsed, dict):
        # Iterating a dict would yield its string keys, not entries.
        parsed = [parsed]
    elif not isinstance(parsed, (list, tuple)):
        return []

    sponsors = []
    for entry in parsed:
        if not isinstance(entry, dict):
            continue  # Skip stray scalars instead of failing on .get().
        sponsor = entry.get('funding sponsor')
        if sponsor is not None:
            # A missing key must not turn into a ``None`` sponsor.
            sponsors.append(sponsor)
    return sponsors

# Number of leading rows of the dataset to include in the network. The loop,
# the plot title and the final message all read this value, so the labels
# always describe what was actually processed.
MAX_ROWS = 100

# One list of sponsors per row, derived from the raw 'funding' column.
df['funding_sponsors'] = df['funding'].apply(parse_funding)

# Undirected graph: nodes are funding sponsors, and an edge joins two sponsors
# that appear together on at least one of the processed rows.
G = nx.Graph()

# Only the derived column is needed, so iterate it directly rather than
# materialising every full row with iterrows().
for sponsor_list in df['funding_sponsors'].head(MAX_ROWS):
    # Deduplicate and drop missing sponsors, then sort so that edge
    # orientation (and therefore the exported CSV) is reproducible between
    # runs; plain set iteration order for strings is not. key=str keeps the
    # sort safe should a non-string value slip through.
    sponsors = sorted({s for s in sponsor_list if s is not None}, key=str)

    # combinations() over distinct values never pairs a sponsor with itself,
    # so no explicit inequality check is required.
    G.add_edges_from(itertools.combinations(sponsors, 2))

# Draw the graph (optional)
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 8))
# NOTE(review): font_size=0.1 renders the labels practically invisible even
# though with_labels=True -- confirm whether this is intentional.
nx.draw_networkx(G, with_labels=True, node_size=100, node_color='lightblue', font_size=0.1, font_weight='bold')
plt.title(f"Co-Funding Network between Funding Sponsors (First {MAX_ROWS} Rows)")
plt.show()

# Save the network's edge list to a CSV, one co-funding pair per line.
edges = pd.DataFrame(G.edges(), columns=['Sponsor 1', 'Sponsor 2'])
edges.to_csv('co_funding_sponsors_network_test.csv', index=False)

print(f"Co-funding network graph for the first {MAX_ROWS} rows created successfully.")
