In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter, defaultdict
import os
import json
import networkx as nx

In [10]:
# File Paths
# NOTE: absolute_path is a machine-specific root directory; every other path
# in this cell is derived from it, so this is the only line to change when the
# project is moved.
# Every backslash is escaped explicitly: sequences such as "\O" or "\V" are
# invalid string escapes (SyntaxWarning on Python 3.12+), and a doubled
# separator in the middle of the path is unnecessary.
absolute_path = "C:\\Users\\tykun\\OneDrive\\Documents\\SchoolDocs\\VSCodeProjects\\connectedData\\board_analysis\\"

# Sub-folder names (each ends with a separator so they can be concatenated directly)
altered_dataframes = "altered_dataframes\\"
gpt_dataframes = "gpt_dataframes\\"
graphs = "graphs\\"
scripts = "scripts\\"
board_dataframes = "board_dataframes\\"
temporary_data = "temporary_data\\"
college_matching = "college_matching\\"

# Full paths built from the root and the sub-folder names above
altered_dataframe_path = f"{absolute_path}{altered_dataframes}"
gpt_dataframe_path = f"{absolute_path}{gpt_dataframes}"
graph_path = f"{absolute_path}{graphs}"
script_path = f"{absolute_path}{scripts}"
boards_path = f"{absolute_path}{board_dataframes}"
state_path = f"{absolute_path}{temporary_data}state_systems_validated.csv"
college_matching_path = f"{absolute_path}{college_matching}"

# Valid Years (kept as strings)
years = ["1999", "2000", "2005", "2007", "2008", "2009", "2011", "2013", "2018"]

In [11]:
def json_to_dataframes(json_file_path):
    """Load a graph JSON file and return its nodes as a flat DataFrame.

    The file is expected to contain a top-level object with a "nodes" list.
    Each node contributes one row holding its "label", "x", "y", "id",
    "color" and "size" values (None when a key is absent), followed by every
    key/value pair found in the node's nested "attributes" dictionary. An
    attribute that shares a name with one of the six base keys overwrites it.

    Parameters
    ----------
    json_file_path : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        One row per node; an empty DataFrame when the file has no nodes.
    """
    # JSON text is UTF-8; do not rely on the platform default encoding
    # (e.g. cp1252 on Windows), which can garble or reject non-ASCII labels.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    base_keys = ("label", "x", "y", "id", "color", "size")

    nodes_list = []
    # `or []` also covers an explicit `"nodes": null`
    for node in data.get("nodes") or []:
        flattened_node = {key: node.get(key) for key in base_keys}
        # "attributes" may be missing or null; treat both as "no extra columns"
        flattened_node.update(node.get("attributes") or {})
        nodes_list.append(flattened_node)

    return pd.DataFrame(nodes_list)


In [12]:
# Board-level statistics table stored alongside the other altered dataframes
board_statistics = pd.read_csv(altered_dataframe_path + "sample_board_statistics.csv")

# Node table flattened from the interlock JSON file (one row per entry of its "nodes" list)
network_statistics_path = boards_path + "interlock_stats.json"
network_statistics = json_to_dataframes(network_statistics_path)


In [13]:
def calculate_assortativity(nodes_df, edges_df, attribute):
    """Return the attribute assortativity coefficient of an undirected graph.

    The graph is assembled from two tables: every row of ``nodes_df`` becomes
    a node keyed by its 'Id' value and carrying all of the row's columns as
    node attributes; every row of ``edges_df`` becomes an edge between its
    'Source' and 'Target' values.

    Parameters
    ----------
    nodes_df : pandas.DataFrame
        Node table; must contain an 'Id' column.
    edges_df : pandas.DataFrame
        Edge table; must contain 'Source' and 'Target' columns.
    attribute : str
        Name of the node attribute passed to
        ``nx.attribute_assortativity_coefficient``.

    Returns
    -------
    The value returned by ``nx.attribute_assortativity_coefficient``.
    """
    graph = nx.Graph()

    # Nodes first, so that each one carries its full row as attributes
    for _, node_row in nodes_df.iterrows():
        node_id = node_row['Id']
        node_attrs = node_row.to_dict()
        graph.add_node(node_id, **node_attrs)

    # Then the undirected edges, one per row of the edge table
    graph.add_edges_from(
        (edge_row['Source'], edge_row['Target'])
        for _, edge_row in edges_df.iterrows()
    )

    return nx.attribute_assortativity_coefficient(graph, attribute)


# Aggregated node and edge tables from the board dataframes folder
nodes_df = pd.read_csv(absolute_path + board_dataframes + "aggregated_nodes.csv")
edges_df = pd.read_csv(absolute_path + board_dataframes + "aggregated_edges.csv")

# Assortativity of the network with respect to the 'female_president' node attribute
assortativity = calculate_assortativity(
    nodes_df,
    edges_df,
    attribute='female_president',
)

print(f"Assortativity based on 'female president': {assortativity}")

Assortativity based on 'female president': 0.009319930929590993


In [14]:
# Paths
board_statistics_path = f"{altered_dataframe_path}sample_board_statistics.csv"
network_statistics_path = f"{boards_path}interlock_stats.json"

# Centrality columns this cell (re)creates on board_statistics
centrality_columns = ['Betweenness', 'Degree', 'Eigenvector']

# Load board_statistics and strip any centrality columns left by an earlier
# run, so the merge below cannot produce suffixed duplicates and the cell
# stays safe to re-run against the CSV it overwrites.
board_statistics = pd.read_csv(board_statistics_path).drop(
    columns=centrality_columns, errors="ignore"
)

# Load and parse the JSON file (explicit UTF-8: JSON text is UTF-8, while the
# platform default encoding may not be)
with open(network_statistics_path, 'r', encoding='utf-8') as f:
    network_data = json.load(f)

# Extract node data (centrality measures and affiliationid)
nodes = network_data.get("nodes") or []
node_data = []

for node in nodes:
    attributes = node.get("attributes") or {}
    affiliation_id = attributes.get("affiliationid")
    if affiliation_id is None:
        # A node without an affiliation id cannot be matched to a board.
        # Keeping it would create a null merge key, and pandas matches null
        # keys to each other, attaching this node's values to any
        # board_statistics row whose AffiliationId is missing.
        continue
    node_data.append({
        "AffiliationId": affiliation_id,
        # Attribute keys are spelled exactly as they are read from the file.
        # NOTE(review): "betweenesscentrality" presumably mirrors the
        # exporter's own spelling - verify before "correcting" it.
        "Betweenness": attributes.get("betweenesscentrality", 0),  # Default to 0 if missing
        "Degree": attributes.get("Degree", 0),  # Default to 0 if missing
        "Eigenvector": attributes.get("eigencentrality", 0),  # Default to 0 if missing
    })

# Convert extracted node data to a DataFrame. The columns are named explicitly
# so the merge key exists even when no node was extracted.
node_df = pd.DataFrame(node_data, columns=["AffiliationId", *centrality_columns])

# Merge the centrality measures into board_statistics
board_statistics = board_statistics.merge(
    node_df,
    how="left",
    on="AffiliationId"
)

# Boards that do not appear in the network get 0 for every centrality measure
board_statistics[centrality_columns] = board_statistics[centrality_columns].fillna(0)

# Remove duplicate rows
board_statistics = board_statistics.drop_duplicates()

# Save the updated board_statistics to CSV (overwrites the input file)
board_statistics.to_csv(board_statistics_path, index=False)

print("Centrality measures added to board_statistics and saved successfully.")

Centrality measures added to board_statistics and saved successfully.
