In [1]:
import pandas as pd
import networkx as nx

# Read the node table and the raw edge table from disk
nodes_df = pd.read_csv("D:\\Academic\\Master\\ST5225\\Review\\nodes.csv")
edges_raw = pd.read_csv("D:\\Academic\\Master\\ST5225\\Review\\edges.csv")

# Start from an empty directed graph
G = nx.DiGraph()

# Index the node table by "id" so every row becomes {id: {column: value, ...}}.
# The resulting (id, attribute-dict) pairs are the form add_nodes_from accepts,
# so each node is created together with its attributes.
node_attrs = nodes_df.set_index("id").to_dict(orient="index")
G.add_nodes_from(node_attrs.items())

# Collapse repeated (from, to) rows of the raw edge table into a single row per
# pair, recording how many times the pair occurred in an "occurrences" column.
# size() followed by reset_index(name=...) yields the same three-column table
# regardless of how the installed pandas handles as_index=False with size().
edge_counts = (
    edges_raw
    .groupby(["from", "to"])
    .size()
    .reset_index(name="occurrences")
)

# Add every collapsed edge in one bulk call. Iterating the columns directly
# avoids DataFrame.iterrows(), which constructs a Series for each row and may
# upcast dtypes. Values are cast to plain ints so node ids and the edge
# attribute are ordinary Python integers.
G.add_edges_from(
    (int(src), int(dst), {"occurrences": int(count)})
    for src, dst, count in zip(
        edge_counts["from"], edge_counts["to"], edge_counts["occurrences"]
    )
)

# Remove loops (self-edges). The edge list is materialised first so the graph
# is not mutated while the self-loop generator is still iterating over it.
# Removal happens after insertion, so endpoints of self-loops remain as nodes.
G.remove_edges_from(list(nx.selfloop_edges(G)))

# Optional: quick sanity checks (can be removed)
print(G.number_of_nodes(), "nodes")
print(G.number_of_edges(), "edges")
print(G.nodes[25])         # attributes for Harry James Potter (id 25)
print(G.nodes[26])         # attributes for Hermione Granger (id 26)
# get_edge_data returns the attribute dict for edge 25 -> 26, or None when the
# edge does not exist; plain G[25][26] would raise KeyError in that case.
print(G.get_edge_data(25, 26))

64 nodes
201 edges
{'name': 'Harry James Potter', 'schoolyear': 1991, 'gender': 1, 'house': 1, 'first_book': 1, 'last_book': 7}
{'name': 'Hermione Granger', 'schoolyear': 1991, 'gender': 2, 'house': 1, 'first_book': 1, 'last_book': 7}
{'occurrences': 6}
