In [None]:
import pandas as pd
import networkx as nx
from pathlib import Path

# Resolve input files: prefer the working directory, otherwise fall back to data/
nodes_path = Path("graph_nodes.csv")
if not nodes_path.exists():
    nodes_path = Path("data/graph_nodes.csv")

edges_path = Path("graph_edges.csv")
if not edges_path.exists():
    edges_path = Path("data/graph_edges.csv")

# Load the node table and the edge list
nodes = pd.read_csv(nodes_path)
edges = pd.read_csv(edges_path)

# Undirected graph whose edges are the (ID1, ID2) rows of the edge list
G = nx.from_pandas_edgelist(edges, "ID1", "ID2", create_using=nx.Graph())

# Every column other than 'ID' becomes a node attribute, keyed by ID.
# Nodes are added explicitly first so IDs that never appear in an edge are kept.
attr = nodes.set_index("ID").to_dict(orient="index")
G.add_nodes_from(attr)
nx.set_node_attributes(G, attr)

print(G)

# Show the first three nodes together with their attribute dicts
for node, data in list(G.nodes(data=True))[:3]:
    print(f"Node {node}: {data}")

# Graph with 130 nodes and 806 edges
# Node 0: {'Age': 20, 'Gender': 'female', 'Hobbies': 'hiking,gaming'}
# Node 1: {'Age': 68, 'Gender': 'male', 'Hobbies': 'art,hiking,reading,travel'}
# Node 3: {'Age': 27, 'Gender': 'male', 'Hobbies': 'dancing,hiking'}


In [None]:
def fit_dcsbm(G, q=None, deg_corr=True, seed=None):
    """Fit a stochastic block model to a NetworkX graph using graph-tool.

    The graph is copied into an undirected graph-tool graph (vertex ``i``
    corresponds to the ``i``-th node of ``G.nodes()``) and partitioned with
    ``minimize_blockmodel_dl``.

    Parameters
    ----------
    G : networkx graph
        Only ``G.nodes()`` and ``G.edges()`` are read; the copy is always
        undirected and carries no node or edge attributes.
    q : int, optional
        Number of blocks to request (passed as ``B_min = B_max = q``).
        ``None`` leaves the number of blocks to graph-tool.
    deg_corr : bool, default True
        Forwarded to ``BlockState`` to toggle degree correction.
    seed : int, optional
        When given, seeds graph-tool's RNG and NumPy's global RNG before
        fitting so repeated calls give the same partition. Note that this
        changes global RNG state. ``None`` (default) seeds nothing.

    Returns
    -------
    list of int
        One block label per node, in ``G.nodes()`` order, relabelled to
        consecutive integers ``0..k-1`` where ``k`` is the number of distinct
        blocks in the fitted partition. An empty graph yields ``[]``.

    Raises
    ------
    ValueError
        If ``q`` is given and smaller than 1.
    """
    if q is not None and q < 1:
        raise ValueError(f"q must be a positive integer, got {q!r}")

    nodes = list(G.nodes())
    if not nodes:
        # Nothing to partition; skip loading graph-tool altogether.
        return []

    # Imported lazily so the heavy, optional dependency is only needed here.
    import graph_tool.all as gt
    from graph_tool.inference import minimize_blockmodel_dl, BlockState

    if seed is not None:
        import numpy as np
        gt.seed_rng(seed)
        np.random.seed(seed)

    # Convert networkx -> graph-tool, preserving node order.
    idx = {n: i for i, n in enumerate(nodes)}
    g = gt.Graph(directed=False)
    g.add_vertex(len(nodes))
    edge_pairs = [(idx[u], idx[v]) for u, v in G.edges()]
    if edge_pairs:
        # One bulk insert instead of a Python-level call per edge.
        g.add_edge_list(edge_pairs)

    # Fit the model (q pins the number of blocks when provided).
    fit_kwargs = dict(state=BlockState, state_args=dict(deg_corr=deg_corr))
    if q is not None:
        fit_kwargs["multilevel_mcmc_args"] = dict(B_min=q, B_max=q)
    state = minimize_blockmodel_dl(g, **fit_kwargs)

    # Array view of the vertex property map, indexed by vertex number, which
    # matches the position of each node in `nodes`.
    raw_labels = state.get_blocks().a.tolist()

    # Re-label blocks to consecutive integers starting at 0.
    label_map = {old: new for new, old in enumerate(sorted(set(raw_labels)))}
    return [label_map[b] for b in raw_labels]

# Demo network: the Karate Club graph bundled with networkx
karate = nx.karate_club_graph()

# Two-block, degree-corrected fit; one label per node
labels = fit_dcsbm(karate, 2, deg_corr=True)

# Fixed layout seed keeps node positions stable between runs
pos = nx.spring_layout(karate, seed=43)

# Colour each node by its inferred block
nx.draw(
    karate,
    pos=pos,
    with_labels=True,
    node_color=labels,
    cmap='tab20',
)