# Publication Analysis

This notebook generates figures and summary tables for the mitochondrial network dataset.

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# Load processed network data
# The three CSV exports are read in a fixed order (nodes, edges, cluster summary)
# from the directory one level above the notebook.
nodes, edges, clusters = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../network_nodes_clustered.csv',
        '../network_edges.csv',
        '../network_clusters_summary.csv',
    )
)


In [None]:
# Build graph
# The edge table supplies the topology; its 'weight' column is stored as an edge attribute.
G = nx.from_pandas_edgelist(edges, 'source', 'target', edge_attr='weight')

# Register every row of the node table on the graph, together with its columns
# as node attributes. An edge list can only introduce nodes that take part in at
# least one edge, so nodes listed in the node table but absent from the edge
# table would otherwise be missing from G (and later have no layout position and
# a NaN centrality). add_nodes_from creates those nodes and, for nodes that
# already exist, simply updates their attributes.
node_attrs = nodes.set_index('node_id').to_dict('index')
G.add_nodes_from(node_attrs.items())

# Calculate degree centrality
# NOTE: the centrality is normalised by the number of nodes in G, so edge-less
# nodes added above are counted in that normalisation and receive a value of 0.
deg_cent = nx.degree_centrality(G)
nx.set_node_attributes(G, deg_cent, 'degree_centrality')
# Mirror the values onto the node table; the later figure/table cells read this column.
nodes['degree_centrality'] = nodes['node_id'].map(deg_cent)


In [None]:
# Figure 1: network coloured by cluster
# Fixed seed so the layout (reused by later figures via `pos`) is reproducible.
pos = nx.spring_layout(G, seed=42)
cluster_ids = nodes['cluster_id'].unique()

# tab20 defines a fixed number of colours (palette.N). Wrap the index so that
# clusters beyond the palette size cycle through it instead of all falling on
# the same out-of-range colour.
palette = plt.cm.tab20
colors = palette([i % palette.N for i in range(len(cluster_ids))])
color_map = {cid: colors[i] for i, cid in enumerate(cluster_ids)}

plt.figure(figsize=(12,8))
for cid in cluster_ids:
    members = nodes.loc[nodes['cluster_id'] == cid, 'node_id']
    # Pass a plain list (not a pandas Series) and keep only nodes that actually
    # have a layout position; a node id missing from `pos` would abort the draw.
    nodelist = [node for node in members if node in pos]
    if not nodelist:
        # Nothing drawable for this cluster id (e.g. a missing/NaN id) - no legend entry.
        continue
    nx.draw_networkx_nodes(G, pos, nodelist=nodelist, node_size=40, node_color=[color_map[cid]], label=f'Cluster {cid}')
nx.draw_networkx_edges(G, pos, alpha=0.2)
plt.axis('off')
# Legend is anchored just outside the right edge of the axes.
plt.legend(title='Clusters', bbox_to_anchor=(1.05,1), loc='upper left')
plt.tight_layout()
plt.savefig('fig1_network_by_cluster.png', dpi=300)
plt.show()


In [None]:
# Figure 2: degree centrality distribution
# Histogram of the per-node degree centrality column, written to disk and shown inline.
hist_fig, hist_ax = plt.subplots(figsize=(8,6))
hist_ax.hist(nodes['degree_centrality'], bins=30, color='skyblue', edgecolor='black')
hist_ax.set_xlabel('Degree Centrality')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Degree Centrality Distribution')
hist_fig.tight_layout()
hist_fig.savefig('fig2_degree_centrality_hist.png', dpi=300)
plt.show()


In [None]:
# Figure 3: network annotated with representative species
# Reuses `pos`, `cluster_ids` and `color_map` from the Figure 1 cell so both
# figures share the same layout and colours.
rep_species = clusters.set_index('cluster_id')['representative_organism'].to_dict()
plt.figure(figsize=(12,8))
nx.draw_networkx_edges(G, pos, alpha=0.2)
for cid in cluster_ids:
    members = nodes.loc[nodes['cluster_id'] == cid, 'node_id']
    # Plain list of members that have a layout position; a cluster id with no
    # such members (e.g. a NaN id, or nodes absent from the graph) is skipped
    # rather than raising on an empty selection or a missing position.
    nodelist = [node for node in members if node in pos]
    if not nodelist:
        continue
    nx.draw_networkx_nodes(G, pos, nodelist=nodelist, node_size=40, node_color=[color_map[cid]])
    # The label is anchored on the first positioned member of the cluster
    # (in node-table order); clusters without an entry in the lookup get an
    # empty label.
    rep_node = nodelist[0]
    x, y = pos[rep_node]
    plt.text(x, y, rep_species.get(cid, ''), fontsize=6, ha='center')
plt.axis('off')
plt.tight_layout()
plt.savefig('fig3_representative_species.png', dpi=300)
plt.show()


In [None]:
# Summary table with average degree centrality per cluster
# One row per cluster: node count and mean degree centrality, plus a
# representative organism.
summary = nodes.groupby('cluster_id').agg(
    representative_organism=('organism','first'),
    num_nodes=('node_id','size'),
    avg_degree=('degree_centrality','mean')
).reset_index()

# The Figure 3 cell labels clusters with clusters['representative_organism'].
# Use that same value here so the exported table and the figure agree, and fall
# back to the first organism seen in the node table only for clusters that have
# no entry (or an empty entry) in the clusters table.
# NOTE(review): assumes the clusters table is the authoritative source - confirm.
curated_reps = clusters.set_index('cluster_id')['representative_organism'].to_dict()
summary['representative_organism'] = (
    summary['cluster_id'].map(curated_reps).fillna(summary['representative_organism'])
)

summary.to_csv('cluster_summary_table.csv', index=False)
summary
