# Epstein Network — Interactive Exploration

Interactive analysis of the unify-graph network using NetworkX.
Run `./build.sh` first to generate data files in `site/data/`.

In [None]:
import json
from pathlib import Path
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# Load the JSON data files generated by ./build.sh.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's locale encoding, which is not UTF-8 on every system.
DATA = Path('..') / 'site' / 'data'
graph = json.loads((DATA / 'graph.json').read_text(encoding='utf-8'))
entities = json.loads((DATA / 'entities.json').read_text(encoding='utf-8'))
# NOTE(review): nxdata is loaded but not used by the cells visible here.
nxdata = json.loads((DATA / 'networkx.json').read_text(encoding='utf-8'))

# Build an undirected graph. Each node keeps every field except 'id'
# as a node attribute.
G = nx.Graph()
for node in graph['nodes']:
    G.add_node(node['id'], **{k: v for k, v in node.items() if k != 'id'})

# Add each unordered (source, target) pair once; sorting the endpoints makes
# A->B and B->A map to the same key.
seen = set()
for link in graph['links']:
    key = tuple(sorted([link['source'], link['target']]))
    if key not in seen:
        G.add_edge(link['source'], link['target'])
        seen.add(key)

# Basic size and cohesion statistics for a quick sanity check.
print(f'Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}')
print(f'Density: {nx.density(G):.4f}')
print(f'Average clustering: {nx.average_clustering(G):.4f}')

## Centrality Analysis

In [None]:
import pandas as pd

# Per-node centrality measures, each a dict keyed by node id.
betweenness = nx.betweenness_centrality(G)
pagerank = nx.pagerank(G)
degree = dict(G.degree())

# Gather the entity metadata columns in a single pass over the graph nodes.
name_by_id = {}
cluster_by_id = {}
evidence_by_id = {}
for node_id in G.nodes():
    record = entities[node_id]
    name_by_id[node_id] = record['name']
    cluster_by_id[node_id] = record.get('cluster', '?')
    # True only when the entity carries a non-empty 'evidence' value.
    evidence_by_id[node_id] = bool(record.get('evidence', {}))

# One row per node (indexed by node id), most central first.
columns = {
    'name': name_by_id,
    'cluster': cluster_by_id,
    'degree': degree,
    'betweenness': betweenness,
    'pagerank': pagerank,
    'has_evidence': evidence_by_id,
}
df = pd.DataFrame(columns).sort_values('betweenness', ascending=False)

df.head(20)

## Degree Distribution

In [None]:
# Two panels side by side: linear histogram (left), log-log scatter (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0a0a0f')

degrees = [d for _, d in G.degree()]

# Left panel: how many nodes fall into each degree bin.
ax1.hist(degrees, bins=30, color='#4e79a7', edgecolor='#1a1a2a')

# Right panel: count of nodes per distinct degree value, on log-log axes.
unique, counts = np.unique(degrees, return_counts=True)
ax2.scatter(unique, counts, color='#f28e2b', s=30)
ax2.set_xscale('log')
ax2.set_yscale('log')

# Labels and the shared dark theme, applied per panel.
panel_text = [
    (ax1, 'Degree', 'Count', 'Degree Distribution'),
    (ax2, 'Degree (log)', 'Count (log)', 'Log-Log Degree (Power Law Check)'),
]
for ax, x_text, y_text, title_text in panel_text:
    ax.set_xlabel(x_text)
    ax.set_ylabel(y_text)
    ax.set_title(title_text)
    ax.set_facecolor('#0a0a0f')
    ax.tick_params(colors='#888')
    ax.xaxis.label.set_color('#888')
    ax.yaxis.label.set_color('#888')
    ax.title.set_color('#c8c8d0')

plt.tight_layout()
plt.show()

## Community Detection

In [None]:
# Louvain community detection with a fixed seed.
communities = nx.community.louvain_communities(G, seed=42)
print(f'Communities detected: {len(communities)}')

# List communities largest first, previewing up to five members each
# (taken in sorted node-id order).
for i, comm in enumerate(sorted(communities, key=len, reverse=True)):
    names = [entities[n]['name'] for n in sorted(comm)[:5]]
    # Only show an ellipsis when the preview actually omits members.
    suffix = '...' if len(comm) > 5 else ''
    print(f'  C{i} ({len(comm)} members): {", ".join(names)}{suffix}')

## Path Finding

Find shortest paths between any two entities.

In [None]:
def find_path(source, target):
    """Find and display the shortest path between two entity IDs.

    Prints one line per node on the path (name, cluster, and an evidence
    marker). Prints a short message instead when either ID is not a node of
    the graph or when the two nodes are not connected. Returns None.
    """
    # nx.shortest_path raises NodeNotFound for IDs that are not in the graph;
    # report those explicitly instead of surfacing a traceback.
    for entity_id in (source, target):
        if entity_id not in G:
            print(f'Entity {entity_id} not found')
            return

    # Keep the try body to the one call that can raise NetworkXNoPath.
    try:
        path = nx.shortest_path(G, source, target)
    except nx.NetworkXNoPath:
        print(f'No path between {source} and {target}')
        return

    print(f'Path ({len(path)-1} hops):')
    for i, node in enumerate(path):
        # The first node gets padding so names line up under the arrows.
        arrow = '  -> ' if i > 0 else '     '
        ent = entities[node]
        print(f'{arrow}{ent["name"]} ({ent.get("cluster", "?")})' +
              (' [has evidence]' if ent.get('evidence') else ''))

# Example queries: Bill Gates -> Virginia Giuffre, then
# Peter Thiel -> Prince Andrew, separated by a blank line.
example_pairs = [
    ('bill_gates', 'virginia_giuffre'),
    ('peter_thiel', 'prince_andrew'),
]
for position, (start_id, end_id) in enumerate(example_pairs):
    if position > 0:
        print()
    find_path(start_id, end_id)

## Evidence Gap Analysis

Entities with many connections but zero evidence.

In [None]:
# Rows whose has_evidence flag is False, most-connected first.
gaps = df[~df['has_evidence']].sort_values('degree', ascending=False)
print(f'{len(gaps)} entities with no evidence ({len(gaps)/len(df)*100:.0f}%):')
print()

# Only the first 20 rows (highest degree) are charted.
top_gaps = gaps.head(20)
row_positions = range(len(top_gaps))

fig, ax = plt.subplots(figsize=(12, 6))
fig.patch.set_facecolor('#0a0a0f')
ax.set_facecolor('#0a0a0f')

# Horizontal bars, one per entity, labelled with the entity name.
bars = ax.barh(row_positions, top_gaps['degree'], color='#e15759')
ax.set_yticks(row_positions)
ax.set_yticklabels(top_gaps['name'], fontsize=9)
ax.invert_yaxis()  # put the first (highest-degree) row at the top

ax.set_xlabel('Degree (connections)')
ax.set_title('Top 20 Evidence Gaps (high connectivity, zero evidence)')

# Dark-theme text colours.
ax.tick_params(colors='#888')
ax.xaxis.label.set_color('#888')
ax.title.set_color('#c8c8d0')

plt.tight_layout()
plt.show()

## Neighborhood Explorer

Inspect any entity's 1-hop and 2-hop neighborhood.

In [None]:
def explore(entity_id, hops=2):
    """Show an entity's summary and its neighborhood up to ``hops`` hops.

    Prints the entity's metadata, then for each hop distance up to ten
    nodes ranked by betweenness (highest first) plus a count of the rest.
    Hop ``k`` lists only nodes first reached at distance ``k`` from
    ``entity_id``; nodes shown at a nearer hop are never repeated.
    Prints a message and returns early when the ID is unknown. Returns None.
    """
    ent = entities.get(entity_id)
    if not ent:
        print(f'Entity {entity_id} not found')
        return
    if entity_id not in G:
        # Metadata exists, but there is no graph node to walk from.
        print(f'Entity {entity_id} is not in the graph')
        return

    # Iterating '@type' directly yields the keys of a dict and also works if
    # the field is a plain list of type names.
    types = ", ".join(ent.get("@type", {}))

    print(f'=== {ent["name"]} ===')
    print(f'Cluster: {ent.get("cluster", "?")}  |  Types: {types}')
    print(f'Role: {ent.get("role", "n/a")}')
    print(f'Evidence: {len(ent.get("evidence", {}))} documents')
    print(f'Degree: {G.degree(entity_id)}  |  Betweenness: {betweenness.get(entity_id, 0):.4f}')
    print()

    # Breadth-first layering: 'visited' holds every node already assigned to
    # a nearer hop, so each hop contains only newly reached nodes. Without
    # it, hop 3 and beyond would list hop-2 nodes again.
    visited = {entity_id}
    neighbors = {entity_id}
    for hop in range(1, hops + 1):
        reached = set()
        for n in neighbors:
            reached.update(G.neighbors(n))
        neighbors = reached - visited
        visited |= neighbors

        print(f'Hop {hop} ({len(neighbors)} entities):')
        for n in sorted(neighbors, key=lambda x: betweenness.get(x, 0), reverse=True)[:10]:
            e = entities[n]
            ev = '✓' if e.get('evidence') else '✗'
            print(f'  {ev} {e["name"]} ({e.get("cluster", "?")}, bc={betweenness.get(n, 0):.4f})')
        if len(neighbors) > 10:
            print(f'  ... and {len(neighbors) - 10} more')
        print()

# Example: print the summary and the 1-hop and 2-hop neighborhoods
# (default hops=2) of the entity whose id is 'maxwell'.
explore('maxwell')