In [1]:
!pip install networkx matplotlib pandas
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

# --- Read the GraphML export into memory ---
graph_filename = "Artificial_Intelligence_Graph.graphml"
G = nx.read_graphml(graph_filename)

# --- Headline statistics, emitted as a single multi-line message ---
summary_lines = [
    f"Graph Loaded: {graph_filename}",
    f"Total Nodes: {G.number_of_nodes()}",
    f"Total Edges: {G.number_of_edges()}",
    f"Graph Density: {nx.density(G):.4f}",
]
print("\n".join(summary_lines))

# --- Tabular views of the graph ---
# One row per node, indexed by node id, one column per node attribute.
node_attributes = {node: attrs for node, attrs in G.nodes(data=True)}
nodes_df = pd.DataFrame.from_dict(node_attributes, orient='index')

# One row per edge, holding only its two endpoints.
edge_pairs = [(source, target) for source, target in G.edges()]
edges_df = pd.DataFrame(edge_pairs, columns=['Source', 'Target'])

# --- Preview the first rows of each table ---
print("\nSample Nodes:", nodes_df.head(), "\nSample Edges:", edges_df.head(), sep="\n")

def _rank_and_report(scores, heading, limit=10):
    """Sort a centrality mapping from highest to lowest score and print the leaders.

    Args:
        scores: Mapping of node -> centrality value (anything with ``.items()``).
        heading: Line printed before the ranked entries.
        limit: How many of the top entries to print.

    Returns:
        The full list of ``(node, score)`` pairs in descending score order.
        Ties keep the mapping's iteration order because ``sorted`` is stable.
    """
    ranking = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    print(heading)
    for node, value in ranking[:limit]:
        print(f"{node}: {value:.4f}")
    return ranking


# Degree centrality: which nodes have the most direct connections.
degree_centrality = nx.degree_centrality(G)
sorted_degree = _rank_and_report(degree_centrality, "\nTop 10 Most Connected Nodes:")

# Betweenness centrality: which nodes act as bridges between others.
# NOTE(review): this is the exact computation; on a graph of the size recorded in
# this notebook's output (16,633 nodes) it may take a long time. If an estimate is
# acceptable, nx.betweenness_centrality accepts k=<sample size> -- confirm before
# switching, since that changes the reported scores.
betweenness_centrality = nx.betweenness_centrality(G)
sorted_betweenness = _rank_and_report(
    betweenness_centrality, "\nTop 10 Nodes by Betweenness Centrality:"
)

# Closeness centrality: which nodes are, on average, nearest to everyone else.
closeness_centrality = nx.closeness_centrality(G)
sorted_closeness = _rank_and_report(
    closeness_centrality, "\nTop 10 Nodes by Closeness Centrality:"
)

def _draw_network(graph, layout, node_colors, title, figsize, **draw_kwargs):
    """Render one labelled figure of ``graph`` and show it.

    Args:
        graph: The NetworkX graph to draw.
        layout: Mapping of node -> position, e.g. the result of ``nx.spring_layout``.
        node_colors: One colour per node, in ``graph.nodes`` order.
        title: Figure title.
        figsize: ``(width, height)`` passed to ``plt.figure``.
        **draw_kwargs: Extra keyword arguments forwarded unchanged to ``nx.draw``
            (for example ``node_size`` or ``alpha``).
    """
    plt.figure(figsize=figsize)
    nx.draw(
        graph,
        layout,
        with_labels=True,
        node_color=node_colors,
        font_size=8,
        edge_color="gray",
        **draw_kwargs,
    )
    plt.title(title)
    plt.show()


# Colour nodes by type: 'Topic' nodes in red, everything else in blue.
color_map = ['red' if G.nodes[n].get('type') == 'Topic' else 'blue' for n in G.nodes]

# Compute the force-directed layout once and reuse it for both figures. The seed is
# fixed, so recomputing it would only repeat the same work. It is also computed
# before any figure is opened, so a failure here does not leave an empty figure.
pos = nx.spring_layout(G, seed=42)

# NOTE(review): with_labels=True labels every node; at the node count recorded in
# this notebook's output the labels will likely overlap heavily -- confirm intent.

# Figure 1: compact overview.
_draw_network(
    G, pos, color_map,
    "Network Graph of AI-Related People on Wikipedia",
    (12, 8),
    node_size=50,
)

# Figure 2: larger canvas, bigger and semi-transparent markers.
_draw_network(
    G, pos, color_map,
    "AI Wikipedia Network Graph",
    (15, 10),
    node_size=100,
    alpha=0.7,
)

Graph Loaded: Artificial_Intelligence_Graph.graphml
Total Nodes: 16633
Total Edges: 41933
Graph Density: 0.0002

Sample Nodes:
                           type   gender
Artificial Intelligence   Topic      NaN
Valentin Lapa            Person     Male
Anthropomorphise         Person  Unknown
Alan Turing              Person     Male
Peter Cathcart Wason     Person     Male

Sample Edges:
                    Source                Target
0  Artificial Intelligence         Valentin Lapa
1  Artificial Intelligence      Anthropomorphise
2  Artificial Intelligence           Alan Turing
3  Artificial Intelligence  Peter Cathcart Wason
4  Artificial Intelligence           Gary Marcus

Top 10 Most Connected Nodes:
Thomas Aquinas: 0.0699
David Hume: 0.0673
Noam Chomsky: 0.0578
Herbert A. Simon: 0.0571
Bertrand Russell: 0.0542
Bill Gates: 0.0484
Claude Shannon: 0.0463
John McCarthy (computer scientist): 0.0455
Norbert Wiener: 0.0429
Immanuel Kant: 0.0421

Top 10 Nodes by Betweenness Centrality:
Bibc

ModuleNotFoundError: No module named 'scipy'

<Figure size 1200x800 with 0 Axes>