In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

# --- Load the GraphML file into a NetworkX graph ---
graph_filename = "Artificial_Intelligence_Graph.graphml"
G = nx.read_graphml(graph_filename)

# --- Headline statistics: assembled first, then printed in one call ---
summary_lines = [
    f"Graph Loaded: {graph_filename}",
    f"Total Nodes: {G.number_of_nodes()}",
    f"Total Edges: {G.number_of_edges()}",
    f"Graph Density: {nx.density(G):.4f}",
]
print("\n".join(summary_lines))

# --- Tabular views of the graph ---
# nodes_df: one row per node (indexed by node id), one column per node attribute.
nodes_df = pd.DataFrame.from_dict(
    {node: attrs for node, attrs in G.nodes(data=True)},
    orient="index",
)
# edges_df: one row per edge, endpoints only (edge attributes are not included).
edges_df = pd.DataFrame(
    [(source, target) for source, target in G.edges()],
    columns=["Source", "Target"],
)

# --- Preview the first rows of each table ---
print("\nSample Nodes:", nodes_df.head(), sep="\n")
print("\nSample Edges:", edges_df.head(), sep="\n")

# --- Rank every node by degree centrality, highest first ---
degree_centrality = nx.degree_centrality(G)
sorted_degree = sorted(
    degree_centrality.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

# --- Report the ten best-connected nodes ---
top_10_degree = sorted_degree[:10]
print("\nTop 10 Most Connected Nodes:")
for name, score in top_10_degree:
    print(f"{name}: {score:.4f}")

# Snowball Sampling Function
def snowball_sampling(G, seed_nodes, num_layers=2):
    """Return the subgraph reachable from *seed_nodes* within *num_layers* hops.

    Starting from the seeds, each layer adds every neighbor of the nodes
    discovered in the previous layer. Only that newest "frontier" is expanded
    on each pass: nodes found earlier have already contributed all of their
    neighbors, so expanding them again cannot add anything.

    Args:
        G: Graph-like object providing ``neighbors(node)`` and
            ``subgraph(nodes)`` (e.g. a NetworkX graph).
        seed_nodes: Iterable of hashable node ids to start from.
        num_layers: Number of neighbor-expansion rounds; ``0`` returns the
            subgraph induced by the seeds alone.

    Returns:
        Whatever ``G.subgraph`` returns for the collected node set (for
        NetworkX graphs, a view of the induced subgraph).

    Raises:
        Propagates whatever ``G.neighbors`` raises for a node that is not in
        the graph (``networkx.NetworkXError`` for NetworkX graphs).

    Note:
        On NetworkX directed graphs ``neighbors()`` yields successors only,
        so the expansion follows out-edges; predecessors are not sampled.
    """
    sampled_nodes = set(seed_nodes)
    frontier = set(sampled_nodes)
    for _ in range(num_layers):
        discovered = set()
        for node in frontier:
            discovered.update(G.neighbors(node))
        # Keep only nodes not seen before; they form the next frontier.
        discovered -= sampled_nodes
        if not discovered:
            # The sample stopped growing, so further layers are no-ops.
            break
        sampled_nodes |= discovered
        frontier = discovered
    return G.subgraph(sampled_nodes)

# Seeds for the sample: node ids of the ten highest-degree-centrality nodes
seed_nodes = [entry[0] for entry in top_10_degree]

# Grow a two-layer snowball sample around those seeds
sampled_subgraph = snowball_sampling(G, seed_nodes, num_layers=2)

# Re-rank nodes by degree centrality, this time within the sampled subgraph
subgraph_degree_centrality = nx.degree_centrality(sampled_subgraph)
sorted_subgraph_degree = sorted(
    subgraph_degree_centrality.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

# Report the ten best-connected nodes of the subgraph
print("\nTop 10 Most Connected Nodes in Sampled Subgraph:")
for name, score in sorted_subgraph_degree[:10]:
    print(f"{name}: {score:.4f}")

# --- Plot the sampled subgraph ---
plt.figure(figsize=(12, 8))

# One color per node, in node-iteration order: 'Topic' nodes red, all others blue
color_map = [
    'red' if attrs.get('type') == 'Topic' else 'blue'
    for _, attrs in sampled_subgraph.nodes(data=True)
]

# Force-directed (spring) layout; the fixed seed keeps node positions reproducible
pos = nx.spring_layout(sampled_subgraph, seed=42)
draw_options = {
    "with_labels": True,
    "node_size": 100,
    "node_color": color_map,
    "font_size": 8,
    "edge_color": "gray",
    "alpha": 0.7,
}
nx.draw(sampled_subgraph, pos, **draw_options)

plt.title("AI Wikipedia Network Graph (Sampled Subgraph)")
plt.show()




Graph Loaded: Artificial_Intelligence_Graph.graphml
Total Nodes: 16633
Total Edges: 41933
Graph Density: 0.0002

Sample Nodes:
                           type   gender
Artificial Intelligence   Topic      NaN
Valentin Lapa            Person     Male
Anthropomorphise         Person  Unknown
Alan Turing              Person     Male
Peter Cathcart Wason     Person     Male

Sample Edges:
                    Source                Target
0  Artificial Intelligence         Valentin Lapa
1  Artificial Intelligence      Anthropomorphise
2  Artificial Intelligence           Alan Turing
3  Artificial Intelligence  Peter Cathcart Wason
4  Artificial Intelligence           Gary Marcus

Top 10 Most Connected Nodes:
Thomas Aquinas: 0.0699
David Hume: 0.0673
Noam Chomsky: 0.0578
Herbert A. Simon: 0.0571
Bertrand Russell: 0.0542
Bill Gates: 0.0484
Claude Shannon: 0.0463
John McCarthy (computer scientist): 0.0455
Norbert Wiener: 0.0429
Immanuel Kant: 0.0421

Top 10 Most Connected Nodes in Sampled Subgr