In [None]:
from torch_geometric.datasets import Planetoid
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# Load the Cora dataset; `root` is the directory handed to Planetoid for its files.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]  # Get the first (and only) graph

# Summary statistics. The trailing comments give the values expected for Cora.
print("Cora Dataset Info:")
print("==================")
print(f"Number of nodes: {data.num_nodes}")      # 2,708 nodes (papers)
# PyG stores every undirected citation link as two directed entries in
# edge_index, so num_edges reports 10,556 here (not the 5,429 raw citations).
print(f"Number of edges: {data.num_edges}")      # 10,556 directed entries
print(f"Number of features: {data.num_features}") # 1,433 (bag-of-words)
print(f"Number of classes: {dataset.num_classes}") # 7 (paper topics)
print(f"Has isolated nodes: {data.has_isolated_nodes()}") # False
print(f"Has self-loops: {data.has_self_loops()}") # False
# Makes the doubled edge count above self-explanatory in the output.
print(f"Is undirected: {data.is_undirected()}") # True

In [None]:
# Embed the node feature matrix into 2-D with t-SNE (non-linear reduction).
# The fixed random_state keeps the layout repeatable between runs.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
node_features_2d = tsne.fit_transform(data.x.numpy())

# Scatter the embedding, one point per node, coloured by its class label.
fig, ax = plt.subplots(figsize=(10, 8))
x_coords, y_coords = node_features_2d[:, 0], node_features_2d[:, 1]
scatter = ax.scatter(x_coords, y_coords, c=data.y.numpy(), cmap='viridis', alpha=0.7, s=10)
fig.colorbar(scatter, ax=ax, label='Class')
ax.set_title("Cora Node Features (t-SNE)")
ax.set_xlabel("t-SNE Dimension 1")
ax.set_ylabel("t-SNE Dimension 2")
plt.show()

In [None]:
# Convert PyG graph to NetworkX (for visualization).
# Row 0 of edge_index holds source ids and row 1 holds target ids; tolist()
# yields plain Python ints so node keys are not numpy scalars.
edge_index = data.edge_index.numpy()
G = nx.Graph()
# Register every node id first so nodes without any edge are still present.
G.add_nodes_from(range(data.num_nodes))
G.add_edges_from(zip(edge_index[0].tolist(), edge_index[1].tolist()))

# Plot a subgraph induced by the first n_points node ids.
n_points = 1000
subgraph = G.subgraph(range(n_points))

# The subgraph's node iteration order is not guaranteed to be 0..n_points-1,
# so fix the drawing order explicitly and pick each node's label by its id.
# This keeps node_color aligned with the nodes actually drawn.
nodelist = sorted(subgraph.nodes())
node_colors = data.y.numpy()[nodelist]

plt.figure(figsize=(10, 8))
nx.draw(
    subgraph,
    pos=nx.spring_layout(subgraph, seed=42),
    nodelist=nodelist,
    node_size=50,
    node_color=node_colors,
    cmap='viridis',
    with_labels=False,
    alpha=0.8
)
plt.title("Cora Citation Graph (Subset)")
plt.show()