In [2]:
import torch
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import os

# --- Configuration ---
# Folder that receives the rendered t-SNE figures. It is created eagerly at
# import/cell-execution time so later saves never hit a missing directory.
SAVE_DIR = "tsne_plots"
os.makedirs(name=SAVE_DIR, exist_ok=True)

def visualize_dataset(name, save_dir=None, n_jobs=10, random_state=42):
    """Project a Planetoid dataset's node features to 2-D with t-SNE and save a plot.

    The scatter plot is colored by node class label and written to
    ``<save_dir>/<name>_tsne.png``. Progress is reported via ``print``.

    Args:
        name: Planetoid dataset name, e.g. ``'Cora'``, ``'CiteSeer'``, ``'PubMed'``.
            Also used for the download/cache folder ``./data/<name>``.
        save_dir: Output directory for the PNG. ``None`` (default) falls back
            to the module-level ``SAVE_DIR``.
        n_jobs: Value forwarded to ``TSNE(n_jobs=...)``. Defaults to 10, the
            value this script has always used.
        random_state: Seed forwarded to ``TSNE`` so the layout is reproducible.

    Returns:
        None.
    """
    print(f"Processing {name}...")

    if save_dir is None:
        save_dir = SAVE_DIR
    # Make sure a caller-supplied directory exists too (no-op for the default,
    # which is already created at module level).
    os.makedirs(save_dir, exist_ok=True)

    # 1. Load the data. NormalizeFeatures is applied because, per the original
    #    author's note, it gives better-looking t-SNE results.
    dataset = Planetoid(root=f'./data/{name}', name=name, transform=T.NormalizeFeatures())
    data = dataset[0]

    X = data.x.numpy()
    y = data.y.numpy()

    # 2. Run t-SNE. init='pca' is used for a more stable layout (author's note);
    #    n_jobs caps the number of parallel workers.
    tsne = TSNE(
        n_components=2,
        random_state=random_state,
        init='pca',
        learning_rate='auto',
        n_jobs=n_jobs,
    )
    X_embedded = tsne.fit_transform(X)

    # 3. Plot on an explicit figure/axes pair so nothing depends on pyplot's
    #    "current figure" state, and so the figure can be closed reliably.
    fig, ax = plt.subplots(figsize=(10, 10), dpi=300)  # high resolution
    try:
        # Slight transparency (alpha=0.8) keeps overlapping points visible;
        # s=14 is the marker size; 'tab10' gives distinct categorical colors.
        scatter = ax.scatter(
            X_embedded[:, 0],
            X_embedded[:, 1],
            c=y,
            cmap='tab10',
            s=14,
            alpha=0.8,
        )

        # 4. Remove clutter: hide axes and frame, since t-SNE coordinates
        #    carry no physical meaning.
        ax.axis('off')

        # Title and per-class legend.
        ax.set_title(f"{name} (t-SNE)", fontsize=16, fontweight='bold', pad=20)
        ax.legend(
            *scatter.legend_elements(),
            title="Classes",
            loc="upper right",
            frameon=True,
            framealpha=0.9,
        )

        # 5. Save to disk.
        save_path = os.path.join(save_dir, f"{name}_tsne.png")
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0.1)
    finally:
        # Always release the figure, even if plotting or saving failed;
        # otherwise large canvases pile up across datasets.
        plt.close(fig)

    print(f"  -> Saved to {save_path}")

# --- Run ---
# Planetoid datasets to visualize; one PNG is produced per entry.
datasets = ['Cora', 'CiteSeer', 'PubMed']

for name in datasets:
    visualize_dataset(name)

# Report the configured output folder instead of repeating its name as a
# literal, so the message stays correct if SAVE_DIR is changed.
print(f"\nDone. Check the '{SAVE_DIR}' folder.")

Processing Cora...
  -> Saved to tsne_plots/Cora_tsne.png
Processing CiteSeer...
  -> Saved to tsne_plots/CiteSeer_tsne.png
Processing PubMed...
  -> Saved to tsne_plots/PubMed_tsne.png

Done. Check the 'tsne_plots' folder.
