# file

In [11]:
import pickle
from pathlib import Path

import numpy as np

# Single location for every artifact read in this cell, relative to the
# notebook's working directory. Change it here instead of in each load call.
DATA_DIR = Path("./../../d3-app/data")

# Pairwise cluster similarity tables, keyed by metric name.
# SECURITY: pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
with open(DATA_DIR / "cluster_similarities.pkl", "rb") as f:
    cluster_similarities = pickle.load(f)
# Keys observed when this was last run:
# dict_keys(['kl_divergence', 'bhattacharyya_coefficient', 'mahalanobis_distance'])

# # condensed tree (currently disabled)
# with open(DATA_DIR / "condensed_tree_object.pkl", "rb") as f:
#     condensed_tree_object = pickle.load(f)
# raw_tree = condensed_tree_object.raw_tree

# point_cluster_map
point_cluster_map = np.load(DATA_DIR / "point_cluster_map.npy")
print(f"len(point_cluster_map): {len(point_cluster_map)}")


# Low-dimensional projection
projected_vector = np.load(DATA_DIR / "projection.npy")
print(f"projected_vector.shape: {projected_vector.shape}")


# hdbscan_label (noise points are labeled as -1)
hdbscan_labels = np.load(DATA_DIR / "hdbscan_label.npy")

# The boolean mask below selects projection rows by label, so both arrays
# must have the same number of rows; fail with a clear message otherwise.
if len(hdbscan_labels) != len(projected_vector):
    raise ValueError(
        f"hdbscan_labels has {len(hdbscan_labels)} entries but "
        f"projected_vector has {len(projected_vector)} rows"
    )

projected_vector_no_noise = projected_vector[hdbscan_labels != -1]
print(f"projected_vector after removing noise points: {projected_vector_no_noise.shape}")

len(point_cluster_map): 115754
projected_vector.shape: (115754, 2)
projected_vector after removing noise points: (6367, 2)


# 3つのクラスタ間の距離の表示

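選択した3つのクラスタ（`cluster_id_a`, `cluster_id_b`, `cluster_id_c`）について、各ペアの類似度指標（KLダイバージェンス、Bhattacharyya係数、マハラノビス距離）を一覧表示し、2次元投影上での位置と、指標を各辺に記した三角形の図で関係を可視化する。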

In [14]:
# NOTE(review): all three IDs are currently the same value, so every pair
# below is a self-pair. Set three different cluster IDs to compare clusters.
cluster_id_a = 115760
cluster_id_b = 115760
cluster_id_c = 115760

import numpy as np
import matplotlib.pyplot as plt

# Configuration: the cluster IDs to evaluate
target_clusters = [cluster_id_a, cluster_id_b, cluster_id_c]
metrics = cluster_similarities.keys() # ['kl_divergence', 'bhattacharyya_coefficient', 'mahalanobis_distance']
colors = ['#FF4B00', '#005AFF', '#03AF7A'] # easy-to-distinguish universal colors

# Repeated IDs produce self-pairs, for which the lookup below may find nothing.
if len(set(target_clusters)) < len(target_clusters):
    print("Warning: target cluster IDs are not all distinct; self-pairs may have no stored similarity.")

# 1. Print every metric for every unordered pair of target clusters
print("=== Cluster Similarity Metrics ===")
for i, id1 in enumerate(target_clusters):
    for id2 in target_clusters[i+1:]:
        print(f"\n[ Pair: Cluster {id1} <-> Cluster {id2} ]")
        for m in metrics:
            # Look the pair up as (id1, id2), falling back to (id2, id1).
            # Compare against None explicitly: chaining the lookups with `or`
            # would discard a legitimately stored value of 0.0.
            pair_table = cluster_similarities[m]
            val = pair_table.get((id1, id2))
            if val is None:
                val = pair_table.get((id2, id1))
            if val is not None:
                print(f" - {m:25}: {val:.4f}")
            else:
                print(f" - {m:25}: Data not found")



import plotly.graph_objects as go
import numpy as np

# Configuration: the clusters to highlight and one colour per cluster
target_clusters = [cluster_id_a, cluster_id_b, cluster_id_c]
colors = ['#FF4B00', '#005AFF', '#03AF7A']

fig = go.Figure()

# 1. Background: every non-noise point (Scattergl renders with WebGL for speed)
fig.add_trace(go.Scattergl(
    x=projected_vector_no_noise[:, 0],
    y=projected_vector_no_noise[:, 1],
    mode='markers',
    marker=dict(color='lightgrey', size=2, opacity=0.3),
    name='Other Points',
    hoverinfo='none'
))

# 2. Overlay each target cluster
for i, cid in enumerate(target_clusters):
    mask = point_cluster_map == cid

    # An ID that matches no point would give an empty selection, whose mean is
    # NaN (with a RuntimeWarning) and whose annotation would land at NaN.
    if not mask.any():
        print(f"Cluster {cid}: no points found in point_cluster_map; skipped")
        continue

    points = projected_vector[mask]
    center = points.mean(axis=0)

    # The cluster's points
    fig.add_trace(go.Scattergl(
        x=points[:, 0],
        y=points[:, 1],
        mode='markers',
        marker=dict(color=colors[i], size=6),
        name=f'Cluster {cid}'
    ))

    # Text label anchored at the cluster centroid
    fig.add_annotation(
        x=center[0], y=center[1],
        text=f"ID: {cid}",
        showarrow=True,
        arrowhead=2,
        ax=0, ay=-30,
        bgcolor="white",
        bordercolor=colors[i],
        borderwidth=2,
        opacity=0.9
    )

# Layout settings
fig.update_layout(
    title="Visual Verification of Cluster Separation",
    xaxis_title="Component 1",
    yaxis_title="Component 2",
    template="plotly_white",
    legend=dict(itemsizing='constant'),
    width=900,
    height=700
)

fig.show()

def _lookup_pair_metric(metric_name, id1, id2):
    """Return the stored value of `metric_name` for an unordered cluster pair.

    The pair is looked up as (id1, id2) and then as (id2, id1). Returns None
    when neither key holds a value. Values are compared against None
    explicitly so that a stored 0.0 is returned rather than treated as missing.
    """
    pair_table = cluster_similarities[metric_name]
    for key in ((id1, id2), (id2, id1)):
        value = pair_table.get(key)
        if value is not None:
            return value
    return None


def _format_metric(value):
    """Format a metric with two decimals, or 'N/A' when no value was found."""
    return "N/A" if value is None else f"{value:.2f}"


# Vertex coordinates of an equilateral triangle, one vertex per target cluster
nodes_x = [0, 1, 0.5]
nodes_y = [0, 0, np.sqrt(3)/2]
node_labels = [f"Cluster {cid}" for cid in target_clusters]

fig_tri = go.Figure()

# Add each edge together with its metric label
pairs = [(0, 1), (1, 2), (2, 0)]
for p1, p2 in pairs:
    id1, id2 = target_clusters[p1], target_clusters[p2]

    # Fetch the three metrics for this pair (None when the pair is not stored)
    kl = _lookup_pair_metric('kl_divergence', id1, id2)
    bc = _lookup_pair_metric('bhattacharyya_coefficient', id1, id2)
    mh = _lookup_pair_metric('mahalanobis_distance', id1, id2)

    # Edge thickness scales with the Mahalanobis distance (larger = thicker),
    # with a minimum width of 1 that is also used when the distance is missing.
    line_width = 1 if mh is None else max(1, mh * 0.5)

    # Draw the edge
    fig_tri.add_trace(go.Scatter(
        x=[nodes_x[p1], nodes_x[p2]],
        y=[nodes_y[p1], nodes_y[p2]],
        mode='lines',
        line=dict(width=line_width, color='rgba(100, 100, 100, 0.5)'),
        hoverinfo='none',
        showlegend=False
    ))

    # Place a text box at the midpoint of the edge. Missing metrics are shown
    # as "N/A" so they cannot be mistaken for a genuine value of 0.00.
    mid_x, mid_y = (nodes_x[p1] + nodes_x[p2]) / 2, (nodes_y[p1] + nodes_y[p2]) / 2
    fig_tri.add_annotation(
        x=mid_x, y=mid_y,
        text=(
            f"<b>KL:</b> {_format_metric(kl)}<br>"
            f"<b>BC:</b> {_format_metric(bc)}<br>"
            f"<b>MH:</b> {_format_metric(mh)}"
        ),
        showarrow=False,
        bgcolor="rgba(255, 255, 255, 0.9)",
        bordercolor="lightgray",
        borderwidth=1,
        font=dict(size=11)
    )

# Draw the vertices (nodes)
fig_tri.add_trace(go.Scatter(
    x=nodes_x, 
    y=nodes_y,
    mode='markers+text',
    marker=dict(
        size=50, 
        color=colors, 
        line=dict(width=3, color='white')
    ),
    text=node_labels,
    textposition="top center",
    # Scatter traces style their text via 'textfont' (not 'font')
    textfont=dict(
        size=14, 
        family="Arial Black"
    ),
    showlegend=False
))

fig_tri.update_layout(
    title="Structural Distance Map (Inter-cluster Metrics)",
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-0.2, 1.2]),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-0.2, 1.1]),
    plot_bgcolor='white',
    width=600,
    height=550
)

fig_tri.show()

=== Cluster Similarity Metrics ===

[ Pair: Cluster 115760 <-> Cluster 115760 ]
 - kl_divergence            : Data not found
 - bhattacharyya_coefficient: Data not found
 - mahalanobis_distance     : Data not found

[ Pair: Cluster 115760 <-> Cluster 115760 ]
 - kl_divergence            : Data not found
 - bhattacharyya_coefficient: Data not found
 - mahalanobis_distance     : Data not found

[ Pair: Cluster 115760 <-> Cluster 115760 ]
 - kl_divergence            : Data not found
 - bhattacharyya_coefficient: Data not found
 - mahalanobis_distance     : Data not found
