# Continuity, Trustworthiness

In [None]:
import numpy as np
# ベクトル
vectors = np.load("../../d3-app/data/vector.npy")
# point_to_cluster_map
point_to_cluster_map = np.load("../../d3-app/data/point_cluster_map.npy")

# unique clusters
unique_clusters = np.unique(point_to_cluster_map)

# 低次元投影
projected_vectors = np.load("../../d3-app/data/projection.npy")

# leave IDs
leave_ids = np.load("../../d3-app/data/leaves.npy")

# hdbscan_label(noise points are labeled as -1)
hdbscan_labels = np.load("../../d3-app/data/hdbscan_label.npy")

In [None]:
# ノイズの数
num_noise_points = np.sum(point_to_cluster_map == -1)
print(f"Number of noise points: {num_noise_points}")

Number of noise points: 0


In [5]:
from sklearn.manifold import trustworthiness
import pandas as pd

def calculate_overall_metrics(X_high, X_low, k=10):
    """
    データ全体の Trustworthiness と Continuity の平均値を算出する
    """
    print(f"Calculating metrics with k={k}...")
    
    # 1. Trustworthiness (信頼性)
    # 低次元で近いものが、高次元でも近かったか
    t_score = trustworthiness(X_high, X_low, n_neighbors=k)
    
    # 2. Continuity (連続性)
    # 高次元で近いものが、低次元でも維持されているか
    # (高次元と低次元を入れ替えて計算することで算出可能)
    c_score = trustworthiness(X_low, X_high, n_neighbors=k)
    
    return {
        "k": k,
        "overall_trustworthiness": t_score,
        "overall_continuity": c_score
    }

# --- 実行 ---
# result = calculate_overall_metrics(vectors, vectors_low, k=20)
# print(result)

## ノイズあり

In [None]:
# 多いのでサンプリング
n_samples = 10000
total_points = vectors.shape[0]
np.random.seed(42)
indices = np.random.choice(total_points, n_samples, replace=False)

for k in [1, 5, 10, 15, 20, 25, 100]:
    result = calculate_overall_metrics(vectors[indices], projected_vectors[indices], k=k)
    print(result)

Calculating metrics with k=5...
{'k': 5, 'overall_trustworthiness': 0.8086730384307446, 'overall_continuity': 0.9117377141713371}
Calculating metrics with k=10...
{'k': 10, 'overall_trustworthiness': 0.8059597626320798, 'overall_continuity': 0.8978410786719415}
Calculating metrics with k=15...
{'k': 15, 'overall_trustworthiness': 0.8043461655140156, 'overall_continuity': 0.8874560883365072}
Calculating metrics with k=20...
{'k': 20, 'overall_trustworthiness': 0.8020674371834093, 'overall_continuity': 0.8793134379858568}
Calculating metrics with k=25...
{'k': 25, 'overall_trustworthiness': 0.7997168098775347, 'overall_continuity': 0.8729132543665931}
Calculating metrics with k=100...
{'k': 100, 'overall_trustworthiness': 1.2178688079597948, 'overall_continuity': 0.8238865424640844}


## ノイズなし

In [None]:
indices = np.where(hdbscan_labels != -1)[0]
print(f"Number of non-noise points: {len(indices)}")


for k in [1, 5, 10, 15, 20, 25, 100]:
    result = calculate_overall_metrics(vectors[indices], projected_vectors[indices], k=k)
    print(result)



Number of non-noise points: 6367
Calculating metrics with k=5...
{'k': 5, 'overall_trustworthiness': 0.9253392698774862, 'overall_continuity': 0.9890775563662424}
Calculating metrics with k=10...
{'k': 10, 'overall_trustworthiness': 0.9125574417339585, 'overall_continuity': 0.9792515803752277}
Calculating metrics with k=15...
{'k': 15, 'overall_trustworthiness': 0.9012134572620634, 'overall_continuity': 0.9694702640302004}
Calculating metrics with k=20...
{'k': 20, 'overall_trustworthiness': 0.8912314308156363, 'overall_continuity': 0.9602657096059114}
Calculating metrics with k=25...
{'k': 25, 'overall_trustworthiness': 0.8822072096496731, 'overall_continuity': 0.9518271101959779}
Calculating metrics with k=100...
{'k': 100, 'overall_trustworthiness': 0.8270396458676429, 'overall_continuity': 0.8747641595483913}


In [2]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# データの準備
k_list = [5, 10, 15, 20, 25, 100]
trust_scores = [0.9253, 0.9125, 0.9012, 0.8912, 0.8822, 0.8270]
cont_scores = [0.9890, 0.9792, 0.9694, 0.9602, 0.9518, 0.8747]

# サブプロットの作成 (1行2列)
fig = make_subplots(
    rows=1, cols=2, 
    subplot_titles=("Overall Trustworthiness", "Overall Continuity"),
    horizontal_spacing=0.1
)

# Trustworthiness (左)
fig.add_trace(
    go.Scatter(x=k_list, y=trust_scores, mode='lines+markers',
               line=dict(color='#FF4B00', width=3),
               marker=dict(size=8),
               name='Point-level Trustworthiness'),
    row=1, col=1
)

# Continuity (右)
fig.add_trace(
    go.Scatter(x=k_list, y=cont_scores, mode='lines+markers',
               line=dict(color='#005AFF', width=3),
               marker=dict(size=8),
               name='Point-level Continuity'),
    row=1, col=2
)

# レイアウト設定
fig.update_layout(
    title_text="Point-level Structural Preservation (Overall Metrics)",
    height=500, width=1000,
    template="plotly_white",
    showlegend=False
)

fig.update_yaxes(range=[0.7, 1.02], tickformat=".2f", gridcolor='lightgrey')
fig.update_xaxes(title_text="Neighborhood size (k)", gridcolor='lightgrey')

fig.show()
# save
fig.write_image("point_level_structural_preservation_overall_metrics.png", scale=2)