In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cuml.umap as cuml_umap  # GPU 版 UMAP
from cuml.decomposition import PCA  # GPU 版 PCA
from sklearn.metrics.pairwise import cosine_similarity
import os

# 1. Load the embeddings. Only the generated data is plotted further down; the
#    original-text embeddings serve as the reference set for the similarity scores.
original_embeddings = np.load("outputs/emb_data/bert/llama_embeddings_original_text.npy")
generated_response_embeddings = np.load("outputs/emb_data/bert/llama_embeddings_generated_response.npy")
generated_knowledge_embeddings = np.load("outputs/emb_data/bert/llama_embeddings_generated_knowledge.npy")


def _max_cosine_similarity(queries, reference, chunk_size=2048):
    """Return each query row's highest cosine similarity to any reference row.

    Equivalent to ``cosine_similarity(queries, reference).max(axis=1)``, but the
    similarity matrix is built ``chunk_size`` query rows at a time, so peak memory
    is ``chunk_size x len(reference)`` values instead of the full
    ``len(queries) x len(reference)`` matrix.

    Args:
        queries: 2-D array with one embedding per row.
        reference: 2-D array with the same number of columns as ``queries``.
        chunk_size: Number of query rows scored per ``cosine_similarity`` call.

    Returns:
        1-D array of length ``len(queries)``.
    """
    return np.concatenate([
        cosine_similarity(queries[start:start + chunk_size], reference).max(axis=1)
        for start in range(0, len(queries), chunk_size)
    ])


# 2-3. For every generated sample, keep only the similarity to its closest
#      original sample. scikit-learn's cosine_similarity runs on the CPU, so the
#      progress message says so. The full pairwise matrices are never needed
#      (only the row-wise maximum is used), so they are not materialised.
print("🔄 Computing Cosine Similarity with Original Data (CPU, scikit-learn)...")
max_response_similarity = _max_cosine_similarity(generated_response_embeddings, original_embeddings)
max_knowledge_similarity = _max_cosine_similarity(generated_knowledge_embeddings, original_embeddings)

# 4. "Sequence identity to training" score on a percent scale, responses first
#    and knowledge second (the same order used when the embeddings are stacked).
#    Despite the name this is cosine similarity x 100, so it can in principle
#    be negative.
sequence_identity = np.concatenate((max_response_similarity, max_knowledge_similarity)) * 100

# 5. Stack the two generated sets into one matrix (responses first, then
#    knowledge) and build a per-row label naming the set each row came from.
generated_embeddings = np.vstack([generated_response_embeddings, generated_knowledge_embeddings])
generated_labels = np.array([
    source_name
    for source_name, source_embeddings in (
        ("generated_response", generated_response_embeddings),
        ("generated_knowledge", generated_knowledge_embeddings),
    )
    for _ in range(len(source_embeddings))
])

# 6. Pre-reduce the generated embeddings to 50 dimensions with GPU PCA before
#    running UMAP. The result is cached on disk, but the cache is reused only
#    when its shape matches the embeddings loaded in this run; a file left over
#    from a different dataset is recomputed instead of being silently plotted.
pca_path = "outputs/emb_data/reduced_pca_50_generated.npy"
pca_components = 50

pca_50_embeddings = None
if os.path.exists(pca_path):
    cached_pca = np.load(pca_path)
    if cached_pca.shape == (len(generated_embeddings), pca_components):
        print("✅ Loading PCA-reduced data from file...")
        pca_50_embeddings = cached_pca
    else:
        print(f"⚠️ Cached PCA shape {cached_pca.shape} does not match the current data; recomputing...")

if pca_50_embeddings is None:
    # Report the real input width rather than assuming a fixed embedding size.
    print(f"🔄 Applying GPU PCA ({generated_embeddings.shape[1]} → {pca_components}) ...")
    pca = PCA(n_components=pca_components, random_state=42)
    pca_50_embeddings = pca.fit_transform(generated_embeddings)
    np.save(pca_path, pca_50_embeddings)
    print(f"✅ PCA embeddings saved to {pca_path}")

# 7. Project the PCA-reduced embeddings to 2-D with GPU UMAP. As with the PCA
#    step the result is cached on disk, and the cache is reused only when it has
#    one 2-D point per PCA row; otherwise it is stale and gets recomputed.
umap_path = "outputs/emb_data/reduced_umap_2d_generated.npy"
umap_components = 2

low_dim_embeddings = None
if os.path.exists(umap_path):
    cached_umap = np.load(umap_path)
    if cached_umap.shape == (len(pca_50_embeddings), umap_components):
        print("✅ Loading UMAP-reduced embeddings from file...")
        low_dim_embeddings = cached_umap
    else:
        print(f"⚠️ Cached UMAP shape {cached_umap.shape} does not match the current data; recomputing...")

if low_dim_embeddings is None:
    print("🔄 Applying GPU UMAP on PCA-reduced data ...")
    umap_reducer = cuml_umap.UMAP(
        n_components=umap_components,
        metric="cosine",
        n_neighbors=15,
        min_dist=0.3,
        random_state=42,
    )
    low_dim_embeddings = umap_reducer.fit_transform(pca_50_embeddings)
    np.save(umap_path, low_dim_embeddings)
    print(f"✅ UMAP embeddings saved to {umap_path}")

# 8. Two-stop gradient used to colour points by similarity: the first stop
#    (darker) is the low end of the scale, the second (lighter) the high end.
import matplotlib.colors as mcolors

low_similarity_rgb = tuple(channel / 255 for channel in (100, 125, 125))
high_similarity_rgb = tuple(channel / 255 for channel in (160, 200, 180))
cmap = mcolors.LinearSegmentedColormap.from_list(
    "custom_cmap", [low_similarity_rgb, high_similarity_rgb]
)

# 9. Scatter the 2-D UMAP coordinates, colouring each point by its similarity score.
fig, ax = plt.subplots(figsize=(10, 6))
sc = ax.scatter(
    low_dim_embeddings[:, 0],
    low_dim_embeddings[:, 1],
    c=sequence_identity,  # colour encodes the similarity score
    cmap=cmap,
    alpha=0.7,
    s=30,
    edgecolors="black",
)

# 10. Colour bar explaining the colour scale.
cbar = fig.colorbar(sc, ax=ax)
cbar.set_label("% Sequence Identity to Training")

ax.set_title("Generated Data UMAP (Colored by Similarity to Training Data)")
ax.set_xlabel("Dimension 1")
ax.set_ylabel("Dimension 2")

# Write the figure to disk before showing it.
img_path = "outputs/emb_data/embedding_umap_generated_similarity_gpu.png"
fig.savefig(img_path, dpi=300)
print(f"📸 Visualization saved as {img_path}")

plt.show()


In [None]:
### 绘制 柱状图
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Model columns, in the order the scores are listed for every benchmark.
models = ["Claude-3.5-Sonnet", "GPT-4o", "DeepSeek V3", "OpenAI o1-mini", "OpenAI o1-1217", "DeepSeek R1"]

# One entry per benchmark (ten in total); each list holds one score per model.
# NOTE(review): a 0 looks like a placeholder for a score that was not
# reported rather than a real result — confirm against the source table.
benchmark_scores = {
    "MMLU":          [88.3, 87.2, 88.5, 85.2, 91.8, 90.8],
    "MMLU-Redux":    [88.9, 88.0, 89.1, 86.7, 0, 92.9],
    "MMLU-Pro":      [78.0, 72.6, 75.9, 80.3, 0, 84.0],
    "FRAMES":        [72.5, 80.5, 73.3, 76.9, 0, 82.5],
    "AlpacaEval2.0": [52.0, 51.1, 70.0, 57.8, 0, 87.6],
    "ArenaHard":     [85.2, 80.4, 85.5, 92.0, 0, 92.3],
    "LiveCodeBench": [38.9, 32.9, 36.0, 53.8, 63.4, 65.9],
    "MATH-500":      [78.3, 74.6, 90.2, 90.0, 96.4, 97.3],
    "CNMO 2024":     [13.1, 10.8, 43.2, 67.6, 0, 78.8],
    "C-Eval":        [76.7, 76.0, 86.5, 68.9, 0, 91.8],
}

# Benchmark names (row labels) and the score matrix, both in insertion order.
benchmarks = list(benchmark_scores)
data = np.array(list(benchmark_scores.values()))

# Rows are benchmarks, columns are models.
df = pd.DataFrame(data, index=benchmarks, columns=models)

# Lay the benchmarks out on a 2 x 5 grid, one bar chart per benchmark.
num_rows, num_cols = 2, 5
fig, axes = plt.subplots(num_rows, num_cols, figsize=(20, 10))
fig.suptitle("Benchmark Comparison Across AI Models", fontsize=20)

for i, (benchmark, ax) in enumerate(zip(benchmarks, axes.flatten())):
    scores = df.iloc[i, :]

    # The best score(s) of the benchmark are drawn in dark purple, the rest in
    # light purple. The maximum is computed once instead of once per bar.
    best_score = scores.max()
    colors = ["#4A3DA3" if score >= best_score else "#A7A2FF" for score in scores]

    bars = ax.bar(models, scores, color=colors)

    # Annotate each bar with its value. A score of 0 is skipped so that it is
    # not printed as a literal "0.0%" result, matching the per-benchmark
    # charts later in this notebook, which also leave zero bars unlabelled.
    for bar, score in zip(bars, scores):
        if score == 0:
            continue
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 1, f'{score:.1f}%',
                ha='center', fontsize=10, fontweight='bold')

    ax.set_title(benchmark, fontsize=12)

    # Pin the tick positions before assigning the rotated model names.
    ax.set_xticks(range(len(models)))
    ax.set_xticklabels(models, rotation=20, ha='right', fontsize=8)

    # Same y-range on every subplot so bar heights are comparable across panels.
    ax.set_ylim(0, 100)

# Hide grid cells that received no benchmark (only relevant when there are
# fewer benchmarks than grid cells).
for unused_ax in axes.flatten()[len(benchmarks):]:
    unused_ax.set_visible(False)

# Leave room at the top for the figure-level title.
plt.tight_layout(rect=[0, 0, 1, 0.95])

plt.show()


In [None]:
### 绘制单独的柱状图
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Benchmark names transcribed from the results table (row labels of `data`).
# "GPQA Diamond" was previously misspelled "GPOA Diamond"; because the name is
# used verbatim as a chart title, the typo showed up in the figures.
benchmarks = [
    "MMLU", "MMLU-Redux", "MMLU-Pro", "DROP", "IF-Eval",
    "GPQA Diamond", "SimpleQA", "FRAMES", "AlpacaEval2.0", "ArenaHard",
    "LiveCodeBench", "Codeforces", "SWE Verified", "Aider-Polyglot",
    "AIME 2024", "MATH-500", "CNMO 2024",
    "CLUEWSC", "C-Eval", "C-SimpleQA"
]

# Model names (column labels of `data`).
models = ["Claude-3.5-Sonnet", "GPT-4o", "DeepSeek V3", "OpenAI o1-mini", "OpenAI o1-1217", "DeepSeek R1"]

# One row per benchmark, one column per model.
# NOTE(review): a 0 looks like a placeholder for a score that was not
# reported (the plotting loop below skips zero labels) — confirm against the table.
data = np.array([
    [88.3, 87.2, 88.5, 85.2, 91.8, 90.8],  # MMLU
    [88.9, 88.0, 89.1, 86.7, 0, 92.9],  # MMLU-Redux
    [78.0, 72.6, 75.9, 80.3, 0, 84.0],  # MMLU-Pro
    [88.3, 83.7, 91.6, 83.9, 90.2, 92.2],  # DROP
    [86.5, 84.3, 86.1, 84.8, 0, 83.3],  # IF-Eval
    [65.0, 49.9, 59.1, 60.0, 75.7, 71.5],  # GPQA Diamond
    [28.4, 38.2, 24.9, 7.0, 47.0, 30.1],  # SimpleQA
    [72.5, 80.5, 73.3, 76.9, 0, 82.5],  # FRAMES
    [52.0, 51.1, 70.0, 57.8, 0, 87.6],  # AlpacaEval2.0
    [85.2, 80.4, 85.5, 92.0, 0, 92.3],  # ArenaHard
    [38.9, 32.9, 36.0, 53.8, 63.4, 65.9],  # LiveCodeBench
    [20.3, 23.6, 58.7, 93.4, 96.6, 96.3],  # Codeforces
    [50.8, 38.8, 42.0, 41.6, 48.9, 49.2],  # SWE Verified
    [45.3, 16.0, 49.6, 32.9, 61.7, 53.3],  # Aider-Polyglot
    [16.0, 9.3, 39.2, 63.6, 79.2, 79.8],   # AIME 2024
    [78.3, 74.6, 90.2, 90.0, 96.4, 97.3],  # MATH-500
    [13.1, 10.8, 43.2, 67.6, 0, 78.8],  # CNMO 2024
    [85.4, 87.9, 90.9, 89.9, 0, 92.8],  # CLUEWSC
    [76.7, 76.0, 86.5, 68.9, 0, 91.8],  # C-Eval
    [55.4, 58.7, 68.0, 40.3, 0, 63.7]   # C-SimpleQA
])

# Rows are benchmarks, columns are models.
df = pd.DataFrame(data, index=benchmarks, columns=models)

# Draw one standalone bar chart per benchmark.
for i, benchmark in enumerate(benchmarks):
    fig, ax = plt.subplots(figsize=(6, 4))

    # Scores of every model on the current benchmark.
    scores = df.iloc[i, :]

    # The last model in `models` is always drawn in dark purple and every other
    # model in light purple, regardless of which one scored highest. The list
    # is derived from `models` so it stays the right length if models change.
    colors = ["#A7A2FF"] * (len(models) - 1) + ["#4A3DA3"]

    bars = ax.bar(models, scores, color=colors)

    # Annotate each bar with its value; bars with a score of 0 get no label.
    for bar, score in zip(bars, scores):
        if score != 0:
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 1, f'{score:.1f}%',
                    ha='center', fontsize=8, fontweight='bold')

    ax.set_title(benchmark, fontsize=12)
    ax.set_ylabel("Score (%)", fontsize=8)

    # Rotate the model names so they do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=20, ha='right', fontsize=8)

    fig.tight_layout()

    plt.show()

    # Release the figure explicitly: one figure is created per benchmark, and
    # pyplot keeps every figure alive until it is closed.
    plt.close(fig)


In [None]:
# 绘制大柱状图
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Benchmark names transcribed from the results table (row labels of `data`).
# "GPQA Diamond" was previously misspelled "GPOA Diamond"; because the name is
# used verbatim as an x-axis tick label, the typo showed up in the figure.
benchmarks = [
    "MMLU", "MMLU-Redux", "MMLU-Pro", "DROP", "IF-Eval",
    "GPQA Diamond", "SimpleQA", "FRAMES", "AlpacaEval2.0", "ArenaHard",
    "LiveCodeBench", "Codeforces", "SWE Verified", "Aider-Polyglot",
    "AIME 2024", "MATH-500", "CNMO 2024",
    "CLUEWSC", "C-Eval", "C-SimpleQA"
]

# Model names (column labels of `data`).
models = ["Claude-3.5-Sonnet", "GPT-4o", "DeepSeek V3", "OpenAI o1-mini", "OpenAI o1-1217", "DeepSeek R1"]

# One row per benchmark, one column per model.
# NOTE(review): a 0 looks like a placeholder for a score that was not
# reported rather than a real result — confirm against the source table.
data = np.array([
    [88.3, 87.2, 88.5, 85.2, 91.8, 90.8],  # MMLU
    [88.9, 88.0, 89.1, 86.7, 0, 92.9],  # MMLU-Redux
    [78.0, 72.6, 75.9, 80.3, 0, 84.0],  # MMLU-Pro
    [88.3, 83.7, 91.6, 83.9, 90.2, 92.2],  # DROP
    [86.5, 84.3, 86.1, 84.8, 0, 83.3],  # IF-Eval
    [65.0, 49.9, 59.1, 60.0, 75.7, 71.5],  # GPQA Diamond
    [28.4, 38.2, 24.9, 7.0, 47.0, 30.1],  # SimpleQA
    [72.5, 80.5, 73.3, 76.9, 0, 82.5],  # FRAMES
    [52.0, 51.1, 70.0, 57.8, 0, 87.6],  # AlpacaEval2.0
    [85.2, 80.4, 85.5, 92.0, 0, 92.3],  # ArenaHard
    [38.9, 32.9, 36.0, 53.8, 63.4, 65.9],  # LiveCodeBench
    [20.3, 23.6, 58.7, 93.4, 96.6, 96.3],  # Codeforces
    [50.8, 38.8, 42.0, 41.6, 48.9, 49.2],  # SWE Verified
    [45.3, 16.0, 49.6, 32.9, 61.7, 53.3],  # Aider-Polyglot
    [16.0, 9.3, 39.2, 63.6, 79.2, 79.8],   # AIME 2024
    [78.3, 74.6, 90.2, 90.0, 96.4, 97.3],  # MATH-500
    [13.1, 10.8, 43.2, 67.6, 0, 78.8],  # CNMO 2024
    [85.4, 87.9, 90.9, 89.9, 0, 92.8],  # CLUEWSC
    [76.7, 76.0, 86.5, 68.9, 0, 91.8],  # C-Eval
    [55.4, 58.7, 68.0, 40.3, 0, 63.7]   # C-SimpleQA
])

# Rows are benchmarks, columns are models.
df = pd.DataFrame(data, index=benchmarks, columns=models)

# Grouped bar chart: one group per benchmark, one bar per model inside each group.
num_benchmarks = len(benchmarks)
num_models = len(models)
x = np.arange(num_benchmarks)
bar_width = 0.12

# One distinct colour per model. The previous palette repeated two of its
# blues, so two pairs of models shared a colour and could not be told apart in
# the legend. The last model keeps the orange accent.
colors = ["#b8d4e3", "#7ea8be", "#4a6fa5", "#1f4e79", "#0b2545", "#FFA500"]

# Large canvas so the twenty groups stay readable.
plt.figure(figsize=(18, 12))

for i in range(num_models):
    plt.bar(
        x + i * bar_width,
        df.iloc[:, i],
        width=bar_width,
        label=models[i],
        color=colors[i % len(colors)],  # cycle if there are more models than colours
    )

# Centre each tick under its group: the bar centres span x .. x + (n - 1) * width,
# so the midpoint is half of that span (0.3 for six bars of width 0.12).
group_center_offset = (num_models - 1) * bar_width / 2
plt.xticks(x + group_center_offset, benchmarks, rotation=90, fontsize=12)
plt.yticks(fontsize=12)
plt.ylabel("Score (%)", fontsize=14)
plt.title("Benchmark Comparison Across AI Models", fontsize=18)
plt.legend(fontsize=12)
plt.tight_layout()