In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Model names; every per-model list below follows this order.
models = [
    "Faster R-CNN", "Cascade R-CNN", "YOLOV5n", "YOLOV8n", "YOLOV10n", "YOLOV11n",
    "YOLOV5s", "YOLOV8s", "YOLOV10s", "YOLOV11s", "RT-DETR", "RT-DETRv2", "DINO", "MSAR-T", "MSAR-B"
]

gflops = [91.3, 119.0, 7.1, 8.1, 8.4, 6.4, 23.8, 28.4, 24.4, 21.6, 130.5, 100.5, 119.0, 12.2, 47.8]
map_values = [51.7, 47.9, 67.2, 74.2, 63.4, 68.8, 68.9, 79.9, 73.4, 76.1, 53.8, 64.0, 57.0, 76.3, 81.9]
params = [41.7, 69.4, 2.5, 3.0, 2.7, 2.6, 9.1, 11.1, 8.0, 9.4, 42.7, 36.4, 47.5, 5.5, 20.7]

# Marker area: each `params` value scaled by a constant factor of 10.
bubble_size = 10 * np.array(params)

# One colour per model, aligned with `models`; consecutive runs share a colour.
colors = ['blue'] * 2 + ['green'] * 4 + ['orange'] * 4 + ['red'] * 2 + ['purple'] + ['cyan'] * 2

# Main figure: one scatter call per model so each point carries its own colour and label.
fig, ax = plt.subplots(figsize=(10, 6))
for name, x_val, y_val, area, colour in zip(models, gflops, map_values, bubble_size, colors):
    known_labels = ax.get_legend_handles_labels()[1]
    # A name that is already registered as a label gets an empty label instead.
    point_label = "" if name in known_labels else name
    ax.scatter(x_val, y_val, s=area, color=colour, alpha=0.6, edgecolors="k", label=point_label)

# Write each model name one unit below its point.
for name, x_val, y_val in zip(models, gflops, map_values):
    ax.annotate(name, (x_val, y_val - 1), fontsize=9, ha='center')

# Pairs to compare: an L-shaped dotted guide plus both deltas (second model minus first).
comparison_pairs = [
    ("RT-DETRv2", "MSAR-B"),
    ("YOLOV8n", "MSAR-T")
]

dotted = dict(linestyle='dotted', alpha=0.7)
for model1, model2 in comparison_pairs:
    i1, i2 = models.index(model1), models.index(model2)
    x1, y1 = gflops[i1], map_values[i1]
    x2, y2 = gflops[i2], map_values[i2]
    gflops_diff = x2 - x1
    map_diff = y2 - y1
    x_mid = (x1 + x2) / 2
    y_mid = (y1 + y2) / 2
    gflops_text = f"ΔGFLOPs: {gflops_diff:.1f}"
    map_text = f"ΔmAP: {map_diff:.1f}"

    if (model1, model2) == ("RT-DETRv2", "MSAR-B"):
        # Horizontal leg (blue) at the first model's mAP, then the vertical leg (red).
        ax.plot([x1, x2], [y1, y1], color='blue', **dotted)
        ax.plot([x2, x2], [y1, y2], color='red', **dotted)

        ax.text(x_mid, y1 - 1, gflops_text, color='blue', fontsize=10, ha='center')
        ax.text(x2 + 1, y_mid, map_text, color='red', fontsize=10, ha='left')

    elif (model1, model2) == ("YOLOV8n", "MSAR-T"):
        # Vertical leg (blue) at the first model's GFLOPs, then the horizontal leg (red).
        ax.plot([x1, x1], [y1, y2], color='blue', **dotted)
        ax.plot([x1, x2], [y2, y2], color='red', **dotted)

        ax.text(x1 - 2, y_mid, map_text, color='blue', fontsize=10, ha='right')
        ax.text(x_mid, y2 + 1, gflops_text, color='red', fontsize=10, ha='center')

# Axis labels, title and a light dashed grid.
ax.set(
    xlabel="GFLOPs",
    ylabel="mAP",
    title="GFLOPs vs. mAP (Bubble Size Represents Model Parameters)",
)
ax.grid(True, linestyle='--', alpha=0.5)

# Render the figure.
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Coordinate-conversion helpers built on the Axes transform objects.
def data_to_fig_coords(ax, x, y):
    """Push the point ``(x, y)`` through ``ax.transData`` and return the result.

    Despite the function name, the returned value is whatever
    ``ax.transData.transform`` produces (display coordinates for a Matplotlib
    Axes), not figure-normalised coordinates.

    Args:
        ax: Object exposing a ``transData`` transform.
        x: Horizontal data coordinate.
        y: Vertical data coordinate.

    Returns:
        The transformed point as returned by ``transData.transform``.
    """
    point = (x, y)
    transformed = ax.transData.transform(point)
    return transformed

def fig_to_axes_coords(ax, x, y):
    """Map the point ``(x, y)`` through the inverse of ``ax.transAxes``.

    For a Matplotlib Axes, ``transAxes`` maps axes-fraction coordinates to
    display coordinates, so its inverse takes a display-space point to axes
    coordinates.

    Args:
        ax: Object exposing a ``transAxes`` transform with ``inverted()``.
        x: Horizontal input coordinate.
        y: Vertical input coordinate.

    Returns:
        The transformed point as returned by the inverted transform.
    """
    return ax.transAxes.inverted().transform((x, y))

# Model names; every per-model list below follows this order.
models = [
    "Faster R-CNN", "Cascade R-CNN", "YOLOV5n", "YOLOV8n", "YOLOV10n", "YOLOV11n",
    "YOLOV5s", "YOLOV8s", "YOLOV10s", "YOLOV11s", "RT-DETR", "RT-DETRv2", "DINO", "MSAD-T", "MSAD-B"
]

gflops = [91.3, 119.0, 7.1, 8.1, 8.4, 6.4, 23.8, 28.4, 24.4, 21.6, 130.5, 100.5, 119.0, 12.2, 47.8]
map_values = [51.7, 47.9, 67.2, 74.2, 63.4, 68.8, 68.9, 79.9, 73.4, 76.1, 53.8, 64.0, 57.0, 76.3, 81.9]
params = [41.7, 69.4, 2.5, 3.0, 2.7, 2.6, 9.1, 11.1, 8.0, 9.4, 42.7, 36.4, 47.5, 5.5, 20.7]

# Marker area: each `params` value scaled by a constant factor of 10.
bubble_size = np.array(params) * 10

# One colour per model, aligned with `models`.
colors = [
    'blue', 'blue', 'green', 'green', 'green', 'green', 'orange', 'orange', 'orange', 'orange',
    'red', 'red', 'purple', 'cyan', 'cyan'
]

# Main figure: one scatter call per model.
fig, ax = plt.subplots(figsize=(10, 6))
for i, model in enumerate(models):
    ax.scatter(gflops[i], map_values[i], s=bubble_size[i], color=colors[i], alpha=0.6, edgecolors="k", label=model if model not in ax.get_legend_handles_labels()[1] else "")

# Write each model name one unit below its point.
for i, model in enumerate(models):
    ax.annotate(model, (gflops[i], map_values[i] - 1), fontsize=9, ha='center')

# Comparison pairs. Only the first pair is drawn on the main axes; the second
# pair is shown in the zoomed inset further down.
comparison_pairs = [
    ("RT-DETRv2", "MSAD-B"),
    ("YOLOV8n", "MSAD-T")
]

for model1, model2 in comparison_pairs:
    i1, i2 = models.index(model1), models.index(model2)
    gflops_diff = gflops[i2] - gflops[i1]
    map_diff = map_values[i2] - map_values[i1]

    if model1 == "RT-DETRv2" and model2 == "MSAD-B":
        # Horizontal leg (blue) at the first model's mAP, then the vertical leg (red).
        ax.plot([gflops[i1], gflops[i2]], [map_values[i1], map_values[i1]], linestyle='dotted', color='blue', alpha=0.7)
        ax.plot([gflops[i2], gflops[i2]], [map_values[i1], map_values[i2]], linestyle='dotted', color='red', alpha=0.7)

        # The GFLOPs label shows the magnitude of the difference; the mAP label keeps its sign.
        ax.text((gflops[i1] + gflops[i2]) / 2, map_values[i1] - 1.2, f"ΔGFLOPs: {np.abs(gflops_diff):.1f}", color='blue', fontsize=10, ha='center')
        ax.text(gflops[i2] + 1, (map_values[i1] + map_values[i2]) / 2, f"ΔmAP: {map_diff:.1f}", color='red', fontsize=10, ha='left')

# Pair shown in the inset, with its deltas computed explicitly here so the
# inset labels do not depend on which pair the loop above happened to visit last.
i1, i2 = models.index("YOLOV8n"), models.index("MSAD-T")
gflops_diff = gflops[i2] - gflops[i1]
map_diff = map_values[i2] - map_values[i1]

# Bounds of the zoomed region: the two points plus a margin on each side.
x_min, x_max = min(gflops[i1], gflops[i2]) - 1, max(gflops[i1], gflops[i2]) + 2
y_min, y_max = min(map_values[i1], map_values[i2]) - 1.5, max(map_values[i1], map_values[i2]) + 1

# Outline the zoomed region on the main axes.
ax.plot([x_min, x_max, x_max, x_min, x_min], [y_min, y_min, y_max, y_max, y_min], linestyle="dashed", color="black", alpha=0.4)

# Connector lines from the region's right-hand corners towards the inset.
# NOTE(review): the end points (84.2, 79.5) and (84.2, 70) are hand-tuned data
# coordinates, presumably chosen to meet the inset's left corners; re-tune them
# if the figure size, axis limits or inset position change.
ax.plot([x_max, 84.2], [y_max, 79.5], linestyle="dashed", color="black", alpha=0.4)
ax.plot([x_max, 84.2], [y_min, 70], linestyle="dashed", color="black", alpha=0.4)

# Inset axes, positioned in figure-fraction coordinates [left, bottom, width, height].
axins = fig.add_axes([0.6, 0.6, 0.2, 0.2])

# The inset shows only YOLOV8n and MSAD-T.
axins.scatter(gflops[i1], map_values[i1], s=bubble_size[i1], color='green', alpha=0.6, edgecolors="k", label="YOLOV8n")
axins.scatter(gflops[i2], map_values[i2], s=bubble_size[i2], color='cyan', alpha=0.6, edgecolors="k", label="MSAD-T")

# Match the inset limits to the outlined region.
axins.set_xlim(x_min, x_max)
axins.set_ylim(y_min, y_max)

# L-shaped guide: vertical leg first (red), then the horizontal leg (blue).
axins.plot([gflops[i1], gflops[i1]], [map_values[i1], map_values[i2]], linestyle='dotted', color='red', alpha=0.7)
axins.plot([gflops[i1], gflops[i2]], [map_values[i2], map_values[i2]], linestyle='dotted', color='blue', alpha=0.7)

# Delta labels inside the inset.
axins.text(gflops[i1] + 2.2, (map_values[i1] + map_values[i2]) / 2 -0.2, f"ΔmAP: {map_diff:.1f}", color='red', fontsize=8, ha='right')
axins.text((gflops[i1] + gflops[i2]) / 2, map_values[i2] + 0.2, f"ΔGFLOPs: {gflops_diff:.1f}", color='blue', fontsize=8, ha='center')

# Inset title; ticks are hidden to keep the inset uncluttered.
axins.set_title("YOLOV8n vs. MSAD-T", fontsize=10)
axins.set_xticks([])
axins.set_yticks([])

# Axis labels and grid for the main axes (the title is intentionally disabled).
ax.set_xlabel("GFLOPs")
ax.set_ylabel("mAP(%)")
# ax.set_title("GFLOPs vs. mAP (Bubble Size Represents Model Parameters)")
ax.grid(True, linestyle='--', alpha=0.5)

# Render the figure.
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Model names; every per-model list below follows this order.
models = [
    "Faster R-CNN", "Cascade R-CNN", "YOLOV5n", "YOLOV8n", "YOLOV10n", "YOLOV11n",
    "YOLOV5s", "YOLOV8s", "YOLOV10s", "YOLOV11s", "RT-DETR", "RT-DETRv2", "DINO", "MSAR-T", "MSAR-B"
]

map_values = [51.7, 47.9, 67.2, 74.2, 63.4, 68.8, 68.9, 79.9, 73.4, 76.1, 53.8, 64.0, 57.0, 76.3, 81.9]
fps_values = [55.6, 43.3, 158.7, 82.0, 238.1, 86.2, 156.3, 70.9, 188.7, 78.7, 59.9, 65.8, 55.6, 125.0, 64.1]
gflops = [91.3, 119.0, 7.1, 8.1, 8.4, 6.4, 23.8, 28.4, 24.4, 21.6, 130.5, 100.5, 119.0, 12.2, 47.8]

# Marker area: each GFLOPs value scaled by a constant factor of 5.
bubble_size = 5 * np.array(gflops)

# One colour per model, aligned with `models`; consecutive runs share a colour.
colors = ['blue'] * 2 + ['green'] * 4 + ['orange'] * 4 + ['red'] * 2 + ['purple'] + ['cyan'] * 2

# Main figure: one scatter call per model so each point carries its own colour and label.
fig, ax = plt.subplots(figsize=(10, 6))
for name, fps, score, area, colour in zip(models, fps_values, map_values, bubble_size, colors):
    known_labels = ax.get_legend_handles_labels()[1]
    # A name that is already registered as a label gets an empty label instead.
    point_label = "" if name in known_labels else name
    ax.scatter(fps, score, s=area, color=colour, alpha=0.6, edgecolors="k", label=point_label)

# Write each model name one unit below its point.
for name, fps, score in zip(models, fps_values, map_values):
    ax.annotate(name, (fps, score - 1), fontsize=9, ha='center')

# Axis labels, title and a light dashed grid.
ax.set(
    xlabel="FPS",
    ylabel="mAP",
    title="FPS vs. mAP (Bubble Size Represents GFLOPs)",
)
ax.grid(True, linestyle='--', alpha=0.5)

# Render the figure.
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Model names; every per-model list below follows this order.
models = [
    "Faster R-CNN", "Cascade R-CNN", "YOLOV5n", "YOLOV8n", "YOLOV10n", "YOLOV11n",
    "YOLOV5s", "YOLOV8s", "YOLOV10s", "YOLOV11s", "RT-DETR", "RT-DETRv2", "DINO", "MSAR-T", "MSAR-B"
]

gflops = [91.3, 119.0, 7.1, 8.1, 8.4, 6.4, 23.8, 28.4, 24.4, 21.6, 130.5, 100.5, 119.0, 12.2, 47.8]
map_values = [51.7, 47.9, 67.2, 74.2, 63.4, 68.8, 68.9, 79.9, 73.4, 76.1, 53.8, 64.0, 57.0, 76.3, 81.9]
params = [41.7, 69.4, 2.5, 3.0, 2.7, 2.6, 9.1, 11.1, 8.0, 9.4, 42.7, 36.4, 47.5, 5.5, 20.7]

# Marker area: each `params` value scaled by a constant factor of 10.
bubble_size = np.array(params) * 10

# Single scatter call for all models.
plt.figure(figsize=(10, 6))
scatter = plt.scatter(gflops, map_values, s=bubble_size, alpha=0.6, edgecolors="k")

# Write each model name one unit below its point.
for i, model in enumerate(models):
    plt.annotate(model, (gflops[i], map_values[i] - 1), fontsize=9, ha='center')

# Pairs to compare; arrows run from the first model towards the second.
comparison_pairs = [
    ("RT-DETRv2", "MSAR-B"),
    ("YOLOV8n", "MSAR-T")
]

for model1, model2 in comparison_pairs:
    i1, i2 = models.index(model1), models.index(model2)

    # The signed delta gives the arrow its direction; the label shows the magnitude.
    # Using the absolute value as dx would point the arrow away from the second
    # model whenever it has fewer GFLOPs than the first.
    gflops_delta = gflops[i2] - gflops[i1]
    gflops_diff = np.abs(gflops_delta)
    map_diff = map_values[i2] - map_values[i1]

    # Horizontal arrow: from the first model to the second model's GFLOPs.
    # length_includes_head keeps the tip on the target instead of overshooting it.
    plt.arrow(
        gflops[i1], map_values[i1],
        gflops_delta, 0,
        head_width=0.8, head_length=1, fc='blue', ec='blue', alpha=0.7, linestyle='dotted',
        length_includes_head=True
    )
    plt.text(
        (gflops[i1] + gflops[i2]) / 2, map_values[i1] - 1.5,
        f"ΔGFLOPs: {gflops_diff:.1f}", color='blue', fontsize=10, ha='center'
    )

    # Vertical arrow: starts where the horizontal arrow ends and reaches the second model.
    plt.arrow(
        gflops[i2], map_values[i1],
        0, map_diff,
        head_width=1, head_length=0.8, fc='red', ec='red', alpha=0.7, linestyle='dotted',
        length_includes_head=True
    )
    plt.text(
        gflops[i2] + 2, (map_values[i1] + map_values[i2]) / 2,
        f"ΔmAP: {map_diff:.1f}", color='red', fontsize=10, ha='left'
    )

# Axis labels, title and a light dashed grid.
plt.xlabel("GFLOPs")
plt.ylabel("mAP")
plt.title("GFLOPs vs. mAP (Bubble Size Represents Model Parameters)")
plt.grid(True, linestyle='--', alpha=0.5)

# Render the figure.
plt.show()


In [None]:
## 合并json文件
import json

# JSON files to merge, in output order.
files = ["qwen_article_output.json", "qwen_book_output.json", "qwen_web_output.json"]

# Destination file; also used in the completion message so the two cannot drift apart.
output_file = "sft_output_all_250220.json"

# Accumulates the entries of every input file.
merged_data = []

for file in files:
    with open(file, "r", encoding="utf-8") as f:
        data = json.load(f)
    if isinstance(data, list):
        # A top-level list contributes each of its items.
        merged_data.extend(data)
    else:
        # Any other top-level value (e.g. a single object) is added as one item.
        merged_data.append(data)

# Write the merged list as pretty-printed JSON, keeping non-ASCII text readable.
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(merged_data, f, ensure_ascii=False, indent=4)

print(f"JSON 文件合并完成，结果已保存为 {output_file}")


In [None]:
## 合并jsonl文件
import json

# JSONL files to merge, in output order.
# files = ["mateinfo/articals-1112.jsonl", "mateinfo/books-1113.jsonl", "mateinfo/web_deduped-1114.jsonl"]

files = ["outputs/sft_data/final/train_article_data.jsonl", "outputs/sft_data/final/train_book_data.jsonl", "outputs/sft_data/final/train_web_data.jsonl"]

# Destination file for the concatenated records.
output_file = "outputs/sft_data/final/train_data.jsonl"

with open(output_file, "w", encoding="utf-8") as outfile:
    for file in files:
        with open(file, "r", encoding="utf-8") as infile:
            for line in infile:
                # Lines are copied verbatim (no re-serialisation).
                outfile.write(line)
                # If a file's last line has no trailing newline, add one so it is
                # not fused with the first record of the next file.
                if not line.endswith("\n"):
                    outfile.write("\n")

print(f"JSONL 文件合并完成，结果已保存为 {output_file}")


In [None]:
import json

# Alternative inputs kept for reference:
# file_path = "/home/wyp/project/forest/forestllm-main/qwen_article_output.json"
# file_path = "/home/wyp/project/forest/forestllm-main/qwen_book_output.json"
# file_path = "/home/wyp/project/forest/forestllm-main/qwen_web_output.json"
file_path = "sft_output_all_250220.json"  # 59576

# Read the file and parse its full content as JSON.
with open(file_path, mode='r', encoding='utf-8') as file:
    json_data = json.loads(file.read())

# len() is the element count for a JSON array, or the key count for a JSON object.
print(len(json_data))

In [None]:

file_path = "outputs/sft_data/final/train_data.jsonl"  # replace with your JSONL file name 31162  4001  516  26645   59576

# Count lines by streaming through the file once.
line_count = 0
with open(file_path, "r", encoding="utf-8") as f:
    for _line in f:
        line_count += 1

print(f"文件 {file_path} 共有 {line_count} 条数据")


In [None]:
import json
from collections import Counter, defaultdict

# Path of the JSON file to scan for duplicate IDs.
file_path = "/home/wyp/project/ForestLLM/outputs/article/qwen_article_output.json"
# Alternative input: "/home/wyp/project/ForestLLM/outputs/0113/qwen_web_output.json"


# Duplicate-ID finder with a per-ID consistency check.
def find_duplicate_ids_with_consistency(file_path):
    """Report IDs that occur more than once in a JSON file and whether their entries match.

    The file is parsed as JSON and iterated; entries without an ``"id"`` key
    are ignored. Entries sharing an ID are compared through their
    key-sorted JSON serialisation.

    Args:
        file_path: Path of the JSON file to inspect.

    Returns:
        A tuple ``(results, duplicate_count)``. ``results`` maps each
        duplicated ID to ``{"count": <occurrences>, "consistent": <bool>}``
        and ``duplicate_count`` is the number of duplicated IDs. If any
        exception occurs, ``(str(exception), 0)`` is returned instead.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            records = json.load(handle)

        # Group the entries by ID, preserving first-seen order.
        grouped = defaultdict(list)
        for record in records:
            if "id" not in record:
                continue
            grouped[record["id"]].append(record)

        report = {}
        for key, group in grouped.items():
            if len(group) <= 1:
                continue
            # Serialise with sorted keys so key order does not affect the comparison.
            canonical = [json.dumps(item, sort_keys=True) for item in group]
            report[key] = {
                "count": len(group),
                "consistent": all(text == canonical[0] for text in canonical),
            }

        return report, len(report)
    except Exception as e:
        return str(e), 0


# Run the duplicate-ID check on the configured file.
consistency_results, duplicate_count = find_duplicate_ids_with_consistency(file_path)

if isinstance(consistency_results, str):
    # On failure the helper returns the error text instead of a dict; report it
    # rather than failing on `.items()` with an unrelated AttributeError.
    print(f"检查失败: {consistency_results}")
else:
    # Summary first, then one line per duplicated ID, then the summary again so
    # it stays visible after a long listing.
    print(f"重复的 ID 总数: {duplicate_count}")
    print("重复的 ID 检查结果:")
    for id_, result in consistency_results.items():
        status = "一致" if result["consistent"] else "不一致"
        print(f"- ID: {id_}, 出现次数: {result['count']}, 数据是否一致: {status}")
    print(f"重复的 ID 总数: {duplicate_count}")

In [None]:
import json

def _append_jsonl(path, entries):
    """Append each entry in ``entries`` to ``path`` as one JSON document per line."""
    with open(path, "a", encoding="utf-8") as out_f:
        for entry in entries:
            out_f.write(json.dumps(entry, ensure_ascii=False) + "\n")


def split_mixed_jsonl(input_file, train_output_file, eval_output_file, batch_size=1000):
    """Split a mixed JSONL file into a training file and an evaluation file.

    Each input line is parsed as JSON. Objects with a ``"messages"`` key are
    routed to the training output, objects with a ``"history"`` key to the
    evaluation output. Lines that fail to parse, parse to something other
    than a JSON object, or carry neither key are reported and skipped.

    Both outputs are opened in append mode, so calling this twice with the
    same output paths adds the records twice; remove the outputs first when a
    fresh split is wanted.

    Args:
        input_file: Path of the mixed JSONL file to read.
        train_output_file: Path that receives the ``"messages"`` records.
        eval_output_file: Path that receives the ``"history"`` records.
        batch_size: Number of buffered records per output that triggers a write.
    """
    train_data = []
    eval_data = []

    with open(input_file, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line.strip())
            except json.JSONDecodeError:
                print(f"❌ JSON 解析失败，跳过：{line.strip()}")
                continue

            # Only JSON objects are classified by key. A bare number would make
            # the `in` test raise, and a string or list would be matched by
            # substring or membership rather than by key.
            if isinstance(data, dict) and "messages" in data:
                train_data.append(data)
            elif isinstance(data, dict) and "history" in data:
                eval_data.append(data)
            else:
                print(f"⚠️ 未知数据格式，跳过：{data}")

            # Write out a buffer once it reaches batch_size to limit memory use.
            if len(train_data) >= batch_size:
                _append_jsonl(train_output_file, train_data)
                train_data = []

            if len(eval_data) >= batch_size:
                _append_jsonl(eval_output_file, eval_data)
                eval_data = []

    # Write whatever is left in the buffers.
    if train_data:
        _append_jsonl(train_output_file, train_data)

    if eval_data:
        _append_jsonl(eval_output_file, eval_data)

    print(f"✅ 训练数据已保存到 {train_output_file}")
    print(f"✅ 评测数据已保存到 {eval_output_file}")

# Example invocation: split the mixed file into separate train and eval JSONL files.
split_mixed_jsonl(
    input_file="/home/wyp/project/ForestLLM/data_sft/eval_general_qa_readable.jsonl",
    train_output_file="/home/wyp/project/ForestLLM/data_sft/train_general_qa.jsonl",
    eval_output_file="/home/wyp/project/ForestLLM/data_sft/eval_general_qa.jsonl",
)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Scores transcribed from a results table; rows follow `benchmarks`, columns follow `models`.
benchmarks = [
    "MMLU", "MMLU-Redux", "MMLU-Pro", "DROP", "IF-Eval",
    "GPQA Diamond", "SimpleQA", "FRAMES", "AlpacaEval2.0", "ArenaHard",
    "LiveCodeBench", "Codeforces", "SWE Verified", "Aider-Polyglot",
    "AIME 2024", "MATH-500", "CNMO 2024",
    "CLUEWSC", "C-Eval", "C-SimpleQA"
]

models = ["Claude-3.5-Sonnet", "GPT-4o", "DeepSeek V3", "OpenAI o1-mini", "OpenAI o1-1217", "DeepSeek R1"]

# NOTE(review): the 0 entries look like placeholders for scores missing from the
# table rather than real results; confirm against the source.
data = np.array([
    [88.3, 87.2, 88.5, 85.2, 91.8, 90.8],  # MMLU
    [88.9, 88.0, 89.1, 86.7, 0, 92.9],  # MMLU-Redux
    [78.0, 72.6, 75.9, 80.3, 0, 84.0],  # MMLU-Pro
    [88.3, 83.7, 91.6, 83.9, 90.2, 92.2],  # DROP
    [86.5, 84.3, 86.1, 84.8, 0, 83.3],  # IF-Eval
    [65.0, 49.9, 59.1, 60.0, 75.7, 71.5],  # GPQA Diamond
    [28.4, 38.2, 24.9, 7.0, 47.0, 30.1],  # SimpleQA
    [72.5, 80.5, 73.3, 76.9, 0, 82.5],  # FRAMES
    [52.0, 51.1, 70.0, 57.8, 0, 87.6],  # AlpacaEval2.0
    [85.2, 80.4, 85.5, 92.0, 0, 92.3],  # ArenaHard
    [38.9, 32.9, 36.0, 53.8, 63.4, 65.9],  # LiveCodeBench
    [20.3, 23.6, 58.7, 93.4, 96.6, 96.3],  # Codeforces
    [50.8, 38.8, 42.0, 41.6, 48.9, 49.2],  # SWE Verified
    [45.3, 16.0, 49.6, 32.9, 61.7, 53.3],  # Aider-Polyglot
    [16.0, 9.3, 39.2, 63.6, 79.2, 79.8],   # AIME 2024
    [78.3, 74.6, 90.2, 90.0, 96.4, 97.3],  # MATH-500
    [13.1, 10.8, 43.2, 67.6, 0, 78.8],  # CNMO 2024
    [85.4, 87.9, 90.9, 89.9, 0, 92.8],  # CLUEWSC
    [76.7, 76.0, 86.5, 68.9, 0, 91.8],  # C-Eval
    [55.4, 58.7, 68.0, 40.3, 0, 63.7]   # C-SimpleQA
])

# Benchmarks as the index, models as the columns.
df = pd.DataFrame(data, index=benchmarks, columns=models)

# One group of bars per benchmark.
num_benchmarks = len(benchmarks)
num_models = len(models)
x = np.arange(num_benchmarks)

# One distinct colour per model so every legend entry is identifiable.
colors = ["#7ea8be", "#4a6fa5", "#1f4e79", "#f2b880", "#e07a5f", "#8d5a97"]

# Width of a single bar; each group spans num_models * bar_width.
bar_width = 0.12

plt.figure(figsize=(12, 8))

for i in range(num_models):
    plt.bar(x + i * bar_width, df.iloc[:, i], width=bar_width, label=models[i], color=colors[i])

# Centre each tick under its group: halfway between the first and last bar centres.
plt.xticks(x + (num_models - 1) * bar_width / 2, benchmarks, rotation=90)
plt.ylabel("Score (%)")
plt.title("Benchmark Comparison Across AI Models")
plt.legend()
plt.tight_layout()

# Render the figure.
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Scores transcribed from a results table; rows follow `benchmarks`, columns follow `models`.
benchmarks = [
    "MMLU", "MMLU-Redux", "MMLU-Pro", "DROP", "IF-Eval",
    "GPQA Diamond", "SimpleQA", "FRAMES", "AlpacaEval2.0", "ArenaHard",
    "LiveCodeBench", "Codeforces", "SWE Verified", "Aider-Polyglot",
    "AIME 2024", "MATH-500", "CNMO 2024",
    "CLUEWSC", "C-Eval", "C-SimpleQA"
]

models = ["Claude-3.5-Sonnet", "GPT-4o", "DeepSeek V3", "OpenAI o1-mini", "OpenAI o1-1217", "DeepSeek R1"]

# NOTE(review): the 0 entries look like placeholders for scores missing from the
# table rather than real results; confirm against the source.
data = np.array([
    [88.3, 87.2, 88.5, 85.2, 91.8, 90.8],  # MMLU
    [88.9, 88.0, 89.1, 86.7, 0, 92.9],  # MMLU-Redux
    [78.0, 72.6, 75.9, 80.3, 0, 84.0],  # MMLU-Pro
    [88.3, 83.7, 91.6, 83.9, 90.2, 92.2],  # DROP
    [86.5, 84.3, 86.1, 84.8, 0, 83.3],  # IF-Eval
    [65.0, 49.9, 59.1, 60.0, 75.7, 71.5],  # GPQA Diamond
    [28.4, 38.2, 24.9, 7.0, 47.0, 30.1],  # SimpleQA
    [72.5, 80.5, 73.3, 76.9, 0, 82.5],  # FRAMES
    [52.0, 51.1, 70.0, 57.8, 0, 87.6],  # AlpacaEval2.0
    [85.2, 80.4, 85.5, 92.0, 0, 92.3],  # ArenaHard
    [38.9, 32.9, 36.0, 53.8, 63.4, 65.9],  # LiveCodeBench
    [20.3, 23.6, 58.7, 93.4, 96.6, 96.3],  # Codeforces
    [50.8, 38.8, 42.0, 41.6, 48.9, 49.2],  # SWE Verified
    [45.3, 16.0, 49.6, 32.9, 61.7, 53.3],  # Aider-Polyglot
    [16.0, 9.3, 39.2, 63.6, 79.2, 79.8],   # AIME 2024
    [78.3, 74.6, 90.2, 90.0, 96.4, 97.3],  # MATH-500
    [13.1, 10.8, 43.2, 67.6, 0, 78.8],  # CNMO 2024
    [85.4, 87.9, 90.9, 89.9, 0, 92.8],  # CLUEWSC
    [76.7, 76.0, 86.5, 68.9, 0, 91.8],  # C-Eval
    [55.4, 58.7, 68.0, 40.3, 0, 63.7]   # C-SimpleQA
])

# Benchmarks as the index, models as the columns.
df = pd.DataFrame(data, index=benchmarks, columns=models)

# One group of bars per benchmark.
num_benchmarks = len(benchmarks)
num_models = len(models)
x = np.arange(num_benchmarks)

# One distinct colour per model; the last model keeps its orange highlight.
colors = ["#7ea8be", "#4a6fa5", "#1f4e79", "#9bc1a3", "#5b8c6a", "#FFA500"]

# Width of a single bar; each group spans num_models * bar_width.
bar_width = 0.12

# Larger canvas than the default so the rotated tick labels stay readable.
plt.figure(figsize=(18, 12))

for i in range(num_models):
    plt.bar(x + i * bar_width, df.iloc[:, i], width=bar_width, label=models[i], color=colors[i])

# Centre each tick under its group: halfway between the first and last bar centres.
plt.xticks(x + (num_models - 1) * bar_width / 2, benchmarks, rotation=90, fontsize=12)
plt.yticks(fontsize=12)
plt.ylabel("Score (%)", fontsize=14)
plt.title("Benchmark Comparison Across AI Models", fontsize=18)
plt.legend(fontsize=12)
plt.tight_layout()

# Render the figure, as the other plotting cells do.
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Scores transcribed from a results table; rows follow `benchmarks`, columns follow `models`.
benchmarks = [
    "MMLU", "MMLU-Redux", "MMLU-Pro", "DROP", "IF-Eval",
    "GPQA Diamond", "SimpleQA", "FRAMES", "AlpacaEval2.0", "ArenaHard",
    "LiveCodeBench", "Codeforces", "SWE Verified", "Aider-Polyglot",
    "AIME 2024", "MATH-500", "CNMO 2024",
    "CLUEWSC", "C-Eval", "C-SimpleQA"
]

models = ["Claude-3.5-Sonnet", "GPT-4o", "DeepSeek V3", "OpenAI o1-mini", "OpenAI o1-1217", "DeepSeek R1"]

# NOTE(review): the 0 entries look like placeholders for scores missing from the
# table; they are drawn as empty bars and left unannotated below.
data = np.array([
    [88.3, 87.2, 88.5, 85.2, 91.8, 90.8],  # MMLU
    [88.9, 88.0, 89.1, 86.7, 0, 92.9],  # MMLU-Redux
    [78.0, 72.6, 75.9, 80.3, 0, 84.0],  # MMLU-Pro
    [88.3, 83.7, 91.6, 83.9, 90.2, 92.2],  # DROP
    [86.5, 84.3, 86.1, 84.8, 0, 83.3],  # IF-Eval
    [65.0, 49.9, 59.1, 60.0, 75.7, 71.5],  # GPQA Diamond
    [28.4, 38.2, 24.9, 7.0, 47.0, 30.1],  # SimpleQA
    [72.5, 80.5, 73.3, 76.9, 0, 82.5],  # FRAMES
    [52.0, 51.1, 70.0, 57.8, 0, 87.6],  # AlpacaEval2.0
    [85.2, 80.4, 85.5, 92.0, 0, 92.3],  # ArenaHard
    [38.9, 32.9, 36.0, 53.8, 63.4, 65.9],  # LiveCodeBench
    [20.3, 23.6, 58.7, 93.4, 96.6, 96.3],  # Codeforces
    [50.8, 38.8, 42.0, 41.6, 48.9, 49.2],  # SWE Verified
    [45.3, 16.0, 49.6, 32.9, 61.7, 53.3],  # Aider-Polyglot
    [16.0, 9.3, 39.2, 63.6, 79.2, 79.8],   # AIME 2024
    [78.3, 74.6, 90.2, 90.0, 96.4, 97.3],  # MATH-500
    [13.1, 10.8, 43.2, 67.6, 0, 78.8],  # CNMO 2024
    [85.4, 87.9, 90.9, 89.9, 0, 92.8],  # CLUEWSC
    [76.7, 76.0, 86.5, 68.9, 0, 91.8],  # C-Eval
    [55.4, 58.7, 68.0, 40.3, 0, 63.7]   # C-SimpleQA
])

# Benchmarks as the index, models as the columns.
df = pd.DataFrame(data, index=benchmarks, columns=models)

# Fixed palette: light purple for every model except the last, which is dark purple.
# (Alternative kept for reference: highlight the top score per benchmark instead.)
# colors = ["#A7A2FF" if score < max(scores) else "#4A3DA3" for score in scores]
colors = ["#A7A2FF", "#A7A2FF", "#A7A2FF", "#A7A2FF", "#A7A2FF", "#4A3DA3"]

# One standalone figure per benchmark.
for i, benchmark in enumerate(benchmarks):
    plt.figure(figsize=(6, 4))

    # Scores of every model on the current benchmark.
    scores = df.iloc[i, :]

    bars = plt.bar(models, scores, color=colors)

    # Print each value just above its bar; placeholder zeros get no label.
    for bar, score in zip(bars, scores):
        if score != 0:
            plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 1, f'{score:.1f}%',
                    ha='center', fontsize=8, fontweight='bold')

    plt.title(benchmark, fontsize=12)
    plt.ylabel("Score (%)", fontsize=8)

    # Slant the model names so they do not overlap.
    plt.xticks(rotation=20, ha='right', fontsize=8)

    plt.tight_layout()

    # Render this benchmark's figure.
    plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Benchmark subset; its length matches the number of rows in `data`.
benchmarks = [
    "MMLU", "MMLU-Redux", "MMLU-Pro", "FRAMES", "AlpacaEval2.0",
    "ArenaHard", "LiveCodeBench", "MATH-500", "CNMO 2024", "C-Eval"
]

# Column order of `data`.
models = ["Claude-3.5-Sonnet", "GPT-4o", "DeepSeek V3", "OpenAI o1-mini", "OpenAI o1-1217", "DeepSeek R1"]

# Ten rows, one per benchmark.
# NOTE(review): the 0 entries look like placeholders for scores missing from the
# table; they are drawn as empty bars and left unannotated below.
data = np.array([
    [88.3, 87.2, 88.5, 85.2, 91.8, 90.8],  # MMLU
    [88.9, 88.0, 89.1, 86.7, 0, 92.9],  # MMLU-Redux
    [78.0, 72.6, 75.9, 80.3, 0, 84.0],  # MMLU-Pro
    [72.5, 80.5, 73.3, 76.9, 0, 82.5],  # FRAMES
    [52.0, 51.1, 70.0, 57.8, 0, 87.6],  # AlpacaEval2.0
    [85.2, 80.4, 85.5, 92.0, 0, 92.3],  # ArenaHard
    [38.9, 32.9, 36.0, 53.8, 63.4, 65.9],  # LiveCodeBench
    [78.3, 74.6, 90.2, 90.0, 96.4, 97.3],  # MATH-500
    [13.1, 10.8, 43.2, 67.6, 0, 78.8],  # CNMO 2024
    [76.7, 76.0, 86.5, 68.9, 0, 91.8]   # C-Eval
])

# Benchmarks as the index, models as the columns.
df = pd.DataFrame(data, index=benchmarks, columns=models)

# 2 x 5 grid: one subplot per benchmark.
num_rows, num_cols = 2, 5
fig, axes = plt.subplots(num_rows, num_cols, figsize=(20, 10))
fig.suptitle("Benchmark Comparison Across AI Models", fontsize=20)

for i, (benchmark, ax) in enumerate(zip(benchmarks, axes.flatten())):
    scores = df.iloc[i, :]

    # Dark purple marks the top score(s) of this benchmark, light purple the rest.
    best_score = max(scores)
    colors = ["#A7A2FF" if score < best_score else "#4A3DA3" for score in scores]

    bars = ax.bar(models, scores, color=colors)

    # Print each value just above its bar; placeholder zeros get no label,
    # matching the per-benchmark figures in the previous cell.
    for bar, score in zip(bars, scores):
        if score != 0:
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 1, f'{score:.1f}%',
                    ha='center', fontsize=10, fontweight='bold')

    ax.set_title(benchmark, fontsize=12)

    # Fix the tick positions before assigning the slanted model names.
    ax.set_xticks(range(len(models)))
    ax.set_xticklabels(models, rotation=20, ha='right', fontsize=8)

    # Same y-range in every subplot so bar heights are comparable.
    ax.set_ylim(0, 100)

# Leave room at the top for the figure title.
plt.tight_layout(rect=[0, 0, 1, 0.95])

# Render the figure.
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap.umap_ as umap

# 1. Load the embedding arrays from disk.
original_embeddings = np.load("outputs/emb_data/llama_embeddings_original.npy")  # embeddings of the original data
generated_embeddings = np.load("outputs/emb_data/llama_embeddings_generated_response.npy")  # embeddings of the generated data

# 2. One text label per row of each array.
original_labels = ["original" for _ in range(len(original_embeddings))]
generated_labels = ["generated" for _ in range(len(generated_embeddings))]

# 3. Stack the rows and the labels in the same order (original first).
all_embeddings = np.vstack([original_embeddings, generated_embeddings])
all_labels = np.array([*original_labels, *generated_labels])

# 4. Dimensionality reduction with a selectable method (PCA, t-SNE or UMAP).
def reduce_dimension(embeddings, method="umap"):
    """Project ``embeddings`` to two components with the chosen reducer.

    Args:
        embeddings: Input passed unchanged to the reducer's ``fit_transform``.
        method: ``"pca"``, ``"tsne"`` or ``"umap"``.

    Returns:
        The result of ``fit_transform`` on the selected reducer.

    Raises:
        ValueError: If ``method`` is not one of the three supported names.
    """
    if method == "pca":
        return PCA(n_components=2).fit_transform(embeddings)
    if method == "tsne":
        tsne_reducer = TSNE(n_components=2, perplexity=50, random_state=42)
        return tsne_reducer.fit_transform(embeddings)
    if method == "umap":
        umap_reducer = umap.UMAP(n_components=2, n_neighbors=50, min_dist=0.1, random_state=42)
        return umap_reducer.fit_transform(embeddings)
    raise ValueError("Method should be 'pca', 'tsne', or 'umap'")

# 5. Reduce to two dimensions. The method name is kept in one variable so the
#    plot title below always names the method that was actually used.
reduction_method = "umap"
low_dim_embeddings = reduce_dimension(all_embeddings, method=reduction_method)

# 6. Scatter plot of the 2-D projection, coloured by data type.
plt.figure(figsize=(10, 6))
sns.scatterplot(x=low_dim_embeddings[:, 0], y=low_dim_embeddings[:, 1], hue=all_labels, alpha=0.7)
plt.title(f"Embedding Visualization ({reduction_method.upper()})")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.legend(title="Data Type", loc="best")
plt.show()


In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Project the combined embeddings to three components with UMAP.
reducer_3d = umap.UMAP(n_components=3, random_state=42)
low_dim_embeddings_3d = reducer_3d.fit_transform(all_embeddings)

# Boolean mask used for colouring: True where the label equals "generated".
is_generated = all_labels == "generated"

# 3-D scatter of the three components.
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection="3d")
scatter = ax.scatter(
    low_dim_embeddings_3d[:, 0],
    low_dim_embeddings_3d[:, 1],
    low_dim_embeddings_3d[:, 2],
    c=is_generated,
    cmap="coolwarm",
    alpha=0.7,
)
ax.set_title("3D Embedding Visualization (UMAP)")
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
import umap.umap_ as umap

# 1. Load the three embedding arrays from disk.
original_embeddings = np.load("outputs/emb_data/llama_embeddings_original.npy")  # embeddings of the original data
generated_response_embeddings = np.load("outputs/emb_data/llama_embeddings_generated_response.npy")  # embeddings of the generated responses
generated_knowledge_embeddings = np.load("outputs/emb_data/llama_embeddings_generated_knowledge.npy")  # embeddings of the generated knowledge points

# 2. One text label per row of each array.
original_labels = ["original"] * len(original_embeddings)
generated_response_labels = ["generated_response"] * len(generated_response_embeddings)
generated_knowledge_labels = ["generated_knowledge"] * len(generated_knowledge_embeddings)

# 3. Stack the rows and the labels in the same order.
all_embeddings = np.vstack((original_embeddings, generated_response_embeddings, generated_knowledge_embeddings))
all_labels = np.array(original_labels + generated_response_labels + generated_knowledge_labels)

# 4. First reduce to 256 components with PCA. The log message reports the
#    real input width instead of assuming a fixed one.
print(f"🔄 Applying PCA ({all_embeddings.shape[1]} → 256) ...")
pca_256 = PCA(n_components=256, random_state=42)
pca_256_embeddings = pca_256.fit_transform(all_embeddings)

# 5. Then reduce the PCA output to 2 components with UMAP. The log message
#    reports the width of the array that is actually fed to UMAP.
print(f"🔄 Applying UMAP ({pca_256_embeddings.shape[1]} → 2) ...")
umap_reducer = umap.UMAP(n_components=2, n_neighbors=150, min_dist=0.05, random_state=42)
low_dim_embeddings = umap_reducer.fit_transform(pca_256_embeddings)

# 6. Scatter plot of the 2-D projection, coloured by data type.
plt.figure(figsize=(10, 6))
sns.scatterplot(x=low_dim_embeddings[:, 0], y=low_dim_embeddings[:, 1], hue=all_labels, alpha=0.7, palette="Set1")
plt.title("Embedding Visualization (UMAP)")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.legend(title="Data Type", loc="best")
plt.show()
