In [None]:
import os
import json
import pandas as pd

# Root directory that holds one sub-folder per model
root_dir = "/home/yuhaowang/project/FMBC/downstream/Patho-Bench/_test_linprobe/slidelevel_coarse"

# Evaluation results of every model, keyed by folder name
data = {}

# Visit every entry under the root directory
for model_name in os.listdir(root_dir):
    model_path = os.path.join(root_dir, model_name)
    if not os.path.isdir(model_path):  # plain files are not model folders
        continue

    json_path = os.path.join(model_path, 'test_metrics_summary.json')
    if not os.path.exists(json_path):  # model without a summary is skipped
        continue

    with open(json_path, "r") as f:
        metrics = json.load(f)

    # Keep only the `formatted` field of each metric
    model_metrics = {
        metric: values["formatted"] for metric, values in metrics.items()
    }
    data[model_name] = model_metrics

# Convert to a DataFrame: one row per model, one column per metric
df = pd.DataFrame.from_dict(data, orient="index")
selected_columns = ['macro-ovr-auc', 'bacc', 'weighted-f1', 'weighted_kappa']
show_df = df[selected_columns]

# Show the selected metrics as an interactive table
import ace_tools_open as tools
tools.display_dataframe_to_user(name="Random DataFrame", dataframe=show_df)

# Ranking direction of each metric, used when highlighting the LaTeX table
higher_is_better = ["macro-ovr-auc", "bacc", "weighted-f1", "weighted_kappa"]  # larger is better
lower_is_better = []  # smaller is better (none of the selected metrics)

# Cells are strings shaped like "<mean> ± <spread>". Sorting them as text is
# lexicographic and mis-orders means of different width, so order the rows by
# the numeric mean that precedes the separator instead.
df_sorted = show_df.sort_values(
    by="bacc",
    ascending=True,
    key=lambda column: column.str.split(" ± ").str[0].astype(float),
)

# LaTeX row generator that marks the best and second-best mean of each metric
def format_latex_table_with_highlighted_best(df, higher_is_better, lower_is_better):
    r"""Print and return LaTeX table rows with best/second-best means marked.

    Each string cell is expected to look like ``"<mean> ± <spread>"``. The
    separator is replaced so the spread is rendered as
    ``\scriptsize{$\pm$<spread>}``. Within every ranked column the cell whose
    mean equals the best value is wrapped in ``\textbf{...}`` and the cell
    whose mean equals the runner-up value in ``\underline{...}``; ties share
    the same mark.

    Args:
        df: DataFrame whose index holds the row labels and whose cells hold
            the formatted metric strings.
        higher_is_better: Column names for which the largest mean is best.
        lower_is_better: Column names for which the smallest mean is best.
            Columns listed in neither collection are rendered without marks.

    Returns:
        All rows concatenated into one string. Each row is
        ``"<label> & <cell> & ... "`` followed by a single backslash and a
        newline. NOTE: this terminator is kept for backward compatibility
        and is not a valid LaTeX row end; the printed lines end in ``\\``.

    Raises:
        ValueError: If a string cell of a ranked column does not start with
            a number.
    """
    separator = " ± "

    def mean_of(cell):
        # Only the text before the separator takes part in the ranking.
        return float(cell.split(separator)[0])

    best_values = {}
    second_best_values = {}
    for col in df.columns:
        if col in higher_is_better:
            descending = True
        elif col in lower_is_better:
            descending = False
        else:
            # No ranking direction was given, so nothing is highlighted here.
            continue
        # Missing cells (non-strings such as NaN/None) cannot be ranked.
        means = sorted(
            (mean_of(cell) for cell in df[col] if isinstance(cell, str)),
            reverse=descending,
        )
        if means:
            best_values[col] = means[0]
            # With a single value the runner-up coincides with the best.
            second_best_values[col] = means[min(1, len(means) - 1)]

    rendered_rows = []
    for method, row in df.iterrows():
        cells = []
        for col, raw in zip(df.columns, row):
            if not isinstance(raw, str):
                # Placeholder for a metric that is missing for this row.
                cells.append("-")
                continue

            if separator in raw:
                cell = raw.replace(separator, r"\scriptsize{$\pm$") + "}"
            else:
                # Nothing was opened, so no closing brace may be appended.
                cell = raw

            if col in best_values:
                mean = mean_of(raw)
                if mean == best_values[col]:
                    cell = r"\textbf{" + cell + "}"
                elif mean == second_best_values[col]:
                    cell = r"\underline{" + cell + "}"
            cells.append(cell)

        rendered_rows.append(f"{method} & {' & '.join(cells)} ")

    # Print one LaTeX row per data row; no dangling terminator afterwards.
    for line in rendered_rows:
        print(line + "\\\\")

    return "".join(line + "\\\n" for line in rendered_rows)

# Render the sorted table: rows are printed and the raw string is kept
latex_code_with_highlighting = format_latex_table_with_highlighted_best(
    df_sorted,
    higher_is_better=higher_is_better,
    lower_is_better=lower_is_better,
)




Random DataFrame


Unnamed: 0,macro-ovr-auc,bacc,weighted-f1,weighted_kappa
Loading ITables v2.2.5 from the internet... (need help?),,,,


mean-UNI & 0.757\scriptsize{$\pm$0.030} & 0.518\scriptsize{$\pm$0.023} & 0.613\scriptsize{$\pm$0.034} & 0.339\scriptsize{$\pm$0.022} \\
mean-CONCH & 0.798\scriptsize{$\pm$0.030} & 0.549\scriptsize{$\pm$0.037} & 0.661\scriptsize{$\pm$0.049} & 0.398\scriptsize{$\pm$0.060} \\
mean-Virchow & 0.832\scriptsize{$\pm$0.029} & 0.586\scriptsize{$\pm$0.028} & 0.677\scriptsize{$\pm$0.044} & 0.383\scriptsize{$\pm$0.048} \\
mean-UNI-2 & \textbf{0.844\scriptsize{$\pm$0.024}} & \underline{0.608\scriptsize{$\pm$0.020}} & \underline{0.704\scriptsize{$\pm$0.036}} & \underline{0.464\scriptsize{$\pm$0.040}} \\
mean-Gigapath_tile & 0.823\scriptsize{$\pm$0.025} & \underline{0.608\scriptsize{$\pm$0.026}} & 0.691\scriptsize{$\pm$0.033} & 0.417\scriptsize{$\pm$0.051} \\
mean-UNI-NEW & \textbf{0.844\scriptsize{$\pm$0.024}} & \textbf{0.611\scriptsize{$\pm$0.022}} & \textbf{0.707\scriptsize{$\pm$0.037}} & \textbf{0.465\scriptsize{$\pm$0.041}} \\
\\
