In [3]:
import pandas as pd 
data = pd.read_csv('/home/yuhaowang/project/FMBC/downstream/finetune/dataset_csv/subtype/BCNB_ALN.csv')
data['label'].value_counts()

label
N0         655
N+(1-2)    210
N+(>2)     193
Name: count, dtype: int64

In [None]:
import numpy as np
import scipy.spatial.distance as distance
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import torch

def get_few_shot_samples(X, y, k):
    """Select k samples per class for few-shot learning."""
    unique_classes = np.unique(y)
    few_shot_X, few_shot_y = [], []
    
    for c in unique_classes:
        class_samples = X[y == c]
        selected_samples = class_samples[:min(k, len(class_samples))]
        few_shot_X.append(selected_samples)
        few_shot_y.append(np.full(len(selected_samples), c))
    
    return np.vstack(few_shot_X), np.hstack(few_shot_y)

class SimpleShot:
    def __init__(self):
        self.class_prototypes = None
        self.classes = None
    
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.class_prototypes = {
            c: np.mean(X[y == c], axis=0) for c in self.classes
        }
    
    def predict(self, X):
        preds = []
        for x in X:
            distances = {c: np.linalg.norm(x - proto) for c, proto in self.class_prototypes.items()}
            preds.append(min(distances, key=distances.get))
        return np.array(preds)

class Retrieval:
    def __init__(self, database, labels):
        self.database = database
        self.labels = labels
    
    def retrieve(self, query, k=5):
        dists = np.linalg.norm(self.database - query, axis=1)
        indices = np.argsort(dists)[:k]
        return self.labels[indices]
    
    def evaluate(self, queries, query_labels, k_vals=[1, 3, 5]):
        acc_at_k = {k: 0 for k in k_vals}
        mvacc_at_5 = 0
        
        for i, query in enumerate(queries):
            retrieved_labels = self.retrieve(query, k=max(k_vals))
            for k in k_vals:
                if query_labels[i] in retrieved_labels[:k]:
                    acc_at_k[k] += 1
            if np.bincount(retrieved_labels[:5]).argmax() == query_labels[i]:
                mvacc_at_5 += 1
        
        num_queries = len(queries)
        return {k: acc_at_k[k] / num_queries for k in k_vals}, mvacc_at_5 / num_queries

def few_shot_experiment(k, X_train, y_train, X_test, y_test):
    """Run few-shot learning experiments for a given k using real dataset."""
    X_few_shot, y_few_shot = get_few_shot_samples(X_train, y_train, k)
    
    # SimpleShot Evaluation
    simpleshot = SimpleShot()
    simpleshot.fit(X_few_shot, y_few_shot)
    y_pred = simpleshot.predict(X_test)
    simpleshot_acc = accuracy_score(y_test, y_pred)
    
    # 20-Nearest Neighbors Evaluation
    knn = KNeighborsClassifier(n_neighbors=20)
    knn.fit(X_few_shot, y_few_shot)
    y_pred_knn = knn.predict(X_test)
    knn_acc = accuracy_score(y_test, y_pred_knn)
    
    # Linear Probing Evaluation
    linear_probe = LogisticRegression(max_iter=1000, C=1.0)
    linear_probe.fit(X_few_shot, y_few_shot)
    y_pred_linear = linear_probe.predict(X_test)
    linear_acc = accuracy_score(y_test, y_pred_linear)
    
    # Retrieval Evaluation
    retrieval = Retrieval(X_few_shot, y_few_shot)
    acc_at_k, mvacc_at_5 = retrieval.evaluate(X_test, y_test)
    
    return {
        "SimpleShot Accuracy": simpleshot_acc,
        "20-NN Accuracy": knn_acc,
        "Linear Probing Accuracy": linear_acc,
        "Retrieval Acc@K": acc_at_k,
        "MVAcc@5": mvacc_at_5
    }


SimpleShot Accuracy: 0.8200
20-NN Accuracy: 0.8200
Linear Probing Accuracy: 0.8600
Retrieval Acc@K: {1: 0.78, 3: 0.96, 5: 0.98}, MVAcc@5: 0.8600


In [6]:
import pandas as pd 
data = pd.read_csv('/home/yuhaowang/project/FMBC/downstream/finetune/outputs/BRACS_COARSE/CONCH/LR/summary.csv')
data

Unnamed: 0,val_loss,val_bacc,val_acc,val_macro_auroc,val_macro_auprc,val_weighted_f1,val_qwk,val_0_auroc,val_1_auroc,val_2_auroc,...,test_macro_auroc,test_macro_auprc,test_weighted_f1,test_qwk,test_0_auroc,test_1_auroc,test_2_auroc,test_0_auprc,test_1_auprc,test_2_auprc
0,0.834959,0.475862,0.654545,0.750391,0.64254,0.516206,0.406843,0.753316,0.714286,0.783571,...,0.70235,0.540863,0.363388,0.38125,0.756065,0.603333,0.747652,0.763799,0.223429,0.635361
1,0.938483,0.514254,0.618182,0.73107,0.594004,0.489013,0.483366,0.748992,0.611434,0.832785,...,0.720668,0.584372,0.459933,0.548828,0.742608,0.585514,0.833882,0.666225,0.264163,0.822727
2,0.899061,0.483583,0.627273,0.738174,0.591267,0.507941,0.420513,0.757129,0.722328,0.735065,...,0.731122,0.576863,0.474645,0.421699,0.749834,0.716951,0.726582,0.702118,0.329745,0.698724
3,0.909625,0.450926,0.563636,0.754923,0.605143,0.427175,0.317882,0.79094,0.655281,0.818548,...,0.755381,0.604385,0.41792,0.299271,0.789231,0.654333,0.822581,0.727746,0.276788,0.808622
4,0.866451,0.524402,0.681818,0.785006,0.639934,0.537006,0.522155,0.768484,0.762778,0.823755,...,0.777716,0.626934,0.454448,0.491189,0.76631,0.745,0.821839,0.803038,0.320038,0.757726


In [4]:
import os
import pandas as pd
import numpy as np
from IPython.display import display
import h5py
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cluster import KMeans
import logging
import math
import numpy as np
import torch
from torch.nn import ModuleList
from functools import partial
from typing import List, Tuple, Dict, Optional, Any
from torchvision.transforms import Compose, RandomApply
from torchvision.transforms import functional as F
from torchvision.transforms.transforms import _setup_angle, _check_sequence_input
from torch import Tensor
from collections import defaultdict, deque
from pathlib import Path
from torch import nn
from PIL import ImageFilter, ImageOps, Image, ImageDraw

import os

result_root_dir = '/home/yuhaowang/project/FMBC/downstream/finetune/outputs'
for dir in os.listdir(result_root_dir):
    print(dir)
    result_dir = os.path.join(result_root_dir, dir)

    evaluation_metrics = ['val_bacc', 'val_weighted_f1', 'val_macro_auroc','val_qwk']


    desired_order = [
        "UNI", "CONCH", "Virchow","Gigapath_Tile",'Gigapath',
        "CHIEF_Tile","TITAN","FMBC"  # 请修改为你的模型名称
    ]

    all_results = []

    # 遍历目录中的所有模型文件夹
    for model_name in os.listdir(result_dir):
    # model_path = os.path.join(result_dir, model_name, "summary.csv")
        for tuning_method in os.listdir(os.path.join(result_dir, model_name)):
            for lr_rate in os.listdir(os.path.join(result_dir, model_name, tuning_method)):
                model_sumary_path = os.path.join(result_dir, model_name, tuning_method,lr_rate, "summary.csv")
                if 'ABMIL' in model_sumary_path:
                    continue
                if os.path.isfile(model_sumary_path):
                    df = pd.read_csv(model_sumary_path)

                    # 计算均值和标准差
                    summary_stats = {"Model": model_name+'_'+tuning_method+'_'+lr_rate}
                    for metric in evaluation_metrics:
                        if metric in df.columns:
                            mean_val = np.mean(df[metric])
                            std_val = np.std(df[metric], ddof=1)  # 样本标准差
                            summary_stats[metric] = f"{mean_val:.3f}±{std_val:.4f}"

                    # 添加到列表
                    all_results.append(summary_stats)


    final_result_df = pd.DataFrame(all_results)


    # final_result_df['sort_order'] = final_result_df['Model'].apply(lambda x: desired_order.index(x) if x in d
    #                                                                esired_order else len(desired_order))
    #delete the model not in desired_order
    #final_result_df = final_result_df[final_result_df['sort_order']!=len(desired_order)]
    #final_result_df = final_result_df.sort_values(by='sort_order').drop(columns=['sort_order'])
    final_result_df.style.hide(axis="index")
    # 在 Jupyter Notebook 中美观显示
    display(final_result_df)




IMPRESS_PR


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc
0,CHIEF_tile_LR_0.01,0.612±0.0749,0.719±0.1365,0.690±0.1424
1,CHIEF_tile_LR_0.001,0.500±0.0000,0.626±0.1111,0.700±0.1423
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.626±0.1111,0.538±0.0886
3,CHIEF_tile_LR_0.1,0.617±0.1445,0.684±0.1571,0.639±0.1539
4,TITAN_LR_0.01,0.649±0.1768,0.703±0.1246,0.749±0.1284
5,TITAN_LR_0.001,0.601±0.1504,0.689±0.1560,0.748±0.1454
6,TITAN_LR_0.0001,0.500±0.0000,0.626±0.1111,0.554±0.0603
7,TITAN_LR_0.1,0.636±0.1459,0.668±0.0970,0.719±0.1192
8,Gigapath_LR_0.01,0.643±0.1862,0.715±0.1248,0.760±0.1502
9,Gigapath_LR_0.001,0.627±0.1331,0.723±0.1025,0.765±0.1304


BCNB_ER


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.686±0.0458,0.799±0.0380,0.773±0.0577,0.428±0.0945
1,CHIEF_tile_LR_0.001,0.564±0.0269,0.711±0.0397,0.744±0.0535,0.169±0.0673
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.647±0.0332,0.670±0.0380,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.681±0.0409,0.786±0.0348,0.780±0.0518,0.399±0.0877
4,TITAN_LR_0.01,0.688±0.0240,0.792±0.0206,0.801±0.0464,0.417±0.0433
5,TITAN_LR_0.001,0.649±0.0350,0.771±0.0374,0.803±0.0364,0.352±0.0763
6,TITAN_LR_0.0001,0.596±0.0470,0.737±0.0473,0.770±0.0486,0.243±0.1078
7,TITAN_LR_0.1,0.691±0.0204,0.780±0.0142,0.767±0.0368,0.396±0.0358
8,Gigapath_LR_0.01,0.564±0.0212,0.709±0.0419,0.701±0.0633,0.170±0.0528
9,Gigapath_LR_0.001,0.500±0.0000,0.647±0.0332,0.635±0.0673,0.000±0.0000


AIDPATH_IDC


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.634±0.0633,0.822±0.0984,0.895±0.0965,0.315±0.1777
1,CHIEF_tile_LR_0.001,0.500±0.0000,0.781±0.0590,0.794±0.1062,0.000±0.0000
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.781±0.0590,0.504±0.0667,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.855±0.0431,0.896±0.1016,0.910±0.1320,0.675±0.1678
4,TITAN_LR_0.01,0.911±0.0799,0.929±0.1191,0.947±0.1064,0.810±0.2485
5,TITAN_LR_0.001,0.781±0.0844,0.875±0.1205,0.944±0.0921,0.581±0.2295
6,TITAN_LR_0.0001,0.500±0.0000,0.780±0.0589,0.874±0.1234,0.000±0.0000
7,TITAN_LR_0.1,0.913±0.0807,0.928±0.1233,0.948±0.1049,0.812±0.2563
8,Gigapath_LR_0.01,0.676±0.0878,0.878±0.0495,0.865±0.0693,0.455±0.2039
9,Gigapath_LR_0.001,0.500±0.0000,0.781±0.0590,0.728±0.1062,0.000±0.0000


AIDPATH_GRADE


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.813±0.0674,0.835±0.0464,0.884±0.1085,0.638±0.1111
1,CHIEF_tile_LR_0.001,0.682±0.0499,0.723±0.0537,0.824±0.0805,0.406±0.1030
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.488±0.0376,0.756±0.0658,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.858±0.0808,0.874±0.0541,0.906±0.1198,0.724±0.1307
4,TITAN_LR_0.01,0.834±0.0665,0.857±0.0453,0.890±0.1185,0.687±0.1092
5,TITAN_LR_0.001,0.784±0.0677,0.804±0.0482,0.864±0.1187,0.577±0.1083
6,TITAN_LR_0.0001,0.683±0.0656,0.723±0.0620,0.832±0.1120,0.399±0.1315
7,TITAN_LR_0.1,0.823±0.0669,0.844±0.0455,0.894±0.0968,0.659±0.1117
8,Gigapath_LR_0.01,0.719±0.0683,0.759±0.0536,0.798±0.0902,0.469±0.1317
9,Gigapath_LR_0.001,0.551±0.0134,0.568±0.0453,0.716±0.0677,0.124±0.0334


BCNB_ALN


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.365±0.0184,0.489±0.0204,0.643±0.0326,0.135±0.0493
1,CHIEF_tile_LR_0.001,0.340±0.0042,0.478±0.0233,0.615±0.0365,0.037±0.0184
2,CHIEF_tile_LR_0.0001,0.333±0.0000,0.483±0.0291,0.555±0.0178,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.388±0.0256,0.505±0.0204,0.647±0.0359,0.172±0.0832
4,TITAN_LR_0.01,0.385±0.0223,0.495±0.0135,0.627±0.0263,0.135±0.0510
5,TITAN_LR_0.001,0.356±0.0170,0.463±0.0255,0.630±0.0209,0.099±0.0256
6,TITAN_LR_0.0001,0.333±0.0011,0.474±0.0204,0.592±0.0243,-0.002±0.0050
7,TITAN_LR_0.1,0.389±0.0318,0.497±0.0143,0.597±0.0224,0.120±0.0534
8,Gigapath_LR_0.01,0.360±0.0176,0.478±0.0184,0.611±0.0174,0.109±0.0414
9,Gigapath_LR_0.001,0.335±0.0043,0.487±0.0301,0.595±0.0262,0.006±0.0143


BCNB_PR


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.686±0.0458,0.799±0.0380,0.773±0.0577,0.428±0.0945
1,CHIEF_tile_LR_0.001,0.564±0.0269,0.711±0.0397,0.744±0.0535,0.169±0.0673
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.647±0.0332,0.670±0.0380,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.681±0.0409,0.786±0.0348,0.780±0.0518,0.399±0.0877
4,TITAN_LR_0.01,0.688±0.0240,0.792±0.0206,0.801±0.0464,0.417±0.0433
5,TITAN_LR_0.001,0.649±0.0350,0.771±0.0374,0.803±0.0364,0.352±0.0763
6,TITAN_LR_0.0001,0.596±0.0470,0.737±0.0473,0.770±0.0486,0.243±0.1078
7,TITAN_LR_0.1,0.691±0.0204,0.780±0.0142,0.767±0.0368,0.396±0.0358
8,Gigapath_LR_0.01,0.564±0.0212,0.709±0.0419,0.701±0.0633,0.170±0.0528
9,Gigapath_LR_0.001,0.500±0.0000,0.647±0.0332,0.635±0.0673,0.000±0.0000


BRACS_COARSE


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.614±0.0528,0.655±0.0515,0.833±0.0219,0.656±0.0552
1,CHIEF_tile_LR_0.001,0.531±0.0366,0.591±0.0432,0.807±0.0219,0.552±0.0515
2,CHIEF_tile_LR_0.0001,0.388±0.0355,0.285±0.0688,0.741±0.0406,0.159±0.1046
3,CHIEF_tile_LR_0.1,0.604±0.0820,0.675±0.0558,0.822±0.0237,0.664±0.0858
4,TITAN_LR_0.01,0.664±0.0413,0.739±0.0506,0.871±0.0273,0.781±0.0433
5,TITAN_LR_0.001,0.603±0.0266,0.689±0.0628,0.858±0.0209,0.707±0.0446
6,TITAN_LR_0.0001,0.535±0.0264,0.541±0.0326,0.813±0.0165,0.565±0.0857
7,TITAN_LR_0.1,0.641±0.0493,0.731±0.0479,0.858±0.0295,0.783±0.0638
8,Gigapath_LR_0.01,0.529±0.0099,0.585±0.0531,0.791±0.0246,0.547±0.0700
9,Gigapath_LR_0.001,0.448±0.0375,0.415±0.0579,0.719±0.0437,0.323±0.1004


SLNBREAST_SUBTYPE


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.713±0.0407,0.802±0.0463,0.842±0.0690,0.471±0.1176
1,CHIEF_tile_LR_0.001,0.500±0.0000,0.609±0.1169,0.747±0.1037,0.000±0.0000
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.609±0.1169,0.601±0.0797,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.730±0.0808,0.798±0.0551,0.833±0.0781,0.464±0.1755
4,TITAN_LR_0.01,0.877±0.0961,0.936±0.0375,0.983±0.0337,0.798±0.1766
5,TITAN_LR_0.001,0.787±0.0917,0.880±0.0230,0.976±0.0442,0.650±0.1640
6,TITAN_LR_0.0001,0.616±0.0382,0.720±0.1064,0.880±0.1018,0.302±0.1054
7,TITAN_LR_0.1,0.882±0.1034,0.928±0.0353,0.948±0.0876,0.783±0.1691
8,Gigapath_LR_0.01,0.567±0.1043,0.694±0.0901,0.742±0.0948,0.166±0.2353
9,Gigapath_LR_0.001,0.533±0.0537,0.650±0.1119,0.610±0.1327,0.090±0.1434


TCGA-BRCA-SUBTYPE


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.834±0.0383,0.908±0.0198,0.948±0.0204,0.704±0.0662
1,CHIEF_tile_LR_0.001,0.736±0.0430,0.870±0.0235,0.921±0.0176,0.567±0.0721
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.707±0.0303,0.852±0.0328,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.859±0.0400,0.916±0.0238,0.940±0.0223,0.736±0.0725
4,TITAN_LR_0.01,0.853±0.0518,0.917±0.0243,0.947±0.0245,0.732±0.0864
5,TITAN_LR_0.001,0.847±0.0461,0.912±0.0282,0.952±0.0266,0.719±0.0908
6,TITAN_LR_0.0001,0.822±0.0546,0.913±0.0238,0.948±0.0272,0.713±0.0940
7,TITAN_LR_0.1,0.841±0.0337,0.896±0.0170,0.921±0.0296,0.678±0.0443
8,Gigapath_LR_0.01,0.707±0.0377,0.845±0.0308,0.868±0.0168,0.489±0.0808
9,Gigapath_LR_0.001,0.556±0.0065,0.756±0.0262,0.825±0.0276,0.163±0.0195


BCNB_HER2


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.569±0.0240,0.673±0.0545,0.714±0.0336,0.172±0.0605
1,CHIEF_tile_LR_0.001,0.503±0.0057,0.599±0.0556,0.690±0.0406,0.009±0.0161
2,CHIEF_tile_LR_0.0001,0.500±0.0000,0.592±0.0540,0.588±0.0431,0.000±0.0000
3,CHIEF_tile_LR_0.1,0.596±0.0416,0.693±0.0396,0.693±0.0253,0.219±0.0912
4,TITAN_LR_0.01,0.591±0.0167,0.689±0.0433,0.719±0.0110,0.213±0.0344
5,TITAN_LR_0.001,0.581±0.0256,0.684±0.0553,0.731±0.0206,0.198±0.0587
6,TITAN_LR_0.0001,0.524±0.0183,0.626±0.0651,0.728±0.0326,0.066±0.0497
7,TITAN_LR_0.1,0.597±0.0118,0.688±0.0247,0.679±0.0202,0.211±0.0217
8,Gigapath_LR_0.01,0.531±0.0134,0.635±0.0468,0.662±0.0588,0.082±0.0364
9,Gigapath_LR_0.001,0.500±0.0000,0.592±0.0540,0.594±0.0394,0.000±0.0000


BRACS_FINE


Unnamed: 0,Model,val_bacc,val_weighted_f1,val_macro_auroc,val_qwk
0,CHIEF_tile_LR_0.01,0.392±0.0603,0.358±0.0141,0.789±0.0191,0.686±0.0170
1,CHIEF_tile_LR_0.001,0.294±0.0532,0.186±0.0328,0.765±0.0234,0.578±0.0598
2,CHIEF_tile_LR_0.0001,0.218±0.0283,0.000±0.0000,0.701±0.0331,0.388±0.1528
3,CHIEF_tile_LR_0.1,0.357±0.0581,0.431±0.0208,0.777±0.0084,0.648±0.0393
4,TITAN_LR_0.01,0.441±0.0554,0.455±0.0314,0.813±0.0128,0.780±0.0224
5,TITAN_LR_0.001,0.357±0.0343,0.314±0.0346,0.793±0.0075,0.658±0.0189
6,TITAN_LR_0.0001,0.248±0.0117,0.039±0.0167,0.732±0.0126,0.490±0.0778
7,TITAN_LR_0.1,0.443±0.0374,0.524±0.0284,0.809±0.0130,0.765±0.0167
8,Gigapath_LR_0.01,0.286±0.0164,0.244±0.0334,0.720±0.0193,0.541±0.0312
9,Gigapath_LR_0.001,0.224±0.0205,0.007±0.0092,0.660±0.0257,0.347±0.0737
