In [1]:
from patho_bench.ExperimentFactory import ExperimentFactory # Make sure you have installed Patho-Bench and this imports correctly

# --- Experiment configuration -------------------------------------------------
model_name = 'mean-UNI-NEW'
train_source = 'BRACS'
task_name = 'slidelevel_coarse'

# Keyword arguments for the linear-probing experiment, gathered in one place so
# the call below stays short.
linprobe_kwargs = dict(
    model_name = model_name,
    train_source = train_source,
    # None (the default) automatically uses the test split of the training source.
    test_source = None,
    task_name = task_name,
    # May also be a list of paths if patch features are split across multiple directories.
    patch_embeddings_dirs = '/data4/embedding/temp/embedding/BRACS/UNI-2',
    pooled_embeddings_root = './_test_pooled_features',
    # Splits are downloaded here from HuggingFace; custom splits are supplied
    # through path_to_split and path_to_task_config instead.
    splits_root = './_test_splits',
    path_to_split = '/home/yuhaowang/project/FMBC/downstream/Patho-Bench/BRACS_COARSE.tsv',
    path_to_task_config = '/home/yuhaowang/project/FMBC/downstream/Patho-Bench/BRACS_COARSE.yaml',
    # Only relevant for patient-level tasks with multiple slides per patient.
    combine_slides_per_patient = False,
    cost = 1,
    balanced = False,
    saveto = './_test_linprobe/{task_name}/{model_name}'.format(task_name=task_name, model_name=model_name),
)

# Initialize the experiment. This is linear probing, but similar APIs are
# available for coxnet, protonet, retrieval, and finetune.
experiment = ExperimentFactory.linprobe(**linprobe_kwargs)

# Train, evaluate, then report the metric of interest.
experiment.train()
experiment.test()
result = experiment.report_results(metric = 'macro-ovr-auc')

Loaded split from /home/yuhaowang/project/FMBC/downstream/Patho-Bench/BRACS_COARSE.tsv with 546 samples and 5 folds assigned.
[94mSaving slide_id-level features to ./_test_pooled_features/by_slide_id/mean-UNI-NEW/BRACS, using mean-UNI-NEW...[0m


Pre-pooling features:   0%|          | 0/546 [00:00<?, ?it/s, Running on GPU 1...]



Pre-pooling features: 100%|██████████| 546/546 [00:06<00:00, 83.11it/s, Running on GPU 1...] 


Running linprobe experiment with C = 1...


Training on 497 samples: 100%|██████████| 5/5 [00:10<00:00,  2.08s/it]
  0%|          | 0/5 [00:00<?, ?it/s]


No val set found. Skipping...


Running test split on 49 samples: 100%|██████████| 5/5 [00:01<00:00,  3.96it/s] 

macro-ovr-auc: 0.844 ± 0.024





In [1]:
from patho_bench.ExperimentFactory import ExperimentFactory # Make sure you have installed Patho-Bench and this imports correctly

# --- Experiment configuration -------------------------------------------------
model_name = 'mean-UNI-2'
train_source = 'BRACS'
task_name = 'slidelevel_coarse'

# Keyword arguments for the linear-probing experiment, gathered in one place so
# the call below stays short.
linprobe_kwargs = dict(
    model_name = model_name,
    train_source = train_source,
    # None (the default) automatically uses the test split of the training source.
    test_source = None,
    task_name = task_name,
    # May also be a list of paths if patch features are split across multiple directories.
    patch_embeddings_dirs = '/data4/embedding/BRACS/UNI-2',
    pooled_embeddings_root = './_test_pooled_features',
    # Splits are downloaded here from HuggingFace; custom splits are supplied
    # through path_to_split and path_to_task_config instead.
    splits_root = './_test_splits',
    path_to_split = '/home/yuhaowang/project/FMBC/downstream/Patho-Bench/BRACS_COARSE.tsv',
    path_to_task_config = '/home/yuhaowang/project/FMBC/downstream/Patho-Bench/BRACS_COARSE.yaml',
    # Only relevant for patient-level tasks with multiple slides per patient.
    combine_slides_per_patient = False,
    cost = 1,
    balanced = False,
    saveto = './_test_linprobe/{task_name}/{model_name}'.format(task_name=task_name, model_name=model_name),
)

# Initialize the experiment. This is linear probing, but similar APIs are
# available for coxnet, protonet, retrieval, and finetune.
experiment = ExperimentFactory.linprobe(**linprobe_kwargs)

# Train, evaluate, then report the metric of interest.
experiment.train()
experiment.test()
result = experiment.report_results(metric = 'macro-ovr-auc')

Loaded split from /home/yuhaowang/project/FMBC/downstream/Patho-Bench/BRACS_COARSE.tsv with 546 samples and 5 folds assigned.
[94mSaving slide_id-level features to ./_test_pooled_features/by_slide_id/mean-UNI-2/BRACS, using mean-UNI-2...[0m


Pre-pooling features:   0%|          | 0/546 [00:00<?, ?it/s, Running on GPU 0...]



Pre-pooling features: 100%|██████████| 546/546 [00:05<00:00, 91.59it/s, Running on GPU 0...] 


Running linprobe experiment with C = 1...


Training on 497 samples: 100%|██████████| 5/5 [00:12<00:00,  2.59s/it]
  0%|          | 0/5 [00:00<?, ?it/s]


No val set found. Skipping...


Running test split on 49 samples: 100%|██████████| 5/5 [00:01<00:00,  2.64it/s] 

macro-ovr-auc: 0.844 ± 0.024





In [None]:
import h5py
# One pooled-feature file, used as a sample for inspecting the h5 layout.
test_case = '/home/yuhaowang/project/FMBC/downstream/Patho-Bench/_test_pooled_features/by_slide_id/mean-FMBC/BRACS/BRACS_1003716.h5'

# Open read-only and list the top-level keys stored in the file.
with h5py.File(test_case, mode='r') as f:
    print(f.keys())
    # The entry itself can be fetched with f['features'] if needed.

<KeysViewHDF5 ['features']>


In [11]:
def read_assets_from_h5(h5_path: str) -> tuple:
    '''Read every top-level entry of an h5 file together with its attributes.

    Args:
        h5_path (str): Path to the hdf5 file; it is opened read-only.

    Returns:
        tuple: ``(assets, attrs)``. ``assets`` maps each top-level key to the
        result of reading the entry with ``[:]``; ``attrs`` maps each key to a
        plain ``dict`` copy of that entry's attributes.
    '''
    assets = {}
    attrs = {}
    with h5py.File(h5_path, 'r') as f:
        for key in f.keys():
            # Resolve the object once instead of once per access.
            entry = f[key]
            assets[key] = entry[:]
            # `.attrs` always yields an attribute-manager object (never None),
            # so the copy is unconditional; an entry without attributes simply
            # produces an empty dict.
            attrs[key] = dict(entry.attrs)
    return assets, attrs

In [12]:
# a -> assets dict, b -> attributes dict for the sample file.
a, b = read_assets_from_h5(h5_path=test_case)

In [3]:
import os
import h5py
def load_h5(load_path, keys = None):
    '''
    Loads an hdf5 file and returns a dictionary of assets and a dictionary of attributes

    Args:
        load_path (str): The path to the hdf5 file
        keys (list, optional): A list of keys to load. Defaults to None, in which
            case every top-level key of the file is loaded.

    Returns:
        assets (dict): Maps each key to the result of reading that entry with ``[:]``
        attributes (dict): Maps each key to a plain dict copy of that entry's attributes

    Raises:
        AssertionError: If ``keys`` is neither a list nor None, or if ``load_path`` does not exist.
        Exception: If opening or reading the file fails. The underlying error is
            attached as ``__cause__``.
    '''
    assert isinstance(keys, list) or keys is None, 'keys must be a list or None'
    assert os.path.exists(load_path), f'File {load_path} does not exist'

    assets = {}
    attributes = {}
    try:
        with h5py.File(load_path, 'r') as file:
            if keys is None:
                keys = list(file.keys())
            for key in keys:
                # Resolve each entry once and read both its data and its attributes.
                entry = file[key]
                assets[key] = entry[:]
                attributes[key] = dict(entry.attrs)
    except Exception as e:
        # Chain explicitly so the root cause (missing key, corrupt file, ...) is
        # reported as the direct cause of this error instead of being left implicit.
        raise Exception(f'\033[91mError loading h5 file at {load_path}\033[0m') from e

    return assets, attributes