In [71]:
import h5py
import os

def rename_h5_key(h5_path: str, old_key: str, new_key: str) -> None:
    """
    Rename a key in an HDF5 file and save the changes.

    The object is relinked under the new name with ``Group.move`` rather
    than being read into memory and rewritten. The object itself is left
    untouched, so its attributes and storage settings are kept, and the
    rename also works for groups and scalar datasets, which cannot be
    sliced with ``[:]``.

    Parameters:
        h5_path (str): Path to the HDF5 file.
        old_key (str): The existing key to rename.
        new_key (str): The new key name.

    Raises:
        ValueError: If ``new_key`` already exists in the file (this includes
            ``old_key == new_key``). Nothing is modified in that case.
    """
    with h5py.File(h5_path, 'r+') as f:
        # Missing source key: report it and leave the file untouched.
        if old_key not in f:
            print(f"Key '{old_key}' not found in file: {h5_path}")
            return

        # Refuse to clobber an existing object; fail before changing anything.
        if new_key in f:
            raise ValueError(
                f"Cannot rename '{old_key}' to '{new_key}': "
                f"'{new_key}' already exists in file: {h5_path}"
            )

        # Relink the object under its new name (no data copy).
        f.move(old_key, new_key)
        print(f"Renamed '{old_key}' to '{new_key}' in file: {h5_path}")
data_dir = '/ruiyan/yuhao/tile_embed/BCNB/TITAN'

# Rename the 'feats' key to 'features' in every .h5 file found directly
# inside data_dir (entries without the .h5 suffix are skipped).
h5_names = [entry for entry in os.listdir(data_dir) if entry.endswith('.h5')]
for h5_name in h5_names:
    rename_h5_key(os.path.join(data_dir, h5_name), old_key='feats', new_key='features')

Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_894.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_940.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_476.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_164.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_697.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_377.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_148.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_973.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_153.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_351.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_854.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_embed/BCNB/TITAN/bcnb_265.h5
Key 'feats' not found in file: /ruiyan/yuhao/tile_em

In [7]:
import pandas as pd
# The CSV is rewritten in place, so this cell must be safe to run more than once.
cptac_idc_csv = '/ruiyan/yuhao/project/FMBC/finetune/dataset_csv/subtype/CPTAC_IDC.csv'
data = pd.read_csv(cptac_idc_csv)
# Map the label column from 1/0 to 'IDC'/'non-IDC'.
# `replace` leaves values that are not keys of the dict untouched, whereas
# `map` turns them into NaN. Because the output overwrites the input file,
# using `map` would wipe every label on a second run of this cell.
data['label'] = data['label'].replace({1: 'IDC', 0: 'non-IDC'})
data.to_csv(cptac_idc_csv, index=False)

In [22]:
#/ruiyan/yuhao/embedding/IMPRESS
import pandas as pd
import pandas as pd
df = pd.read_csv('/ruiyan/yuhao/project/FMBC/finetune/dataset_csv/sample/IMPRESS_TNBC_2subtype.csv')

# Each input row is emitted twice: once with '_HE' appended to the string
# form of its slide_id and once with '_IHC' appended. All other columns
# are carried over unchanged.
slide_id_str = df['slide_id'].astype(str)
df_he = df.assign(slide_id=slide_id_str + '_HE')
df_ihc = df.assign(slide_id=slide_id_str + '_IHC')

# Stack the two variants (all _HE rows first, then all _IHC rows) under a
# fresh 0..n-1 index.
df_combined = pd.concat([df_he, df_ihc], ignore_index=True)

df_combined.to_csv('/ruiyan/yuhao/project/FMBC/finetune/dataset_csv/biomarker/IMPRESS_TNBC_2subtype.csv', index=False)

In [11]:
import os
import glob


In [15]:
import shutil

fm_dir = '/data4/fmbcembedding/'

# Collect every .h5 file located exactly two directory levels below fm_dir.
h5_files = glob.glob(os.path.join(fm_dir, '*/*/*.h5'))

# Move each file to the same relative location under /data4/embedding/, e.g.
# /data4/fmbcembedding/private_chunk_4/FMBC/S06839.h5
#   -> /data4/embedding/private_chunk_4/FMBC/S06839.h5
for src_path in h5_files:
    dst_path = src_path.replace(fm_dir, '/data4/embedding/')
    dst_dir = os.path.dirname(dst_path)
    # Create the destination folder on demand; an existing folder is fine.
    os.makedirs(dst_dir, exist_ok=True)
    shutil.move(src_path, dst_path)