1. Image preprocessing

In [None]:
import cv2
import os
import pandas as pd

def generate_patch(patch_dir, image, image_name, x_coord, y_coord, names, spot_size=224, out_size=112):
    """Crop a square patch around every spot and save it as a resized JPEG.

    For spot ``i`` a window of ``2 * int(spot_size / 2)`` pixels centred on
    ``(x_coord[i], y_coord[i])`` is cut from ``image``, resized to
    ``out_size`` x ``out_size`` with bicubic interpolation and written to
    ``<patch_dir>/<image_name>/<names[i]>.jpg``. Spots whose window does not
    lie completely inside the image are reported and skipped.

    Args:
        patch_dir: Root directory under which the per-image folder is created.
        image: Image array indexed as ``image[row, column]``.
        image_name: Name of the sub-folder that receives the patches.
        x_coord: Column (x) pixel coordinate of every spot centre.
        y_coord: Row (y) pixel coordinate of every spot centre.
        names: File stem for every spot, in the same order as the coordinates.
        spot_size: Edge length, in source pixels, of the window to crop.
        out_size: Edge length, in pixels, of the saved patch.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError(f"No image data for '{image_name}'")

    half_size = int(spot_size / 2)
    img_h, img_w = image.shape[0], image.shape[1]

    # Materialise plain Python sequences so that every lookup below is
    # positional. The caller passes pandas Series carrying string labels;
    # integer indexing on those relies on a deprecated positional fallback.
    xs = [int(v) for v in x_coord]
    ys = [int(v) for v in y_coord]
    labels = list(names)
    print(len(xs))

    patch_img_dir = os.path.join(patch_dir, image_name)
    # The folder is created lazily so an image without any valid spot does
    # not leave an empty directory behind.
    dir_ready = False

    for i, x in enumerate(xs):
        y = ys[i]
        print(f"Processing patch at coord: ({x}, {y})")

        left, right = x - half_size, x + half_size
        top, bottom = y - half_size, y + half_size

        inside = (0 <= left < img_w and 0 <= right <= img_w and
                  0 <= top < img_h and 0 <= bottom <= img_h)
        if not inside:
            print(f"Patch coordinates out of image bounds: ({x}, {y})")
            continue

        # The window is fully inside the image here, so no clamping is needed.
        patch = image[top:bottom, left:right]
        if patch.size == 0:
            print(f"Empty patch at coord: ({x}, {y})")
            continue

        if not dir_ready:
            os.makedirs(patch_img_dir, exist_ok=True)
            dir_ready = True

        patch_resized = cv2.resize(patch, (out_size, out_size), interpolation=cv2.INTER_CUBIC)
        out_file = os.path.join(patch_img_dir, f"{labels[i]}.jpg")
        # The result of imwrite is checked so a failed write is reported
        # instead of being logged as saved.
        if cv2.imwrite(out_file, patch_resized):
            print(f"Saved patch at coord: ({x}, {y})")
        else:
            print(f"Failed to write patch to {out_file}")


def split_spatial(root_dir, spot_size=224):
    """Cut spot-centred patches out of every image found under ``root_dir``.

    Directory layout read by this function::

        <root_dir>/ST-imgs/<name>.<ext>               image
        <root_dir>/ST-spotfiles/<name>_selection.tsv  spot table with columns
                                                      x, y, pixel_x, pixel_y
        <root_dir>/ST-cnts/<name>.tsv                 table indexed by "<x>x<y>"

    Only spots present in both tables are used. Patches are written by
    ``generate_patch`` below ``<root_dir>/preprocessed_data/ST-patches/<name>/``.
    Entries of ``ST-imgs`` that cannot be decoded as an image, or whose spot or
    count table is missing, are reported and skipped instead of aborting the run.

    Args:
        root_dir: Dataset root directory.
        spot_size: Edge length, in source pixels, of the window cropped per spot.
    """
    img_dir = os.path.join(root_dir, 'ST-imgs')
    coord_dir = os.path.join(root_dir, 'ST-spotfiles')
    gene_exp_dir = os.path.join(root_dir, 'ST-cnts')
    patch_dir = os.path.join(root_dir, 'preprocessed_data', 'ST-patches')
    os.makedirs(patch_dir, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    for image_name in sorted(os.listdir(img_dir)):
        img = cv2.imread(os.path.join(img_dir, image_name))
        if img is None:
            # imread signals "could not read/decode" by returning None.
            print("Skipping unreadable image file: ", image_name)
            continue

        # Everything before the first dot is the sample name.
        img_name = image_name.split('.')[0]
        print("Reading image: ", img_name)

        coord_file = os.path.join(coord_dir, img_name + '_selection.tsv')
        gene_exp_file = os.path.join(gene_exp_dir, img_name + '.tsv')
        missing = [path for path in (coord_file, gene_exp_file) if not os.path.isfile(path)]
        if missing:
            print("Skipping image, missing file(s): ", missing)
            continue

        coords = pd.read_csv(coord_file, index_col=0, sep='\t')
        gene_exp = pd.read_csv(gene_exp_file, index_col=0, sep='\t')

        # Re-key the spot table as "<x>x<y>" so it can be matched against the
        # index of the count table.
        coords.index = coords['x'].astype(str) + 'x' + coords['y'].astype(str)

        common_indices = gene_exp.index.intersection(coords.index)
        print("Common indices:", common_indices)
        coords = coords.loc[common_indices]

        x_coord = coords['pixel_x'].round(0).astype(int)
        y_coord = coords['pixel_y'].round(0).astype(int)

        # Names are taken from the same frame as the coordinates so the three
        # sequences are guaranteed to be aligned.
        generate_patch(patch_dir, img, img_name, x_coord, y_coord, coords.index, spot_size)

        

# Extract patches for the dataset rooted at the current working directory.
split_spatial(".")



Reading image:  H1
Common indices: Index(['10x10', '10x11', '10x12', '10x13', '10x14', '10x15', '10x16', '10x17',
       '10x18', '10x19',
       ...
       '9x24', '9x25', '9x26', '9x27', '9x28', '9x29', '9x30', '9x31', '9x32',
       '9x33'],
      dtype='object', length=613)
613
Processing patch at coord: (2581, 2603)
Saved patch at coord: (2581, 2603)
Processing patch at coord: (2584, 2896)
Saved patch at coord: (2584, 2896)
Processing patch at coord: (2581, 3175)
Saved patch at coord: (2581, 3175)
Processing patch at coord: (2584, 3465)
Saved patch at coord: (2584, 3465)
Processing patch at coord: (2581, 3760)
Saved patch at coord: (2581, 3760)
Processing patch at coord: (2588, 4057)
Saved patch at coord: (2588, 4057)
Processing patch at coord: (2587, 4342)
Saved patch at coord: (2587, 4342)
Processing patch at coord: (2584, 4634)
Saved patch at coord: (2584, 4634)
Processing patch at coord: (2585, 4927)
Saved patch at coord: (2585, 4927)
Processing patch at coord: (2582, 5215)
Sa

In [None]:
# Reinhard color normalization for HE-stained histological images using histomicsTK tools.
import os
import PIL
import skimage.io
import skimage.color
import histomicstk as htk

# LAB statistics of reference images, keyed by reference file path. The
# reference does not change between patches, so it is read and analysed once
# per path instead of once per patch. A reference file edited on disk during a
# session is therefore not picked up again.
_REFERENCE_LAB_STATS = {}


def _reference_lab_stats(ref_image_file):
    """Return the (mean, std) LAB statistics of ``ref_image_file``, cached after first use."""
    if ref_image_file not in _REFERENCE_LAB_STATS:
        im_reference = skimage.io.imread(ref_image_file)[:, :, :3]
        _REFERENCE_LAB_STATS[ref_image_file] = (
            htk.preprocessing.color_conversion.lab_mean_std(im_reference)
        )
    return _REFERENCE_LAB_STATS[ref_image_file]


def nmzd_reinhard_rescale(input_image_file, nmzd_path, barcode, ref_image_file='ref_HE.png'):
    """Reinhard colour-normalise one image against a reference image and save it.

    The input is read, reduced to its first three channels, normalised with
    ``htk.preprocessing.color_normalization.reinhard`` using the LAB mean/std
    of the reference image, and written to ``<nmzd_path>/<barcode>.jpg``.
    Despite the function name, no resizing is performed.

    Args:
        input_image_file: Path of the image to normalise.
        nmzd_path: Existing directory that receives the normalised image.
        barcode: File stem of the output image.
        ref_image_file: Path of the reference image; defaults to
            ``'ref_HE.png'`` relative to the current working directory.
    """
    # Keep only the first three channels (drops an alpha channel if present).
    im_input = skimage.io.imread(input_image_file)[:, :, :3]

    mean_ref, std_ref = _reference_lab_stats(ref_image_file)

    im_nmzd = htk.preprocessing.color_normalization.reinhard(im_input, mean_ref, std_ref)
    pil_img = PIL.Image.fromarray(im_nmzd)

    pil_img.save(os.path.join(nmzd_path, barcode + ".jpg"))


# Colour-normalise every patch of every tissue section.
directory_path = './preprocessed_data/ST-patches'
save_root_path = "./preprocessed_data/patches_nmzd"

all_items = os.listdir(directory_path)

# Every sub-directory of the patch folder is treated as one tissue section.
tissue_list = [item for item in all_items if os.path.isdir(os.path.join(directory_path, item))]
print(len(tissue_list))

for tissue_name in tissue_list:
    source_path = os.path.join(directory_path, tissue_name)
    nmzd_path = os.path.join(save_root_path, tissue_name)
    os.makedirs(nmzd_path, exist_ok=True)

    for filename in os.listdir(source_path):
        if not filename.endswith('jpg'):
            continue
        try:
            # Strip the 4-character ".jpg" suffix to get the spot name.
            barcode = filename[:-4]
            input_img_file = os.path.join(source_path, filename)
            nmzd_reinhard_rescale(input_img_file, nmzd_path, barcode)
        except Exception as exc:
            # Best effort: one bad patch must not stop the run, but the reason
            # is reported and Ctrl-C / SystemExit are no longer swallowed.
            print("Error occured in %s: %r" % (os.path.join(source_path, filename), exc))

    print("End of normalization & rescaling of %s" % tissue_name)

36
End of normalization & rescaling of G1
End of normalization & rescaling of D1
End of normalization & rescaling of D5
End of normalization & rescaling of B3
End of normalization & rescaling of E3
End of normalization & rescaling of A3
End of normalization & rescaling of B2
End of normalization & rescaling of A2
End of normalization & rescaling of B1
End of normalization & rescaling of H1
End of normalization & rescaling of A1
End of normalization & rescaling of C4
End of normalization & rescaling of A5
End of normalization & rescaling of E1
End of normalization & rescaling of G3
End of normalization & rescaling of B6
End of normalization & rescaling of H3
End of normalization & rescaling of F3
End of normalization & rescaling of F2
End of normalization & rescaling of B4
End of normalization & rescaling of E2
End of normalization & rescaling of A4
End of normalization & rescaling of C2
End of normalization & rescaling of D4
End of normalization & rescaling of G2
End of normalization &

In [None]:
# Shell commands (not Python): run HoVer-Net inference on the extracted patches.
# NOTE(review): the relative paths below imply a working directory that contains
# both `Hover-net/` and `dataset/her2st/`, i.e. not the directory the Python
# cells use for `./preprocessed_data` -- presumably run from a terminal; confirm.
cd Hover-net/hover_net-master/hover_net-master/

# Tile-mode inference for sample A1 only. The spot-type cell further down reads
# `hover_seg/<sample>/qupath/` for every folder in ST-patches, so this command
# has to be repeated with --input_dir/--output_dir adjusted for each sample.
python run_infer.py \
--gpu='0' \
--nr_types=6 \
--type_info_path=type_info.json \
--batch_size=64 \
--model_mode=fast \
--model_path=pretrained/hovernet_fast_pannuke_type_tf2pytorch.tar \
--nr_inference_workers=8 \
--nr_post_proc_workers=16 \
tile \
--input_dir=../../../dataset/her2st/preprocessed_data/ST-patches/A1 \
--output_dir=../../../dataset/her2st/preprocessed_data/hover_seg/A1 \
--mem_usage=0.1 \
--draw_dot \
--save_qupath






图像特征提取

In [None]:
from PIL import Image
import torch
from transformers import AutoImageProcessor, AutoModel
import os
import numpy as np

# Local checkpoint directory of the feature extractor.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
MODEL_DIR = "/d/zhoujl/my_model/model_train/phikon-v2"

processor = AutoImageProcessor.from_pretrained(MODEL_DIR)
model = AutoModel.from_pretrained(MODEL_DIR)
model.eval()


source_patches_dir = "./preprocessed_data/patches_nmzd"
feature_save_dir = "./preprocessed_data/precomputed_features"
os.makedirs(feature_save_dir, exist_ok=True)

IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')

# One sub-folder per slice; features are written to a mirrored folder layout
# as one .npy file per patch.
for slice_folder in os.listdir(source_patches_dir):
    slice_path = os.path.join(source_patches_dir, slice_folder)

    if not os.path.isdir(slice_path):
        continue

    slice_feature_dir = os.path.join(feature_save_dir, slice_folder)
    os.makedirs(slice_feature_dir, exist_ok=True)

    for image_file in os.listdir(slice_path):
        if not image_file.lower().endswith(IMAGE_EXTENSIONS):
            continue

        image_path = os.path.join(slice_path, image_file)

        try:
            # Image.open is lazy and keeps the file open; the context manager
            # releases the handle, and convert() loads the pixels and forces a
            # 3-channel RGB image whatever the file's mode is.
            with Image.open(image_path) as handle:
                image = handle.convert("RGB")

            inputs = processor(image, return_tensors="pt")

            with torch.inference_mode():
                outputs = model(**inputs)
                # Embedding at token position 0 of the last hidden state
                # (presumably the CLS token -- confirm against the model card).
                features = outputs.last_hidden_state[:, 0, :].squeeze().cpu().numpy()

            feature_file = os.path.join(slice_feature_dir, f"{os.path.splitext(image_file)[0]}.npy")
            np.save(feature_file, features)

            print(f"Feature extracted and saved from {image_path} to {feature_file}")

        except Exception as e:
            # Best effort: report the failing patch and continue with the rest.
            print(f"Failed to process {image_path}: {e}")

print("All features extracted and saved according to the original classification!")

  from .autonotebook import tqdm as notebook_tqdm
2025-08-25 11:20:38.630971: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-25 11:20:38.644508: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-08-25 11:20:38.659921: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-08-25 11:20:38.664522: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-25 11:20:38.6

Feature extracted and saved from /d/zhoujl/my_model/dataset/her2st/preprocessed_data/patches_nmzd-112/G1/18x26.jpg to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/precomputed_features-112/G1/18x26.npy
Feature extracted and saved from /d/zhoujl/my_model/dataset/her2st/preprocessed_data/patches_nmzd-112/G1/8x18.jpg to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/precomputed_features-112/G1/8x18.npy
Feature extracted and saved from /d/zhoujl/my_model/dataset/her2st/preprocessed_data/patches_nmzd-112/G1/20x10.jpg to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/precomputed_features-112/G1/20x10.npy
Feature extracted and saved from /d/zhoujl/my_model/dataset/her2st/preprocessed_data/patches_nmzd-112/G1/20x19.jpg to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/precomputed_features-112/G1/20x19.npy
Feature extracted and saved from /d/zhoujl/my_model/dataset/her2st/preprocessed_data/patches_nmzd-112/G1/21x23.jpg to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/

spot类型选择

In [None]:
import os
import pandas as pd

directory_path = './preprocessed_data/ST-patches'

all_items = os.listdir(directory_path)

# Every sub-directory of the patch folder is treated as one sample.
sample_list = [item for item in all_items if os.path.isdir(os.path.join(directory_path, item))]
print(len(sample_list))
# Guarded: indexing an empty list would raise IndexError when no sample exists.
if sample_list:
    print(sample_list[0])

# Extension of the per-spot input tables.
file_extension = '.tsv'

# Class labels expected in the `name` column of the per-spot tables; every one
# of them is reported for each spot, with 0.0 when it does not occur.
all_types = ['nolabe', 'necros', 'neopla', 'inflam', 'connec', 'no-neo']

def calculate_proportions(file_path):
    """Compute the percentage of rows per class in one tab-separated table.

    Args:
        file_path: Path of a TSV file that has a ``name`` column.

    Returns:
        A tuple ``(proportions, total_count)``: ``proportions`` maps every
        value of the ``name`` column to its share of the rows in percent, with
        each label of ``all_types`` that does not occur added as ``0.0``;
        ``total_count`` is the number of rows in the table.
    """
    cells = pd.read_csv(file_path, sep='\t')

    percentages = cells['name'].value_counts(normalize=True).mul(100).to_dict()

    # Make sure every known class has an entry, even if it never occurs.
    for cell_type in all_types:
        percentages.setdefault(cell_type, 0.0)

    return percentages, len(cells)

def save_proportions_to_tsv(proportions, total_count, output_path):
    """Write the per-class proportions of one spot as a tab-separated table.

    The file gets a ``name / proportion / total_count`` header followed by one
    row per label in ``all_types`` (in that order); labels missing from
    ``proportions`` are written with ``0.0``.

    Args:
        proportions: Mapping from class label to percentage.
        total_count: Value repeated in the ``total_count`` column of every row.
        output_path: Path of the file to create or overwrite.
    """
    with open(output_path, 'w') as handle:
        handle.write('name\tproportion\ttotal_count\n')
        handle.writelines(
            f'{cell_type}\t{proportions.get(cell_type, 0.0)}\t{total_count}\n'
            for cell_type in all_types
        )

def summarize_spot_types(output_dir, summary_output_path):
    """Assign every spot the class with the highest proportion and save a summary.

    Every ``*_proportions.tsv`` file in ``output_dir`` is read; the ``name`` of
    the row with the largest ``proportion`` (the first such row on ties) becomes
    the type of that spot. The result is written to ``summary_output_path`` as
    a tab-separated table with the columns ``spot`` and ``type``.

    Files are processed in sorted order so the summary is reproducible, files
    without any proportion value are reported and skipped, and the header is
    written even when no spot was found.

    Args:
        output_dir: Directory containing the per-spot proportion tables.
        summary_output_path: Path of the summary file to create or overwrite.
    """
    spot_types = []

    for filename in sorted(os.listdir(output_dir)):
        if not filename.endswith('_proportions.tsv'):
            continue

        df = pd.read_csv(os.path.join(output_dir, filename), sep='\t')

        # Without this guard an empty table would fail when picking the maximum.
        if df.empty or df['proportion'].isna().all():
            print(f"Skipping {filename}: no proportion values")
            continue

        # idxmax returns the first row holding the maximum.
        dominant_type = df.loc[df['proportion'].idxmax(), 'name']

        spot_name = os.path.splitext(filename)[0].replace('_proportions', '')
        spot_types.append({'spot': spot_name, 'type': dominant_type})

    # Explicit columns keep the header present when the list is empty.
    summary_df = pd.DataFrame(spot_types, columns=['spot', 'type'])
    summary_df.to_csv(summary_output_path, sep='\t', index=False)

# For every sample: turn each per-spot table into a class-proportion table,
# then write one summary that assigns each spot its dominant class.
for sample in sample_list:
    input_dir = './preprocessed_data/hover_seg/'+sample+'/qupath/'
    output_dir = './preprocessed_data/spots_type/'+sample+'/'

    # A sample without segmentation output is skipped before anything is
    # created, instead of leaving an empty output folder and aborting the loop.
    if not os.path.isdir(input_dir):
        print(f"Skipping {sample}: no segmentation output found in {input_dir}")
        continue

    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if not filename.endswith(file_extension):
            continue

        file_path = os.path.join(input_dir, filename)
        proportions, total_count = calculate_proportions(file_path)

        output_filename = os.path.splitext(filename)[0] + '_proportions.tsv'
        output_path = os.path.join(output_dir, output_filename)

        save_proportions_to_tsv(proportions, total_count, output_path)
        print(f"Processed {filename} and saved results to {output_path}")

    summary_output_path = os.path.join(output_dir, 'summary_spot_types.tsv')
    summarize_spot_types(output_dir, summary_output_path)
    print(f"Summary of spot types saved to {summary_output_path}")

36
G1
Processed 5x9.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spots_type/G1/5x9_proportions.tsv
Processed 22x26.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spots_type/G1/22x26_proportions.tsv
Processed 12x18.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spots_type/G1/12x18_proportions.tsv
Processed 16x18.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spots_type/G1/16x18_proportions.tsv
Processed 13x20.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spots_type/G1/13x20_proportions.tsv
Processed 7x25.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spots_type/G1/7x25_proportions.tsv
Processed 23x14.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spots_type/G1/23x14_proportions.tsv
Processed 14x19.tsv and saved results to /d/zhoujl/my_model/dataset/her2st/preprocessed_data/spot