# In [None]:
import os

import numpy as np
import pandas as pd
import SimpleITK as sitk
from scipy.spatial import cKDTree
from scipy.spatial.distance import cdist

# Directories holding the predicted masks and the ground-truth masks
pred_path = '/Users/ziling/Desktop/MRCP/data/NII_predict_modelALL'
gt_path = '/Users/ziling/Desktop/MRCP/data/NII_GT'

# Collect the NIfTI file names, sorted alphabetically.
# gt_files keeps only ground-truth files that also have a prediction.
# NOTE(review): the metric loop iterates pred_files, not gt_files, so a
# prediction without a ground-truth file is still visited.
pred_files = sorted(
    name for name in os.listdir(pred_path) if name.endswith('.nii.gz')
)
gt_files = sorted(
    name
    for name in os.listdir(gt_path)
    if name.endswith('.nii.gz') and name in pred_files
)

# One dict of metrics per processed file is appended here
results = list()

# Helper that subsamples large volumes (optional, used for efficiency)
def downsample_array(arr, factor=2):
    """Return every ``factor``-th element of ``arr`` along its first three axes.

    Args:
        arr: Array indexable with three slices (i.e. at least 3-D).
        factor: Stride applied to each of the three leading axes.

    Returns:
        The strided selection ``arr[::factor, ::factor, ::factor]``.
    """
    stride = slice(None, None, factor)
    return arr[(stride,) * 3]

# Compute the Dice coefficient, HD95 and Volume Similarity for every prediction.
#
# * Dice and Volume Similarity are computed from voxel counts on the
#   full-resolution masks (they are cheap, so no subsampling is needed).
# * HD95 is the 95th-percentile symmetric Hausdorff distance between the
#   foreground voxel sets, reported in the image's physical units: voxel
#   indices are scaled by the image spacing (and by the subsampling stride
#   when large volumes are subsampled for speed).
# * Any failure for a file (unreadable file, shape mismatch, ...) is reported
#   and recorded as a NaN row so the remaining files are still processed.

# Stride used when subsampling large volumes for the HD95 computation.
HD95_DOWNSAMPLE_FACTOR = 2

for filename in pred_files:
    try:
        pred_file = os.path.join(pred_path, filename)
        gt_file = os.path.join(gt_path, filename)

        # Read the prediction and the matching ground-truth volume.
        pred_image = sitk.ReadImage(pred_file)
        gt_image = sitk.ReadImage(gt_file)

        # Binarise: every non-zero label counts as foreground.
        pred_array = sitk.GetArrayFromImage(pred_image).astype(np.bool_)
        gt_array = sitk.GetArrayFromImage(gt_image).astype(np.bool_)

        # Fail loudly instead of letting NumPy broadcast mismatched grids.
        if pred_array.shape != gt_array.shape:
            raise ValueError(
                f"shape mismatch: prediction {pred_array.shape} "
                f"vs ground truth {gt_array.shape}"
            )

        # Skip if either mask has no foreground: the metrics are undefined.
        if not pred_array.any() or not gt_array.any():
            print(f"Skipping {filename} due to empty prediction or ground truth.")
            results.append({'Filename': filename, 'DICE': np.nan, 'HD95': np.nan, 'Volume Similarity': np.nan})
            continue

        # Confusion counts on the full-resolution masks.
        tp = int(np.count_nonzero(pred_array & gt_array))
        fp = int(np.count_nonzero(pred_array & ~gt_array))
        fn = int(np.count_nonzero(~pred_array & gt_array))

        # Non-zero here because both masks contain foreground.
        denominator = 2 * tp + fp + fn
        dice = 2.0 * tp / denominator
        volume_similarity = 1 - abs(fn - fp) / denominator

        # Physical voxel size per array axis. SimpleITK reports spacing in
        # (x, y, z) order while the NumPy array is indexed (z, y, x).
        spacing = np.array(gt_image.GetSpacing()[::-1], dtype=float)

        # Optionally subsample large volumes to speed up the distance search.
        hd_pred, hd_gt = pred_array, gt_array
        if pred_array.shape[0] > 100 or pred_array.shape[1] > 100:
            small_pred = downsample_array(pred_array, HD95_DOWNSAMPLE_FACTOR)
            small_gt = downsample_array(gt_array, HD95_DOWNSAMPLE_FACTOR)
            # Keep the full-resolution masks if subsampling wiped one out.
            if small_pred.any() and small_gt.any():
                hd_pred, hd_gt = small_pred, small_gt
                # downsample_array strides the first three axes, so one index
                # step on those axes now spans `factor` original voxels.
                spacing[:3] *= HD95_DOWNSAMPLE_FACTOR

        # Foreground voxel positions in physical coordinates.
        coords_pred = np.argwhere(hd_pred) * spacing
        coords_gt = np.argwhere(hd_gt) * spacing

        # Nearest-neighbour queries on KD-trees yield the same per-point
        # minimum distances as a full pairwise matrix, without allocating
        # an N x M array.
        pred_to_gt, _ = cKDTree(coords_gt).query(coords_pred)
        gt_to_pred, _ = cKDTree(coords_pred).query(coords_gt)
        hd95 = max(np.percentile(pred_to_gt, 95), np.percentile(gt_to_pred, 95))

        # Store the metrics for this file.
        results.append({'Filename': filename, 'DICE': dice, 'HD95': hd95, 'Volume Similarity': volume_similarity})

    except Exception as e:
        # Per-file boundary: report the problem and keep going with a NaN row.
        print(f"Error processing {filename}: {e}")
        results.append({'Filename': filename, 'DICE': np.nan, 'HD95': np.nan, 'Volume Similarity': np.nan})

# Export the collected metrics to an Excel workbook (one row per file,
# without the DataFrame index column)
df = pd.DataFrame(data=results)
excel_path = '/Users/ziling/Desktop/MRCP/Segmentation_Metrics_all.xlsx'
df.to_excel(excel_path, index=False)

print("Results saved to Excel.")