In [1]:
import os
import numpy as np
import nibabel as nib
from pathlib import Path
from scipy import ndimage
from tqdm import tqdm

def reorient_to_lps(nifti_img):
    """Return ``nifti_img`` reoriented so its voxel axes follow the LPS axis codes.

    The image is first brought to nibabel's closest canonical orientation, and
    the orientation transform produced by ``axcodes2ornt('LPS')`` is then
    applied to that canonical image.
    """
    canonical_img = nib.as_closest_canonical(nifti_img)
    lps_transform = nib.orientations.axcodes2ornt('LPS')
    return canonical_img.as_reoriented(lps_transform)

def clip_and_normalize_ct(data, hu_min=-200, hu_max=600):
    """Clip CT intensities to an HU window and rescale the window to [0, 1].

    Args:
        data: Array-like of CT intensities (anything ``np.clip`` accepts).
        hu_min: Lower bound of the window; values below it map to 0.
        hu_max: Upper bound of the window; values above it map to 1.

    Returns:
        Array of the same shape as ``data`` with values in [0, 1].

    Raises:
        ValueError: If ``hu_max`` is not strictly greater than ``hu_min``
            (the rescaling would otherwise divide by zero or invert the data).
    """
    if hu_max <= hu_min:
        raise ValueError(
            f"hu_max ({hu_max}) must be greater than hu_min ({hu_min})"
        )

    window_width = hu_max - hu_min
    return (np.clip(data, hu_min, hu_max) - hu_min) / window_width

def clip_and_normalize_mr(data):
    """Clip MR intensities to their 0.5th-99.5th percentile band and rescale to [0, 1].

    The percentiles are computed over the whole array. When the clipped data
    has no positive intensity range (e.g. a constant volume), an all-zero
    array shaped like the clipped data is returned instead of dividing by zero.
    """
    lower_bound, upper_bound = np.percentile(data, [0.5, 99.5])
    clipped = np.clip(data, lower_bound, upper_bound)

    # Min-max rescale of the clipped intensities
    lowest = clipped.min()
    highest = clipped.max()
    intensity_range = highest - lowest

    if not intensity_range > 0:
        return np.zeros_like(clipped)

    return (clipped - lowest) / intensity_range

def resize_volume(data, target_shape=(256, 256, 256), is_label=False):
    """Resample a volume to ``target_shape`` with ``scipy.ndimage.zoom``.

    A per-axis zoom factor is computed for the first three axes as
    ``target_shape[axis] / data.shape[axis]``. Label volumes are resampled
    with spline order 0 (nearest neighbour) so that no new label values are
    invented; image volumes use order 1 (linear interpolation).
    """
    source_shape = data.shape
    scale = [target_shape[axis] / source_shape[axis] for axis in (0, 1, 2)]

    # order 0 keeps labels discrete; order 1 interpolates image intensities
    spline_order = 0 if is_label else 1

    return ndimage.zoom(data, scale, order=spline_order)

def create_lps_affine(target_shape=(256, 256, 256), voxel_size=1.0):
    """
    Build a 4x4 voxel-to-world affine for an axis-aligned, isotropic grid.

    The first two voxel axes are scaled by ``-voxel_size`` and the third by
    ``+voxel_size``. Assuming world coordinates are RAS+ (the NIfTI
    convention), increasing voxel indices therefore move toward:
    - axis 0: Left
    - axis 1: Posterior
    - axis 2: Superior
    which corresponds to the axis codes 'LPS'.

    The translation column places voxel (0, 0, 0) at
    ``(+nx/2, +ny/2, -nz/2) * voxel_size`` in world coordinates.
    """
    half_x = target_shape[0] * voxel_size / 2
    half_y = target_shape[1] * voxel_size / 2
    half_z = target_shape[2] * voxel_size / 2

    return np.array([
        [-voxel_size, 0, 0, half_x],
        [0, -voxel_size, 0, half_y],
        [0, 0, voxel_size, -half_z],
        [0, 0, 0, 1],
    ])

def find_matching_files(images_dir, labels_dir):
    """Find matching image and label files"""
    matches = []
    
    if not os.path.exists(images_dir) or not os.path.exists(labels_dir):
        return matches
    
    # Get all label files
    label_files = [f for f in os.listdir(labels_dir) if f.endswith('.nii.gz')]
    
    # Get all image files
    image_files = [f for f in os.listdir(images_dir) if f.endswith('.nii.gz')]
    
    # Track matched pairs to avoid duplicates
    matched_pairs = set()
    
    # For each label, find matching image
    for label_file in label_files:
        # Remove .nii.gz extension to get base name
        label_base = label_file.replace('.nii.gz', '')
        
        # Find image file that matches
        matched_image = None
        for img_file in image_files:
            img_base = img_file.replace('.nii.gz', '')
            
            # Check both directions: label in image OR image in label
            if label_base in img_base or img_base in label_base:
                matched_image = img_file
                break
        
        if matched_image:
            pair = (matched_image, label_file)
            if pair not in matched_pairs:
                matched_pairs.add(pair)
                matches.append({
                    'image': os.path.join(images_dir, matched_image),
                    'label': os.path.join(labels_dir, label_file)
                })
    
    return matches

def detect_modality_from_filename(filename):
    """Infer 'CT' or 'MR' from marker substrings in a file name.

    The comparison is case-insensitive. CT markers are checked before MR
    markers, so a name carrying both is reported as 'CT'. Returns ``None``
    when no marker is present.
    """
    lowered = filename.lower()
    markers_by_modality = (
        ('CT', ('_ct_', 'topcow_ct_')),
        ('MR', ('_mr_', 'topcow_mr_')),
    )

    for modality, markers in markers_by_modality:
        if any(marker in lowered for marker in markers):
            return modality

    return None

def process_dataset(dataset_name, base_path, modality, output_dir, case_idx):
    """Preprocess every image/label pair of one dataset and write it to ``output_dir``.

    For each pair found under ``<base_path>/imagesTr`` and
    ``<base_path>/cow_seg_labelsTr`` the function reorients both volumes,
    normalizes the image intensities, resizes both volumes to 256^3 and saves
    them as ``imagesTr/CASE_XXXX_0000.nii.gz`` and ``labelsTr/CASE_XXXX.nii.gz``.

    Args:
        dataset_name: Name used in progress and log messages.
        base_path: Dataset root containing the two input sub-directories.
        modality: 'CT' selects the CT normalization; 'MIXED' detects the
            modality per file from its name (cases that cannot be detected
            are skipped); any other value selects the MR normalization.
        output_dir: Root directory receiving the ``imagesTr`` / ``labelsTr``
            output files (the sub-directories must already exist).
        case_idx: Number to assign to the first successfully written case.

    Returns:
        The next unused case index, i.e. ``case_idx`` plus the number of
        cases that were written successfully.

    Notes:
        Any exception raised while handling a case is printed and that case
        is skipped; the case index is only advanced after both files of a
        case have been saved.
    """
    print(f"\n{'='*60}")
    print(f"Processing: {dataset_name} ({modality})")
    print(f"{'='*60}")

    images_dir = os.path.join(base_path, "imagesTr")
    labels_dir = os.path.join(base_path, "cow_seg_labelsTr")

    # Find matching files
    matches = find_matching_files(images_dir, labels_dir)
    print(f"Found {len(matches)} matching pairs")

    if not matches:
        print(f"No matching files found in {dataset_name}")
        return case_idx

    target_shape = (256, 256, 256)
    # The output grid is identical for every case, so build its affine once.
    affine = create_lps_affine(target_shape=target_shape, voxel_size=1.0)

    # Process each case
    for match in tqdm(matches, desc=f"Processing {dataset_name}"):
        try:
            # Resolve the modality before touching the volumes so that
            # undetectable cases are skipped without loading any data.
            actual_modality = modality
            if modality == 'MIXED':
                detected = detect_modality_from_filename(os.path.basename(match['image']))
                if not detected:
                    print(f"\nWarning: Could not detect modality for {match['image']}, skipping...")
                    continue
                actual_modality = detected

            # Load and reorient both volumes
            img_lps = reorient_to_lps(nib.load(match['image']))
            label_lps = reorient_to_lps(nib.load(match['label']))

            # Get data arrays. Labels come back as floats; round before the
            # integer cast so a value like 2.9999999 is stored as 3, not 2.
            img_data = img_lps.get_fdata()
            label_data = np.rint(label_lps.get_fdata()).astype(np.int16)

            # Differing grids usually mean the pair does not belong together;
            # both volumes are still resized, so only warn.
            if img_data.shape != label_data.shape:
                print(
                    f"\nWarning: shape mismatch for {match['image']}: "
                    f"image {img_data.shape} vs label {label_data.shape}"
                )

            # Apply modality-specific preprocessing
            if actual_modality == 'CT':
                img_processed = clip_and_normalize_ct(img_data)
            else:  # MR (also the fallback for any other modality value)
                img_processed = clip_and_normalize_mr(img_data)

            # Resize to 256^3
            img_resized = resize_volume(img_processed, target_shape=target_shape, is_label=False)
            label_resized = resize_volume(label_data, target_shape=target_shape, is_label=True)

            # Create output filenames
            output_img_file = os.path.join(output_dir, 'imagesTr', f'CASE_{case_idx:04d}_0000.nii.gz')
            output_label_file = os.path.join(output_dir, 'labelsTr', f'CASE_{case_idx:04d}.nii.gz')

            # Wrap the arrays as NIfTI images on the shared output grid
            img_nii_out = nib.Nifti1Image(img_resized.astype(np.float32), affine)
            label_nii_out = nib.Nifti1Image(label_resized.astype(np.int16), affine)

            # Save
            nib.save(img_nii_out, output_img_file)
            nib.save(label_nii_out, output_label_file)

            case_idx += 1

        except Exception as e:
            # Best effort: report the failing case and carry on with the rest.
            print(f"\nError processing {match['image']}: {str(e)}")
            continue

    return case_idx

def main():
    """Run the preprocessing pipeline over all configured datasets.

    Creates the output sub-directories, processes each dataset in turn while
    threading a running case index through them, and prints a per-dataset
    summary followed by a recap of the preprocessing settings.
    """
    # Input root and output root
    base_dir = r"E:\Topcow"
    output_dir = r"./Dataset500_Topcow"

    # Make sure both output sub-directories exist
    for subfolder in ('imagesTr', 'labelsTr'):
        os.makedirs(os.path.join(output_dir, subfolder), exist_ok=True)

    # (dataset folder name, modality) for every dataset to process
    dataset_specs = (
        ('MRA_Lausanne', 'MR'),
        ('MRA_IXI_HH', 'MR'),
        ('CTA_ISLES2024_TUM', 'CT'),
        ('TopCoW2024_Data_Release', 'MIXED'),  # contains both CT and MR
    )
    datasets = [
        {
            'name': name,
            'path': os.path.join(base_dir, name),
            'modality': modality,
        }
        for name, modality in dataset_specs
    ]

    # Case numbering starts at 1 and continues across datasets
    next_idx = 1
    summary = []

    for dataset in datasets:
        first_idx = next_idx
        next_idx = process_dataset(
            dataset['name'],
            dataset['path'],
            dataset['modality'],
            output_dir,
            next_idx,
        )
        last_idx = next_idx - 1
        processed_any = last_idx >= first_idx

        if processed_any:
            case_count = last_idx - first_idx + 1
            case_range = f"{first_idx:04d} - {last_idx:04d}"
        else:
            case_count = 0
            case_range = "None"

        summary.append({
            'dataset': dataset['name'],
            'modality': dataset['modality'],
            'cases': case_count,
            'case_range': case_range,
        })

    # Report what was done
    banner = "=" * 60
    print("\n" + banner)
    print("PROCESSING COMPLETE - SUMMARY")
    print(banner)
    print(f"Output directory: {output_dir}")
    print(f"Total cases processed: {next_idx - 1}\n")

    for item in summary:
        print(f"{item['dataset']:30s} ({item['modality']:2s}): {item['cases']:3d} cases [{item['case_range']}]")

    print("\nAll volumes reoriented to LPS and resized to 256x256x256")
    print("Voxel spacing: 1x1x1mm (isotropic)")
    print("CT: Clipped to [-200, 600] HU, normalized to [0, 1]")
    print("MR: Clipped at 0.5-99.5 percentile, normalized to [0, 1]")

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Processing: MRA_Lausanne (MR)
Found 20 matching pairs


Processing MRA_Lausanne: 100%|██████████| 20/20 [02:04<00:00,  6.24s/it]



Processing: MRA_IXI_HH (MR)
Found 20 matching pairs


Processing MRA_IXI_HH: 100%|██████████| 20/20 [01:15<00:00,  3.78s/it]



Processing: CTA_ISLES2024_TUM (CT)
Found 26 matching pairs


Processing CTA_ISLES2024_TUM: 100%|██████████| 26/26 [02:10<00:00,  5.02s/it]



Processing: TopCoW2024_Data_Release (MIXED)
Found 250 matching pairs


Processing TopCoW2024_Data_Release: 100%|██████████| 250/250 [20:01<00:00,  4.81s/it]


PROCESSING COMPLETE - SUMMARY
Output directory: ./Dataset500_Topcow
Total cases processed: 316

MRA_Lausanne                   (MR):  20 cases [0001 - 0020]
MRA_IXI_HH                     (MR):  20 cases [0021 - 0040]
CTA_ISLES2024_TUM              (CT):  26 cases [0041 - 0066]
TopCoW2024_Data_Release        (MIXED): 250 cases [0067 - 0316]

All volumes reoriented to LPS and resized to 256x256x256
Voxel spacing: 1x1x1mm (isotropic)
CT: Clipped to [-200, 600] HU, normalized to [0, 1]
MR: Clipped at 0.5-99.5 percentile, normalized to [0, 1]



