# Reorientation and resampling

In [None]:
import os
import nibabel as nib
import nibabel.processing as nib_processing

# Input and output directories.
# main() walks src_root three levels deep (class / stage / patient folders)
# and mirrors that folder layout under dst_root.
src_root = '/content/drive/MyDrive/LiQA_training_data/train_splited'
dst_root = '/content/drive/MyDrive/LiQA_training_data/train_processed'

# Target voxel spacing (x, y, z) in mm; passed to conform() as voxel_size
target_spacing = (1.5, 1.5, 3.0)

def _list_subdirs(parent):
    """Return ``(name, path)`` pairs for the sub-directories of *parent*.

    Entries are sorted by name so that runs are reproducible; plain files
    are ignored.
    """
    pairs = []
    for name in sorted(os.listdir(parent)):
        path = os.path.join(parent, name)
        if os.path.isdir(path):
            pairs.append((name, path))
    return pairs


def _resample_to_spacing(nii, spacing):
    """Reorient *nii* to RAS and resample it to *spacing* (mm per voxel).

    Parameters
    ----------
    nii : nibabel image
        3D input volume in any orientation.
    spacing : sequence of 3 floats
        Target voxel size (x, y, z) in mm, expressed in RAS axis order.

    Returns
    -------
    nibabel image
        Volume in RAS orientation whose grid covers (approximately) the same
        physical extent as the input along every axis.
    """
    # Reorient first: conform() interprets out_shape / voxel_size in the
    # OUTPUT (RAS) axis order, so zooms and shape must be read in that order.
    # Reading them from the raw input mixes up axes for permuted acquisitions.
    canonical = nib.as_closest_canonical(nii)
    zooms = canonical.header.get_zooms()[:3]
    shape = canonical.shape[:3]

    # round() instead of int(): NIfTI stores zooms as float32, so an extent
    # that should divide exactly can evaluate to e.g. 139.99999 and int()
    # would silently drop a voxel. max(1, ...) guards against an empty axis.
    out_shape = tuple(
        max(1, int(round(float(zoom) * n_voxels / float(new_zoom))))
        for zoom, n_voxels, new_zoom in zip(zooms, shape, spacing)
    )

    return nib_processing.conform(
        canonical,
        out_shape=out_shape,
        voxel_size=tuple(spacing),
        order=3,
        cval=0.0,
        orientation='RAS',
        out_class=None
    )


def main():
    """Reorient and resample every ``.nii.gz`` file found under ``src_root``.

    The tree is expected to be ``src_root/<class>/<stage>/<patient>/*.nii.gz``.
    Each volume is reoriented to RAS, resampled to ``target_spacing`` and
    written to the same relative location under ``dst_root``. A failure on one
    file is reported and does not stop the remaining files.
    """
    for class_folder, class_path in _list_subdirs(src_root):  # class0, class1
        for stage_folder, stage_path in _list_subdirs(class_path):  # S1..S4
            for patient_folder, patient_path in _list_subdirs(stage_path):
                dst_patient_path = os.path.join(
                    dst_root, class_folder, stage_folder, patient_folder
                )

                for fname in sorted(os.listdir(patient_path)):  # multi-modal NIfTI files
                    if not fname.endswith('.nii.gz'):
                        continue

                    src_file = os.path.join(patient_path, fname)
                    print(f'Processing: {src_file}')

                    # Best-effort batch job: report the failure and move on.
                    try:
                        new_nii = _resample_to_spacing(
                            nib.load(src_file), target_spacing
                        )

                        os.makedirs(dst_patient_path, exist_ok=True)
                        dst_file = os.path.join(dst_patient_path, fname)
                        nib.save(new_nii, dst_file)

                        print(f'Saved to: {dst_file}')

                    except Exception as e:
                        print(f'Failed: {src_file}')
                        print(f'Reason: {e}')


if __name__ == "__main__":
    main()

# Intensity normalization

In [None]:
import os
import numpy as np
import nibabel as nib

# Input root (volumes to normalize) and output root (normalized volumes)
SOURCE_ROOT = "/content/drive/MyDrive/LiQA_training_data/train_processed"
SAVE_ROOT = "/content/drive/MyDrive/LiQA_training_data/train_normalized"
# Modality keywords; each is matched case-insensitively as a substring of the file name
MODALITIES = ["T1", "T2", "DWI"]

# Create the output root up front (runs as soon as this cell/module executes)
os.makedirs(SAVE_ROOT, exist_ok=True)

# Intensity normalization

def scale_intensity_range_percentiles(
    image,
    lower=1,
    upper=99,
    b_min=0.0,
    b_max=1.0,
    clip=True
):
    """
    Percentile-based intensity scaling:
    - Map [P(lower), P(upper)] linearly to [b_min, b_max]
    - Optionally clip to [b_min, b_max]

    Parameters
    ----------
    image : array_like
        Input image; any numeric dtype (lists are accepted too).
    lower, upper : float
        Percentiles (0-100) that define the robust intensity range.
    b_min, b_max : float
        Output scaling range.
    clip : bool
        Whether to clip output values into [b_min, b_max].

    Returns
    -------
    np.ndarray
        float32 array with the same shape as ``image``. When the two
        percentiles coincide (e.g. a constant image) every element is set to
        the midpoint of [b_min, b_max]. An empty input yields an empty output.
    """
    image = np.asarray(image)
    if image.size == 0:
        # Percentiles of an empty array are undefined; nothing to scale.
        return image.astype(np.float32)

    # One call computes both percentiles.
    p_low, p_high = np.percentile(image, [lower, upper])

    if p_high > p_low:
        scaled = (image - p_low) / (p_high - p_low)
        scaled = scaled * (b_max - b_min) + b_min
    else:
        # Build the constant volume as float32 explicitly: np.full_like would
        # inherit an integer input dtype and truncate the midpoint (0.5 -> 0).
        scaled = np.full(image.shape, (b_max + b_min) / 2.0, dtype=np.float32)

    if clip:
        scaled = np.clip(scaled, b_min, b_max)

    return scaled.astype(np.float32)

# Main processing loop

def _sorted_subdir_paths(parent):
    """Return the paths of the sub-directories of *parent*, sorted by name."""
    paths = []
    for name in sorted(os.listdir(parent)):
        path = os.path.join(parent, name)
        if os.path.isdir(path):
            paths.append(path)
    return paths


def _find_modality_file(patient_dir, modality):
    """Return the first ``.nii.gz`` file in *patient_dir* whose name contains *modality*.

    Matching is case-insensitive. Candidates are visited in sorted order so
    the choice is reproducible when several files match. Returns ``None``
    when nothing matches.
    """
    keyword = modality.lower()
    for fn in sorted(os.listdir(patient_dir)):
        fn_lower = fn.lower()
        if fn_lower.endswith(".nii.gz") and keyword in fn_lower:
            return os.path.join(patient_dir, fn)
    return None


def process_and_save_all():
    """Normalize every modality volume under ``SOURCE_ROOT`` into ``SAVE_ROOT``.

    Expects ``SOURCE_ROOT/<class>/<stage>/<patient>/`` folders. For each entry
    of ``MODALITIES`` the matching file is loaded, rescaled with
    ``scale_intensity_range_percentiles`` and written as
    ``<modality>_norm.nii.gz`` under the same relative folder in ``SAVE_ROOT``.
    Missing modalities and failed files are counted and reported at the end;
    a failure on one file does not stop the run.
    """
    total_saved = 0
    total_skipped = 0
    total_failed = 0

    for class_path in _sorted_subdir_paths(SOURCE_ROOT):
        for stage_path in _sorted_subdir_paths(class_path):
            for patient_dir in _sorted_subdir_paths(stage_path):

                # Preserve the class/stage/patient structure in the output
                rel_dir = os.path.relpath(patient_dir, SOURCE_ROOT)
                save_dir = os.path.join(SAVE_ROOT, rel_dir)

                for mod in MODALITIES:
                    found = _find_modality_file(patient_dir, mod)
                    if found is None:
                        total_skipped += 1
                        continue

                    # Load + normalize + save
                    try:
                        nii = nib.load(found)
                        img = nii.get_fdata().astype(np.float32)
                        img_norm = scale_intensity_range_percentiles(img)

                        # A header passed to Nifti1Image keeps its on-disk
                        # dtype; if the source was stored as integers the
                        # [0, 1] floats would be quantised on save. Store
                        # float32 explicitly instead.
                        out_header = nii.header.copy()
                        out_header.set_data_dtype(np.float32)

                        os.makedirs(save_dir, exist_ok=True)
                        save_path = os.path.join(save_dir, f"{mod}_norm.nii.gz")
                        nib.save(
                            nib.Nifti1Image(img_norm, nii.affine, out_header),
                            save_path,
                        )

                        total_saved += 1
                        print(f"Saved: {save_path}")

                    except Exception as e:
                        total_failed += 1
                        print(f"Failed: {found}")
                        print(f"Reason: {e}")

    print("=" * 50)
    print(
        f"Done. Saved: {total_saved} | Missing modality skipped: {total_skipped}"
        f" | Failed: {total_failed}"
    )


if __name__ == "__main__":
    process_and_save_all()

# Cropping

In [None]:
import os
import numpy as np
import nibabel as nib

# Input root (volumes to resize) and output root (resized volumes)
SOURCE_DIR = "/content/drive/MyDrive/LiQA_training_data/train_normalized"
OUTPUT_DIR = "/content/drive/MyDrive/LiQA_training_data/train_resized"

# Target shape for (dim0, dim1, dim2) of the numpy array;
# every volume is padded or center-cropped to exactly this shape
TARGET_SHAPE = (200, 200, 64)

# Bring an array to a fixed shape by centred padding and/or centred cropping
def resize_with_pad_or_crop(image, target_shape, mode="symmetric"):
    """
    Force ``image`` to ``target_shape``, one axis at a time.

    An axis that is too short is padded on both sides (the extra element of
    an odd amount goes after the data); an axis that is too long keeps its
    central ``target`` elements. Axes that already match are left untouched.

    Parameters
    ----------
    image : np.ndarray
        Input 3D image array.
    target_shape : tuple
        Desired shape (s0, s1, s2).
    mode : str
        ``'symmetric'`` mirrors the data into the padding; any other value
        pads with a constant (zeros).

    Returns
    -------
    np.ndarray
        float32 array with shape == target_shape.
    """
    pad_mode = "symmetric" if mode == "symmetric" else "constant"
    n_axes = len(image.shape)
    out = image.copy()

    for axis, (have, want) in enumerate(zip(image.shape, target_shape)):
        missing = want - have

        if missing > 0:
            # Too short: split the padding as evenly as possible
            before = missing // 2
            widths = [
                (before, missing - before) if ax == axis else (0, 0)
                for ax in range(n_axes)
            ]
            out = np.pad(out, widths, mode=pad_mode)

        elif missing < 0:
            # Too long: keep the central window of length `want`
            lo = (have - want) // 2
            window = tuple(
                slice(lo, lo + want) if ax == axis else slice(None)
                for ax in range(n_axes)
            )
            out = out[window]

    return out.astype(np.float32)

# Folder-level processing
def _grid_offsets(current_shape, target_shape):
    """Return, per axis, the input-grid index of the output's first voxel.

    Uses the same centring arithmetic as ``resize_with_pad_or_crop``:
    positive when the axis is cropped, negative when it is padded, zero when
    the axis is unchanged.
    """
    offsets = []
    for current, target in zip(current_shape, target_shape):
        if current > target:
            offsets.append((current - target) // 2)
        else:
            offsets.append(-((target - current) // 2))
    return offsets


def process_folder(input_dir: str, output_dir: str, target_shape: tuple):
    """Pad/crop every ``.nii.gz`` file under *input_dir* to *target_shape*.

    The directory structure below *input_dir* is reproduced under
    *output_dir*. Each output keeps the source header (stored as float32) and
    gets an affine shifted by the crop/pad offset, so that voxels keep their
    original world coordinates. A failure on one file is reported and counted
    without stopping the run.

    Parameters
    ----------
    input_dir : str
        Root folder that is searched recursively.
    output_dir : str
        Root folder for the resized files.
    target_shape : tuple
        Output array shape (s0, s1, s2).
    """
    total = 0
    success = 0
    failed = 0

    for root, _, files in os.walk(input_dir):
        for fname in sorted(files):
            if not fname.lower().endswith(".nii.gz"):
                continue

            total += 1
            in_path = os.path.join(root, fname)

            rel_dir = os.path.relpath(root, input_dir)
            out_dir = os.path.join(output_dir, rel_dir)
            os.makedirs(out_dir, exist_ok=True)

            out_path = os.path.join(out_dir, fname)

            try:
                # Load
                img = nib.load(in_path)
                data = img.get_fdata().astype(np.float32)

                # Resize (pad/crop)
                resized = resize_with_pad_or_crop(data, target_shape, mode="symmetric")

                # Output voxel i corresponds to input voxel i + offset, so the
                # affine must absorb that translation; reusing the original
                # affine would shift the image in world space.
                offsets = _grid_offsets(data.shape, target_shape)[:3]
                shift = np.eye(4)
                shift[:len(offsets), 3] = offsets
                out_affine = img.affine @ shift

                # A header passed to Nifti1Image keeps its on-disk dtype;
                # request float32 so the values are not quantised on save.
                out_header = img.header.copy()
                out_header.set_data_dtype(np.float32)

                out_img = nib.Nifti1Image(resized, out_affine, out_header)
                nib.save(out_img, out_path)

                success += 1
                print(f"Saved: {out_path} | shape={resized.shape}")

            except Exception as e:
                failed += 1
                print(f"Failed: {in_path}")
                print(f"Reason: {e}")

    print("=" * 50)
    print(f"Done. Total: {total} | Success: {success} | Failed: {failed}")


if __name__ == "__main__":
    process_folder(SOURCE_DIR, OUTPUT_DIR, TARGET_SHAPE)

# Slice removal

In [None]:
import os
import nibabel as nib
import numpy as np

def main(
    input_base='/content/drive/MyDrive/LiQA_training_data/train_resized',
    output_base='/content/drive/MyDrive/LiQA_training_data/train_sliced',
    n_remove=20,
):
    """Drop the first *n_remove* slices (third axis) of every NIfTI volume.

    The directory tree below *input_base* is mirrored under *output_base*.
    Volumes with ``n_remove`` slices or fewer are skipped with a warning. The
    saved affine is shifted by the removed slices so that the remaining
    voxels keep their world coordinates. A failure on one file is reported
    without aborting the run.

    Parameters
    ----------
    input_base : str
        Root folder that is searched recursively for ``.nii`` / ``.nii.gz``.
    output_base : str
        Root folder for the trimmed volumes.
    n_remove : int
        Number of leading slices to discard along the third axis.

    Raises
    ------
    ValueError
        If *n_remove* is negative.
    """
    if n_remove < 0:
        raise ValueError(f"n_remove must be non-negative, got {n_remove}")

    # Traverse all subdirectories and files
    for root, _dirs, files in os.walk(input_base):

        # Mirror the relative location of this directory in the output tree
        rel_dir = os.path.relpath(root, input_base)
        out_dir = os.path.join(output_base, rel_dir)
        os.makedirs(out_dir, exist_ok=True)

        for fname in sorted(files):
            # Case-insensitive check
            if not fname.lower().endswith(('.nii', '.nii.gz')):
                continue

            in_path = os.path.join(root, fname)
            out_path = os.path.join(out_dir, fname)

            try:
                nii = nib.load(in_path)

                # Check the slice count from the header before loading voxels
                n_slices = nii.shape[2]
                if n_slices <= n_remove:
                    print(f"Warning: {in_path} has only {n_slices} slices, skipping.")
                    continue

                data = np.asarray(nii.get_fdata(), dtype=np.float32)  # (X, Y, Z)
                data_cropped = data[:, :, n_remove:]

                # New slice k is old slice k + n_remove: move the origin along
                # the third voxel axis so world coordinates stay correct.
                shift = np.eye(4)
                shift[2, 3] = n_remove
                new_affine = nii.affine @ shift

                # A header passed to Nifti1Image keeps its on-disk dtype;
                # request float32 so the values are not quantised on save.
                new_header = nii.header.copy()
                new_header.set_data_dtype(np.float32)

                new_nii = nib.Nifti1Image(
                    data_cropped,
                    affine=new_affine,
                    header=new_header
                )

                nib.save(new_nii, out_path)
                print(f"Saved: {out_path}")

            except Exception as e:
                print(f"Failed: {in_path}")
                print(f"Reason: {e}")


if __name__ == "__main__":
    main()

# Missing modality handling

In [None]:
# Missing Modality Completion

import os
import nibabel as nib
import numpy as np

# Root directory that is walked recursively, and the modality keywords that
# every folder containing NIfTI files is expected to provide
base_dir   = '/content/drive/MyDrive/LiQA_training_data/train_sliced'
modalities = ['T1', 'T2', 'DWI']

# Helper: fuzzy match modality file in a patient folder
def find_modality_file(patient_dir, modality):
    """
    Find a .nii/.nii.gz file in patient_dir whose name contains the given
    modality keyword (case-insensitive).

    Entries are examined in sorted order, so the same file is returned on
    every run even when several files match (os.listdir order is arbitrary).

    Return: os.path.join(patient_dir, file_name) of the first match, or None
    when no file matches.
    """
    keyword = modality.lower()
    for f in sorted(os.listdir(patient_dir)):
        fl = f.lower()
        if keyword in fl and fl.endswith(('.nii', '.nii.gz')):
            return os.path.join(patient_dir, f)
    return None

# Traverse all patient directories and add a zero-filled volume for every
# modality that has no file yet
for root, dirs, files in os.walk(base_dir):

    # Only process folders that contain any nii files
    nii_files = [f for f in files if f.lower().endswith(('.nii', '.nii.gz'))]
    if not nii_files:
        continue

    # Find a reference modality (priority follows the order of `modalities`)
    ref_path = None
    for m in modalities:
        p = find_modality_file(root, m)
        if p:
            ref_path = p
            break

    if ref_path is None:
        print(f"No reference modality found in: {root}")
        continue

    # Load reference modality info. Shape, affine and header all come from
    # the file header, so the voxel data never has to be read from disk.
    ref_nii = nib.load(ref_path)
    ref_shape = ref_nii.shape  # (X, Y, Z')
    ref_affine = ref_nii.affine
    ref_header = ref_nii.header

    # For each modality, if missing, create a zero-filled volume and save
    for m in modalities:
        if find_modality_file(root, m):
            # Modality exists, skip
            continue

        # Zero data with the same shape as the reference; the array dtype is
        # irrelevant for the stored values (all zeros), float32 keeps it small
        zero_data = np.zeros(ref_shape, dtype=np.float32)
        new_nii = nib.Nifti1Image(zero_data, affine=ref_affine, header=ref_header)

        # Save as *_norm.nii.gz
        out_name = f"{m}_norm.nii.gz"
        out_path = os.path.join(root, out_name)
        nib.save(new_nii, out_path)

        print(f"Completed {m} -> {out_path}")

In [None]:
# Modality Mask Generation

import os
import nibabel as nib
import numpy as np

# Modalities recorded in the mask (in this order) and the dataset root to scan
modalities = ['T1', 'T2', 'DWI']
base_dir = '/content/drive/MyDrive/LiQA_training_data/train_sliced'

# Walk the tree; every directory that holds NIfTI files gets a mask file
for root, dirs, files in os.walk(base_dir):

    # Directories without any NIfTI file are ignored
    nii_files = [f for f in files if f.lower().endswith(('.nii', '.nii.gz'))]
    if len(nii_files) == 0:
        continue

    mask = []
    for m in modalities:
        keyword = m.lower()

        # Fuzzy match: first NIfTI file whose name contains the keyword
        candidates = [name for name in nii_files if keyword in name.lower()]

        if candidates:
            fp = os.path.join(root, candidates[0])
            data = nib.load(fp).get_fdata()
            # An all-zero volume counts as "missing" (flag 0)
            mask.append(1 if data.any() else 0)
        else:
            # No file for this modality → missing
            fp = None
            mask.append(0)

    # One int8 flag per modality, stored next to the volumes
    mask = np.array(mask, dtype=np.int8)
    np.save(os.path.join(root, 'modality_mask.npy'), mask)

    print(f"{root} → modality_mask = {mask}")


# NPZ Conversion

In [None]:
import os
import numpy as np
import nibabel as nib
import shutil

def _subdirs(parent):
    """Return the sorted names of the sub-directories of *parent* (files are ignored)."""
    return sorted(
        name for name in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, name))
    )


def _pack_patient(folder, npz_path, modalities):
    """Stack the modality volumes of one patient folder into *npz_path*.

    Raises
    ------
    FileNotFoundError
        If ``modality_mask.npy`` or the volume of any modality is missing.
    """
    mask_path = os.path.join(folder, 'modality_mask.npy')
    if not os.path.isfile(mask_path):
        raise FileNotFoundError(f"modality_mask.npy not found in {folder}")
    mask = np.load(mask_path)  # one flag per modality

    names = sorted(os.listdir(folder))
    vols = []
    for m in modalities:
        prefix = m.lower()
        fn = next(
            (f for f in names
             if f.lower().startswith(prefix) and f.lower().endswith('.nii.gz')),
            None,
        )
        if fn is None:
            raise FileNotFoundError(f"No {m} volume (*.nii.gz) found in {folder}")
        arr = nib.load(os.path.join(folder, fn)).get_fdata().astype(np.float32)
        vols.append(arr)  # arr.shape == (X, Y, Z)

    image = np.stack(vols, axis=0)  # shape -> (len(modalities), X, Y, Z)

    # Write to a temporary name and rename afterwards: an interrupted run must
    # not leave a truncated data.npz that later runs would skip as "done".
    tmp_path = npz_path + '.tmp.npz'
    try:
        np.savez(tmp_path, image=image, mask=mask.astype(np.int8))
        os.replace(tmp_path, npz_path)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def main(
    src_base='/content/drive/MyDrive/LiQA_training_data/train_sliced',
    dst_base='/content/drive/MyDrive/LiQA_training_data/npz_sliced',
    modalities=('T1', 'T2', 'DWI'),
):
    """Pack each patient folder into ``data.npz`` and copy the archives to *dst_base*.

    Step 1: for every ``src_base/<class>/<stage>/<patient>/`` folder without a
    ``data.npz``, stack the ``<modality>*.nii.gz`` volumes (float32) together
    with ``modality_mask.npy`` into ``data.npz`` (keys ``image`` and ``mask``).
    Folders that cannot be packed are reported and skipped.

    Step 2: mirror the directory tree under *dst_base* and copy every
    ``data.npz`` to the matching location.

    Parameters
    ----------
    src_base : str
        Root of the class/stage/patient tree holding the NIfTI volumes.
    dst_base : str
        Root that receives the copied ``data.npz`` files.
    modalities : sequence of str
        Modality file-name prefixes, in the channel order of ``image``.
    """
    # Pack *_norm.nii.gz + modality_mask.npy into data.npz for each patient folder
    for cls in _subdirs(src_base):
        cls_dir = os.path.join(src_base, cls)
        for stage in _subdirs(cls_dir):
            stage_dir = os.path.join(cls_dir, stage)
            for pid in _subdirs(stage_dir):
                folder = os.path.join(stage_dir, pid)
                npz_path = os.path.join(folder, 'data.npz')

                # Skip if data.npz already exists
                if os.path.exists(npz_path):
                    continue

                # One broken patient folder must not abort the whole run
                try:
                    _pack_patient(folder, npz_path, modalities)
                except Exception as e:
                    print(f"Failed: {folder}")
                    print(f"Reason: {e}")
                    continue

                print(f"Packed → {npz_path}")

    # Copy all data.npz to a new root while preserving class/stage/patient structure
    for root, _dirs, files in os.walk(src_base):
        rel = os.path.relpath(root, src_base)  # e.g. 'class1/S4/1179-B1-S4'
        dst_dir = os.path.join(dst_base, rel)
        os.makedirs(dst_dir, exist_ok=True)

        if 'data.npz' in files:
            src_file = os.path.join(root, 'data.npz')
            dst_file = os.path.join(dst_dir, 'data.npz')
            shutil.copy(src_file, dst_file)
            print(f"Copied: {src_file} → {dst_file}")


if __name__ == "__main__":
    main()