This notebook present a fake example of a segmentation pipeline to show what we expect as file for the submission.
We assume that the model is trained on 3D images resampled at 1 $mm^3$ and that the input shape of the model is 
[batch_size, x_dim, y_dim, z_dim, channel]. The channel are the CT and PT images. The output image is a binary segmentation
and its shape is [batch_size, x_dim, y_dim, z_dim, 1].

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm import tqdm

In [2]:
test_folder = Path("../data/hecktor2021_test/hecktor_nii").resolve()
results_folder = Path("../data/dummy_segmentation_results/").resolve()
results_folder.mkdir(exist_ok=True)

bbox_df = pd.read_csv("../data/hecktor2021_test/hecktor2021_bbox_testing.csv").set_index("PatientID")

In [3]:
def dummy_model(x):
    return np.random.uniform(size=x.shape[:4] + (1, )) 

In [4]:
patient_list = [f.name[:7] for f in test_folder.rglob("*_ct.nii.gz")]

In [5]:
# Instantiating the resampler
resampling_spacing = np.array([1.0, 1.0, 1.0])
pre_resampler = sitk.ResampleImageFilter()
pre_resampler.SetInterpolator(sitk.sitkBSpline)
pre_resampler.SetOutputSpacing(resampling_spacing)

post_resampler = sitk.ResampleImageFilter()
post_resampler.SetInterpolator(sitk.sitkNearestNeighbor)

In [10]:
for p_id in tqdm(patient_list[:1]):
    # loading the images and storing the ct spacing
    image_ct = sitk.ReadImage(str(test_folder / (p_id + "_ct.nii.gz")))
    image_pt = sitk.ReadImage(str(test_folder / (p_id + "_pt.nii.gz")))
    spacing_ct = image_ct.GetSpacing()

    # getting the bounding box
    bb = np.squeeze(
        np.array([
            bbox_df.loc[p_id, ["x1", "y1", "z1", "x2", "y2", "z2"]],
        ]))

    # resampling the images
    resampled_size = np.round(
        (bb[3:] - bb[:3]) / resampling_spacing).astype(int)
    pre_resampler.SetOutputOrigin(bb[:3])
    pre_resampler.SetSize([int(k)
                           for k in resampled_size])  # sitk requires this
    image_ct = pre_resampler.Execute(image_ct)
    image_pt = pre_resampler.Execute(image_pt)

    # sitk to numpy, sitk stores images with [dim_z, dim_y, dim_x]
    array_ct = np.transpose(sitk.GetArrayFromImage(image_ct), (2, 1, 0))
    array_pt = np.transpose(sitk.GetArrayFromImage(image_pt), (2, 1, 0))

    # ... apply your preprocessing here

    x = np.stack([array_ct, array_pt], axis=-1)
    x = x[np.newaxis, ...]  # adding batch dimension
    segmentation = dummy_model(x)[0, :, :, :, 0]

    # do not forget to threshold your output
    segmentation = (segmentation < 0.5).astype(np.uint8)

    # numpy to sitk
    image_segmentation = sitk.GetImageFromArray(
        np.transpose(segmentation, (2, 1, 0)))

    image_segmentation.SetOrigin(bb[:3])
    image_segmentation.SetSpacing(resampling_spacing)

    # This step is optional we do exactly this with nearest neighbor resampler,
    # but if you want to use another resampling method
    # it could lead to better results
    final_size = np.round((bb[3:] - bb[:3]) / spacing_ct).astype(int)
    post_resampler.SetOutputSpacing(spacing_ct)
    post_resampler.SetOutputOrigin(bb[:3])
    post_resampler.SetSize([int(k) for k in final_size])  # sitk requires this

    image_segmentation = post_resampler.Execute(image_segmentation)

    # Saving the prediction
    sitk.WriteImage(
        image_segmentation,
        str(results_folder / (p_id + ".nii.gz")),
    )


  0%|          | 0/1 [00:00<?, ?it/s]


ValueError: operands could not be broadcast together with shapes (0,6) (3,) 

In [None]:
segmentation.shape

In [8]:
p_id = patient_list[0]
bb = np.array([
    bbox_df.loc[p_id, ["x1", "y1", "z1", "x2", "y2", "z2"]],
])


In [12]:
bb.shape

(1, 6)