# PyTorch

## Installation

In [None]:
!pip install -qq SimpleITK

## Data nomenclature

Patients

In [None]:
def get_patient_indices():
  """Return the hard-coded list of patient numbers processed by this notebook."""
  return [
      1, 2, 5, 6, 8, 10, 14, 16, 18, 19,
      21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
  ]

Folders

In [None]:
def get_data_folder():
  """Return the relative path (with trailing slash) of the data root folder."""
  return 'data/'

def get_image_folder():
  """Return the path of the `imagesTr/` sub-folder located under the data folder."""
  return '{}imagesTr/'.format(get_data_folder())

def get_ground_truth_folder():
  """Return the path of the `labelsTr/` sub-folder located under the data folder."""
  return '{}labelsTr/'.format(get_data_folder())

Files

In [None]:
def get_file_extension():
  """Return the extension (leading dot included) shared by all image and label files."""
  return '.nii.gz'

def get_image_file_name(patient_no, modality_no=0):
  """Build the path of the image file of a patient for a given modality.

  The result has the form `<image folder>patientID<patient_no>_<MMMM><extension>`,
  where `<MMMM>` is `modality_no` zero-padded to four digits.
  """
  modality_suffix = '_{:04.0f}'.format(modality_no)
  parts = [
      get_image_folder(),
      'patientID',
      str(patient_no),
      modality_suffix,
      get_file_extension(),
  ]

  return ''.join(parts)

def get_ground_truth_file_name(patient_no):
  """Build the path of the label file of a patient.

  The result has the form `<ground-truth folder>patientID<patient_no><extension>`.
  """
  parts = [
      get_ground_truth_folder(),
      'patientID',
      str(patient_no),
      get_file_extension(),
  ]

  return ''.join(parts)

## Import data

In [None]:
%cd /content
!curl -O https://zenodo.org/records/3431873/files/CHAOS_Train_Sets.zip
!unzip -qq CHAOS_Train_Sets.zip

In [None]:
# Fetch the helper repository whose conversion script is run a few cells below.
%cd /content
!git clone https://github.com/woctezuma/playing-with-simpleitk.git


In [None]:
# Move the Train_Sets folder (presumably produced by the unzip step above) into
# the data/ directory of the cloned repository.
%mv /content/Train_Sets /content/playing-with-simpleitk/data/

In [None]:
# Run the conversion script from the root of the cloned repository.
# NOTE(review): judging by its name and by the next cell, the script presumably
# writes NIfTI files to data/output/ -- confirm against the repository.
%cd /content/playing-with-simpleitk
!python convert_to_nii.py

In [None]:
# Gather the content of the conversion output folder into /content/data/,
# which is the `data/` folder that the path helpers above refer to when the
# working directory is /content.
%mkdir -p /content/data
%mv /content/playing-with-simpleitk/data/output/* /content/data/

## Tests

In [None]:
# Absolute prefix prepended to the relative data paths in the two test cells below.
root_folder = '/content/'

In [None]:
# Use the first patient of the list for the quick sanity checks below.
patient_indices = get_patient_indices()
patient_no = patient_indices[0]

In [None]:
import numpy as np
import SimpleITK as sitk

# Sanity check: read the image (default modality 0) of the selected patient.
image_name = root_folder + get_image_file_name(patient_no)

image = sitk.ReadImage(image_name)
print(image.GetSize())

# View on the image buffer as a NumPy array.
v = sitk.GetArrayViewFromImage(image)

# Last expression of the cell: displays the distinct values found in the image.
np.unique(v)

In [None]:
import numpy as np
import SimpleITK as sitk

# Sanity check: read the label map of the selected patient.
# Note that `image_name`, `image` and `v` from the previous cell are overwritten.
image_name = root_folder + get_ground_truth_file_name(patient_no)

image = sitk.ReadImage(image_name)
print(image.GetSize())

# View on the label buffer as a NumPy array.
v = sitk.GetArrayViewFromImage(image)

# Last expression of the cell: displays the distinct label values.
np.unique(v)

## Down-sample images

Reference: https://github.com/jonasteuwen/SimpleITK-examples/blob/master/examples/resample_isotropically.py

In [None]:
# Lookup table from interpolator name to the matching SimpleITK constant.
# Adapted from:
# https://github.com/SimpleITK/SlicerSimpleFilters/blob/master/SimpleFilters/SimpleFilters.py
_SITK_INTERPOLATOR_DICT = dict(
    nearest=sitk.sitkNearestNeighbor,
    linear=sitk.sitkLinear,
    gaussian=sitk.sitkGaussian,
    label_gaussian=sitk.sitkLabelGaussian,
    bspline=sitk.sitkBSpline,
    hamming_sinc=sitk.sitkHammingWindowedSinc,
    cosine_windowed_sinc=sitk.sitkCosineWindowedSinc,
    welch_windowed_sinc=sitk.sitkWelchWindowedSinc,
    lanczos_windowed_sinc=sitk.sitkLanczosWindowedSinc,
)

In [None]:
import SimpleITK as sitk
import os
from glob import glob
from tqdm import tqdm
import numpy as np

def resample_sitk_image(sitk_image, spacing=None, interpolator=None,
                        fill_value=0):
    """Resample a SimpleITK image onto a grid with a new voxel spacing.

    The output grid keeps the origin, direction and pixel type of the input
    image; only the spacing, and consequently the size, change. The new size
    is `ceil(size * old_spacing / new_spacing)` along each axis.

    Parameters
    ----------
    sitk_image : SimpleITK image or str
      Either a SimpleITK image or a path to a SimpleITK readable file.
    spacing : sequence of numbers, optional
      Target spacing, one value per image dimension. If omitted, the image is
      resampled isotropically to the smallest value of its current spacing.
    interpolator : str, optional
      A key of `_SITK_INTERPOLATOR_DICT` (e.g. `nearest` or `linear`). If
      omitted, it is inferred from the pixel type: `nearest` for 8-bit
      unsigned images (e.g. masks), `linear` for 16- and 32-bit signed
      integer images.
    fill_value : int
      Value given to output voxels that fall outside of the input image.

    Returns
    -------
    SimpleITK image.

    Raises
    ------
    NotImplementedError
      If `interpolator` is omitted and cannot be inferred from the pixel type.
    ValueError
      If `spacing` does not have one value per image dimension.
    AssertionError
      If `interpolator` is not a key of `_SITK_INTERPOLATOR_DICT`.
    """

    if isinstance(sitk_image, str):
        sitk_image = sitk.ReadImage(sitk_image)
    num_dim = sitk_image.GetDimension()

    if not interpolator:
        interpolator = 'linear'
        pixelid = sitk_image.GetPixelIDValue()

        if pixelid not in [1, 2, 4]:
            raise NotImplementedError(
                'Set `interpolator` manually, '
                'can only infer for 8-bit unsigned or 16, 32-bit signed integers')
        if pixelid == 1: #  8-bit unsigned int
            interpolator = 'nearest'

    assert interpolator in _SITK_INTERPOLATOR_DICT.keys(),\
        '`interpolator` should be one of {}'.format(_SITK_INTERPOLATOR_DICT.keys())

    sitk_interpolator = _SITK_INTERPOLATOR_DICT[interpolator]

    orig_pixelid = sitk_image.GetPixelIDValue()
    orig_origin = sitk_image.GetOrigin()
    orig_direction = sitk_image.GetDirection()
    orig_spacing = np.array(sitk_image.GetSpacing())
    # `np.int` was removed from NumPy (1.24); the builtin `int` is equivalent.
    orig_size = np.array(sitk_image.GetSize(), dtype=int)

    if not spacing:
        # Plain Python floats, as expected by the SimpleITK wrappers.
        new_spacing = [float(orig_spacing.min())]*num_dim
    else:
        new_spacing = [float(s) for s in spacing]

    if len(new_spacing) != num_dim:
        raise ValueError(
            '`spacing` should have {} values, got {}'.format(num_dim,
                                                             len(new_spacing)))

    new_size = orig_size*(orig_spacing/np.asarray(new_spacing))
    new_size = np.ceil(new_size).astype(int) #  Image dimensions are in integers
    new_size = [int(s) for s in new_size] #  SimpleITK expects lists, not ndarrays

    # Describe the output grid explicitly. With only `(image, size)`, SimpleITK
    # falls back to unit spacing, a zero origin, an identity direction and
    # linear interpolation, i.e. the requested spacing and interpolator would
    # be silently ignored.
    resampled_sitk_image = sitk.Resample(sitk_image,
                                         new_size,
                                         sitk.Transform(),  # identity transform
                                         sitk_interpolator,
                                         orig_origin,
                                         new_spacing,
                                         orig_direction,
                                         fill_value,
                                         orig_pixelid)

    return resampled_sitk_image


Down-sample for faster checks.

In [None]:
# Target voxel spacing used by the two down-sampling loops below: the same
# value (4) along each of the 3 axes.
# NOTE(review): the unit is presumably millimetres -- confirm.
num_dim = 3
new_spacing = [4]*num_dim

In [None]:
# The path helpers return paths relative to `data/`; run the next cells from /content.
%cd /content

In [None]:
# Down-sample every label map and overwrite the file in place.
# This cell is placed before the image down-sampling cell: each label map takes
# its geometry from the corresponding intensity image as read from disk here.
for patient_no in get_patient_indices():

  file_name = get_ground_truth_file_name(patient_no)

  print(file_name)

  original_image = sitk.ReadImage(file_name)

  # Copy information w.r.t. original spacing
  original_intensity_image = sitk.ReadImage(get_image_file_name(patient_no))
  original_image.CopyInformation(original_intensity_image)

  # No interpolator is given: it is inferred from the pixel type of the label map.
  resampled_image = resample_sitk_image(original_image, spacing=new_spacing)

  print(original_image.GetSize())
  print(resampled_image.GetSize())

  # The original label file is replaced by its down-sampled version.
  sitk.WriteImage(resampled_image, file_name)

In [None]:
# Down-sample every intensity image (default modality 0) and overwrite the file in place.
for patient_no in get_patient_indices():

  file_name = get_image_file_name(patient_no)

  print(file_name)

  original_image = sitk.ReadImage(file_name)
  # No interpolator is given: it is inferred from the pixel type of the image.
  resampled_image = resample_sitk_image(original_image,
                                        spacing=new_spacing)

  print(original_image.GetSize())
  print(resampled_image.GetSize())

  # The original image file is replaced by its down-sampled version.
  sitk.WriteImage(resampled_image, file_name)

[DONE] Check the label images after the information was copied, to ensure that their orientation is correctly interpreted by ITK-SNAP.

## Binarize label maps

nnUNet wants consecutive labels: 0, 1, etc.

Originally, the label map in the CHAOS challenge contains only two labels:
- 0 (background),
- 255 (region of interest).

We can simply binarize the label map.


In [None]:
import SimpleITK as sitk
import numpy as np

# Rewrite every label map in place as a {0, 1} map: voxels strictly above half
# of the largest label value become 1, all the others become 0.
for patient_no in get_patient_indices():
  print('Patient n°{}'.format(patient_no))

  input_image_name = get_ground_truth_file_name(patient_no)
  input_image = sitk.ReadImage(input_image_name)
  print('Image size: {}'.format(input_image.GetSize()))

  v = sitk.GetArrayFromImage(input_image)
  labels = np.unique(v)
  print('Labels: {}'.format(labels))

  # Threshold halfway between 0 and the largest label
  max_val = max(labels)
  median_val = max_val/2
  print(median_val)

  # Binarize, keeping the data type of the input array
  binarized_v = (v>median_val).astype(v.dtype)
  labels = np.unique(binarized_v)
  print('Labels: {}'.format(labels))

  # Back to an image, with the meta-data of the input image
  output_image = sitk.GetImageFromArray(binarized_v)
  output_image.CopyInformation(input_image)

  # The input file is overwritten
  output_image_name = input_image_name
  sitk.WriteImage(output_image, output_image_name)
  print('Image size: {}'.format(output_image.GetSize()))


## nnUNet

### Installation

In [None]:
# Clone nnUNet and install it in editable mode from the cloned folder.
%cd /content
!git clone https://github.com/MIC-DKFZ/nnUNet.git
%cd nnUNet
%pip install -e .

### Copy data to the right folder

In [None]:
# Move the prepared data folder (imagesTr/ and labelsTr/) into the raw-data
# folder, renamed as the dataset folder `Dataset000_MY_DATASET`.
# After this cell, /content/data no longer exists.
%mkdir -p /content/nnUNet_base/nnUNet_raw_splitted/
%mv /content/data /content/nnUNet_base/nnUNet_raw_splitted/Dataset000_MY_DATASET

## Write dataset.json at the root of Dataset000_MY_DATASET/

Reference: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/LiverTumorSegmentationChallenge.py

In [None]:
# Dataset folder in which dataset.json is written by the next cell.
output_folder = "/content/nnUNet_base/nnUNet_raw_splitted/Dataset000_MY_DATASET/"

# One case identifier per patient, matching the `patientID<N>` prefix of the
# image and label file names.
train_ids = [
             'patientID{}'.format(patient_no)
             for patient_no in get_patient_indices()
]

In [None]:
import os
import json
from collections import OrderedDict

# Content of dataset.json: a single input channel, two labels, the number of
# training cases, and the file extension of the images.
json_dict = OrderedDict([
    ('channel_names', {"0": "CT"}),
    ('labels', {"background": 0, "liver": 1}),
    ('numTraining', len(train_ids)),
    ('file_ending', ".nii.gz"),
])

# json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in train_ids]
# json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_ids]

# Keys are sorted on output, so the insertion order above does not matter.
with open(os.path.join(output_folder, "dataset.json"), 'w') as f:
    json.dump(json_dict, f, sort_keys=True, indent=4)

## How many processes (pl, pf) on Google Colab?

Apparently, 2.


In [None]:
# List the processors of the machine, to choose how many processes to use.
!cat /proc/cpuinfo

## Run pre-processing

In [None]:
# The training and inference cells below call scripts through paths relative
# to this folder (run/..., inference/...).
%cd /content/nnUNet/nnunetv2

In [None]:
# Folders used by nnUNet for the raw datasets, the preprocessed data and the
# results, respectively. `nnUNet_raw` is the parent folder of Dataset000_MY_DATASET.
%env nnUNet_raw=/content/nnUNet_base/nnUNet_raw_splitted/
%env nnUNet_preprocessed=/content/nnUNet_preprocessed/
%env nnUNet_results=/content/RESULTS_FOLDER/

In [None]:
# Plan and preprocess dataset 0 (Dataset000_MY_DATASET), checking the
# integrity of the dataset first.
!nnUNetv2_plan_and_preprocess -d 0 --verify_dataset_integrity

In [None]:
# Check what was written to the preprocessed-data folder of the dataset.
!ls /content/nnUNet_preprocessed/Dataset000_MY_DATASET/

## Training

In [None]:
# Show the command-line help of the training script (path relative to
# /content/nnUNet/nnunetv2, the working directory set above).
!python run/run_training.py -h

In [None]:
# Train the `3d_fullres` configuration on Dataset000_MY_DATASET with fold `all`.
# NOTE(review): fold `all` presumably means that every case is used for
# training, with no held-out validation split -- confirm in the help output above.
!OMP_NUM_THREADS=1 python run/run_training.py Dataset000_MY_DATASET 3d_fullres all

## Inference

In [None]:
# Show the command-line help of the inference script (path relative to
# /content/nnUNet/nnunetv2, the working directory set above).
!OMP_NUM_THREADS=1 python inference/predict_from_raw_data.py -h

In [None]:
# Inference is run on the training images themselves.
INPUT_FOLDER='/content/nnUNet_base/nnUNet_raw_splitted/Dataset000_MY_DATASET/imagesTr/'

In [None]:
# IPython substitutes the Python variable INPUT_FOLDER into the shell command.
!ls $INPUT_FOLDER

In [None]:
# Folder receiving the predicted segmentations; it is (re-)created here, since
# /content/data was moved into the nnUNet raw-data folder earlier.
OUTPUT_FOLDER='/content/data/output/'
!mkdir -p $OUTPUT_FOLDER

In [None]:
# IPython substitutes the Python variable OUTPUT_FOLDER into the shell command.
!ls $OUTPUT_FOLDER

In [None]:
# NOTE(review): nnUNet was already installed in editable mode from the git
# clone in the "Installation" section; this install from the package index may
# be redundant -- confirm which installation provides `nnUNetv2_predict`.
%pip install nnunetv2

In [None]:
# Predict a segmentation for every image of INPUT_FOLDER with the model
# trained above (same dataset, fold `all`, configuration `3d_fullres`), and
# write the results to OUTPUT_FOLDER.
!nnUNetv2_predict -i $INPUT_FOLDER -o $OUTPUT_FOLDER -d Dataset000_MY_DATASET -f all -c 3d_fullres

## Visualize segmentation results

In [None]:
import numpy as np
import SimpleITK as sitk

# Compare every predicted segmentation with the corresponding ground truth
# through the Dice score, 2*|A & B| / (|A| + |B|), where A and B are the sets
# of non-zero voxels of the two label maps.
dice_scores = []
original_data_folder = '/content/nnUNet_base/nnUNet_raw_splitted/Dataset000_MY_DATASET/'
downsampled_data_folder = '/content/data/'

for patient_no in get_patient_indices():
  ground_truth_name = original_data_folder + 'labelsTr/patientID' + str(patient_no) + '.nii.gz'
  prediction_name = downsampled_data_folder + 'output/patientID' + str(patient_no) + '.nii.gz'

  ground_truth = sitk.ReadImage(ground_truth_name)
  prediction = sitk.ReadImage(prediction_name)

  v = sitk.GetArrayViewFromImage(ground_truth)
  w = sitk.GetArrayViewFromImage(prediction)

  # Work on boolean masks: multiplying small-integer arrays could overflow
  # for label values other than 0 and 1.
  num_overlapping = np.sum(np.logical_and(v>0, w>0))
  num_foreground = np.sum(v>0) + np.sum(w>0)

  if num_foreground > 0:
    dice_score = 2 * num_overlapping / num_foreground
  else:
    # Both masks are empty: perfect agreement by convention (avoids 0/0).
    dice_score = 1.0

  print('Patient n°{} ; Dice = {:.3f}'.format(patient_no,
                                              dice_score))

  dice_scores.append(dice_score)

# NOTE(review): `num_patients` was not defined anywhere in this notebook, so
# this cell used to fail with a NameError. Training above uses fold `all` on a
# dataset which contains every patient of the list, hence all of them are
# counted as training patients here. Lower this value if only the first
# patients were used for training.
num_patients = len(dice_scores)

print('\n[training and validation dataset] Average Dice score = {:.3f} (#patients={})'.format(np.mean(dice_scores),
                                                          len(dice_scores)))
print('NB: training was done with the first {} patients. Look for possible over-fitting!'.format(num_patients))

dice_scores_for_training = dice_scores[:num_patients]

print('\n[training dataset] Average Dice score = {:.3f} (#patients={})'.format(np.mean(dice_scores_for_training),
                                                          len(dice_scores_for_training)))

dice_scores_for_validation = dice_scores[num_patients:]

if dice_scores_for_validation:
  print('\n[validation dataset] Average Dice score = {:.3f} (#patients={})'.format(np.mean(dice_scores_for_validation),
                                                          len(dice_scores_for_validation)))
else:
  # The mean of an empty list would be nan (with a runtime warning).
  print('\n[validation dataset] No patient was held out for validation.')


[DONE] Download the data and display the segmentation overlaid on the CT images with ITK-SNAP.