# SNUH - NVIDIA MONAI BootCamp - Auto3DSeg Hippocampus
 <img src="https://github.com/Project-MONAI/MONAIBootcamp2021/raw/2f28b64f814a03703667c8ea18cc84f53d6795e4/day1/monai.png" width=400>

In [None]:
#!pip install -qU "monai-weekly[all]"

### Check GPU Support

Running `!nvidia-smi`

in a cell will verify this has worked and show you what kind of hardware you have access to.
If GPU Memory Usage is not `0 MiB`, shut down all kernels and restart the current kernel.
- step1. Shut down the kernels via <b>Menu</b> > <b>Kernel</b> > <b>Shut Down All Kernels</b>
- step2. Restart the kernel via <b>Menu</b> > <b>Kernel</b> > <b>Restart Kernel</b>


In [None]:
!nvidia-smi

### Setup imports

In [None]:
import os
from glob import glob

import numpy as np
import matplotlib.pyplot as plt
import torch

from monai.apps import download_and_extract
from monai.config import print_config
from monai.utils import set_determinism

print_config()
set_determinism(0)

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=UserWarning) 

In [None]:
 
from monai.apps.auto3dseg import (
    DataAnalyzer,
    BundleGen,
    AlgoEnsembleBestN,
    AlgoEnsembleBuilder,
    export_bundle_algo_history,
    import_bundle_algo_history,
)
from monai.auto3dseg import algo_to_pickle
from monai.bundle.config_parser import ConfigParser

from monai.utils.enums import AlgoKeys

## 1. Setting up our Dataset and exploring the data
#### Setup data directory

We'll keep all the MONAI data used in this notebook in a local directory, `./autoseg_decathlon`.

In [None]:
import os 
import glob  # binds `glob` to the module (an earlier cell imported the function), as needed by the `glob.glob(...)` calls below

# Directory under which the dataset archive is downloaded and extracted.
root_dir = './autoseg_decathlon'
print(root_dir)

## download dataset 

Downloading the hippocampus dataset (about 27 MB) should only take a short time. If the dataset has already been extracted, the cached copy is used and the download is skipped.

You can check Medical Segmentation Decathlon dataset [homepage](http://medicaldecathlon.com/)

In [None]:
%%time 
import glob
import os

# Download one Medical Segmentation Decathlon task archive and extract it under
# `root_dir`. Keep exactly one `msd_task` line uncommented to choose the task;
# the trailing comments give the archive sizes.

#resource = "https://msd-for-monai.s3-us-west-2.amazonaws.com/Task09_Spleen.tar"

#msd_task = "Task01_BrainTumour"   #   7.09GB 
#msd_task = "Task02_Heart"         # 434.6MB 
#msd_task = "Task03_Liver"         #  26.94GB 
msd_task = "Task04_Hippocampus"   #  ~27MB
#msd_task = "Task05_Prostate"      # 228.7MB 
#msd_task = "Task06_Lung"          #  28.53GB 
#msd_task = "Task07_Pancreas"      #  11.45GB 
#msd_task = "Task08_HepaticVessel" #   8.71GB 
#msd_task = "Task09_Spleen"         #   1.5GB
#msd_task = "Task10_Colon"         #   5.81MB  (NOTE(review): size unit looks suspicious - confirm)

# The archive URL is derived from the task name.
resource = "https://msd-for-monai.s3-us-west-2.amazonaws.com/" + msd_task + ".tar"

compressed_file = os.path.join(root_dir, msd_task + ".tar")
dataroot = os.path.join(root_dir, msd_task)

# Skip the download when the extracted task folder already exists.
if not os.path.exists(dataroot):
    download_and_extract(resource, compressed_file, root_dir)



In [None]:
# Collect the sorted image and label file lists from the training folders.
train_images, train_labels = (
    sorted(glob.glob(os.path.join(dataroot, folder, "*.nii.gz")))
    for folder in ("imagesTr", "labelsTr")
)

# Pair each image with the label at the same position in the sorted lists.
data_dicts = [
    dict(zip(("image", "label"), pair))
    for pair in zip(train_images, train_labels)
]

# Hold out the last nine pairs for validation; everything before them is training.
val_files = data_dicts[-9:]
train_files = data_dicts[:-9]

set_determinism(seed=0)

### visualize dataset
Let's use the nibabel library to visualize and examine the Hippocampus data, which is stored as compressed NIfTI files (`.nii.gz`).

In [None]:
#!pip install SimpleITK   nibabel

In [None]:
val_files[0]

In [None]:
def nii_loader(filename) :
    """Load a NIfTI file with nibabel.

    Args:
        filename: path of the file handed to ``nibabel.load``.

    Returns:
        A tuple ``(data, affine, header)`` taken from the loaded image:
        ``get_fdata()``, ``.affine`` and ``.header`` respectively.
    """
    # Imported locally so the helper works even if the cell that imports
    # nibabel at notebook level has not been run.
    import nibabel as nib

    nimg = nib.load( filename )
    return nimg.get_fdata() , nimg.affine, nimg.header 

def visualize( dataset, idx=0, target_layer=10 ):
    """Plot one slice of an image next to the same slice of its label.

    Args:
        dataset: indexable collection of dicts with ``'image'`` and ``'label'``
            file paths.
        idx: position of the case inside ``dataset``.
        target_layer: index along the third array axis of the slice to show.
    """
    import matplotlib.pyplot as plt

    # Resolve both paths before any file is read.
    image_path = dataset[idx]['image']
    label_path = dataset[idx]['label']

    image_data, _image_affine, _image_header = nii_loader(image_path)
    label_data, _label_affine, _label_header = nii_loader(label_path)

    print(image_data.shape, label_data.shape )

    # Slice both volumes before the figure is created.
    panels = (
        (image_data[:, :, target_layer], 'image', {'cmap': 'gray'}),
        (label_data[:, :, target_layer], 'GT segmentation', {}),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 8))
    for axis, (slice_2d, title, imshow_kwargs) in zip(axes, panels):
        axis.imshow(slice_2d, **imshow_kwargs)
        axis.set_title(title)
    plt.show()


In [None]:
visualize( val_files, idx=8,  target_layer=14 ) # check different idx

## Prepare dataset filelist with fold



#### MSD dataset structure follows the following convention:

In [None]:
# Test images, training images and training labels folders, in that order.
test_dir, train_dir, label_dir = (
    os.path.join(dataroot, subfolder)
    for subfolder in ("imagesTs/", "imagesTr/", "labelsTr/")
)

#### Construct skeleton JSON to populate with your own data

In [None]:
datalist_json = {"testing": [], "training": []}

#### Populate JSON with test data

In [None]:
# List the test images as paths relative to the dataset root.
# - sorted(): os.listdir returns entries in arbitrary order, so sorting keeps
#   the list (and any index into it) identical across machines and runs.
# - endswith(): only real `.nii.gz` files, not names that merely contain it.
# - startswith("._"): skip the hidden `._*` companion files shipped in the archive.
datalist_json["testing"] = [
    {"image": "./imagesTs/" + file}
    for file in sorted(os.listdir(test_dir))
    if file.endswith(".nii.gz") and not file.startswith("._")
]

#### list up testing data

In [None]:
datalist_json["testing"][:10]

#### Populate with training images and labels in your directory

In [None]:
# List the training images with their labels (same file name in `labelsTr`),
# as paths relative to the dataset root. Every case starts in fold 0.
# - sorted(): os.listdir returns entries in arbitrary order; a fixed starting
#   order is what makes the seeded shuffle and the fold assignment that follow
#   reproducible.
# - endswith()/startswith(): keep real `.nii.gz` files and skip hidden `._*` files.
datalist_json["training"] = [
    {"image": "./imagesTr/" + file, "label": "./labelsTr/" + file, "fold": 0}
    for file in sorted(os.listdir(train_dir))
    if file.endswith(".nii.gz") and not file.startswith("._")
]  # Initialize as single fold

#### list up training data

In [None]:
datalist_json["training"][:10]

#### Randomise training data

In [None]:
import random
# Seed the global RNG so that, for a given input order, the shuffle is repeatable.
random.seed(42)
random.shuffle(datalist_json["training"])  # shuffles the list in place
datalist_json["training"][:10]  # preview the first ten shuffled entries

#### Split training data into N random folds

In [None]:
# Split the (already shuffled) training cases into `num_folds` contiguous folds.
num_folds = 5
training_cases = datalist_json["training"]
num_cases = len(training_cases)
fold_size = num_cases // num_folds  # nominal fold size, kept for reference

# `case_idx * num_folds // num_cases` equals `case_idx // fold_size` whenever the
# case count is a multiple of `num_folds`. Otherwise it spreads the remainder
# over the folds instead of silently leaving the leftover cases in fold 0,
# and it still uses every fold when there are fewer cases than folds.
for case_idx, case in enumerate(training_cases):
    case["fold"] = case_idx * num_folds // num_cases

#### list up final training data with all randomised folds

In [None]:
datalist_json["training"][:5]

#### Save JSON to file `datalist_file`

In [None]:
import json
import os

# The file name is derived from the lower-cased task name.
datalist_file = os.path.join("./", f"msd_{msd_task.lower()}_folds.json")

# Write the datalist as UTF-8 JSON with a 4-space indent.
with open(datalist_file, "w", encoding="utf-8") as f:
    f.write(json.dumps(datalist_json, ensure_ascii=False, indent=4))

print(f"Datalist is saved to {datalist_file}")

#### Now we can have filelists for train.

In [None]:
!cat $datalist_file

## Prepare a input YAML configuration

In [None]:
# Task description that is exported to YAML and handed to the Auto3DSeg
# components in later cells.
input_cfg = {
    "name": msd_task,  # optional, it is only for your own record
    "task": "segmentation",  # optional, it is only for your own record
    "modality": "MRI",  # required
    "datalist": datalist_file,  # required
    "dataroot": dataroot,  # required
}
# NOTE: `input` shadows the Python builtin of the same name. The name is kept
# because later cells pass `input` to BundleGen and AlgoEnsembleBuilder.
input = "./input.yaml"
ConfigParser.export_config_file(input_cfg, input)

## Breaking down the AutoRunner
Below is the typical usage of AutoRunner

```
runner = AutoRunner(input=input)
runner.run()
```
The two lines cover the typical settings in Auto3DSeg and now we are going through the internal APIs calls inside these two lines

## Data Analysis

When the `analyze` flag is set to `True`, AutoRunner will call `DataAnalyzer` to analyze the datasets and generate a statistical report in YAML. Below are the equivalent Python API calls of `DataAnalyzer`:
    

In [None]:
# Working directory that receives the data statistics report.
work_dir = "./workdir_auto3dseg_hippocampus"
os.makedirs(work_dir, exist_ok=True)

# Analyze the dataset; the report is written to `datastats_file`.
datastats_file = os.path.join(work_dir, "data_stats.yaml")
analyser = DataAnalyzer(datalist_file, dataroot, output_path=datastats_file)
datastat = analyser.get_all_case_stats()

# Print the absolute paths that the equivalent CLI call would need.
for caption, location in (
    ("datalist file: ", datalist_file),
    ("dataroot path: ", dataroot),
    ("datastat path: ", datastats_file),
):
    print(caption, os.path.abspath(location))


Besides the Python API call, user can also use command line interface (CLI) provided by the Python Fire:
```
python -m monai.apps.auto3dseg DataAnalyzer get_all_case_stats \
    --datalist="<datalist file>" \
    --dataroot="<dataroot path>" \
    --output_path="<datastat path>"
```

## Algorithm Generation (algo_gen)
When the `algo_gen` flag is set to `True`, `AutoRunner` will use `BundleGen` to generate monai bundles from templated algorithms in the working directory.

The templated algorithms are customized for the datasets when the `generate` method is called. In detail, the `generate` method will fill the templates using information from the data_stats report. 
Also, it will copy the necessary scripts (train.py/infer.py) to the algorithm folder. Finally, it will create an algo_object.pkl to save the `Algo` so that it can be instantiated in the local or remote machine. 
Cross validation is used by default, and `num_fold` can be set to 1 if the users do not want cross validation.

Below is the equivalent Python API calls of BundleGen:

In [None]:
# Generate the algorithm bundles into the working directory, driven by the
# data statistics report and the task input file.
bundle_generator = BundleGen(
    algo_path=work_dir,
    data_stats_filename=datastats_file,
    data_src_cfg_name=input,
)
bundle_generator.generate(work_dir, num_fold=5)

# Print the absolute paths that the equivalent CLI call would need.
for caption, location in (
    ("algo path: ", work_dir),
    ("data_stats file: ", datastats_file),
    ("task input file: ", input),
):
    print(caption, os.path.abspath(location))

Besides the Python API call, user can also use command line interface (CLI) provided by the user's OS. One example is the following bash commands:

```
python -m monai.apps.auto3dseg BundleGen generate \
    --algo_path="<algo path>" \
    --data_stats_filename="<data_stats file>" \
    --data_src_cfg_name="<task input file>"
```

## Getting and saving the algorithm generation history to the local drive
If the users continue to train the algorithms on the local system, the history of the algorithm generation can be fetched via the `get_history` method of the `BundleGen` object. 
There also are scenarios that users need to stop the Python process after the `algo_gen`. 
For example, the users may need to transfer the files to a remote cluster to start the training. 
`Auto3DSeg` offers a utility function `export_bundle_algo_history` to dump the history to hard drive and recall it by `import_bundle_algo_history`.

If the files are copied to a remote system, please ensure the algorithm templates are also copied there. 
Some functions require the path to instantiate the algorithm class properly.

In [None]:
# Fetch the algorithm generation history from the BundleGen object and dump it
# so it can be reloaded later with `import_bundle_algo_history`.
history = bundle_generator.get_history()
export_bundle_algo_history(history)  # save the Algo objects

## Add training parameters to cut down the training time in this notebook (Optional)
This step is not required, but for demo purposes, we'll set a limit of the epochs to train the algorithms.

Some algorithms in `Auto3DSeg` use epoch to mark the progress of training, while the others use iteration to iterate the loops. 
Below is the code block to convert num_epoch to iteration style and override all algorithms with the same training parameters for a 1-GPU/2-GPU machine.

It is not required for the users to set the `train_param`. The users can use either `train()` or `train({})` if no changes are needed. Then the algorithms will go for the full training and repeat 5 folds.

On the other hand, users can also set `train_param` for each algorithm.

For demo purposes, below is a code block to convert num_epoch to iteration style and override all algorithms with the same training parameters. The setup works fine for a machine that has GPUs less than or equal to 8. The datalist in this example is only using a subset of the original dataset. Users need to ensure the number of GPUs is not greater than the number that the training dataset can be partitioned. For example, the following code block is not suitable for a 16-GPU system. In such cases, please change the code block accordingly.

In [None]:
# Keep the demo short: train for two epochs only.
max_epochs = 2  # change epoch number to 2 to cut down the notebook running time

# Training overrides shared by every algorithm in this notebook.
train_param = dict(
    num_epochs_per_validation=1,
    num_images_per_batch=2,
    num_epochs=max_epochs,
    num_warmup_epochs=1,
)

print(train_param)

## Training the neural networks sequentially
The algo_gen history contains Algo objects that have multiple methods such as train and predict. We can easily use such APIs to trigger neural network training. By default, `AutoRunner` will start a training on a single node (single or multiple GPUs) in a sequential manner.

`algo_to_pickle` is optional and it will update the dumped Algo objects with the accuracies information.

In [None]:
# Reload the generated Algo objects from the working directory, including the
# ones that have not been trained yet.
history = import_bundle_algo_history(work_dir, only_trained=False)
# Train the algorithms one after another with the shared `train_param`.
for algo_dict in history:
    algo = algo_dict[AlgoKeys.ALGO]
    algo.train(train_param)  # can use default params by `algo.train()`
    acc = algo.get_score()
    # Optional: update the dumped Algo object with its score.
    algo_to_pickle(algo, template_path=algo.template_path, best_metric=acc)

## Ensemble
Finally, after the neural networks are trained, AutoRunner will apply the ensemble methods in `Auto3DSeg` to improve the overall performance.

Here we used a utility function `import_bundle_algo_history` to load the `Algo` that is trained into the ensemble. With the history loaded, we build an ensemble method and use the method to perform the inference on all testing data.

NOTE: Because we need to get the prediction in Python, there are no alternative CLI commands for this step.

In [None]:
from monai.apps.auto3dseg import (
    AlgoEnsemble,
    AlgoEnsembleBuilder)
from monai.utils.enums import AlgoKeys
# Number of algorithms requested from the best-N ensemble method.
n_best=5
# Reload only the trained Algo objects from the working directory.
history = import_bundle_algo_history(work_dir, only_trained=True)
builder = AlgoEnsembleBuilder(history, input)
builder.set_ensemble_method(AlgoEnsembleBestN(n_best=n_best))
ensembler = builder.get_ensemble()
# Run the ensemble to perform inference on the testing data.
# NOTE(review): `visualize_inference` below reads predictions from
# `<work_dir>/ensemble_output`; confirm this call writes them there in the
# installed MONAI version.
preds = ensembler()
print("ensemble picked the following best {0:d}:".format(n_best))
for algo in ensembler.get_algo_ensemble():
    print(algo[AlgoKeys.ID])

In [None]:
import os
import json
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt

def visualize_inference(dataroot_dir, work_dir, datalist_file, sim_dim, index):
    """Show a testing image, its label (if any) and the ensemble prediction.

    Args:
        dataroot_dir: directory joined in front of ``datalist_file`` and of the
            image/label paths stored in the datalist. Pass an absolute
            ``datalist_file`` when the datalist lives somewhere else.
        work_dir: directory that contains the ``ensemble_output`` folder.
        datalist_file: JSON datalist with a ``"testing"`` list of entries.
        sim_dim: when it is a sequence with at least three items, the slice
            ``sim_dim[2] // 2`` along the first array axis is shown. Any other
            value (for example a plain int) selects the middle slice of the
            image instead.
        index: position of the case inside the ``"testing"`` list.

    Returns:
        None. Prints ``"Invalid index."`` and returns early when ``index`` is
        out of range.
    """
    with open(os.path.join(dataroot_dir, datalist_file), "r") as f:
        sim_datalist = json.load(f)

    dataset = sim_datalist["testing"]

    if index < 0 or index >= len(dataset):
        print("Invalid index.")
        return

    entry = dataset[index]
    # Take the basename first: a relative path such as "./imagesTs/x.nii.gz"
    # starts with a dot, so splitting the full path on "." gives an empty name.
    image_name = os.path.basename(entry["image"]).split(".")[0]

    prediction_nib = nib.load(os.path.join(work_dir, "ensemble_output", image_name + "_ensemble" + ".nii.gz"))
    pred = np.array(prediction_nib.dataobj)

    img_nib = nib.load(os.path.join(dataroot_dir, entry["image"]))
    img = np.array(img_nib.dataobj)

    # Testing entries usually carry no label; only load one when it is listed.
    label_path = entry.get("label")
    lbl = None
    if label_path is not None:
        lbl = np.array(nib.load(os.path.join(dataroot_dir, label_path)).dataobj)

    # Pick the slice to display, falling back to the middle of the volume when
    # `sim_dim` is not a usable sequence.
    try:
        slice_idx = sim_dim[2] // 2
    except (TypeError, IndexError):
        slice_idx = img.shape[0] // 2

    def _clamped(position, size):
        # Keep the slice index inside the volume so small volumes do not crash.
        return max(0, min(position, size - 1))

    # Display original image
    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(img[_clamped(slice_idx, img.shape[0])])
    plt.title("Original Image")
    plt.colorbar(shrink=0.55)

    # Display ground truth label
    plt.subplot(1, 3, 2)
    if lbl is None:
        plt.text(0.5, 0.5, "No label available", ha="center", va="center")
        plt.axis("off")
    else:
        plt.imshow(lbl[_clamped(slice_idx, lbl.shape[0])])
        plt.colorbar(shrink=0.55)
    plt.title("Ground Truth Label")

    # Display predicted label at the same slice as the image
    plt.subplot(1, 3, 3)
    pred_slice = _clamped(slice_idx, pred.shape[0])
    if pred.ndim == 4:
        # Last axis holds channels; show channel 1 when it exists.
        plt.imshow(pred[pred_slice, :, :, _clamped(1, pred.shape[-1])])
    else:
        plt.imshow(pred[pred_slice])
    plt.title("Predicted Label")
    plt.colorbar(shrink=0.55)

    plt.tight_layout()
    plt.show()



In [None]:
# Example usage
# The image paths in the datalist are relative to the dataset root, so that is
# what must be passed as `dataroot_dir`. The datalist itself lives in the
# notebook directory, hence the absolute path (os.path.join keeps an absolute
# second argument unchanged).
dataroot_dir = dataroot
work_dir = "./workdir_auto3dseg_hippocampus"
# `sim_dim` must be a 3-item sequence; (32, 32, 32) selects slice 16 along the first axis.
visualize_inference(
    dataroot_dir,
    work_dir,
    os.path.abspath("./msd_task04_hippocampus_folds.json"),
    sim_dim=(32, 32, 32),
    index=3,
)

# Other dataset cases

## instance22 dataset

The dataset is from MICCAI 2022 challenge [INSTANCE22: The 2022 Intracranial Hemorrhage Segmentation Challenge on Non-Contrast head CT (NCCT)](https://instance.grand-challenge.org/). The solution described here won 2nd place (1st place in terms of Dice score).

100 non-contrast head CT volumes of clinically diagnosed patients with different kinds of ICH, (including subdural hemorrhage, epidural hemorrhage, intraventricular hemorrhage, intraparenchymal hemorrhage, and subarachnoid hemorrhage), are used for model training. The size of a CT volume is 512 x 512 x N, where N lies in [20, 70]. The pixel spacing of a CT volume is 0.42mm x 0.42mm x 5mm. The images will be stored in NIFTI files. Voxel-level segmentation annotations are: 0 - Background; 1 - ICH.


*`input.yaml`*
```
name: instance22
task: segmentation

modality: CT
datalist: ./instance22_folds.json # list of files
dataroot: /workspace/data/instance22 # data location

multigpu: True

class_names: ["val_acc_ich"]
```

*`instance22_folds.json`*

```
{
  "testing": [
    {"image": "evaluation/101.nii.gz"},
    {"image": "evaluation/102.nii.gz"},
    {"image": "evaluation/103.nii.gz"},
 ...
    {"image": "evaluation/127.nii.gz"},
    {"image": "evaluation/128.nii.gz"},
    {"image": "evaluation/129.nii.gz"},
    {"image": "evaluation/130.nii.gz"}
  ],
  "training": [
    {
      "label": "train/label/095.nii.gz",
      "image": "train/data/095.nii.gz",
      "fold": 0
    },
    {
      "label": "train/label/066.nii.gz",
      "image": "train/data/066.nii.gz",
      "fold": 0
    },
    {
      "label": "train/label/069.nii.gz",
      "image": "train/data/069.nii.gz",
      "fold": 0
    }, 
    
    ...
    
    
    {
      "label": "train/label/052.nii.gz",
      "image": "train/data/052.nii.gz",
      "fold": 4
    },
    {
      "label": "train/label/023.nii.gz",
      "image": "train/data/023.nii.gz",
      "fold": 4
    }
  ]
}
```

## hecktor22

The HECKTOR22 dataset is from MICCAI 2022 challenge [HEad and NeCK TumOR Segmentation and Outcome Prediction (HECKTOR22)](https://hecktor.grand-challenge.org/). The solution described here won the 1st place in the HECKTOR22 challenge (NVAUTO team):

Andriy Myronenko, Md Mahfuzur Rahman Siddiquee, Dong Yang, Yufan He and Daguang Xu: "Automated head and neck tumor segmentation from 3D PET/CT". In MICCAI (2022). arXiv



*`input.yaml`*

```
modality: CT                        # primary modality
dataroot: /data/hecktor22           # dataset location
datalist: hecktor22_folds.json      # a list of filenames
class_names: [tumor, lymph_node]    # names for tensorboard
extra_modalities: {image2 : pet}    # a second modality

custom_data_transforms:
  - key: after_resample_transforms
    path: '.'
    transform: {_target_: hecktor_crop_neck_region.HecktorCropNeckRegion, box_size: [200, 200, 310]}

image_size_mm_90: [200, 200, 310]
resample_resolution: [1, 1, 1]

roi_size: [192, 192, 192]       
```


*`hecktor22_folds.json`*

```
{
    "testing": [
        {
            "image": "imagesTr/CHUP-049__CT.nii.gz",
            "image2": "imagesTr/CHUP-049__PT.nii.gz",
            "fold": 0
        },
        
  ...
        {
            "image": "imagesTr/HGJ-062__CT.nii.gz",
            "image2": "imagesTr/HGJ-062__PT.nii.gz",
            "fold": 0
        }
    ],
    "training": [
        {
            "image": "imagesTr/CHUP-049__CT.nii.gz",
            "image2": "imagesTr/CHUP-049__PT.nii.gz",
            "label": "labelsTr/CHUP-049.nii.gz",
            "fold": 0
        },
        {
            "image": "imagesTr/CHUP-034__CT.nii.gz",
            "image2": "imagesTr/CHUP-034__PT.nii.gz",
            "label": "labelsTr/CHUP-034.nii.gz",
            "fold": 0
        },
        
   ...
        {
            "image": "imagesTr/CHUP-015__CT.nii.gz",
            "image2": "imagesTr/CHUP-015__PT.nii.gz",
            "label": "labelsTr/CHUP-015.nii.gz",
            "fold": 4
        }
    ]
}        

```

##  BTCV Dataset
For the BTCV dataset, under Institutional Review Board (IRB) supervision, 50 abdomen CT scans were randomly selected from a combination of an ongoing colorectal cancer chemotherapy trial, and a retrospective ventral hernia study. The 50 scans were captured during portal venous contrast phase with variable volume sizes (512 x 512 x 85 - 512 x 512 x 198) and field of views (approx. 280 x 280 x 280 mm3 - 500 x 500 x 650 mm3). The in-plane resolution varies from 0.54 x 0.54 mm2 to 0.98 x 0.98 mm2, while the slice thickness ranges from 2.5 mm to 5.0 mm.

- Target: 13 abdominal organs including
 - Spleen
 - Right Kidney
 - Left Kidney
 - Gallbladder
 - Esophagus
 - Liver
 - Stomach
 - Aorta
 - IVC
 - Portal and Splenic Veins
 - Pancreas
 - Right adrenal gland
 - Left adrenal gland.
- Modality: CT
- Size: 30 3D volumes (24 Training + 6 Testing)
- Challenge: BTCV MICCAI Challenge


*`input.yaml`*

```
name: BTCV
task: segmentation

modality: CT
datalist: ./btcv_folds.json # list of files
dataroot: /workspace/data/btcv # data location
```

*`btcv_folds.json`*

```
{
"description": "btcv yucheng",
"labels": {
    "0": "background",
    "1": "spleen",
    "2": "rkid",
    "3": "lkid",
    "4": "gall",
    "5": "eso",
    "6": "liver",
    "7": "sto",
    "8": "aorta",
    "9": "IVC",
    "10": "veins",
    "11": "pancreas",
    "12": "rad",
    "13": "lad"
},
"licence": "yt",
"modality": {
    "0": "CT"
},
"name": "btcv",
"numTest": 20,
"numTraining": 80,
"reference": "Vanderbilt University",
"release": "1.0 06/08/2015",
"tensorImageSize": "3D",
"test": [
    "imagesTs/img0061.nii.gz",
    "imagesTs/img0062.nii.gz",
    ...
    
    "imagesTs/img0079.nii.gz",
    "imagesTs/img0080.nii.gz"
],
"training": [
    {
        "fold": 4,
        "image": "imagesTr/img0001.nii.gz",
        "label": "labelsTr/label0001.nii.gz"
    },
    {
        "fold": 4,
        "image": "imagesTr/img0002.nii.gz",
        "label": "labelsTr/label0002.nii.gz"
    },
    
    ...
    
    {
        "fold": 0,
        "image": "imagesTr/img0039.nii.gz",
        "label": "labelsTr/label0039.nii.gz"
    },
    {
        "fold": 0,
        "image": "imagesTr/img0040.nii.gz",
        "label": "labelsTr/label0040.nii.gz"
    }
]
}    
    
```

## Caution !!!
### Please shut down all kernels via the [Kernel] menu > [Shut Down All Kernels] before launching the next notebook

## Navigation

- [01 MONAI transform](./01_getting.ipynb)
- [02_end_to_end_pipeline](./02_end_to_end_pipeline.ipynb)
- [03_spleen_segment](./03_spleen_segment.ipynb)
- [04_Auto3DSeg](./04_Auto3DSeg.ipynb)
- [05_Auto3DSeg_hippocampus](./05_Auto3DSeg_hippocampus.ipynb)
- [06_digital_pathology_wsi](./06_digital_pathology_wsi.ipynb)
- [07_HoverNet_01_inference](./07_HoverNet_01_inference.ipynb)
- [08_HoverNet_02_train](./08_HoverNet_02_train.ipynb)



<img src="https://github.com/Project-MONAI/MONAIBootcamp2021/raw/2f28b64f814a03703667c8ea18cc84f53d6795e4/day1/monai.png" width=400>