In [None]:
# Detect Google Colab by inspecting the repr of the active IPython shell.
# get_ipython() is injected by IPython, so this cell only runs inside a notebook/IPython session.
IN_COLAB = 'google.colab' in str(get_ipython())
if IN_COLAB:
    # Mount Google Drive; the project code is expected under "My Drive/dp_tomastik/code".
    from google.colab import drive
    drive.mount('/content/drive')
    # Make that directory importable (presumably where the `src` package lives — confirm).
    import sys
    sys.path.append('/content/drive/My Drive/dp_tomastik/code')
    # Run the project's install script through the shell (IPython `!` magic, not plain Python).
    !bash "/content/drive/My Drive/dp_tomastik/code/scripts/install_libs.sh"
    
import SimpleITK as sitk
import matplotlib.pyplot as plt
import torch
import os
import numpy as np
import pandas as pd
import logging
import datetime

from torchio import RandomAffine, Compose, ZNormalization
from operator import itemgetter
from IPython.display import display, Markdown
from ipywidgets import widgets

from src.helpers import preview_3d_image
from src.helpers import show_cuda_usage, preview_model_dataset_pred, preview_dataset
from src.helpers import get_threshold_info_df, get_rescaled_preds
from src.helpers import compare_prediction_with_ground_true, compare_one_prediction_with_ground_true
from src.helpers import get_img_outliers_pixels, get_raw_with_prediction
from src.helpers import get_rescaled_pred
from src.helpers import get_transformed_label_np, create_regis_trans_list, trans_list

from src.dataset import HaNOarsDataset, transform_input_with_registration, get_norm_transform
from src.dataset import get_full_res_cut, get_cut_lists, OARS_LABELS, get_dataset, get_dataset_info, get_dataset_transform
from src.dataset import split_dataset, copy_split_dataset

from src.model_and_training import prepare_model, train_loop, show_model_info, load_checkpoint_model_info
from src.model_and_training import iterate_model_v3v2
from src.model_and_training.getters.get_device import get_device
from src.model_and_training.architectures.unet_architecture_v3v2 import UNetV3v2

from src.consts import DATASET_MAX_BOUNDING_BOX, DESIRE_BOUNDING_BOX_SIZE
  
# Seed PyTorch's global RNG with a fixed value.
torch.manual_seed(20)

# logging.basicConfig(filename=...) opens the log file right away and raises
# FileNotFoundError when its parent directory is missing (e.g. on a fresh
# checkout or a new Colab VM), so make sure the directory exists first.
LOG_FILE = 'logs/model3v2_all_organs_jupyter.log'
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(filename=LOG_FILE, level=logging.DEBUG)

# Echo the project constants used for cutting the volumes.
print('Dataset biggest bounding box wihtout spinal cord', DATASET_MAX_BOUNDING_BOX)
print('Cut target size', DESIRE_BOUNDING_BOX_SIZE)
print('Done Init')

In [None]:
def get_possible_models(oar_key, models_dir='./models'):
    """Return the names of entries in ``models_dir`` whose name contains ``oar_key``.

    Args:
        oar_key: Substring that must occur in the entry name.
        models_dir: Directory that is listed. Defaults to ``'./models'``, the
            location the function always used before this parameter existed.

    Returns:
        Alphabetically sorted list of matching entry names. The list is empty
        when nothing matches or when ``models_dir`` does not exist.
    """
    # A missing directory simply means "no trained models yet"; callers already
    # handle an empty result, so do not raise here.
    if not os.path.isdir(models_dir):
        return []

    # os.listdir() returns entries in arbitrary order and callers pick
    # element [0]; sorting makes that choice reproducible.
    return sorted(
        entry_name for entry_name in os.listdir(models_dir) if oar_key in entry_name
    )

In [None]:
# Show how many CUDA devices are visible and the name of the first one.
# Names are collected per device so a CPU-only machine displays (0, None)
# instead of raising from torch.cuda.get_device_name(0).
cuda_device_names = [
    torch.cuda.get_device_name(device_index)
    for device_index in range(torch.cuda.device_count())
]
len(cuda_device_names), (cuda_device_names[0] if cuda_device_names else None)

# Training models for all organs

Getting the random train/validation/test indices that are reused for every dataset below.

In [None]:
# Reference dataset: it is only used to produce one train/valid/test split
# object that the later cells pass to copy_split_dataset().
data_path = './data/HaN_OAR_cut_all_maps_reg'
example_cut_dataset = HaNOarsDataset(data_path, size=50, load_images=False)
example_cut_dataset.load_from_file(data_path)

# 40 / 5 / 5 split of the 50 samples.
example_cut_dataset_obj = split_dataset(
    example_cut_dataset,
    train_size=40,
    valid_size=5,
    test_size=5,
)

Training each model.

In [None]:
# showing dataset input example
# data_path = f'./data/HaN_OAR_cut_left_parotid_reg'
# cut_full_res_dataset = HaNOarsDataset(data_path, size=50, load_images=False)
# cut_full_res_dataset.load_from_file(data_path)
# cut_full_res_dataset.set_output_label(OARS_LABELS.PAROTID_GLAND_R)
                                     
# preview_3d_image(cut_full_res_dataset[0][0][0], figsize=(4,4))
# preview_3d_image(cut_full_res_dataset[0][0][1], figsize=(4,4))
# preview_3d_image(cut_full_res_dataset[0][1], figsize=(4,4))

In [None]:
# Build the label mapping without SPINAL_CORD as a filtered copy.
# Deleting the key from OARS_LABELS.OARS_LABELS_DICT directly would alter the
# shared mapping for every other user of OARS_LABELS in this kernel.
filter_labels = {
    label_name: label_value
    for label_name, label_value in OARS_LABELS.OARS_LABELS_DICT.items()
    if label_name != 'SPINAL_CORD'
}

# (label name, label value) pairs; the commented entries below select them by position.
tmp_list = list(filter_labels.items())
labels_list = list()

# creating registration dataset and organ segmentation pairs
# dataset use inverted labeling of left and right
# Un-comment the entries that should be trained; with all of them disabled
# the training loops below have nothing to iterate over.
# labels_list.append(('left_parotid', *tmp_list[12]))
# labels_list.append(('right_parotid', *tmp_list[11]))
# labels_list.append(('brainstem', *tmp_list[10]))
# labels_list.append(('parotids', *tmp_list[10]))
# labels_list.append(('all_maps', *tmp_list[10]))

for DATASET_REG_NAME, OAR_KEY, OAR_VALUE in labels_list:
    print(f"{DATASET_REG_NAME}, {OAR_KEY}, {OAR_VALUE}")

In [None]:
# Train one UNetV3v2 per (registration dataset, organ label) entry of labels_list.
# Disabled by default; flip the flag to run the training.
TRAIN_MODELS = False
if TRAIN_MODELS:
    for DATASET_REG_NAME, OAR_KEY, OAR_VALUE in labels_list:
        # Dataset for this registration variant, restricted to the selected label.
        data_path = f'./data/HaN_OAR_cut_{DATASET_REG_NAME}_reg'
        cut_full_res_dataset = HaNOarsDataset(data_path, size=50, load_images=False)
        cut_full_res_dataset.load_from_file(data_path)
        cut_full_res_dataset.set_output_label(OAR_VALUE)

        # Apply the reference split to this dataset.
        cut_full_res_dataset_obj = copy_split_dataset(cut_full_res_dataset, example_cut_dataset_obj)
        cut_train_dataset = cut_full_res_dataset_obj['train_dataset']
        cut_valid_dataset = cut_full_res_dataset_obj['valid_dataset']
        cut_test_dataset = cut_full_res_dataset_obj['test_dataset']

        # Timestamped name identifying this training run.
        log_date = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        model_name = f'{log_date}_3d_unet_lowres_model3v2__cloud-{OAR_KEY}-{DATASET_REG_NAME}_reg'

        print(f"Training model with dataset label '{OAR_KEY}', value '{OAR_VALUE}'")
        print(f"folder '{model_name}'")

        cut_model_info = prepare_model(
            # optimisation
            epochs=175,
            learning_rate=3e-4,
            # architecture
            in_channels=8,
            input_data_channels=1,
            output_label_channels=1,
            dropout_rate=0.2,
            # data
            train_batch_size=2,
            model_name=model_name,
            train_dataset=cut_train_dataset,
            valid_dataset=cut_valid_dataset,
            test_dataset=cut_test_dataset,
            model_class=UNetV3v2,
        )
        show_model_info(cut_model_info)
        print('\n\n')

        train_loop(cut_model_info, iterate_model_fn=iterate_model_v3v2)
        print('\n\n')

        # Release cached GPU memory before the next model is built.
        torch.cuda.empty_cache()

In [None]:
from src.model_and_training.getters.get_loaders import get_loaders

In [None]:
# Continue training previously saved models from their epoch-175 checkpoint.
# Disabled by default; flip the flag to run it.
RETRAIN_MODELS = False
if RETRAIN_MODELS:
    for DATASET_REG_NAME, OAR_KEY, OAR_VALUE in labels_list:
        # loading dataset
        data_path = f'./data/HaN_OAR_cut_{DATASET_REG_NAME}_reg'
        cut_full_res_dataset = HaNOarsDataset(data_path, size=50, load_images=False)
        cut_full_res_dataset.load_from_file(data_path)
        cut_full_res_dataset.set_output_label(OAR_VALUE)

        # Apply the reference split to this dataset.
        cut_full_res_dataset_obj = copy_split_dataset(cut_full_res_dataset, example_cut_dataset_obj)
        cut_train_dataset, cut_valid_dataset, cut_test_dataset = itemgetter(
            'train_dataset', 'valid_dataset', 'test_dataset')(cut_full_res_dataset_obj)

        print(f'Training model with dataset label \'{OAR_KEY}\', value \'{OAR_VALUE}\'')

        # getting possible models
        possible_models = get_possible_models(f"model3v2__cloud-{OAR_KEY}-{DATASET_REG_NAME}_reg")
        if len(possible_models) <= 0:
            print(f'{OAR_KEY} Model: No avaiable model')
            continue

        # Retraining reuses an existing model folder, so no new timestamped name
        # is generated or announced here (the previous version printed a folder
        # name that was never used). Sorting makes the choice reproducible when
        # several folders match: directory listing order is arbitrary.
        model_name = sorted(possible_models)[0]
        print(f'Model: Loading model {model_name}')

        # loading model checkpoint
        epoch = 175
        cut_model_info = load_checkpoint_model_info(model_name, epoch, cut_train_dataset, cut_valid_dataset, cut_test_dataset, model_class=UNetV3v2)
        # NOTE(review): the target epoch count equals the start epoch passed to
        # train_loop below; depending on how train_loop interprets `epochs` this
        # may run no additional epochs — confirm and raise the value if needed.
        cut_model_info['epochs'] = 175

        # Optional: rebuild the data loaders with a different batch size.
        # train_batch_size = 1
        # train_dataloader, valid_dataloader, test_dataloader = get_loaders(train_batch_size, cut_train_dataset, cut_valid_dataset, cut_test_dataset)
        # cut_model_info["train_dataloader"] = train_dataloader
        # cut_model_info["valid_dataloader"] = valid_dataloader
        # cut_model_info["test_dataloader"] = test_dataloader

        show_model_info(cut_model_info)
        print('\n\n')

        # training
        train_loop(cut_model_info, iterate_model_fn=iterate_model_v3v2, start_epoch=epoch)
        print('\n\n')

        # Release cached GPU memory before the next model is loaded.
        torch.cuda.empty_cache()

# Evaluating Model

In [None]:
# Build the label mapping without SPINAL_CORD as a filtered copy.
# Deleting the key from OARS_LABELS.OARS_LABELS_DICT directly would alter the
# shared mapping for every other user of OARS_LABELS in this kernel.
filter_labels = {
    label_name: label_value
    for label_name, label_value in OARS_LABELS.OARS_LABELS_DICT.items()
    if label_name != 'SPINAL_CORD'
}

# (label name, label value) pairs; entries below are selected by position, so
# the indices depend on the ordering of OARS_LABELS_DICT with SPINAL_CORD removed.
tmp_list = list(filter_labels.items())
labels_list = list()

# creating registration dataset and organ segmentation pairs
# dataset use inverted labeling of left and right
labels_list.append(('left_parotid', *tmp_list[12]))
labels_list.append(('right_parotid', *tmp_list[11]))

# The same label (index 10) is evaluated on three different registration datasets.
labels_list.append(('brainstem', *tmp_list[10]))
labels_list.append(('parotids', *tmp_list[10]))
labels_list.append(('all_maps', *tmp_list[10]))

for DATASET_REG_NAME, OAR_KEY, OAR_VALUE in labels_list:
    print(f"{DATASET_REG_NAME}, {OAR_KEY}, {OAR_VALUE}")

### Loading models to CPU

In [None]:
# Load the epoch-175 checkpoint of every configured model and keep it on the
# CPU in eval mode. Results are stored under '<OAR_KEY>-<DATASET_REG_NAME>'.
models = dict()
for DATASET_REG_NAME, OAR_KEY, OAR_VALUE in labels_list:
    model_reg_name = f'{OAR_KEY}-{DATASET_REG_NAME}'

    # dataset loading
    data_path = f'./data/HaN_OAR_cut_{DATASET_REG_NAME}_reg'
    cut_full_res_dataset = HaNOarsDataset(data_path, size=50, load_images=False)
    cut_full_res_dataset.load_from_file(data_path)
    cut_full_res_dataset.set_output_label(OAR_VALUE)

    # Apply the reference split to this dataset.
    cut_full_res_dataset_obj = copy_split_dataset(cut_full_res_dataset, example_cut_dataset_obj)
    cut_train_dataset, cut_valid_dataset, cut_test_dataset = itemgetter(
        'train_dataset', 'valid_dataset', 'test_dataset')(cut_full_res_dataset_obj)

    possible_models = get_possible_models(f"model3v2__cloud-{OAR_KEY}-{DATASET_REG_NAME}_reg")
    if len(possible_models) <= 0:
        # Several entries share the same OAR_KEY, so report the full
        # '<OAR_KEY>-<DATASET_REG_NAME>' name to make the message unambiguous.
        print(f'{model_reg_name} Model: No avaiable model')
        continue

    # Directory listing order is arbitrary; sort so the same folder is picked
    # on every run when more than one matches.
    model_name = sorted(possible_models)[0]
    print(f'{model_reg_name} Model: Loading model {model_name}')

    # loading model checkpoint
    epoch = 175
    cut_model_info = load_checkpoint_model_info(model_name, epoch, cut_train_dataset, cut_valid_dataset, cut_test_dataset, model_class=UNetV3v2)

    # Keep the model on the CPU, in eval mode, with tensorboard writing disabled;
    # later cells move it to another device only while it is being used.
    cut_model_info['device'] = 'cpu'
    cut_model_info['model'] = cut_model_info['model'].to(cut_model_info['device'])
    cut_model_info['model'].eval()
    cut_model_info['model'].disable_tensorboard_writing = True

    models[model_reg_name] = cut_model_info

In [None]:
# Display the '<OAR_KEY>-<DATASET_REG_NAME>' names of the models that were loaded above.
models.keys()

## Testing Eval vs Train Mode

Testing the iteration function.

In [None]:
# for i, data in enumerate(train_dataloader):
#     if i == 0:
#         print(data[0][0][0].shape)
#         preview_3d_image(data[0][0][0], figsize=(4, 4))
#     pass

In [None]:
# NOTE: `cut_full_res_dataset` is whatever dataset the model-loading loop left
# behind in its last iteration; this cell depends on that cell having run.
cut_full_res_dataset.set_output_label(OARS_LABELS.PITUITARY)

# Pick one loaded model (see models.keys() for the alternatives) and move it
# to the device reported by get_device().
cut_model_info = models['PITUITARY-all_maps']
cut_model_info['device'] = get_device()
cut_model_info['model'] = cut_model_info['model'].to(cut_model_info['device'])
cut_model_info['model'].disable_tensorboard_writing = True

# Expose the individual pieces of the model info under short names.
model = cut_model_info['model']
model_name = cut_model_info['model_name']
optimizer = cut_model_info['optimizer']
criterion = cut_model_info['criterion']

epochs = cut_model_info['epochs']
device = cut_model_info['device']
tensorboard_writer = cut_model_info['tensorboard_writer']

train_dataloader = cut_model_info['train_dataloader']
valid_dataloader = cut_model_info['valid_dataloader']
test_dataloader = cut_model_info['test_dataloader']

# Disabled experiment: one pass of the iteration function in eval mode.
# model.actual_epoch = 100
# valid_loss, valid_dsc = iterate_model_v3v2(train_dataloader, model, optimizer, criterion, device, is_eval=True)
# print(valid_loss, valid_dsc)

# Put the model back on the CPU.
cut_model_info['model'].disable_tensorboard_writing = True
cut_model_info['device'] = 'cpu'
cut_model_info['model'] = cut_model_info['model'].to(cut_model_info['device'])


In [None]:
# Single rescaled prediction on the CPU for the model selected above; the
# fourth positional argument is 0 (presumably the sample index — confirm
# against get_rescaled_pred).
tmp = get_rescaled_pred(
    cut_model_info['model'],
    cut_full_res_dataset,
    'cpu',
    0,
    transform_input_fn=transform_input_with_registration,
)

In [None]:
# Evaluate every loaded model and summarise the 'thres_rescaled_dsc_0.50'
# column of its threshold-info table for the train and validation samples.
# Produces:
#   info_per_organs_df - dict: model name -> per-sample info DataFrame
#   models_info_df     - one row of summary statistics per model
SHOW_DSC_INFO = True
if SHOW_DSC_INFO:
    info_per_organs_df = {}
    models_info = list()
    for DATASET_REG_NAME, OAR_KEY, OAR_VALUE in labels_list:
        model_name = f'{OAR_KEY}-{DATASET_REG_NAME}'

        if model_name not in models:
            print(f'{model_name} Model: No avaiable model')
            continue

        # preparing dataset for comparison
        data_path = f'./data/HaN_OAR_cut_{DATASET_REG_NAME}_reg'
        cut_full_res_dataset = HaNOarsDataset(data_path, size=50, load_images=False)
        cut_full_res_dataset.load_from_file(data_path)
        cut_full_res_dataset.set_output_label(OAR_VALUE)

        # Apply the reference split to this dataset.
        cut_full_res_dataset_obj = copy_split_dataset(cut_full_res_dataset, example_cut_dataset_obj)
        cut_train_dataset, cut_valid_dataset, cut_test_dataset = itemgetter(
            'train_dataset', 'valid_dataset', 'test_dataset')(cut_full_res_dataset_obj)

        # Move the model to the compute device only for the duration of the
        # evaluation; the finally block puts it back on the CPU even when the
        # evaluation raises, so a failure does not leave it occupying the device.
        cut_model_info = models[model_name]
        cut_model_info['device'] = get_device()
        cut_model_info['model'] = cut_model_info['model'].to(cut_model_info['device'])
        cut_model_info['model'].eval()
        cut_model_info['model'].disable_tensorboard_writing = True
        try:
            # calculating dsc predictions
            info_df, preds, rescaled_preds = get_threshold_info_df(
                model=cut_model_info['model'],
                dataset=cut_full_res_dataset,
                device=cut_model_info['device'],
                train_indices=cut_train_dataset.indices,
                valid_indices=cut_valid_dataset.indices,
                test_indices=cut_test_dataset.indices,
                step=0.5,
                transform_input_fn=transform_input_with_registration)
        finally:
            cut_model_info['device'] = 'cpu'
            cut_model_info['model'] = cut_model_info['model'].to(cut_model_info['device'])
        info_per_organs_df[model_name] = info_df

        # parsing data: 'is_train' / 'is_valid' are boolean row masks in info_df
        best_threshold_col = 'thres_rescaled_dsc_0.50'
        train_tmp_df = info_df[info_df['is_train']][best_threshold_col]
        valid_tmp_df = info_df[info_df['is_valid']][best_threshold_col]
        train_dsc = train_tmp_df.mean()
        valid_dsc = valid_tmp_df.mean()
        print(f'{model_name} Model: DSC train {round(train_dsc, 4)} valid {round(valid_dsc, 4)}')

        models_info.append({
            'oar_key': OAR_KEY,
            'model_name': model_name,
            # Train
            'train_dsc_mean': train_dsc,
            'train_dsc_std': train_tmp_df.std(),
            'train_dsc_median': train_tmp_df.median(),
            'train_dsc_min': train_tmp_df.min(),
            'train_dsc_max': train_tmp_df.max(),
            # Valid
            'valid_dsc_mean': valid_dsc,
            'valid_dsc_std': valid_tmp_df.std(),
            'valid_dsc_median': valid_tmp_df.median(),
            'valid_dsc_min': valid_tmp_df.min(),
            'valid_dsc_max': valid_tmp_df.max(),
            # Both
            'train_valid_mean_delta': train_dsc - valid_dsc
        })

    models_info_df = pd.DataFrame(models_info)

    if models_info_df.empty:
        # Without any evaluated model the frame has no columns and the column
        # selection below would raise KeyError.
        print('No model was evaluated, nothing to summarise')
    else:
        # Compact view with mean/std expressed in percent.
        percent_columns = ['train_dsc_mean', 'train_dsc_std', 'valid_dsc_mean', 'valid_dsc_std']
        tmp_df = models_info_df[['model_name'] + percent_columns].copy()
        tmp_df[percent_columns] = (tmp_df[percent_columns] * 100).round(2)

        # numeric_only=True: 'model_name' holds strings, and pandas >= 2.0
        # raises TypeError instead of silently skipping non-numeric columns.
        display(tmp_df.mean(numeric_only=True).round(2))
        display(tmp_df.round(2))
        display(tmp_df.sort_values(by=['train_dsc_std']).round(2))
        display(models_info_df.sort_values(by=['train_dsc_mean']).round(2))
        display(models_info_df.sort_values(by=['train_valid_mean_delta']).round(2))

In [None]:
# Show the per-sample table of selected models, restricted to training samples
# and sorted by the 'thres_rescaled_dsc_0.50' column.
if SHOW_DSC_INFO:
    tmp_column = 'is_train'
    dsc_column = 'thres_rescaled_dsc_0.50'

    # Keys follow the '<OAR_KEY>-<DATASET_REG_NAME>' format used when
    # info_per_organs_df is filled. The brainstem entry used to be looked up
    # as 'PITUITARY-brainstem Model', which can never exist, and the bare
    # `except: pass` hid that, so its table was silently never displayed.
    inspected_models = [
        'PAROTID_GLAND_L-right_parotid',
        'PITUITARY-brainstem',
        'PITUITARY-parotids',
        'PITUITARY-all_maps',
    ]

    for inspected_model in inspected_models:
        # Print the model key itself as the heading: the previous hard-coded
        # label names did not match the tables that were looked up.
        print(inspected_model)

        # Only a model that was not evaluated is skipped; any other error
        # (e.g. a missing column) is a real problem and is allowed to surface.
        if inspected_model not in info_per_organs_df:
            print(f'{inspected_model}: no DSC info available, skipping')
            continue

        tmp_df = info_per_organs_df[inspected_model]
        display(tmp_df[tmp_df[tmp_column]].sort_values(by=dsc_column))