In [1]:
# Detect Google Colab by looking for the 'google.colab' marker in the string
# form of the active IPython shell object.
IN_COLAB = 'google.colab' in str(get_ipython())
if IN_COLAB:
    # Mount Google Drive; the project code path used below lives on the Drive.
    from google.colab import drive
    drive.mount('/content/drive')
    # Add the project code directory to the module search path
    # (presumably so the `src.*` imports below resolve -- TODO confirm layout).
    import sys
    sys.path.append('/content/drive/My Drive/dp_tomastik/code')
    # Run the project's install script from Drive via the IPython shell escape
    # (presumably installs third-party libraries -- TODO confirm script contents).
    !bash "/content/drive/My Drive/dp_tomastik/code/scripts/install_libs.sh"

import matplotlib.pyplot as plt
import torch
import os
import numpy as np
import logging
import datetime
from torchio import RandomAffine, Compose, ZNormalization

import src.dataset.oars_labels_consts as OARS_LABELS
from src.consts import DATASET_MAX_BOUNDING_BOX, DESIRE_BOUNDING_BOX_SIZE
from src.helpers.threshold_calc_helpers import get_threshold_info_df
from src.helpers.show_model_dataset_pred_preview import show_model_dataset_pred_preview
from src.dataset.get_cut_lists import get_cut_lists
from src.dataset.get_full_res_cut import get_full_res_cut
from src.dataset.get_dataset import get_dataset
from src.dataset.get_dataset_info import get_dataset_info
from src.dataset.preview_dataset import preview_dataset
from src.dataset.get_dataset_transform import get_dataset_transform
from src.model_and_training.prepare_model import prepare_model
from src.model_and_training.train_loop import train_loop
from src.model_and_training.show_model_info import show_model_info
from src.model_and_training.load_checkpoint_model_info import load_checkpoint_model_info
from src.helpers.show_cuda_usage import show_cuda_usage
from src.helpers.get_rescaled_pred import get_rescaled_preds
from src.dataset.split_dataset import split_dataset, copy_split_dataset
from src.helpers.compare_prediction_with_ground_true import compare_prediction_with_ground_true, compare_one_prediction_with_ground_true
from src.helpers.get_img_outliers_pixels import get_img_outliers_pixels
from src.helpers.get_raw_with_prediction import get_raw_with_prediction

from operator import itemgetter
from IPython.display import display, Markdown
from ipywidgets import widgets

# Seed PyTorch's random number generator so torch-driven randomness is
# repeatable between runs of this notebook.
torch.manual_seed(20)

# logging.basicConfig(filename=...) opens the log file right away and raises
# FileNotFoundError when the parent directory is missing (e.g. on a fresh
# checkout), so make sure the directory exists first.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename='logs/all_organs_jupyter.log', level=logging.DEBUG)

# Echo the project-level bounding-box constants used when cutting volumes.
print('Dataset biggest bounding box wihtout spinal cord', DATASET_MAX_BOUNDING_BOX)
print('Cut target size', DESIRE_BOUNDING_BOX_SIZE)
print('Done Init')

If you use TorchIO for your research, please cite the following paper:
Pérez-García et al., TorchIO: a Python library for efficient loading,
preprocessing, augmentation and patch-based sampling of medical images
in deep learning. Credits instructions: https://torchio.readthedocs.io/#credits

Dataset biggest bounding box wihtout spinal cord [56, 177, 156]
Cut target size [72, 192, 168]
Done Init


# Loading precourse neural network with datasets

In [2]:
# Keys used to unpack the split-dataset dictionaries returned by
# split_dataset / copy_split_dataset.
datasets_params = ['train_dataset', 'valid_dataset', 'test_dataset']

# Build a filtered *copy* of the label list. Calling .remove() on
# OARS_LABELS.OARS_LABELS_LIST itself would mutate the imported module's list
# for the rest of the kernel session (hidden state across cells).
filter_labels = [
    label for label in OARS_LABELS.OARS_LABELS_LIST
    if label != OARS_LABELS.SPINAL_CORD
]

# low res: 16x shrunken volumes with all kept labels unified into one mask
low_res_dataset = get_dataset(dataset_size=50, shrink_factor=16, filter_labels=filter_labels, unify_labels=True)
low_res_dataset.dilatate_labels(repeat=1)
low_res_dataset.to_numpy()
low_res_split_dataset_obj = split_dataset(low_res_dataset, train_size=40, valid_size=5, test_size=5)
train_low_res_dataset, valid_low_res_dataset, test_low_res_dataset = itemgetter(*datasets_params)(low_res_split_dataset_obj)

# full res: original resolution, labels kept separate
full_res_dataset = get_dataset(dataset_size=50, shrink_factor=1, filter_labels=filter_labels, unify_labels=False)
full_res_dataset.to_numpy()
# Derive the full-res split from the low-res split object
# (presumably reusing the same sample indices -- TODO confirm in copy_split_dataset).
full_res_split_dataset_obj = copy_split_dataset(full_res_dataset, low_res_split_dataset_obj)

# low res model - precourse model, identified by its training timestamp
epoch = 500
log_date = datetime.datetime(year=2020, month=10, day=27, hour=11, minute=45, second=30).strftime("%Y%m%d-%H%M%S")
model_name = f'{log_date}_3d_unet_PRECOURSE'

low_res_model_info = load_checkpoint_model_info(model_name, epoch, train_low_res_dataset, valid_low_res_dataset, test_low_res_dataset)
show_model_info(low_res_model_info)
# The model is only used for inference below: move it to the CPU and switch to eval mode.
low_res_model_info['model'] = low_res_model_info['model'].to('cpu')
low_res_model_info['model'].eval()
low_res_model = low_res_model_info['model']

# cut res: full-res data cut with the help of the low-res model's predicted mask
cut_full_res_dataset = full_res_dataset.copy(copy_lists=False)
cut_full_res_dataset = get_cut_lists(low_res_model, low_res_dataset, full_res_dataset, cut_full_res_dataset, low_res_mask_threshold=0.5)
cut_full_res_dataset.set_output_label(None)
cut_split_dataset_obj = copy_split_dataset(cut_full_res_dataset, low_res_split_dataset_obj)
cut_train_dataset, cut_valid_dataset, cut_test_dataset = itemgetter(*datasets_params)(cut_split_dataset_obj)

CUDA using 16x dataset
filtering labels
filtering labels done
dilatating 1x dataset
parsing dataset to numpy
numpy parsing done
CUDA using 1x dataset
filtering labels
filtering labels done
parsing dataset to numpy
numpy parsing done
Device running "cuda"
max output channels 64
Model number of params: 298881, trainable 298881
getting cut index 0
debug removing 10 outlier pixels from 1335
debug box delta [21 48 24]
debug, Does cut and original label contain the same amount of pixels? True 1223526 1223526
getting cut index 1
debug removing 0 outlier pixels from 1416
debug box delta [24 16  8]
debug, Does cut and original label contain the same amount of pixels? True 1326052 1326052
getting cut index 2
debug removing 0 outlier pixels from 1873
debug box delta [ 20   0 -24]
debug, Does cut and original label contain the same amount of pixels? True 1890464 1890464
getting cut index 3
debug removing 0 outlier pixels from 1545
debug box delta [17 32  8]
debug, Does cut and original label conta

In [3]:
# Report the train/valid/test split of the low-res dataset (sizes and sample indices).
get_dataset_info(low_res_dataset, low_res_split_dataset_obj)

train 40, valid_size 5, test 5, full 50
train indices [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 17, 18, 20, 21, 22, 23, 24, 28, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
valid indices [6, 13, 19, 25, 38]
test indices [16, 26, 27, 29, 39]


In [4]:
# High-res model ("EYES"): restore the epoch-100 checkpoint stored under the
# folder named '<training timestamp>_3d_unet_EYES'.
epoch = 100
log_date_dict = dict(year=2020, month=10, day=27, hour=14, minute=14, second=51)
log_date = datetime.datetime(**log_date_dict).strftime("%Y%m%d-%H%M%S")
model_name = f'{log_date}_3d_unet_EYES'

cut_model_info = load_checkpoint_model_info(
    model_name,
    epoch,
    cut_train_dataset,
    cut_valid_dataset,
    cut_test_dataset,
)
show_model_info(cut_model_info)

# Keep the network on the CPU in eval mode and expose it under a short name.
cut_model = cut_model_info['model'].to('cpu')
cut_model_info['model'] = cut_model
cut_model.eval()

Device running "cuda"
max output channels 128
Model number of params: 1193537, trainable 1193537


In [5]:
# Show the interactive preview of the cut full-resolution dataset.
preview_dataset(cut_full_res_dataset)

data max 3071, min -1024
label max 22, min 0


VBox(children=(HBox(children=(IntSlider(value=35, max=71),)),))

Output()

# Training all organ models

In [6]:
# Work on a filtered *copy* of the label dict. Deleting the key from
# OARS_LABELS.OARS_LABELS_DICT itself would mutate the imported module's dict
# for the rest of the kernel session (hidden state across cells).
filter_labels = {
    name: value
    for name, value in OARS_LABELS.OARS_LABELS_DICT.items()
    if name != 'SPINAL_CORD'
}

# Select each organ label in turn as the dataset's output label and print it.
# Note: after the loop the dataset is left pointing at the last label.
for OAR_KEY, OAR_VALUE in filter_labels.items():
    cut_full_res_dataset.set_output_label(OAR_VALUE)
    print(f'dataset label \'{OAR_KEY}\', \t value \'{OAR_VALUE}\'')

    # Optional debug preview of one training sample (disabled):
#     tmp_data, tmp_label = cut_train_dataset[0]
#     tmp_idx = np.where(tmp_label > 0)[0]
#     tmp_slice = tmp_idx[int(np.median(tmp_idx))]
#     tmp_slice = 46

#     plt.figure(figsize=(16, 16))
#     plt.subplot(1, 2, 1)
#     plt.imshow(tmp_data[0, tmp_slice], cmap="gray")
#     plt.subplot(1, 2, 2)
#     plt.imshow(tmp_label[tmp_slice])
#     plt.show()

dataset label 'MANDIBLE_L', 	 value '21'
dataset label 'MANDIBLE_R', 	 value '22'


In [8]:
# Training is expensive; flip this switch to True to (re)train one model per organ.
TRAIN_MODELS = False
if TRAIN_MODELS:
    # Work on a filtered *copy* of the label dict. Deleting the key from
    # OARS_LABELS.OARS_LABELS_DICT itself would mutate the imported module's
    # dict for the rest of the kernel session (hidden state across cells).
    filter_labels = {
        name: value
        for name, value in OARS_LABELS.OARS_LABELS_DICT.items()
        if name != 'SPINAL_CORD'
    }

    for OAR_KEY, OAR_VALUE in filter_labels.items():
        # The cut train/valid/test datasets are passed unchanged each iteration;
        # only the output label selected on cut_full_res_dataset changes.
        cut_full_res_dataset.set_output_label(OAR_VALUE)
        # Each run is stored under '<start timestamp>_3d_unet_<organ key>'.
        log_date = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        model_name = f'{log_date}_3d_unet_{OAR_KEY}'

        print(f'Training model with dataset label \'{OAR_KEY}\', value \'{OAR_VALUE}\'')
        print(f'folder \'{model_name}\'')
        cut_model_info = prepare_model(epochs=75,
                                       learning_rate=3e-4,
                                       in_channels=8,
                                       dropout_rate=0.2,
                                       train_batch_size=2,
                                       model_name=model_name,
                                       train_dataset=cut_train_dataset,
                                       valid_dataset=cut_valid_dataset,
                                       test_dataset=cut_test_dataset)
        show_model_info(cut_model_info)
        print('\n\n')
        train_loop(cut_model_info)
        print('\n\n')

        # clearing memory: drop the model reference and release cached CUDA
        # blocks before the next organ's model is created
        del cut_model_info
        torch.cuda.empty_cache()

    #     # Preview one image form dataset  
    #     tmp_data, tmp_label = cut_train_dataset[0]
    #     tmp_idx = np.where(tmp_label > 0)[0]
    #     tmp_slice = tmp_idx[int(np.median(tmp_idx))]
    #     plt.figure(figsize=(16, 16))
    #     plt.subplot(1, 2, 1)
    #     plt.imshow(tmp_data[0, tmp_slice], cmap="gray")
    #     plt.subplot(1, 2, 2)
    #     plt.imshow(tmp_label[tmp_slice])
    #     plt.show()

Training model with dataset label 'MANDIBLE_L', value '21'
folder '20201104-120543_3d_unet_MANDIBLE_L'
Device running "cuda"
max output channels 128
Model number of params: 1193537, trainable 1193537



Running training loop
Batch train [1] loss 0.99275, dsc 0.00725
Batch train [2] loss 0.99221, dsc 0.00779
Batch train [3] loss 0.99001, dsc 0.00999
Batch train [4] loss 0.98830, dsc 0.01170
Batch train [5] loss 0.98061, dsc 0.01939
Batch train [6] loss 0.98137, dsc 0.01863
Batch train [7] loss 0.98129, dsc 0.01871
Batch train [8] loss 0.98506, dsc 0.01494
Batch train [9] loss 0.98124, dsc 0.01876
Batch train [10] loss 0.98377, dsc 0.01623
Batch train [11] loss 0.97758, dsc 0.02242
Batch train [12] loss 0.97715, dsc 0.02285
Batch train [13] loss 0.97367, dsc 0.02633
Batch train [14] loss 0.97707, dsc 0.02293
Batch train [15] loss 0.97480, dsc 0.02520
Batch train [16] loss 0.97859, dsc 0.02141
Batch train [17] loss 0.96948, dsc 0.03052
Batch train [18] loss 0.98008, dsc 0.01992
Batch trai

# Preview organ model

In [22]:
# loading organ model
# Pick the organ to preview; enable exactly one of the OAR_VALUE lines below.
# The model folder is discovered in ./models by organ key. To pin one specific
# training run instead, assign its folder name to `model_name` directly.
# OAR_VALUE = OARS_LABELS.EYE_L
OAR_VALUE = OARS_LABELS.OPT_NERVE_L
# OAR_VALUE = OARS_LABELS.INNER_EAR_L
# OAR_VALUE = OARS_LABELS.T_M_JOINT_L
# OAR_VALUE = OARS_LABELS.MID_EAR_R
# OAR_VALUE = OARS_LABELS.BRAIN_STEM
# OAR_VALUE = OARS_LABELS.OPT_CHIASMA
# OAR_VALUE = OARS_LABELS.PITUITARY # hyp
OAR_KEY = OARS_LABELS.OARS_LABELS_R_DICT[OAR_VALUE]
epoch = 75

# os.listdir returns entries in arbitrary order, so sort the matches to make
# the selected folder reproducible, and fail with a clear message (instead of
# a bare IndexError) when no folder matches the organ key.
possible_models = sorted(folder_name for folder_name in os.listdir('./models') if OAR_KEY in folder_name)
if not possible_models:
    raise FileNotFoundError(f"No model folder containing '{OAR_KEY}' found in './models'")
model_name = possible_models[0]
print(f'Loading {OAR_KEY} model')

# loading model checkpoint
cut_model_info = load_checkpoint_model_info(model_name, epoch, cut_train_dataset, cut_valid_dataset, cut_test_dataset)

# moving model to cpu with eval mode
cut_model_info['device'] = 'cpu'
cut_model_info['model'] = cut_model_info['model'].to(cut_model_info['device'])
cut_model_info['model'].eval()

# preparing dataset for comparison: look the label value up again from the key
# (presumably identical to the OAR_VALUE chosen above -- TODO confirm that
# OARS_LABELS_R_DICT is the exact inverse of OARS_LABELS_DICT)
OAR_VALUE = OARS_LABELS.OARS_LABELS_DICT[OAR_KEY]
cut_full_res_dataset.set_output_label(OAR_VALUE)

# Despite the `rnd_` prefix these are the first stored index of each split,
# not a random draw.
rnd_train_idx = low_res_split_dataset_obj['train_dataset'].indices[0]
rnd_valid_idx = low_res_split_dataset_obj['valid_dataset'].indices[0]

# Compare prediction against ground truth for one train and one valid sample.
for split_title, sample_idx in (('Train', rnd_train_idx), ('Valid', rnd_valid_idx)):
    print(f'{split_title} index {sample_idx}')
    raw_data, raw_label, raw_prediction = get_raw_with_prediction(cut_model_info['model'], cut_full_res_dataset, cut_model_info["device"], sample_idx)
    compare_one_prediction_with_ground_true(raw_data,
                                            raw_label,
                                            raw_prediction,
                                            pred_threshold=0.5)

Loading OPT_NERVE_L model
Device running "cuda"
max output channels 128
Train index 31
raw prediction: min 0.0, max 1.0, dsc 0.565
threshold prediction: min 0.0, max 1.0, dsc 0.624


VBox(children=(HBox(children=(IntSlider(value=36, max=71),)),))

Output()

Valid index 38
raw prediction: min 0.0, max 1.0, dsc 0.4732
threshold prediction: min 0.0, max 1.0, dsc 0.4936


VBox(children=(HBox(children=(IntSlider(value=36, max=71),)),))

Output()