# UW-Madison GI Tract Image Segmentation

## Check model performance

In [1]:
#!/usr/bin/env python

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
pd.options.plotting.backend = "plotly"
from tqdm.notebook import tqdm
tqdm.pandas()
import numpy as np
import os
import re
import cv2
from plotly.offline import init_notebook_mode
from glob import glob

import tensorflow as tf

# Visualization Imports
from matplotlib.colors import ListedColormap
from matplotlib.patches import Rectangle
import matplotlib.patches as patches
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm; tqdm.pandas();
import plotly.express as px
import seaborn as sns
from PIL import Image, ImageEnhance
import matplotlib; print(f"\t\t– MATPLOTLIB VERSION: {matplotlib.__version__}");
from matplotlib import animation, rc; rc('animation', html='jshtml')
from matplotlib.colors import LinearSegmentedColormap
import plotly
import PIL
import cv2

# Import Tensorflow and Keras
from tensorflow import keras
import tensorflow as tf
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

from keras.models import load_model


		– MATPLOTLIB VERSION: 3.5.2


In [2]:
%cd 'hpc_train_files'

/Users/viktor/Documents/BA/GIT/HealthyOrganTracker/hpc_train_files


In [3]:
from utility import rle_encode, rle_decode, open_gray16
from config import CFG
from dataloader import DataGenerator
from loss import dice_coef, iou_coef, dice_loss, bce_dice_loss, dice_coef2
from datapreparation import extract_metadata, remove_faulties

## Config ⚙️


In [4]:
BASE_PATH = 'input/uw-madison-gi-tract-image-segmentation/'
TRAIN_DIR =  BASE_PATH +'train'
TRAIN_CSV =  BASE_PATH +'train.csv'
STYE = "multilabel"
DEBUG = True

"""
Backbones available: 
        'efficientnetb0'
        'efficientnetb1'
        'efficientnetb2'
        'efficientnetb3'
        'efficientnetb4'
        'efficientnetb5'
        'efficientnetb6'
        'efficientnetb7'
        'inceptionresnetv2'
        'inceptionv3'
        'resnet50'
        'resnext50'
        'resnext101'
        'seresnext50'
        'seresnext101'
        'densenet121'
        'densenet201'
        """

"\nBackbones available: \n        'efficientnetb0'\n        'efficientnetb1'\n        'efficientnetb2'\n        'efficientnetb3'\n        'efficientnetb4'\n        'efficientnetb5'\n        'efficientnetb6'\n        'efficientnetb7'\n        'inceptionresnetv2'\n        'inceptionv3'\n        'resnet50'\n        'resnext50'\n        'resnext101'\n        'seresnext50'\n        'seresnext101'\n        'densenet121'\n        'densenet201'\n        "

In [5]:
cfg = CFG(
    backbone            = 'efficientnetb0',
    img_dims            = (256,256,3),
    model               = 'U-Net',
    batch_size          = 16, 
    epochs              = 1, 
    kaggle              = False, 
    use_fold_csv        = True,
    semi3d_data         = False,
    remove_faulty_cases = True,
    use_crop_data       = False)

## Load prepared Dataframe 

In [6]:
DEBUG = False
if DEBUG:
    df_train["multilabel_mask_path"] = df_train.progress_apply(lambda _row: make_seg_mask(_row, outputdir, resize_to=SEG_SHAPE, resize=False), axis=1)
    df_train.to_csv("df_train_with_mask_paths.csv", index=False)
else:
    df_train = pd.read_csv("df_train_with_mask_paths.csv")
    df_train.fillna('', inplace=True)



In [7]:
df_train

Unnamed: 0.1,Unnamed: 0,id,large_bowel,small_bowel,stomach,path,case,day,slice,width,...,count,path00,path01,path02,image_paths,rs,re,cs,ce,multilabel_mask_path
0,0,case123_day20_slice_0001,,,,input/uw-madison-gi-tract-image-segmentation/t...,123,20,1,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,20,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
1,1,case123_day20_slice_0002,,,,input/uw-madison-gi-tract-image-segmentation/t...,123,20,2,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,20,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
2,2,case123_day20_slice_0003,,,,input/uw-madison-gi-tract-image-segmentation/t...,123,20,3,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,20,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
3,3,case123_day20_slice_0004,,,,input/uw-madison-gi-tract-image-segmentation/t...,123,20,4,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,20,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
4,4,case123_day20_slice_0005,,,,input/uw-madison-gi-tract-image-segmentation/t...,123,20,5,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,20,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38491,38491,case30_day0_slice_0140,,,,input/uw-madison-gi-tract-image-segmentation/t...,30,0,140,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,1,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
38492,38492,case30_day0_slice_0141,,,,input/uw-madison-gi-tract-image-segmentation/t...,30,0,141,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,1,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
38493,38493,case30_day0_slice_0142,,,,input/uw-madison-gi-tract-image-segmentation/t...,30,0,142,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,1,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...
38494,38494,case30_day0_slice_0143,,,,input/uw-madison-gi-tract-image-segmentation/t...,30,0,143,266,...,0,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,input/uw-madison-gi-tract-image-segmentation/t...,['input/uw-madison-gi-tract-image-segmentation...,1,10000,0,266,input/uw-madison-gi-tract-image-segmentation/m...


In [8]:
for file in glob(os.path.join("../tensorboard_logs/efb2_trains/batch_size_16", "**", "historyFold3.csv"), recursive=True):
    if file.endswith(".csv"):
        print(file)

In [9]:
def get_performance_from_history_df(path="tensorboard_logs/", output="all_histories.csv", extra_regex=False):
    if not extra_regex:
        all_histories = pd.DataFrame(columns=['Encoder','Größe' ,'Beste Epoche', 'Loss', 'Val. loss', 'Dice', 'Val. Dice', 'IoU', 'Val. IoU'])
    else:
        all_histories = pd.DataFrame(columns=['Encoder','Größe' ,'Beste Epoche', 'Loss', 'Val. loss', 'Dice', 'Val. Dice', 'IoU', 'Val. IoU', '2.5D', 'Crop' , 'Bereinigt', 'Batch Size', 'Epochen'])

    # Iterate over every historyFold3.csv in tensorboard_logs/base_BS_16_no_add_ons/subfolder and plot each column values with plotly and mark the maximum value and drop the first column
    for file in glob(os.path.join(path, "**", "history.csv"), recursive=True):
        if file.endswith(".csv"):
            print(file)
            history_df = pd.read_csv(file, index_col=0)
            
            regexBone, regexDim, threeDim, regexCrop, regexBatch, regexEpochs, regexFold, regexFaulties = get_meta_info_from_model_path(file)
            
            # print all regex parameters 
            print(regexBone, regexDim, threeDim, regexCrop, regexBatch, regexEpochs, regexFold, regexFaulties)

            best_epoch = get_best_epoch(history_df)

            loss = history_df.loc[best_epoch, 'loss']
            val_loss = history_df.loc[best_epoch, 'val_loss']

            dice_coef = history_df.loc[best_epoch, 'dice_coef']
            val_dice_coef = history_df.loc[best_epoch, 'val_dice_coef']

            iou_coef = history_df.loc[best_epoch, 'iou_coef']
            val_iou_coef = history_df.loc[best_epoch, 'val_iou_coef']

            if not extra_regex:
                all_histories = all_histories.append({'Encoder':regexBone, 'Größe':regexDim, 'Beste Epoche':best_epoch, 'Loss':loss, 'Val. loss':val_loss, 'Dice':dice_coef, 'Val. Dice':val_dice_coef, 'IoU':iou_coef, 'Val. IoU':val_iou_coef}, ignore_index=True)
            else:
                all_histories = all_histories.append({'Encoder':regexBone, 'Größe':regexDim, 'Beste Epoche':best_epoch, 'Loss':loss, 'Val. loss':val_loss, 'Dice':dice_coef, 'Val. Dice':val_dice_coef, 'IoU':iou_coef, 'Val. IoU':val_iou_coef, '2.5D':threeDim, 'Crop':regexCrop,'Bereinigt':regexFaulties ,'Batch Size':regexBatch, 'Epochen':regexEpochs}, ignore_index=True)

    all_histories = all_histories.sort_values(by=['Val. Dice'], ascending=False)
    all_histories.to_csv(output)

    return all_histories


def get_best_epoch(history_df):
    return history_df[history_df["val_loss"]==history_df["val_loss"].min()].index.values[0]

def print_best_epoch(history_df):
    best_epoch = get_best_epoch(history_df)
    print(f"Best epoch: {best_epoch}")
    print(f"Best loss: {history_df.loc[best_epoch, 'val_loss']}")

def print_history_latex(history_df):

    print(history_df.to_latex(index=False , float_format=lambda x: '{:.4f}'.format(x)))

def get_meta_info_from_model_path(file):
    regexBone = 'None' if not re.search("BB_(\w+)_D",file) else re.search("BB_(\w+)_D",file).group(1) 
    regexDim = 'None' if not re.search("DIM_\((\d+),",file) else int(re.search("DIM_\((\d+),",file).group(1) )
    threeDim = 'None' if not re.search("SEMI3D_([aA-zZ]{4,5})_",file) else re.search("SEMI3D_([aA-zZ]{4,5})_",file).group(1) 
    regexCrop = 'None' if not re.search("CROPDATA_([aA-zZ]{4,5})_",file) else re.search("CROPDATA_([aA-zZ]{4,5})_",file).group(1) 
    regexBatch = 'None' if not re.search("BATCH_(\d+)_",file) else int(re.search("BATCH_(\d+)_",file).group(1) )
    regexEpochs = 'None' if not re.search("EPOCHS_(\d+)_",file) else int(re.search("EPOCHS_(\d+)_",file).group(1))
    regexFold = 'None' if not re.search("Fold_(\d).",file) else int(re.search("Fold_(\d).",file).group(1))
    regexFaulties = 'None' if not re.search("FAULTIES_([aA-zZ]{4,5})_",file) else re.search("FAULTIES_([aA-zZ]{4,5})_",file).group(1)

    return regexBone, regexDim, threeDim, regexCrop, regexBatch, regexEpochs, regexFold, regexFaulties


In [12]:
histories_bs16 =  get_performance_from_history_df(path="../tensorboard_logs/efb2", output="all_histories.csv", extra_regex=True)


../tensorboard_logs/efb2/UNet_BB_efficientnetb2_DIM_(256, 256, 3)_SEMI3D_False_CROPDATA_True_FAULTIES_False_BATCH_16_EPOCHS_50_FOLD_3.h5_23082022-2113_FOLD_3/history.csv
efficientnetb2 256 False True 16 50 None False
../tensorboard_logs/efb2/UNet_BB_efficientnetb2_DIM_(256, 256, 3)_SEMI3D_True_CROPDATA_False_FAULTIES_True_BATCH_16_EPOCHS_50_FOLD_3.h5_23082022-2114_FOLD_3/history.csv
efficientnetb2 256 True False 16 50 None True
../tensorboard_logs/efb2/UNet_BB_efficientnetb2_DIM_(256, 256, 3)_SEMI3D_False_CROPDATA_False_FAULTIES_True_BATCH_16_EPOCHS_50_FOLD_3.h5_23082022-2111_FOLD_3/history.csv
efficientnetb2 256 False False 16 50 None True
../tensorboard_logs/efb2/UNet_BB_efficientnetb2_DIM_(256, 256, 3)_SEMI3D_True_CROPDATA_True_FAULTIES_False_BATCH_16_EPOCHS_50_FOLD_3.h5_23082022-2121_FOLD_3/history.csv
efficientnetb2 256 True True 16 50 None False
../tensorboard_logs/efb2/UNet_BB_efficientnetb2_DIM_(256, 256, 3)_SEMI3D_False_CROPDATA_True_FAULTIES_True_BATCH_16_EPOCHS_50_FOLD_3.h5_

In [13]:
histories_bs16

Unnamed: 0,Encoder,Größe,Beste Epoche,Loss,Val. loss,Dice,Val. Dice,IoU,Val. IoU,2.5D,Crop,Bereinigt,Batch Size,Epochen
6,efficientnetb2,256,35,0.031243,0.084805,0.927835,0.814872,0.929679,0.839297,True,True,True,16,50
1,efficientnetb2,256,30,0.033286,0.087188,0.922785,0.808507,0.928402,0.839635,True,False,True,16,50
7,efficientnetb2,256,34,0.033411,0.093202,0.922657,0.797341,0.91715,0.831916,True,False,False,16,50
3,efficientnetb2,256,32,0.032862,0.104026,0.924262,0.76664,0.911183,0.825358,True,True,False,16,50
0,efficientnetb2,256,28,0.035612,0.111564,0.918026,0.748171,0.911873,0.816188,False,True,False,16,50
4,efficientnetb2,256,24,0.036018,0.115257,0.916885,0.736857,0.91089,0.820561,False,True,True,16,50
5,efficientnetb2,256,26,0.03644,0.11982,0.915611,0.730743,0.896916,0.81633,False,False,False,16,50
2,efficientnetb2,256,13,0.048727,0.124992,0.887236,0.716258,0.836343,0.814124,False,False,True,16,50


In [61]:
histories_bs16[histories_bs16['Encoder'] == 'efficientnetb2']

Unnamed: 0,Encoder,Größe,Beste Epoche,Loss,Val. loss,Dice,Val. Dice,IoU,Val. IoU,2.5D,Crop,Bereinigt,Batch Size,Epochen


In [14]:
print_history_latex(histories_bs16)

\begin{tabular}{llllllllllllll}
\toprule
       Encoder & Größe & Beste Epoche &   Loss & Val. loss &   Dice & Val. Dice &    IoU & Val. IoU &  2.5D &  Crop & Bereinigt & Batch Size & Epochen \\
\midrule
efficientnetb2 &   256 &           35 & 0.0312 &    0.0848 & 0.9278 &    0.8149 & 0.9297 &   0.8393 &  True &  True &      True &         16 &      50 \\
efficientnetb2 &   256 &           30 & 0.0333 &    0.0872 & 0.9228 &    0.8085 & 0.9284 &   0.8396 &  True & False &      True &         16 &      50 \\
efficientnetb2 &   256 &           34 & 0.0334 &    0.0932 & 0.9227 &    0.7973 & 0.9172 &   0.8319 &  True & False &     False &         16 &      50 \\
efficientnetb2 &   256 &           32 & 0.0329 &    0.1040 & 0.9243 &    0.7666 & 0.9112 &   0.8254 &  True &  True &     False &         16 &      50 \\
efficientnetb2 &   256 &           28 & 0.0356 &    0.1116 & 0.9180 &    0.7482 & 0.9119 &   0.8162 & False &  True &     False &         16 &      50 \\
efficientnetb2 &   256 &  