In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import skimage, os
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing
from skimage.measure import label,regionprops, perimeter
from skimage.morphology import binary_dilation, binary_opening
from skimage.filters import roberts, sobel
from skimage import measure, feature
from skimage.segmentation import clear_border
from skimage import data
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import scipy.misc
import numpy as np
from glob import glob
from skimage.io import imread
import re
from random import shuffle

# Raise pandas' display limits so long/wide frames are not truncated in cell output.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import warnings
# Suppress everything emitted through the `warnings` module for the rest of the session.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.animation as anim

import imageio
from skimage.transform import resize

import copy
from scipy import ndimage as nd
import nibabel as nib
import itertools
import cv2

from tensorflow.keras.layers import Input, concatenate, Conv3D, MaxPooling3D, Conv3DTranspose, AveragePooling3D, ZeroPadding3D
from tensorflow.keras import layers
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.metrics import AUC

import math

In [None]:
# Gather the *.nii.gz image and mask files and read the clinical sheet and the
# volInfo (ROI volume) sheet from the guangdi_1 data folder.
all_images = glob(os.path.join('/content/drive/MyDrive/Data/guangdi_1/Hospital A','*.nii.gz'))
all_masks = glob(os.path.join('/content/drive/MyDrive/Data/guangdi_1/mask_A','*.nii.gz'))
df = pd.read_excel('/content/drive/MyDrive/Data/guangdi_1/clinic.xlsx', sheet_name='Hospital A')
roi = pd.read_excel('/content/drive/MyDrive/Data/guangdi_1/volInfo_mask_A_20211112131836.xlsx', sheet_name='Sheet1')
# Sanity check: number of image files, mask files, clinical rows and ROI rows.
len(all_images), len(all_masks), df.shape[0], roi.shape[0]

(500, 499, 500, 500)

In [None]:
def _name_prefix(path):
  """Return the part of a file's basename that precedes its first digit.

  The single character right before the first digit is dropped as well
  (presumably the separator between the name and the numeric suffix).
  Returns None when the basename contains no digit at all, so callers can
  skip such files instead of failing.
  """
  filename = os.path.basename(path)
  digit = re.search(r"\d", filename)
  if digit is None:
    return None
  # max(..., 0) keeps a leading digit from turning the slice into [:-1]
  return filename[: max(digit.start() - 1, 0)]


def _find_by_name(name, paths):
  """Return the first entry of `paths` whose name prefix equals `name`, else NaN."""
  for path in paths:
    if _name_prefix(path) == name:
      return path
  return np.nan


def match_img_mask(row):
  """Look up the image and mask file belonging to one clinical row.

  The row's 'Name' is stripped, upper-cased and its space-separated parts are
  joined with '_'; that key is compared with the name prefix of every file in
  the module-level `all_images` and `all_masks` lists.

  Args:
    row: mapping/Series with a string 'Name' entry.

  Returns:
    pd.Series of two elements: [img_path, mask_path]. Either element is NaN
    when no file matches. Files whose name contains no digit are ignored.
  """
  name = '_'.join(row['Name'].strip().upper().split(' '))
  return pd.Series([_find_by_name(name, all_images), _find_by_name(name, all_masks)])

# Attach the matched image/mask path (NaN when nothing matches) to every clinical row.
df[['img_path', 'mask_path']] = df.apply(match_img_mask, axis=1)

In [None]:
def extract_name(x):
  """Derive a normalised name from a file name, e.g. 'JOHN_DOE_123.nii.gz' -> 'john doe'.

  Everything from the first digit onwards is discarded, together with the one
  character directly before that digit. The remainder is lower-cased, split on
  '_', and the non-empty stripped parts are joined with single spaces.

  Args:
    x: file name string.

  Returns:
    The normalised name. When `x` contains no digit the whole string is
    normalised instead of raising.
  """
  digit = re.search(r"\d", x)
  if digit is not None:
    # max(..., 0) keeps a leading digit from turning the slice into [:-1]
    x = x[: max(digit.start() - 1, 0)]
  parts = (part.strip() for part in x.lower().split('_'))
  return ' '.join(part for part in parts if part)

def format_name(x):
  """Lower-case `x` and collapse its space-separated words to single-space separation.

  Pieces obtained by splitting on ' ' are stripped; empty pieces are dropped.
  """
  pieces = (piece.strip() for piece in x.lower().split(' '))
  return ' '.join(piece for piece in pieces if piece)

# Derive a normalised name from each ROI file name, then keep only that name and the three ROI volume columns.
roi['name'] = roi['fileName'].apply(lambda x: extract_name(x))
roi = roi[['name', 'vol_roi1', 'vol_roi2', 'vol_roi3']]
# Normalise the clinical names to the same lower-case, single-space form and sort the rows by name.
df['Name'] = df['Name'].apply(lambda x: format_name(x))
df = df.sort_values(by=['Name'])

In [1]:
# Preview the first rows of the reduced ROI table.
roi.head()

In [None]:
# Left-join the ROI volumes onto the clinical rows by normalised name; rows without a
# matching ROI entry get NaN volumes, which are then replaced by 0.0.
df = pd.merge(df, roi, left_on='Name', right_on='name', how='left')
df[['vol_roi1', 'vol_roi2', 'vol_roi3']] = df[['vol_roi1', 'vol_roi2', 'vol_roi3']].fillna(0.0)

In [2]:
# Spot-check one randomly chosen row: print two of its clinical columns and its matched file paths.
show_ids = np.random.randint(df.shape[0])
print(df.iloc[show_ids]['缩写'])
print(df.iloc[show_ids]['IDx'])
print(df.iloc[show_ids]['img_path'])
print(df.iloc[show_ids]['mask_path'])

In [None]:
# Drop the join key that came in from `roi` (it duplicates 'Name').
df.drop(columns=['name'], inplace=True)
# Remove every row with a missing value in any column (this includes rows without a matched image or mask path).
df = df.dropna().reset_index(drop=True)
# Rename all 22 columns by position.
# NOTE(review): this relies on the spreadsheet's column order staying fixed — confirm.
df.columns = ['r_no', 'name', 'f1', 'f2', 'label', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'img_path', 'mask_path', 'vol_roi1', 'vol_roi2', 'vol_roi3']
# Binarise the target: a raw value of 2 becomes 1, every other value becomes 0.
df['label'] = df['label'].apply(lambda x: int(1) if x == 2 else int(0))

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Tabular feature columns: the 14 clinical features plus the three ROI volumes.
tab_cols = ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'vol_roi1', 'vol_roi2', 'vol_roi3']
# Min-max scale every tabular column in place.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the train/validation
# split made later in the notebook, so validation rows influence the scaling.
scaler = MinMaxScaler()
df[tab_cols] = scaler.fit_transform(df[tab_cols])
df.head(5)

In [None]:
# Target size used by resize_volume when resampling every loaded volume.
img_rows = 400
img_cols = 400
img_depth = 16


def resize_volume(image_p):
    """Resample a 3-D volume to ``(img_rows, img_cols, img_depth)`` and scale it to [0, 1].

    ``img_depth`` slices are taken at evenly spaced positions along the last
    axis of ``image_p`` and each one is resized in-plane with ``cv2.resize``.
    The stacked slices are then transposed, which moves the slice axis last and
    swaps the two in-plane axes relative to the resized slices.

    Args:
        image_p: volume indexable as ``image_p[:, :, i]`` (assumed to be a
            numeric 3-D NumPy array such as nibabel's ``get_fdata()`` result
            -- TODO confirm). It is only read, never modified, so no copy is
            taken.

    Returns:
        numpy.ndarray of shape ``(img_rows, img_cols, img_depth)``. Values are
        min-max scaled to [0, 1]; a constant volume is returned unscaled.
    """
    desired_depth = img_depth
    desired_width = img_rows
    desired_height = img_cols

    # Evenly spaced slice positions, truncated to integer indices
    z_ids = np.linspace(0, image_p.shape[2] - 1, desired_depth).astype(int)
    slices = [cv2.resize(image_p[:, :, i], (desired_width, desired_height)) for i in z_ids]
    img3d = np.stack(slices).T

    lowest, highest = np.min(img3d), np.max(img3d)
    if lowest < highest:
        img3d = (img3d - lowest) / (highest - lowest)

    # Safety net: zero-pad along the slice axis if fewer slices than requested were produced.
    # The pad takes its in-plane size from img3d itself so the shapes always agree.
    if img3d.shape[-1] < desired_depth:
        pad = np.zeros(img3d.shape[:-1] + (desired_depth - img3d.shape[-1],))
        img3d = np.concatenate((img3d, pad), axis=-1)
    return img3d

def load_dicom_images_3d(img_path):
    """Load the volume stored at ``img_path`` and return it resized with a trailing channel axis.

    The file is read with ``nib.load(...).get_fdata()``, passed through
    ``resize_volume`` and given one extra axis of length 1 at the end.
    """
    volume = resize_volume(nib.load(img_path).get_fdata())
    return np.expand_dims(volume, axis=-1)

In [None]:
def check_image_mask(row):
  """Return 1 when both the image and the mask of `row` load to the expected shape, else 0.

  Both files are loaded through `load_dicom_images_3d` and compared against
  `(img_rows, img_cols, img_depth, 1)`.

  Args:
    row: mapping/Series with 'img_path' and 'mask_path' entries.

  Returns:
    int: 1 if both shapes match, otherwise 0. Errors raised while loading a
    file are not caught here.
  """
  expected_shape = (img_rows, img_cols, img_depth, 1)

  img = load_dicom_images_3d(row['img_path'])
  # Read the mask from its own column so the mask file itself is validated
  mask = load_dicom_images_3d(row['mask_path'])

  return 1 if img.shape == expected_shape and mask.shape == expected_shape else 0

# Flag each row with 1/0 according to check_image_mask; this loads the volumes of every row.
df['check'] = df.apply(check_image_mask, axis=1)

In [None]:
# Keep a copy of the frame (including the 'check' column) before rows are filtered out.
df_copy = df.copy()

In [None]:
# Keep only rows that passed the shape check and persist the cleaned table.
df = df[df['check'] == 1].reset_index(drop=True)
df.to_csv('clean_df.csv', index=False)

In [None]:
# Positional hold-out split: the first 450 rows train, all remaining rows validate.
# NOTE(review): rows are not shuffled before splitting — confirm the current row order is acceptable for a hold-out set.
train_df = df.iloc[:450]
val_df = df.iloc[450:]
train_df.shape, val_df.shape

((450, 23), (40, 23))

In [None]:
from tensorflow.keras.utils import Sequence

class Dataset(Sequence):
    """Keras ``Sequence`` yielding batches of volumes, masks, tabular features and labels.

    Every per-sample array (paths, masks, labels, tabular rows) is kept in its
    original order; batches are drawn through ``self.order``, a permutation of
    sample positions. Shuffling only permutes ``self.order``, so images, masks,
    tabular rows and labels always stay aligned.

    Args:
        df: frame with 'img_path', 'mask_path', 'label' and the ``tab_cols`` columns.
        is_train: when True, ``__getitem__`` returns inputs and targets;
            otherwise only the inputs.
        batch_size: number of samples per batch (the last batch may be smaller).
        shuffle: reshuffle the sample order at the end of every epoch
            (only applied when ``is_train`` is True).
        tab_cols: names of the tabular feature columns.
    """

    def __init__(self,df,is_train=True,batch_size=2,shuffle=True,tab_cols=tab_cols):
        super().__init__()
        self.idx = df.index
        self.paths = df["img_path"].values
        self.masks = df['mask_path'].values
        self.y = df['label'].values
        self.tab = df[tab_cols].values
        self.is_train = is_train
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Positions into the arrays above; this is the only thing that gets shuffled
        self.order = np.arange(len(self.paths))

    def __len__(self):
        """Number of batches per epoch (the final partial batch counts)."""
        return math.ceil(len(self.order)/self.batch_size)

    def __getitem__(self,ids):
        """Return batch number ``ids``.

        Training mode returns ``[batch_X, batch_tab], [batch_mask, batch_y]``;
        otherwise ``(batch_X, batch_tab)``. ``batch_X`` and ``batch_mask`` carry
        a leading batch axis in both modes.
        """
        rows = self.order[ids * self.batch_size:(ids + 1) * self.batch_size]

        batch_tab = self.tab[rows]
        batch_X = np.stack([load_dicom_images_3d(p) for p in self.paths[rows]], axis=0)

        if not self.is_train:
            # NOTE(review): Keras may interpret a 2-tuple from a Sequence as
            # (inputs, targets) — confirm how this branch is consumed.
            return batch_X, batch_tab

        batch_mask = np.stack([load_dicom_images_3d(p) for p in self.masks[rows]], axis=0)
        batch_y = self.y[rows]
        return [batch_X, batch_tab], [batch_mask, batch_y]

    def on_epoch_end(self):
        """Reshuffle the sample order between epochs when shuffling is enabled."""
        if self.shuffle and self.is_train:
            np.random.shuffle(self.order)

In [None]:
# Wrap both splits in batch generators using the Dataset defaults (is_train=True, batch_size=2, shuffle=True).
train_dataset = Dataset(train_df)
valid_dataset = Dataset(val_df)

In [None]:
# Pull one random training batch (batch index drawn from [0, 100)) and print the image/mask batch shapes.
show_ids = np.random.randint(100)
[images, tabs], [masks, labels] = train_dataset[show_ids]
print("Dimension of the img is:", images.shape)
print("Dimension of the mask is:", masks.shape)

Dimension of the img is: (2, 400, 400, 16, 1)
Dimension of the mask is: (2, 400, 400, 16, 1)


In [None]:
def _double_conv(x, filters):
    """Apply two 3x3x3 same-padded ReLU convolutions with `filters` channels each."""
    x = Conv3D(filters, (3, 3, 3), activation='relu', padding='same')(x)
    return Conv3D(filters, (3, 3, 3), activation='relu', padding='same')(x)


def _up_block(x, skip, filters):
    """Upsample `x` by 2 with a transposed conv, concatenate `skip` on the channel axis, then double-conv."""
    up = Conv3DTranspose(filters, (2, 2, 2), strides=(2, 2, 2), padding='same')(x)
    merged = concatenate([up, skip], axis=4)
    return _double_conv(merged, filters)


def get_net():
    """Build and compile the two-headed 3-D U-Net.

    Inputs:
        * image volume of shape ``(img_rows, img_cols, img_depth, 1)`` — the
          layout produced by ``load_dicom_images_3d``;
        * tabular vector with ``len(tab_cols)`` features.

    Outputs:
        * ``'mask'``: per-voxel sigmoid map with the same spatial size as the input;
        * ``'label'``: single sigmoid probability, computed from the pooled
          bottleneck features fused with the tabular features.

    Side effects:
        Writes a diagram of the model to ``model.png``.

    Returns:
        The compiled ``keras.Model`` (mask loss: MSE, label loss: binary
        cross-entropy weighted 9:1, Adam optimiser, binary accuracy named
        ``'acc'`` on the label head).
    """
    # Axis order must match the batches yielded by the data generator
    inputs_img = Input((img_rows, img_cols, img_depth, 1))

    # Encoder: four double-conv stages, each followed by 2x max pooling
    conv1 = _double_conv(inputs_img, 32)
    pool1 = MaxPooling3D(pool_size=(2, 2, 2))(conv1)

    conv2 = _double_conv(pool1, 64)
    pool2 = MaxPooling3D(pool_size=(2, 2, 2))(conv2)

    conv3 = _double_conv(pool2, 128)
    pool3 = MaxPooling3D(pool_size=(2, 2, 2))(conv3)

    conv4 = _double_conv(pool3, 256)
    pool4 = MaxPooling3D(pool_size=(2, 2, 2))(conv4)

    # Bottleneck
    conv5 = _double_conv(pool4, 512)

    # Decoder with skip connections from the matching encoder stage
    conv6 = _up_block(conv5, conv4, 256)
    conv7 = _up_block(conv6, conv3, 128)
    conv8 = _up_block(conv7, conv2, 64)
    conv9 = _up_block(conv8, conv1, 32)

    # Sigmoid, not softmax: softmax over a single channel is identically 1.0
    output_img = Conv3D(1, (1, 1, 1), activation='sigmoid', name='mask')(conv9)

    # Classification branch: pooled bottleneck features + tabular features
    x = layers.GlobalAveragePooling3D()(conv5)
    img_x = layers.Dense(units=128, activation="relu")(x)

    inputs_tab = Input(shape=(len(tab_cols),))
    input_x = layers.Dense(units=256, activation="relu")(inputs_tab)

    x = concatenate([img_x, input_x])
    x = layers.BatchNormalization()(x)

    # Four residual dense blocks; 384 = 128 + 256 so the skip addition is shape-compatible
    for _ in range(4):
        hidden = layers.Dense(units=384, activation="relu")(x)
        hidden = layers.Dropout(0.25)(hidden)
        x = layers.add([hidden, x])

    output_label = layers.Dense(units=1, activation="sigmoid", name='label')(x)

    model = keras.Model(inputs=[inputs_img, inputs_tab], outputs=[output_img, output_label])

    keras.utils.plot_model(model, to_file='model.png', show_shapes=True)

    model.compile(loss={'label': 'binary_crossentropy',
                        'mask': 'mean_squared_error'},
                  loss_weights = {"label": 9, "mask": 1},
                  optimizer='adam',
                  metrics={'label': keras.metrics.BinaryAccuracy(name='acc')})

    return model

In [None]:
# Build and compile the two-headed model.
model = get_net()

# In a multi-output model Keras prefixes every metric with the name of the output it is
# attached to, so BinaryAccuracy(name='acc') on the 'label' head is logged as
# 'label_acc' / 'val_label_acc'. Monitoring plain 'val_acc' never finds a value, which
# leaves both callbacks inactive.
model_save = ModelCheckpoint('model.h5',
                             save_best_only = True,
                             monitor = 'val_label_acc',
                             mode = 'max', verbose = 1)
early_stop = EarlyStopping(monitor = 'val_label_acc',
                           patience = 10, mode = 'max', verbose = 1,
                           restore_best_weights = True)

# Train on the generator-backed datasets; the best checkpoint (by validation label
# accuracy) is written to model.h5.
model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    shuffle=True,
    verbose=1,
    callbacks = [model_save, early_stop],
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f3eea269610>