In [1]:
import tensorflow as tf
import os
import pandas as pd
from tensorflow import keras
from functools import partial
import numpy as np

import SimpleITK as sitk # to read nii files

In [2]:
# Conv3D factory with this notebook's defaults pre-bound: 3x3x3 kernel, unit
# strides, "SAME" padding and a bias term. Any of them can still be overridden
# per call (e.g. kernel_size=1 for a projection).
DefaultConv3D = partial(
    keras.layers.Conv3D,
    kernel_size=3,
    strides=(1, 1, 1),
    padding="SAME",
    use_bias=True,
)

In [3]:
class ResidualUnit(keras.layers.Layer):
    """3-D residual unit: (BatchNorm -> activation -> Conv3D) applied twice, plus a skip path.

    Args:
        filters: number of output channels of both convolutions.
        strides: strides of the first convolution (an int or a 3-sequence).
            The second convolution always uses unit strides.
        activation: anything accepted by ``keras.activations.get``.
        **kwargs: forwarded to ``keras.layers.Layer``.

    The skip path is the identity unless the product of ``strides`` is > 1, in
    which case it is a 1x1x1 convolution with the same strides and ``filters``
    outputs. With unit strides the input is added to the main path unchanged,
    so it must already have a shape compatible with the main-path output.
    """
    # separate construction and execution
    # be aware of the strides' shape
    def __init__(self, filters, strides=(1,)*3, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can describe the layer
        # completely (needed to rebuild it when a saved model is reloaded).
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        self.ResSubUnit = 2

        # a list of layers (and the activation callable) applied in order
        self.main_layers = [
                keras.layers.BatchNormalization(),
                self.activation,
                DefaultConv3D(filters, strides=strides, kernel_initializer="he_normal"),
                keras.layers.BatchNormalization(),
                self.activation,
                DefaultConv3D(filters, strides=(1,)*3, kernel_initializer="he_normal"),
                ]
        self.skip_layers = []
        if np.prod(strides) > 1:
            # Down-sampling unit: project the shortcut with a strided 1x1x1
            # convolution so it matches the main path.
            self.skip_layers = [
                DefaultConv3D(filters, kernel_size=1, strides=strides, kernel_initializer="he_normal")
                ]

    def call(self, inputs, **kwargs):
        """Run the main path and the skip path on ``inputs`` and return their sum."""
        x = inputs
        orig_x = inputs

        for layer in self.main_layers:
            x = layer(x)

        for layer in self.skip_layers:
            orig_x = layer(orig_x)

        return x + orig_x

    def get_config(self):
        """Return the base config extended with this layer's constructor arguments.

        Only keys accepted by ``__init__`` are added. ``ResSubUnit`` is a fixed
        internal constant and is deliberately left out: putting it in the
        config would pass it back to ``__init__`` (and on to the base ``Layer``
        via ``**kwargs``) when the layer is re-created from the config.
        """
        config = super(ResidualUnit, self).get_config()

        # Normalise strides to plain Python ints so the config stays JSON-friendly.
        strides = self.strides
        if isinstance(strides, (int, np.integer)):
            strides = int(strides)
        else:
            strides = [int(s) for s in strides]

        config.update({
            'filters': int(self.filters),
            'strides': strides,
            'activation': keras.activations.serialize(self.activation),
        })

        return config

In [4]:
# Per-stage channel counts and strides; index 0 describes the stem convolution,
# the remaining entries describe one ResidualUnit each.
filters = (16, 32, 64, 128)
strides = (1, 2, 2, 2)

model = keras.models.Sequential()
# Stem: one convolution on the single-channel 48x108x108 volume, then 2x max-pooling.
model.add(DefaultConv3D(filters[0], kernel_size=3, strides=(strides[0],)*3,
        input_shape=[48, 108, 108, 1], kernel_initializer="he_normal"))
model.add(keras.layers.MaxPool3D(pool_size=(2,)*3, padding="SAME"))

# `n_filters` rather than `filter`, so the Python builtin is not shadowed in
# the notebook namespace.
for n_filters, stride in zip(filters[1:], strides[1:]):
    model.add(ResidualUnit(n_filters, strides=(stride,)*3))

model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Activation("relu"))
model.add(keras.layers.GlobalAvgPool3D())

# The global pooling output should already be one vector per sample, so this
# Flatten is expected to be a no-op; it is kept so the layer stack is unchanged.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(2, activation="softmax"))

# Integer class labels + 2-way softmax -> sparse categorical cross-entropy.
model.compile(loss="sparse_categorical_crossentropy",
        optimizer="sgd",
        metrics=["accuracy"])

In [6]:
# Mini-batch size; reused by the dataset builders and by the step counts
# passed to model.fit / model.evaluate.
batch_size = 12

In [7]:
# read the file references
f = 'File_references.csv'

file_df = pd.read_csv(f)
# (A bare `file_df.head()` in the middle of a cell displays nothing, so it was
# dropped; put it on the last line of its own cell to preview the table.)

# One entry per volume: subject identifier, image file name and class label.
subject_ID = file_df['Sample'].to_numpy()
file_name = file_df['File name'].to_numpy()
label = file_df['Perforation'].to_numpy()
# Directory that the names in `file_name` are joined onto when loading.
datapath = r"/home/spl/Machine Learning/Data"
print(label.shape)

(320,)


In [8]:
# Preallocate the whole image stack as float32. np.zeros defaults to float64,
# which doubles the memory of this (n, 48, 108, 108) buffer; the values are
# scaled intensities fed to a network, so single precision is sufficient.
imgs = np.zeros(shape = (len(file_name),48,108,108), dtype=np.float32)
for i, f in enumerate(file_name):
    img = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(datapath,f)))
    # Scale by 255 (assumes 8-bit intensities — TODO confirm for the data).
    img = img / 255.0
    imgs[i] = img

# Append a trailing channel axis: (n, 48, 108, 108) -> (n, 48, 108, 108, 1).
imgs = np.expand_dims(imgs,axis = -1)
print(imgs.shape)

(320, 48, 108, 108, 1)


In [9]:
from sklearn.model_selection import train_test_split

# First cut: 70 % of the samples for training, 30 % held out.
train_img, test_img, train_l, test_l = train_test_split(
    imgs, label, test_size=0.3, random_state=41
)

# Second cut: the held-out part is halved into validation and evaluation sets.
val_img, evl_img, val_l, evl_l = train_test_split(
    test_img, test_l, test_size=0.5, random_state=41
)

# Shapes of the three image splits, one per line.
print(train_img.shape, val_img.shape, evl_img.shape, sep="\n")

# Label sums per split (the number of positives if labels are 0/1).
print(train_l.sum(), val_l.sum(), evl_l.sum(), sep="\n")

(224, 48, 108, 108, 1)
(48, 48, 108, 108, 1)
(48, 48, 108, 108, 1)
41
12
10


In [10]:
def read_dataset(imgs, labels, batch_size=8,shuffle_size=224):
    """Wrap in-memory images and labels in an endlessly repeating tf.data pipeline.

    Args:
        imgs: image array; sliced along its first axis.
        labels: labels aligned with ``imgs`` along the first axis.
        batch_size: number of samples per batch.
        shuffle_size: size of the shuffle buffer.

    Returns:
        A dataset of ``(images, labels)`` batches that is shuffled, batched,
        repeated without end and prefetched one batch ahead.
    """
    samples = tf.data.Dataset.from_tensor_slices((imgs, labels))
    pipeline = (
        samples
        .shuffle(shuffle_size)
        .batch(batch_size)
        .repeat()       # no count: consumers must bound iteration with a step count
        .prefetch(1)
    )
    return pipeline

# Size each shuffle buffer from its own split instead of repeating the split
# sizes as literals, so the buffers still cover the whole split if the data or
# the split ratio changes.
train_set = read_dataset(train_img, train_l, batch_size, shuffle_size = len(train_img))
val_set = read_dataset(val_img, val_l, batch_size, shuffle_size = len(val_img))
evl_set = read_dataset(evl_img, evl_l, batch_size, shuffle_size = len(evl_img))

# Sanity check: shapes of the image and label tensors in the first training batch.
for line in train_set.take(1):
    print(line[0].shape.as_list())
    print(line[1].shape.as_list())

[12, 48, 108, 108, 1]
[12]


In [11]:
# Checkpoint callback writing the model to "4th_batch_only.h5". With
# save_best_only=True only the best model so far is kept, judged by the
# callback's monitored quantity (left at the Keras default here — presumably
# val_loss; confirm for the installed version).
checkpoint_cb = keras.callbacks.ModelCheckpoint("4th_batch_only.h5",
        save_best_only=True)

class PrintValTrainRatioCallback(keras.callbacks.Callback):
    """Print the ratio of validation loss to training loss after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print ``val_loss / loss`` for the finished epoch.

        Args:
            epoch: index of the epoch that just ended (unused).
            logs: metrics dict supplied by Keras; may be ``None`` or lack
                ``val_loss`` when training runs without validation data.

        Nothing is printed when either loss is missing or the training loss
        is zero, so the callback never aborts training.
        """
        logs = logs or {}
        loss = logs.get("loss")
        val_loss = logs.get("val_loss")
        if loss is None or val_loss is None or loss == 0:
            return
        print("\nval/train: {:.2f} \n".format(val_loss / loss))

# Parent directory (relative to the current directory) holding one
# sub-directory per training run.
root_logdir = os.path.join(os.curdir, "4th batch only")

def get_run_logdir():
    """Return a log directory path under ``root_logdir`` named after the current local time."""
    from time import strftime
    stamp = strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, stamp)

# Resolve this run's log directory once and hand it to the TensorBoard callback.
run_logdir = get_run_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir) 

In [12]:
# Both datasets repeat without end, so each pass must be bounded by a step
# count. The counts come from the split sizes rather than from literals, so
# they stay correct if the data or the split ratio changes.
model.fit(train_set, steps_per_epoch=len(train_img) // batch_size, epochs=40,
          validation_data=val_set,
          validation_steps=len(val_img) // batch_size,
          callbacks=[checkpoint_cb,
                     PrintValTrainRatioCallback(), tensorboard_cb]
         )

Train for 18 steps, validate for 4 steps
Epoch 1/40
val/train: 0.78 

Epoch 2/40
val/train: 1.14 

Epoch 3/40
val/train: 1.14 

Epoch 4/40
val/train: 1.32 

Epoch 5/40
val/train: 1.16 

Epoch 6/40
val/train: 1.40 

Epoch 7/40
val/train: 1.54 

Epoch 8/40
val/train: 1.80 

Epoch 9/40
val/train: 1.69 

Epoch 10/40
val/train: 1.97 

Epoch 11/40
val/train: 2.20 

Epoch 12/40
val/train: 2.36 

Epoch 13/40
val/train: 2.76 

Epoch 14/40
val/train: 2.54 

Epoch 15/40
val/train: 2.50 

Epoch 16/40
val/train: 2.88 

Epoch 17/40
val/train: 2.41 

Epoch 18/40
val/train: 3.09 

Epoch 19/40
val/train: 2.56 

Epoch 20/40
val/train: 2.51 

Epoch 21/40
val/train: 2.30 

Epoch 22/40
val/train: 2.36 

Epoch 23/40
val/train: 1.95 

Epoch 24/40
val/train: 1.85 

Epoch 25/40
val/train: 1.60 

Epoch 26/40
val/train: 1.54 

Epoch 27/40
val/train: 1.55 

Epoch 28/40
val/train: 1.57 

Epoch 29/40
val/train: 1.50 

Epoch 30/40
val/train: 1.34 

Epoch 31/40
val/train: 1.32 

Epoch 32/40
val/train: 1.58 

Epoch 33

Epoch 34/40
val/train: 1.39 

Epoch 35/40
val/train: 1.59 

Epoch 36/40
val/train: 1.37 

Epoch 37/40
val/train: 1.35 

Epoch 38/40
val/train: 1.54 

Epoch 39/40
val/train: 1.74 

Epoch 40/40
val/train: 1.58 



<tensorflow.python.keras.callbacks.History at 0x7f8c34b9cd50>

In [None]:
# Evaluate on the held-out evaluation split. evl_set (as built by read_dataset)
# repeats without end, so the number of batches is bounded with `steps`
# (48 evaluation samples // batch_size).
model.evaluate(evl_set, steps= 48 // batch_size)

In [None]:
# The following is an implementation of the dataset pipeline using a generator.

In [None]:
def simple_reader(file_name, subject_ID, label):
    """Build a generator function that loads one image/label pair per file.

    Args:
        file_name: iterable of image file names, joined onto ``datapath``.
        subject_ID: iterable of subject identifiers; iterated in lock-step
            with the other two arguments but not yielded.
        label: iterable of labels aligned with ``file_name``.

    Returns:
        A zero-argument function; each call gives a fresh generator yielding
        ``(image, label)`` pairs as float32 arrays, where the image is divided
        by 255 and both arrays get a trailing axis of length 1.
    """
    def generator():
        # subject_ID stays in the zip so iteration stops at the shortest input.
        for fname, _subject, target in zip(file_name, subject_ID, label):
            full_path = os.path.join(datapath, fname)
            volume = sitk.GetArrayFromImage(sitk.ReadImage(full_path))
            scaled = np.expand_dims(volume / 255.0, axis=-1).astype(np.float32)

            target_arr = np.expand_dims(target, axis=-1).astype(np.float32)

            yield scaled, target_arr

    # Hand back the function itself (not a started generator) so it can be re-run.
    return generator

# An example of constructing a tf.data.Dataset using from_generator():

'''file_generator = simple_reader(file_name, subject_ID, label)
filepath_dataset = tf.data.Dataset.from_generator(file_generator, 
        (tf.float32,tf.float32))

filepath_dataset = filepath_dataset.repeat(3).shuffle(160).batch(batch_size)


for line in filepath_dataset.take(1):
    print(line[0].shape.as_list())'''

In [None]:
def reader(file_name, subject_ID, label, shuffle_size, batch_size):
    """Build a tf.data pipeline that reads image files lazily through ``simple_reader``.

    Args:
        file_name, subject_ID, label: forwarded unchanged to ``simple_reader``.
        shuffle_size: size of the shuffle buffer.
        batch_size: number of samples per batch.

    Returns:
        A dataset of float32 ``(image, label)`` batches: the generator output
        is repeated three times, shuffled, batched and prefetched one batch
        ahead.
    """
    make_samples = simple_reader(file_name, subject_ID, label)
    source = tf.data.Dataset.from_generator(
        make_samples, (tf.float32, tf.float32))
    return (
        source
        .repeat(3)
        .shuffle(shuffle_size)
        .batch(batch_size)
        .prefetch(1)
    )

from sklearn.model_selection import train_test_split

# First cut: 70 % of the file references for training, 30 % held out.
train_f, test_f, train_ID, test_ID, train_l, test_l = train_test_split(
    file_name, subject_ID, label, test_size=0.3, random_state=42
)

# Second cut: the held-out part is halved into validation and evaluation sets.
val_f, evl_f, val_ID, evl_ID, val_l, evl_l = train_test_split(
    test_f, test_ID, test_l, test_size=0.5, random_state=42
)

# Number of files in each split, one shape per line.
print(train_f.shape, val_f.shape, evl_f.shape, sep="\n")

# Generator-backed datasets; these rebind train_set / val_set / evl_set.
train_set = reader(train_f, train_ID, train_l, shuffle_size=224, batch_size=8)
val_set = reader(val_f, val_ID, val_l, shuffle_size=48, batch_size=8)
evl_set = reader(evl_f, evl_ID, evl_l, shuffle_size=48, batch_size=8)

# Sanity check: image and label tensor shapes of the first two training batches.
for line in train_set.take(2):
    print(line[0].shape.as_list(), line[1].shape.as_list(), sep="\n")