In [29]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import sys
sys.path.append('../scripts/helper_functions_cv/tensorflow_helpers/')
from gpu_starter_mirror_strategy import start_gpus
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import TensorBoard
from sklearn.utils import compute_class_weight
import collections

In [2]:
# Initialise the distribution strategy on GPU 0. The helper returns the
# strategy, the replica count and the autotune value, in that order.
strategy, REPLICAS, AUTO = start_gpus([0])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Returning objects as strategy, replicas and auto in same order.


In [3]:
# Load the training metadata table (one row per image).
csv_file = pd.read_csv('../../files/train.csv')

In [4]:
# Preview the first rows to check the available columns.
csv_file.head()

Unnamed: 0,image_id,center_id,patient_id,image_num,label
0,006388_0,11,006388,0,CE
1,008e5c_0,11,008e5c,0,CE
2,00c058_0,11,00c058,0,LAA
3,01adc5_0,11,01adc5,0,LAA
4,026c97_0,4,026c97,0,CE


In [37]:
# Derive the on-disk location of every image from its image_id.
csv_file['updated_paths'] = [
    '../../files/resized_train_1024/' + image_id + '.png'
    for image_id in csv_file['image_id']
]

In [38]:
# Sanity check: print every expected image path that does not exist on disk.
# No output means all files were found.
for image_path in csv_file['updated_paths']:
    if not os.path.exists(image_path):
        print(image_path)

In [39]:
# Keep only the columns used by the rest of the notebook.
updated_csv = csv_file[['patient_id', 'label', 'updated_paths']]

In [40]:
# Collapse the table to one row per patient; 'label' and 'updated_paths'
# become arrays holding the values of all of that patient's images.
# The frame is derived from csv_file (not from updated_csv itself) so that
# re-running this cell never regroups an already grouped frame.
updated_csv = (
    csv_file[['patient_id', 'label', 'updated_paths']]
    .groupby('patient_id', as_index = False)
    .agg(np.array)
)

In [42]:
def split_datasets(data, test_size = 0.1, random_state = 42):
    """Split ``data`` into train, validation and test subsets.

    The test subset is carved out first; the validation subset is then taken
    from the remaining rows using the same ``test_size`` fraction.

    Args:
        data: Table to split; rows are assigned to subsets as whole units.
        test_size: Fraction used for the test split and, applied to the
            remainder, for the validation split.
        random_state: Seed forwarded to both ``train_test_split`` calls so the
            subsets are identical across kernel restarts. Pass ``None`` to get
            a different random split on every call.

    Returns:
        Tuple ``(train, val, test)``.
    """
    train, test = train_test_split(data, test_size=test_size, random_state=random_state)
    train, val = train_test_split(train, test_size=test_size, random_state=random_state)
    return train, val, test

In [43]:
# Split the per-patient table, so all images of one patient stay in the same subset.
train, val, test = split_datasets(updated_csv)

In [44]:
# Report the number of rows in each subset.
for split_name, split_frame in (('Train', train), ('Val', val), ('Test', test)):
    print(f'{split_name}: {len(split_frame)}')

Train: 511
Val: 57
Test: 64


In [45]:
def give_final_data(csv_file):
    """Flatten grouped labels and image paths into two flat lists.

    Args:
        csv_file: Table whose 'label' and 'updated_paths' columns each hold a
            sequence of values per row.

    Returns:
        Tuple ``(labels, paths)``. ``labels`` contains ``np.float32(1)`` for
        every 'LAA' entry and ``np.float32(0)`` for anything else; ``paths``
        contains the path entries in row order.
    """
    flat_labels = [
        np.float32(1) if stroke_type == 'LAA' else np.float32(0)
        for row_labels in csv_file['label'].values.tolist()
        for stroke_type in row_labels
    ]
    flat_paths = [
        image_path
        for row_paths in csv_file['updated_paths'].values.tolist()
        for image_path in row_paths
    ]
    return flat_labels, flat_paths

In [46]:
def read_train_images(img, label):
    """Load one training image, apply a random horizontal flip and scale by 1/255.

    Args:
        img: Path of the image file (passed to ``tf.io.read_file``).
        label: Label paired with the image; returned unchanged.

    Returns:
        Tuple ``(image, label)`` with a 3-channel image divided by 255.
    """
    img = tf.io.read_file(img)
    # NOTE(review): the paths built in this notebook end in '.png' while the
    # decoder used is decode_jpeg -- confirm this is intended.
    img = tf.image.decode_jpeg(img, channels = 3)
    img  = tf.image.random_flip_left_right(img)
    # Apply the same 1/255 scaling as read_images so that training and
    # validation inputs share one value range.
    img = img / 255
    return img, label
def read_images(img, label):
    """Load one image without augmentation and scale it by 1/255.

    Args:
        img: Path of the image file (passed to ``tf.io.read_file``).
        label: Label paired with the image; returned unchanged.

    Returns:
        Tuple ``(image, label)`` with a 3-channel image divided by 255.
    """
    file_bytes = tf.io.read_file(img)
    pixels = tf.image.decode_jpeg(file_bytes, channels = 3)
    return pixels / 255, label

In [47]:
def get_data(csv_file, train=True, repeat = True, shuffle = True, batch = True, batch_size = 64, prefetch = True):
    """Build a ``tf.data`` input pipeline from a grouped metadata table.

    Args:
        csv_file: Table accepted by ``give_final_data``.
        train: If True, images are loaded with ``read_train_images``;
            otherwise with ``read_images``.
        repeat: If True, the dataset repeats indefinitely.
        shuffle: If True, elements are shuffled and non-deterministic element
            order is allowed.
        batch: If True, elements are batched.
        batch_size: Per-replica batch size; multiplied by ``REPLICAS``.
        prefetch: If True, prefetching is enabled with ``AUTO``.

    Returns:
        The configured ``tf.data.Dataset`` of ``(image, label)`` elements.
    """
    labels, imgs = give_final_data(csv_file)
    tensor = tf.data.Dataset.from_tensor_slices((imgs, labels))
    # Only the (path, label) pairs are cached here; image decoding happens in map.
    tensor = tensor.cache()
    if shuffle:
        # Shuffle before repeating so the shuffle buffer never mixes elements
        # from two consecutive passes over the data.
        tensor = tensor.shuffle(1024 * REPLICAS)
        opt = tf.data.Options()
        opt.experimental_deterministic = False
        tensor = tensor.with_options(opt)
    if repeat:
        tensor = tensor.repeat()
    reader = read_train_images if train else read_images
    tensor = tensor.map(reader, num_parallel_calls = AUTO)
    if batch:
        tensor = tensor.batch(batch_size * REPLICAS)
    if prefetch:
        tensor = tensor.prefetch(AUTO)
    return tensor

In [48]:
def create_model(model_name, size):
    """Build a binary classifier on top of a ``keras.applications`` backbone.

    Args:
        model_name: Attribute name of the constructor in ``keras.applications``
            (for example 'ResNet50').
        size: Height and width of the declared 3-channel square input.

    Returns:
        An uncompiled ``keras.Model`` ending in a single sigmoid unit.
    """
    # The input is created before the backbone so automatic layer naming
    # follows the same creation order as before.
    inputs = keras.Input((size, size, 3))
    backbone_factory = getattr(keras.applications, model_name)
    # Untrained backbone (weights=None) with global average pooling on top.
    backbone = backbone_factory(include_top = False,
                                weights = None,
                                pooling = 'avg')
    pooled_features = backbone(inputs)
    probability = keras.layers.Dense(1, activation='sigmoid')(pooled_features)
    return keras.Model(inputs, probability)
def compile_new_model(model):
    """Compile ``model`` with binary cross-entropy loss and a default SGD optimizer.

    Compilation happens inside ``strategy.scope()``.

    Args:
        model: Keras model to compile.

    Returns:
        The same model object, now compiled.
    """
    with strategy.scope():
        model.compile(
            loss = keras.losses.BinaryCrossentropy(),
            optimizer = keras.optimizers.SGD(),
        )
    return model

In [29]:
# Experiment: ResNet50 backbone, declared input size 256, batch size 32.
K.clear_session()
batch_size = 32
tb_path = '../TB/res50_baseline_bs_32_is_256/'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(tb_path, exist_ok=True)
tb_callback = TensorBoard(log_dir=tb_path)
with strategy.scope():
    model = create_model('ResNet50', 256)
    model = compile_new_model(model)
train_dataset = get_data(train, batch_size=batch_size)
# NOTE(review): `train` defaults to True here, so validation images also go
# through read_train_images (random flips) -- confirm this is intended.
val_dataset = get_data(val, repeat=False, batch_size=batch_size)
# `train` has one row per patient while the dataset yields one element per
# image, so an epoch is sized by the number of images, not the number of rows.
n_train_images = sum(len(paths) for paths in train['updated_paths'])
modelhist = model.fit(
    train_dataset,
    epochs = 20,
    verbose = 1,
    validation_data = val_dataset,
    steps_per_epoch = max(1, n_train_images // (REPLICAS * batch_size)),
    callbacks = [tb_callback]
)

KeyboardInterrupt: 

In [21]:
# Experiment: InceptionV3 backbone, declared input size 256, batch size 32.
K.clear_session()
batch_size = 32
tb_path = '../TB/incep_baseline_bs_32_is_256/'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(tb_path, exist_ok=True)
tb_callback = TensorBoard(log_dir=tb_path)
with strategy.scope():
    model = create_model('InceptionV3', 256)
    model = compile_new_model(model)
train_dataset = get_data(train, batch_size=batch_size)
# NOTE(review): `train` defaults to True here, so validation images also go
# through read_train_images (random flips) -- confirm this is intended.
val_dataset = get_data(val, repeat=False, batch_size=batch_size)
# `train` has one row per patient while the dataset yields one element per
# image, so an epoch is sized by the number of images, not the number of rows.
n_train_images = sum(len(paths) for paths in train['updated_paths'])
modelhist = model.fit(
    train_dataset,
    epochs = 20,
    verbose = 1,
    validation_data = val_dataset,
    steps_per_epoch = max(1, n_train_images // (REPLICAS * batch_size)),
    callbacks = [tb_callback]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [17]:
# Experiment: Xception backbone, declared input size 256, batch size 32,
# no flip augmentation (both datasets are read with read_images).
K.clear_session()
batch_size = 32
tb_path = '../TB/xcep_baseline_bs_32_is_256/'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(tb_path, exist_ok=True)
tb_callback = TensorBoard(log_dir=tb_path)
with strategy.scope():
    model = create_model('Xception', 256)
    model = compile_new_model(model)
train_dataset = get_data(train, train=False, batch_size=batch_size)
val_dataset = get_data(val, train = False, repeat=False, batch_size=batch_size)
# `train` has one row per patient while the dataset yields one element per
# image, so an epoch is sized by the number of images, not the number of rows.
n_train_images = sum(len(paths) for paths in train['updated_paths'])
modelhist = model.fit(
    train_dataset,
    epochs = 20,
    verbose = 1,
    validation_data = val_dataset,
    steps_per_epoch = max(1, n_train_images // (REPLICAS * batch_size)),
    callbacks = [tb_callback]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
# Experiment: Xception backbone, declared input size 256, batch size 32,
# random horizontal flips on the training images.
K.clear_session()
batch_size = 32
tb_path = '../TB/xcep_baseline_bs_32_is_256_i_flip/'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(tb_path, exist_ok=True)
tb_callback = TensorBoard(log_dir=tb_path)
with strategy.scope():
    model = create_model('Xception', 256)
    model = compile_new_model(model)
train_dataset = get_data(train, batch_size=batch_size)
val_dataset = get_data(val, train=False, repeat=False, shuffle=False, batch_size=batch_size)
# `train` has one row per patient while the dataset yields one element per
# image, so an epoch is sized by the number of images, not the number of rows.
n_train_images = sum(len(paths) for paths in train['updated_paths'])
modelhist = model.fit(
    train_dataset,
    epochs = 20,
    verbose = 1,
    validation_data = val_dataset,
    steps_per_epoch = max(1, n_train_images // (REPLICAS * batch_size)),
    callbacks = [tb_callback]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Class weights calculation

In [25]:
# Flatten the per-patient label arrays of the training split into one list of
# binary targets: 1.0 for "LAA", 0.0 for every other label.
label_coded = [
    np.float32(1.0) if stroke_type == "LAA" else np.float32(0.0)
    for patient_labels in train['label']
    for stroke_type in patient_labels
]

In [31]:
# Per-class weights for the two binary targets, using scikit-learn's
# 'balanced' mode. `classes` is passed as an ndarray, which is the documented
# parameter type; a plain list is rejected by scikit-learn's parameter
# validation in recent releases.
computed_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y = label_coded)

In [33]:
# Map class id -> weight, the form expected by the class_weight argument of model.fit.
weights = {class_id: class_weight for class_id, class_weight in zip((0, 1), computed_weights)}

In [35]:
# Experiment: Xception backbone, declared input size 256, batch size 32,
# random horizontal flips on the training images, class-weighted loss.
K.clear_session()
batch_size = 32
tb_path = '../TB/xcep_baseline_bs_32_is_256_i_flip_cw/'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(tb_path, exist_ok=True)
tb_callback = TensorBoard(log_dir=tb_path)
with strategy.scope():
    model = create_model('Xception', 256)
    model = compile_new_model(model)
train_dataset = get_data(train, batch_size=batch_size)
val_dataset = get_data(val, train=False, repeat=False, shuffle=False, batch_size=batch_size)
# `train` has one row per patient while the dataset yields one element per
# image, so an epoch is sized by the number of images, not the number of rows.
n_train_images = sum(len(paths) for paths in train['updated_paths'])
modelhist = model.fit(
    train_dataset,
    epochs = 20,
    verbose = 1,
    validation_data = val_dataset,
    steps_per_epoch = max(1, n_train_images // (REPLICAS * batch_size)),
    callbacks = [tb_callback],
    class_weight=weights
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [51]:
# Experiment: Xception backbone, declared input size 1024, batch size 8.
# NOTE(review): the log directory name ends in 'i_flip_cw', but this cell reads
# the training set with train=False (no flip) and passes no class_weight to
# fit -- confirm which of the two (name or configuration) is intended.
K.clear_session()
batch_size = 8
tb_path = '../TB/xcep_baseline_bs_8_is_1024_i_flip_cw/'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(tb_path, exist_ok=True)
tb_callback = TensorBoard(log_dir=tb_path)
with strategy.scope():
    model = create_model('Xception', 1024)
    model = compile_new_model(model)
train_dataset = get_data(train, train=False, batch_size=batch_size)
val_dataset = get_data(val, train=False, repeat=False, shuffle=False, batch_size=batch_size)
# `train` has one row per patient while the dataset yields one element per
# image, so an epoch is sized by the number of images, not the number of rows.
n_train_images = sum(len(paths) for paths in train['updated_paths'])
modelhist = model.fit(
    train_dataset,
    epochs = 20,
    verbose = 1,
    validation_data = val_dataset,
    steps_per_epoch = max(1, n_train_images // (REPLICAS * batch_size)),
    callbacks = [tb_callback]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Experiment: Xception backbone, declared input size 1024, batch size 8.
# NOTE(review): this cell writes to the same TensorBoard directory as the
# previous 1024 Xception cell, so both runs end up in one log directory. The
# directory name also ends in 'i_flip_cw' although the training set is read
# with train=False (no flip) and no class_weight is passed -- confirm intent.
K.clear_session()
batch_size = 8
tb_path = '../TB/xcep_baseline_bs_8_is_1024_i_flip_cw/'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(tb_path, exist_ok=True)
tb_callback = TensorBoard(log_dir=tb_path)
with strategy.scope():
    model = create_model('Xception', 1024)
    model = compile_new_model(model)
train_dataset = get_data(train, train=False, batch_size=batch_size)
val_dataset = get_data(val, train=False, repeat=False, shuffle=False, batch_size=batch_size)
# `train` has one row per patient while the dataset yields one element per
# image, so an epoch is sized by the number of images, not the number of rows.
n_train_images = sum(len(paths) for paths in train['updated_paths'])
modelhist = model.fit(
    train_dataset,
    epochs = 20,
    verbose = 1,
    validation_data = val_dataset,
    steps_per_epoch = max(1, n_train_images // (REPLICAS * batch_size)),
    callbacks = [tb_callback]
)