In [1]:
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from functools import partial
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPool2D
from keras.layers.advanced_activations import LeakyReLU
from datetime import datetime
from keras.preprocessing import image

Using TensorFlow backend.


In [2]:
# Working directory; to the tfrecord files
cwd = os.getcwd()
tfrecord_files_dir = (cwd + '/tfrecords')

In [3]:
# Prepare the lists of train and test tfrecords files
tfrecord_files = os.listdir(tfrecord_files_dir)
full_train_tfrecords = []
test_tfrecords = []
for i in tfrecord_files:
    if i[:4] == 'trai':
        full_train_tfrecords.append(tfrecord_files_dir + '/' + i)
    elif i[:4] == 'test':
        test_tfrecords.append(tfrecord_files_dir + '/' + i)

In [4]:
full_train_tfrecords

['/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train12-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train04-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train08-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train03-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train06-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train10-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train01-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train00-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train07-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train11-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train09-2071.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/train02-2071.tfrec',
 '/s

In [5]:
test_tfrecords

['/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test14-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test15-677.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test10-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test11-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test12-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test13-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test01-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test00-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test03-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test02-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test05-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test04-687.tfrec',
 '/sfs/lustre/bahamut/scratc

In [6]:
input_shape = [300,300,3]

In [7]:
# Define functions to create train and validation datasets

def preprocess(tfrecord):
    train_feature_descriptions = {
        "image": tf.io.VarLenFeature(tf.string),
        "image_name": tf.io.FixedLenFeature([], tf.string, default_value=""),
        "target": tf.io.FixedLenFeature([], tf.int64)
    }
    example = tf.io.parse_single_example(tfrecord, train_feature_descriptions)
    image = tf.io.decode_image(example["image"].values[0])
    image = tf.reshape(image, shape=[1024, 1024, 3])
    image = tf.image.resize(image, input_shape[:2], method='nearest')
    return image/255, example["target"]

def create_dataset(filepaths, n_read_threads=5, n_parse_threads=5, batch_size=32):
    dataset = tf.data.TFRecordDataset(filepaths, num_parallel_reads=n_read_threads)
    
    train_size = int(0.8 * len(list(dataset)))
    valid_size = int(0.2 * len(list(dataset)))
    
    dataset = dataset.shuffle(len(list(dataset)))
    dataset = dataset.map(preprocess, num_parallel_calls=n_parse_threads)
    
    train_dataset = dataset.take(train_size)
    valid_dataset = dataset.skip(valid_size)

    train_dataset = train_dataset.batch(batch_size)
    valid_dataset = valid_dataset.batch(batch_size)
    return train_dataset.prefetch(1), valid_dataset.prefetch(1)

In [None]:
train_set, valid_set = create_dataset(full_train_tfrecords)

In [None]:
train_set

In [None]:
valid_set

In [None]:
plt.figure(figsize=(18,6))
for X, y in train_set.take(1):
    for i in range(5):
        plt.subplot(1, 5, i + 1)
        plt.imshow(X[i].numpy())
        plt.axis("off")
        plt.title(str(y[i].numpy()))

In [None]:
DefaultConv2D = partial(keras.layers.Conv2D, kernel_size=3, padding="SAME")

model = keras.models.Sequential([
    DefaultConv2D(filters=64, kernel_size=3, input_shape=input_shape),
    keras.layers.LeakyReLU(alpha=0.2),
    keras.layers.MaxPooling2D(pool_size=2),
    DefaultConv2D(filters=128),
    keras.layers.LeakyReLU(alpha=0.2),
    DefaultConv2D(filters=128),
    keras.layers.LeakyReLU(alpha=0.2),
    keras.layers.MaxPooling2D(pool_size=2),
    DefaultConv2D(filters=256),
    keras.layers.LeakyReLU(alpha=0.2),
    DefaultConv2D(filters=256),
    keras.layers.LeakyReLU(alpha=0.2),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(units=128),
    keras.layers.LeakyReLU(alpha=0.2),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(units=64),
    keras.layers.LeakyReLU(alpha=0.2),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(units=1, activation='sigmoid')
])

model.compile(
    optimizer=keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC()])

model.summary()

In [None]:
logs = os.path.join(os.curdir, "my_logs", "run_" + datetime.now().strftime("%Y%m%d_%H%M%S"))
tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=logs, histogram_freq=1, profile_batch=10)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
checkpoint_cb = keras.callbacks.ModelCheckpoint("my_model_tfrecords.h5", save_best_only=True)

In [None]:
weight_for_0 = (1 / 32542)*(33126)/2.0 
weight_for_1 = (1 / 584)*(33126)/2.0

class_weight = {0: weight_for_0, 1: weight_for_1}

history = model.fit(train_set, 
                    epochs=200, 
                    validation_data=valid_set, 
                    class_weight = class_weight,
                    callbacks=[early_stopping_cb, checkpoint_cb, tensorboard_cb])