In [3]:
import re
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from functools import partial
from sklearn.model_selection import train_test_split
import tempfile




In [4]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMAGE_SIZE = [1024, 1024]
BATCH_SIZE = 32

In [5]:
# Split into train and validation sets.
path_to_images = tf.io.gfile.glob(os.path.join('train_jpg', '*.jpg'))
labels = pd.read_csv(os.path.join('train_jpg', 'retained_labels.csv'))
path_to_images.sort()  # Sort both path names and labels data frame so that they have the same order.
labels.sort_values(by=['id'], inplace=True)

train_fnames, valid_fnames, train_labels, valid_labels = train_test_split(
    path_to_images,
    labels['label'].values,
    test_size=0.2,
    random_state=0
)

In [6]:
# Define functions for loading data.
# Turn a loaded JPEG image into a tensor.
def preprocess_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, IMAGE_SIZE)  # What's the difference b/w reshape and resize?
    return image

# Load JPEG files.
def load_preprocess_image(path):
    image = tf.io.read_file(path)
    return preprocess_image(image)

# Make a dataset.
def load_train_dataset(filenames, labels):
    path_ds = tf.data.Dataset.from_tensor_slices(filenames)
    image_ds = path_ds.map(load_preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(labels)  # Load labels.
    image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))  # Zip images and labels.
    
    num_images = len(filenames)
    
    ds_out = image_label_ds.apply(
        tf.data.experimental.shuffle_and_repeat(buffer_size=num_images)
    )
    ds_out = ds_out.batch(BATCH_SIZE)
    ds_out = ds_out.prefetch(buffer_size=AUTOTUNE)

    return ds_out, num_images  # Return a dataset and number of items.

def load_valid_dataset(filenames, labels):
    path_ds = tf.data.Dataset.from_tensor_slices(filenames)
    image_ds = path_ds.map(load_preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(labels)  # Load labels.
    image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))  # Zip images and labels.
    
    num_images = len(filenames)
    
    ds_out = image_label_ds.batch(BATCH_SIZE)
    ds_out = ds_out.cache()
    ds_out = ds_out.prefetch(buffer_size=AUTOTUNE)

    return ds_out, num_images  # Return a dataset and number of items.

def load_test_dataset(filenames):
    path_ds = tf.data.Dataset.from_tensor_slices(filenames)
    image_ds = path_ds.map(load_preprocess_image, num_parallel_calls=AUTOTUNE)

    ds_out = image_ds.batch(BATCH_SIZE)
    ds_out = ds_out.prefetch(buffer_size=AUTOTUNE)

    return ds_out  # Return an image dataset alone.

In [7]:
train_ds, num_train_images = load_train_dataset(train_fnames[0:32], train_labels)
valid_ds, num_valid_images = load_valid_dataset(valid_fnames[0:32], valid_labels)

Instructions for updating:
Use `tf.data.Dataset.shuffle(buffer_size, seed)` followed by `tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take care of using the fused implementation.


In [9]:
num_train_images

32

In [6]:
def build_lrfn(
    lr_start=0.00001, lr_max=0.000075, 
    lr_min=0.000001, lr_rampup_epochs=20, 
    lr_sustain_epochs=0, lr_exp_decay=.8
):
    def lrfn(epoch):
        if epoch < lr_rampup_epochs:
            lr = (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
        elif epoch < lr_rampup_epochs + lr_sustain_epochs:
            lr = lr_max
        else:
            lr = (lr_max - lr_min) * lr_exp_decay ** (epoch - lr_rampup_epochs - lr_sustain_epochs) + lr_min
        return lr
    return lrfn

In [7]:
def make_model(output_bias = None, metrics = None):
    # Create the base model from the pre-trained model MobileNet V2
    
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
        
    base_model = tf.keras.applications.VGG16(
        input_shape=(*IMAGE_SIZE, 3),
        include_top=False,
        weights='imagenet'
    )
    
    base_model.trainable = False
    
    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(
            8, activation='relu'
        ),
        tf.keras.layers.Dense(
            1, activation='sigmoid',
            bias_initializer=output_bias
        )
    ])
    
    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(),
        loss='binary_crossentropy',
        metrics=[metrics]
    )
    
    return model

In [8]:
model = make_model(
    metrics=tf.keras.metrics.AUC(name='auc')
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [9]:
lrfn = build_lrfn()
STEPS_PER_EPOCH = num_train_images // BATCH_SIZE
VALID_STEPS = num_valid_images // BATCH_SIZE

In [None]:
history = model.fit(
    train_ds, epochs=3,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=valid_ds,
    validation_steps=VALID_STEPS,
    callbacks=[
        tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)
    ]
)

Train on 1 steps, validate on 1 steps

Epoch 00001: LearningRateScheduler reducing learning rate to 1e-05.
Epoch 1/3


In [None]:
ds = load_test_dataset(train_fnames[0:32])


In [None]:
pred = model.predict(ds)

In [None]:
pred
