In [1]:
import re
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from functools import partial
from sklearn.model_selection import train_test_split
import tempfile

In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMAGE_SIZE = [1024, 1024]
BATCH_SIZE = 32

In [3]:
# Split into train and validation sets.
train_fnames, valid_fnames, train_labels, valid_labels = train_test_split(
    tf.io.gfile.glob('train_jpg/*.jpg'), 
    np.load('labels_retained.npy'),
    test_size=0.2, 
    random_state=0
)

In [15]:
# Define functions for loading data.
# Turn a loaded JPEG image into a tensor.
def preprocess_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, IMAGE_SIZE)  # What's the difference b/w reshape and resize?
    return image

# Load JPEG files.
def load_preprocess_image(path):
    image = tf.io.read_file(path)
    return preprocess_image(image)

# Make a dataset.
def load_train_dataset(filenames, labels):
    path_ds = tf.data.Dataset.from_tensor_slices(filenames)
    image_ds = path_ds.map(load_preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(labels)  # Load labels.
    image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))  # Zip images and labels.
    
    num_images = len(filenames)
    
    ds_out = image_label_ds.apply(
        tf.data.experimental.shuffle_and_repeat(buffer_size=num_images)
    )
    ds_out = ds_out.batch(BATCH_SIZE)
    ds_out = ds_out.prefetch(buffer_size=AUTOTUNE)

    return ds_out, num_images  # Return a dataset and number of items.

def load_valid_dataset(filenames, labels):
    path_ds = tf.data.Dataset.from_tensor_slices(filenames)
    image_ds = path_ds.map(load_preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(labels)  # Load labels.
    image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))  # Zip images and labels.
    
    num_images = len(filenames)
    
    ds_out = image_label_ds.batch(BATCH_SIZE)
    ds_out = ds_out.cache()
    ds_out = ds_out.prefetch(buffer_size=AUTOTUNE)

    return ds_out, num_images  # Return a dataset and number of items.

def load_test_dataset(filenames):
    path_ds = tf.data.Dataset.from_tensor_slices(filenames)
    image_ds = path_ds.map(load_preprocess_image, num_parallel_calls=AUTOTUNE)

    ds_out = image_ds.batch(BATCH_SIZE)
    ds_out = ds_out.prefetch(buffer_size=AUTOTUNE)

    return ds_out  # Return an image dataset alone.

In [5]:
train_ds, num_train_images = load_train_dataset(train_fnames[0:32], train_labels)
valid_ds, num_valid_images = load_valid_dataset(valid_fnames[0:32], valid_labels)

Instructions for updating:
Use `tf.data.Dataset.shuffle(buffer_size, seed)` followed by `tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take care of using the fused implementation.


In [6]:
def build_lrfn(
    lr_start=0.00001, lr_max=0.000075, 
    lr_min=0.000001, lr_rampup_epochs=20, 
    lr_sustain_epochs=0, lr_exp_decay=.8
):
    def lrfn(epoch):
        if epoch < lr_rampup_epochs:
            lr = (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
        elif epoch < lr_rampup_epochs + lr_sustain_epochs:
            lr = lr_max
        else:
            lr = (lr_max - lr_min) * lr_exp_decay ** (epoch - lr_rampup_epochs - lr_sustain_epochs) + lr_min
        return lr
    return lrfn

In [7]:
def make_model(output_bias = None, metrics = None):
    # Create the base model from the pre-trained model MobileNet V2
    
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
        
    base_model = tf.keras.applications.VGG16(
        input_shape=(*IMAGE_SIZE, 3),
        include_top=False,
        weights='imagenet'
    )
    
    base_model.trainable = False
    
    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(
            8, activation='relu'
        ),
        tf.keras.layers.Dense(
            1, activation='sigmoid',
            bias_initializer=output_bias
        )
    ])
    
    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(),
        loss='binary_crossentropy',
        metrics=metrics
    )
    
    return model

In [8]:
model = make_model(
    metrics=tf.keras.metrics.AUC(name='auc')
)

In [9]:
lrfn = build_lrfn()
STEPS_PER_EPOCH = num_train_images // BATCH_SIZE
VALID_STEPS = num_valid_images // BATCH_SIZE

In [10]:
history = model.fit(
    train_ds, epochs=3,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=valid_ds,
    validation_steps=VALID_STEPS,
    callbacks=[
        tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)
    ]
)


Epoch 00001: LearningRateScheduler reducing learning rate to 1e-05.
Epoch 1/3

Epoch 00002: LearningRateScheduler reducing learning rate to 1.325e-05.
Epoch 2/3

Epoch 00003: LearningRateScheduler reducing learning rate to 1.65e-05.
Epoch 3/3


In [16]:
ds = load_test_dataset(train_fnames[0:32])


In [17]:
pred = model.predict(ds)

In [18]:
pred


array([[0.74847436],
       [0.7555566 ],
       [0.75395626],
       [0.7540812 ],
       [0.7481322 ],
       [0.7573752 ],
       [0.74900055],
       [0.7445854 ],
       [0.7304162 ],
       [0.75058496],
       [0.7622591 ],
       [0.76029503],
       [0.75988936],
       [0.755166  ],
       [0.7539537 ],
       [0.7537733 ],
       [0.75265926],
       [0.7496214 ],
       [0.75989544],
       [0.75561464],
       [0.7473942 ],
       [0.75829566],
       [0.7496162 ],
       [0.75186676],
       [0.7457945 ],
       [0.7582307 ],
       [0.7597212 ],
       [0.76188326],
       [0.75703835],
       [0.7518357 ],
       [0.7542299 ],
       [0.75614274]], dtype=float32)