In [1]:
import os
import random
import pathlib
import tensorflow as tf
import tensorflow.io as tfio
import tensorflow_hub as hub
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, layers, models

In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
NUM_CLASSES = 2
RESIZE_TO = 512
CROP_TO = 256
IMAGE_SIZE = (CROP_TO, CROP_TO)
NUM_CHANNELS = 3
SCHEDULE_LENGTH = 1000
SCHEDULE_BOUNDARIES = [300, 600, 900]
BATCH_SIZE = 64
SCHEDULE_LENGTH = SCHEDULE_LENGTH * 512 / BATCH_SIZE
STEPS_PER_EPOCH = 10
lr = 0.03 * BATCH_SIZE / 512 

def load_and_preprocess_image(path, target_image_shape=IMAGE_SIZE, num_channnels=NUM_CHANNELS):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=num_channnels)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_brightness(image, 0.2, seed=None)
    image = tf.image.random_hue(image, 0.2)
#     image = tf.image.resize(image, [RESIZE_TO, RESIZE_TO])
#     image = tf.image.random_crop(image, [CROP_TO, CROP_TO, 3])
    image = tf.image.resize(image, [CROP_TO, CROP_TO])
    image /= 255.0  # normalize to [0,1] range
#     image = 2*image-1
    return image

In [3]:
train_val_data_root = '../dataset_5_512/train'
train_val_data_root = pathlib.Path(train_val_data_root)
all_image_paths = [str(path) for path in list(train_val_data_root.glob('*/*'))]
label_names = sorted(item.name for item in train_val_data_root.glob('*/') if item.is_dir())
label_to_index = dict((name, index) for index,name in enumerate(label_names))
all_image_labels = [label_to_index[pathlib.Path(path).parent.name] for path in all_image_paths]
image_count = len(all_image_paths)
image_path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
image_ds = image_path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(all_image_labels, tf.int64))
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
train_pipeline = (
    image_label_ds
        .cache()
        .apply(tf.data.experimental.shuffle_and_repeat(buffer_size=len(all_image_paths)))
        .batch(BATCH_SIZE)
        .prefetch(buffer_size=AUTOTUNE)
)
train_steps_per_epoch=tf.math.ceil(len(all_image_paths)/BATCH_SIZE).numpy()

Instructions for updating:
Use `tf.data.Dataset.shuffle(buffer_size, seed)` followed by `tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take care of using the fused implementation.


In [4]:
test_data_root = '../dataset_5_512/val'
test_data_root = pathlib.Path(test_data_root)
all_image_paths = [str(path) for path in list(test_data_root.glob('*/*'))]
label_names = sorted(item.name for item in test_data_root.glob('*/') if item.is_dir())
label_to_index = dict((name, index) for index,name in enumerate(label_names))
all_image_labels = [label_to_index[pathlib.Path(path).parent.name] for path in all_image_paths]
image_count = len(all_image_paths)
image_path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
image_ds = image_path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(all_image_labels, tf.int64))
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
test_pipeline = (image_label_ds
                    .cache()
                    .apply(tf.data.experimental.shuffle_and_repeat(buffer_size=len(all_image_paths)))
                    .batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE))
val_steps_per_epoch=tf.math.ceil(len(all_image_paths)/BATCH_SIZE).numpy()

In [8]:
input_tensor = tf.keras.Input(shape=(CROP_TO,CROP_TO,NUM_CHANNELS))
vgg16 =  tf.keras.applications.vgg16.VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
x = vgg16.output
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs=vgg16.input, outputs=x)

In [9]:
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
)
loss_fn = tf.keras.losses.BinaryCrossentropy()
model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=['accuracy']
)

In [10]:
history = model.fit(
    train_pipeline, 
    epochs=100, 
    steps_per_epoch=train_steps_per_epoch, 
    validation_data=test_pipeline, 
    validation_steps=val_steps_per_epoch
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100


Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
