In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from matplotlib.image import imread

# Create dataset of image paths in train folder
cap_ds = tf.data.Dataset.list_files(str("./cap_nocap_dataset/cap/*.jpg"), shuffle=False)
nocap_ds = tf.data.Dataset.list_files(str("./cap_nocap_dataset/nocap/*.jpg"), shuffle=False)
# noperson_ds = tf.data.Dataset.list_files(str("./cap_nocap_dataset/noperson/*.jpg"), shuffle=False)
print(cap_ds)
print(nocap_ds)
# print(noperson_ds)

# Create list of class indices
label_names = ['person_cap', 'person_nocap']
cap_target_ds = [0 for cap in cap_ds]
nocap_target_ds = [1 for nocap in nocap_ds]

# Encode class indices in one-hot form
cap_target_ds = tf.one_hot(cap_target_ds, len(label_names), on_value=1.0, off_value=0.0)
nocap_target_ds = tf.one_hot(nocap_target_ds, len(label_names), on_value=1.0, off_value=0.0)
# noperson_target_ds = tf.zeros([116,2])

print(cap_target_ds.shape)
print(nocap_target_ds.shape)
# print(noperson_target_ds.shape)

cap_target_ds = tf.data.Dataset.from_tensor_slices(cap_target_ds)
nocap_target_ds = tf.data.Dataset.from_tensor_slices(nocap_target_ds)
# noperson_target_ds = tf.data.Dataset.from_tensor_slices(noperson_target_ds)

<TensorSliceDataset shapes: (), types: tf.string>
<TensorSliceDataset shapes: (), types: tf.string>
(249, 2)
(165, 2)


In [2]:
# Helper function to read images and pre-process images
# InceptionV3 accepts input of size (299,299,3)
# The input range is (-1,1)
def readImages_gray(path):
    image = tf.io.read_file(path)
    image = tf.io.decode_jpeg(image)
    image = tf.image.resize_with_pad(image, 299, 299)
    image = tf.image.grayscale_to_rgb(image)
    image = keras.applications.inception_v3.preprocess_input(image)

    return image

def readImages(path):
    image = tf.io.read_file(path)
    image = tf.io.decode_jpeg(image)
    image = tf.image.resize_with_pad(image, 299, 299)
    image = keras.applications.inception_v3.preprocess_input(image)

    return image

In [3]:
# print some information to check on images, label and target
path = next(iter(cap_ds))
image = readImages(path)

print("Path: " + str(path))
print("Label: " + str(next(iter(cap_target_ds))))
print(image.shape)
print(np.max(image))
print(np.min(image))

Path: tf.Tensor(b'.\\cap_nocap_dataset\\cap\\0_0.jpg', shape=(), dtype=string)
Label: tf.Tensor([1. 0.], shape=(2,), dtype=float32)
(299, 299, 1)
1.0
-1.0


In [4]:
# Create dataset of input data, pre-processed images and target
cap_images_ds = cap_ds.map(readImages_gray)
nocap_images_ds = nocap_ds.map(readImages_gray)
# noperson_images_ds = noperson_ds.map(readImages)

print(cap_images_ds)
print(nocap_images_ds)
# print(noperson_images_ds)

<MapDataset shapes: (299, 299, 3), types: tf.float32>
<MapDataset shapes: (299, 299, 3), types: tf.float32>


In [5]:
# Create train and test datasets from input and target datasets of all 3 labels
test_images_ds = cap_images_ds.take(16).concatenate(nocap_images_ds.take(16))
test_target_ds = cap_target_ds.take(16).concatenate(nocap_target_ds.take(16))

print(test_images_ds)
print(test_target_ds)

train_images_ds = cap_images_ds.concatenate(nocap_images_ds)
train_target_ds = cap_target_ds.concatenate(nocap_target_ds)

print(train_images_ds)
print(train_target_ds)

test_ds = tf.data.Dataset.zip((test_images_ds, test_target_ds))
train_ds = tf.data.Dataset.zip((train_images_ds, train_target_ds))

test_ds = test_ds.shuffle(64)
test_ds = test_ds.batch(8)

train_ds = train_ds.shuffle(512)
train_ds = train_ds.batch(8)

print(test_ds)
print(train_ds)

<ConcatenateDataset shapes: (299, 299, 3), types: tf.float32>
<ConcatenateDataset shapes: (2,), types: tf.float32>
<ConcatenateDataset shapes: (299, 299, 3), types: tf.float32>
<ConcatenateDataset shapes: (2,), types: tf.float32>
<BatchDataset shapes: ((None, 299, 299, 3), (None, 2)), types: (tf.float32, tf.float32)>
<BatchDataset shapes: ((None, 299, 299, 3), (None, 2)), types: (tf.float32, tf.float32)>


In [10]:
# Load PASCAL VOC trained model, print layer names
inceptionv3 = keras.models.load_model("inceptionv3_pascalvoc_0_1_1.h5")
x = inceptionv3.layers[-2].output
predictions = keras.layers.Dense(2, activation='softmax', name='dense10')(x)
new_inceptionv3 = keras.models.Model(inputs=inceptionv3.input, outputs=predictions)
new_inceptionv3.summary()

             
__________________________________________________________________________________________________
batch_normalization_69 (BatchNo (None, 17, 17, 192)  576         conv2d_69[0][0]                  
__________________________________________________________________________________________________
activation_60 (Activation)      (None, 17, 17, 192)  0           batch_normalization_60[0][0]     
__________________________________________________________________________________________________
activation_63 (Activation)      (None, 17, 17, 192)  0           batch_normalization_63[0][0]     
__________________________________________________________________________________________________
activation_68 (Activation)      (None, 17, 17, 192)  0           batch_normalization_68[0][0]     
__________________________________________________________________________________________________
activation_69 (Activation)      (None, 17, 17, 192)  0           batch_normalization_69[0][0]  

In [11]:
# First: Train the new top-layer only
# Hence, freeze all layers in pre-trained model
for layer in inceptionv3.layers:
    layer.trainable = False

# Compile model, ready to be trained on new data
new_inceptionv3.compile(optimizer=keras.optimizers.RMSprop(lr=0.001, rho=0.9, decay=0.0001), loss=keras.losses.CategoricalCrossentropy(), metrics=[keras.metrics.CategoricalAccuracy()])

# Train the top-layer only
e_1 = 3
new_inceptionv3.fit(train_ds, epochs=e_1)
new_inceptionv3.save('inceptionv3_yalecap_0_'+str(e_1)+'_0.h5')

Train for 52 steps
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [14]:
# Now start fine-tuning the convolutional layers, freeze the bottom N layers and train the remaining top layers
# Choose N layers to freeze and unfreeze the rest
N = 249
for layer in new_inceptionv3.layers[:N]:
    layer.trainable = False
for layer in new_inceptionv3.layers[N:]:
    layer.trainable = True
    
# Recompile model for modifications to take effect
# Use Stochastic Gradient Descent with a Low learning rate this time
new_inceptionv3.compile(optimizer=keras.optimizers.SGD(lr=0.0001, momentum=0.9, decay=0.00001), loss=keras.losses.CategoricalCrossentropy(), metrics=[keras.metrics.CategoricalAccuracy()])

# Train the model again, this time fine-tuning some inception blocks alongside new top layers
e_2 = 3
new_inceptionv3.fit(train_ds, epochs=e_2)
new_inceptionv3.save('inceptionv3_yalecap_0_'+str(e_1)+'_'+str(e_2)+'.h5')

Train for 52 steps
Epoch 1/2
Epoch 2/2


In [15]:
# Evaluate the model
eval = new_inceptionv3.evaluate(test_ds)
print(eval)

[0.5253678038716316, 0.71875]
