In [15]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from matplotlib.image import imread

# Create dataset of image paths in train folder
paths_ds = tf.data.Dataset.list_files(str("./VOC2007_train_subimages/*.jpg"), shuffle=False)
print(paths_ds)

# Helpfer function to extract class/label names from paths
def classnamefromPath(path):
    parts = tf.strings.split(path, os.sep)
    filename = parts[-1]
    label = tf.strings.split(filename, '_')[-1]
    label = tf.strings.split(label, '.')[0]

    return label

# Create list of label/class names
labels = paths_ds.map(classnamefromPath)
labels = np.array(list(labels.as_numpy_iterator()))
labels = [label.decode('utf-8') for label in labels]
label_names = np.unique(labels)
print(label_names)

# Helper fuction to convert string labels to class number
def indexfromClassname(name, labels):
    idx = np.where(labels == name)
    return idx[0][0]

# Create list of class indices
labels_indices = [indexfromClassname(label, label_names) for label in labels]
labels_indices = np.array(labels_indices)
print(labels_indices.shape)

# Encode class indices in one-hot form
target_ds = tf.one_hot(labels_indices, len(label_names), on_value=1.0, off_value=0.0)
target_ds = tf.data.Dataset.from_tensor_slices(target_ds)
print(target_ds)

<TensorSliceDataset shapes: (), types: tf.string>
['aeroplane' 'bicycle' 'bird' 'boat' 'bottle' 'bus' 'car' 'cat' 'chair'
 'cow' 'diningtable' 'dog' 'horse' 'motorbike' 'person' 'pottedplant'
 'sheep' 'sofa' 'train' 'tvmonitor']
(5834,)
<TensorSliceDataset shapes: (20,), types: tf.float32>


In [9]:
# Helper function to read images and pre-process images
# InceptionV3 accepts input of size (299,299,3)
# The input range is (-1,1)
def readImages(path):
    image = tf.io.read_file(path)
    image = tf.io.decode_jpeg(image)
    image = tf.image.resize_with_pad(image, 299, 299)
    image = keras.applications.inception_v3.preprocess_input(image)

    return image

In [11]:
# print some information to check on images, label and target
path = next(iter(paths_ds))
image = readImages(path)

print("Path: " + str(path))
print("Label: " + str(next(iter(labels))))
print(image.shape)
print(np.max(image))
print(np.min(image))

Path: tf.Tensor(b'.\\VOC2007_train_subimages\\000005_chair.jpg', shape=(), dtype=string)
Label: chair
(299, 299, 3)
1.0
-1.0


In [13]:
# Create dataset of input data, pre-processed images and target
images_ds = paths_ds.map(readImages)
print(images_ds)

<MapDataset shapes: (299, 299, None), types: tf.float32>


In [16]:
# Create main dataset from input and target datasets
train_dataset = tf.data.Dataset.zip((images_ds, target_ds))
train_dataset = train_dataset.batch(8)
print(train_dataset)

<BatchDataset shapes: ((None, 299, 299, None), (None, 20)), types: (tf.float32, tf.float32)>


In [17]:
# Create pre-trained model
inceptionv3 = keras.applications.inception_v3.InceptionV3(weights='imagenet', input_shape=(299, 299, 3), include_top=False)

# Add global spatial average pooling layer
x = inceptionv3.output
x = keras.layers.GlobalAveragePooling2D()(x)

# Add a fully-connected layer to the raw output of network
x = keras.layers.Dense(512, activation='relu')(x)

# Add a drop-out layer
x = keras.layers.Dropout(rate=0.5)(x)

# Add a logistic layer (softmax) - to predict 10 classes
predictions = keras.layers.Dense(20, activation='softmax')(x)

# Compose the model based on new top-layer
new_inceptionv3 = keras.models.Model(inputs=inceptionv3.input, outputs=predictions)

# First: Train the new top-layer only
# Hence, freeze all layers in pre-trained model
for layer in inceptionv3.layers:
    layer.trainable = False

# Compile model, ready to be trained on new data
new_inceptionv3.compile(optimizer=keras.optimizers.RMSprop(lr=0.001, rho=0.9, decay=0.0001), loss=keras.losses.CategoricalCrossentropy(), metrics=[keras.metrics.CategoricalAccuracy()])

# Train the top-layer only
new_inceptionv3.fit(train_dataset, epochs=1)
new_inceptionv3.save('inceptionv3_pascalvoc_0_0.h5')

Train for 730 steps


In [18]:
# Now start fine-tuning the convolutional layers, freeze the bottom N layers and train the remaining top layers
# But first, lets visualise the layers available in Inception v3
for i, layer in enumerate(inceptionv3.layers):
    print(i, layer.name)
    
# Choose N layers to freeze and unfreeze the rest
N = 249
for layer in new_inceptionv3.layers[:N]:
    layer.trainable = False
for layer in new_inceptionv3.layers[N:]:
    layer.trainable = True
    
# Recompile model for modifications to take effect
# Use Stochastic Gradient Descent with a Low learning rate this time
new_inceptionv3.compile(optimizer=keras.optimizers.SGD(lr=0.0001, momentum=0.9, decay=0.00001), loss=keras.losses.CategoricalCrossentropy(), metrics=[keras.metrics.CategoricalAccuracy()])

# Train the model again, this time fine-tuning some inception blocks alongside new top layers
new_inceptionv3.fit(train_dataset, epochs=1)
new_inceptionv3.save('inceptionv3_pascalvoc_0_1.h5')

0 input_1
1 conv2d
2 batch_normalization
3 activation
4 conv2d_1
5 batch_normalization_1
6 activation_1
7 conv2d_2
8 batch_normalization_2
9 activation_2
10 max_pooling2d
11 conv2d_3
12 batch_normalization_3
13 activation_3
14 conv2d_4
15 batch_normalization_4
16 activation_4
17 max_pooling2d_1
18 conv2d_8
19 batch_normalization_8
20 activation_8
21 conv2d_6
22 conv2d_9
23 batch_normalization_6
24 batch_normalization_9
25 activation_6
26 activation_9
27 average_pooling2d
28 conv2d_5
29 conv2d_7
30 conv2d_10
31 conv2d_11
32 batch_normalization_5
33 batch_normalization_7
34 batch_normalization_10
35 batch_normalization_11
36 activation_5
37 activation_7
38 activation_10
39 activation_11
40 mixed0
41 conv2d_15
42 batch_normalization_15
43 activation_15
44 conv2d_13
45 conv2d_16
46 batch_normalization_13
47 batch_normalization_16
48 activation_13
49 activation_16
50 average_pooling2d_1
51 conv2d_12
52 conv2d_14
53 conv2d_17
54 conv2d_18
55 batch_normalization_12
56 batch_normalization_14
5